author     Martin Matuska <mm@FreeBSD.org>  2022-10-04 15:52:09 +0000
committer  Martin Matuska <mm@FreeBSD.org>  2022-10-04 15:52:45 +0000
commit     f193a24ec57067da831f732865e5871e311704af (patch)
tree       e74b8b6e6dafe40102301ea55765ab95cf47b016
parent     6d2cfa2d34e3ade155f65066f25716dc734ac302 (diff)
parent     6a6bd493988c75331deab06e5352a9bed035a87d (diff)
download   src-f193a24ec57067da831f732865e5871e311704af.tar.gz
           src-f193a24ec57067da831f732865e5871e311704af.zip
zfs: merge openzfs/zfs@6a6bd4939 (zfs-2.1-release) into stable/13
OpenZFS release 2.1.6

Notable upstream pull request merges:
  #11733 ICP: Add missing stack frame info to SHA asm files
  #12274 Optimize txg_kick() process
  #12284 Add Module Parameter Regarding Log Size Limit
  #12285 Introduce a tunable to exclude special class buffers from L2ARC
  #12287 Remove refcount from spa_config_*()
  #12425 Avoid small buffer copying on write
  #12516 Fix NFS and large reads on older kernels
  #12678 spa.c: Replace VERIFY(nvlist_*(...) == 0) with fnvlist_*
  #12789 Improve log spacemap load time
  #13022 Add more control/visibility and speedup spa_load_verify()
  #13106 add physical device size to SIZE column in 'zpool list -v'
  #13388 Improve mg_aliquot math
  #13405 Revert "Reduce dbuf_find() lock contention"
  #13452 More speculative prefetcher improvements
  #13476 Refactor Log Size Limit
  #13540 AVL: Remove obsolete branching optimizations
  #13553 Reduce ZIO io_lock contention on sorted scrub
  #13555 Scrub mirror children without BPs
  #13563 FreeBSD: Improve crypto_dispatch() handling
  #13576 Several sorted scrub optimizations
  #13579 Fix and disable blocks statistics during scrub
  #13582 Several B-tree optimizations
  #13591 Avoid two 64-bit divisions per scanned block
  #13606 Avoid memory copies during mirror scrub
  #13613 Avoid memory copy when verifying raidz/draid parity
  #13643 Fix scrub resume from newly created hole
  #13756 FreeBSD: Mark ZFS_MODULE_PARAM_CALL as MPSAFE
  #13767 arcstat: fix -p option
  #13781 Importing from cachefile can trip assertion
  #13794 Apply arc_shrink_shift to ARC above arc_c_min
  #13798 Improve too large physical ashift handling
  #13811 Fix column width in 'zpool iostat -v' and 'zpool list -v'
  #13842 make DMU_OT_IS_METADATA and DMU_OT_IS_ENCRYPTED return B_TRUE or B_FALSE
  #13855 zfs recv hangs if max recordsize is less than received recordsize
  #13861 Fix use-after-free in btree code
  #13865 vdev_draid_lookup_map() should not iterate outside draid_maps
  #13878 Delay ZFS_PROP_SHARESMB property to handle it for encrypted raw receive
  #13882 FreeBSD: Fix integer conversion for vnlru_free{,_vfsops}()
  #13885 Fix incorrect size given to bqueue_enqueue() call in dmu_redact.c
  #13908 FreeBSD: stop passing LK_INTERLOCK to VOP_LOCK
  #13930 zpool: Don't print "repairing" on force faulted drives
  #13954 Fix bad free in skein code

Obtained from:  OpenZFS
OpenZFS tag:    zfs-2.1.6
OpenZFS commit: 6a6bd493988c75331deab06e5352a9bed035a87d
Relnotes:       yes
-rw-r--r--  cddl/lib/libicp/Makefile  4
-rw-r--r--  cddl/lib/libicp_rescue/Makefile  4
-rw-r--r--  sys/contrib/openzfs/META  4
-rw-r--r--  sys/contrib/openzfs/Makefile.am  7
-rwxr-xr-x  sys/contrib/openzfs/cmd/arcstat/arcstat.in  2
-rw-r--r--  sys/contrib/openzfs/cmd/zdb/zdb.c  113
-rw-r--r--  sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c  155
-rw-r--r--  sys/contrib/openzfs/cmd/zed/zed_disk_event.c  60
-rw-r--r--  sys/contrib/openzfs/cmd/zfs/zfs_main.c  2
-rw-r--r--  sys/contrib/openzfs/cmd/zfs/zfs_project.c  14
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/zpool_main.c  28
-rw-r--r--  sys/contrib/openzfs/config/always-compiler-options.m4  58
-rw-r--r--  sys/contrib/openzfs/config/always-parallel.m4  8
-rw-r--r--  sys/contrib/openzfs/config/kernel-blk-queue.m4  16
-rw-r--r--  sys/contrib/openzfs/config/kernel-blkdev.m4  28
-rw-r--r--  sys/contrib/openzfs/config/kernel-block-device-operations.m4  17
-rw-r--r--  sys/contrib/openzfs/config/kernel-get-disk-ro.m4  4
-rw-r--r--  sys/contrib/openzfs/config/kernel-make-request-fn.m4  20
-rw-r--r--  sys/contrib/openzfs/config/kernel-shrink.m4  67
-rw-r--r--  sys/contrib/openzfs/config/kernel-xattr-handler.m4  29
-rw-r--r--  sys/contrib/openzfs/config/zfs-build.m4  5
-rw-r--r--  sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in  2
-rw-r--r--  sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py  2
-rw-r--r--  sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h  2
-rw-r--r--  sys/contrib/openzfs/include/os/linux/kernel/linux/xattr_compat.h  14
-rw-r--r--  sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h  4
-rw-r--r--  sys/contrib/openzfs/include/sys/abd.h  1
-rw-r--r--  sys/contrib/openzfs/include/sys/abd_impl.h  1
-rw-r--r--  sys/contrib/openzfs/include/sys/arc.h  1
-rw-r--r--  sys/contrib/openzfs/include/sys/bqueue.h  14
-rw-r--r--  sys/contrib/openzfs/include/sys/btree.h  12
-rw-r--r--  sys/contrib/openzfs/include/sys/crypto/icp.h  3
-rw-r--r--  sys/contrib/openzfs/include/sys/dbuf.h  20
-rw-r--r--  sys/contrib/openzfs/include/sys/dmu.h  6
-rw-r--r--  sys/contrib/openzfs/include/sys/dmu_objset.h  4
-rw-r--r--  sys/contrib/openzfs/include/sys/dmu_tx.h  1
-rw-r--r--  sys/contrib/openzfs/include/sys/dmu_zfetch.h  16
-rw-r--r--  sys/contrib/openzfs/include/sys/dsl_pool.h  8
-rw-r--r--  sys/contrib/openzfs/include/sys/dsl_scan.h  2
-rw-r--r--  sys/contrib/openzfs/include/sys/fs/zfs.h  43
-rw-r--r--  sys/contrib/openzfs/include/sys/metaslab.h  3
-rw-r--r--  sys/contrib/openzfs/include/sys/metaslab_impl.h  1
-rw-r--r--  sys/contrib/openzfs/include/sys/range_tree.h  21
-rw-r--r--  sys/contrib/openzfs/include/sys/spa.h  21
-rw-r--r--  sys/contrib/openzfs/include/sys/spa_impl.h  4
-rw-r--r--  sys/contrib/openzfs/include/sys/spa_log_spacemap.h  9
-rw-r--r--  sys/contrib/openzfs/include/sys/sysevent/dev.h  3
-rw-r--r--  sys/contrib/openzfs/include/sys/txg.h  2
-rw-r--r--  sys/contrib/openzfs/include/sys/vdev_impl.h  1
-rw-r--r--  sys/contrib/openzfs/include/sys/zil.h  11
-rw-r--r--  sys/contrib/openzfs/include/sys/zio.h  19
-rw-r--r--  sys/contrib/openzfs/lib/libavl/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libefi/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libicp/Makefile.am  5
-rw-r--r--  sys/contrib/openzfs/lib/libnvpair/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libshare/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libspl/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libtpool/Makefile.am  6
-rw-r--r--  sys/contrib/openzfs/lib/libunicode/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libuutil/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/Makefile.am  4
-rw-r--r--  sys/contrib/openzfs/lib/libzfs_core/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libzfsbootenv/Makefile.am  3
-rw-r--r--  sys/contrib/openzfs/lib/libzpool/Makefile.am  6
-rw-r--r--  sys/contrib/openzfs/lib/libzstd/Makefile.am  2
-rw-r--r--  sys/contrib/openzfs/lib/libzutil/Makefile.am  5
-rw-r--r--  sys/contrib/openzfs/lib/libzutil/zutil_import.c  2
-rw-r--r--  sys/contrib/openzfs/man/man4/zfs.4  74
-rw-r--r--  sys/contrib/openzfs/man/man8/zdb.8  18
-rw-r--r--  sys/contrib/openzfs/module/.gitignore  1
-rw-r--r--  sys/contrib/openzfs/module/avl/avl.c  24
-rw-r--r--  sys/contrib/openzfs/module/icp/Makefile.in  6
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/edonr/edonr.c  2
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/sha1/sha1.c  835
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S  24
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S  4
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S  6
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S  2
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S  1353
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S  28
-rw-r--r--  sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S  28
-rw-r--r--  sys/contrib/openzfs/module/icp/illumos-crypto.c  2
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1.h  61
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h  65
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h  73
-rw-r--r--  sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h  6
-rw-r--r--  sys/contrib/openzfs/module/icp/io/sha1_mod.c  1230
-rw-r--r--  sys/contrib/openzfs/module/icp/io/skein_mod.c  18
-rw-r--r--  sys/contrib/openzfs/module/lua/ldo.c  13
-rw-r--r--  sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S  10
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c  2
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c  11
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c  41
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c  3
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c  3
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c  4
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c  41
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/abd_os.c  2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c  12
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c  8
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c  4
-rw-r--r--  sys/contrib/openzfs/module/zfs/abd.c  2
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c  36
-rw-r--r--  sys/contrib/openzfs/module/zfs/bqueue.c  23
-rw-r--r--  sys/contrib/openzfs/module/zfs/btree.c  783
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c  120
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf_stats.c  4
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu.c  2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_objset.c  34
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_redact.c  4
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_tx.c  59
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_zfetch.c  185
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_bookmark.c  6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_pool.c  91
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_scan.c  358
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c  141
-rw-r--r--  sys/contrib/openzfs/module/zfs/range_tree.c  79
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c  307
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_log_spacemap.c  231
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c  19
-rw-r--r--  sys/contrib/openzfs/module/zfs/txg.c  36
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev.c  44
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_draid.c  12
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_mirror.c  145
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_raidz.c  33
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_removal.c  2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c  15
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_log.c  68
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_vnops.c  15
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c  18
-rw-r--r--  sys/contrib/openzfs/module/zfs/zvol.c  7
-rw-r--r--  sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in  2
-rw-r--r--  sys/contrib/openzfs/rpm/generic/zfs.spec.in  49
-rw-r--r--  sys/contrib/openzfs/tests/runfiles/common.run  3
-rwxr-xr-x  sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in  2
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg  2
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh  2
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh  43
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh  5
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh  112
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh  2
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh  6
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am  3
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh (renamed from sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged.ksh)  17
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh  157
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh  8
-rw-r--r--  sys/modules/zfs/zfs_config.h  29
-rw-r--r--  sys/modules/zfs/zfs_gitrev.h  5
148 files changed, 3016 insertions, 5277 deletions
diff --git a/cddl/lib/libicp/Makefile b/cddl/lib/libicp/Makefile
index 253b252bc2d4..9195b7182d54 100644
--- a/cddl/lib/libicp/Makefile
+++ b/cddl/lib/libicp/Makefile
@@ -15,7 +15,6 @@ ASM_SOURCES_AS = \
asm-x86_64/modes/gcm_pclmulqdq.S \
asm-x86_64/modes/aesni-gcm-x86_64.S \
asm-x86_64/modes/ghash-x86_64.S \
- asm-x86_64/sha1/sha1-x86_64.S \
asm-x86_64/sha2/sha256_impl.S \
asm-x86_64/sha2/sha512_impl.S
@@ -47,7 +46,6 @@ KERNEL_C = \
algs/modes/ctr.c \
algs/modes/ccm.c \
algs/modes/ecb.c \
- algs/sha1/sha1.c \
algs/sha2/sha2.c \
algs/skein/skein.c \
algs/skein/skein_block.c \
@@ -55,7 +53,6 @@ KERNEL_C = \
illumos-crypto.c \
io/aes.c \
io/edonr_mod.c \
- io/sha1_mod.c \
io/sha2_mod.c \
io/skein_mod.c \
os/modhash.c \
@@ -94,7 +91,6 @@ CFLAGS.aes_aesni.S+= -DLOCORE
CFLAGS.gcm_pclmulqdq.S+= -DLOCORE
CFLAGS.aesni-gcm-x86_64.S+= -DLOCORE
CFLAGS.ghash-x86_64.S+= -DLOCORE
-CFLAGS.sha1-x86_64.S+= -DLOCORE
CFLAGS.sha256_impl.S+= -DLOCORE
CFLAGS.sha512_impl.S+= -DLOCORE
diff --git a/cddl/lib/libicp_rescue/Makefile b/cddl/lib/libicp_rescue/Makefile
index a46fd6db3877..342afd2556cc 100644
--- a/cddl/lib/libicp_rescue/Makefile
+++ b/cddl/lib/libicp_rescue/Makefile
@@ -14,7 +14,6 @@ ASM_SOURCES_AS = \
asm-x86_64/aes/aes_aesni.S \
asm-x86_64/modes/gcm_pclmulqdq.S \
asm-x86_64/modes/aesni-gcm-x86_64.S \
- asm-x86_64/sha1/sha1-x86_64.S \
asm-x86_64/sha2/sha256_impl.S \
asm-x86_64/sha2/sha512_impl.S
@@ -46,13 +45,11 @@ KERNEL_C = \
algs/modes/ctr.c \
algs/modes/ccm.c \
algs/modes/ecb.c \
- algs/sha1/sha1.c \
algs/sha2/sha2.c \
algs/skein/skein_block.c \
illumos-crypto.c \
io/aes.c \
io/edonr_mod.c \
- io/sha1_mod.c \
io/sha2_mod.c \
io/skein_mod.c \
os/modhash.c \
@@ -91,7 +88,6 @@ CFLAGS.aes_aesni.S+= -DLOCORE
CFLAGS.gcm_pclmulqdq.S+= -DLOCORE
CFLAGS.aesni-gcm-x86_64.S+= -DLOCORE
CFLAGS.ghash-x86_64.S+= -DLOCORE
-CFLAGS.sha1-x86_64.S+= -DLOCORE
CFLAGS.sha256_impl.S+= -DLOCORE
CFLAGS.sha512_impl.S+= -DLOCORE
CFLAGS.gcm.c+= -UCAN_USE_GCM_ASM
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
index 3fd6c578c77d..7dd5b311d0c2 100644
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -1,10 +1,10 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 2.1.5
+Version: 2.1.6
Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
-Linux-Maximum: 5.18
+Linux-Maximum: 5.19
Linux-Minimum: 3.10
diff --git a/sys/contrib/openzfs/Makefile.am b/sys/contrib/openzfs/Makefile.am
index 7e2b10b39dee..36d8cd2d6fb8 100644
--- a/sys/contrib/openzfs/Makefile.am
+++ b/sys/contrib/openzfs/Makefile.am
@@ -114,6 +114,11 @@ commitcheck:
${top_srcdir}/scripts/commitcheck.sh; \
fi
+if HAVE_PARALLEL
+cstyle_line = -print0 | parallel -X0 ${top_srcdir}/scripts/cstyle.pl -cpP {}
+else
+cstyle_line = -exec ${top_srcdir}/scripts/cstyle.pl -cpP {} +
+endif
PHONY += cstyle
cstyle:
@find ${top_srcdir} -name build -prune \
@@ -122,7 +127,7 @@ cstyle:
! -name 'opt_global.h' ! -name '*_if*.h' \
! -name 'zstd_compat_wrapper.h' \
! -path './module/zstd/lib/*' \
- -exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+
+ $(cstyle_line)
filter_executable = -exec test -x '{}' \; -print
diff --git a/sys/contrib/openzfs/cmd/arcstat/arcstat.in b/sys/contrib/openzfs/cmd/arcstat/arcstat.in
index 9e7c52a6c7a3..425e52d1f513 100755
--- a/sys/contrib/openzfs/cmd/arcstat/arcstat.in
+++ b/sys/contrib/openzfs/cmd/arcstat/arcstat.in
@@ -271,7 +271,7 @@ def print_values():
if pretty_print:
fmt = lambda col: prettynum(cols[col][0], cols[col][1], v[col])
else:
- fmt = lambda col: v[col]
+ fmt = lambda col: str(v[col])
sys.stdout.write(sep.join(fmt(col) for col in hdr))
sys.stdout.write("\n")
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index 1a6dcf82137a..4e57538d2234 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -112,7 +112,7 @@ extern int zfs_vdev_async_read_max_active;
extern boolean_t spa_load_verify_dryrun;
extern boolean_t spa_mode_readable_spacemaps;
extern int zfs_reconstruct_indirect_combinations_max;
-extern int zfs_btree_verify_intensity;
+extern uint_t zfs_btree_verify_intensity;
static const char cmdname[] = "zdb";
uint8_t dump_opt[256];
@@ -8272,6 +8272,23 @@ zdb_embedded_block(char *thing)
free(buf);
}
+/* check for valid hex or decimal numeric string */
+static boolean_t
+zdb_numeric(char *str)
+{
+ int i = 0;
+
+ if (strlen(str) == 0)
+ return (B_FALSE);
+ if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0)
+ i = 2;
+ for (; i < strlen(str); i++) {
+ if (!isxdigit(str[i]))
+ return (B_FALSE);
+ }
+ return (B_TRUE);
+}
+
int
main(int argc, char **argv)
{
@@ -8317,7 +8334,7 @@ main(int argc, char **argv)
zfs_btree_verify_intensity = 3;
while ((c = getopt(argc, argv,
- "AbcCdDeEFGhiI:klLmMo:Op:PqrRsSt:uU:vVx:XYyZ")) != -1) {
+ "AbcCdDeEFGhiI:klLmMNo:Op:PqrRsSt:uU:vVx:XYyZ")) != -1) {
switch (c) {
case 'b':
case 'c':
@@ -8331,6 +8348,7 @@ main(int argc, char **argv)
case 'l':
case 'm':
case 'M':
+ case 'N':
case 'O':
case 'r':
case 'R':
@@ -8422,31 +8440,6 @@ main(int argc, char **argv)
(void) fprintf(stderr, "-p option requires use of -e\n");
usage();
}
- if (dump_opt['d'] || dump_opt['r']) {
- /* <pool>[/<dataset | objset id> is accepted */
- if (argv[2] && (objset_str = strchr(argv[2], '/')) != NULL &&
- objset_str++ != NULL) {
- char *endptr;
- errno = 0;
- objset_id = strtoull(objset_str, &endptr, 0);
- /* dataset 0 is the same as opening the pool */
- if (errno == 0 && endptr != objset_str &&
- objset_id != 0) {
- target_is_spa = B_FALSE;
- dataset_lookup = B_TRUE;
- } else if (objset_id != 0) {
- printf("failed to open objset %s "
- "%llu %s", objset_str,
- (u_longlong_t)objset_id,
- strerror(errno));
- exit(1);
- }
- /* normal dataset name not an objset ID */
- if (endptr == objset_str) {
- objset_id = -1;
- }
- }
- }
#if defined(_LP64)
/*
@@ -8486,7 +8479,7 @@ main(int argc, char **argv)
verbose = MAX(verbose, 1);
for (c = 0; c < 256; c++) {
- if (dump_all && strchr("AeEFklLOPrRSXy", c) == NULL)
+ if (dump_all && strchr("AeEFklLNOPrRSXy", c) == NULL)
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
@@ -8525,6 +8518,7 @@ main(int argc, char **argv)
return (dump_path(argv[0], argv[1], NULL));
}
if (dump_opt['r']) {
+ target_is_spa = B_FALSE;
if (argc != 3)
usage();
dump_opt['v'] = verbose;
@@ -8535,6 +8529,10 @@ main(int argc, char **argv)
rewind = ZPOOL_DO_REWIND |
(dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
+ /* -N implies -d */
+ if (dump_opt['N'] && dump_opt['d'] == 0)
+ dump_opt['d'] = dump_opt['N'];
+
if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
@@ -8553,6 +8551,34 @@ main(int argc, char **argv)
targetlen = strlen(target);
if (targetlen && target[targetlen - 1] == '/')
target[targetlen - 1] = '\0';
+ /*
+ * See if an objset ID was supplied (-d <pool>/<objset ID>).
+ * To disambiguate tank/100, consider the 100 as objsetID
+ * if -N was given, otherwise 100 is an objsetID iff
+ * tank/100 as a named dataset fails on lookup.
+ */
+ objset_str = strchr(target, '/');
+ if (objset_str && strlen(objset_str) > 1 &&
+ zdb_numeric(objset_str + 1)) {
+ char *endptr;
+ errno = 0;
+ objset_str++;
+ objset_id = strtoull(objset_str, &endptr, 0);
+ /* dataset 0 is the same as opening the pool */
+ if (errno == 0 && endptr != objset_str &&
+ objset_id != 0) {
+ if (dump_opt['N'])
+ dataset_lookup = B_TRUE;
+ }
+ /* normal dataset name not an objset ID */
+ if (endptr == objset_str) {
+ objset_id = -1;
+ }
+ } else if (objset_str && !zdb_numeric(objset_str + 1) &&
+ dump_opt['N']) {
+ printf("Supply a numeric objset ID with -N\n");
+ exit(1);
+ }
} else {
target_pool = target;
}
@@ -8670,13 +8696,27 @@ main(int argc, char **argv)
}
return (error);
} else {
+ target_pool = strdup(target);
+ if (strpbrk(target, "/@") != NULL)
+ *strpbrk(target_pool, "/@") = '\0';
+
zdb_set_skip_mmp(target);
+ /*
+ * If -N was supplied, the user has indicated that
+ * zdb -d <pool>/<objsetID> is in effect. Otherwise
+ * we first assume that the dataset string is the
+ * dataset name. If dmu_objset_hold fails with the
+ * dataset string, and we have an objset_id, retry the
+ * lookup with the objsetID.
+ */
+ boolean_t retry = B_TRUE;
+retry_lookup:
if (dataset_lookup == B_TRUE) {
/*
* Use the supplied id to get the name
* for open_objset.
*/
- error = spa_open(target, &spa, FTAG);
+ error = spa_open(target_pool, &spa, FTAG);
if (error == 0) {
error = name_from_objset_id(spa,
objset_id, dsname);
@@ -8685,10 +8725,23 @@ main(int argc, char **argv)
target = dsname;
}
}
- if (error == 0)
+ if (error == 0) {
+ if (objset_id > 0 && retry) {
+ int err = dmu_objset_hold(target, FTAG,
+ &os);
+ if (err) {
+ dataset_lookup = B_TRUE;
+ retry = B_FALSE;
+ goto retry_lookup;
+ } else {
+ dmu_objset_rele(os, FTAG);
+ }
+ }
error = open_objset(target, FTAG, &os);
+ }
if (error == 0)
spa = dmu_objset_spa(os);
+ free(target_pool);
}
}
nvlist_free(policy);
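
The hunks above rework how zdb distinguishes a dataset name from an objset ID: a numeric tail such as tank/100 is now tried as a dataset name first and only retried as an objset ID if that lookup fails, while -N forces the ID interpretation up front. For illustration, a minimal standalone sketch of that classification logic (plain C, not zdb itself; the sample targets are made up):

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Same test as zdb_numeric(): hex (0x/0X-prefixed) or decimal string. */
static int
is_numeric(const char *s)
{
	size_t i = 0;

	if (s[0] == '\0')
		return (0);
	if (strncmp(s, "0x", 2) == 0 || strncmp(s, "0X", 2) == 0)
		i = 2;
	for (; s[i] != '\0'; i++) {
		if (!isxdigit((unsigned char)s[i]))
			return (0);
	}
	return (1);
}

int
main(void)
{
	const char *targets[] = { "tank/100", "tank/myfs", "tank/0x1a" };

	for (size_t i = 0; i < sizeof (targets) / sizeof (targets[0]); i++) {
		const char *tail = strchr(targets[i], '/') + 1;

		if (is_numeric(tail)) {
			/* Without -N, the name lookup is still tried first. */
			printf("%s: candidate objset ID %llu\n", targets[i],
			    (unsigned long long)strtoull(tail, NULL, 0));
		} else {
			printf("%s: plain dataset name\n", targets[i]);
		}
	}
	return (0);
}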
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
index a510d646e1f9..a4e23ca1a3b0 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
@@ -894,14 +894,90 @@ zfs_deliver_check(nvlist_t *nvl)
return (0);
}
+/*
+ * Given a path to a vdev, lookup the vdev's physical size from its
+ * config nvlist.
+ *
+ * Returns the vdev's physical size in bytes on success, 0 on error.
+ */
+static uint64_t
+vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+ nvlist_t *nvl = NULL;
+ boolean_t avail_spare, l2cache, log;
+ vdev_stat_t *vs = NULL;
+ uint_t c;
+
+ nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
+ if (!nvl)
+ return (0);
+
+ verify(nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &c) == 0);
+ if (!vs) {
+ zed_log_msg(LOG_INFO, "%s: no nvlist for '%s'", __func__,
+ vdev_path);
+ return (0);
+ }
+
+ return (vs->vs_pspace);
+}
+
+/*
+ * Given a path to a vdev, lookup if the vdev is a "whole disk" in the
+ * config nvlist. "whole disk" means that ZFS was passed a whole disk
+ * at pool creation time, which it partitioned up and has full control over.
+ * Thus a partition with wholedisk=1 set tells us that zfs created the
+ * partition at creation time. A partition without whole disk set would have
+ * been created by externally (like with fdisk) and passed to ZFS.
+ *
+ * Returns the whole disk value (either 0 or 1).
+ */
+static uint64_t
+vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+ nvlist_t *nvl = NULL;
+ boolean_t avail_spare, l2cache, log;
+ uint64_t wholedisk;
+
+ nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
+ if (!nvl)
+ return (0);
+
+ verify(nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk) == 0);
+
+ return (wholedisk);
+}
+
+/*
+ * If the device size grew more than 1% then return true.
+ */
+#define DEVICE_GREW(oldsize, newsize) \
+ ((newsize > oldsize) && \
+ ((newsize / (newsize - oldsize)) <= 100))
+
static int
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
{
- char *devname = data;
boolean_t avail_spare, l2cache;
+ nvlist_t *udev_nvl = data;
nvlist_t *tgt;
int error;
+ char *tmp_devname, devname[MAXPATHLEN];
+ uint64_t guid;
+
+ if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
+ sprintf(devname, "%llu", (u_longlong_t)guid);
+ } else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH,
+ &tmp_devname) == 0) {
+ strlcpy(devname, tmp_devname, MAXPATHLEN);
+ zfs_append_partition(devname, MAXPATHLEN);
+ } else {
+ zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__);
+ }
+
zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
devname, zpool_get_name(zhp));
@@ -953,12 +1029,75 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
vdev_state_t newstate;
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
- error = zpool_vdev_online(zhp, fullpath, 0,
- &newstate);
- zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
- "setting device '%s' to ONLINE state "
- "in pool '%s': %d", fullpath,
- zpool_get_name(zhp), error);
+ /*
+ * If this disk size has not changed, then
+ * there's no need to do an autoexpand. To
+ * check we look at the disk's size in its
+ * config, and compare it to the disk size
+ * that udev is reporting.
+ */
+ uint64_t udev_size = 0, conf_size = 0,
+ wholedisk = 0, udev_parent_size = 0;
+
+ /*
+ * Get the size of our disk that udev is
+ * reporting.
+ */
+ if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE,
+ &udev_size) != 0) {
+ udev_size = 0;
+ }
+
+ /*
+ * Get the size of our disk's parent device
+ * from udev (where sda1's parent is sda).
+ */
+ if (nvlist_lookup_uint64(udev_nvl,
+ DEV_PARENT_SIZE, &udev_parent_size) != 0) {
+ udev_parent_size = 0;
+ }
+
+ conf_size = vdev_size_from_config(zhp,
+ fullpath);
+
+ wholedisk = vdev_whole_disk_from_config(zhp,
+ fullpath);
+
+ /*
+ * Only attempt an autoexpand if the vdev size
+ * changed. There are two different cases
+ * to consider.
+ *
+ * 1. wholedisk=1
+ * If you do a 'zpool create' on a whole disk
+ * (like /dev/sda), then zfs will create
+ * partitions on the disk (like /dev/sda1). In
+ * that case, wholedisk=1 will be set in the
+ * partition's nvlist config. So zed will need
+ * to see if your parent device (/dev/sda)
+ * expanded in size, and if so, then attempt
+ * the autoexpand.
+ *
+ * 2. wholedisk=0
+ * If you do a 'zpool create' on an existing
+ * partition, or a device that doesn't allow
+ * partitions, then wholedisk=0, and you will
+ * simply need to check if the device itself
+ * expanded in size.
+ */
+ if (DEVICE_GREW(conf_size, udev_size) ||
+ (wholedisk && DEVICE_GREW(conf_size,
+ udev_parent_size))) {
+ error = zpool_vdev_online(zhp, fullpath,
+ 0, &newstate);
+
+ zed_log_msg(LOG_INFO,
+ "%s: autoexpanding '%s' from %llu"
+ " to %llu bytes in pool '%s': %d",
+ __func__, fullpath, conf_size,
+ MAX(udev_size, udev_parent_size),
+ zpool_get_name(zhp), error);
+ }
}
}
zpool_close(zhp);
@@ -989,7 +1128,7 @@ zfs_deliver_dle(nvlist_t *nvl)
zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
}
- if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) {
+ if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) {
zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
"found", name);
return (1);
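
For reference, DEVICE_GREW() above fires only when the growth is at least 1% of the new size: newsize / (newsize - oldsize) <= 100 is the integer-arithmetic form of (newsize - oldsize) >= newsize / 100. A self-contained sketch of the threshold, with made-up sizes:

#include <stdint.h>
#include <stdio.h>

#define	DEVICE_GREW(oldsize, newsize) \
	(((newsize) > (oldsize)) && \
	(((newsize) / ((newsize) - (oldsize))) <= 100))

int
main(void)
{
	uint64_t conf_size = 100ULL << 30;		/* 100 GiB in config */
	uint64_t grew_little = conf_size + (1ULL << 29);	/* +0.5 GiB */
	uint64_t grew_a_lot = conf_size + (2ULL << 30);		/* +2 GiB */

	/* 0.5% growth: below the threshold, no autoexpand attempt */
	printf("+0.5 GiB -> %d\n", DEVICE_GREW(conf_size, grew_little));
	/* ~2% growth: above the threshold, autoexpand is attempted */
	printf("+2 GiB   -> %d\n", DEVICE_GREW(conf_size, grew_a_lot));
	return (0);
}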
diff --git a/sys/contrib/openzfs/cmd/zed/zed_disk_event.c b/sys/contrib/openzfs/cmd/zed/zed_disk_event.c
index 52b80d8c4c93..e31ec4cfc7e7 100644
--- a/sys/contrib/openzfs/cmd/zed/zed_disk_event.c
+++ b/sys/contrib/openzfs/cmd/zed/zed_disk_event.c
@@ -78,6 +78,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
+ if (nvlist_lookup_uint64(nvl, DEV_PARENT_SIZE, &numval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_PARENT_SIZE, numval);
if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
@@ -130,6 +132,20 @@ dev_event_nvlist(struct udev_device *dev)
numval *= strtoull(value, NULL, 10);
(void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
+
+ /*
+ * If the device has a parent, then get the parent block
+ * device's size as well. For example, /dev/sda1's parent
+ * is /dev/sda.
+ */
+ struct udev_device *parent_dev = udev_device_get_parent(dev);
+ if ((value = udev_device_get_sysattr_value(parent_dev, "size"))
+ != NULL) {
+ uint64_t numval = DEV_BSIZE;
+
+ numval *= strtoull(value, NULL, 10);
+ (void) nvlist_add_uint64(nvl, DEV_PARENT_SIZE, numval);
+ }
}
/*
@@ -169,7 +185,7 @@ zed_udev_monitor(void *arg)
while (1) {
struct udev_device *dev;
const char *action, *type, *part, *sectors;
- const char *bus, *uuid;
+ const char *bus, *uuid, *devpath;
const char *class, *subclass;
nvlist_t *nvl;
boolean_t is_zfs = B_FALSE;
@@ -208,6 +224,12 @@ zed_udev_monitor(void *arg)
* if this is a disk and it is partitioned, then the
* zfs label will reside in a DEVTYPE=partition and
* we can skip passing this event
+ *
+ * Special case: Blank disks are sometimes reported with
+ * an erroneous 'atari' partition, and should not be
+ * excluded from being used as an autoreplace disk:
+ *
+ * https://github.com/openzfs/zfs/issues/13497
*/
type = udev_device_get_property_value(dev, "DEVTYPE");
part = udev_device_get_property_value(dev,
@@ -215,14 +237,23 @@ zed_udev_monitor(void *arg)
if (type != NULL && type[0] != '\0' &&
strcmp(type, "disk") == 0 &&
part != NULL && part[0] != '\0') {
- zed_log_msg(LOG_INFO,
- "%s: skip %s since it has a %s partition already",
- __func__,
- udev_device_get_property_value(dev, "DEVNAME"),
- part);
- /* skip and wait for partition event */
- udev_device_unref(dev);
- continue;
+ const char *devname =
+ udev_device_get_property_value(dev, "DEVNAME");
+
+ if (strcmp(part, "atari") == 0) {
+ zed_log_msg(LOG_INFO,
+ "%s: %s is reporting an atari partition, "
+ "but we're going to assume it's a false "
+ "positive and still use it (issue #13497)",
+ __func__, devname);
+ } else {
+ zed_log_msg(LOG_INFO,
+ "%s: skip %s since it has a %s partition "
+ "already", __func__, devname, part);
+ /* skip and wait for partition event */
+ udev_device_unref(dev);
+ continue;
+ }
}
/*
@@ -248,10 +279,19 @@ zed_udev_monitor(void *arg)
* device id string is required in the message schema
* for matching with vdevs. Preflight here for expected
* udev information.
+ *
+ * Special case:
+ * NVMe devices don't have ID_BUS set (at least on RHEL 7-8),
+ * but they are valid for autoreplace. Add a special case for
+ * them by searching for "/nvme/" in the udev DEVPATH:
+ *
+ * DEVPATH=/devices/pci0000:00/0000:00:1e.0/nvme/nvme2/nvme2n1
*/
bus = udev_device_get_property_value(dev, "ID_BUS");
uuid = udev_device_get_property_value(dev, "DM_UUID");
- if (!is_zfs && (bus == NULL && uuid == NULL)) {
+ devpath = udev_device_get_devpath(dev);
+ if (!is_zfs && (bus == NULL && uuid == NULL &&
+ strstr(devpath, "/nvme/") == NULL)) {
zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
"source", udev_device_get_devnode(dev));
udev_device_unref(dev);
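
The hunk above reads the parent block device's "size" sysattr, which udev reports in 512-byte sectors, and scales it by DEV_BSIZE to get bytes. A standalone libudev sketch of the same lookup, assuming a hypothetical partition sda1 exists on the system (build with -ludev):

#include <libudev.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct udev *udev = udev_new();
	struct udev_device *dev, *parent;
	const char *value;

	if (udev == NULL)
		return (1);
	dev = udev_device_new_from_subsystem_sysname(udev, "block", "sda1");
	if (dev == NULL) {
		udev_unref(udev);
		return (1);
	}
	/* sda1's parent is the whole disk, sda; owned by dev, not unref'd */
	parent = udev_device_get_parent(dev);
	value = (parent != NULL) ?
	    udev_device_get_sysattr_value(parent, "size") : NULL;
	if (value != NULL) {
		uint64_t bytes = 512 * strtoull(value, NULL, 10);
		printf("parent size: %llu bytes\n", (unsigned long long)bytes);
	}
	udev_device_unref(dev);
	udev_unref(udev);
	return (0);
}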
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 6f0c846fddee..00d5d847d05c 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -2480,7 +2480,7 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data)
/* upgrade */
if (version < cb->cb_version) {
- char verstr[16];
+ char verstr[24];
(void) snprintf(verstr, sizeof (verstr),
"%llu", (u_longlong_t)cb->cb_version);
if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) {
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_project.c b/sys/contrib/openzfs/cmd/zfs/zfs_project.c
index 341cc005de48..24849751ce2c 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_project.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_project.c
@@ -207,7 +207,6 @@ static int
zfs_project_handle_dir(const char *name, zfs_project_control_t *zpc,
list_t *head)
{
- char fullname[PATH_MAX];
struct dirent *ent;
DIR *dir;
int ret = 0;
@@ -227,21 +226,28 @@ zfs_project_handle_dir(const char *name, zfs_project_control_t *zpc,
zpc->zpc_ignore_noent = B_TRUE;
errno = 0;
while (!ret && (ent = readdir(dir)) != NULL) {
+ char *fullname;
+
/* skip "." and ".." */
if (strcmp(ent->d_name, ".") == 0 ||
strcmp(ent->d_name, "..") == 0)
continue;
- if (strlen(ent->d_name) + strlen(name) >=
- sizeof (fullname) + 1) {
+ if (strlen(ent->d_name) + strlen(name) + 1 >= PATH_MAX) {
errno = ENAMETOOLONG;
break;
}
- sprintf(fullname, "%s/%s", name, ent->d_name);
+ if (asprintf(&fullname, "%s/%s", name, ent->d_name) == -1) {
+ errno = ENOMEM;
+ break;
+ }
+
ret = zfs_project_handle_one(fullname, zpc);
if (!ret && zpc->zpc_recursive && ent->d_type == DT_DIR)
zfs_project_item_alloc(head, fullname);
+
+ free(fullname);
}
if (errno && !ret) {
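
The change above replaces a fixed char fullname[PATH_MAX] buffer filled by sprintf() with asprintf(), which sizes the allocation to the joined path. A minimal sketch of the pattern (asprintf() is a BSD/GNU extension, hence the _GNU_SOURCE define for glibc):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	const char *dir = "/some/dir", *entry = "file.txt";
	char *fullname;

	if (asprintf(&fullname, "%s/%s", dir, entry) == -1)
		return (1);	/* allocation failed, as with ENOMEM above */

	puts(fullname);		/* use the joined path, then release it */
	free(fullname);
	return (0);
}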
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index b93a6196beea..54464731b52e 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -2438,7 +2438,14 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
(void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS,
(uint64_t **)&ps, &c);
- if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0) {
+ /*
+ * If you force fault a drive that's resilvering, its scan stats can
+ * get frozen in time, giving the false impression that it's
+ * being resilvered. That's why we check the state to see if the vdev
+ * is healthy before reporting "resilvering" or "repairing".
+ */
+ if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0 &&
+ vs->vs_state == VDEV_STATE_HEALTHY) {
if (vs->vs_scan_processed != 0) {
(void) printf(gettext(" (%s)"),
(ps->pss_func == POOL_SCAN_RESILVER) ?
@@ -2450,7 +2457,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
/* The top-level vdevs have the rebuild stats */
if (vrs != NULL && vrs->vrs_state == VDEV_REBUILD_ACTIVE &&
- children == 0) {
+ children == 0 && vs->vs_state == VDEV_STATE_HEALTHY) {
if (vs->vs_rebuild_processed != 0) {
(void) printf(gettext(" (resilvering)"));
}
@@ -5458,8 +5465,8 @@ get_namewidth_iostat(zpool_handle_t *zhp, void *data)
* get_namewidth() returns the maximum width of any name in that column
* for any pool/vdev/device line that will be output.
*/
- width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_name_flags,
- cb->cb_verbose);
+ width = get_namewidth(zhp, cb->cb_namewidth,
+ cb->cb_name_flags | VDEV_NAME_TYPE_ID, cb->cb_verbose);
/*
* The width we are calculating is the width of the header and also the
@@ -6035,6 +6042,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str,
size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL);
switch (prop) {
+ case ZPOOL_PROP_SIZE:
case ZPOOL_PROP_EXPANDSZ:
case ZPOOL_PROP_CHECKPOINT:
case ZPOOL_PROP_DEDUPRATIO:
@@ -6130,8 +6138,12 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
* 'toplevel' boolean value is passed to the print_one_column()
* to indicate that the value is valid.
*/
- print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, NULL, scripted,
- toplevel, format);
+ if (vs->vs_pspace)
+ print_one_column(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL,
+ scripted, B_TRUE, format);
+ else
+ print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, NULL,
+ scripted, toplevel, format);
print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL,
scripted, toplevel, format);
print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc,
@@ -6282,8 +6294,8 @@ get_namewidth_list(zpool_handle_t *zhp, void *data)
list_cbdata_t *cb = data;
int width;
- width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_name_flags,
- cb->cb_verbose);
+ width = get_namewidth(zhp, cb->cb_namewidth,
+ cb->cb_name_flags | VDEV_NAME_TYPE_ID, cb->cb_verbose);
if (width < 9)
width = 9;
diff --git a/sys/contrib/openzfs/config/always-compiler-options.m4 b/sys/contrib/openzfs/config/always-compiler-options.m4
index ce84f7e60684..5046ce0ddb83 100644
--- a/sys/contrib/openzfs/config/always-compiler-options.m4
+++ b/sys/contrib/openzfs/config/always-compiler-options.m4
@@ -88,7 +88,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION], [
])
dnl #
-dnl # Check if gcc supports -Wno-format-truncation option.
+dnl # Check if gcc supports -Wno-format-zero-length option.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [
AC_MSG_CHECKING([whether $CC supports -Wno-format-zero-length])
@@ -108,80 +108,76 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH], [
AC_SUBST([NO_FORMAT_ZERO_LENGTH])
])
-
dnl #
-dnl # Check if gcc supports -Wno-bool-compare option.
+dnl # Check if gcc supports -Wno-clobbered option.
dnl #
-dnl # We actually invoke gcc with the -Wbool-compare option
+dnl # We actually invoke gcc with the -Wclobbered option
dnl # and infer the 'no-' version does or doesn't exist based upon
dnl # the results. This is required because when checking any of
dnl # no- prefixed options gcc always returns success.
dnl #
-AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_BOOL_COMPARE], [
- AC_MSG_CHECKING([whether $CC supports -Wno-bool-compare])
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED], [
+ AC_MSG_CHECKING([whether $CC supports -Wno-clobbered])
saved_flags="$CFLAGS"
- CFLAGS="$CFLAGS -Werror -Wbool-compare"
+ CFLAGS="$CFLAGS -Werror -Wclobbered"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
- NO_BOOL_COMPARE=-Wno-bool-compare
+ NO_CLOBBERED=-Wno-clobbered
AC_MSG_RESULT([yes])
], [
- NO_BOOL_COMPARE=
+ NO_CLOBBERED=
AC_MSG_RESULT([no])
])
CFLAGS="$saved_flags"
- AC_SUBST([NO_BOOL_COMPARE])
+ AC_SUBST([NO_CLOBBERED])
])
dnl #
-dnl # Check if gcc supports -Wno-unused-but-set-variable option.
-dnl #
-dnl # We actually invoke gcc with the -Wunused-but-set-variable option
-dnl # and infer the 'no-' version does or doesn't exist based upon
-dnl # the results. This is required because when checking any of
-dnl # no- prefixed options gcc always returns success.
+dnl # Check if gcc supports -Wimplicit-fallthrough option.
dnl #
-AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_UNUSED_BUT_SET_VARIABLE], [
- AC_MSG_CHECKING([whether $CC supports -Wno-unused-but-set-variable])
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH], [
+ AC_MSG_CHECKING([whether $CC supports -Wimplicit-fallthrough])
saved_flags="$CFLAGS"
- CFLAGS="$CFLAGS -Werror -Wunused-but-set-variable"
+ CFLAGS="$CFLAGS -Werror -Wimplicit-fallthrough"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
- NO_UNUSED_BUT_SET_VARIABLE=-Wno-unused-but-set-variable
+ IMPLICIT_FALLTHROUGH=-Wimplicit-fallthrough
+ AC_DEFINE([HAVE_IMPLICIT_FALLTHROUGH], 1,
+ [Define if compiler supports -Wimplicit-fallthrough])
AC_MSG_RESULT([yes])
], [
- NO_UNUSED_BUT_SET_VARIABLE=
+ IMPLICIT_FALLTHROUGH=
AC_MSG_RESULT([no])
])
CFLAGS="$saved_flags"
- AC_SUBST([NO_UNUSED_BUT_SET_VARIABLE])
+ AC_SUBST([IMPLICIT_FALLTHROUGH])
])
dnl #
-dnl # Check if gcc supports -Wimplicit-fallthrough option.
+dnl # Check if cc supports -Winfinite-recursion option.
dnl #
-AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH], [
- AC_MSG_CHECKING([whether $CC supports -Wimplicit-fallthrough])
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION], [
+ AC_MSG_CHECKING([whether $CC supports -Winfinite-recursion])
saved_flags="$CFLAGS"
- CFLAGS="$CFLAGS -Werror -Wimplicit-fallthrough"
+ CFLAGS="$CFLAGS -Werror -Winfinite-recursion"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
- IMPLICIT_FALLTHROUGH=-Wimplicit-fallthrough
- AC_DEFINE([HAVE_IMPLICIT_FALLTHROUGH], 1,
- [Define if compiler supports -Wimplicit-fallthrough])
+ INFINITE_RECURSION=-Winfinite-recursion
+ AC_DEFINE([HAVE_INFINITE_RECURSION], 1,
+ [Define if compiler supports -Winfinite-recursion])
AC_MSG_RESULT([yes])
], [
- IMPLICIT_FALLTHROUGH=
+ INFINITE_RECURSION=
AC_MSG_RESULT([no])
])
CFLAGS="$saved_flags"
- AC_SUBST([IMPLICIT_FALLTHROUGH])
+ AC_SUBST([INFINITE_RECURSION])
])
dnl #
diff --git a/sys/contrib/openzfs/config/always-parallel.m4 b/sys/contrib/openzfs/config/always-parallel.m4
new file mode 100644
index 000000000000..c1f1ae78e7e7
--- /dev/null
+++ b/sys/contrib/openzfs/config/always-parallel.m4
@@ -0,0 +1,8 @@
+dnl #
+dnl # Check if GNU parallel is available.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PARALLEL], [
+ AC_CHECK_PROG([PARALLEL], [parallel], [yes])
+
+ AM_CONDITIONAL([HAVE_PARALLEL], [test "x$PARALLEL" = "xyes"])
+])
diff --git a/sys/contrib/openzfs/config/kernel-blk-queue.m4 b/sys/contrib/openzfs/config/kernel-blk-queue.m4
index 16251726ccfe..6f42b98125cd 100644
--- a/sys/contrib/openzfs/config/kernel-blk-queue.m4
+++ b/sys/contrib/openzfs/config/kernel-blk-queue.m4
@@ -259,17 +259,17 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [
ZFS_LINUX_TEST_SRC([blk_queue_flush], [
#include <linux/blkdev.h>
], [
- struct request_queue *q = NULL;
+ struct request_queue *q __attribute__ ((unused)) = NULL;
(void) blk_queue_flush(q, REQ_FLUSH);
- ], [$NO_UNUSED_BUT_SET_VARIABLE], [ZFS_META_LICENSE])
+ ], [], [ZFS_META_LICENSE])
ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [
#include <linux/kernel.h>
#include <linux/blkdev.h>
], [
- struct request_queue *q = NULL;
+ struct request_queue *q __attribute__ ((unused)) = NULL;
blk_queue_write_cache(q, true, true);
- ], [$NO_UNUSED_BUT_SET_VARIABLE], [ZFS_META_LICENSE])
+ ], [], [ZFS_META_LICENSE])
])
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [
@@ -322,9 +322,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS], [
ZFS_LINUX_TEST_SRC([blk_queue_max_hw_sectors], [
#include <linux/blkdev.h>
], [
- struct request_queue *q = NULL;
+ struct request_queue *q __attribute__ ((unused)) = NULL;
(void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
- ], [$NO_UNUSED_BUT_SET_VARIABLE])
+ ], [])
])
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [
@@ -345,9 +345,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS], [
ZFS_LINUX_TEST_SRC([blk_queue_max_segments], [
#include <linux/blkdev.h>
], [
- struct request_queue *q = NULL;
+ struct request_queue *q __attribute__ ((unused)) = NULL;
(void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
- ], [$NO_UNUSED_BUT_SET_VARIABLE])
+ ], [])
])
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4
index fb7b1a458638..462d6c6efa8e 100644
--- a/sys/contrib/openzfs/config/kernel-blkdev.m4
+++ b/sys/contrib/openzfs/config/kernel-blkdev.m4
@@ -295,6 +295,32 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE], [
])
dnl #
+dnl # 5.20 API change,
+dnl # Removed bdevname(), snprintf(.., %pg) should be used.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME], [
+ ZFS_LINUX_TEST_SRC([bdevname], [
+ #include <linux/fs.h>
+ #include <linux/blkdev.h>
+ ], [
+ struct block_device *bdev __attribute__ ((unused)) = NULL;
+ char path[BDEVNAME_SIZE];
+
+ (void) bdevname(bdev, path);
+ ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [
+ AC_MSG_CHECKING([whether bdevname() exists])
+ ZFS_LINUX_TEST_RESULT([bdevname], [
+ AC_DEFINE(HAVE_BDEVNAME, 1, [bdevname() is available])
+ AC_MSG_RESULT(yes)
+ ], [
+ AC_MSG_RESULT(no)
+ ])
+])
+
+dnl #
dnl # 5.19 API: blkdev_issue_secure_erase()
dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
dnl #
@@ -377,6 +403,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
+ ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
])
@@ -391,6 +418,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE
ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
+ ZFS_AC_KERNEL_BLKDEV_BDEVNAME
ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
])
diff --git a/sys/contrib/openzfs/config/kernel-block-device-operations.m4 b/sys/contrib/openzfs/config/kernel-block-device-operations.m4
index a48618185bfb..84e39dc8a2f6 100644
--- a/sys/contrib/openzfs/config/kernel-block-device-operations.m4
+++ b/sys/contrib/openzfs/config/kernel-block-device-operations.m4
@@ -6,13 +6,16 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
#include <linux/blkdev.h>
unsigned int blk_check_events(struct gendisk *disk,
- unsigned int clearing) { return (0); }
+ unsigned int clearing) {
+ (void) disk, (void) clearing;
+ return (0);
+ }
static const struct block_device_operations
bops __attribute__ ((unused)) = {
.check_events = blk_check_events,
};
- ], [], [$NO_UNUSED_BUT_SET_VARIABLE])
+ ], [], [])
])
AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
@@ -31,7 +34,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
ZFS_LINUX_TEST_SRC([block_device_operations_release_void], [
#include <linux/blkdev.h>
- void blk_release(struct gendisk *g, fmode_t mode) { return; }
+ void blk_release(struct gendisk *g, fmode_t mode) {
+ (void) g, (void) mode;
+ return;
+ }
static const struct block_device_operations
bops __attribute__ ((unused)) = {
@@ -40,7 +46,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
.ioctl = NULL,
.compat_ioctl = NULL,
};
- ], [], [$NO_UNUSED_BUT_SET_VARIABLE])
+ ], [], [])
])
AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
@@ -61,6 +67,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
#include <linux/blkdev.h>
int blk_revalidate_disk(struct gendisk *disk) {
+ (void) disk;
return(0);
}
@@ -68,7 +75,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
bops __attribute__ ((unused)) = {
.revalidate_disk = blk_revalidate_disk,
};
- ], [], [$NO_UNUSED_BUT_SET_VARIABLE])
+ ], [], [])
])
AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
diff --git a/sys/contrib/openzfs/config/kernel-get-disk-ro.m4 b/sys/contrib/openzfs/config/kernel-get-disk-ro.m4
index 8a379c7669fa..acfcb69acc10 100644
--- a/sys/contrib/openzfs/config/kernel-get-disk-ro.m4
+++ b/sys/contrib/openzfs/config/kernel-get-disk-ro.m4
@@ -5,9 +5,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_DISK_RO], [
ZFS_LINUX_TEST_SRC([get_disk_ro], [
#include <linux/blkdev.h>
],[
- struct gendisk *disk = NULL;
+ struct gendisk *disk __attribute__ ((unused)) = NULL;
(void) get_disk_ro(disk);
- ], [$NO_UNUSED_BUT_SET_VARIABLE])
+ ], [])
])
AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_RO], [
diff --git a/sys/contrib/openzfs/config/kernel-make-request-fn.m4 b/sys/contrib/openzfs/config/kernel-make-request-fn.m4
index 86b202a7a272..f17416acca67 100644
--- a/sys/contrib/openzfs/config/kernel-make-request-fn.m4
+++ b/sys/contrib/openzfs/config/kernel-make-request-fn.m4
@@ -49,6 +49,13 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [
struct gendisk *disk __attribute__ ((unused));
disk = blk_alloc_disk(NUMA_NO_NODE);
])
+
+ ZFS_LINUX_TEST_SRC([blk_cleanup_disk], [
+ #include <linux/blkdev.h>
+ ],[
+ struct gendisk *disk __attribute__ ((unused));
+ blk_cleanup_disk(disk);
+ ])
])
AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
@@ -73,6 +80,19 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
ZFS_LINUX_TEST_RESULT([blk_alloc_disk], [
AC_MSG_RESULT(yes)
AC_DEFINE([HAVE_BLK_ALLOC_DISK], 1, [blk_alloc_disk() exists])
+
+ dnl #
+ dnl # 5.20 API change,
+ dnl # Removed blk_cleanup_disk(), put_disk() should be used.
+ dnl #
+ AC_MSG_CHECKING([whether blk_cleanup_disk() exists])
+ ZFS_LINUX_TEST_RESULT([blk_cleanup_disk], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE([HAVE_BLK_CLEANUP_DISK], 1,
+ [blk_cleanup_disk() exists])
+ ], [
+ AC_MSG_RESULT(no)
+ ])
], [
AC_MSG_RESULT(no)
])
diff --git a/sys/contrib/openzfs/config/kernel-shrink.m4 b/sys/contrib/openzfs/config/kernel-shrink.m4
index 8cf0f2761bde..0c702153e8c4 100644
--- a/sys/contrib/openzfs/config/kernel-shrink.m4
+++ b/sys/contrib/openzfs/config/kernel-shrink.m4
@@ -54,6 +54,21 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [
])
])
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [
+ ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [
+ #include <linux/mm.h>
+ unsigned long shrinker_cb(struct shrinker *shrink,
+ struct shrink_control *sc) { return 0; }
+ ],[
+ struct shrinker cache_shrinker = {
+ .count_objects = shrinker_cb,
+ .scan_objects = shrinker_cb,
+ .seeks = DEFAULT_SEEKS,
+ };
+ register_shrinker(&cache_shrinker, "vararg-reg-shrink-test");
+ ])
+])
+
AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [
ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [
#include <linux/mm.h>
@@ -83,29 +98,50 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [
AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[
dnl #
- dnl # 3.0 - 3.11 API change
- dnl # cs->shrink(struct shrinker *, struct shrink_control *sc)
+ dnl # 6.0 API change
+ dnl # register_shrinker() becomes a var-arg function that takes
+ dnl # a printf-style format string as args > 0
dnl #
- AC_MSG_CHECKING([whether new 2-argument shrinker exists])
- ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [
+ AC_MSG_CHECKING([whether new var-arg register_shrinker() exists])
+ ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [
AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_SINGLE_SHRINKER_CALLBACK, 1,
- [new shrinker callback wants 2 args])
+ AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1,
+ [register_shrinker is vararg])
+
+ dnl # We assume that the split shrinker callback exists if the
+ dnl # vararg register_shrinker() exists, because the latter is
+ dnl # a much more recent addition, and the macro test for the
+ dnl # var-arg version only works if the callback is split
+ AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
+ [cs->count_objects exists])
],[
AC_MSG_RESULT(no)
-
dnl #
- dnl # 3.12 API change,
- dnl # cs->shrink() is logically split in to
- dnl # cs->count_objects() and cs->scan_objects()
+ dnl # 3.0 - 3.11 API change
+ dnl # cs->shrink(struct shrinker *, struct shrink_control *sc)
dnl #
- AC_MSG_CHECKING([whether cs->count_objects callback exists])
- ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control_split], [
+ AC_MSG_CHECKING([whether new 2-argument shrinker exists])
+ ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [
AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
- [cs->count_objects exists])
+ AC_DEFINE(HAVE_SINGLE_SHRINKER_CALLBACK, 1,
+ [new shrinker callback wants 2 args])
],[
- ZFS_LINUX_TEST_ERROR([shrinker])
+ AC_MSG_RESULT(no)
+
+ dnl #
+ dnl # 3.12 API change,
+ dnl # cs->shrink() is logically split in to
+ dnl # cs->count_objects() and cs->scan_objects()
+ dnl #
+ AC_MSG_CHECKING([if cs->count_objects callback exists])
+ ZFS_LINUX_TEST_RESULT(
+ [shrinker_cb_shrink_control_split],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
+ [cs->count_objects exists])
+ ],[
+ ZFS_LINUX_TEST_ERROR([shrinker])
+ ])
])
])
])
@@ -141,6 +177,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [
ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID
ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK
ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT
+ ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG
])
AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [
diff --git a/sys/contrib/openzfs/config/kernel-xattr-handler.m4 b/sys/contrib/openzfs/config/kernel-xattr-handler.m4
index 00b1e74a9ccb..b6cbfa155007 100644
--- a/sys/contrib/openzfs/config/kernel-xattr-handler.m4
+++ b/sys/contrib/openzfs/config/kernel-xattr-handler.m4
@@ -100,6 +100,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [
.get = get,
};
],[])
+
+ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode_flags], [
+ #include <linux/xattr.h>
+
+ int get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer,
+ size_t size, int flags) { return 0; }
+ static const struct xattr_handler
+ xops __attribute__ ((unused)) = {
+ .get = get,
+ };
+ ],[])
])
AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [
@@ -142,7 +155,21 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [
AC_DEFINE(HAVE_XATTR_GET_DENTRY, 1,
[xattr_handler->get() wants dentry])
],[
- ZFS_LINUX_TEST_ERROR([xattr get()])
+ dnl #
+ dnl # Android API change,
+ dnl # The xattr_handler->get() callback was
+ dnl # changed to take dentry, inode and flags.
+ dnl #
+ AC_MSG_RESULT(no)
+ AC_MSG_CHECKING(
+ [whether xattr_handler->get() wants dentry and inode and flags])
+ ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode_flags], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE_FLAGS, 1,
+ [xattr_handler->get() wants dentry and inode and flags])
+ ],[
+ ZFS_LINUX_TEST_ERROR([xattr get()])
+ ])
])
])
])
diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4
index c60eb013552e..bd8e3ac80201 100644
--- a/sys/contrib/openzfs/config/zfs-build.m4
+++ b/sys/contrib/openzfs/config/zfs-build.m4
@@ -209,8 +209,8 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
AX_COUNT_CPUS([])
AC_SUBST(CPU_COUNT)
- ZFS_AC_CONFIG_ALWAYS_CC_NO_UNUSED_BUT_SET_VARIABLE
- ZFS_AC_CONFIG_ALWAYS_CC_NO_BOOL_COMPARE
+ ZFS_AC_CONFIG_ALWAYS_CC_NO_CLOBBERED
+ ZFS_AC_CONFIG_ALWAYS_CC_INFINITE_RECURSION
ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH
ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN
ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION
@@ -226,6 +226,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
ZFS_AC_CONFIG_ALWAYS_SED
ZFS_AC_CONFIG_ALWAYS_CPPCHECK
ZFS_AC_CONFIG_ALWAYS_SHELLCHECK
+ ZFS_AC_CONFIG_ALWAYS_PARALLEL
])
AC_DEFUN([ZFS_AC_CONFIG], [
diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in
index afdba2c9d194..28a4edc844ad 100644
--- a/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in
+++ b/sys/contrib/openzfs/contrib/dracut/90zfs/zfs-snapshot-bootfs.service.in
@@ -8,5 +8,5 @@ ConditionKernelCommandLine=bootfs.snapshot
[Service]
Type=oneshot
-ExecStart=/bin/sh -c '. /lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS" SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$root@${SNAPNAME:-%v}"'
+ExecStart=-/bin/sh -c '. /lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS" SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$root@${SNAPNAME:-%v}"'
RemainAfterExit=yes
diff --git a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
index 2dfed224c29d..32402aec2157 100644
--- a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
+++ b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
@@ -104,7 +104,7 @@ zfs_errno = enum_with_offset(1024, [
)
# compat before we used the enum helper for these values
ZFS_ERR_CHECKPOINT_EXISTS = zfs_errno.ZFS_ERR_CHECKPOINT_EXISTS
-assert(ZFS_ERR_CHECKPOINT_EXISTS == 1024)
+assert (ZFS_ERR_CHECKPOINT_EXISTS == 1024)
ZFS_ERR_DISCARDING_CHECKPOINT = zfs_errno.ZFS_ERR_DISCARDING_CHECKPOINT
ZFS_ERR_NO_CHECKPOINT = zfs_errno.ZFS_ERR_NO_CHECKPOINT
ZFS_ERR_DEVRM_IN_PROGRESS = zfs_errno.ZFS_ERR_DEVRM_IN_PROGRESS
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h
index 5695abee7b85..46ea2d15ac6e 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h
@@ -52,7 +52,7 @@
#define ZFS_MODULE_PARAM_CALL_IMPL(parent, name, perm, args, desc) \
SYSCTL_DECL(parent); \
- SYSCTL_PROC(parent, OID_AUTO, name, perm | args, desc)
+ SYSCTL_PROC(parent, OID_AUTO, name, CTLFLAG_MPSAFE | perm | args, desc)
#define ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, func, _, perm, desc) \
ZFS_MODULE_PARAM_CALL_IMPL(_vfs_ ## scope_prefix, name, perm, func ## _args(name_prefix ## name), desc)
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/xattr_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/xattr_compat.h
index 54690727eab9..30403fe87397 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/xattr_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/xattr_compat.h
@@ -115,6 +115,20 @@ fn(struct dentry *dentry, const char *name, void *buffer, size_t size, \
{ \
return (__ ## fn(dentry->d_inode, name, buffer, size)); \
}
+/*
+ * Android API change,
+ * The xattr_handler->get() callback was changed to take a dentry and inode
+ * and flags, because the dentry might not be attached to an inode yet.
+ */
+#elif defined(HAVE_XATTR_GET_DENTRY_INODE_FLAGS)
+#define ZPL_XATTR_GET_WRAPPER(fn) \
+static int \
+fn(const struct xattr_handler *handler, struct dentry *dentry, \
+ struct inode *inode, const char *name, void *buffer, \
+ size_t size, int flags) \
+{ \
+ return (__ ## fn(inode, name, buffer, size)); \
+}
#else
#error "Unsupported kernel"
#endif
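
The new HAVE_XATTR_GET_DENTRY_INODE_FLAGS branch follows this header's usual compat pattern: configure defines exactly one HAVE_* macro for the ->get() signature it detects, and ZPL_XATTR_GET_WRAPPER() adapts that signature to a single internal __fn(inode, ...) implementation. A compilable toy version of the pattern, with stand-in types and macro names rather than the kernel's:

struct inode { int i_dummy; };
struct dentry { struct inode *d_inode; };

static int
__my_get(struct inode *ip, const char *name, void *buffer, unsigned long size)
{
	(void) ip; (void) name; (void) buffer; (void) size;
	return (0);
}

#define	HAVE_GET_DENTRY_INODE	1	/* pretend configure detected this */

#if defined(HAVE_GET_DENTRY_INODE)
#define	MY_XATTR_GET_WRAPPER(fn)				\
static int							\
fn(struct dentry *dentry, struct inode *inode,			\
    const char *name, void *buffer, unsigned long size)	\
{								\
	(void) dentry;						\
	return (__ ## fn(inode, name, buffer, size));		\
}
#else	/* older dentry-only signature: recover the inode from the dentry */
#define	MY_XATTR_GET_WRAPPER(fn)				\
static int							\
fn(struct dentry *dentry, const char *name, void *buffer,	\
    unsigned long size)						\
{								\
	return (__ ## fn(dentry->d_inode, name, buffer, size));\
}
#endif

MY_XATTR_GET_WRAPPER(my_get)

int
main(void)
{
	struct inode ino = { 0 };
	struct dentry dent = { &ino };

	return (my_get(&dent, &ino, "user.test", 0, 0));
}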
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h b/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h
index e5b7a9c955dd..d472754be4f4 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h
@@ -64,7 +64,11 @@
* }
*/
+#ifdef HAVE_REGISTER_SHRINKER_VARARG
+#define spl_register_shrinker(x) register_shrinker(x, "zfs-arc-shrinker")
+#else
#define spl_register_shrinker(x) register_shrinker(x)
+#endif
#define spl_unregister_shrinker(x) unregister_shrinker(x)
/*
diff --git a/sys/contrib/openzfs/include/sys/abd.h b/sys/contrib/openzfs/include/sys/abd.h
index 6903e0c0e713..5c6bd0c271d4 100644
--- a/sys/contrib/openzfs/include/sys/abd.h
+++ b/sys/contrib/openzfs/include/sys/abd.h
@@ -91,6 +91,7 @@ abd_t *abd_alloc_linear(size_t, boolean_t);
abd_t *abd_alloc_gang(void);
abd_t *abd_alloc_for_io(size_t, boolean_t);
abd_t *abd_alloc_sametype(abd_t *, size_t);
+boolean_t abd_size_alloc_linear(size_t);
void abd_gang_add(abd_t *, abd_t *, boolean_t);
void abd_free(abd_t *);
abd_t *abd_get_offset(abd_t *, size_t);
diff --git a/sys/contrib/openzfs/include/sys/abd_impl.h b/sys/contrib/openzfs/include/sys/abd_impl.h
index 113700cd72b1..e96f1edfc8ce 100644
--- a/sys/contrib/openzfs/include/sys/abd_impl.h
+++ b/sys/contrib/openzfs/include/sys/abd_impl.h
@@ -68,7 +68,6 @@ abd_t *abd_get_offset_scatter(abd_t *, abd_t *, size_t, size_t);
void abd_free_struct_impl(abd_t *);
void abd_alloc_chunks(abd_t *, size_t);
void abd_free_chunks(abd_t *);
-boolean_t abd_size_alloc_linear(size_t);
void abd_update_scatter_stats(abd_t *, abd_stats_op_t);
void abd_update_linear_stats(abd_t *, abd_stats_op_t);
void abd_verify_scatter(abd_t *);
diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h
index a3241f3685a6..5d8176894e60 100644
--- a/sys/contrib/openzfs/include/sys/arc.h
+++ b/sys/contrib/openzfs/include/sys/arc.h
@@ -85,6 +85,7 @@ typedef void arc_prune_func_t(int64_t bytes, void *priv);
/* Shared module parameters */
extern int zfs_arc_average_blocksize;
+extern int l2arc_exclude_special;
/* generic arc_done_func_t's which you can use */
arc_read_done_func_t arc_bcopy_func;
diff --git a/sys/contrib/openzfs/include/sys/bqueue.h b/sys/contrib/openzfs/include/sys/bqueue.h
index 797aecd791a3..b9621966027a 100644
--- a/sys/contrib/openzfs/include/sys/bqueue.h
+++ b/sys/contrib/openzfs/include/sys/bqueue.h
@@ -30,22 +30,22 @@ typedef struct bqueue {
kmutex_t bq_lock;
kcondvar_t bq_add_cv;
kcondvar_t bq_pop_cv;
- uint64_t bq_size;
- uint64_t bq_maxsize;
- uint64_t bq_fill_fraction;
+ size_t bq_size;
+ size_t bq_maxsize;
+ uint_t bq_fill_fraction;
size_t bq_node_offset;
} bqueue_t;
typedef struct bqueue_node {
list_node_t bqn_node;
- uint64_t bqn_size;
+ size_t bqn_size;
} bqueue_node_t;
-int bqueue_init(bqueue_t *, uint64_t, uint64_t, size_t);
+int bqueue_init(bqueue_t *, uint_t, size_t, size_t);
void bqueue_destroy(bqueue_t *);
-void bqueue_enqueue(bqueue_t *, void *, uint64_t);
-void bqueue_enqueue_flush(bqueue_t *, void *, uint64_t);
+void bqueue_enqueue(bqueue_t *, void *, size_t);
+void bqueue_enqueue_flush(bqueue_t *, void *, size_t);
void *bqueue_dequeue(bqueue_t *);
boolean_t bqueue_empty(bqueue_t *);
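
A minimal usage sketch of the revised interface (work_item_t and the sizes are illustrative); the embedded bqueue_node_t plus the offset argument is how the queue links entries without separate allocations:

    typedef struct work_item {
            bqueue_node_t wi_node;  /* embedded queue linkage */
            size_t wi_bytes;        /* size charged against bq_maxsize */
    } work_item_t;

    static void
    example_queue(void)
    {
            bqueue_t q;

            /* fill fraction 4, 16 MiB cap, offset of the linkage */
            VERIFY0(bqueue_init(&q, 4, 16 * 1024 * 1024,
                offsetof(work_item_t, wi_node)));

            work_item_t *wi = kmem_zalloc(sizeof (*wi), KM_SLEEP);
            wi->wi_bytes = 4096;
            bqueue_enqueue(&q, wi, wi->wi_bytes); /* may block while full */

            work_item_t *out = bqueue_dequeue(&q); /* blocks while empty */
            kmem_free(out, sizeof (*out));
            bqueue_destroy(&q);
    }
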
diff --git a/sys/contrib/openzfs/include/sys/btree.h b/sys/contrib/openzfs/include/sys/btree.h
index 3b53476c7c68..a901d654ef1c 100644
--- a/sys/contrib/openzfs/include/sys/btree.h
+++ b/sys/contrib/openzfs/include/sys/btree.h
@@ -72,7 +72,11 @@ extern kmem_cache_t *zfs_btree_leaf_cache;
typedef struct zfs_btree_hdr {
struct zfs_btree_core *bth_parent;
- boolean_t bth_core;
+ /*
+	 * Set to -1 to indicate core nodes.  Other values represent the
+	 * first valid element offset for leaf nodes.
+ */
+ uint32_t bth_first;
/*
* For both leaf and core nodes, represents the number of elements in
* the node. For core nodes, they will have bth_count + 1 children.
@@ -91,9 +95,12 @@ typedef struct zfs_btree_leaf {
uint8_t btl_elems[];
} zfs_btree_leaf_t;
+#define BTREE_LEAF_ESIZE (BTREE_LEAF_SIZE - \
+ offsetof(zfs_btree_leaf_t, btl_elems))
+
typedef struct zfs_btree_index {
zfs_btree_hdr_t *bti_node;
- uint64_t bti_offset;
+ uint32_t bti_offset;
/*
* True if the location is before the list offset, false if it's at
* the listed offset.
@@ -105,6 +112,7 @@ typedef struct btree {
zfs_btree_hdr_t *bt_root;
int64_t bt_height;
size_t bt_elem_size;
+ uint32_t bt_leaf_cap;
uint64_t bt_num_elems;
uint64_t bt_num_nodes;
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
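
The old bth_core boolean is folded into bth_first. A plausible reading of the encoding (the helper name is illustrative; the real one lives in btree.c):

    static inline boolean_t
    zfs_btree_is_core(const zfs_btree_hdr_t *hdr)
    {
            /*
             * UINT32_MAX marks a core node; any other value is the index
             * of the first live element in a leaf, so removing from the
             * head can simply advance bth_first instead of shifting the
             * whole element array.
             */
            return (hdr->bth_first == UINT32_MAX);
    }
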
diff --git a/sys/contrib/openzfs/include/sys/crypto/icp.h b/sys/contrib/openzfs/include/sys/crypto/icp.h
index 4609e3a1dae7..f8fd285a1ed9 100644
--- a/sys/contrib/openzfs/include/sys/crypto/icp.h
+++ b/sys/contrib/openzfs/include/sys/crypto/icp.h
@@ -32,9 +32,6 @@ int aes_mod_fini(void);
int edonr_mod_init(void);
int edonr_mod_fini(void);
-int sha1_mod_init(void);
-int sha1_mod_fini(void);
-
int sha2_mod_init(void);
int sha2_mod_fini(void);
diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h
index e7289c0fe1aa..b757b2664178 100644
--- a/sys/contrib/openzfs/include/sys/dbuf.h
+++ b/sys/contrib/openzfs/include/sys/dbuf.h
@@ -321,15 +321,16 @@ typedef struct dmu_buf_impl {
uint8_t db_dirtycnt;
} dmu_buf_impl_t;
-#define DBUF_RWLOCKS 8192
-#define DBUF_HASH_RWLOCK(h, idx) (&(h)->hash_rwlocks[(idx) & (DBUF_RWLOCKS-1)])
+/* Note: the dbuf hash table is exposed only for the mdb module */
+#define DBUF_MUTEXES 2048
+#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
typedef struct dbuf_hash_table {
uint64_t hash_table_mask;
dmu_buf_impl_t **hash_table;
- krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned;
+ kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
} dbuf_hash_table_t;
-typedef void (*dbuf_prefetch_fn)(void *, boolean_t);
+typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level,
const uint64_t offset);
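
A sketch of the lookup pattern under the restored per-bucket mutexes (simplified from dbuf.c; hv would be the precomputed bucket hash):

    static dmu_buf_impl_t *
    dbuf_hash_lookup_sketch(dbuf_hash_table_t *h, uint64_t hv)
    {
            uint64_t idx = hv & h->hash_table_mask;
            kmutex_t *lock = DBUF_HASH_MUTEX(h, idx);
            dmu_buf_impl_t *db;

            mutex_enter(lock);
            for (db = h->hash_table[idx]; db != NULL;
                db = db->db_hash_next) {
                    /* match on objset/object/level/blkid here */
            }
            mutex_exit(lock);
            return (db);
    }
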
@@ -441,16 +442,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
-#define DBUF_IS_L2CACHEABLE(_db) \
- ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
- (dbuf_is_metadata(_db) && \
- ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
-
-#define DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level) \
- ((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL || \
- (((_level) > 0 || \
- DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) && \
- ((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
+boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db);
#ifdef ZFS_DEBUG
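
The macro becomes a function in dbuf.c so it can also consult the new l2arc_exclude_special tunable (declared in arc.h above). A simplified sketch; dbuf_on_special_vdev() is a hypothetical stand-in for the real code, which inspects the block pointer's DVAs:

    boolean_t
    dbuf_is_l2cacheable(dmu_buf_impl_t *db)
    {
            if (db->db_objset->os_secondary_cache != ZFS_CACHE_ALL &&
                !(dbuf_is_metadata(db) &&
                db->db_objset->os_secondary_cache == ZFS_CACHE_METADATA))
                    return (B_FALSE);

            /* Optionally keep buffers on special vdevs out of L2ARC. */
            if (l2arc_exclude_special && dbuf_on_special_vdev(db))
                    return (B_FALSE);

            return (B_TRUE);
    }
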
diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h
index 10e29a45c89f..070d27fde3a9 100644
--- a/sys/contrib/openzfs/include/sys/dmu.h
+++ b/sys/contrib/openzfs/include/sys/dmu.h
@@ -136,7 +136,7 @@ typedef enum dmu_object_byteswap {
#endif
#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
- ((ot) & DMU_OT_METADATA) : \
+ (((ot) & DMU_OT_METADATA) != 0) : \
DMU_OT_IS_METADATA_IMPL(ot))
#define DMU_OT_IS_DDT(ot) \
@@ -147,7 +147,7 @@ typedef enum dmu_object_byteswap {
((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER)
#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
- ((ot) & DMU_OT_ENCRYPTED) : \
+ (((ot) & DMU_OT_ENCRYPTED) != 0) : \
DMU_OT_IS_ENCRYPTED_IMPL(ot))
/*
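
The added `!= 0` matters because DMU_OT_METADATA and DMU_OT_ENCRYPTED are single flag bits, so the bare mask yields the flag's value rather than 1 (#13842). An illustrative check:

    static void
    example_check(dmu_object_type_t ot)
    {
            boolean_t is_meta = DMU_OT_IS_METADATA(ot);

            /*
             * Always holds with the != 0 form; previously is_meta could
             * be a raw flag value, so comparisons against B_TRUE could
             * fail for metadata types.
             */
            ASSERT(is_meta == B_TRUE || is_meta == B_FALSE);
    }
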
@@ -1067,6 +1067,8 @@ int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
extern uint64_t zfs_crc64_table[256];
+extern int dmu_prefetch_max;
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/sys/dmu_objset.h b/sys/contrib/openzfs/include/sys/dmu_objset.h
index e89ee64ea686..7ade2dc91247 100644
--- a/sys/contrib/openzfs/include/sys/dmu_objset.h
+++ b/sys/contrib/openzfs/include/sys/dmu_objset.h
@@ -200,10 +200,6 @@ struct objset {
#define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode)
#define DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode)
-#define DMU_OS_IS_L2CACHEABLE(os) \
- ((os)->os_secondary_cache == ZFS_CACHE_ALL || \
- (os)->os_secondary_cache == ZFS_CACHE_METADATA)
-
/* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
diff --git a/sys/contrib/openzfs/include/sys/dmu_tx.h b/sys/contrib/openzfs/include/sys/dmu_tx.h
index 60e9ed6e26f5..ad3f1b0e47ca 100644
--- a/sys/contrib/openzfs/include/sys/dmu_tx.h
+++ b/sys/contrib/openzfs/include/sys/dmu_tx.h
@@ -125,6 +125,7 @@ typedef struct dmu_tx_stats {
kstat_named_t dmu_tx_dirty_delay;
kstat_named_t dmu_tx_dirty_over_max;
kstat_named_t dmu_tx_dirty_frees_delay;
+ kstat_named_t dmu_tx_wrlog_delay;
kstat_named_t dmu_tx_quota;
} dmu_tx_stats_t;
diff --git a/sys/contrib/openzfs/include/sys/dmu_zfetch.h b/sys/contrib/openzfs/include/sys/dmu_zfetch.h
index 4c220b0c79e5..cd1b79eb8e44 100644
--- a/sys/contrib/openzfs/include/sys/dmu_zfetch.h
+++ b/sys/contrib/openzfs/include/sys/dmu_zfetch.h
@@ -49,20 +49,18 @@ typedef struct zfetch {
typedef struct zstream {
uint64_t zs_blkid; /* expect next access at this blkid */
- uint64_t zs_pf_blkid1; /* first block to prefetch */
- uint64_t zs_pf_blkid; /* block to prefetch up to */
-
- /*
- * We will next prefetch the L1 indirect block of this level-0
- * block id.
- */
- uint64_t zs_ipf_blkid1; /* first block to prefetch */
- uint64_t zs_ipf_blkid; /* block to prefetch up to */
+ unsigned int zs_pf_dist; /* data prefetch distance in bytes */
+ unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */
+ uint64_t zs_pf_start; /* first data block to prefetch */
+ uint64_t zs_pf_end; /* data block to prefetch up to */
+ uint64_t zs_ipf_start; /* first data block to prefetch L1 */
+ uint64_t zs_ipf_end; /* data block to prefetch L1 up to */
list_node_t zs_node; /* link for zf_stream */
hrtime_t zs_atime; /* time last prefetch issued */
zfetch_t *zs_fetch; /* parent fetch */
boolean_t zs_missed; /* stream saw cache misses */
+ boolean_t zs_more; /* need more distant prefetch */
zfs_refcount_t zs_callers; /* number of pending callers */
/*
* Number of stream references: dnode, callers and pending blocks.
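
The per-stream bookkeeping moves from absolute blkid targets to byte distances. A sketch of the growth policy, matching the zfetch_min_distance/zfetch_max_distance descriptions in the zfs.4 changes below (the function itself is illustrative):

    static unsigned int
    zfetch_grow_dist(unsigned int dist, boolean_t missed)
    {
            if (dist < zfetch_min_distance)
                    dist *= 2;        /* ramp quickly, doubling per hit */
            else if (missed)
                    dist += dist / 8; /* grow slowly, only while prefetch
                                         fails to stay ahead of demand */
            return (MIN(dist, zfetch_max_distance));
    }
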
diff --git a/sys/contrib/openzfs/include/sys/dsl_pool.h b/sys/contrib/openzfs/include/sys/dsl_pool.h
index 58fcae65db5a..e93bd0557c1e 100644
--- a/sys/contrib/openzfs/include/sys/dsl_pool.h
+++ b/sys/contrib/openzfs/include/sys/dsl_pool.h
@@ -40,6 +40,7 @@
#include <sys/rrwlock.h>
#include <sys/dsl_synctask.h>
#include <sys/mmp.h>
+#include <sys/aggsum.h>
#ifdef __cplusplus
extern "C" {
@@ -58,6 +59,7 @@ struct dsl_deadlist;
extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
+extern unsigned long zfs_wrlog_data_max;
extern int zfs_dirty_data_sync_percent;
extern int zfs_dirty_data_max_percent;
extern int zfs_dirty_data_max_max_percent;
@@ -82,7 +84,6 @@ typedef struct zfs_blkstat {
typedef struct zfs_all_blkstats {
zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1];
- kmutex_t zab_lock;
} zfs_all_blkstats_t;
@@ -119,6 +120,9 @@ typedef struct dsl_pool {
uint64_t dp_mos_compressed_delta;
uint64_t dp_mos_uncompressed_delta;
+ aggsum_t dp_wrlog_pertxg[TXG_SIZE];
+ aggsum_t dp_wrlog_total;
+
/*
* Time of most recently scheduled (furthest in the future)
* wakeup for delayed transactions.
@@ -159,6 +163,8 @@ uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
zfs_space_check_t slop_policy);
uint64_t dsl_pool_deferred_space(dsl_pool_t *dp);
+void dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg);
+boolean_t dsl_pool_need_wrlog_delay(dsl_pool_t *dp);
void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
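
These two entry points carry the ZIL write throttle from #12284: dsl_pool_wrlog_count() charges each write-class log record against the per-TXG and total aggsums, and the delay test compares the total against zfs_wrlog_data_max. A sketch consistent with the declarations above (the exact threshold math in dsl_pool.c may differ):

    boolean_t
    dsl_pool_need_wrlog_delay(dsl_pool_t *dp)
    {
            uint64_t delay_min_bytes =
                zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100;

            return (aggsum_compare(&dp->dp_wrlog_total,
                delay_min_bytes) > 0);
    }
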
diff --git a/sys/contrib/openzfs/include/sys/dsl_scan.h b/sys/contrib/openzfs/include/sys/dsl_scan.h
index fb1f1d65bad4..d716510f879d 100644
--- a/sys/contrib/openzfs/include/sys/dsl_scan.h
+++ b/sys/contrib/openzfs/include/sys/dsl_scan.h
@@ -155,7 +155,7 @@ typedef struct dsl_scan {
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
dsl_scan_phys_t scn_phys_cached;
avl_tree_t scn_queue; /* queue of datasets to scan */
- uint64_t scn_bytes_pending; /* outstanding data to issue */
+ uint64_t scn_queues_pending; /* outstanding data to issue */
} dsl_scan_t;
typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index 71d736d5cc97..df1cc060130a 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -757,6 +757,7 @@ typedef struct zpool_load_policy {
/* Rewind data discovered */
#define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts"
+#define ZPOOL_CONFIG_LOAD_META_ERRORS "verify_meta_errors"
#define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors"
#define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind"
@@ -1101,6 +1102,7 @@ typedef struct vdev_stat {
uint64_t vs_configured_ashift; /* TLV vdev_ashift */
uint64_t vs_logical_ashift; /* vdev_logical_ashift */
uint64_t vs_physical_ashift; /* vdev_physical_ashift */
+ uint64_t vs_pspace; /* physical capacity */
} vdev_stat_t;
/* BEGIN CSTYLED */
@@ -1613,6 +1615,47 @@ typedef enum {
#define ZFS_EV_HIST_DSID "history_dsid"
#define ZFS_EV_RESILVER_TYPE "resilver_type"
+
+/*
+ * We currently support block sizes from 512 bytes to 16MB.
+ * The benefits of larger blocks, and thus larger IO, need to be weighed
+ * against the cost of COWing a giant block to modify one byte, and the
+ * large latency of reading or writing a large block.
+ *
+ * The recordsize property cannot be set larger than zfs_max_recordsize
+ * (default 16MB on 64-bit and 1MB on 32-bit). See the comment near
+ * zfs_max_recordsize in dsl_dataset.c for details.
+ *
+ * Note that although the LSIZE field of the blkptr_t can store sizes up
+ * to 32MB, the dnode's dn_datablkszsec can only store sizes up to
+ * 32MB - 512 bytes. Therefore, we limit SPA_MAXBLOCKSIZE to 16MB.
+ */
+#define SPA_MINBLOCKSHIFT 9
+#define SPA_OLD_MAXBLOCKSHIFT 17
+#define SPA_MAXBLOCKSHIFT 24
+#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT)
+#define SPA_OLD_MAXBLOCKSIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT)
+#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
+
+
+/* supported encryption algorithms */
+enum zio_encrypt {
+ ZIO_CRYPT_INHERIT = 0,
+ ZIO_CRYPT_ON,
+ ZIO_CRYPT_OFF,
+ ZIO_CRYPT_AES_128_CCM,
+ ZIO_CRYPT_AES_192_CCM,
+ ZIO_CRYPT_AES_256_CCM,
+ ZIO_CRYPT_AES_128_GCM,
+ ZIO_CRYPT_AES_192_GCM,
+ ZIO_CRYPT_AES_256_GCM,
+ ZIO_CRYPT_FUNCTIONS
+};
+
+#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_GCM
+#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF
+
+
#ifdef __cplusplus
}
#endif
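
The constants are unchanged by the move from spa.h (removed below); for reference, the sizes the shifts produce, as compile-time checks (illustrative only):

    _Static_assert(SPA_MINBLOCKSIZE == 512, "min block is 512 B");
    _Static_assert(SPA_OLD_MAXBLOCKSIZE == 128 * 1024,
        "old max block is 128 KiB");
    _Static_assert(SPA_MAXBLOCKSIZE == 16 * 1024 * 1024,
        "max block is 16 MiB");
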
diff --git a/sys/contrib/openzfs/include/sys/metaslab.h b/sys/contrib/openzfs/include/sys/metaslab.h
index ecff65f13de5..2b4f7243bbbd 100644
--- a/sys/contrib/openzfs/include/sys/metaslab.h
+++ b/sys/contrib/openzfs/include/sys/metaslab.h
@@ -49,11 +49,14 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
metaslab_t **);
void metaslab_fini(metaslab_t *);
+void metaslab_set_unflushed_dirty(metaslab_t *, boolean_t);
void metaslab_set_unflushed_txg(metaslab_t *, uint64_t, dmu_tx_t *);
void metaslab_set_estimated_condensed_size(metaslab_t *, uint64_t, dmu_tx_t *);
+boolean_t metaslab_unflushed_dirty(metaslab_t *);
uint64_t metaslab_unflushed_txg(metaslab_t *);
uint64_t metaslab_estimated_condensed_size(metaslab_t *);
int metaslab_sort_by_flushed(const void *, const void *);
+void metaslab_unflushed_bump(metaslab_t *, dmu_tx_t *, boolean_t);
uint64_t metaslab_unflushed_changes_memused(metaslab_t *);
int metaslab_load(metaslab_t *);
diff --git a/sys/contrib/openzfs/include/sys/metaslab_impl.h b/sys/contrib/openzfs/include/sys/metaslab_impl.h
index adf4c03a20db..904249cf4ad5 100644
--- a/sys/contrib/openzfs/include/sys/metaslab_impl.h
+++ b/sys/contrib/openzfs/include/sys/metaslab_impl.h
@@ -553,6 +553,7 @@ struct metaslab {
* log space maps.
*/
uint64_t ms_unflushed_txg;
+ boolean_t ms_unflushed_dirty;
/* updated every time we are done syncing the metaslab's space map */
uint64_t ms_synced_length;
diff --git a/sys/contrib/openzfs/include/sys/range_tree.h b/sys/contrib/openzfs/include/sys/range_tree.h
index fef3d4d7bd21..daa39e20dbd6 100644
--- a/sys/contrib/openzfs/include/sys/range_tree.h
+++ b/sys/contrib/openzfs/include/sys/range_tree.h
@@ -63,12 +63,8 @@ typedef struct range_tree {
*/
uint8_t rt_shift;
uint64_t rt_start;
- range_tree_ops_t *rt_ops;
-
- /* rt_btree_compare should only be set if rt_arg is a b-tree */
+ const range_tree_ops_t *rt_ops;
void *rt_arg;
- int (*rt_btree_compare)(const void *, const void *);
-
uint64_t rt_gap; /* allowable inter-segment gap */
/*
@@ -278,11 +274,11 @@ rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
-range_tree_t *range_tree_create_impl(range_tree_ops_t *ops,
+range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops,
range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
- int (*zfs_btree_compare) (const void *, const void *), uint64_t gap);
-range_tree_t *range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
- void *arg, uint64_t start, uint64_t shift);
+ uint64_t gap);
+range_tree_t *range_tree_create(const range_tree_ops_t *ops,
+ range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
void range_tree_destroy(range_tree_t *rt);
boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
@@ -316,13 +312,6 @@ void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
range_tree_t *addto);
-void rt_btree_create(range_tree_t *rt, void *arg);
-void rt_btree_destroy(range_tree_t *rt, void *arg);
-void rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg);
-void rt_btree_vacate(range_tree_t *rt, void *arg);
-extern range_tree_ops_t rt_btree_ops;
-
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index f168015abffc..67724a68f0e8 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -73,27 +73,6 @@ struct dsl_dataset;
struct dsl_crypto_params;
/*
- * We currently support block sizes from 512 bytes to 16MB.
- * The benefits of larger blocks, and thus larger IO, need to be weighed
- * against the cost of COWing a giant block to modify one byte, and the
- * large latency of reading or writing a large block.
- *
- * Note that although blocks up to 16MB are supported, the recordsize
- * property can not be set larger than zfs_max_recordsize (default 1MB).
- * See the comment near zfs_max_recordsize in dsl_dataset.c for details.
- *
- * Note that although the LSIZE field of the blkptr_t can store sizes up
- * to 32MB, the dnode's dn_datablkszsec can only store sizes up to
- * 32MB - 512 bytes. Therefore, we limit SPA_MAXBLOCKSIZE to 16MB.
- */
-#define SPA_MINBLOCKSHIFT 9
-#define SPA_OLD_MAXBLOCKSHIFT 17
-#define SPA_MAXBLOCKSHIFT 24
-#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT)
-#define SPA_OLD_MAXBLOCKSIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT)
-#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
-
-/*
* Alignment Shift (ashift) is an immutable, internal top-level vdev property
* which can only be set at vdev creation time. Physical writes are always done
* according to it, which makes 2^ashift the smallest possible IO on a vdev.
diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h
index c8987e2e67e9..9714bbce9c9d 100644
--- a/sys/contrib/openzfs/include/sys/spa_impl.h
+++ b/sys/contrib/openzfs/include/sys/spa_impl.h
@@ -146,9 +146,9 @@ typedef struct spa_config_lock {
kmutex_t scl_lock;
kthread_t *scl_writer;
int scl_write_wanted;
+ int scl_count;
kcondvar_t scl_cv;
- zfs_refcount_t scl_count;
-} spa_config_lock_t;
+} ____cacheline_aligned spa_config_lock_t;
typedef struct spa_config_dirent {
list_node_t scd_link;
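
With #12287 the hold count is a plain integer: every acquisition already takes scl_lock, so the atomic zfs_refcount_t bought nothing, and the cacheline alignment keeps adjacent locks from false sharing. A simplified reader-entry sketch (the real spa_config_enter() in spa_misc.c also handles writers):

    static void
    spa_config_enter_reader_sketch(spa_config_lock_t *scl)
    {
            mutex_enter(&scl->scl_lock);
            while (scl->scl_writer != NULL || scl->scl_write_wanted)
                    cv_wait(&scl->scl_cv, &scl->scl_lock);
            scl->scl_count++;       /* plain int; scl_lock serializes it */
            mutex_exit(&scl->scl_lock);
    }
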
diff --git a/sys/contrib/openzfs/include/sys/spa_log_spacemap.h b/sys/contrib/openzfs/include/sys/spa_log_spacemap.h
index b2ed77fac3e4..72229df6cd16 100644
--- a/sys/contrib/openzfs/include/sys/spa_log_spacemap.h
+++ b/sys/contrib/openzfs/include/sys/spa_log_spacemap.h
@@ -30,7 +30,10 @@
typedef struct log_summary_entry {
uint64_t lse_start; /* start TXG */
+ uint64_t lse_end; /* last TXG */
+ uint64_t lse_txgcount; /* # of TXGs */
uint64_t lse_mscount; /* # of metaslabs needed to be flushed */
+ uint64_t lse_msdcount; /* # of dirty metaslabs needed to be flushed */
uint64_t lse_blkcount; /* blocks held by this entry */
list_node_t lse_node;
} log_summary_entry_t;
@@ -50,6 +53,7 @@ typedef struct spa_log_sm {
uint64_t sls_nblocks; /* number of blocks in this log */
uint64_t sls_mscount; /* # of metaslabs flushed in the log's txg */
avl_node_t sls_node; /* node in spa_sm_logs_by_txg */
+ space_map_t *sls_sm; /* space map pointer, if open */
} spa_log_sm_t;
int spa_ld_log_spacemaps(spa_t *);
@@ -68,8 +72,9 @@ uint64_t spa_log_sm_memused(spa_t *);
void spa_log_sm_decrement_mscount(spa_t *, uint64_t);
void spa_log_sm_increment_current_mscount(spa_t *);
-void spa_log_summary_add_flushed_metaslab(spa_t *);
-void spa_log_summary_decrement_mscount(spa_t *, uint64_t);
+void spa_log_summary_add_flushed_metaslab(spa_t *, boolean_t);
+void spa_log_summary_dirty_flushed_metaslab(spa_t *, uint64_t);
+void spa_log_summary_decrement_mscount(spa_t *, uint64_t, boolean_t);
void spa_log_summary_decrement_blkcount(spa_t *, uint64_t);
boolean_t spa_flush_all_logs_requested(spa_t *);
diff --git a/sys/contrib/openzfs/include/sys/sysevent/dev.h b/sys/contrib/openzfs/include/sys/sysevent/dev.h
index 1117538d822d..2418bbad469d 100644
--- a/sys/contrib/openzfs/include/sys/sysevent/dev.h
+++ b/sys/contrib/openzfs/include/sys/sysevent/dev.h
@@ -244,6 +244,9 @@ extern "C" {
#define DEV_PATH "path"
#define DEV_IS_PART "is_slice"
#define DEV_SIZE "dev_size"
+
+/* Size of the whole parent block device (if dev is a partition) */
+#define DEV_PARENT_SIZE "dev_parent_size"
#endif /* __linux__ */
#define EV_V1 1
diff --git a/sys/contrib/openzfs/include/sys/txg.h b/sys/contrib/openzfs/include/sys/txg.h
index 22158bd1a5e6..f38f0006c040 100644
--- a/sys/contrib/openzfs/include/sys/txg.h
+++ b/sys/contrib/openzfs/include/sys/txg.h
@@ -78,7 +78,7 @@ extern void txg_register_callbacks(txg_handle_t *txghp, list_t *tx_callbacks);
extern void txg_delay(struct dsl_pool *dp, uint64_t txg, hrtime_t delta,
hrtime_t resolution);
-extern void txg_kick(struct dsl_pool *dp);
+extern void txg_kick(struct dsl_pool *dp, uint64_t txg);
/*
* Wait until the given transaction group has finished syncing.
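
txg_kick() now names the transaction group to advance instead of always pushing the currently open one (#12274). An illustrative call site, in dsl_pool_dirty_space() terms:

    /* Once this txg's dirty bytes cross the sync threshold, ask for
     * exactly that txg to move toward quiescing. */
    if (dp->dp_dirty_pertxg[txg & TXG_MASK] >
        zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100)
            txg_kick(dp, txg);
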
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
index 3cfde40a77fe..da846d8504fe 100644
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -642,6 +642,7 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise);
*/
int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj);
void vdev_metaslab_group_create(vdev_t *vd);
+uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b);
/*
* Vdev ashift optimization tunables
diff --git a/sys/contrib/openzfs/include/sys/zil.h b/sys/contrib/openzfs/include/sys/zil.h
index cefbccb32f22..a43823b605de 100644
--- a/sys/contrib/openzfs/include/sys/zil.h
+++ b/sys/contrib/openzfs/include/sys/zil.h
@@ -222,6 +222,15 @@ typedef struct {
} lr_ooo_t;
/*
+ * Additional lr_attr_t fields.
+ */
+typedef struct {
+ uint64_t lr_attr_attrs; /* all of the attributes */
+ uint64_t lr_attr_crtime[2]; /* create time */
+ uint8_t lr_attr_scanstamp[32];
+} lr_attr_end_t;
+
+/*
* Handle option extended vattr attributes.
*
* Whenever new attributes are added the version number
@@ -231,7 +240,7 @@ typedef struct {
typedef struct {
uint32_t lr_attr_masksize; /* number of elements in array */
uint32_t lr_attr_bitmap; /* First entry of array */
- /* remainder of array and any additional fields */
+ /* remainder of array and additional lr_attr_end_t fields */
} lr_attr_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index e46455ea990b..39de5175b7db 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -108,23 +108,6 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
-/* supported encryption algorithms */
-enum zio_encrypt {
- ZIO_CRYPT_INHERIT = 0,
- ZIO_CRYPT_ON,
- ZIO_CRYPT_OFF,
- ZIO_CRYPT_AES_128_CCM,
- ZIO_CRYPT_AES_192_CCM,
- ZIO_CRYPT_AES_256_CCM,
- ZIO_CRYPT_AES_128_GCM,
- ZIO_CRYPT_AES_192_GCM,
- ZIO_CRYPT_AES_256_GCM,
- ZIO_CRYPT_FUNCTIONS
-};
-
-#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_GCM
-#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF
-
/* macros defining encryption lengths */
#define ZIO_OBJSET_MAC_LEN 32
#define ZIO_DATA_IV_LEN 12
@@ -699,6 +682,8 @@ extern void spa_handle_ignored_writes(spa_t *spa);
/* zbookmark_phys functions */
boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp,
const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
+boolean_t zbookmark_subtree_tbd(const struct dnode_phys *dnp,
+ const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2,
uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
diff --git a/sys/contrib/openzfs/lib/libavl/Makefile.am b/sys/contrib/openzfs/lib/libavl/Makefile.am
index 2e0a431c77fb..de8ba34d5ba0 100644
--- a/sys/contrib/openzfs/lib/libavl/Makefile.am
+++ b/sys/contrib/openzfs/lib/libavl/Makefile.am
@@ -5,6 +5,9 @@ VPATH = $(top_srcdir)/module/avl/
# Includes kernel code, generate warnings for large stack frames
AM_CFLAGS += $(FRAME_LARGER_THAN)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
noinst_LTLIBRARIES = libavl.la
KERNEL_C = \
diff --git a/sys/contrib/openzfs/lib/libefi/Makefile.am b/sys/contrib/openzfs/lib/libefi/Makefile.am
index b26f7a6dcd5b..5f77ac480a9f 100644
--- a/sys/contrib/openzfs/lib/libefi/Makefile.am
+++ b/sys/contrib/openzfs/lib/libefi/Makefile.am
@@ -2,6 +2,9 @@ include $(top_srcdir)/config/Rules.am
AM_CFLAGS += $(LIBUUID_CFLAGS) $(ZLIB_CFLAGS)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
noinst_LTLIBRARIES = libefi.la
USER_C = \
diff --git a/sys/contrib/openzfs/lib/libicp/Makefile.am b/sys/contrib/openzfs/lib/libicp/Makefile.am
index 0b87a988c07e..9a2510d0d222 100644
--- a/sys/contrib/openzfs/lib/libicp/Makefile.am
+++ b/sys/contrib/openzfs/lib/libicp/Makefile.am
@@ -6,6 +6,8 @@ VPATH = \
# Includes kernel code, generate warnings for large stack frames
AM_CFLAGS += $(FRAME_LARGER_THAN)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
noinst_LTLIBRARIES = libicp.la
@@ -17,7 +19,6 @@ ASM_SOURCES_AS = \
asm-x86_64/modes/gcm_pclmulqdq.S \
asm-x86_64/modes/aesni-gcm-x86_64.S \
asm-x86_64/modes/ghash-x86_64.S \
- asm-x86_64/sha1/sha1-x86_64.S \
asm-x86_64/sha2/sha256_impl.S \
asm-x86_64/sha2/sha512_impl.S
else
@@ -46,7 +47,6 @@ KERNEL_C = \
algs/modes/ctr.c \
algs/modes/ccm.c \
algs/modes/ecb.c \
- algs/sha1/sha1.c \
algs/sha2/sha2.c \
algs/skein/skein.c \
algs/skein/skein_block.c \
@@ -54,7 +54,6 @@ KERNEL_C = \
illumos-crypto.c \
io/aes.c \
io/edonr_mod.c \
- io/sha1_mod.c \
io/sha2_mod.c \
io/skein_mod.c \
os/modhash.c \
diff --git a/sys/contrib/openzfs/lib/libnvpair/Makefile.am b/sys/contrib/openzfs/lib/libnvpair/Makefile.am
index a3e1fa307f7c..f9f1eb539239 100644
--- a/sys/contrib/openzfs/lib/libnvpair/Makefile.am
+++ b/sys/contrib/openzfs/lib/libnvpair/Makefile.am
@@ -8,6 +8,9 @@ VPATH = \
# and required CFLAGS for libtirpc
AM_CFLAGS += $(FRAME_LARGER_THAN) $(LIBTIRPC_CFLAGS)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
lib_LTLIBRARIES = libnvpair.la
include $(top_srcdir)/config/Abigail.am
diff --git a/sys/contrib/openzfs/lib/libshare/Makefile.am b/sys/contrib/openzfs/lib/libshare/Makefile.am
index 7cef13c3da7c..0fce333506ae 100644
--- a/sys/contrib/openzfs/lib/libshare/Makefile.am
+++ b/sys/contrib/openzfs/lib/libshare/Makefile.am
@@ -2,6 +2,9 @@ include $(top_srcdir)/config/Rules.am
DEFAULT_INCLUDES += -I$(srcdir)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
noinst_LTLIBRARIES = libshare.la
USER_C = \
diff --git a/sys/contrib/openzfs/lib/libspl/Makefile.am b/sys/contrib/openzfs/lib/libspl/Makefile.am
index 61432225a708..b59919bfb9e9 100644
--- a/sys/contrib/openzfs/lib/libspl/Makefile.am
+++ b/sys/contrib/openzfs/lib/libspl/Makefile.am
@@ -2,6 +2,9 @@ include $(top_srcdir)/config/Rules.am
SUBDIRS = include
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
noinst_LTLIBRARIES = libspl_assert.la libspl.la
libspl_assert_la_SOURCES = \
diff --git a/sys/contrib/openzfs/lib/libtpool/Makefile.am b/sys/contrib/openzfs/lib/libtpool/Makefile.am
index aa8bde32f963..ce9d03a67919 100644
--- a/sys/contrib/openzfs/lib/libtpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libtpool/Makefile.am
@@ -1,5 +1,11 @@
include $(top_srcdir)/config/Rules.am
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61118
+AM_CFLAGS += $(NO_CLOBBERED)
+
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
noinst_LTLIBRARIES = libtpool.la
USER_C = \
diff --git a/sys/contrib/openzfs/lib/libunicode/Makefile.am b/sys/contrib/openzfs/lib/libunicode/Makefile.am
index b82975f68efd..5b12b3e916f3 100644
--- a/sys/contrib/openzfs/lib/libunicode/Makefile.am
+++ b/sys/contrib/openzfs/lib/libunicode/Makefile.am
@@ -5,6 +5,9 @@ VPATH = $(top_srcdir)/module/unicode
# Includes kernel code, generate warnings for large stack frames
AM_CFLAGS += $(FRAME_LARGER_THAN)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
noinst_LTLIBRARIES = libunicode.la
KERNEL_C = \
diff --git a/sys/contrib/openzfs/lib/libuutil/Makefile.am b/sys/contrib/openzfs/lib/libuutil/Makefile.am
index 16d5023451bb..05b7ed0db8cb 100644
--- a/sys/contrib/openzfs/lib/libuutil/Makefile.am
+++ b/sys/contrib/openzfs/lib/libuutil/Makefile.am
@@ -1,5 +1,8 @@
include $(top_srcdir)/config/Rules.am
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
lib_LTLIBRARIES = libuutil.la
include $(top_srcdir)/config/Abigail.am
diff --git a/sys/contrib/openzfs/lib/libzfs/Makefile.am b/sys/contrib/openzfs/lib/libzfs/Makefile.am
index 31267fd9a5e9..77e12b9e8d8a 100644
--- a/sys/contrib/openzfs/lib/libzfs/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzfs/Makefile.am
@@ -6,9 +6,11 @@ VPATH = \
$(top_srcdir)/lib/libzfs
# Suppress unused but set variable warnings often due to ASSERTs
-AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
AM_CFLAGS += $(LIBCRYPTO_CFLAGS) $(ZLIB_CFLAGS)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
pkgconfig_DATA = libzfs.pc
lib_LTLIBRARIES = libzfs.la
diff --git a/sys/contrib/openzfs/lib/libzfs_core/Makefile.am b/sys/contrib/openzfs/lib/libzfs_core/Makefile.am
index 67e554dc8706..33a889a09586 100644
--- a/sys/contrib/openzfs/lib/libzfs_core/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzfs_core/Makefile.am
@@ -2,6 +2,9 @@ include $(top_srcdir)/config/Rules.am
pkgconfig_DATA = libzfs_core.pc
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
lib_LTLIBRARIES = libzfs_core.la
include $(top_srcdir)/config/Abigail.am
diff --git a/sys/contrib/openzfs/lib/libzfsbootenv/Makefile.am b/sys/contrib/openzfs/lib/libzfsbootenv/Makefile.am
index 984df0b8a353..8a6bb76acfe7 100644
--- a/sys/contrib/openzfs/lib/libzfsbootenv/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzfsbootenv/Makefile.am
@@ -2,6 +2,9 @@ include $(top_srcdir)/config/Rules.am
pkgconfig_DATA = libzfsbootenv.pc
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
lib_LTLIBRARIES = libzfsbootenv.la
include $(top_srcdir)/config/Abigail.am
diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am
index c9a55591e5ca..4ce3b4cd2f1d 100644
--- a/sys/contrib/openzfs/lib/libzpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am
@@ -17,9 +17,6 @@ endif
# Unconditionally enable debugging for libzpool
AM_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
-# Suppress unused but set variable warnings often due to ASSERTs
-AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
-
# Includes kernel code generate warnings for large stack frames
AM_CFLAGS += $(FRAME_LARGER_THAN)
@@ -27,6 +24,9 @@ AM_CFLAGS += $(ZLIB_CFLAGS)
AM_CFLAGS += -DLIB_ZPOOL_BUILD
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
lib_LTLIBRARIES = libzpool.la
USER_C = \
diff --git a/sys/contrib/openzfs/lib/libzstd/Makefile.am b/sys/contrib/openzfs/lib/libzstd/Makefile.am
index c9ed7e2aafbc..e3bc5c446ee9 100644
--- a/sys/contrib/openzfs/lib/libzstd/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzstd/Makefile.am
@@ -5,6 +5,8 @@ VPATH = $(top_srcdir)/module/zstd
# -fno-tree-vectorize is set for gcc in zstd/common/compiler.h
# Set it for other compilers, too.
AM_CFLAGS += -fno-tree-vectorize
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
noinst_LTLIBRARIES = libzstd.la
diff --git a/sys/contrib/openzfs/lib/libzutil/Makefile.am b/sys/contrib/openzfs/lib/libzutil/Makefile.am
index 2f0357e9f900..f55b7798f1c0 100644
--- a/sys/contrib/openzfs/lib/libzutil/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzutil/Makefile.am
@@ -1,9 +1,10 @@
include $(top_srcdir)/config/Rules.am
-# Suppress unused but set variable warnings often due to ASSERTs
-AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
AM_CFLAGS += $(LIBBLKID_CFLAGS) $(LIBUDEV_CFLAGS)
+# See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020
+AM_CFLAGS += -no-suppress
+
DEFAULT_INCLUDES += -I$(srcdir)
noinst_LTLIBRARIES = libzutil.la
diff --git a/sys/contrib/openzfs/lib/libzutil/zutil_import.c b/sys/contrib/openzfs/lib/libzutil/zutil_import.c
index f6f125e7a5df..1658215199f2 100644
--- a/sys/contrib/openzfs/lib/libzutil/zutil_import.c
+++ b/sys/contrib/openzfs/lib/libzutil/zutil_import.c
@@ -1660,6 +1660,8 @@ zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg)
* caller.
*/
nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
+ if (pair == NULL)
+ continue;
fnvlist_add_nvlist(pools, nvpair_name(pair),
fnvpair_value_nvlist(pair));
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 3eeed8f439fa..fcb97d716d54 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -109,6 +109,11 @@ A value of
.Sy 100
disables this feature.
.
+.It Sy l2arc_exclude_special Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Controls whether buffers present on special vdevs are eligible for caching
+into L2ARC.
+If set to 1, exclude dbufs on special vdevs from being cached to L2ARC.
+.
.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int
Controls whether only MFU metadata and data are cached from ARC into L2ARC.
This may be desired to avoid wasting space on L2ARC when reading/writing large
@@ -213,12 +218,12 @@ For L2ARC devices less than 1GB, the amount of data
evicts is significant compared to the amount of restored L2ARC data.
In this case, do not write log blocks in L2ARC in order not to waste space.
.
-.It Sy metaslab_aliquot Ns = Ns Sy 524288 Ns B Po 512kB Pc Pq ulong
+.It Sy metaslab_aliquot Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
Metaslab granularity, in bytes.
This is roughly similar to what would be referred to as the "stripe size"
in traditional RAID arrays.
-In normal operation, ZFS will try to write this amount of data
-to a top-level vdev before moving on to the next one.
+In normal operation, ZFS will try to write this amount of data to each disk
+before moving on to the next top-level vdev.
.
.It Sy metaslab_bias_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enable metaslab group biasing based on their vdevs' over- or under-utilization
@@ -342,9 +347,12 @@ When a vdev is added, target this number of metaslabs per top-level vdev.
.It Sy zfs_vdev_default_ms_shift Ns = Ns Sy 29 Po 512MB Pc Pq int
Default limit for metaslab size.
.
-.It Sy zfs_vdev_max_auto_ashift Ns = Ns Sy ASHIFT_MAX Po 16 Pc Pq ulong
+.It Sy zfs_vdev_max_auto_ashift Ns = Ns Sy 14 Pq ulong
Maximum ashift used when optimizing for logical -> physical sector size on new
top-level vdevs.
+May be increased up to
+.Sy ASHIFT_MAX Po 16 Pc ,
+but this may negatively impact pool space efficiency.
.
.It Sy zfs_vdev_min_auto_ashift Ns = Ns Sy ASHIFT_MIN Po 9 Pc Pq ulong
Minimum ashift used when creating new top-level vdevs.
@@ -475,7 +483,15 @@ However, this is limited by
.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1MB Pc Pq ulong
If prefetching is enabled, disable prefetching for reads larger than this size.
.
-.It Sy zfetch_max_distance Ns = Ns Sy 8388608 Ns B Po 8MB Pc Pq uint
+.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
+Min bytes to prefetch per stream.
+Prefetch distance starts from the demand access size and quickly grows to
+this value, doubling on each hit.
+After that it may grow further by 1/8 per hit, but only if some prefetch
+issued since the last hit has not completed in time to satisfy the demand
+request, i.e. the prefetch depth did not cover the read latency or the
+pool got saturated.
+.
+.It Sy zfetch_max_distance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
Max bytes to prefetch per stream.
.
.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64MB Pc Pq uint
@@ -484,8 +500,11 @@ Max bytes to prefetch indirects for per stream.
.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
Max number of streams per zfetch (prefetch streams per file).
.
-.It Sy zfetch_min_sec_reap Ns = Ns Sy 2 Pq uint
-Min time before an active prefetch stream can be reclaimed
+.It Sy zfetch_min_sec_reap Ns = Ns Sy 1 Pq uint
+Min time before an inactive prefetch stream can be reclaimed
+.
+.It Sy zfetch_max_sec_reap Ns = Ns Sy 2 Pq uint
+Max time before an inactive prefetch stream can be deleted
.
.It Sy zfs_abd_scatter_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enables ARC from using scatter/gather lists and forces all allocations to be
@@ -966,13 +985,13 @@ log spacemap in memory, in bytes.
Part of overall system memory that ZFS allows to be used
for unflushed metadata changes by the log spacemap, in millionths.
.
-.It Sy zfs_unflushed_log_block_max Ns = Ns Sy 262144 Po 256k Pc Pq ulong
+.It Sy zfs_unflushed_log_block_max Ns = Ns Sy 131072 Po 128k Pc Pq ulong
Describes the maximum number of log spacemap blocks allowed for each pool.
The default value means that the space in all the log spacemaps
can add up to no more than
-.Sy 262144
+.Sy 131072
blocks (which means
-.Em 32GB
+.Em 16GB
of logical space before compression and ditto blocks,
assuming that blocksize is
.Em 128kB ) .
@@ -1002,7 +1021,12 @@ Thus we always allow at least this many log blocks.
.It Sy zfs_unflushed_log_block_pct Ns = Ns Sy 400 Ns % Pq ulong
Tunable used to determine the number of blocks that can be used for
the spacemap log, expressed as a percentage of the total number of
-metaslabs in the pool.
+unflushed metaslabs in the pool.
+.
+.It Sy zfs_unflushed_log_txg_max Ns = Ns Sy 1000 Pq ulong
+Tunable limiting the maximum time, in TXGs, any metaslab may remain unflushed.
+It effectively limits the maximum number of unflushed per-TXG spacemap logs
+that need to be read after an unclean pool export.
.
.It Sy zfs_unlink_suspend_progress Ns = Ns Sy 0 Ns | Ns 1 Pq uint
When enabled, files will not be asynchronously removed from the list of pending
@@ -1075,6 +1099,18 @@ Start syncing out a transaction group if there's at least this much dirty data
This should be less than
.Sy zfs_vdev_async_write_active_min_dirty_percent .
.
+.It Sy zfs_wrlog_data_max Ns = Pq int
+The upper limit of write-transaction ZIL log data size, in bytes.
+Write operations are throttled when approaching the limit until log data is
+cleared out after transaction group sync.
+Because of some overhead, it should be set to at least 2 times the size of
+.Sy zfs_dirty_data_max
+.No to prevent harming normal write throughput.
+It should also be smaller than the size of the SLOG device, if one is present.
+.Pp
+Defaults to
+.Sy zfs_dirty_data_max*2
+.
.It Sy zfs_fallocate_reserve_percent Ns = Ns Sy 110 Ns % Pq uint
Since ZFS is a copy-on-write filesystem with snapshots, blocks cannot be
preallocated for a file in order to guarantee that later writes will not
@@ -1312,6 +1348,22 @@ _
.TE
.Sy \& * No Requires debug build.
.
+.It Sy zfs_btree_verify_intensity Ns = Ns Sy 0 Pq uint
+Enables btree verification.
+The following settings are cumulative:
+.TS
+box;
+lbz r l l .
+ Value Description
+
+ 1 Verify height.
+ 2 Verify pointers from children to parent.
+ 3 Verify element counts.
+ 4 Verify element order. (expensive)
+* 5 Verify unused memory is poisoned. (expensive)
+.TE
+.Sy \& * No Requires debug build.
+.
.It Sy zfs_free_leak_on_eio Ns = Ns Sy 0 Ns | Ns 1 Pq int
If destroy encounters an
.Sy EIO
diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8
index a8a944219071..6a7ea2cbf1d1 100644
--- a/sys/contrib/openzfs/man/man8/zdb.8
+++ b/sys/contrib/openzfs/man/man8/zdb.8
@@ -23,7 +23,7 @@
.Nd display ZFS storage pool debugging and consistency information
.Sh SYNOPSIS
.Nm
-.Op Fl AbcdDFGhikLMPsvXYy
+.Op Fl AbcdDFGhikLMNPsvXYy
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
.Op Fl I Ar inflight I/Os
.Oo Fl o Ar var Ns = Ns Ar value Oc Ns …
@@ -137,6 +137,14 @@ also display the configuration that would be used were the pool to be imported.
Display information about datasets.
Specified once, displays basic dataset information: ID, create transaction,
size, and object count.
+See
+.Fl N
+for controlling whether the
+.Op Ar dataset | objset ID
+in
+.Op Ar poolname Ns Op / Ns Ar dataset | objset ID
+is interpreted as a string (dataset name) or as a numeric objset ID when
+datasets have numeric names.
.Pp
If specified multiple times provides greater and greater verbosity.
.Pp
@@ -272,6 +280,14 @@ Also display information about the maximum contiguous free space and the
percentage of free space in each space map.
.It Fl MMM
Display every spacemap record.
+.It Fl N
+Same as
+.Fl d
+but force zdb to interpret the
+.Op Ar dataset | objset ID
+in
+.Op Ar poolname Ns Op / Ns Ar dataset | objset ID
+as a numeric objset ID.
.It Fl O Ar dataset path
Look up the specified
.Ar path
diff --git a/sys/contrib/openzfs/module/.gitignore b/sys/contrib/openzfs/module/.gitignore
index 7a4bd3673e77..0ec6052f1bb0 100644
--- a/sys/contrib/openzfs/module/.gitignore
+++ b/sys/contrib/openzfs/module/.gitignore
@@ -22,5 +22,6 @@
/export_syms
/machine
/x86
+/i386
!Makefile.in
diff --git a/sys/contrib/openzfs/module/avl/avl.c b/sys/contrib/openzfs/module/avl/avl.c
index 1a95092bc2b6..f761a8ae7666 100644
--- a/sys/contrib/openzfs/module/avl/avl.c
+++ b/sys/contrib/openzfs/module/avl/avl.c
@@ -109,21 +109,6 @@
#include <sys/mod.h>
/*
- * Small arrays to translate between balance (or diff) values and child indices.
- *
- * Code that deals with binary tree data structures will randomly use
- * left and right children when examining a tree. C "if()" statements
- * which evaluate randomly suffer from very poor hardware branch prediction.
- * In this code we avoid some of the branch mispredictions by using the
- * following translation arrays. They replace random branches with an
- * additional memory reference. Since the translation arrays are both very
- * small the data should remain efficiently in cache.
- */
-static const int avl_child2balance[2] = {-1, 1};
-static const int avl_balance2child[] = {0, 0, 1};
-
-
-/*
* Walk from one node to the previous valued node (ie. an infix walk
* towards the left). At any given node we do one of 2 things:
*
@@ -278,8 +263,7 @@ avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
#endif
return (AVL_NODE2DATA(node, off));
}
- child = avl_balance2child[1 + diff];
-
+ child = (diff > 0);
}
if (where != NULL)
@@ -531,7 +515,7 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
* Compute the new balance
*/
old_balance = AVL_XBALANCE(node);
- new_balance = old_balance + avl_child2balance[which_child];
+ new_balance = old_balance + (which_child ? 1 : -1);
/*
* If we introduced equal balance, then we are done immediately
@@ -697,7 +681,7 @@ avl_remove(avl_tree_t *tree, void *data)
* choose node to swap from whichever side is taller
*/
old_balance = AVL_XBALANCE(delete);
- left = avl_balance2child[old_balance + 1];
+ left = (old_balance > 0);
right = 1 - left;
/*
@@ -781,7 +765,7 @@ avl_remove(avl_tree_t *tree, void *data)
*/
node = parent;
old_balance = AVL_XBALANCE(node);
- new_balance = old_balance - avl_child2balance[which_child];
+ new_balance = old_balance - (which_child ? 1 : -1);
parent = AVL_XPARENT(node);
which_child = AVL_XCHILD(node);
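
The deleted tables and their replacements are equivalent over every value that can occur; illustrative assertions (diff is always -1, 0, or 1, and which_child is 0 or 1):

    static void
    avl_table_equivalence_check(void)
    {
            for (int diff = -1; diff <= 1; diff++)
                    ASSERT3S((diff > 0), ==,
                        ((const int[]){0, 0, 1})[1 + diff]);
            for (int child = 0; child <= 1; child++)
                    ASSERT3S((child ? 1 : -1), ==,
                        ((const int[]){-1, 1})[child]);
    }
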
diff --git a/sys/contrib/openzfs/module/icp/Makefile.in b/sys/contrib/openzfs/module/icp/Makefile.in
index 858c5a610c26..54b5df1a1987 100644
--- a/sys/contrib/openzfs/module/icp/Makefile.in
+++ b/sys/contrib/openzfs/module/icp/Makefile.in
@@ -27,7 +27,6 @@ $(MODULE)-objs += core/kcf_prov_lib.o
$(MODULE)-objs += spi/kcf_spi.o
$(MODULE)-objs += io/aes.o
$(MODULE)-objs += io/edonr_mod.o
-$(MODULE)-objs += io/sha1_mod.o
$(MODULE)-objs += io/sha2_mod.o
$(MODULE)-objs += io/skein_mod.o
$(MODULE)-objs += os/modhash.o
@@ -43,7 +42,6 @@ $(MODULE)-objs += algs/aes/aes_impl_generic.o
$(MODULE)-objs += algs/aes/aes_impl.o
$(MODULE)-objs += algs/aes/aes_modes.o
$(MODULE)-objs += algs/edonr/edonr.o
-$(MODULE)-objs += algs/sha1/sha1.o
$(MODULE)-objs += algs/sha2/sha2.o
$(MODULE)-objs += algs/skein/skein.o
$(MODULE)-objs += algs/skein/skein_block.o
@@ -55,7 +53,6 @@ $(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_aesni.o
$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/gcm_pclmulqdq.o
$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o
$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha1/sha1-x86_64.o
$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha256_impl.o
$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha512_impl.o
@@ -72,7 +69,6 @@ OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
# Suppress objtool "unsupported stack pointer realignment" warnings. We are
# not using a DRAP register while aligning the stack to a 64 byte boundary.
# See #6950 for the reasoning.
-OBJECT_FILES_NON_STANDARD_sha1-x86_64.o := y
OBJECT_FILES_NON_STANDARD_sha256_impl.o := y
OBJECT_FILES_NON_STANDARD_sha512_impl.o := y
@@ -86,13 +82,11 @@ ICP_DIRS = \
algs/aes \
algs/edonr \
algs/modes \
- algs/sha1 \
algs/sha2 \
algs/skein \
asm-x86_64 \
asm-x86_64/aes \
asm-x86_64/modes \
- asm-x86_64/sha1 \
asm-x86_64/sha2 \
asm-i386 \
asm-generic
diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
index ee96e692ef00..7a3ba30c0582 100644
--- a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
+++ b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c
@@ -488,7 +488,7 @@ EdonRInit(EdonRState *state, size_t hashbitlen)
state->hashbitlen = 512;
state->bits_processed = 0;
state->unprocessed_bits = 0;
- bcopy(i512p2, hashState224(state)->DoublePipe,
+ bcopy(i512p2, hashState512(state)->DoublePipe,
16 * sizeof (uint64_t));
break;
}
diff --git a/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c b/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c
deleted file mode 100644
index da34222c8fc3..000000000000
--- a/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c
+++ /dev/null
@@ -1,835 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * The basic framework for this code came from the reference
- * implementation for MD5. That implementation is Copyright (C)
- * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * NOTE: Cleaned-up and optimized, version of SHA1, based on the FIPS 180-1
- * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
- * Not as fast as one would like -- further optimizations are encouraged
- * and appreciated.
- */
-
-#include <sys/zfs_context.h>
-#include <sha1/sha1.h>
-#include <sha1/sha1_consts.h>
-
-#ifdef _LITTLE_ENDIAN
-#include <sys/byteorder.h>
-#define HAVE_HTONL
-#endif
-
-#define _RESTRICT_KYWD
-
-static void Encode(uint8_t *, const uint32_t *, size_t);
-
-#if defined(__sparc)
-
-#define SHA1_TRANSFORM(ctx, in) \
- SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
- (ctx)->state[3], (ctx)->state[4], (ctx), (in))
-
-static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
- SHA1_CTX *, const uint8_t *);
-
-#elif defined(__amd64)
-
-#define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
-#define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
- (in), (num))
-
-void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
-
-#else
-
-#define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
-
-static void SHA1Transform(SHA1_CTX *, const uint8_t *);
-
-#endif
-
-
-static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
-
-/*
- * F, G, and H are the basic SHA1 functions.
- */
-#define F(b, c, d) (((b) & (c)) | ((~b) & (d)))
-#define G(b, c, d) ((b) ^ (c) ^ (d))
-#define H(b, c, d) (((b) & (c)) | (((b)|(c)) & (d)))
-
-/*
- * SHA1Init()
- *
- * purpose: initializes the sha1 context and begins and sha1 digest operation
- * input: SHA1_CTX * : the context to initializes.
- * output: void
- */
-
-void
-SHA1Init(SHA1_CTX *ctx)
-{
- ctx->count[0] = ctx->count[1] = 0;
-
- /*
- * load magic initialization constants. Tell lint
- * that these constants are unsigned by using U.
- */
-
- ctx->state[0] = 0x67452301U;
- ctx->state[1] = 0xefcdab89U;
- ctx->state[2] = 0x98badcfeU;
- ctx->state[3] = 0x10325476U;
- ctx->state[4] = 0xc3d2e1f0U;
-}
-
-void
-SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
-{
- uint32_t i, buf_index, buf_len;
- const uint8_t *input = inptr;
-#if defined(__amd64)
- uint32_t block_count;
-#endif /* __amd64 */
-
- /* check for noop */
- if (input_len == 0)
- return;
-
- /* compute number of bytes mod 64 */
- buf_index = (ctx->count[1] >> 3) & 0x3F;
-
- /* update number of bits */
- if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
- ctx->count[0]++;
-
- ctx->count[0] += (input_len >> 29);
-
- buf_len = 64 - buf_index;
-
- /* transform as many times as possible */
- i = 0;
- if (input_len >= buf_len) {
-
- /*
- * general optimization:
- *
- * only do initial bcopy() and SHA1Transform() if
- * buf_index != 0. if buf_index == 0, we're just
- * wasting our time doing the bcopy() since there
- * wasn't any data left over from a previous call to
- * SHA1Update().
- */
-
- if (buf_index) {
- bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
- SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
- i = buf_len;
- }
-
-#if !defined(__amd64)
- for (; i + 63 < input_len; i += 64)
- SHA1_TRANSFORM(ctx, &input[i]);
-#else
- block_count = (input_len - i) >> 6;
- if (block_count > 0) {
- SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
- i += block_count << 6;
- }
-#endif /* !__amd64 */
-
- /*
- * general optimization:
- *
- * if i and input_len are the same, return now instead
- * of calling bcopy(), since the bcopy() in this case
- * will be an expensive nop.
- */
-
- if (input_len == i)
- return;
-
- buf_index = 0;
- }
-
- /* buffer remaining input */
- bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
-}
-
-/*
- * SHA1Final()
- *
- * purpose: ends an sha1 digest operation, finalizing the message digest and
- * zeroing the context.
- * input: uchar_t * : A buffer to store the digest.
- * : The function actually uses void* because many
- * : callers pass things other than uchar_t here.
- * SHA1_CTX * : the context to finalize, save, and zero
- * output: void
- */
-
-void
-SHA1Final(void *digest, SHA1_CTX *ctx)
-{
- uint8_t bitcount_be[sizeof (ctx->count)];
- uint32_t index = (ctx->count[1] >> 3) & 0x3f;
-
- /* store bit count, big endian */
- Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
-
- /* pad out to 56 mod 64 */
- SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
-
- /* append length (before padding) */
- SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
-
- /* store state in digest */
- Encode(digest, ctx->state, sizeof (ctx->state));
-
- /* zeroize sensitive information */
- bzero(ctx, sizeof (*ctx));
-}
-
-
-#if !defined(__amd64)
-
-typedef uint32_t sha1word;
-
-/*
- * sparc optimization:
- *
- * on the sparc, we can load big endian 32-bit data easily. note that
- * special care must be taken to ensure the address is 32-bit aligned.
- * in the interest of speed, we don't check to make sure, since
- * careful programming can guarantee this for us.
- */
-
-#if defined(_ZFS_BIG_ENDIAN)
-#define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
-
-#elif defined(HAVE_HTONL)
-#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
-
-#else
-#define LOAD_BIG_32(addr) BE_32(*((uint32_t *)(addr)))
-#endif /* _BIG_ENDIAN */
-
-/*
- * SHA1Transform()
- */
-#if defined(W_ARRAY)
-#define W(n) w[n]
-#else /* !defined(W_ARRAY) */
-#define W(n) w_ ## n
-#endif /* !defined(W_ARRAY) */
-
-/*
- * ROTATE_LEFT rotates x left n bits.
- */
-
-#if defined(__GNUC__) && defined(_LP64)
-static __inline__ uint64_t
-ROTATE_LEFT(uint64_t value, uint32_t n)
-{
- uint32_t t32;
-
- t32 = (uint32_t)value;
- return ((t32 << n) | (t32 >> (32 - n)));
-}
-
-#else
-
-#define ROTATE_LEFT(x, n) \
- (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
-
-#endif
-
-#if defined(__sparc)
-
-
-/*
- * sparc register window optimization:
- *
- * `a', `b', `c', `d', and `e' are passed into SHA1Transform
- * explicitly since it increases the number of registers available to
- * the compiler. under this scheme, these variables can be held in
- * %i0 - %i4, which leaves more local and out registers available.
- *
- * purpose: sha1 transformation -- updates the digest based on `block'
- * input: uint32_t : bytes 1 - 4 of the digest
- * uint32_t : bytes 5 - 8 of the digest
- * uint32_t : bytes 9 - 12 of the digest
- * uint32_t : bytes 12 - 16 of the digest
- * uint32_t : bytes 16 - 20 of the digest
- * SHA1_CTX * : the context to update
- * uint8_t [64]: the block to use to update the digest
- * output: void
- */
-
-
-void
-SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
- SHA1_CTX *ctx, const uint8_t blk[64])
-{
- /*
- * sparc optimization:
- *
- * while it is somewhat counter-intuitive, on sparc, it is
- * more efficient to place all the constants used in this
- * function in an array and load the values out of the array
- * than to manually load the constants. this is because
- * setting a register to a 32-bit value takes two ops in most
- * cases: a `sethi' and an `or', but loading a 32-bit value
- * from memory only takes one `ld' (or `lduw' on v9). while
- * this increases memory usage, the compiler can find enough
- * other things to do while waiting to keep the pipeline does
- * not stall. additionally, it is likely that many of these
- * constants are cached so that later accesses do not even go
- * out to the bus.
- *
- * this array is declared `static' to keep the compiler from
- * having to bcopy() this array onto the stack frame of
- * SHA1Transform() each time it is called -- which is
- * unacceptably expensive.
- *
- * the `const' is to ensure that callers are good citizens and
- * do not try to munge the array. since these routines are
- * going to be called from inside multithreaded kernelland,
- * this is a good safety check. -- `sha1_consts' will end up in
- * .rodata.
- *
- * unfortunately, loading from an array in this manner hurts
- * performance under Intel. So, there is a macro,
- * SHA1_CONST(), used in SHA1Transform(), that either expands to
- * a reference to this array, or to the actual constant,
- * depending on what platform this code is compiled for.
- */
-
-
- static const uint32_t sha1_consts[] = {
- SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
- };
-
-
- /*
- * general optimization:
- *
- * use individual integers instead of using an array. this is a
- * win, although the amount it wins by seems to vary quite a bit.
- */
-
-
- uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
- uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
-
-
- /*
- * sparc optimization:
- *
- * if `block' is already aligned on a 4-byte boundary, use
- * LOAD_BIG_32() directly. otherwise, bcopy() into a
- * buffer that *is* aligned on a 4-byte boundary and then do
- * the LOAD_BIG_32() on that buffer. benchmarks have shown
- * that using the bcopy() is better than loading the bytes
- * individually and doing the endian-swap by hand.
- *
- * even though it's quite tempting to do:
- *
- * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
- *
- * and only have one set of LOAD_BIG_32()'s, the compiler
- * *does not* like that, so please resist the urge.
- */
-
-
- if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
- bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
- w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
- w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
- w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
- w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
- w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
- w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
- w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
- w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
- w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
- w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
- w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
- w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
- w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
- w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
- w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
- w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
- } else {
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_15 = LOAD_BIG_32(blk + 60);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_14 = LOAD_BIG_32(blk + 56);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_13 = LOAD_BIG_32(blk + 52);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_12 = LOAD_BIG_32(blk + 48);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_11 = LOAD_BIG_32(blk + 44);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_10 = LOAD_BIG_32(blk + 40);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_9 = LOAD_BIG_32(blk + 36);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_8 = LOAD_BIG_32(blk + 32);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_7 = LOAD_BIG_32(blk + 28);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_6 = LOAD_BIG_32(blk + 24);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_5 = LOAD_BIG_32(blk + 20);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_4 = LOAD_BIG_32(blk + 16);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_3 = LOAD_BIG_32(blk + 12);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_2 = LOAD_BIG_32(blk + 8);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_1 = LOAD_BIG_32(blk + 4);
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- w_0 = LOAD_BIG_32(blk + 0);
- }
-#else /* !defined(__sparc) */
-
-void /* CSTYLED */
-SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
-{
- /* CSTYLED */
- sha1word a = ctx->state[0];
- sha1word b = ctx->state[1];
- sha1word c = ctx->state[2];
- sha1word d = ctx->state[3];
- sha1word e = ctx->state[4];
-
-#if defined(W_ARRAY)
- sha1word w[16];
-#else /* !defined(W_ARRAY) */
- sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
- sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
-#endif /* !defined(W_ARRAY) */
-
- W(0) = LOAD_BIG_32((void *)(blk + 0));
- W(1) = LOAD_BIG_32((void *)(blk + 4));
- W(2) = LOAD_BIG_32((void *)(blk + 8));
- W(3) = LOAD_BIG_32((void *)(blk + 12));
- W(4) = LOAD_BIG_32((void *)(blk + 16));
- W(5) = LOAD_BIG_32((void *)(blk + 20));
- W(6) = LOAD_BIG_32((void *)(blk + 24));
- W(7) = LOAD_BIG_32((void *)(blk + 28));
- W(8) = LOAD_BIG_32((void *)(blk + 32));
- W(9) = LOAD_BIG_32((void *)(blk + 36));
- W(10) = LOAD_BIG_32((void *)(blk + 40));
- W(11) = LOAD_BIG_32((void *)(blk + 44));
- W(12) = LOAD_BIG_32((void *)(blk + 48));
- W(13) = LOAD_BIG_32((void *)(blk + 52));
- W(14) = LOAD_BIG_32((void *)(blk + 56));
- W(15) = LOAD_BIG_32((void *)(blk + 60));
-
-#endif /* !defined(__sparc) */
-
- /*
- * general optimization:
- *
- * even though this approach is described in the standard as
- * being slower algorithmically, it is 30-40% faster than the
- * "faster" version under SPARC, because this version has more
- * of the constraints specified at compile-time and uses fewer
- * variables (and therefore has better register utilization)
- * than its "speedier" brother. (i've tried both, trust me)
- *
- * for either method given in the spec, there is an "assignment"
- * phase where the following takes place:
- *
- * tmp = (main_computation);
- * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
- *
- * we can make the algorithm go faster by not doing this work,
- * but just pretending that `d' is now `e', etc. this works
- * really well and obviates the need for a temporary variable.
- * however, we still explicitly perform the rotate action,
- * since it is cheaper on SPARC to do it once than to have to
- * do it over and over again.
- */
-
- /* round 1 */
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
- b = ROTATE_LEFT(b, 30);
-
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
- a = ROTATE_LEFT(a, 30);
-
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
- e = ROTATE_LEFT(e, 30);
-
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
- d = ROTATE_LEFT(d, 30);
-
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
- c = ROTATE_LEFT(c, 30);
-
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
- b = ROTATE_LEFT(b, 30);
-
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
- a = ROTATE_LEFT(a, 30);
-
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
- e = ROTATE_LEFT(e, 30);
-
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
- d = ROTATE_LEFT(d, 30);
-
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
- c = ROTATE_LEFT(c, 30);
-
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
- b = ROTATE_LEFT(b, 30);
-
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
- a = ROTATE_LEFT(a, 30);
-
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
- e = ROTATE_LEFT(e, 30);
-
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
- d = ROTATE_LEFT(d, 30);
-
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
- c = ROTATE_LEFT(c, 30);
-
- e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
- b = ROTATE_LEFT(b, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 16 */
- d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
- a = ROTATE_LEFT(a, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 17 */
- c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
- e = ROTATE_LEFT(e, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 18 */
- b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
- d = ROTATE_LEFT(d, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 19 */
- a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
- c = ROTATE_LEFT(c, 30);
-
- /* round 2 */
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 20 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 21 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 22 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 23 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 24 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 25 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 26 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 27 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 28 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 30 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 31 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 32 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 33 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 34 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 35 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
- b = ROTATE_LEFT(b, 30);
-
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 36 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
- a = ROTATE_LEFT(a, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 37 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
- e = ROTATE_LEFT(e, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 38 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
- d = ROTATE_LEFT(d, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 39 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
- c = ROTATE_LEFT(c, 30);
-
- /* round 3 */
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 40 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 41 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 42 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 43 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 44 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 46 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 47 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 48 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 49 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 50 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 51 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 52 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 53 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 54 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 55 */
- e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
- b = ROTATE_LEFT(b, 30);
-
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 56 */
- d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
- a = ROTATE_LEFT(a, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 57 */
- c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
- e = ROTATE_LEFT(e, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 58 */
- b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
- d = ROTATE_LEFT(d, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 59 */
- a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
- c = ROTATE_LEFT(c, 30);
-
- /* round 4 */
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 60 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 62 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 63 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 64 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
- c = ROTATE_LEFT(c, 30);
-
- W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 65 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 66 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 67 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 68 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 69 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
- c = ROTATE_LEFT(c, 30);
-
- W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 70 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 71 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 72 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 73 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 74 */
- a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
- c = ROTATE_LEFT(c, 30);
-
- W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 75 */
- e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
- b = ROTATE_LEFT(b, 30);
-
- W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 76 */
- d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
- a = ROTATE_LEFT(a, 30);
-
- W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
- c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
- e = ROTATE_LEFT(e, 30);
-
- W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 78 */
- b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
- d = ROTATE_LEFT(d, 30);
-
- W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 79 */
-
- ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
- SHA1_CONST(3);
- ctx->state[1] += b;
- ctx->state[2] += ROTATE_LEFT(c, 30);
- ctx->state[3] += d;
- ctx->state[4] += e;
-
- /* zeroize sensitive information */
- W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
- W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
-}
-#endif /* !__amd64 */
-
-
-/*
- * Encode()
- *
- * purpose: to convert a list of 32-bit words to big-endian byte order
- * input: uint8_t * : place to store the converted big endian numbers
- * uint32_t * : place to get numbers to convert from
- * size_t : the length of the input in bytes
- * output: void
- */
-
-static void
-Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
- size_t len)
-{
- size_t i, j;
-
-#if defined(__sparc)
- if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
- for (i = 0, j = 0; j < len; i++, j += 4) {
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- *((uint32_t *)(output + j)) = input[i];
- }
- } else {
-#endif /* little endian -- will work on big endian, but slowly */
-
- for (i = 0, j = 0; j < len; i++, j += 4) {
- output[j] = (input[i] >> 24) & 0xff;
- output[j + 1] = (input[i] >> 16) & 0xff;
- output[j + 2] = (input[i] >> 8) & 0xff;
- output[j + 3] = input[i] & 0xff;
- }
-#if defined(__sparc)
- }
-#endif
-}
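
For reference, the unrolled SHA1Transform() removed above computes the
standard FIPS 180-1 compression function. The following compact C sketch
is equivalent (illustrative only; sha1_compress_sketch is a hypothetical
name, not code from this tree):

#include <stdint.h>

#define	ROTL32(x, n)	(((x) << (n)) | ((x) >> (32 - (n))))

static void
sha1_compress_sketch(uint32_t state[5], const uint8_t blk[64])
{
	uint32_t w[80], a, b, c, d, e, f, k, tmp;
	int t;

	/* message schedule: 16 big-endian words, expanded to 80 */
	for (t = 0; t < 16; t++)
		w[t] = ((uint32_t)blk[t * 4] << 24) |
		    ((uint32_t)blk[t * 4 + 1] << 16) |
		    ((uint32_t)blk[t * 4 + 2] << 8) |
		    (uint32_t)blk[t * 4 + 3];
	for (t = 16; t < 80; t++)
		w[t] = ROTL32(w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16], 1);

	a = state[0]; b = state[1]; c = state[2];
	d = state[3]; e = state[4];

	for (t = 0; t < 80; t++) {
		if (t < 20) {			/* F(), SHA1_CONST_0 */
			f = (b & c) | (~b & d);
			k = 0x5a827999U;
		} else if (t < 40) {		/* G(), SHA1_CONST_1 */
			f = b ^ c ^ d;
			k = 0x6ed9eba1U;
		} else if (t < 60) {		/* H(), SHA1_CONST_2 */
			f = (b & c) | (b & d) | (c & d);
			k = 0x8f1bbcdcU;
		} else {			/* G() again, SHA1_CONST_3 */
			f = b ^ c ^ d;
			k = 0xca62c1d6U;
		}
		/* the "assignment phase" that the unrolled code avoids */
		tmp = ROTL32(a, 5) + f + e + w[t] + k;
		e = d; d = c; c = ROTL32(b, 30); b = a; a = tmp;
	}

	state[0] += a; state[1] += b; state[2] += c;
	state[3] += d; state[4] += e;
}

The deleted file produces the same result with the 80 rounds fully
unrolled and the register renaming done at compile time.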
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
index 4a80c62097ae..b0d9f03af2c8 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S
@@ -208,7 +208,7 @@ _key_expansion_256a_local:
pxor %xmm1, %xmm0
movups %xmm0, (%rcx)
add $0x10, %rcx
- ret
+ RET
nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
@@ -236,7 +236,7 @@ _key_expansion_192a_local:
shufps $0b01001110, %xmm2, %xmm1
movups %xmm1, 0x10(%rcx)
add $0x20, %rcx
- ret
+ RET
SET_SIZE(_key_expansion_192a)
@@ -257,7 +257,7 @@ _key_expansion_192b_local:
movups %xmm0, (%rcx)
add $0x10, %rcx
- ret
+ RET
SET_SIZE(_key_expansion_192b)
@@ -271,7 +271,7 @@ _key_expansion_256b_local:
pxor %xmm1, %xmm2
movups %xmm2, (%rcx)
add $0x10, %rcx
- ret
+ RET
SET_SIZE(_key_expansion_256b)
@@ -376,7 +376,7 @@ rijndael_key_setup_enc_intel_local:
mov $14, %rax // return # rounds = 14
#endif
FRAME_END
- ret
+ RET
.align 4
.Lenc_key192:
@@ -413,7 +413,7 @@ rijndael_key_setup_enc_intel_local:
mov $12, %rax // return # rounds = 12
#endif
FRAME_END
- ret
+ RET
.align 4
.Lenc_key128:
@@ -453,13 +453,13 @@ rijndael_key_setup_enc_intel_local:
mov $10, %rax // return # rounds = 10
#endif
FRAME_END
- ret
+ RET
.Lenc_key_invalid_param:
#ifdef OPENSSL_INTERFACE
mov $-1, %rax // user key or AES key pointer is NULL
FRAME_END
- ret
+ RET
#else
/* FALLTHROUGH */
#endif /* OPENSSL_INTERFACE */
@@ -471,7 +471,7 @@ rijndael_key_setup_enc_intel_local:
xor %rax, %rax // a key pointer is NULL or invalid keysize
#endif /* OPENSSL_INTERFACE */
FRAME_END
- ret
+ RET
SET_SIZE(rijndael_key_setup_enc_intel)
@@ -548,7 +548,7 @@ FRAME_BEGIN
// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
// OpenSSL: rax = 0 for OK, or non-zero for error
FRAME_END
- ret
+ RET
SET_SIZE(rijndael_key_setup_dec_intel)
@@ -655,7 +655,7 @@ ENTRY_NP(aes_encrypt_intel)
aesenclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output
- ret
+ RET
SET_SIZE(aes_encrypt_intel)
@@ -738,7 +738,7 @@ ENTRY_NP(aes_decrypt_intel)
aesdeclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output
- ret
+ RET
SET_SIZE(aes_decrypt_intel)
#endif /* lint || __lint */
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
index 9db3a3179230..931d2480609c 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S
@@ -785,7 +785,7 @@ ENTRY_NP(aes_encrypt_amd64)
mov 2*8(%rsp), %rbp
mov 3*8(%rsp), %r12
add $[4*8], %rsp
- ret
+ RET
SET_SIZE(aes_encrypt_amd64)
@@ -896,7 +896,7 @@ ENTRY_NP(aes_decrypt_amd64)
mov 2*8(%rsp), %rbp
mov 3*8(%rsp), %r12
add $[4*8], %rsp
- ret
+ RET
SET_SIZE(aes_decrypt_amd64)
#endif /* lint || __lint */
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index dc71ae2c1c89..70e419c2e4ab 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -1201,7 +1201,7 @@ aesni_gcm_encrypt:
.align 32
clear_fpu_regs_avx:
vzeroall
- ret
+ RET
.size clear_fpu_regs_avx,.-clear_fpu_regs_avx
/*
@@ -1219,7 +1219,7 @@ gcm_xor_avx:
movdqu (%rsi), %xmm1
pxor %xmm1, %xmm0
movdqu %xmm0, (%rsi)
- ret
+ RET
.size gcm_xor_avx,.-gcm_xor_avx
/*
@@ -1236,7 +1236,7 @@ atomic_toggle_boolean_nv:
jz 1f
movl $1, %eax
1:
- ret
+ RET
.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv
.align 64
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
index 59edc4c8d56c..df7f188ecdae 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
@@ -244,7 +244,7 @@ ENTRY_NP(gcm_mul_pclmulqdq)
//
// Return
//
- ret
+ RET
SET_SIZE(gcm_mul_pclmulqdq)
#endif /* lint || __lint */
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
deleted file mode 100644
index cb923784a730..000000000000
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
+++ /dev/null
@@ -1,1353 +0,0 @@
-/*
- * !/usr/bin/env perl
- *
- * ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project. The module is, however, dual licensed under OpenSSL and
- * CRYPTOGAMS licenses depending on where you obtain it. For further
- * details see http://www.openssl.org/~appro/cryptogams/.
- * ====================================================================
- *
- * sha1_block procedure for x86_64.
- *
- * It was brought to my attention that on EM64T compiler-generated code
- * was far behind the 32-bit assembler implementation. This is unlike on
- * Opteron where compiler-generated code was only 15% behind 32-bit
- * assembler, which originally made it hard to motivate the effort.
- * There was suggestion to mechanically translate 32-bit code, but I
- * dismissed it, reasoning that x86_64 offers enough register bank
- * capacity to fully utilize SHA-1 parallelism. Therefore this fresh
- * implementation:-) However! While 64-bit code does perform better
- * on Opteron, I failed to beat 32-bit assembler on EM64T core. Well,
- * x86_64 does offer a larger *addressable* bank, but the out-of-order core
- * reaches for even more registers through dynamic aliasing, and EM64T
- * core must have managed to run-time optimize even 32-bit code just as
- * well as the 64-bit one. Performance improvement is summarized in the
- * following table:
- *
- * gcc 3.4 32-bit asm cycles/byte
- * Opteron +45% +20% 6.8
- * Xeon P4 +65% +0% 9.9
- * Core2 +60% +10% 7.0
- *
- *
- * OpenSolaris OS modifications
- *
- * Sun elects to use this software under the BSD license.
- *
- * This source originates from OpenSSL file sha1-x86_64.pl at
- * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
- * (presumably for future OpenSSL release 0.9.8h), with these changes:
- *
- * 1. Added perl "use strict" and declared variables.
- *
- * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
- * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
- *
- * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
- * assemblers).
- *
- */
-
-/*
- * This file was generated by a perl script (sha1-x86_64.pl). The comments from
- * the original file have been pasted above.
- */
-
-#if defined(lint) || defined(__lint)
-#include <sys/stdint.h>
-#include <sys/sha1.h>
-
-
-/* ARGSUSED */
-void
-sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks)
-{
-}
-
-#else
-#define _ASM
-#include <sys/asm_linkage.h>
-ENTRY_NP(sha1_block_data_order)
- push %rbx
- push %rbp
- push %r12
- mov %rsp,%rax
- mov %rdi,%r8 # reassigned argument
- sub $72,%rsp
- mov %rsi,%r9 # reassigned argument
- and $-64,%rsp
- mov %rdx,%r10 # reassigned argument
- mov %rax,64(%rsp)
-
- mov 0(%r8),%edx
- mov 4(%r8),%esi
- mov 8(%r8),%edi
- mov 12(%r8),%ebp
- mov 16(%r8),%r11d
-.align 4
-.Lloop:
- mov 0(%r9),%eax
- bswap %eax
- mov %eax,0(%rsp)
- lea 0x5a827999(%eax,%r11d),%r12d
- mov %edi,%ebx
- mov 4(%r9),%eax
- mov %edx,%r11d
- xor %ebp,%ebx
- bswap %eax
- rol $5,%r11d
- and %esi,%ebx
- mov %eax,4(%rsp)
- add %r11d,%r12d
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- lea 0x5a827999(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov 8(%r9),%eax
- mov %r12d,%ebp
- xor %edi,%ebx
- bswap %eax
- rol $5,%ebp
- and %edx,%ebx
- mov %eax,8(%rsp)
- add %ebp,%r11d
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- lea 0x5a827999(%eax,%edi),%ebp
- mov %edx,%ebx
- mov 12(%r9),%eax
- mov %r11d,%edi
- xor %esi,%ebx
- bswap %eax
- rol $5,%edi
- and %r12d,%ebx
- mov %eax,12(%rsp)
- add %edi,%ebp
- xor %esi,%ebx
- rol $30,%r12d
- add %ebx,%ebp
- lea 0x5a827999(%eax,%esi),%edi
- mov %r12d,%ebx
- mov 16(%r9),%eax
- mov %ebp,%esi
- xor %edx,%ebx
- bswap %eax
- rol $5,%esi
- and %r11d,%ebx
- mov %eax,16(%rsp)
- add %esi,%edi
- xor %edx,%ebx
- rol $30,%r11d
- add %ebx,%edi
- lea 0x5a827999(%eax,%edx),%esi
- mov %r11d,%ebx
- mov 20(%r9),%eax
- mov %edi,%edx
- xor %r12d,%ebx
- bswap %eax
- rol $5,%edx
- and %ebp,%ebx
- mov %eax,20(%rsp)
- add %edx,%esi
- xor %r12d,%ebx
- rol $30,%ebp
- add %ebx,%esi
- lea 0x5a827999(%eax,%r12d),%edx
- mov %ebp,%ebx
- mov 24(%r9),%eax
- mov %esi,%r12d
- xor %r11d,%ebx
- bswap %eax
- rol $5,%r12d
- and %edi,%ebx
- mov %eax,24(%rsp)
- add %r12d,%edx
- xor %r11d,%ebx
- rol $30,%edi
- add %ebx,%edx
- lea 0x5a827999(%eax,%r11d),%r12d
- mov %edi,%ebx
- mov 28(%r9),%eax
- mov %edx,%r11d
- xor %ebp,%ebx
- bswap %eax
- rol $5,%r11d
- and %esi,%ebx
- mov %eax,28(%rsp)
- add %r11d,%r12d
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- lea 0x5a827999(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov 32(%r9),%eax
- mov %r12d,%ebp
- xor %edi,%ebx
- bswap %eax
- rol $5,%ebp
- and %edx,%ebx
- mov %eax,32(%rsp)
- add %ebp,%r11d
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- lea 0x5a827999(%eax,%edi),%ebp
- mov %edx,%ebx
- mov 36(%r9),%eax
- mov %r11d,%edi
- xor %esi,%ebx
- bswap %eax
- rol $5,%edi
- and %r12d,%ebx
- mov %eax,36(%rsp)
- add %edi,%ebp
- xor %esi,%ebx
- rol $30,%r12d
- add %ebx,%ebp
- lea 0x5a827999(%eax,%esi),%edi
- mov %r12d,%ebx
- mov 40(%r9),%eax
- mov %ebp,%esi
- xor %edx,%ebx
- bswap %eax
- rol $5,%esi
- and %r11d,%ebx
- mov %eax,40(%rsp)
- add %esi,%edi
- xor %edx,%ebx
- rol $30,%r11d
- add %ebx,%edi
- lea 0x5a827999(%eax,%edx),%esi
- mov %r11d,%ebx
- mov 44(%r9),%eax
- mov %edi,%edx
- xor %r12d,%ebx
- bswap %eax
- rol $5,%edx
- and %ebp,%ebx
- mov %eax,44(%rsp)
- add %edx,%esi
- xor %r12d,%ebx
- rol $30,%ebp
- add %ebx,%esi
- lea 0x5a827999(%eax,%r12d),%edx
- mov %ebp,%ebx
- mov 48(%r9),%eax
- mov %esi,%r12d
- xor %r11d,%ebx
- bswap %eax
- rol $5,%r12d
- and %edi,%ebx
- mov %eax,48(%rsp)
- add %r12d,%edx
- xor %r11d,%ebx
- rol $30,%edi
- add %ebx,%edx
- lea 0x5a827999(%eax,%r11d),%r12d
- mov %edi,%ebx
- mov 52(%r9),%eax
- mov %edx,%r11d
- xor %ebp,%ebx
- bswap %eax
- rol $5,%r11d
- and %esi,%ebx
- mov %eax,52(%rsp)
- add %r11d,%r12d
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- lea 0x5a827999(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov 56(%r9),%eax
- mov %r12d,%ebp
- xor %edi,%ebx
- bswap %eax
- rol $5,%ebp
- and %edx,%ebx
- mov %eax,56(%rsp)
- add %ebp,%r11d
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- lea 0x5a827999(%eax,%edi),%ebp
- mov %edx,%ebx
- mov 60(%r9),%eax
- mov %r11d,%edi
- xor %esi,%ebx
- bswap %eax
- rol $5,%edi
- and %r12d,%ebx
- mov %eax,60(%rsp)
- add %edi,%ebp
- xor %esi,%ebx
- rol $30,%r12d
- add %ebx,%ebp
- lea 0x5a827999(%eax,%esi),%edi
- mov 0(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 8(%rsp),%eax
- xor %edx,%ebx
- rol $5,%esi
- xor 32(%rsp),%eax
- and %r11d,%ebx
- add %esi,%edi
- xor 52(%rsp),%eax
- xor %edx,%ebx
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,0(%rsp)
- lea 0x5a827999(%eax,%edx),%esi
- mov 4(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 12(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edx
- xor 36(%rsp),%eax
- and %ebp,%ebx
- add %edx,%esi
- xor 56(%rsp),%eax
- xor %r12d,%ebx
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,4(%rsp)
- lea 0x5a827999(%eax,%r12d),%edx
- mov 8(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 16(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%r12d
- xor 40(%rsp),%eax
- and %edi,%ebx
- add %r12d,%edx
- xor 60(%rsp),%eax
- xor %r11d,%ebx
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,8(%rsp)
- lea 0x5a827999(%eax,%r11d),%r12d
- mov 12(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 20(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%r11d
- xor 44(%rsp),%eax
- and %esi,%ebx
- add %r11d,%r12d
- xor 0(%rsp),%eax
- xor %ebp,%ebx
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,12(%rsp)
- lea 0x5a827999(%eax,%ebp),%r11d
- mov 16(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 24(%rsp),%eax
- xor %edi,%ebx
- rol $5,%ebp
- xor 48(%rsp),%eax
- and %edx,%ebx
- add %ebp,%r11d
- xor 4(%rsp),%eax
- xor %edi,%ebx
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,16(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 20(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 28(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 52(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 8(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,20(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 24(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 32(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 56(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 12(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,24(%rsp)
- lea 0x6ed9eba1(%eax,%edx),%esi
- mov 28(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 36(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 60(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 16(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,28(%rsp)
- lea 0x6ed9eba1(%eax,%r12d),%edx
- mov 32(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 40(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 0(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 20(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,32(%rsp)
- lea 0x6ed9eba1(%eax,%r11d),%r12d
- mov 36(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 44(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 4(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 24(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,36(%rsp)
- lea 0x6ed9eba1(%eax,%ebp),%r11d
- mov 40(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 48(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 8(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 28(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,40(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 44(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 52(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 12(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 32(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,44(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 48(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 56(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 16(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 36(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,48(%rsp)
- lea 0x6ed9eba1(%eax,%edx),%esi
- mov 52(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 60(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 20(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 40(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,52(%rsp)
- lea 0x6ed9eba1(%eax,%r12d),%edx
- mov 56(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 0(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 24(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 44(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,56(%rsp)
- lea 0x6ed9eba1(%eax,%r11d),%r12d
- mov 60(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 4(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 28(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 48(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,60(%rsp)
- lea 0x6ed9eba1(%eax,%ebp),%r11d
- mov 0(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 8(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 32(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 52(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,0(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 4(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 12(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 36(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 56(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,4(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 8(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 16(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 40(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 60(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,8(%rsp)
- lea 0x6ed9eba1(%eax,%edx),%esi
- mov 12(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 20(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 44(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 0(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,12(%rsp)
- lea 0x6ed9eba1(%eax,%r12d),%edx
- mov 16(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 24(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 48(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 4(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,16(%rsp)
- lea 0x6ed9eba1(%eax,%r11d),%r12d
- mov 20(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 28(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 52(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 8(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,20(%rsp)
- lea 0x6ed9eba1(%eax,%ebp),%r11d
- mov 24(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 32(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 56(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 12(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,24(%rsp)
- lea 0x6ed9eba1(%eax,%edi),%ebp
- mov 28(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 36(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 60(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 16(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,28(%rsp)
- lea 0x6ed9eba1(%eax,%esi),%edi
- mov 32(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 40(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 0(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 20(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,32(%rsp)
- lea -0x70e44324(%eax,%edx),%esi
- mov 36(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 44(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 4(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 24(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,36(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 40(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 48(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 8(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 28(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,40(%rsp)
- add %ebx,%edx
- lea -0x70e44324(%eax,%r11d),%r12d
- mov 44(%rsp),%eax
- mov %esi,%ebx
- mov %esi,%ecx
- xor 52(%rsp),%eax
- mov %edx,%r11d
- and %edi,%ebx
- xor 12(%rsp),%eax
- or %edi,%ecx
- rol $5,%r11d
- xor 32(%rsp),%eax
- and %ebp,%ecx
- add %r11d,%r12d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%esi
- mov %eax,44(%rsp)
- add %ebx,%r12d
- lea -0x70e44324(%eax,%ebp),%r11d
- mov 48(%rsp),%eax
- mov %edx,%ebx
- mov %edx,%ecx
- xor 56(%rsp),%eax
- mov %r12d,%ebp
- and %esi,%ebx
- xor 16(%rsp),%eax
- or %esi,%ecx
- rol $5,%ebp
- xor 36(%rsp),%eax
- and %edi,%ecx
- add %ebp,%r11d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edx
- mov %eax,48(%rsp)
- add %ebx,%r11d
- lea -0x70e44324(%eax,%edi),%ebp
- mov 52(%rsp),%eax
- mov %r12d,%ebx
- mov %r12d,%ecx
- xor 60(%rsp),%eax
- mov %r11d,%edi
- and %edx,%ebx
- xor 20(%rsp),%eax
- or %edx,%ecx
- rol $5,%edi
- xor 40(%rsp),%eax
- and %esi,%ecx
- add %edi,%ebp
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r12d
- mov %eax,52(%rsp)
- add %ebx,%ebp
- lea -0x70e44324(%eax,%esi),%edi
- mov 56(%rsp),%eax
- mov %r11d,%ebx
- mov %r11d,%ecx
- xor 0(%rsp),%eax
- mov %ebp,%esi
- and %r12d,%ebx
- xor 24(%rsp),%eax
- or %r12d,%ecx
- rol $5,%esi
- xor 44(%rsp),%eax
- and %edx,%ecx
- add %esi,%edi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r11d
- mov %eax,56(%rsp)
- add %ebx,%edi
- lea -0x70e44324(%eax,%edx),%esi
- mov 60(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 4(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 28(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 48(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,60(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 0(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 8(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 32(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 52(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,0(%rsp)
- add %ebx,%edx
- lea -0x70e44324(%eax,%r11d),%r12d
- mov 4(%rsp),%eax
- mov %esi,%ebx
- mov %esi,%ecx
- xor 12(%rsp),%eax
- mov %edx,%r11d
- and %edi,%ebx
- xor 36(%rsp),%eax
- or %edi,%ecx
- rol $5,%r11d
- xor 56(%rsp),%eax
- and %ebp,%ecx
- add %r11d,%r12d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%esi
- mov %eax,4(%rsp)
- add %ebx,%r12d
- lea -0x70e44324(%eax,%ebp),%r11d
- mov 8(%rsp),%eax
- mov %edx,%ebx
- mov %edx,%ecx
- xor 16(%rsp),%eax
- mov %r12d,%ebp
- and %esi,%ebx
- xor 40(%rsp),%eax
- or %esi,%ecx
- rol $5,%ebp
- xor 60(%rsp),%eax
- and %edi,%ecx
- add %ebp,%r11d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edx
- mov %eax,8(%rsp)
- add %ebx,%r11d
- lea -0x70e44324(%eax,%edi),%ebp
- mov 12(%rsp),%eax
- mov %r12d,%ebx
- mov %r12d,%ecx
- xor 20(%rsp),%eax
- mov %r11d,%edi
- and %edx,%ebx
- xor 44(%rsp),%eax
- or %edx,%ecx
- rol $5,%edi
- xor 0(%rsp),%eax
- and %esi,%ecx
- add %edi,%ebp
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r12d
- mov %eax,12(%rsp)
- add %ebx,%ebp
- lea -0x70e44324(%eax,%esi),%edi
- mov 16(%rsp),%eax
- mov %r11d,%ebx
- mov %r11d,%ecx
- xor 24(%rsp),%eax
- mov %ebp,%esi
- and %r12d,%ebx
- xor 48(%rsp),%eax
- or %r12d,%ecx
- rol $5,%esi
- xor 4(%rsp),%eax
- and %edx,%ecx
- add %esi,%edi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r11d
- mov %eax,16(%rsp)
- add %ebx,%edi
- lea -0x70e44324(%eax,%edx),%esi
- mov 20(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 28(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 52(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 8(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,20(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 24(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 32(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 56(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 12(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,24(%rsp)
- add %ebx,%edx
- lea -0x70e44324(%eax,%r11d),%r12d
- mov 28(%rsp),%eax
- mov %esi,%ebx
- mov %esi,%ecx
- xor 36(%rsp),%eax
- mov %edx,%r11d
- and %edi,%ebx
- xor 60(%rsp),%eax
- or %edi,%ecx
- rol $5,%r11d
- xor 16(%rsp),%eax
- and %ebp,%ecx
- add %r11d,%r12d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%esi
- mov %eax,28(%rsp)
- add %ebx,%r12d
- lea -0x70e44324(%eax,%ebp),%r11d
- mov 32(%rsp),%eax
- mov %edx,%ebx
- mov %edx,%ecx
- xor 40(%rsp),%eax
- mov %r12d,%ebp
- and %esi,%ebx
- xor 0(%rsp),%eax
- or %esi,%ecx
- rol $5,%ebp
- xor 20(%rsp),%eax
- and %edi,%ecx
- add %ebp,%r11d
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edx
- mov %eax,32(%rsp)
- add %ebx,%r11d
- lea -0x70e44324(%eax,%edi),%ebp
- mov 36(%rsp),%eax
- mov %r12d,%ebx
- mov %r12d,%ecx
- xor 44(%rsp),%eax
- mov %r11d,%edi
- and %edx,%ebx
- xor 4(%rsp),%eax
- or %edx,%ecx
- rol $5,%edi
- xor 24(%rsp),%eax
- and %esi,%ecx
- add %edi,%ebp
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r12d
- mov %eax,36(%rsp)
- add %ebx,%ebp
- lea -0x70e44324(%eax,%esi),%edi
- mov 40(%rsp),%eax
- mov %r11d,%ebx
- mov %r11d,%ecx
- xor 48(%rsp),%eax
- mov %ebp,%esi
- and %r12d,%ebx
- xor 8(%rsp),%eax
- or %r12d,%ecx
- rol $5,%esi
- xor 28(%rsp),%eax
- and %edx,%ecx
- add %esi,%edi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%r11d
- mov %eax,40(%rsp)
- add %ebx,%edi
- lea -0x70e44324(%eax,%edx),%esi
- mov 44(%rsp),%eax
- mov %ebp,%ebx
- mov %ebp,%ecx
- xor 52(%rsp),%eax
- mov %edi,%edx
- and %r11d,%ebx
- xor 12(%rsp),%eax
- or %r11d,%ecx
- rol $5,%edx
- xor 32(%rsp),%eax
- and %r12d,%ecx
- add %edx,%esi
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%ebp
- mov %eax,44(%rsp)
- add %ebx,%esi
- lea -0x70e44324(%eax,%r12d),%edx
- mov 48(%rsp),%eax
- mov %edi,%ebx
- mov %edi,%ecx
- xor 56(%rsp),%eax
- mov %esi,%r12d
- and %ebp,%ebx
- xor 16(%rsp),%eax
- or %ebp,%ecx
- rol $5,%r12d
- xor 36(%rsp),%eax
- and %r11d,%ecx
- add %r12d,%edx
- rol $1,%eax
- or %ecx,%ebx
- rol $30,%edi
- mov %eax,48(%rsp)
- add %ebx,%edx
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 52(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 60(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 20(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 40(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,52(%rsp)
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov 56(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 0(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 24(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 44(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,56(%rsp)
- lea -0x359d3e2a(%eax,%edi),%ebp
- mov 60(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 4(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 28(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 48(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,60(%rsp)
- lea -0x359d3e2a(%eax,%esi),%edi
- mov 0(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 8(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 32(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 52(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,0(%rsp)
- lea -0x359d3e2a(%eax,%edx),%esi
- mov 4(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 12(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 36(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 56(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,4(%rsp)
- lea -0x359d3e2a(%eax,%r12d),%edx
- mov 8(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 16(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 40(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 60(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,8(%rsp)
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 12(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 20(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 44(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 0(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,12(%rsp)
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov 16(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 24(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 48(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 4(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,16(%rsp)
- lea -0x359d3e2a(%eax,%edi),%ebp
- mov 20(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 28(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 52(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 8(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,20(%rsp)
- lea -0x359d3e2a(%eax,%esi),%edi
- mov 24(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 32(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 56(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 12(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,24(%rsp)
- lea -0x359d3e2a(%eax,%edx),%esi
- mov 28(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 36(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 60(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 16(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- mov %eax,28(%rsp)
- lea -0x359d3e2a(%eax,%r12d),%edx
- mov 32(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 40(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 0(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 20(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- mov %eax,32(%rsp)
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 36(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 44(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 4(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 24(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- mov %eax,36(%rsp)
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov 40(%rsp),%eax
- mov %esi,%ebx
- mov %r12d,%ebp
- xor 48(%rsp),%eax
- xor %edx,%ebx
- rol $5,%ebp
- xor 8(%rsp),%eax
- xor %edi,%ebx
- add %ebp,%r11d
- xor 28(%rsp),%eax
- rol $30,%edx
- add %ebx,%r11d
- rol $1,%eax
- mov %eax,40(%rsp)
- lea -0x359d3e2a(%eax,%edi),%ebp
- mov 44(%rsp),%eax
- mov %edx,%ebx
- mov %r11d,%edi
- xor 52(%rsp),%eax
- xor %r12d,%ebx
- rol $5,%edi
- xor 12(%rsp),%eax
- xor %esi,%ebx
- add %edi,%ebp
- xor 32(%rsp),%eax
- rol $30,%r12d
- add %ebx,%ebp
- rol $1,%eax
- mov %eax,44(%rsp)
- lea -0x359d3e2a(%eax,%esi),%edi
- mov 48(%rsp),%eax
- mov %r12d,%ebx
- mov %ebp,%esi
- xor 56(%rsp),%eax
- xor %r11d,%ebx
- rol $5,%esi
- xor 16(%rsp),%eax
- xor %edx,%ebx
- add %esi,%edi
- xor 36(%rsp),%eax
- rol $30,%r11d
- add %ebx,%edi
- rol $1,%eax
- mov %eax,48(%rsp)
- lea -0x359d3e2a(%eax,%edx),%esi
- mov 52(%rsp),%eax
- mov %r11d,%ebx
- mov %edi,%edx
- xor 60(%rsp),%eax
- xor %ebp,%ebx
- rol $5,%edx
- xor 20(%rsp),%eax
- xor %r12d,%ebx
- add %edx,%esi
- xor 40(%rsp),%eax
- rol $30,%ebp
- add %ebx,%esi
- rol $1,%eax
- lea -0x359d3e2a(%eax,%r12d),%edx
- mov 56(%rsp),%eax
- mov %ebp,%ebx
- mov %esi,%r12d
- xor 0(%rsp),%eax
- xor %edi,%ebx
- rol $5,%r12d
- xor 24(%rsp),%eax
- xor %r11d,%ebx
- add %r12d,%edx
- xor 44(%rsp),%eax
- rol $30,%edi
- add %ebx,%edx
- rol $1,%eax
- lea -0x359d3e2a(%eax,%r11d),%r12d
- mov 60(%rsp),%eax
- mov %edi,%ebx
- mov %edx,%r11d
- xor 4(%rsp),%eax
- xor %esi,%ebx
- rol $5,%r11d
- xor 28(%rsp),%eax
- xor %ebp,%ebx
- add %r11d,%r12d
- xor 48(%rsp),%eax
- rol $30,%esi
- add %ebx,%r12d
- rol $1,%eax
- lea -0x359d3e2a(%eax,%ebp),%r11d
- mov %esi,%ebx
- mov %r12d,%ebp
- xor %edx,%ebx
- rol $5,%ebp
- xor %edi,%ebx
- add %ebp,%r11d
- rol $30,%edx
- add %ebx,%r11d
- // Update and save state information in SHA-1 context
- add 0(%r8),%r11d
- add 4(%r8),%r12d
- add 8(%r8),%edx
- add 12(%r8),%esi
- add 16(%r8),%edi
- mov %r11d,0(%r8)
- mov %r12d,4(%r8)
- mov %edx,8(%r8)
- mov %esi,12(%r8)
- mov %edi,16(%r8)
-
- xchg %r11d,%edx # mov %r11d,%edx
- xchg %r12d,%esi # mov %r12d,%esi
- xchg %r11d,%edi # mov %edx,%edi
- xchg %r12d,%ebp # mov %esi,%ebp
- # mov %edi,%r11d
- lea 64(%r9),%r9
- sub $1,%r10
- jnz .Lloop
- mov 64(%rsp),%rsp
- pop %r12
- pop %rbp
- pop %rbx
- ret
-SET_SIZE(sha1_block_data_order)
-
-.data
-.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
-
-#endif /* lint || __lint */
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
index 766b75355f0b..31da7f9767df 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
@@ -83,12 +83,21 @@ SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
#include <sys/asm_linkage.h>
ENTRY_NP(SHA256TransformBlocks)
+.cfi_startproc
+ movq %rsp, %rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_offset %rbx,-16
push %rbp
+.cfi_offset %rbp,-24
push %r12
+.cfi_offset %r12,-32
push %r13
+.cfi_offset %r13,-40
push %r14
+.cfi_offset %r14,-48
push %r15
+.cfi_offset %r15,-56
mov %rsp,%rbp # copy %rsp
shl $4,%rdx # num*16
sub $16*4+4*8,%rsp
@@ -99,6 +108,9 @@ ENTRY_NP(SHA256TransformBlocks)
mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg
mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg
mov %rbp,16*4+3*8(%rsp) # save copy of %rsp
+# echo ".cfi_cfa_expression %rsp+88,deref,+56" |
+# openssl/crypto/perlasm/x86_64-xlate.pl
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38
#.picmeup %rbp
# The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
@@ -2026,14 +2038,28 @@ ENTRY_NP(SHA256TransformBlocks)
jb .Lloop
mov 16*4+3*8(%rsp),%rsp
+.cfi_def_cfa %rsp,56
pop %r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r15
pop %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
pop %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
pop %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
pop %rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbp
pop %rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbx
- ret
+ RET
+.cfi_endproc
SET_SIZE(SHA256TransformBlocks)
.data
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
index 6e37618761b2..c2ba18538e33 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
@@ -84,12 +84,21 @@ SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
#include <sys/asm_linkage.h>
ENTRY_NP(SHA512TransformBlocks)
+.cfi_startproc
+ movq %rsp, %rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_offset %rbx,-16
push %rbp
+.cfi_offset %rbp,-24
push %r12
+.cfi_offset %r12,-32
push %r13
+.cfi_offset %r13,-40
push %r14
+.cfi_offset %r14,-48
push %r15
+.cfi_offset %r15,-56
mov %rsp,%rbp # copy %rsp
shl $4,%rdx # num*16
sub $16*8+4*8,%rsp
@@ -100,6 +109,9 @@ ENTRY_NP(SHA512TransformBlocks)
mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg
mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg
mov %rbp,16*8+3*8(%rsp) # save copy of %rsp
+# echo ".cfi_cfa_expression %rsp+152,deref,+56" |
+# openssl/crypto/perlasm/x86_64-xlate.pl
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38
#.picmeup %rbp
# The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
@@ -2027,14 +2039,28 @@ ENTRY_NP(SHA512TransformBlocks)
jb .Lloop
mov 16*8+3*8(%rsp),%rsp
+.cfi_def_cfa %rsp,56
pop %r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r15
pop %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
pop %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
pop %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
pop %rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbp
pop %rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbx
- ret
+ RET
+.cfi_endproc
SET_SIZE(SHA512TransformBlocks)
.data
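
For context: the .cfi_* directives added to SHA256TransformBlocks and
SHA512TransformBlocks above emit DWARF call-frame information so that
debuggers and stack unwinders can walk through these hand-written
routines. Each push is paired with a .cfi_offset recording where the
saved register sits relative to the canonical frame address (CFA), and
each pop with a .cfi_adjust_cfa_offset/.cfi_restore undoing it; the
.cfi_escape byte strings are the raw encodings of the
.cfi_cfa_expression lines quoted in the comments, as produced by
OpenSSL's x86_64-xlate.pl. A minimal illustration of the pattern
(hypothetical function, not part of this patch):

ENTRY_NP(example_fn)
.cfi_startproc
	push	%rbx			# save callee-saved register
.cfi_offset %rbx,-16			# %rbx now lives at CFA-16
	# ... function body ...
	pop	%rbx
.cfi_adjust_cfa_offset -8		# stack shrank by 8 bytes
.cfi_restore %rbx			# %rbx is back in its register
	RET
.cfi_endproc
SET_SIZE(example_fn)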
diff --git a/sys/contrib/openzfs/module/icp/illumos-crypto.c b/sys/contrib/openzfs/module/icp/illumos-crypto.c
index 3c5ef4393940..cc990f1aeb58 100644
--- a/sys/contrib/openzfs/module/icp/illumos-crypto.c
+++ b/sys/contrib/openzfs/module/icp/illumos-crypto.c
@@ -111,7 +111,6 @@ icp_fini(void)
{
skein_mod_fini();
sha2_mod_fini();
- sha1_mod_fini();
edonr_mod_fini();
aes_mod_fini();
kcf_sched_destroy();
@@ -142,7 +141,6 @@ icp_init(void)
/* initialize algorithms */
aes_mod_init();
edonr_mod_init();
- sha1_mod_init();
sha2_mod_init();
skein_mod_init();
diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1.h
deleted file mode 100644
index 251b64fcaeee..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sha1/sha1.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SHA1_H
-#define _SYS_SHA1_H
-
-#include <sys/types.h> /* for uint_* */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * NOTE: n2rng (Niagara2 RNG driver) accesses the state field of
- * SHA1_CTX directly. NEVER change this structure without verifying
- * compatibility with n2rng. The important thing is that the state
- * must be in a field declared as uint32_t state[5].
- */
-/* SHA-1 context. */
-typedef struct {
- uint32_t state[5]; /* state (ABCDE) */
- uint32_t count[2]; /* number of bits, modulo 2^64 (msb first) */
- union {
- uint8_t buf8[64]; /* undigested input */
- uint32_t buf32[16]; /* realigned input */
- } buf_un;
-} SHA1_CTX;
-
-#define SHA1_DIGEST_LENGTH 20
-
-void SHA1Init(SHA1_CTX *);
-void SHA1Update(SHA1_CTX *, const void *, size_t);
-void SHA1Final(void *, SHA1_CTX *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA1_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h
deleted file mode 100644
index 848d25ef050f..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1998, by Sun Microsystems, Inc.
- * All rights reserved.
- */
-
-#ifndef _SYS_SHA1_CONSTS_H
-#define _SYS_SHA1_CONSTS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * as explained in sha1.c, loading 32-bit constants on a sparc is expensive
- * since it involves both a `sethi' and an `or'. thus, we instead use `ld'
- * to load the constants from an array called `sha1_consts'. however, on
- * intel (and perhaps other processors), it is cheaper to load the constant
- * directly. thus, the c code in SHA1Transform() uses the macro SHA1_CONST()
- * which either expands to a constant or an array reference, depending on
- * the architecture the code is being compiled for.
- */
-
-#include <sys/types.h> /* uint32_t */
-
-extern const uint32_t sha1_consts[];
-
-#if defined(__sparc)
-#define SHA1_CONST(x) (sha1_consts[x])
-#else
-#define SHA1_CONST(x) (SHA1_CONST_ ## x)
-#endif
-
-/* constants, as provided in FIPS 180-1 */
-
-#define SHA1_CONST_0 0x5a827999U
-#define SHA1_CONST_1 0x6ed9eba1U
-#define SHA1_CONST_2 0x8f1bbcdcU
-#define SHA1_CONST_3 0xca62c1d6U
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA1_CONSTS_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h
deleted file mode 100644
index 1c1f8728f9b5..000000000000
--- a/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SHA1_IMPL_H
-#define _SHA1_IMPL_H
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SHA1_HASH_SIZE 20 /* SHA_1 digest length in bytes */
-#define SHA1_DIGEST_LENGTH 20 /* SHA1 digest length in bytes */
-#define SHA1_HMAC_BLOCK_SIZE 64 /* SHA1-HMAC block size */
-#define SHA1_HMAC_MIN_KEY_LEN 1 /* SHA1-HMAC min key length in bytes */
-#define SHA1_HMAC_MAX_KEY_LEN INT_MAX /* SHA1-HMAC max key length in bytes */
-#define SHA1_HMAC_INTS_PER_BLOCK (SHA1_HMAC_BLOCK_SIZE/sizeof (uint32_t))
-
-/*
- * CSPI information (entry points, provider info, etc.)
- */
-typedef enum sha1_mech_type {
- SHA1_MECH_INFO_TYPE, /* SUN_CKM_SHA1 */
- SHA1_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA1_HMAC */
- SHA1_HMAC_GEN_MECH_INFO_TYPE /* SUN_CKM_SHA1_HMAC_GENERAL */
-} sha1_mech_type_t;
-
-/*
- * Context for SHA1 mechanism.
- */
-typedef struct sha1_ctx {
- sha1_mech_type_t sc_mech_type; /* type of context */
- SHA1_CTX sc_sha1_ctx; /* SHA1 context */
-} sha1_ctx_t;
-
-/*
- * Context for SHA1-HMAC and SHA1-HMAC-GENERAL mechanisms.
- */
-typedef struct sha1_hmac_ctx {
- sha1_mech_type_t hc_mech_type; /* type of context */
- uint32_t hc_digest_len; /* digest len in bytes */
- SHA1_CTX hc_icontext; /* inner SHA1 context */
- SHA1_CTX hc_ocontext; /* outer SHA1 context */
-} sha1_hmac_ctx_t;
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SHA1_IMPL_H */
diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
index f2dae7093b94..876e21e5f1b1 100644
--- a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
+++ b/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h
@@ -30,6 +30,12 @@
#include <sys/stack.h>
#include <sys/trap.h>
+#if defined(__linux__) && defined(CONFIG_SLS)
+#define RET ret; int3
+#else
+#define RET ret
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
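
The RET macro added here exists because Linux kernels built with CONFIG_SLS (the straight-line-speculation mitigation) expect every `ret` in hand-written assembly to be followed by an `int3` trap, so the CPU cannot speculatively execute past the return. A small userspace sketch of the expansion (illustrative only; CONFIG_SLS is a kernel config symbol and will not normally be defined here):

    #include <stdio.h>

    #if defined(__linux__) && defined(CONFIG_SLS)
    #define RET ret; int3
    #else
    #define RET ret
    #endif

    #define STR_(x) #x
    #define STR(x)  STR_(x)

    int
    main(void)
    {
        /* Prints "ret; int3" under CONFIG_SLS, plain "ret" otherwise. */
        printf("RET expands to: %s\n", STR(RET));
        return (0);
    }
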
diff --git a/sys/contrib/openzfs/module/icp/io/sha1_mod.c b/sys/contrib/openzfs/module/icp/io/sha1_mod.c
deleted file mode 100644
index 6dcee6b2ecf2..000000000000
--- a/sys/contrib/openzfs/module/icp/io/sha1_mod.c
+++ /dev/null
@@ -1,1230 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/modctl.h>
-#include <sys/crypto/common.h>
-#include <sys/crypto/icp.h>
-#include <sys/crypto/spi.h>
-
-#include <sha1/sha1.h>
-#include <sha1/sha1_impl.h>
-
-/*
- * The sha1 module is created with two modlinkages:
- * - a modlmisc that allows consumers to directly call the entry points
- * SHA1Init, SHA1Update, and SHA1Final.
- * - a modlcrypto that allows the module to register with the Kernel
- * Cryptographic Framework (KCF) as a software provider for the SHA1
- * mechanisms.
- */
-
-static struct modlcrypto modlcrypto = {
- &mod_cryptoops,
- "SHA1 Kernel SW Provider 1.1"
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, { &modlcrypto, NULL }
-};
-
-
-/*
- * Macros to access the SHA1 or SHA1-HMAC contexts from a context passed
- * by KCF to one of the entry points.
- */
-
-#define PROV_SHA1_CTX(ctx) ((sha1_ctx_t *)(ctx)->cc_provider_private)
-#define PROV_SHA1_HMAC_CTX(ctx) ((sha1_hmac_ctx_t *)(ctx)->cc_provider_private)
-
-/* to extract the digest length passed as mechanism parameter */
-#define PROV_SHA1_GET_DIGEST_LEN(m, len) { \
- if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \
- (len) = (uint32_t)*((ulong_t *)(void *)mechanism->cm_param); \
- else { \
- ulong_t tmp_ulong; \
- bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \
- (len) = (uint32_t)tmp_ulong; \
- } \
-}
-
-#define PROV_SHA1_DIGEST_KEY(ctx, key, len, digest) { \
- SHA1Init(ctx); \
- SHA1Update(ctx, key, len); \
- SHA1Final(digest, ctx); \
-}
-
-/*
- * Mechanism info structure passed to KCF during registration.
- */
-static crypto_mech_info_t sha1_mech_info_tab[] = {
- /* SHA1 */
- {SUN_CKM_SHA1, SHA1_MECH_INFO_TYPE,
- CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
- 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
- /* SHA1-HMAC */
- {SUN_CKM_SHA1_HMAC, SHA1_HMAC_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES},
- /* SHA1-HMAC GENERAL */
- {SUN_CKM_SHA1_HMAC_GENERAL, SHA1_HMAC_GEN_MECH_INFO_TYPE,
- CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
- SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
- CRYPTO_KEYSIZE_UNIT_IN_BYTES}
-};
-
-static void sha1_provider_status(crypto_provider_handle_t, uint_t *);
-
-static crypto_control_ops_t sha1_control_ops = {
- sha1_provider_status
-};
-
-static int sha1_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
- crypto_req_handle_t);
-static int sha1_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_digest_update(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_digest_final(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
- crypto_req_handle_t);
-
-static crypto_digest_ops_t sha1_digest_ops = {
- .digest_init = sha1_digest_init,
- .digest = sha1_digest,
- .digest_update = sha1_digest_update,
- .digest_key = NULL,
- .digest_final = sha1_digest_final,
- .digest_atomic = sha1_digest_atomic
-};
-
-static int sha1_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha1_mac_update(crypto_ctx_t *, crypto_data_t *,
- crypto_req_handle_t);
-static int sha1_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
-static int sha1_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-static int sha1_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
- crypto_spi_ctx_template_t, crypto_req_handle_t);
-
-static crypto_mac_ops_t sha1_mac_ops = {
- .mac_init = sha1_mac_init,
- .mac = NULL,
- .mac_update = sha1_mac_update,
- .mac_final = sha1_mac_final,
- .mac_atomic = sha1_mac_atomic,
- .mac_verify_atomic = sha1_mac_verify_atomic
-};
-
-static int sha1_create_ctx_template(crypto_provider_handle_t,
- crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
- size_t *, crypto_req_handle_t);
-static int sha1_free_context(crypto_ctx_t *);
-
-static crypto_ctx_ops_t sha1_ctx_ops = {
- .create_ctx_template = sha1_create_ctx_template,
- .free_context = sha1_free_context
-};
-
-static crypto_ops_t sha1_crypto_ops = {{{{{
- &sha1_control_ops,
- &sha1_digest_ops,
- NULL,
- &sha1_mac_ops,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- &sha1_ctx_ops,
-}}}}};
-
-static crypto_provider_info_t sha1_prov_info = {{{{
- CRYPTO_SPI_VERSION_1,
- "SHA1 Software Provider",
- CRYPTO_SW_PROVIDER,
- NULL,
- &sha1_crypto_ops,
- sizeof (sha1_mech_info_tab)/sizeof (crypto_mech_info_t),
- sha1_mech_info_tab
-}}}};
-
-static crypto_kcf_provider_handle_t sha1_prov_handle = 0;
-
-int
-sha1_mod_init(void)
-{
- int ret;
-
- if ((ret = mod_install(&modlinkage)) != 0)
- return (ret);
-
- /*
- * Register with KCF. If the registration fails, log an
- * error but do not uninstall the module, since the functionality
- * provided by misc/sha1 should still be available.
- */
- if ((ret = crypto_register_provider(&sha1_prov_info,
- &sha1_prov_handle)) != CRYPTO_SUCCESS)
- cmn_err(CE_WARN, "sha1 _init: "
- "crypto_register_provider() failed (0x%x)", ret);
-
- return (0);
-}
-
-int
-sha1_mod_fini(void)
-{
- int ret;
-
- if (sha1_prov_handle != 0) {
- if ((ret = crypto_unregister_provider(sha1_prov_handle)) !=
- CRYPTO_SUCCESS) {
- cmn_err(CE_WARN,
- "sha1 _fini: crypto_unregister_provider() "
- "failed (0x%x)", ret);
- return (EBUSY);
- }
- sha1_prov_handle = 0;
- }
-
- return (mod_remove(&modlinkage));
-}
-
-/*
- * KCF software provider control entry points.
- */
-/* ARGSUSED */
-static void
-sha1_provider_status(crypto_provider_handle_t provider, uint_t *status)
-{
- *status = CRYPTO_PROVIDER_READY;
-}
-
-/*
- * KCF software provider digest entry points.
- */
-
-static int
-sha1_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_req_handle_t req)
-{
- if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /*
- * Allocate and initialize SHA1 context.
- */
- ctx->cc_provider_private = kmem_alloc(sizeof (sha1_ctx_t),
- crypto_kmflag(req));
- if (ctx->cc_provider_private == NULL)
- return (CRYPTO_HOST_MEMORY);
-
- PROV_SHA1_CTX(ctx)->sc_mech_type = SHA1_MECH_INFO_TYPE;
- SHA1Init(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
-
- return (CRYPTO_SUCCESS);
-}
-
-/*
- * Helper SHA1 digest update function for uio data.
- */
-static int
-sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data)
-{
- off_t offset = data->cd_offset;
- size_t length = data->cd_length;
- uint_t vec_idx = 0;
- size_t cur_len;
-
- /* we support only kernel buffer */
- if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /*
- * Jump to the first iovec containing data to be
- * digested.
- */
- offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx);
- if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) {
- /*
- * The caller specified an offset that is larger than the
- * total size of the buffers it provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
-
- /*
- * Now do the digesting on the iovecs.
- */
- while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) {
- cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) -
- offset, length);
-
- SHA1Update(sha1_ctx,
- (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset,
- cur_len);
-
- length -= cur_len;
- vec_idx++;
- offset = 0;
- }
-
- if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) {
- /*
- * The end of the specified iovec's was reached but
- * the length requested could not be processed, i.e.
- * The caller requested to digest more data than it provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
-
- return (CRYPTO_SUCCESS);
-}
-
-/*
- * Helper SHA1 digest final function for uio data.
- * digest_len is the length of the desired digest. If digest_len
- * is smaller than the default SHA1 digest length, the caller
- * must pass a scratch buffer, digest_scratch, which must
- * be at least SHA1_DIGEST_LENGTH bytes.
- */
-static int
-sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
- ulong_t digest_len, uchar_t *digest_scratch)
-{
- off_t offset = digest->cd_offset;
- uint_t vec_idx = 0;
-
- /* we support only kernel buffer */
- if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /*
- * Jump to the first iovec containing ptr to the digest to
- * be returned.
- */
- offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx);
- if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) {
- /*
- * The caller specified an offset that is
- * larger than the total size of the buffers
- * it provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
-
- if (offset + digest_len <=
- zfs_uio_iovlen(digest->cd_uio, vec_idx)) {
- /*
- * The computed SHA1 digest will fit in the current
- * iovec.
- */
- if (digest_len != SHA1_DIGEST_LENGTH) {
- /*
- * The caller requested a short digest. Digest
- * into a scratch buffer and return to
- * the user only what was requested.
- */
- SHA1Final(digest_scratch, sha1_ctx);
- bcopy(digest_scratch, (uchar_t *)
- zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
- digest_len);
- } else {
- SHA1Final((uchar_t *)zfs_uio_iovbase(digest->
- cd_uio, vec_idx) + offset,
- sha1_ctx);
- }
- } else {
- /*
- * The computed digest will be crossing one or more iovec's.
- * This is bad performance-wise but we need to support it.
- * Allocate a small scratch buffer on the stack and
- * copy it piece meal to the specified digest iovec's.
- */
- uchar_t digest_tmp[SHA1_DIGEST_LENGTH];
- off_t scratch_offset = 0;
- size_t length = digest_len;
- size_t cur_len;
-
- SHA1Final(digest_tmp, sha1_ctx);
-
- while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
- cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) -
- offset, length);
- bcopy(digest_tmp + scratch_offset,
- zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
- cur_len);
-
- length -= cur_len;
- vec_idx++;
- scratch_offset += cur_len;
- offset = 0;
- }
-
- if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
- /*
- * The end of the specified iovec's was reached but
- * the length requested could not be processed, i.e.
- * The caller requested to digest more data than it
- * provided.
- */
- return (CRYPTO_DATA_LEN_RANGE);
- }
- }
-
- return (CRYPTO_SUCCESS);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * We need to just return the length needed to store the output.
- * We should not destroy the context for the following cases.
- */
- if ((digest->cd_length == 0) ||
- (digest->cd_length < SHA1_DIGEST_LENGTH)) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- return (CRYPTO_BUFFER_TOO_SMALL);
- }
-
- /*
- * Do the SHA1 update on the specified input data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret != CRYPTO_SUCCESS) {
- /* the update failed, free context and bail */
- kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
- ctx->cc_provider_private = NULL;
- digest->cd_length = 0;
- return (ret);
- }
-
- /*
- * Do a SHA1 final, must be done separately since the digest
- * type can be different than the input data type.
- */
- switch (digest->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Final((unsigned char *)digest->cd_raw.iov_base +
- digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- digest, SHA1_DIGEST_LENGTH, NULL);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- /* all done, free context and return */
-
- if (ret == CRYPTO_SUCCESS) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- } else {
- digest->cd_length = 0;
- }
-
- kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
- ctx->cc_provider_private = NULL;
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_update(crypto_ctx_t *ctx, crypto_data_t *data,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * Do the SHA1 update on the specified input data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * We need to just return the length needed to store the output.
- * We should not destroy the context for the following cases.
- */
- if ((digest->cd_length == 0) ||
- (digest->cd_length < SHA1_DIGEST_LENGTH)) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- return (CRYPTO_BUFFER_TOO_SMALL);
- }
-
- /*
- * Do a SHA1 final.
- */
- switch (digest->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Final((unsigned char *)digest->cd_raw.iov_base +
- digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
- digest, SHA1_DIGEST_LENGTH, NULL);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- /* all done, free context and return */
-
- if (ret == CRYPTO_SUCCESS) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- } else {
- digest->cd_length = 0;
- }
-
- kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
- ctx->cc_provider_private = NULL;
-
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_digest_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_data_t *data, crypto_data_t *digest,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- SHA1_CTX sha1_ctx;
-
- if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /*
- * Do the SHA1 init.
- */
- SHA1Init(&sha1_ctx);
-
- /*
- * Do the SHA1 update on the specified input data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&sha1_ctx,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(&sha1_ctx, data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret != CRYPTO_SUCCESS) {
- /* the update failed, bail */
- digest->cd_length = 0;
- return (ret);
- }
-
- /*
- * Do a SHA1 final, must be done separately since the digest
- * type can be different than the input data type.
- */
- switch (digest->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Final((unsigned char *)digest->cd_raw.iov_base +
- digest->cd_offset, &sha1_ctx);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&sha1_ctx, digest,
- SHA1_DIGEST_LENGTH, NULL);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret == CRYPTO_SUCCESS) {
- digest->cd_length = SHA1_DIGEST_LENGTH;
- } else {
- digest->cd_length = 0;
- }
-
- return (ret);
-}
-
-/*
- * KCF software provider mac entry points.
- *
- * SHA1 HMAC is: SHA1(key XOR opad, SHA1(key XOR ipad, text))
- *
- * Init:
- * The initialization routine initializes what we denote
- * as the inner and outer contexts by doing
- * - for inner context: SHA1(key XOR ipad)
- * - for outer context: SHA1(key XOR opad)
- *
- * Update:
- * Each subsequent SHA1 HMAC update will result in an
- * update of the inner context with the specified data.
- *
- * Final:
- * The SHA1 HMAC final will do a SHA1 final operation on the
- * inner context, and the resulting digest will be used
- * as the data for an update on the outer context. Last
- * but not least, a SHA1 final on the outer context will
- * be performed to obtain the SHA1 HMAC digest to return
- * to the user.
- */
-
-/*
- * Initialize a SHA1-HMAC context.
- */
-static void
-sha1_mac_init_ctx(sha1_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
-{
- uint32_t ipad[SHA1_HMAC_INTS_PER_BLOCK];
- uint32_t opad[SHA1_HMAC_INTS_PER_BLOCK];
- uint_t i;
-
- bzero(ipad, SHA1_HMAC_BLOCK_SIZE);
- bzero(opad, SHA1_HMAC_BLOCK_SIZE);
-
- bcopy(keyval, ipad, length_in_bytes);
- bcopy(keyval, opad, length_in_bytes);
-
- /* XOR key with ipad (0x36) and opad (0x5c) */
- for (i = 0; i < SHA1_HMAC_INTS_PER_BLOCK; i++) {
- ipad[i] ^= 0x36363636;
- opad[i] ^= 0x5c5c5c5c;
- }
-
- /* perform SHA1 on ipad */
- SHA1Init(&ctx->hc_icontext);
- SHA1Update(&ctx->hc_icontext, (uint8_t *)ipad, SHA1_HMAC_BLOCK_SIZE);
-
- /* perform SHA1 on opad */
- SHA1Init(&ctx->hc_ocontext);
- SHA1Update(&ctx->hc_ocontext, (uint8_t *)opad, SHA1_HMAC_BLOCK_SIZE);
-}
-
-/*
- */
-static int
-sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
- crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
- mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t),
- crypto_kmflag(req));
- if (ctx->cc_provider_private == NULL)
- return (CRYPTO_HOST_MEMORY);
-
- if (ctx_template != NULL) {
- /* reuse context template */
- bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx),
- sizeof (sha1_hmac_ctx_t));
- } else {
- /* no context template, compute context */
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- uchar_t digested_key[SHA1_DIGEST_LENGTH];
- sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;
-
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext,
- key->ck_data, keylen_in_bytes, digested_key);
- sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
- digested_key, SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
- key->ck_data, keylen_in_bytes);
- }
- }
-
- /*
- * Get the mechanism parameters, if applicable.
- */
- PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
- if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
- if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t))
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- PROV_SHA1_GET_DIGEST_LEN(mechanism,
- PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len);
- if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len >
- SHA1_DIGEST_LENGTH)
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- }
-
- if (ret != CRYPTO_SUCCESS) {
- bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- ctx->cc_provider_private = NULL;
- }
-
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- /*
- * Do a SHA1 update of the inner context using the specified
- * data.
- */
- switch (data->cd_format) {
- case CRYPTO_DATA_RAW:
- SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_icontext,
- (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
- data->cd_length);
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_update_uio(
- &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, data);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uchar_t digest[SHA1_DIGEST_LENGTH];
- uint32_t digest_len = SHA1_DIGEST_LENGTH;
-
- ASSERT(ctx->cc_provider_private != NULL);
-
- if (PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type ==
- SHA1_HMAC_GEN_MECH_INFO_TYPE)
- digest_len = PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len;
-
- /*
- * We need to just return the length needed to store the output.
- * We should not destroy the context for the following cases.
- */
- if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) {
- mac->cd_length = digest_len;
- return (CRYPTO_BUFFER_TOO_SMALL);
- }
-
- /*
- * Do a SHA1 final on the inner context.
- */
- SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext);
-
- /*
- * Do a SHA1 update on the outer context, feeding the inner
- * digest as data.
- */
- SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, digest,
- SHA1_DIGEST_LENGTH);
-
- /*
- * Do a SHA1 final on the outer context, storing the computing
- * digest in the users buffer.
- */
- switch (mac->cd_format) {
- case CRYPTO_DATA_RAW:
- if (digest_len != SHA1_DIGEST_LENGTH) {
- /*
- * The caller requested a short digest. Digest
- * into a scratch buffer and return to
- * the user only what was requested.
- */
- SHA1Final(digest,
- &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
- bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len);
- } else {
- SHA1Final((unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset,
- &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
- }
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(
- &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, mac,
- digest_len, digest);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret == CRYPTO_SUCCESS) {
- mac->cd_length = digest_len;
- } else {
- mac->cd_length = 0;
- }
-
- bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
- ctx->cc_provider_private = NULL;
-
- return (ret);
-}
-
-#define SHA1_MAC_UPDATE(data, ctx, ret) { \
- switch (data->cd_format) { \
- case CRYPTO_DATA_RAW: \
- SHA1Update(&(ctx).hc_icontext, \
- (uint8_t *)data->cd_raw.iov_base + \
- data->cd_offset, data->cd_length); \
- break; \
- case CRYPTO_DATA_UIO: \
- ret = sha1_digest_update_uio(&(ctx).hc_icontext, data); \
- break; \
- default: \
- ret = CRYPTO_ARGUMENTS_BAD; \
- } \
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uchar_t digest[SHA1_DIGEST_LENGTH];
- sha1_hmac_ctx_t sha1_hmac_ctx;
- uint32_t digest_len = SHA1_DIGEST_LENGTH;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
- mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- if (ctx_template != NULL) {
- /* reuse context template */
- bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- } else {
- /* no context template, initialize context */
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
- key->ck_data, keylen_in_bytes, digest);
- sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
- SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
- keylen_in_bytes);
- }
- }
-
- /* get the mechanism parameters, if applicable */
- if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
- if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t)) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
- if (digest_len > SHA1_DIGEST_LENGTH) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- }
-
- /* do a SHA1 update of the inner context using the specified data */
- SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
- if (ret != CRYPTO_SUCCESS)
- /* the update failed, free context and bail */
- goto bail;
-
- /*
- * Do a SHA1 final on the inner context.
- */
- SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
-
- /*
- * Do an SHA1 update on the outer context, feeding the inner
- * digest as data.
- */
- SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
-
- /*
- * Do a SHA1 final on the outer context, storing the computed
- * digest in the users buffer.
- */
- switch (mac->cd_format) {
- case CRYPTO_DATA_RAW:
- if (digest_len != SHA1_DIGEST_LENGTH) {
- /*
- * The caller requested a short digest. Digest
- * into a scratch buffer and return to
- * the user only what was requested.
- */
- SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
- bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len);
- } else {
- SHA1Final((unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, &sha1_hmac_ctx.hc_ocontext);
- }
- break;
- case CRYPTO_DATA_UIO:
- ret = sha1_digest_final_uio(&sha1_hmac_ctx.hc_ocontext, mac,
- digest_len, digest);
- break;
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- if (ret == CRYPTO_SUCCESS) {
- mac->cd_length = digest_len;
- } else {
- mac->cd_length = 0;
- }
- /* Extra paranoia: zeroize the context on the stack */
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
-
- return (ret);
-bail:
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- mac->cd_length = 0;
- return (ret);
-}
-
-/* ARGSUSED */
-static int
-sha1_mac_verify_atomic(crypto_provider_handle_t provider,
- crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
- crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
- crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
-{
- int ret = CRYPTO_SUCCESS;
- uchar_t digest[SHA1_DIGEST_LENGTH];
- sha1_hmac_ctx_t sha1_hmac_ctx;
- uint32_t digest_len = SHA1_DIGEST_LENGTH;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
- mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
- return (CRYPTO_MECHANISM_INVALID);
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- if (ctx_template != NULL) {
- /* reuse context template */
- bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- } else {
- /* no context template, initialize context */
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
- key->ck_data, keylen_in_bytes, digest);
- sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
- SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
- keylen_in_bytes);
- }
- }
-
- /* get the mechanism parameters, if applicable */
- if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
- if (mechanism->cm_param == NULL ||
- mechanism->cm_param_len != sizeof (ulong_t)) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
- if (digest_len > SHA1_DIGEST_LENGTH) {
- ret = CRYPTO_MECHANISM_PARAM_INVALID;
- goto bail;
- }
- }
-
- if (mac->cd_length != digest_len) {
- ret = CRYPTO_INVALID_MAC;
- goto bail;
- }
-
- /* do a SHA1 update of the inner context using the specified data */
- SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
- if (ret != CRYPTO_SUCCESS)
- /* the update failed, free context and bail */
- goto bail;
-
- /* do a SHA1 final on the inner context */
- SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
-
- /*
- * Do an SHA1 update on the outer context, feeding the inner
- * digest as data.
- */
- SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
-
- /*
- * Do a SHA1 final on the outer context, storing the computed
- * digest in the users buffer.
- */
- SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
-
- /*
- * Compare the computed digest against the expected digest passed
- * as argument.
- */
-
- switch (mac->cd_format) {
-
- case CRYPTO_DATA_RAW:
- if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base +
- mac->cd_offset, digest_len) != 0)
- ret = CRYPTO_INVALID_MAC;
- break;
-
- case CRYPTO_DATA_UIO: {
- off_t offset = mac->cd_offset;
- uint_t vec_idx = 0;
- off_t scratch_offset = 0;
- size_t length = digest_len;
- size_t cur_len;
-
- /* we support only kernel buffer */
- if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /* jump to the first iovec containing the expected digest */
- offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx);
- if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) {
- /*
- * The caller specified an offset that is
- * larger than the total size of the buffers
- * it provided.
- */
- ret = CRYPTO_DATA_LEN_RANGE;
- break;
- }
-
- /* do the comparison of computed digest vs specified one */
- while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) {
- cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) -
- offset, length);
-
- if (bcmp(digest + scratch_offset,
- zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset,
- cur_len) != 0) {
- ret = CRYPTO_INVALID_MAC;
- break;
- }
-
- length -= cur_len;
- vec_idx++;
- scratch_offset += cur_len;
- offset = 0;
- }
- break;
- }
-
- default:
- ret = CRYPTO_ARGUMENTS_BAD;
- }
-
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- return (ret);
-bail:
- bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
- mac->cd_length = 0;
- return (ret);
-}
-
-/*
- * KCF software provider context management entry points.
- */
-
-/* ARGSUSED */
-static int
-sha1_create_ctx_template(crypto_provider_handle_t provider,
- crypto_mechanism_t *mechanism, crypto_key_t *key,
- crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
- crypto_req_handle_t req)
-{
- sha1_hmac_ctx_t *sha1_hmac_ctx_tmpl;
- uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
-
- if ((mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE) &&
- (mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)) {
- return (CRYPTO_MECHANISM_INVALID);
- }
-
- /* Add support for key by attributes (RFE 4706552) */
- if (key->ck_format != CRYPTO_KEY_RAW)
- return (CRYPTO_ARGUMENTS_BAD);
-
- /*
- * Allocate and initialize SHA1 context.
- */
- sha1_hmac_ctx_tmpl = kmem_alloc(sizeof (sha1_hmac_ctx_t),
- crypto_kmflag(req));
- if (sha1_hmac_ctx_tmpl == NULL)
- return (CRYPTO_HOST_MEMORY);
-
- if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
- uchar_t digested_key[SHA1_DIGEST_LENGTH];
-
- /*
- * Hash the passed-in key to get a smaller key.
- * The inner context is used since it hasn't been
- * initialized yet.
- */
- PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx_tmpl->hc_icontext,
- key->ck_data, keylen_in_bytes, digested_key);
- sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, digested_key,
- SHA1_DIGEST_LENGTH);
- } else {
- sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, key->ck_data,
- keylen_in_bytes);
- }
-
- sha1_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type;
- *ctx_template = (crypto_spi_ctx_template_t)sha1_hmac_ctx_tmpl;
- *ctx_template_size = sizeof (sha1_hmac_ctx_t);
-
-
- return (CRYPTO_SUCCESS);
-}
-
-static int
-sha1_free_context(crypto_ctx_t *ctx)
-{
- uint_t ctx_len;
- sha1_mech_type_t mech_type;
-
- if (ctx->cc_provider_private == NULL)
- return (CRYPTO_SUCCESS);
-
- /*
- * We have to free either SHA1 or SHA1-HMAC contexts, which
- * have different lengths.
- */
-
- mech_type = PROV_SHA1_CTX(ctx)->sc_mech_type;
- if (mech_type == SHA1_MECH_INFO_TYPE)
- ctx_len = sizeof (sha1_ctx_t);
- else {
- ASSERT(mech_type == SHA1_HMAC_MECH_INFO_TYPE ||
- mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE);
- ctx_len = sizeof (sha1_hmac_ctx_t);
- }
-
- bzero(ctx->cc_provider_private, ctx_len);
- kmem_free(ctx->cc_provider_private, ctx_len);
- ctx->cc_provider_private = NULL;
-
- return (CRYPTO_SUCCESS);
-}
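
The provider removed above implemented HMAC exactly as its block comment describes: SHA1(key XOR opad, SHA1(key XOR ipad, text)), with keys longer than one block first hashed down to a digest. A standalone sketch of that construction against the same three-call interface (hypothetical program; assumes a linked SHA1 implementation):

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    /* SHA1_CTX and entry points as in the deleted sys/sha1.h. */
    typedef struct {
        uint32_t state[5], count[2];
        union { uint8_t buf8[64]; uint32_t buf32[16]; } buf_un;
    } SHA1_CTX;

    void SHA1Init(SHA1_CTX *);
    void SHA1Update(SHA1_CTX *, const void *, size_t);
    void SHA1Final(void *, SHA1_CTX *);

    #define BLOCK  64  /* SHA1_HMAC_BLOCK_SIZE */
    #define DIGEST 20  /* SHA1_DIGEST_LENGTH */

    /* HMAC-SHA1(key, msg) = SHA1(key^opad || SHA1(key^ipad || msg)) */
    static void
    hmac_sha1(const uint8_t *key, size_t keylen,
        const uint8_t *msg, size_t msglen, uint8_t out[DIGEST])
    {
        uint8_t k[BLOCK] = { 0 }, pad[BLOCK], inner[DIGEST];
        SHA1_CTX ictx, octx;

        if (keylen > BLOCK) {            /* long keys are hashed down first */
            SHA1_CTX t;
            SHA1Init(&t);
            SHA1Update(&t, key, keylen);
            SHA1Final(k, &t);
        } else {
            memcpy(k, key, keylen);
        }

        for (int i = 0; i < BLOCK; i++)  /* inner context: key XOR ipad */
            pad[i] = k[i] ^ 0x36;
        SHA1Init(&ictx);
        SHA1Update(&ictx, pad, BLOCK);
        SHA1Update(&ictx, msg, msglen);
        SHA1Final(inner, &ictx);

        for (int i = 0; i < BLOCK; i++)  /* outer context: key XOR opad */
            pad[i] = k[i] ^ 0x5c;
        SHA1Init(&octx);
        SHA1Update(&octx, pad, BLOCK);
        SHA1Update(&octx, inner, DIGEST);
        SHA1Final(out, &octx);
    }

    int
    main(void)
    {
        uint8_t mac[DIGEST];

        hmac_sha1((const uint8_t *)"key", 3, (const uint8_t *)"text", 4, mac);
        for (int i = 0; i < DIGEST; i++)
            printf("%02x", mac[i]);
        printf("\n");
        return (0);
    }
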
diff --git a/sys/contrib/openzfs/module/icp/io/skein_mod.c b/sys/contrib/openzfs/module/icp/io/skein_mod.c
index 5ee36af12bcb..8992c5895e5b 100644
--- a/sys/contrib/openzfs/module/icp/io/skein_mod.c
+++ b/sys/contrib/openzfs/module/icp/io/skein_mod.c
@@ -494,7 +494,8 @@ skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
*/
/*ARGSUSED*/
static int
-skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+skein_final_nofree(crypto_ctx_t *ctx, crypto_data_t *digest,
+ crypto_req_handle_t req)
{
int error = CRYPTO_SUCCESS;
@@ -525,6 +526,17 @@ skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
else
digest->cd_length = 0;
+ return (error);
+}
+
+static int
+skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+{
+ int error = skein_final_nofree(ctx, digest, req);
+
+ if (error == CRYPTO_BUFFER_TOO_SMALL)
+ return (error);
+
bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
kmem_free(SKEIN_CTX(ctx), sizeof (*(SKEIN_CTX(ctx))));
SKEIN_CTX_LVALUE(ctx) = NULL;
@@ -560,7 +572,7 @@ skein_digest_atomic(crypto_provider_handle_t provider,
if ((error = skein_update(&ctx, data, digest)) != CRYPTO_SUCCESS)
goto out;
- if ((error = skein_final(&ctx, data, digest)) != CRYPTO_SUCCESS)
+ if ((error = skein_final_nofree(&ctx, data, digest)) != CRYPTO_SUCCESS)
goto out;
out:
@@ -669,7 +681,7 @@ skein_mac_atomic(crypto_provider_handle_t provider,
if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS)
goto errout;
- if ((error = skein_final(&ctx, mac, req)) != CRYPTO_SUCCESS)
+ if ((error = skein_final_nofree(&ctx, mac, req)) != CRYPTO_SUCCESS)
goto errout;
return (CRYPTO_SUCCESS);
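
The split above separates "produce the digest" from "tear down the context": the *_atomic entry points build their Skein context on the stack, so they must call skein_final_nofree(), while skein_final() keeps zeroizing and freeing the heap context for the multi-call path, except on CRYPTO_BUFFER_TOO_SMALL, where the caller may retry with a larger buffer. A simplified sketch of the pattern with stand-in types:

    #include <stdlib.h>
    #include <string.h>

    typedef struct {
        unsigned char st[128];  /* stand-in for the real digest state */
    } ctx_t;

    /* Compute the result only; never touches the context's storage. */
    static int
    final_nofree(ctx_t *c, unsigned char *out)
    {
        memcpy(out, c->st, 16); /* placeholder for the real finalization */
        return (0);
    }

    /* Multi-call path: compute, then zeroize and free the heap context. */
    static int
    final_and_free(ctx_t **cp, unsigned char *out)
    {
        int err = final_nofree(*cp, out);

        memset(*cp, 0, sizeof (**cp));
        free(*cp);
        *cp = NULL;
        return (err);
    }

    int
    main(void)
    {
        unsigned char out[16];
        ctx_t stack_ctx = { { 0 } };            /* atomic path: stack context */
        ctx_t *heap_ctx = calloc(1, sizeof (*heap_ctx));

        if (heap_ctx == NULL)
            return (1);
        (void) final_nofree(&stack_ctx, out);   /* safe: nothing freed */
        (void) final_and_free(&heap_ctx, out);  /* heap context released */
        return (0);
    }
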
diff --git a/sys/contrib/openzfs/module/lua/ldo.c b/sys/contrib/openzfs/module/lua/ldo.c
index f3c3dcb4d81a..a9835c4f571d 100644
--- a/sys/contrib/openzfs/module/lua/ldo.c
+++ b/sys/contrib/openzfs/module/lua/ldo.c
@@ -168,6 +168,13 @@ static void seterrorobj (lua_State *L, int errcode, StkId oldtop) {
L->top = oldtop + 1;
}
+/*
+ * Silence the infinite-recursion warning that gcc 12.1 added to -Wall.
+ */
+#if defined(HAVE_INFINITE_RECURSION)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winfinite-recursion"
+#endif
l_noret luaD_throw (lua_State *L, int errcode) {
if (L->errorJmp) { /* thread has an error handler? */
@@ -190,6 +197,10 @@ l_noret luaD_throw (lua_State *L, int errcode) {
}
}
+#if defined(HAVE_INFINITE_RECURSION)
+#pragma GCC diagnostic pop
+#endif
+
int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) {
unsigned short oldnCcalls = L->nCcalls;
@@ -395,7 +406,7 @@ int luaD_precall (lua_State *L, StkId func, int nresults) {
StkId base;
Proto *p = clLvalue(func)->p;
n = cast_int(L->top - func) - 1; /* number of real arguments */
- luaD_checkstack(L, p->maxstacksize);
+ luaD_checkstack(L, p->maxstacksize + p->numparams);
for (; n < p->numparams; n++)
setnilvalue(L->top++); /* complete missing arguments */
if (!p->is_vararg) {
diff --git a/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S b/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S
index a469cbad780e..34cf2c7dce93 100644
--- a/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S
+++ b/sys/contrib/openzfs/module/lua/setjmp/setjmp_x86_64.S
@@ -35,6 +35,12 @@ x:
.size x, [.-x]
+#if defined(__linux__) && defined(CONFIG_SLS)
+#define RET ret; int3
+#else
+#define RET ret
+#endif
+
/*
* Setjmp and longjmp implement non-local gotos using state vectors
* type label_t.
@@ -52,7 +58,7 @@ x:
movq 0(%rsp), %rdx /* return address */
movq %rdx, 56(%rdi) /* rip */
xorl %eax, %eax /* return 0 */
- ret
+ RET
SET_SIZE(setjmp)
ENTRY(longjmp)
@@ -67,7 +73,7 @@ x:
movq %rdx, 0(%rsp)
xorl %eax, %eax
incl %eax /* return 1 */
- ret
+ RET
SET_SIZE(longjmp)
#ifdef __ELF__
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
index ddd6d68b361c..ff7f112ffbbd 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
@@ -131,7 +131,7 @@ abd_scatter_chunkcnt(abd_t *abd)
boolean_t
abd_size_alloc_linear(size_t size)
{
- return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE);
+ return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
}
void
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
index fddb1f0e87cb..590d1c04b9a5 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -161,6 +161,12 @@ arc_prune_task(void *arg)
int64_t nr_scan = (intptr_t)arg;
arc_reduce_target_size(ptob(nr_scan));
+
+#ifndef __ILP32__
+ if (nr_scan > INT_MAX)
+ nr_scan = INT_MAX;
+#endif
+
#if __FreeBSD_version >= 1300139
sx_xlock(&arc_vnlru_lock);
vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
@@ -223,7 +229,10 @@ arc_lowmem(void *arg __unused, int howto __unused)
arc_warm = B_TRUE;
arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
free_memory = arc_available_memory();
- to_free = (arc_c >> arc_shrink_shift) - MIN(free_memory, 0);
+ int64_t can_free = arc_c - arc_c_min;
+ if (can_free <= 0)
+ return;
+ to_free = (can_free >> arc_shrink_shift) - MIN(free_memory, 0);
DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
arc_reduce_target_size(to_free);
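
After this change the lowmem handler applies shrink pressure only to the portion of the ARC target above arc_c_min, returning early when the target is already at the floor. A small worked example of the new arithmetic (illustrative values):

    #include <stdio.h>
    #include <stdint.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
        int64_t arc_c = 8LL << 30;           /* current target: 8 GiB */
        int64_t arc_c_min = 1LL << 30;       /* floor: 1 GiB */
        int arc_shrink_shift = 7;
        int64_t free_memory = -(64LL << 20); /* 64 MiB short of needfree */

        int64_t can_free = arc_c - arc_c_min;
        if (can_free <= 0)
            return (0);                      /* already at the floor */
        /* 7 GiB >> 7 = 56 MiB, plus the 64 MiB deficit = 120 MiB */
        int64_t to_free = (can_free >> arc_shrink_shift) -
            MIN(free_memory, 0);
        printf("to_free = %lld bytes\n", (long long)to_free);
        return (0);
    }
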
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
index 6a67dbc9f616..f342c5e85d74 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
@@ -151,6 +151,13 @@ freebsd_zfs_crypt_done(struct cryptop *crp)
return (0);
}
+static int
+freebsd_zfs_crypt_done_sync(struct cryptop *crp)
+{
+
+ return (0);
+}
+
void
freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
{
@@ -160,26 +167,36 @@ freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
}
static int
-zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
+zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
{
int error;
crp->crp_opaque = session;
- crp->crp_callback = freebsd_zfs_crypt_done;
for (;;) {
+#if __FreeBSD_version < 1400004
+ boolean_t async = ((crypto_ses2caps(crp->crp_session) &
+ CRYPTOCAP_F_SYNC) == 0);
+#else
+ boolean_t async = !CRYPTO_SESS_SYNC(crp->crp_session);
+#endif
+ crp->crp_callback = async ? freebsd_zfs_crypt_done :
+ freebsd_zfs_crypt_done_sync;
error = crypto_dispatch(crp);
- if (error)
- break;
- mtx_lock(&session->fs_lock);
- while (session->fs_done == false)
- msleep(crp, &session->fs_lock, 0,
- "zfs_crypto", 0);
- mtx_unlock(&session->fs_lock);
+ if (error == 0) {
+ if (async) {
+ mtx_lock(&session->fs_lock);
+ while (session->fs_done == false) {
+ msleep(crp, &session->fs_lock, 0,
+ "zfs_crypto", 0);
+ }
+ mtx_unlock(&session->fs_lock);
+ }
+ error = crp->crp_etype;
+ }
- if (crp->crp_etype == ENOMEM) {
+ if (error == ENOMEM) {
pause("zcrnomem", 1);
- } else if (crp->crp_etype != EAGAIN) {
- error = crp->crp_etype;
+ } else if (error != EAGAIN) {
break;
}
crp->crp_etype = 0;
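
The reworked dispatch loop keys its behavior off the session type: synchronous OCF sessions complete inline, so their callback is a no-op and nothing sleeps, while asynchronous ones wait on the condvar for the completion callback; ENOMEM backs off for a tick before retrying and EAGAIN retries immediately. A simplified userspace sketch of the control flow (stand-in types and helpers only):

    #include <errno.h>
    #include <stdbool.h>

    struct request { int etype; bool async; };

    static int  submit(struct request *r) { r->etype = 0; return (0); }
    static void wait_for_callback(struct request *r) { (void) r; }
    static void backoff_one_tick(void) { }

    static int
    dispatch(struct request *r)
    {
        int error;

        for (;;) {
            error = submit(r);
            if (error == 0) {
                if (r->async)
                    wait_for_callback(r); /* sleep until completion */
                error = r->etype;         /* status set by completion */
            }
            if (error == ENOMEM)
                backoff_one_tick();       /* transient: pause, then retry */
            else if (error != EAGAIN)
                break;                    /* success or hard error */
            r->etype = 0;
        }
        return (error);
    }

    int
    main(void)
    {
        struct request r = { .etype = 0, .async = true };

        return (dispatch(&r));
    }
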
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
index 5447eb922062..c8fa2b00c002 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
@@ -956,8 +956,7 @@ skip_open:
*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
*physical_ashift = 0;
if (pp->stripesize && pp->stripesize > (1 << *logical_ashift) &&
- ISP2(pp->stripesize) && pp->stripesize <= (1 << ASHIFT_MAX) &&
- pp->stripeoffset == 0)
+ ISP2(pp->stripesize) && pp->stripeoffset == 0)
*physical_ashift = highbit(pp->stripesize) - 1;
/*
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c
index 3a5c9f8caf0a..5bd2e1510ddb 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c
@@ -976,12 +976,13 @@ zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
*/
VI_LOCK(*vpp);
if (((*vpp)->v_iflag & VI_MOUNT) == 0) {
+ VI_UNLOCK(*vpp);
/*
* Upgrade to exclusive lock in order to:
* - avoid race conditions
* - satisfy the contract of mount_snapshot()
*/
- err = VOP_LOCK(*vpp, LK_TRYUPGRADE | LK_INTERLOCK);
+ err = VOP_LOCK(*vpp, LK_TRYUPGRADE);
if (err == 0)
break;
} else {
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
index fd86a75416e6..60c9ff0581e0 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
@@ -226,7 +226,11 @@ zfs_vop_fsync(vnode_t *vp)
struct mount *mp;
int error;
+#if __FreeBSD_version < 1400068
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+#else
+ if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
+#endif
goto drop;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(vp, MNT_WAIT, curthread);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index f6bc9c0c6afb..ea6388dd515e 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -981,13 +981,17 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
case RENAME:
if (error == ENOENT) {
error = EJUSTRETURN;
+#if __FreeBSD_version < 1400068
cnp->cn_flags |= SAVENAME;
+#endif
break;
}
fallthrough;
case DELETE:
+#if __FreeBSD_version < 1400068
if (error == 0)
cnp->cn_flags |= SAVENAME;
+#endif
break;
}
}
@@ -1337,7 +1341,10 @@ zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
cnp->cn_nameptr = __DECONST(char *, name);
cnp->cn_namelen = strlen(name);
cnp->cn_nameiop = nameiop;
- cnp->cn_flags = ISLASTCN | SAVENAME;
+ cnp->cn_flags = ISLASTCN;
+#if __FreeBSD_version < 1400068
+ cnp->cn_flags |= SAVENAME;
+#endif
cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
cnp->cn_cred = kcred;
#if __FreeBSD_version < 1400037
@@ -4642,7 +4649,9 @@ zfs_freebsd_create(struct vop_create_args *ap)
znode_t *zp = NULL;
int rc, mode;
+#if __FreeBSD_version < 1400068
ASSERT(cnp->cn_flags & SAVENAME);
+#endif
vattr_init_mask(vap);
mode = vap->va_mode & ALLPERMS;
@@ -4672,7 +4681,9 @@ static int
zfs_freebsd_remove(struct vop_remove_args *ap)
{
+#if __FreeBSD_version < 1400068
ASSERT(ap->a_cnp->cn_flags & SAVENAME);
+#endif
return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
ap->a_cnp->cn_cred));
@@ -4694,7 +4705,9 @@ zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
znode_t *zp = NULL;
int rc;
+#if __FreeBSD_version < 1400068
ASSERT(ap->a_cnp->cn_flags & SAVENAME);
+#endif
vattr_init_mask(vap);
*ap->a_vpp = NULL;
@@ -4720,7 +4733,9 @@ zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
{
struct componentname *cnp = ap->a_cnp;
+#if __FreeBSD_version < 1400068
ASSERT(cnp->cn_flags & SAVENAME);
+#endif
return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
}
@@ -4974,8 +4989,10 @@ zfs_freebsd_rename(struct vop_rename_args *ap)
vnode_t *tvp = ap->a_tvp;
int error;
+#if __FreeBSD_version < 1400068
ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
+#endif
error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
ap->a_tcnp, ap->a_fcnp->cn_cred);
@@ -5011,7 +5028,9 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap)
#endif
int rc;
+#if __FreeBSD_version < 1400068
ASSERT(cnp->cn_flags & SAVENAME);
+#endif
vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
vattr_init_mask(vap);
@@ -5105,7 +5124,9 @@ zfs_freebsd_link(struct vop_link_args *ap)
if (tdvp->v_mount != vp->v_mount)
return (EXDEV);
+#if __FreeBSD_version < 1400068
ASSERT(cnp->cn_flags & SAVENAME);
+#endif
return (zfs_link(VTOZ(tdvp), VTOZ(vp),
cnp->cn_nameptr, cnp->cn_cred, 0));
@@ -5364,10 +5385,10 @@ zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
#endif
error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
- vp = nd.ni_vp;
- NDFREE_PNBUF(&nd);
if (error != 0)
return (error);
+ vp = nd.ni_vp;
+ NDFREE_PNBUF(&nd);
if (ap->a_size != NULL) {
error = VOP_GETATTR(vp, &va, ap->a_cred);
@@ -5481,12 +5502,10 @@ zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
UIO_SYSSPACE, attrname, xvp);
#endif
error = namei(&nd);
- vp = nd.ni_vp;
- if (error != 0) {
- NDFREE_PNBUF(&nd);
+ if (error != 0)
return (error);
- }
+ vp = nd.ni_vp;
error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
NDFREE_PNBUF(&nd);
@@ -5612,10 +5631,10 @@ zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
#endif
error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
NULL);
- vp = nd.ni_vp;
- NDFREE_PNBUF(&nd);
if (error != 0)
return (error);
+ vp = nd.ni_vp;
+ NDFREE_PNBUF(&nd);
VATTR_NULL(&va);
va.va_size = 0;
@@ -5767,10 +5786,10 @@ zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
UIO_SYSSPACE, ".", xvp);
#endif
error = namei(&nd);
- vp = nd.ni_vp;
- NDFREE_PNBUF(&nd);
if (error != 0)
return (error);
+ vp = nd.ni_vp;
+ NDFREE_PNBUF(&nd);
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 8afa222dea34..6067950d5cd7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -638,7 +638,7 @@ abd_alloc_zero_scatter(void)
boolean_t
abd_size_alloc_linear(size_t size)
{
- return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE);
+ return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
}
void
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 97cd90bf0a75..d19595706ca0 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -105,6 +105,16 @@ bdev_whole(struct block_device *bdev)
}
#endif
+#if defined(HAVE_BDEVNAME)
+#define vdev_bdevname(bdev, name) bdevname(bdev, name)
+#else
+static inline void
+vdev_bdevname(struct block_device *bdev, char *name)
+{
+ snprintf(name, BDEVNAME_SIZE, "%pg", bdev);
+}
+#endif
+
/*
* Returns the maximum expansion capacity of the block device (in bytes).
*
@@ -204,7 +214,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
if (bdev) {
if (v->vdev_expanding && bdev != bdev_whole(bdev)) {
- bdevname(bdev_whole(bdev), disk_name + 5);
+ vdev_bdevname(bdev_whole(bdev), disk_name + 5);
/*
* If userland has BLKPG_RESIZE_PARTITION,
* then it should have updated the partition
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
index a13fc2aa2546..50e93909659f 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
@@ -1900,6 +1900,9 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
crypto_ctx_template_t tmpl;
uint8_t *authbuf = NULL;
+ memset(&puio, 0, sizeof (puio));
+ memset(&cuio, 0, sizeof (cuio));
+
/*
* If the needed key is the current one, just use it. Otherwise we
* need to generate a temporary one from the given salt + master key.
@@ -1937,7 +1940,7 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
*/
if (qat_crypt_use_accel(datalen) &&
ot != DMU_OT_INTENT_LOG && ot != DMU_OT_DNODE) {
- uint8_t *srcbuf, *dstbuf;
+ uint8_t __attribute__((unused)) *srcbuf, *dstbuf;
if (encrypt) {
srcbuf = plainbuf;
@@ -1960,9 +1963,6 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
/* If the hardware implementation fails fall back to software */
}
- bzero(&puio, sizeof (zfs_uio_t));
- bzero(&cuio, sizeof (zfs_uio_t));
-
/* create uios for encryption */
ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index bb92c65ca810..cef047bec6f6 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -954,8 +954,12 @@ zvol_free(zvol_state_t *zv)
del_gendisk(zv->zv_zso->zvo_disk);
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
defined(HAVE_BLK_ALLOC_DISK)
+#if defined(HAVE_BLK_CLEANUP_DISK)
blk_cleanup_disk(zv->zv_zso->zvo_disk);
#else
+ put_disk(zv->zv_zso->zvo_disk);
+#endif
+#else
blk_cleanup_queue(zv->zv_zso->zvo_queue);
put_disk(zv->zv_zso->zvo_disk);
#endif
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 42bf3e3036f9..8ee8e7e57420 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -181,7 +181,7 @@ abd_free_struct(abd_t *abd)
abd_t *
abd_alloc(size_t size, boolean_t is_metadata)
{
- if (!zfs_abd_scatter_enabled || abd_size_alloc_linear(size))
+ if (abd_size_alloc_linear(size))
return (abd_alloc_linear(size, is_metadata));
VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
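
Together with the FreeBSD and Linux abd_os.c hunks above, this moves the zfs_abd_scatter_enabled test into abd_size_alloc_linear() itself, so every caller, including the arc_write_ready() check later in this merge, sees one consistent linear-vs-scatter policy. A sketch of the consolidated predicate (illustrative tunable values):

    #include <stdio.h>
    #include <stdbool.h>
    #include <stddef.h>

    static bool zfs_abd_scatter_enabled = true;
    static size_t zfs_abd_scatter_min_size = 1536;  /* illustrative */

    /* Linear when scattering is disabled entirely or the buffer is small. */
    static bool
    abd_size_alloc_linear(size_t size)
    {
        return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
    }

    int
    main(void)
    {
        printf("512 bytes -> %s\n",
            abd_size_alloc_linear(512) ? "linear" : "scatter");
        printf("128 KiB   -> %s\n",
            abd_size_alloc_linear(128 * 1024) ? "linear" : "scatter");
        return (0);
    }
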
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 9f08f63ea681..17193ed079fe 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -878,6 +878,14 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
/*
+ * l2arc_exclude_special : A zfs module parameter that controls whether buffers
+ * present on special vdevs are eligible for caching in L2ARC. If
+ * set to 1, exclude dbufs on special vdevs from being cached to
+ * L2ARC.
+ */
+int l2arc_exclude_special = 0;
+
+/*
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
* metadata and data are cached from ARC into L2ARC.
*/
@@ -5036,10 +5044,11 @@ arc_reap_cb(void *arg, zthr_t *zthr)
*/
free_memory = arc_available_memory();
- int64_t to_free =
- (arc_c >> arc_shrink_shift) - free_memory;
- if (to_free > 0) {
- arc_reduce_target_size(to_free);
+ int64_t can_free = arc_c - arc_c_min;
+ if (can_free > 0) {
+ int64_t to_free = (can_free >> arc_shrink_shift) - free_memory;
+ if (to_free > 0)
+ arc_reduce_target_size(to_free);
}
spl_fstrans_unmark(cookie);
}
@@ -6917,7 +6926,8 @@ arc_write_ready(zio_t *zio)
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA |
ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
- } else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
+ } else if (!abd_size_alloc_linear(arc_buf_size(buf)) ||
+ !arc_can_share(hdr, buf)) {
/*
* Ideally, we would always copy the io_abd into b_pabd, but the
* user may have disabled compressed ARC, thus we must check the
@@ -8052,6 +8062,18 @@ arc_init(void)
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
zfs_dirty_data_max_max);
}
+
+ if (zfs_wrlog_data_max == 0) {
+
+ /*
+ * dp_wrlog_total is reduced for each txg at the end of
+ * spa_sync(). However, dp_dirty_total is reduced every time
+ * a block is written out. Thus under normal operation,
+ * dp_wrlog_total could grow 2 times as big as
+ * zfs_dirty_data_max.
+ */
+ zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+ }
}
void
@@ -11134,6 +11156,10 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
"Cache only MFU data from ARC into L2ARC");
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
+ "If set to 1 exclude dbufs on special vdevs from being cached to "
+ "L2ARC.");
+
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
diff --git a/sys/contrib/openzfs/module/zfs/bqueue.c b/sys/contrib/openzfs/module/zfs/bqueue.c
index 22539efc4e23..ec5ce4388ec8 100644
--- a/sys/contrib/openzfs/module/zfs/bqueue.c
+++ b/sys/contrib/openzfs/module/zfs/bqueue.c
@@ -42,8 +42,7 @@ obj2node(bqueue_t *q, void *data)
* Return 0 on success, or -1 on failure.
*/
int
-bqueue_init(bqueue_t *q, uint64_t fill_fraction, uint64_t size,
- size_t node_offset)
+bqueue_init(bqueue_t *q, uint_t fill_fraction, size_t size, size_t node_offset)
{
if (fill_fraction == 0) {
return (-1);
@@ -78,22 +77,26 @@ bqueue_destroy(bqueue_t *q)
}
static void
-bqueue_enqueue_impl(bqueue_t *q, void *data, uint64_t item_size,
- boolean_t flush)
+bqueue_enqueue_impl(bqueue_t *q, void *data, size_t item_size, boolean_t flush)
{
ASSERT3U(item_size, >, 0);
ASSERT3U(item_size, <=, q->bq_maxsize);
mutex_enter(&q->bq_lock);
obj2node(q, data)->bqn_size = item_size;
- while (q->bq_size + item_size > q->bq_maxsize) {
+ while (q->bq_size && q->bq_size + item_size > q->bq_maxsize) {
+ /*
+ * Wake up the bqueue_dequeue() thread if it is already
+ * sleeping, to prevent a deadlock.
+ */
+ cv_signal(&q->bq_pop_cv);
cv_wait_sig(&q->bq_add_cv, &q->bq_lock);
}
q->bq_size += item_size;
list_insert_tail(&q->bq_list, data);
- if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
- cv_signal(&q->bq_pop_cv);
if (flush)
cv_broadcast(&q->bq_pop_cv);
+ else if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
+ cv_signal(&q->bq_pop_cv);
mutex_exit(&q->bq_lock);
}
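
Two things change in the hunk above: a producer about to block on a full queue first wakes any sleeping consumer, closing a window where both sides could sleep forever, and the flush broadcast now takes precedence over the fill-fraction signal. A self-contained sketch of the anti-deadlock pattern using POSIX threads (simplified types; not the kernel synchronization primitives used here):

#include <pthread.h>
#include <stddef.h>

typedef struct {
        pthread_mutex_t lock;
        pthread_cond_t  add_cv;         /* space became available */
        pthread_cond_t  pop_cv;         /* data became available */
        size_t          size, maxsize;
} bq_t;

static void
bq_reserve(bq_t *q, size_t item_size)
{
        pthread_mutex_lock(&q->lock);
        while (q->size != 0 && q->size + item_size > q->maxsize) {
                /* Wake a consumer before sleeping ourselves, so the
                 * producer and consumer are never both asleep. */
                pthread_cond_signal(&q->pop_cv);
                pthread_cond_wait(&q->add_cv, &q->lock);
        }
        /* An empty queue always admits one item, even oversized. */
        q->size += item_size;
        pthread_mutex_unlock(&q->lock);
}
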
@@ -103,7 +106,7 @@ bqueue_enqueue_impl(bqueue_t *q, void *data, uint64_t item_size,
* > 0.
*/
void
-bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
+bqueue_enqueue(bqueue_t *q, void *data, size_t item_size)
{
bqueue_enqueue_impl(q, data, item_size, B_FALSE);
}
@@ -117,7 +120,7 @@ bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
* destroy the condvar before the enqueuing thread is done.
*/
void
-bqueue_enqueue_flush(bqueue_t *q, void *data, uint64_t item_size)
+bqueue_enqueue_flush(bqueue_t *q, void *data, size_t item_size)
{
bqueue_enqueue_impl(q, data, item_size, B_TRUE);
}
@@ -130,7 +133,7 @@ void *
bqueue_dequeue(bqueue_t *q)
{
void *ret = NULL;
- uint64_t item_size;
+ size_t item_size;
mutex_enter(&q->bq_lock);
while (q->bq_size == 0) {
cv_wait_sig(&q->bq_pop_cv, &q->bq_lock);
diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c
index 57b9dbbb2b50..e16c4ebef6ba 100644
--- a/sys/contrib/openzfs/module/zfs/btree.c
+++ b/sys/contrib/openzfs/module/zfs/btree.c
@@ -53,18 +53,30 @@ kmem_cache_t *zfs_btree_leaf_cache;
* (while the asymptotic complexity of the other steps is the same, the
* importance of the constant factors cannot be denied).
*/
-int zfs_btree_verify_intensity = 0;
+uint_t zfs_btree_verify_intensity = 0;
/*
- * A convenience function to silence warnings from memmove's return value and
- * change argument order to src, dest.
+ * Convenience functions to silence warnings from memcpy/memmove's
+ * return values and change argument order to src, dest.
*/
static void
+bcpy(const void *src, void *dest, size_t size)
+{
+ (void) memcpy(dest, src, size);
+}
+
+static void
bmov(const void *src, void *dest, size_t size)
{
(void) memmove(dest, src, size);
}
+static boolean_t
+zfs_btree_is_core(struct zfs_btree_hdr *hdr)
+{
+ return (hdr->bth_first == -1);
+}
+
#ifdef _ILP32
#define BTREE_POISON 0xabadb10c
#else
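
zfs_btree_is_core() formalizes a trick used throughout the rest of this patch: leaves gain a bth_first field holding the index of their first live element, so elements can be trimmed or prepended at either end without a memmove, while core nodes store -1 there. That sentinel doubles as the node-kind flag, replacing the old bth_core boolean. A hedged sketch with simplified structs:

#include <stdint.h>
#include <stdbool.h>

typedef struct {
        uint32_t bth_first;     /* first live element; -1 marks a core node */
        uint32_t bth_count;     /* number of live elements */
} hdr_t;

static bool
is_core(const hdr_t *h)
{
        return (h->bth_first == (uint32_t)-1);
}

static bool
leaf_has_front_room(const hdr_t *h)
{
        /* A leaf absorbs a front insertion without shifting its
         * whole tail right whenever bth_first is nonzero. */
        return (!is_core(h) && h->bth_first > 0);
}
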
@@ -76,59 +88,74 @@ zfs_btree_poison_node(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
{
#ifdef ZFS_DEBUG
size_t size = tree->bt_elem_size;
- if (!hdr->bth_core) {
- zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
- (void) memset(leaf->btl_elems + hdr->bth_count * size, 0x0f,
- BTREE_LEAF_SIZE - sizeof (zfs_btree_hdr_t) -
- hdr->bth_count * size);
- } else {
+ if (zfs_btree_is_core(hdr)) {
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
- for (int i = hdr->bth_count + 1; i <= BTREE_CORE_ELEMS; i++) {
+ for (uint32_t i = hdr->bth_count + 1; i <= BTREE_CORE_ELEMS;
+ i++) {
node->btc_children[i] =
(zfs_btree_hdr_t *)BTREE_POISON;
}
(void) memset(node->btc_elems + hdr->bth_count * size, 0x0f,
(BTREE_CORE_ELEMS - hdr->bth_count) * size);
+ } else {
+ zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+ (void) memset(leaf->btl_elems, 0x0f, hdr->bth_first * size);
+ (void) memset(leaf->btl_elems +
+ (hdr->bth_first + hdr->bth_count) * size, 0x0f,
+ BTREE_LEAF_ESIZE -
+ (hdr->bth_first + hdr->bth_count) * size);
}
#endif
}
static inline void
zfs_btree_poison_node_at(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
- uint64_t offset)
+ uint32_t idx, uint32_t count)
{
#ifdef ZFS_DEBUG
size_t size = tree->bt_elem_size;
- ASSERT3U(offset, >=, hdr->bth_count);
- if (!hdr->bth_core) {
- zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
- (void) memset(leaf->btl_elems + offset * size, 0x0f, size);
- } else {
+ if (zfs_btree_is_core(hdr)) {
+ ASSERT3U(idx, >=, hdr->bth_count);
+ ASSERT3U(idx, <=, BTREE_CORE_ELEMS);
+ ASSERT3U(idx + count, <=, BTREE_CORE_ELEMS);
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
- node->btc_children[offset + 1] =
- (zfs_btree_hdr_t *)BTREE_POISON;
- (void) memset(node->btc_elems + offset * size, 0x0f, size);
+ for (uint32_t i = 1; i <= count; i++) {
+ node->btc_children[idx + i] =
+ (zfs_btree_hdr_t *)BTREE_POISON;
+ }
+ (void) memset(node->btc_elems + idx * size, 0x0f, count * size);
+ } else {
+ ASSERT3U(idx, <=, tree->bt_leaf_cap);
+ ASSERT3U(idx + count, <=, tree->bt_leaf_cap);
+ zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
+ (void) memset(leaf->btl_elems +
+ (hdr->bth_first + idx) * size, 0x0f, count * size);
}
#endif
}
static inline void
zfs_btree_verify_poison_at(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
- uint64_t offset)
+ uint32_t idx)
{
#ifdef ZFS_DEBUG
size_t size = tree->bt_elem_size;
- uint8_t eval = 0x0f;
- if (hdr->bth_core) {
+ if (zfs_btree_is_core(hdr)) {
+ ASSERT3U(idx, <, BTREE_CORE_ELEMS);
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
zfs_btree_hdr_t *cval = (zfs_btree_hdr_t *)BTREE_POISON;
- VERIFY3P(node->btc_children[offset + 1], ==, cval);
- for (int i = 0; i < size; i++)
- VERIFY3U(node->btc_elems[offset * size + i], ==, eval);
+ VERIFY3P(node->btc_children[idx + 1], ==, cval);
+ for (size_t i = 0; i < size; i++)
+ VERIFY3U(node->btc_elems[idx * size + i], ==, 0x0f);
} else {
+ ASSERT3U(idx, <, tree->bt_leaf_cap);
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
- for (int i = 0; i < size; i++)
- VERIFY3U(leaf->btl_elems[offset * size + i], ==, eval);
+ if (idx >= tree->bt_leaf_cap - hdr->bth_first)
+ return;
+ for (size_t i = 0; i < size; i++) {
+ VERIFY3U(leaf->btl_elems[(hdr->bth_first + idx)
+ * size + i], ==, 0x0f);
+ }
}
#endif
}
@@ -137,8 +164,7 @@ void
zfs_btree_init(void)
{
zfs_btree_leaf_cache = kmem_cache_create("zfs_btree_leaf_cache",
- BTREE_LEAF_SIZE, 0, NULL, NULL, NULL, NULL,
- NULL, 0);
+ BTREE_LEAF_SIZE, 0, NULL, NULL, NULL, NULL, NULL, 0);
}
void
@@ -151,17 +177,12 @@ void
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
size_t size)
{
- /*
- * We need a minimmum of 4 elements so that when we split a node we
- * always have at least two elements in each node. This simplifies the
- * logic in zfs_btree_bulk_finish, since it means the last leaf will
- * always have a left sibling to share with (unless it's the root).
- */
- ASSERT3U(size, <=, (BTREE_LEAF_SIZE - sizeof (zfs_btree_hdr_t)) / 4);
+ ASSERT3U(size, <=, BTREE_LEAF_ESIZE / 2);
bzero(tree, sizeof (*tree));
tree->bt_compar = compar;
tree->bt_elem_size = size;
+ tree->bt_leaf_cap = P2ALIGN(BTREE_LEAF_ESIZE / size, 2);
tree->bt_height = -1;
tree->bt_bulk = NULL;
}
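
The leaf capacity is now computed once at tree creation: BTREE_LEAF_ESIZE (the leaf's payload bytes) divided by the element size, rounded down to an even count by P2ALIGN(x, 2). As a hedged worked example, assuming a payload of 4080 bytes: 24-byte elements give 4080 / 24 = 170, already even, so bt_leaf_cap = 170; 32-byte elements give 127, which rounds down to 126. The relaxed assertion size <= BTREE_LEAF_ESIZE / 2 still guarantees every leaf can hold at least two elements.
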
@@ -170,21 +191,20 @@ zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
* Find value in the array of elements provided. Uses a simple binary search.
*/
static void *
-zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint64_t nelems,
+zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
const void *value, zfs_btree_index_t *where)
{
- uint64_t max = nelems;
- uint64_t min = 0;
+ uint32_t max = nelems;
+ uint32_t min = 0;
while (max > min) {
- uint64_t idx = (min + max) / 2;
+ uint32_t idx = (min + max) / 2;
uint8_t *cur = buf + idx * tree->bt_elem_size;
int comp = tree->bt_compar(cur, value);
- if (comp == -1) {
+ if (comp < 0) {
min = idx + 1;
- } else if (comp == 1) {
+ } else if (comp > 0) {
max = idx;
} else {
- ASSERT0(comp);
where->bti_offset = idx;
where->bti_before = B_FALSE;
return (cur);
@@ -219,12 +239,13 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
* bulk-insert mode are to insert new elements.
*/
zfs_btree_index_t idx;
+ size_t size = tree->bt_elem_size;
if (tree->bt_bulk != NULL) {
zfs_btree_leaf_t *last_leaf = tree->bt_bulk;
- int compar = tree->bt_compar(last_leaf->btl_elems +
- ((last_leaf->btl_hdr.bth_count - 1) * tree->bt_elem_size),
- value);
- if (compar < 0) {
+ int comp = tree->bt_compar(last_leaf->btl_elems +
+ (last_leaf->btl_hdr.bth_first +
+ last_leaf->btl_hdr.bth_count - 1) * size, value);
+ if (comp < 0) {
/*
* If what they're looking for is after the last
* element, it's not in the tree.
@@ -236,7 +257,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
where->bti_before = B_TRUE;
}
return (NULL);
- } else if (compar == 0) {
+ } else if (comp == 0) {
if (where != NULL) {
where->bti_node = (zfs_btree_hdr_t *)last_leaf;
where->bti_offset =
@@ -244,18 +265,20 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
where->bti_before = B_FALSE;
}
return (last_leaf->btl_elems +
- ((last_leaf->btl_hdr.bth_count - 1) *
- tree->bt_elem_size));
+ (last_leaf->btl_hdr.bth_first +
+ last_leaf->btl_hdr.bth_count - 1) * size);
}
- if (tree->bt_compar(last_leaf->btl_elems, value) <= 0) {
+ if (tree->bt_compar(last_leaf->btl_elems +
+ last_leaf->btl_hdr.bth_first * size, value) <= 0) {
/*
* If what they're looking for is after the first
* element in the last leaf, it's in the last leaf or
* it's not in the tree.
*/
void *d = zfs_btree_find_in_buf(tree,
- last_leaf->btl_elems, last_leaf->btl_hdr.bth_count,
- value, &idx);
+ last_leaf->btl_elems +
+ last_leaf->btl_hdr.bth_first * size,
+ last_leaf->btl_hdr.bth_count, value, &idx);
if (where != NULL) {
idx.bti_node = (zfs_btree_hdr_t *)last_leaf;
@@ -266,7 +289,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
}
zfs_btree_core_t *node = NULL;
- uint64_t child = 0;
+ uint32_t child = 0;
uint64_t depth = 0;
/*
@@ -296,7 +319,8 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
*/
zfs_btree_leaf_t *leaf = (depth == 0 ?
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
- void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems,
+ void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
+ leaf->btl_hdr.bth_first * size,
leaf->btl_hdr.bth_count, value, &idx);
if (where != NULL) {
@@ -366,24 +390,23 @@ enum bt_shift_direction {
* shift is determined by shape. The direction is determined by dir.
*/
static inline void
-bt_shift_core(zfs_btree_t *tree, zfs_btree_core_t *node, uint64_t idx,
- uint64_t count, uint64_t off, enum bt_shift_shape shape,
+bt_shift_core(zfs_btree_t *tree, zfs_btree_core_t *node, uint32_t idx,
+ uint32_t count, uint32_t off, enum bt_shift_shape shape,
enum bt_shift_direction dir)
{
size_t size = tree->bt_elem_size;
- ASSERT(node->btc_hdr.bth_core);
+ ASSERT(zfs_btree_is_core(&node->btc_hdr));
uint8_t *e_start = node->btc_elems + idx * size;
- int sign = (dir == BSD_LEFT ? -1 : +1);
- uint8_t *e_out = e_start + sign * off * size;
- uint64_t e_count = count;
- bmov(e_start, e_out, e_count * size);
+ uint8_t *e_out = (dir == BSD_LEFT ? e_start - off * size :
+ e_start + off * size);
+ bmov(e_start, e_out, count * size);
zfs_btree_hdr_t **c_start = node->btc_children + idx +
(shape == BSS_TRAPEZOID ? 0 : 1);
zfs_btree_hdr_t **c_out = (dir == BSD_LEFT ? c_start - off :
c_start + off);
- uint64_t c_count = count + (shape == BSS_TRAPEZOID ? 1 : 0);
+ uint32_t c_count = count + (shape == BSS_TRAPEZOID ? 1 : 0);
bmov(c_start, c_out, c_count * sizeof (*c_start));
}
@@ -394,8 +417,8 @@ bt_shift_core(zfs_btree_t *tree, zfs_btree_core_t *node, uint64_t idx,
* false if it is a parallelogram.
*/
static inline void
-bt_shift_core_left(zfs_btree_t *tree, zfs_btree_core_t *node, uint64_t idx,
- uint64_t count, enum bt_shift_shape shape)
+bt_shift_core_left(zfs_btree_t *tree, zfs_btree_core_t *node, uint32_t idx,
+ uint32_t count, enum bt_shift_shape shape)
{
bt_shift_core(tree, node, idx, count, 1, shape, BSD_LEFT);
}
@@ -405,8 +428,8 @@ bt_shift_core_left(zfs_btree_t *tree, zfs_btree_core_t *node, uint64_t idx,
* Starts with elements[idx] and children[idx] and one more child than element.
*/
static inline void
-bt_shift_core_right(zfs_btree_t *tree, zfs_btree_core_t *node, uint64_t idx,
- uint64_t count, enum bt_shift_shape shape)
+bt_shift_core_right(zfs_btree_t *tree, zfs_btree_core_t *node, uint32_t idx,
+ uint32_t count, enum bt_shift_shape shape)
{
bt_shift_core(tree, node, idx, count, 1, shape, BSD_RIGHT);
}
@@ -417,30 +440,78 @@ bt_shift_core_right(zfs_btree_t *tree, zfs_btree_core_t *node, uint64_t idx,
* is determined by left.
*/
static inline void
-bt_shift_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *node, uint64_t idx,
- uint64_t count, uint64_t off, enum bt_shift_direction dir)
+bt_shift_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *node, uint32_t idx,
+ uint32_t count, uint32_t off, enum bt_shift_direction dir)
{
size_t size = tree->bt_elem_size;
- ASSERT(!node->btl_hdr.bth_core);
+ zfs_btree_hdr_t *hdr = &node->btl_hdr;
+ ASSERT(!zfs_btree_is_core(hdr));
- uint8_t *start = node->btl_elems + idx * size;
- int sign = (dir == BSD_LEFT ? -1 : +1);
- uint8_t *out = start + sign * off * size;
+ if (count == 0)
+ return;
+ uint8_t *start = node->btl_elems + (hdr->bth_first + idx) * size;
+ uint8_t *out = (dir == BSD_LEFT ? start - off * size :
+ start + off * size);
bmov(start, out, count * size);
}
-static inline void
-bt_shift_leaf_right(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, uint64_t idx,
- uint64_t count)
+/*
+ * Grow leaf for n new elements before idx.
+ */
+static void
+bt_grow_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, uint32_t idx,
+ uint32_t n)
{
- bt_shift_leaf(tree, leaf, idx, count, 1, BSD_RIGHT);
+ zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
+ ASSERT(!zfs_btree_is_core(hdr));
+ ASSERT3U(idx, <=, hdr->bth_count);
+ uint32_t capacity = tree->bt_leaf_cap;
+ ASSERT3U(hdr->bth_count + n, <=, capacity);
+ boolean_t cl = (hdr->bth_first >= n);
+ boolean_t cr = (hdr->bth_first + hdr->bth_count + n <= capacity);
+
+ if (cl && (!cr || idx <= hdr->bth_count / 2)) {
+ /* Grow left. */
+ hdr->bth_first -= n;
+ bt_shift_leaf(tree, leaf, n, idx, n, BSD_LEFT);
+ } else if (cr) {
+ /* Grow right. */
+ bt_shift_leaf(tree, leaf, idx, hdr->bth_count - idx, n,
+ BSD_RIGHT);
+ } else {
+ /* Grow both ways. */
+ uint32_t fn = hdr->bth_first -
+ (capacity - (hdr->bth_count + n)) / 2;
+ hdr->bth_first -= fn;
+ bt_shift_leaf(tree, leaf, fn, idx, fn, BSD_LEFT);
+ bt_shift_leaf(tree, leaf, fn + idx, hdr->bth_count - idx,
+ n - fn, BSD_RIGHT);
+ }
+ hdr->bth_count += n;
}
-static inline void
-bt_shift_leaf_left(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, uint64_t idx,
- uint64_t count)
+/*
+ * Shrink leaf by n elements starting from idx.
+ */
+static void
+bt_shrink_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, uint32_t idx,
+ uint32_t n)
{
- bt_shift_leaf(tree, leaf, idx, count, 1, BSD_LEFT);
+ zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
+ ASSERT(!zfs_btree_is_core(hdr));
+ ASSERT3U(idx, <=, hdr->bth_count);
+ ASSERT3U(idx + n, <=, hdr->bth_count);
+
+ if (idx <= (hdr->bth_count - n) / 2) {
+ bt_shift_leaf(tree, leaf, 0, idx, n, BSD_RIGHT);
+ zfs_btree_poison_node_at(tree, hdr, 0, n);
+ hdr->bth_first += n;
+ } else {
+ bt_shift_leaf(tree, leaf, idx + n, hdr->bth_count - idx - n, n,
+ BSD_LEFT);
+ zfs_btree_poison_node_at(tree, hdr, hdr->bth_count - n, n);
+ }
+ hdr->bth_count -= n;
}
/*
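
bt_grow_leaf() above opens a gap on whichever side is cheaper: shift left when there is headroom before bth_first and the insertion point lies in the left half, shift right when the tail has room, and recenter only when neither side alone can absorb the growth; bt_shrink_leaf() mirrors this by closing the gap from the side that moves fewer elements and poisoning the vacated slots. A compact standalone sketch of the direction predicate:

#include <stdint.h>
#include <stdbool.h>

/* true: make room by shifting the head left; false: shift the tail
 * right (the caller falls back to recentering when neither fits). */
static bool
grow_left(uint32_t first, uint32_t count, uint32_t idx, uint32_t n,
    uint32_t capacity)
{
        bool cl = (first >= n);                         /* room in front */
        bool cr = (first + count + n <= capacity);      /* room behind */
        return (cl && (!cr || idx <= count / 2));
}
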
@@ -448,32 +519,33 @@ bt_shift_leaf_left(zfs_btree_t *tree, zfs_btree_leaf_t *leaf, uint64_t idx,
* parameter behaves the same as it does in the shift logic.
*/
static inline void
-bt_transfer_core(zfs_btree_t *tree, zfs_btree_core_t *source, uint64_t sidx,
- uint64_t count, zfs_btree_core_t *dest, uint64_t didx,
+bt_transfer_core(zfs_btree_t *tree, zfs_btree_core_t *source, uint32_t sidx,
+ uint32_t count, zfs_btree_core_t *dest, uint32_t didx,
enum bt_shift_shape shape)
{
size_t size = tree->bt_elem_size;
- ASSERT(source->btc_hdr.bth_core);
- ASSERT(dest->btc_hdr.bth_core);
+ ASSERT(zfs_btree_is_core(&source->btc_hdr));
+ ASSERT(zfs_btree_is_core(&dest->btc_hdr));
- bmov(source->btc_elems + sidx * size, dest->btc_elems + didx * size,
+ bcpy(source->btc_elems + sidx * size, dest->btc_elems + didx * size,
count * size);
- uint64_t c_count = count + (shape == BSS_TRAPEZOID ? 1 : 0);
- bmov(source->btc_children + sidx + (shape == BSS_TRAPEZOID ? 0 : 1),
+ uint32_t c_count = count + (shape == BSS_TRAPEZOID ? 1 : 0);
+ bcpy(source->btc_children + sidx + (shape == BSS_TRAPEZOID ? 0 : 1),
dest->btc_children + didx + (shape == BSS_TRAPEZOID ? 0 : 1),
c_count * sizeof (*source->btc_children));
}
static inline void
-bt_transfer_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *source, uint64_t sidx,
- uint64_t count, zfs_btree_leaf_t *dest, uint64_t didx)
+bt_transfer_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *source, uint32_t sidx,
+ uint32_t count, zfs_btree_leaf_t *dest, uint32_t didx)
{
size_t size = tree->bt_elem_size;
- ASSERT(!source->btl_hdr.bth_core);
- ASSERT(!dest->btl_hdr.bth_core);
+ ASSERT(!zfs_btree_is_core(&source->btl_hdr));
+ ASSERT(!zfs_btree_is_core(&dest->btl_hdr));
- bmov(source->btl_elems + sidx * size, dest->btl_elems + didx * size,
+ bcpy(source->btl_elems + (source->btl_hdr.bth_first + sidx) * size,
+ dest->btl_elems + (dest->btl_hdr.bth_first + didx) * size,
count * size);
}
@@ -482,30 +554,31 @@ bt_transfer_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *source, uint64_t sidx,
* put its location in where if non-null.
*/
static void *
-zfs_btree_first_helper(zfs_btree_hdr_t *hdr, zfs_btree_index_t *where)
+zfs_btree_first_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
+ zfs_btree_index_t *where)
{
zfs_btree_hdr_t *node;
- for (node = hdr; node->bth_core; node =
- ((zfs_btree_core_t *)node)->btc_children[0])
+ for (node = hdr; zfs_btree_is_core(node);
+ node = ((zfs_btree_core_t *)node)->btc_children[0])
;
- ASSERT(!node->bth_core);
+ ASSERT(!zfs_btree_is_core(node));
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)node;
if (where != NULL) {
where->bti_node = node;
where->bti_offset = 0;
where->bti_before = B_FALSE;
}
- return (&leaf->btl_elems[0]);
+ return (&leaf->btl_elems[node->bth_first * tree->bt_elem_size]);
}
/* Insert an element and a child into a core node at the given offset. */
static void
zfs_btree_insert_core_impl(zfs_btree_t *tree, zfs_btree_core_t *parent,
- uint64_t offset, zfs_btree_hdr_t *new_node, void *buf)
+ uint32_t offset, zfs_btree_hdr_t *new_node, void *buf)
{
- uint64_t size = tree->bt_elem_size;
+ size_t size = tree->bt_elem_size;
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
ASSERT3P(par_hdr, ==, new_node->bth_parent);
ASSERT3U(par_hdr->bth_count, <, BTREE_CORE_ELEMS);
@@ -515,13 +588,13 @@ zfs_btree_insert_core_impl(zfs_btree_t *tree, zfs_btree_core_t *parent,
par_hdr->bth_count);
}
/* Shift existing elements and children */
- uint64_t count = par_hdr->bth_count - offset;
+ uint32_t count = par_hdr->bth_count - offset;
bt_shift_core_right(tree, parent, offset, count,
BSS_PARALLELOGRAM);
/* Insert new values */
parent->btc_children[offset + 1] = new_node;
- bmov(buf, parent->btc_elems + offset * size, size);
+ bcpy(buf, parent->btc_elems + offset * size, size);
par_hdr->bth_count++;
}
@@ -534,7 +607,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
zfs_btree_hdr_t *new_node, void *buf)
{
ASSERT3P(old_node->bth_parent, ==, new_node->bth_parent);
- uint64_t size = tree->bt_elem_size;
+ size_t size = tree->bt_elem_size;
zfs_btree_core_t *parent = old_node->bth_parent;
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
@@ -550,13 +623,13 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
size, KM_SLEEP);
zfs_btree_hdr_t *new_root_hdr = &new_root->btc_hdr;
new_root_hdr->bth_parent = NULL;
- new_root_hdr->bth_core = B_TRUE;
+ new_root_hdr->bth_first = -1;
new_root_hdr->bth_count = 1;
old_node->bth_parent = new_node->bth_parent = new_root;
new_root->btc_children[0] = old_node;
new_root->btc_children[1] = new_node;
- bmov(buf, new_root->btc_elems, size);
+ bcpy(buf, new_root->btc_elems, size);
tree->bt_height++;
tree->bt_root = new_root_hdr;
@@ -569,11 +642,11 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
* new_node.
*/
zfs_btree_index_t idx;
- ASSERT(par_hdr->bth_core);
+ ASSERT(zfs_btree_is_core(par_hdr));
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
par_hdr->bth_count, buf, &idx), ==, NULL);
ASSERT(idx.bti_before);
- uint64_t offset = idx.bti_offset;
+ uint32_t offset = idx.bti_offset;
ASSERT3U(offset, <=, par_hdr->bth_count);
ASSERT3P(parent->btc_children[offset], ==, old_node);
@@ -604,16 +677,16 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
* We do this in two stages: first we split into two nodes, and then we
* reuse our existing logic to insert the new element and child.
*/
- uint64_t move_count = MAX((BTREE_CORE_ELEMS / (tree->bt_bulk == NULL ?
+ uint32_t move_count = MAX((BTREE_CORE_ELEMS / (tree->bt_bulk == NULL ?
2 : 4)) - 1, 2);
- uint64_t keep_count = BTREE_CORE_ELEMS - move_count - 1;
+ uint32_t keep_count = BTREE_CORE_ELEMS - move_count - 1;
ASSERT3U(BTREE_CORE_ELEMS - move_count, >=, 2);
tree->bt_num_nodes++;
zfs_btree_core_t *new_parent = kmem_alloc(sizeof (zfs_btree_core_t) +
BTREE_CORE_ELEMS * size, KM_SLEEP);
zfs_btree_hdr_t *new_par_hdr = &new_parent->btc_hdr;
new_par_hdr->bth_parent = par_hdr->bth_parent;
- new_par_hdr->bth_core = B_TRUE;
+ new_par_hdr->bth_first = -1;
new_par_hdr->bth_count = move_count;
zfs_btree_poison_node(tree, new_par_hdr);
@@ -624,7 +697,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
/* Store the new separator in a buffer. */
uint8_t *tmp_buf = kmem_alloc(size, KM_SLEEP);
- bmov(parent->btc_elems + keep_count * size, tmp_buf,
+ bcpy(parent->btc_elems + keep_count * size, tmp_buf,
size);
zfs_btree_poison_node(tree, par_hdr);
@@ -636,7 +709,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
/*
* Move the new separator to the existing buffer.
*/
- bmov(tmp_buf, buf, size);
+ bcpy(tmp_buf, buf, size);
} else if (offset > keep_count) {
/* Insert the new node into the right half */
new_node->bth_parent = new_parent;
@@ -646,7 +719,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
/*
* Move the new separator to the existing buffer.
*/
- bmov(tmp_buf, buf, size);
+ bcpy(tmp_buf, buf, size);
} else {
/*
* Move the new separator into the right half, and replace it
@@ -656,16 +729,16 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
bt_shift_core_right(tree, new_parent, 0, move_count,
BSS_TRAPEZOID);
new_parent->btc_children[0] = new_node;
- bmov(tmp_buf, new_parent->btc_elems, size);
+ bcpy(tmp_buf, new_parent->btc_elems, size);
new_par_hdr->bth_count++;
}
kmem_free(tmp_buf, size);
zfs_btree_poison_node(tree, par_hdr);
- for (int i = 0; i <= new_parent->btc_hdr.bth_count; i++)
+ for (uint32_t i = 0; i <= new_parent->btc_hdr.bth_count; i++)
new_parent->btc_children[i]->bth_parent = new_parent;
- for (int i = 0; i <= parent->btc_hdr.bth_count; i++)
+ for (uint32_t i = 0; i <= parent->btc_hdr.bth_count; i++)
ASSERT3P(parent->btc_children[i]->bth_parent, ==, parent);
/*
@@ -679,34 +752,32 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
/* Insert an element into a leaf node at the given offset. */
static void
zfs_btree_insert_leaf_impl(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
- uint64_t idx, const void *value)
+ uint32_t idx, const void *value)
{
- uint64_t size = tree->bt_elem_size;
- uint8_t *start = leaf->btl_elems + (idx * size);
+ size_t size = tree->bt_elem_size;
zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
- uint64_t capacity __maybe_unused = P2ALIGN((BTREE_LEAF_SIZE -
- sizeof (zfs_btree_hdr_t)) / size, 2);
- uint64_t count = leaf->btl_hdr.bth_count - idx;
- ASSERT3U(leaf->btl_hdr.bth_count, <, capacity);
+ ASSERT3U(leaf->btl_hdr.bth_count, <, tree->bt_leaf_cap);
if (zfs_btree_verify_intensity >= 5) {
zfs_btree_verify_poison_at(tree, &leaf->btl_hdr,
leaf->btl_hdr.bth_count);
}
- bt_shift_leaf_right(tree, leaf, idx, count);
- bmov(value, start, size);
- hdr->bth_count++;
+ bt_grow_leaf(tree, leaf, idx, 1);
+ uint8_t *start = leaf->btl_elems + (hdr->bth_first + idx) * size;
+ bcpy(value, start, size);
}
+static void
+zfs_btree_verify_order_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr);
+
/* Helper function for inserting a new value into leaf at the given index. */
static void
zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
- const void *value, uint64_t idx)
+ const void *value, uint32_t idx)
{
- uint64_t size = tree->bt_elem_size;
- uint64_t capacity = P2ALIGN((BTREE_LEAF_SIZE -
- sizeof (zfs_btree_hdr_t)) / size, 2);
+ size_t size = tree->bt_elem_size;
+ uint32_t capacity = tree->bt_leaf_cap;
/*
* If the leaf isn't full, shift the elements after idx and insert
@@ -731,32 +802,36 @@ zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
* In either case, we're left with one extra element. The leftover
* element will become the new dividing element between the two nodes.
*/
- uint64_t move_count = MAX(capacity / (tree->bt_bulk == NULL ? 2 : 4) -
- 1, 2);
- uint64_t keep_count = capacity - move_count - 1;
- ASSERT3U(capacity - move_count, >=, 2);
+ uint32_t move_count = MAX(capacity / (tree->bt_bulk ? 4 : 2), 1) - 1;
+ uint32_t keep_count = capacity - move_count - 1;
+ ASSERT3U(keep_count, >=, 1);
+ /* If we insert on the left, move one more to keep leaves balanced. */
+ if (idx < keep_count) {
+ keep_count--;
+ move_count++;
+ }
tree->bt_num_nodes++;
zfs_btree_leaf_t *new_leaf = kmem_cache_alloc(zfs_btree_leaf_cache,
KM_SLEEP);
zfs_btree_hdr_t *new_hdr = &new_leaf->btl_hdr;
new_hdr->bth_parent = leaf->btl_hdr.bth_parent;
- new_hdr->bth_core = B_FALSE;
+ new_hdr->bth_first = (tree->bt_bulk ? 0 : capacity / 4) +
+ (idx >= keep_count && idx <= keep_count + move_count / 2);
new_hdr->bth_count = move_count;
zfs_btree_poison_node(tree, new_hdr);
- leaf->btl_hdr.bth_count = keep_count;
-
if (tree->bt_bulk != NULL && leaf == tree->bt_bulk)
tree->bt_bulk = new_leaf;
/* Copy the back part to the new leaf. */
- bt_transfer_leaf(tree, leaf, keep_count + 1, move_count, new_leaf,
- 0);
+ bt_transfer_leaf(tree, leaf, keep_count + 1, move_count, new_leaf, 0);
/* We store the new separator in a buffer we control for simplicity. */
uint8_t *buf = kmem_alloc(size, KM_SLEEP);
- bmov(leaf->btl_elems + (keep_count * size), buf, size);
- zfs_btree_poison_node(tree, &leaf->btl_hdr);
+ bcpy(leaf->btl_elems + (leaf->btl_hdr.bth_first + keep_count) * size,
+ buf, size);
+
+ bt_shrink_leaf(tree, leaf, keep_count, 1 + move_count);
if (idx < keep_count) {
/* Insert into the existing leaf. */
@@ -767,13 +842,11 @@ zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
1, value);
} else {
/*
- * Shift the elements in the new leaf to make room for the
- * separator, and use the new value as the new separator.
+ * Insert planned separator into the new leaf, and use
+ * the new value as the new separator.
*/
- bt_shift_leaf_right(tree, new_leaf, 0, move_count);
- bmov(buf, new_leaf->btl_elems, size);
- bmov(value, buf, size);
- new_hdr->bth_count++;
+ zfs_btree_insert_leaf_impl(tree, new_leaf, 0, buf);
+ bcpy(value, buf, size);
}
/*
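
The split arithmetic above now also balances around the insertion point. As a hedged worked example with bt_leaf_cap = 170 outside bulk mode: move_count = MAX(170 / 2, 1) - 1 = 84 and keep_count = 170 - 84 - 1 = 85; if the new element lands in the left half (idx < keep_count), one more element migrates (keep_count = 84, move_count = 85), so after the insertion both leaves hold 85 elements, with one element promoted as the separator. The new leaf's bth_first starts near capacity / 4 (0 in bulk mode, where the leaf will only ever be appended to) so later insertions can grow in either direction.
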
@@ -785,14 +858,15 @@ zfs_btree_insert_into_leaf(zfs_btree_t *tree, zfs_btree_leaf_t *leaf,
kmem_free(buf, size);
}
-static uint64_t
+static uint32_t
zfs_btree_find_parent_idx(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
{
void *buf;
- if (hdr->bth_core) {
+ if (zfs_btree_is_core(hdr)) {
buf = ((zfs_btree_core_t *)hdr)->btc_elems;
} else {
- buf = ((zfs_btree_leaf_t *)hdr)->btl_elems;
+ buf = ((zfs_btree_leaf_t *)hdr)->btl_elems +
+ hdr->bth_first * tree->bt_elem_size;
}
zfs_btree_index_t idx;
zfs_btree_core_t *parent = hdr->bth_parent;
@@ -821,9 +895,8 @@ zfs_btree_bulk_finish(zfs_btree_t *tree)
zfs_btree_leaf_t *leaf = tree->bt_bulk;
zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
zfs_btree_core_t *parent = hdr->bth_parent;
- uint64_t size = tree->bt_elem_size;
- uint64_t capacity = P2ALIGN((BTREE_LEAF_SIZE -
- sizeof (zfs_btree_hdr_t)) / size, 2);
+ size_t size = tree->bt_elem_size;
+ uint32_t capacity = tree->bt_leaf_cap;
/*
* The invariant doesn't apply to the root node, if that's the only
@@ -848,56 +921,54 @@ zfs_btree_bulk_finish(zfs_btree_t *tree)
.bti_offset = 0
};
VERIFY3P(zfs_btree_prev(tree, &idx, &idx), !=, NULL);
- ASSERT(idx.bti_node->bth_core);
+ ASSERT(zfs_btree_is_core(idx.bti_node));
zfs_btree_core_t *common = (zfs_btree_core_t *)idx.bti_node;
- uint64_t common_idx = idx.bti_offset;
+ uint32_t common_idx = idx.bti_offset;
VERIFY3P(zfs_btree_prev(tree, &idx, &idx), !=, NULL);
- ASSERT(!idx.bti_node->bth_core);
+ ASSERT(!zfs_btree_is_core(idx.bti_node));
zfs_btree_leaf_t *l_neighbor = (zfs_btree_leaf_t *)idx.bti_node;
zfs_btree_hdr_t *l_hdr = idx.bti_node;
- uint64_t move_count = (capacity / 2) - hdr->bth_count;
+ uint32_t move_count = (capacity / 2) - hdr->bth_count;
ASSERT3U(l_neighbor->btl_hdr.bth_count - move_count, >=,
capacity / 2);
if (zfs_btree_verify_intensity >= 5) {
- for (int i = 0; i < move_count; i++) {
+ for (uint32_t i = 0; i < move_count; i++) {
zfs_btree_verify_poison_at(tree, hdr,
leaf->btl_hdr.bth_count + i);
}
}
/* First, shift elements in leaf back. */
- bt_shift_leaf(tree, leaf, 0, hdr->bth_count, move_count,
- BSD_RIGHT);
+ bt_grow_leaf(tree, leaf, 0, move_count);
/* Next, move the separator from the common ancestor to leaf. */
- uint8_t *separator = common->btc_elems + (common_idx * size);
- uint8_t *out = leaf->btl_elems + ((move_count - 1) * size);
- bmov(separator, out, size);
- move_count--;
+ uint8_t *separator = common->btc_elems + common_idx * size;
+ uint8_t *out = leaf->btl_elems +
+ (hdr->bth_first + move_count - 1) * size;
+ bcpy(separator, out, size);
/*
* Now we move elements from the tail of the left neighbor to
* fill the remaining spots in leaf.
*/
bt_transfer_leaf(tree, l_neighbor, l_hdr->bth_count -
- move_count, move_count, leaf, 0);
+ (move_count - 1), move_count - 1, leaf, 0);
/*
* Finally, move the new last element in the left neighbor to
* the separator.
*/
- bmov(l_neighbor->btl_elems + (l_hdr->bth_count -
- move_count - 1) * size, separator, size);
+ bcpy(l_neighbor->btl_elems + (l_hdr->bth_first +
+ l_hdr->bth_count - move_count) * size, separator, size);
/* Adjust the node's counts, and we're done. */
- l_hdr->bth_count -= move_count + 1;
- hdr->bth_count += move_count + 1;
+ bt_shrink_leaf(tree, l_neighbor, l_hdr->bth_count - move_count,
+ move_count);
ASSERT3U(l_hdr->bth_count, >=, capacity / 2);
ASSERT3U(hdr->bth_count, >=, capacity / 2);
- zfs_btree_poison_node(tree, l_hdr);
}
/*
@@ -921,16 +992,16 @@ zfs_btree_bulk_finish(zfs_btree_t *tree)
* splitting is 2, we never need to worry about not having a
* left sibling (a sibling is a neighbor with the same parent).
*/
- uint64_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
+ uint32_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
ASSERT3U(parent_idx, >, 0);
zfs_btree_core_t *l_neighbor =
(zfs_btree_core_t *)parent->btc_children[parent_idx - 1];
- uint64_t move_count = (capacity / 2) - hdr->bth_count;
+ uint32_t move_count = (capacity / 2) - hdr->bth_count;
ASSERT3U(l_neighbor->btc_hdr.bth_count - move_count, >=,
capacity / 2);
if (zfs_btree_verify_intensity >= 5) {
- for (int i = 0; i < move_count; i++) {
+ for (uint32_t i = 0; i < move_count; i++) {
zfs_btree_verify_poison_at(tree, hdr,
hdr->bth_count + i);
}
@@ -943,14 +1014,14 @@ zfs_btree_bulk_finish(zfs_btree_t *tree)
uint8_t *separator = parent->btc_elems + ((parent_idx - 1) *
size);
uint8_t *e_out = cur->btc_elems + ((move_count - 1) * size);
- bmov(separator, e_out, size);
+ bcpy(separator, e_out, size);
/*
* Now, move elements and children from the left node to the
* right. We move one more child than elements.
*/
move_count--;
- uint64_t move_idx = l_neighbor->btc_hdr.bth_count - move_count;
+ uint32_t move_idx = l_neighbor->btc_hdr.bth_count - move_count;
bt_transfer_core(tree, l_neighbor, move_idx, move_count, cur, 0,
BSS_TRAPEZOID);
@@ -959,7 +1030,7 @@ zfs_btree_bulk_finish(zfs_btree_t *tree)
* separator's position.
*/
move_idx--;
- bmov(l_neighbor->btc_elems + move_idx * size, separator, size);
+ bcpy(l_neighbor->btc_elems + move_idx * size, separator, size);
l_neighbor->btc_hdr.bth_count -= move_count + 1;
hdr->bth_count += move_count + 1;
@@ -969,11 +1040,12 @@ zfs_btree_bulk_finish(zfs_btree_t *tree)
zfs_btree_poison_node(tree, &l_neighbor->btc_hdr);
- for (int i = 0; i <= hdr->bth_count; i++)
+ for (uint32_t i = 0; i <= hdr->bth_count; i++)
cur->btc_children[i]->bth_parent = cur;
}
tree->bt_bulk = NULL;
+ zfs_btree_verify(tree);
}
/*
@@ -1013,13 +1085,13 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value,
zfs_btree_hdr_t *hdr = &leaf->btl_hdr;
hdr->bth_parent = NULL;
- hdr->bth_core = B_FALSE;
+ hdr->bth_first = 0;
hdr->bth_count = 0;
zfs_btree_poison_node(tree, hdr);
zfs_btree_insert_into_leaf(tree, leaf, value, 0);
tree->bt_bulk = leaf;
- } else if (!where->bti_node->bth_core) {
+ } else if (!zfs_btree_is_core(where->bti_node)) {
/*
* If we're inserting into a leaf, go directly to the helper
* function.
@@ -1035,28 +1107,28 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value,
* value in the node at that spot and then insert the old
* separator into the first slot in the subtree to the right.
*/
- ASSERT(where->bti_node->bth_core);
zfs_btree_core_t *node = (zfs_btree_core_t *)where->bti_node;
/*
* We can ignore bti_before, because either way the value
* should end up in bti_offset.
*/
- uint64_t off = where->bti_offset;
+ uint32_t off = where->bti_offset;
zfs_btree_hdr_t *subtree = node->btc_children[off + 1];
size_t size = tree->bt_elem_size;
uint8_t *buf = kmem_alloc(size, KM_SLEEP);
- bmov(node->btc_elems + off * size, buf, size);
- bmov(value, node->btc_elems + off * size, size);
+ bcpy(node->btc_elems + off * size, buf, size);
+ bcpy(value, node->btc_elems + off * size, size);
/*
* Find the first slot in the subtree to the right, insert
* there.
*/
zfs_btree_index_t new_idx;
- VERIFY3P(zfs_btree_first_helper(subtree, &new_idx), !=, NULL);
+ VERIFY3P(zfs_btree_first_helper(tree, subtree, &new_idx), !=,
+ NULL);
ASSERT0(new_idx.bti_offset);
- ASSERT(!new_idx.bti_node->bth_core);
+ ASSERT(!zfs_btree_is_core(new_idx.bti_node));
zfs_btree_insert_into_leaf(tree,
(zfs_btree_leaf_t *)new_idx.bti_node, buf, 0);
kmem_free(buf, size);
@@ -1075,7 +1147,7 @@ zfs_btree_first(zfs_btree_t *tree, zfs_btree_index_t *where)
ASSERT0(tree->bt_num_elems);
return (NULL);
}
- return (zfs_btree_first_helper(tree->bt_root, where));
+ return (zfs_btree_first_helper(tree, tree->bt_root, where));
}
/*
@@ -1088,7 +1160,7 @@ zfs_btree_last_helper(zfs_btree_t *btree, zfs_btree_hdr_t *hdr,
{
zfs_btree_hdr_t *node;
- for (node = hdr; node->bth_core; node =
+ for (node = hdr; zfs_btree_is_core(node); node =
((zfs_btree_core_t *)node)->btc_children[node->bth_count])
;
@@ -1098,7 +1170,8 @@ zfs_btree_last_helper(zfs_btree_t *btree, zfs_btree_hdr_t *hdr,
where->bti_offset = node->bth_count - 1;
where->bti_before = B_FALSE;
}
- return (leaf->btl_elems + (node->bth_count - 1) * btree->bt_elem_size);
+ return (leaf->btl_elems + (node->bth_first + node->bth_count - 1) *
+ btree->bt_elem_size);
}
/*
@@ -1131,8 +1204,8 @@ zfs_btree_next_helper(zfs_btree_t *tree, const zfs_btree_index_t *idx,
return (NULL);
}
- uint64_t offset = idx->bti_offset;
- if (!idx->bti_node->bth_core) {
+ uint32_t offset = idx->bti_offset;
+ if (!zfs_btree_is_core(idx->bti_node)) {
/*
* When finding the next element of an element in a leaf,
* there are two cases. If the element isn't the last one in
@@ -1143,20 +1216,21 @@ zfs_btree_next_helper(zfs_btree_t *tree, const zfs_btree_index_t *idx,
* separator after our ancestor in its parent.
*/
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)idx->bti_node;
- uint64_t new_off = offset + (idx->bti_before ? 0 : 1);
+ uint32_t new_off = offset + (idx->bti_before ? 0 : 1);
if (leaf->btl_hdr.bth_count > new_off) {
out_idx->bti_node = &leaf->btl_hdr;
out_idx->bti_offset = new_off;
out_idx->bti_before = B_FALSE;
- return (leaf->btl_elems + new_off * tree->bt_elem_size);
+ return (leaf->btl_elems + (leaf->btl_hdr.bth_first +
+ new_off) * tree->bt_elem_size);
}
zfs_btree_hdr_t *prev = &leaf->btl_hdr;
for (zfs_btree_core_t *node = leaf->btl_hdr.bth_parent;
node != NULL; node = node->btc_hdr.bth_parent) {
zfs_btree_hdr_t *hdr = &node->btc_hdr;
- ASSERT(hdr->bth_core);
- uint64_t i = zfs_btree_find_parent_idx(tree, prev);
+ ASSERT(zfs_btree_is_core(hdr));
+ uint32_t i = zfs_btree_find_parent_idx(tree, prev);
if (done_func != NULL)
done_func(tree, prev);
if (i == hdr->bth_count) {
@@ -1178,7 +1252,7 @@ zfs_btree_next_helper(zfs_btree_t *tree, const zfs_btree_index_t *idx,
}
/* If we were before an element in a core node, return that element. */
- ASSERT(idx->bti_node->bth_core);
+ ASSERT(zfs_btree_is_core(idx->bti_node));
zfs_btree_core_t *node = (zfs_btree_core_t *)idx->bti_node;
if (idx->bti_before) {
out_idx->bti_before = B_FALSE;
@@ -1190,7 +1264,7 @@ zfs_btree_next_helper(zfs_btree_t *tree, const zfs_btree_index_t *idx,
* the subtree just to the right of the separator.
*/
zfs_btree_hdr_t *child = node->btc_children[offset + 1];
- return (zfs_btree_first_helper(child, out_idx));
+ return (zfs_btree_first_helper(tree, child, out_idx));
}
/*
@@ -1217,8 +1291,8 @@ zfs_btree_prev(zfs_btree_t *tree, const zfs_btree_index_t *idx,
return (NULL);
}
- uint64_t offset = idx->bti_offset;
- if (!idx->bti_node->bth_core) {
+ uint32_t offset = idx->bti_offset;
+ if (!zfs_btree_is_core(idx->bti_node)) {
/*
* When finding the previous element of an element in a leaf,
* there are two cases. If the element isn't the first one in
@@ -1233,15 +1307,15 @@ zfs_btree_prev(zfs_btree_t *tree, const zfs_btree_index_t *idx,
out_idx->bti_node = &leaf->btl_hdr;
out_idx->bti_offset = offset - 1;
out_idx->bti_before = B_FALSE;
- return (leaf->btl_elems + (offset - 1) *
- tree->bt_elem_size);
+ return (leaf->btl_elems + (leaf->btl_hdr.bth_first +
+ offset - 1) * tree->bt_elem_size);
}
zfs_btree_hdr_t *prev = &leaf->btl_hdr;
for (zfs_btree_core_t *node = leaf->btl_hdr.bth_parent;
node != NULL; node = node->btc_hdr.bth_parent) {
zfs_btree_hdr_t *hdr = &node->btc_hdr;
- ASSERT(hdr->bth_core);
- uint64_t i = zfs_btree_find_parent_idx(tree, prev);
+ ASSERT(zfs_btree_is_core(hdr));
+ uint32_t i = zfs_btree_find_parent_idx(tree, prev);
if (i == 0) {
prev = hdr;
continue;
@@ -1262,7 +1336,7 @@ zfs_btree_prev(zfs_btree_t *tree, const zfs_btree_index_t *idx,
* The previous element from one in a core node is the last element in
* the subtree just to the left of the separator.
*/
- ASSERT(idx->bti_node->bth_core);
+ ASSERT(zfs_btree_is_core(idx->bti_node));
zfs_btree_core_t *node = (zfs_btree_core_t *)idx->bti_node;
zfs_btree_hdr_t *child = node->btc_children[offset];
return (zfs_btree_last_helper(tree, child, out_idx));
@@ -1279,13 +1353,14 @@ void *
zfs_btree_get(zfs_btree_t *tree, zfs_btree_index_t *idx)
{
ASSERT(!idx->bti_before);
- if (!idx->bti_node->bth_core) {
+ size_t size = tree->bt_elem_size;
+ if (!zfs_btree_is_core(idx->bti_node)) {
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)idx->bti_node;
- return (leaf->btl_elems + idx->bti_offset * tree->bt_elem_size);
+ return (leaf->btl_elems + (leaf->btl_hdr.bth_first +
+ idx->bti_offset) * size);
}
- ASSERT(idx->bti_node->bth_core);
zfs_btree_core_t *node = (zfs_btree_core_t *)idx->bti_node;
- return (node->btc_elems + idx->bti_offset * tree->bt_elem_size);
+ return (node->btc_elems + idx->bti_offset * size);
}
/* Add the given value to the tree. Must not already be in the tree. */
@@ -1302,7 +1377,7 @@ static void
zfs_btree_node_destroy(zfs_btree_t *tree, zfs_btree_hdr_t *node)
{
tree->bt_num_nodes--;
- if (!node->bth_core) {
+ if (!zfs_btree_is_core(node)) {
kmem_cache_free(zfs_btree_leaf_cache, node);
} else {
kmem_free(node, sizeof (zfs_btree_core_t) +
@@ -1320,7 +1395,7 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
zfs_btree_hdr_t *rm_hdr)
{
size_t size = tree->bt_elem_size;
- uint64_t min_count = (BTREE_CORE_ELEMS / 2) - 1;
+ uint32_t min_count = (BTREE_CORE_ELEMS / 2) - 1;
zfs_btree_hdr_t *hdr = &node->btc_hdr;
/*
* If the node is the root node and rm_hdr is one of two children,
@@ -1337,7 +1412,7 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
return;
}
- uint64_t idx;
+ uint32_t idx;
for (idx = 0; idx <= hdr->bth_count; idx++) {
if (node->btc_children[idx] == rm_hdr)
break;
@@ -1357,7 +1432,7 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
bt_shift_core_left(tree, node, idx, hdr->bth_count - idx,
BSS_PARALLELOGRAM);
hdr->bth_count--;
- zfs_btree_poison_node_at(tree, hdr, hdr->bth_count);
+ zfs_btree_poison_node_at(tree, hdr, hdr->bth_count, 1);
return;
}
@@ -1378,13 +1453,13 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
* implementing in the future for completeness' sake.
*/
zfs_btree_core_t *parent = hdr->bth_parent;
- uint64_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
+ uint32_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
zfs_btree_hdr_t *l_hdr = (parent_idx == 0 ? NULL :
parent->btc_children[parent_idx - 1]);
if (l_hdr != NULL && l_hdr->bth_count > min_count) {
/* We can take a node from the left neighbor. */
- ASSERT(l_hdr->bth_core);
+ ASSERT(zfs_btree_is_core(l_hdr));
zfs_btree_core_t *neighbor = (zfs_btree_core_t *)l_hdr;
/*
@@ -1399,20 +1474,19 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
*/
uint8_t *separator = parent->btc_elems + (parent_idx - 1) *
size;
- bmov(separator, node->btc_elems, size);
+ bcpy(separator, node->btc_elems, size);
/* Move the last child of neighbor to our first child slot. */
- zfs_btree_hdr_t **take_child = neighbor->btc_children +
- l_hdr->bth_count;
- bmov(take_child, node->btc_children, sizeof (*take_child));
+ node->btc_children[0] =
+ neighbor->btc_children[l_hdr->bth_count];
node->btc_children[0]->bth_parent = node;
/* Move the last element of neighbor to the separator spot. */
uint8_t *take_elem = neighbor->btc_elems +
(l_hdr->bth_count - 1) * size;
- bmov(take_elem, separator, size);
+ bcpy(take_elem, separator, size);
l_hdr->bth_count--;
- zfs_btree_poison_node_at(tree, l_hdr, l_hdr->bth_count);
+ zfs_btree_poison_node_at(tree, l_hdr, l_hdr->bth_count, 1);
return;
}
@@ -1420,7 +1494,7 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
NULL : parent->btc_children[parent_idx + 1]);
if (r_hdr != NULL && r_hdr->bth_count > min_count) {
/* We can take a node from the right neighbor. */
- ASSERT(r_hdr->bth_core);
+ ASSERT(zfs_btree_is_core(r_hdr));
zfs_btree_core_t *neighbor = (zfs_btree_core_t *)r_hdr;
/*
@@ -1435,21 +1509,19 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
* element spot in node.
*/
uint8_t *separator = parent->btc_elems + parent_idx * size;
- bmov(separator, node->btc_elems + (hdr->bth_count - 1) * size,
+ bcpy(separator, node->btc_elems + (hdr->bth_count - 1) * size,
size);
/*
* Move the first child of neighbor to the last child spot in
* node.
*/
- zfs_btree_hdr_t **take_child = neighbor->btc_children;
- bmov(take_child, node->btc_children + hdr->bth_count,
- sizeof (*take_child));
+ node->btc_children[hdr->bth_count] = neighbor->btc_children[0];
node->btc_children[hdr->bth_count]->bth_parent = node;
/* Move the first element of neighbor to the separator spot. */
uint8_t *take_elem = neighbor->btc_elems;
- bmov(take_elem, separator, size);
+ bcpy(take_elem, separator, size);
r_hdr->bth_count--;
/*
@@ -1458,7 +1530,7 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
*/
bt_shift_core_left(tree, neighbor, 1, r_hdr->bth_count,
BSS_TRAPEZOID);
- zfs_btree_poison_node_at(tree, r_hdr, r_hdr->bth_count);
+ zfs_btree_poison_node_at(tree, r_hdr, r_hdr->bth_count, 1);
return;
}
@@ -1473,7 +1545,7 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
* merging.
*/
zfs_btree_hdr_t *new_rm_hdr, *keep_hdr;
- uint64_t new_idx = idx;
+ uint32_t new_idx = idx;
if (l_hdr != NULL) {
keep_hdr = l_hdr;
new_rm_hdr = hdr;
@@ -1485,14 +1557,14 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
parent_idx++;
}
- ASSERT(keep_hdr->bth_core);
- ASSERT(new_rm_hdr->bth_core);
+ ASSERT(zfs_btree_is_core(keep_hdr));
+ ASSERT(zfs_btree_is_core(new_rm_hdr));
zfs_btree_core_t *keep = (zfs_btree_core_t *)keep_hdr;
zfs_btree_core_t *rm = (zfs_btree_core_t *)new_rm_hdr;
if (zfs_btree_verify_intensity >= 5) {
- for (int i = 0; i < new_rm_hdr->bth_count + 1; i++) {
+ for (uint32_t i = 0; i < new_rm_hdr->bth_count + 1; i++) {
zfs_btree_verify_poison_at(tree, keep_hdr,
keep_hdr->bth_count + i);
}
@@ -1502,14 +1574,14 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
uint8_t *e_out = keep->btc_elems + keep_hdr->bth_count * size;
uint8_t *separator = parent->btc_elems + (parent_idx - 1) *
size;
- bmov(separator, e_out, size);
+ bcpy(separator, e_out, size);
keep_hdr->bth_count++;
/* Move all our elements and children into the left node. */
bt_transfer_core(tree, rm, 0, new_rm_hdr->bth_count, keep,
keep_hdr->bth_count, BSS_TRAPEZOID);
- uint64_t old_count = keep_hdr->bth_count;
+ uint32_t old_count = keep_hdr->bth_count;
/* Update bookkeeping */
keep_hdr->bth_count += new_rm_hdr->bth_count;
@@ -1527,17 +1599,17 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
/* Reparent all our children to point to the left node. */
zfs_btree_hdr_t **new_start = keep->btc_children +
old_count - 1;
- for (int i = 0; i < new_rm_hdr->bth_count + 1; i++)
+ for (uint32_t i = 0; i < new_rm_hdr->bth_count + 1; i++)
new_start[i]->bth_parent = keep;
- for (int i = 0; i <= keep_hdr->bth_count; i++) {
+ for (uint32_t i = 0; i <= keep_hdr->bth_count; i++) {
ASSERT3P(keep->btc_children[i]->bth_parent, ==, keep);
ASSERT3P(keep->btc_children[i], !=, rm_hdr);
}
- zfs_btree_poison_node_at(tree, keep_hdr, keep_hdr->bth_count);
+ zfs_btree_poison_node_at(tree, keep_hdr, keep_hdr->bth_count, 1);
new_rm_hdr->bth_count = 0;
- zfs_btree_node_destroy(tree, new_rm_hdr);
zfs_btree_remove_from_node(tree, parent, new_rm_hdr);
+ zfs_btree_node_destroy(tree, new_rm_hdr);
}
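
Note the reordering at the tail of this hunk: zfs_btree_remove_from_node() can still reference the emptied child's header while unlinking it from the parent, so zfs_btree_node_destroy() is now called only after that recursion returns, eliminating a use-after-free. The leaf path in zfs_btree_remove_idx() below receives the same swap.
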
/* Remove the element at the specific location. */
@@ -1546,9 +1618,7 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
{
size_t size = tree->bt_elem_size;
zfs_btree_hdr_t *hdr = where->bti_node;
- uint64_t idx = where->bti_offset;
- uint64_t capacity = P2ALIGN((BTREE_LEAF_SIZE -
- sizeof (zfs_btree_hdr_t)) / size, 2);
+ uint32_t idx = where->bti_offset;
ASSERT(!where->bti_before);
if (tree->bt_bulk != NULL) {
@@ -1560,7 +1630,7 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
*/
uint8_t *value = zfs_btree_get(tree, where);
uint8_t *tmp = kmem_alloc(size, KM_SLEEP);
- bmov(value, tmp, size);
+ bcpy(value, tmp, size);
zfs_btree_bulk_finish(tree);
VERIFY3P(zfs_btree_find(tree, tmp, where), !=, NULL);
kmem_free(tmp, size);
@@ -1575,14 +1645,14 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
* makes the rebalance logic not need to be recursive both upwards and
* downwards.
*/
- if (hdr->bth_core) {
+ if (zfs_btree_is_core(hdr)) {
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
zfs_btree_hdr_t *left_subtree = node->btc_children[idx];
void *new_value = zfs_btree_last_helper(tree, left_subtree,
where);
ASSERT3P(new_value, !=, NULL);
- bmov(new_value, node->btc_elems + idx * size, size);
+ bcpy(new_value, node->btc_elems + idx * size, size);
hdr = where->bti_node;
idx = where->bti_offset;
@@ -1594,19 +1664,18 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
* elements after the idx to the left. After that, we rebalance if
* needed.
*/
- ASSERT(!hdr->bth_core);
+ ASSERT(!zfs_btree_is_core(hdr));
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
ASSERT3U(hdr->bth_count, >, 0);
- uint64_t min_count = (capacity / 2) - 1;
+ uint32_t min_count = (tree->bt_leaf_cap / 2) - 1;
/*
* If we're over the minimum size or this is the root, just overwrite
* the value and return.
*/
if (hdr->bth_count > min_count || hdr->bth_parent == NULL) {
- hdr->bth_count--;
- bt_shift_leaf_left(tree, leaf, idx + 1, hdr->bth_count - idx);
+ bt_shrink_leaf(tree, leaf, idx, 1);
if (hdr->bth_parent == NULL) {
ASSERT0(tree->bt_height);
if (hdr->bth_count == 0) {
@@ -1615,8 +1684,6 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
zfs_btree_node_destroy(tree, &leaf->btl_hdr);
}
}
- if (tree->bt_root != NULL)
- zfs_btree_poison_node_at(tree, hdr, hdr->bth_count);
zfs_btree_verify(tree);
return;
}
@@ -1636,33 +1703,33 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
* worth implementing in the future for completeness' sake.
*/
zfs_btree_core_t *parent = hdr->bth_parent;
- uint64_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
+ uint32_t parent_idx = zfs_btree_find_parent_idx(tree, hdr);
zfs_btree_hdr_t *l_hdr = (parent_idx == 0 ? NULL :
parent->btc_children[parent_idx - 1]);
if (l_hdr != NULL && l_hdr->bth_count > min_count) {
/* We can take a node from the left neighbor. */
- ASSERT(!l_hdr->bth_core);
+ ASSERT(!zfs_btree_is_core(l_hdr));
+ zfs_btree_leaf_t *neighbor = (zfs_btree_leaf_t *)l_hdr;
/*
* Move our elements back by one spot to make room for the
* stolen element and overwrite the element being removed.
*/
- bt_shift_leaf_right(tree, leaf, 0, idx);
+ bt_shift_leaf(tree, leaf, 0, idx, 1, BSD_RIGHT);
+
+ /* Move the separator to our first spot. */
uint8_t *separator = parent->btc_elems + (parent_idx - 1) *
size;
- uint8_t *take_elem = ((zfs_btree_leaf_t *)l_hdr)->btl_elems +
- (l_hdr->bth_count - 1) * size;
- /* Move the separator to our first spot. */
- bmov(separator, leaf->btl_elems, size);
+ bcpy(separator, leaf->btl_elems + hdr->bth_first * size, size);
/* Move our neighbor's last element to the separator. */
- bmov(take_elem, separator, size);
-
- /* Update the bookkeeping. */
- l_hdr->bth_count--;
- zfs_btree_poison_node_at(tree, l_hdr, l_hdr->bth_count);
+ uint8_t *take_elem = neighbor->btl_elems +
+ (l_hdr->bth_first + l_hdr->bth_count - 1) * size;
+ bcpy(take_elem, separator, size);
+ /* Delete our neighbor's last element. */
+ bt_shrink_leaf(tree, neighbor, l_hdr->bth_count - 1, 1);
zfs_btree_verify(tree);
return;
}
@@ -1671,7 +1738,7 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
NULL : parent->btc_children[parent_idx + 1]);
if (r_hdr != NULL && r_hdr->bth_count > min_count) {
/* We can take a node from the right neighbor. */
- ASSERT(!r_hdr->bth_core);
+ ASSERT(!zfs_btree_is_core(r_hdr));
zfs_btree_leaf_t *neighbor = (zfs_btree_leaf_t *)r_hdr;
/*
@@ -1679,96 +1746,81 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
* by one spot to make room for the stolen element and
* overwrite the element being removed.
*/
- bt_shift_leaf_left(tree, leaf, idx + 1, hdr->bth_count - idx -
- 1);
+ bt_shift_leaf(tree, leaf, idx + 1, hdr->bth_count - idx - 1,
+ 1, BSD_LEFT);
- uint8_t *separator = parent->btc_elems + parent_idx * size;
- uint8_t *take_elem = ((zfs_btree_leaf_t *)r_hdr)->btl_elems;
/* Move the separator between us to our last spot. */
- bmov(separator, leaf->btl_elems + (hdr->bth_count - 1) * size,
- size);
+ uint8_t *separator = parent->btc_elems + parent_idx * size;
+ bcpy(separator, leaf->btl_elems + (hdr->bth_first +
+ hdr->bth_count - 1) * size, size);
/* Move our neighbor's first element to the separator. */
- bmov(take_elem, separator, size);
-
- /* Update the bookkeeping. */
- r_hdr->bth_count--;
+ uint8_t *take_elem = neighbor->btl_elems +
+ r_hdr->bth_first * size;
+ bcpy(take_elem, separator, size);
- /*
- * Move our neighbors elements forwards to overwrite the
- * stolen element.
- */
- bt_shift_leaf_left(tree, neighbor, 1, r_hdr->bth_count);
- zfs_btree_poison_node_at(tree, r_hdr, r_hdr->bth_count);
+ /* Delete our neighbor's first element. */
+ bt_shrink_leaf(tree, neighbor, 0, 1);
zfs_btree_verify(tree);
return;
}
/*
* In this case, neither of our neighbors can spare an element, so we
- * need to merge with one of them. We prefer the left one,
- * arbitrarily. Move the separator into the leftmost merging node
+ * need to merge with one of them. We prefer the left one, arbitrarily.
+ * After the removal, we move the separator into the leftmost merging node
* (which may be us or the left neighbor), and then move the right
* merging node's elements. Once that's done, we go back and delete
* the element we're removing. Finally, go into the parent and delete
* the right merging node and the separator. This may cause further
* merging.
*/
- zfs_btree_hdr_t *rm_hdr, *keep_hdr;
- uint64_t new_idx = idx;
+ zfs_btree_hdr_t *rm_hdr, *k_hdr;
if (l_hdr != NULL) {
- keep_hdr = l_hdr;
+ k_hdr = l_hdr;
rm_hdr = hdr;
- new_idx += keep_hdr->bth_count + 1; // 449
} else {
ASSERT3P(r_hdr, !=, NULL);
- keep_hdr = hdr;
+ k_hdr = hdr;
rm_hdr = r_hdr;
parent_idx++;
}
-
- ASSERT(!keep_hdr->bth_core);
- ASSERT(!rm_hdr->bth_core);
- ASSERT3U(keep_hdr->bth_count, ==, min_count);
+ ASSERT(!zfs_btree_is_core(k_hdr));
+ ASSERT(!zfs_btree_is_core(rm_hdr));
+ ASSERT3U(k_hdr->bth_count, ==, min_count);
ASSERT3U(rm_hdr->bth_count, ==, min_count);
-
- zfs_btree_leaf_t *keep = (zfs_btree_leaf_t *)keep_hdr;
+ zfs_btree_leaf_t *keep = (zfs_btree_leaf_t *)k_hdr;
zfs_btree_leaf_t *rm = (zfs_btree_leaf_t *)rm_hdr;
if (zfs_btree_verify_intensity >= 5) {
- for (int i = 0; i < rm_hdr->bth_count + 1; i++) {
- zfs_btree_verify_poison_at(tree, keep_hdr,
- keep_hdr->bth_count + i);
+ for (uint32_t i = 0; i < rm_hdr->bth_count + 1; i++) {
+ zfs_btree_verify_poison_at(tree, k_hdr,
+ k_hdr->bth_count + i);
}
}
+
/*
- * Move the separator into the first open spot in the left
- * neighbor.
+ * Remove the value from the node. It will go below the minimum,
+ * but we'll fix it in no time.
*/
- uint8_t *out = keep->btl_elems + keep_hdr->bth_count * size;
- uint8_t *separator = parent->btc_elems + (parent_idx - 1) *
- size;
- bmov(separator, out, size);
- keep_hdr->bth_count++;
+ bt_shrink_leaf(tree, leaf, idx, 1);
- /* Move our elements to the left neighbor. */
- bt_transfer_leaf(tree, rm, 0, rm_hdr->bth_count, keep,
- keep_hdr->bth_count);
+ /* Prepare space for elements to be moved from the right. */
+ uint32_t k_count = k_hdr->bth_count;
+ bt_grow_leaf(tree, keep, k_count, 1 + rm_hdr->bth_count);
+ ASSERT3U(k_hdr->bth_count, ==, min_count * 2);
- /* Update the bookkeeping. */
- keep_hdr->bth_count += rm_hdr->bth_count;
- ASSERT3U(keep_hdr->bth_count, ==, min_count * 2 + 1);
+ /* Move the separator into the first open spot. */
+ uint8_t *out = keep->btl_elems + (k_hdr->bth_first + k_count) * size;
+ uint8_t *separator = parent->btc_elems + (parent_idx - 1) * size;
+ bcpy(separator, out, size);
- /* Remove the value from the node */
- keep_hdr->bth_count--;
- bt_shift_leaf_left(tree, keep, new_idx + 1, keep_hdr->bth_count -
- new_idx);
- zfs_btree_poison_node_at(tree, keep_hdr, keep_hdr->bth_count);
+ /* Move our elements to the left neighbor. */
+ bt_transfer_leaf(tree, rm, 0, rm_hdr->bth_count, keep, k_count + 1);
- rm_hdr->bth_count = 0;
- zfs_btree_node_destroy(tree, rm_hdr);
/* Remove the emptied node from the parent. */
zfs_btree_remove_from_node(tree, parent, rm_hdr);
+ zfs_btree_node_destroy(tree, rm_hdr);
zfs_btree_verify(tree);
}
@@ -1831,11 +1883,10 @@ zfs_btree_destroy_nodes(zfs_btree_t *tree, zfs_btree_index_t **cookie)
static void
zfs_btree_clear_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
{
- if (hdr->bth_core) {
+ if (zfs_btree_is_core(hdr)) {
zfs_btree_core_t *btc = (zfs_btree_core_t *)hdr;
- for (int i = 0; i <= hdr->bth_count; i++) {
+ for (uint32_t i = 0; i <= hdr->bth_count; i++)
zfs_btree_clear_helper(tree, btc->btc_children[i]);
- }
}
zfs_btree_node_destroy(tree, hdr);
@@ -1868,11 +1919,11 @@ zfs_btree_destroy(zfs_btree_t *tree)
static void
zfs_btree_verify_pointers_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
{
- if (!hdr->bth_core)
+ if (!zfs_btree_is_core(hdr))
return;
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
- for (int i = 0; i <= hdr->bth_count; i++) {
+ for (uint32_t i = 0; i <= hdr->bth_count; i++) {
VERIFY3P(node->btc_children[i]->bth_parent, ==, hdr);
zfs_btree_verify_pointers_helper(tree, node->btc_children[i]);
}
@@ -1897,11 +1948,10 @@ zfs_btree_verify_pointers(zfs_btree_t *tree)
static uint64_t
zfs_btree_verify_counts_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
{
- if (!hdr->bth_core) {
- if (tree->bt_root != hdr && hdr != &tree->bt_bulk->btl_hdr) {
- uint64_t capacity = P2ALIGN((BTREE_LEAF_SIZE -
- sizeof (zfs_btree_hdr_t)) / tree->bt_elem_size, 2);
- VERIFY3U(hdr->bth_count, >=, (capacity / 2) - 1);
+ if (!zfs_btree_is_core(hdr)) {
+ if (tree->bt_root != hdr && tree->bt_bulk &&
+ hdr != &tree->bt_bulk->btl_hdr) {
+ VERIFY3U(hdr->bth_count, >=, tree->bt_leaf_cap / 2 - 1);
}
return (hdr->bth_count);
@@ -1911,7 +1961,7 @@ zfs_btree_verify_counts_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
uint64_t ret = hdr->bth_count;
if (tree->bt_root != hdr && tree->bt_bulk == NULL)
VERIFY3P(hdr->bth_count, >=, BTREE_CORE_ELEMS / 2 - 1);
- for (int i = 0; i <= hdr->bth_count; i++) {
+ for (uint32_t i = 0; i <= hdr->bth_count; i++) {
ret += zfs_btree_verify_counts_helper(tree,
node->btc_children[i]);
}
@@ -1943,15 +1993,14 @@ static uint64_t
zfs_btree_verify_height_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr,
int64_t height)
{
- if (!hdr->bth_core) {
+ if (!zfs_btree_is_core(hdr)) {
VERIFY0(height);
return (1);
}
- VERIFY(hdr->bth_core);
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
uint64_t ret = 1;
- for (int i = 0; i <= hdr->bth_count; i++) {
+ for (uint32_t i = 0; i <= hdr->bth_count; i++) {
ret += zfs_btree_verify_height_helper(tree,
node->btc_children[i], height - 1);
}
@@ -1983,24 +2032,26 @@ static void
zfs_btree_verify_order_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
{
size_t size = tree->bt_elem_size;
- if (!hdr->bth_core) {
+ if (!zfs_btree_is_core(hdr)) {
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
- for (int i = 1; i < hdr->bth_count; i++) {
- VERIFY3S(tree->bt_compar(leaf->btl_elems + (i - 1) *
- size, leaf->btl_elems + i * size), ==, -1);
+ for (uint32_t i = 1; i < hdr->bth_count; i++) {
+ VERIFY3S(tree->bt_compar(leaf->btl_elems +
+ (hdr->bth_first + i - 1) * size,
+ leaf->btl_elems +
+ (hdr->bth_first + i) * size), ==, -1);
}
return;
}
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
- for (int i = 1; i < hdr->bth_count; i++) {
+ for (uint32_t i = 1; i < hdr->bth_count; i++) {
VERIFY3S(tree->bt_compar(node->btc_elems + (i - 1) * size,
node->btc_elems + i * size), ==, -1);
}
- for (int i = 0; i < hdr->bth_count; i++) {
+ for (uint32_t i = 0; i < hdr->bth_count; i++) {
uint8_t *left_child_last = NULL;
zfs_btree_hdr_t *left_child_hdr = node->btc_children[i];
- if (left_child_hdr->bth_core) {
+ if (zfs_btree_is_core(left_child_hdr)) {
zfs_btree_core_t *left_child =
(zfs_btree_core_t *)left_child_hdr;
left_child_last = left_child->btc_elems +
@@ -2009,40 +2060,39 @@ zfs_btree_verify_order_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
zfs_btree_leaf_t *left_child =
(zfs_btree_leaf_t *)left_child_hdr;
left_child_last = left_child->btl_elems +
- (left_child_hdr->bth_count - 1) * size;
+ (left_child_hdr->bth_first +
+ left_child_hdr->bth_count - 1) * size;
}
- if (tree->bt_compar(node->btc_elems + i * size,
- left_child_last) != 1) {
+ int comp = tree->bt_compar(node->btc_elems + i * size,
+ left_child_last);
+ if (comp <= 0) {
panic("btree: compar returned %d (expected 1) at "
- "%px %d: compar(%px, %px)", tree->bt_compar(
- node->btc_elems + i * size, left_child_last),
- (void *)node, i, (void *)(node->btc_elems + i *
- size), (void *)left_child_last);
+ "%px %d: compar(%px, %px)", comp, node, i,
+ node->btc_elems + i * size, left_child_last);
}
uint8_t *right_child_first = NULL;
zfs_btree_hdr_t *right_child_hdr = node->btc_children[i + 1];
- if (right_child_hdr->bth_core) {
+ if (zfs_btree_is_core(right_child_hdr)) {
zfs_btree_core_t *right_child =
(zfs_btree_core_t *)right_child_hdr;
right_child_first = right_child->btc_elems;
} else {
zfs_btree_leaf_t *right_child =
(zfs_btree_leaf_t *)right_child_hdr;
- right_child_first = right_child->btl_elems;
+ right_child_first = right_child->btl_elems +
+ right_child_hdr->bth_first * size;
}
- if (tree->bt_compar(node->btc_elems + i * size,
- right_child_first) != -1) {
+ comp = tree->bt_compar(node->btc_elems + i * size,
+ right_child_first);
+ if (comp >= 0) {
panic("btree: compar returned %d (expected -1) at "
- "%px %d: compar(%px, %px)", tree->bt_compar(
- node->btc_elems + i * size, right_child_first),
- (void *)node, i, (void *)(node->btc_elems + i *
- size), (void *)right_child_first);
+ "%px %d: compar(%px, %px)", comp, node, i,
+ node->btc_elems + i * size, right_child_first);
}
}
- for (int i = 0; i <= hdr->bth_count; i++) {
+ for (uint32_t i = 0; i <= hdr->bth_count; i++)
zfs_btree_verify_order_helper(tree, node->btc_children[i]);
- }
}
/* Check that all elements in the tree are in sorted order. */
@@ -2063,27 +2113,26 @@ static void
zfs_btree_verify_poison_helper(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
{
size_t size = tree->bt_elem_size;
- if (!hdr->bth_core) {
+ if (!zfs_btree_is_core(hdr)) {
zfs_btree_leaf_t *leaf = (zfs_btree_leaf_t *)hdr;
- uint8_t val = 0x0f;
- for (int i = hdr->bth_count * size; i < BTREE_LEAF_SIZE -
- sizeof (zfs_btree_hdr_t); i++) {
- VERIFY3U(leaf->btl_elems[i], ==, val);
- }
+ for (size_t i = 0; i < hdr->bth_first * size; i++)
+ VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
+ for (size_t i = (hdr->bth_first + hdr->bth_count) * size;
+ i < BTREE_LEAF_ESIZE; i++)
+ VERIFY3U(leaf->btl_elems[i], ==, 0x0f);
} else {
zfs_btree_core_t *node = (zfs_btree_core_t *)hdr;
- uint8_t val = 0x0f;
- for (int i = hdr->bth_count * size; i < BTREE_CORE_ELEMS * size;
- i++) {
- VERIFY3U(node->btc_elems[i], ==, val);
- }
+ for (size_t i = hdr->bth_count * size;
+ i < BTREE_CORE_ELEMS * size; i++)
+ VERIFY3U(node->btc_elems[i], ==, 0x0f);
- for (int i = hdr->bth_count + 1; i <= BTREE_CORE_ELEMS; i++) {
+ for (uint32_t i = hdr->bth_count + 1; i <= BTREE_CORE_ELEMS;
+ i++) {
VERIFY3P(node->btc_children[i], ==,
(zfs_btree_hdr_t *)BTREE_POISON);
}
- for (int i = 0; i <= hdr->bth_count; i++) {
+ for (uint32_t i = 0; i <= hdr->bth_count; i++) {
zfs_btree_verify_poison_helper(tree,
node->btc_children[i]);
}
@@ -2122,3 +2171,9 @@ zfs_btree_verify(zfs_btree_t *tree)
return;
zfs_btree_verify_poison(tree);
}
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, btree_verify_intensity, UINT, ZMOD_RW,
+ "Enable btree verification. Levels above 4 require ZFS be built "
+ "with debugging");
+/* END CSTYLED */
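
The b-tree changes above hinge on the new bth_first field: a leaf's live elements start at btl_elems + bth_first * size, so the verification helpers index element i as (bth_first + i) and a leaf can shed or absorb elements at its head without a memmove. A minimal standalone sketch of that indexing idea; the names are illustrative, not the actual OpenZFS structures:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy leaf: element i lives at elems[(first + i) * size]. */
struct toy_leaf {
	uint32_t first;		/* slot index of element 0 */
	uint32_t count;		/* number of live elements */
	uint8_t elems[64];
};

static uint8_t *
toy_elem(struct toy_leaf *l, size_t size, uint32_t i)
{
	assert(i < l->count);
	return (l->elems + (l->first + i) * size);
}

/* Dropping the head element is O(1): bump first, no memmove. */
static void
toy_pop_head(struct toy_leaf *l)
{
	assert(l->count > 0);
	l->first++;
	l->count--;
}

int
main(void)
{
	struct toy_leaf l = { .first = 0, .count = 3 };
	uint8_t vals[] = { 10, 20, 30 };

	memcpy(l.elems, vals, sizeof (vals));
	toy_pop_head(&l);
	printf("%u\n", *toy_elem(&l, 1, 0));	/* prints 20 */
	return (0);
}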
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index 247eeddb6cd6..7ecc2812b4e4 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -53,6 +53,7 @@
#include <cityhash.h>
#include <sys/spa_impl.h>
#include <sys/wmsum.h>
+#include <sys/vdev_impl.h>
kstat_t *dbuf_ksp;
@@ -338,18 +339,18 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
- rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_READER);
+ mutex_enter(DBUF_HASH_MUTEX(h, idx));
for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
if (DBUF_EQUAL(db, os, obj, level, blkid)) {
mutex_enter(&db->db_mtx);
if (db->db_state != DB_EVICTING) {
- rw_exit(DBUF_HASH_RWLOCK(h, idx));
+ mutex_exit(DBUF_HASH_MUTEX(h, idx));
return (db);
}
mutex_exit(&db->db_mtx);
}
}
- rw_exit(DBUF_HASH_RWLOCK(h, idx));
+ mutex_exit(DBUF_HASH_MUTEX(h, idx));
return (NULL);
}
@@ -392,13 +393,13 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
- rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
+ mutex_enter(DBUF_HASH_MUTEX(h, idx));
for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
dbf = dbf->db_hash_next, i++) {
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
mutex_enter(&dbf->db_mtx);
if (dbf->db_state != DB_EVICTING) {
- rw_exit(DBUF_HASH_RWLOCK(h, idx));
+ mutex_exit(DBUF_HASH_MUTEX(h, idx));
return (dbf);
}
mutex_exit(&dbf->db_mtx);
@@ -416,7 +417,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
mutex_enter(&db->db_mtx);
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
- rw_exit(DBUF_HASH_RWLOCK(h, idx));
+ mutex_exit(DBUF_HASH_MUTEX(h, idx));
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
DBUF_STAT_MAX(hash_elements_max, he);
@@ -473,13 +474,13 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
/*
* We mustn't hold db_mtx to maintain lock ordering:
- * DBUF_HASH_RWLOCK > db_mtx.
+ * DBUF_HASH_MUTEX > db_mtx.
*/
ASSERT(zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_state == DB_EVICTING);
ASSERT(!MUTEX_HELD(&db->db_mtx));
- rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
+ mutex_enter(DBUF_HASH_MUTEX(h, idx));
dbp = &h->hash_table[idx];
while ((dbf = *dbp) != db) {
dbp = &dbf->db_hash_next;
@@ -490,7 +491,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
if (h->hash_table[idx] &&
h->hash_table[idx]->db_hash_next == NULL)
DBUF_STAT_BUMPDOWN(hash_chains);
- rw_exit(DBUF_HASH_RWLOCK(h, idx));
+ mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
}
@@ -594,6 +595,68 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
}
}
+/*
+ * We want to exclude buffers that are on a special allocation class from
+ * L2ARC.
+ */
+boolean_t
+dbuf_is_l2cacheable(dmu_buf_impl_t *db)
+{
+ vdev_t *vd = NULL;
+ zfs_cache_type_t cache = db->db_objset->os_secondary_cache;
+ blkptr_t *bp = db->db_blkptr;
+
+ if (bp != NULL && !BP_IS_HOLE(bp)) {
+ uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+ vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev;
+
+ if (vdev < rvd->vdev_children)
+ vd = rvd->vdev_child[vdev];
+
+ if (cache == ZFS_CACHE_ALL ||
+ (dbuf_is_metadata(db) && cache == ZFS_CACHE_METADATA)) {
+ if (vd == NULL)
+ return (B_TRUE);
+
+ if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+ vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
+ l2arc_exclude_special == 0)
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+}
+
+static inline boolean_t
+dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level)
+{
+ vdev_t *vd = NULL;
+ zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache;
+
+ if (bp != NULL && !BP_IS_HOLE(bp)) {
+ uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+ vdev_t *rvd = dn->dn_objset->os_spa->spa_root_vdev;
+
+ if (vdev < rvd->vdev_children)
+ vd = rvd->vdev_child[vdev];
+
+ if (cache == ZFS_CACHE_ALL || ((level > 0 ||
+ DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)) &&
+ cache == ZFS_CACHE_METADATA)) {
+ if (vd == NULL)
+ return (B_TRUE);
+
+ if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+ vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
+ l2arc_exclude_special == 0)
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+}
+
/*
* This function *must* return indices evenly distributed between all
@@ -851,8 +914,8 @@ retry:
sizeof (dmu_buf_impl_t),
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
- for (i = 0; i < DBUF_RWLOCKS; i++)
- rw_init(&h->hash_rwlocks[i], NULL, RW_DEFAULT, NULL);
+ for (i = 0; i < DBUF_MUTEXES; i++)
+ mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
dbuf_stats_init(h);
@@ -918,8 +981,8 @@ dbuf_fini(void)
dbuf_stats_destroy();
- for (i = 0; i < DBUF_RWLOCKS; i++)
- rw_destroy(&h->hash_rwlocks[i]);
+ for (i = 0; i < DBUF_MUTEXES; i++)
+ mutex_destroy(&h->hash_mutexes[i]);
#if defined(_KERNEL)
/*
* Large allocations which do not require contiguous pages
@@ -1523,7 +1586,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
DTRACE_SET_STATE(db, "read issued");
mutex_exit(&db->db_mtx);
- if (DBUF_IS_L2CACHEABLE(db))
+ if (dbuf_is_l2cacheable(db))
aflags |= ARC_FLAG_L2CACHE;
dbuf_add_ref(db, NULL);
@@ -2878,9 +2941,6 @@ dbuf_destroy(dmu_buf_impl_t *db)
ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
ASSERT(!multilist_link_active(&db->db_cache_link));
- kmem_cache_free(dbuf_kmem_cache, db);
- arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
-
/*
* If this dbuf is referenced from an indirect dbuf,
* decrement the ref count on the indirect dbuf.
@@ -2889,6 +2949,9 @@ dbuf_destroy(dmu_buf_impl_t *db)
mutex_enter(&parent->db_mtx);
dbuf_rele_and_unlock(parent, db, B_TRUE);
}
+
+ kmem_cache_free(dbuf_kmem_cache, db);
+ arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
}
/*
@@ -3122,8 +3185,10 @@ typedef struct dbuf_prefetch_arg {
static void
dbuf_prefetch_fini(dbuf_prefetch_arg_t *dpa, boolean_t io_done)
{
- if (dpa->dpa_cb != NULL)
- dpa->dpa_cb(dpa->dpa_arg, io_done);
+ if (dpa->dpa_cb != NULL) {
+ dpa->dpa_cb(dpa->dpa_arg, dpa->dpa_zb.zb_level,
+ dpa->dpa_zb.zb_blkid, io_done);
+ }
kmem_free(dpa, sizeof (*dpa));
}
@@ -3134,9 +3199,10 @@ dbuf_issue_final_prefetch_done(zio_t *zio, const zbookmark_phys_t *zb,
(void) zio, (void) zb, (void) iobp;
dbuf_prefetch_arg_t *dpa = private;
- dbuf_prefetch_fini(dpa, B_TRUE);
if (abuf != NULL)
arc_buf_destroy(abuf, private);
+
+ dbuf_prefetch_fini(dpa, B_TRUE);
}
/*
@@ -3257,7 +3323,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
dpa->dpa_zb.zb_object, dpa->dpa_curlevel, nextblkid);
(void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
- bp, dbuf_prefetch_indirect_done, dpa, dpa->dpa_prio,
+ bp, dbuf_prefetch_indirect_done, dpa,
+ ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
&iter_aflags, &zb);
}
@@ -3368,7 +3435,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
dpa->dpa_arg = arg;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
- if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+ if (dnode_level_is_l2cacheable(&bp, dn, level))
dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
/*
@@ -3386,13 +3453,14 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
zbookmark_phys_t zb;
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
- if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+ if (dnode_level_is_l2cacheable(&bp, dn, level))
iter_aflags |= ARC_FLAG_L2CACHE;
SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
dn->dn_object, curlevel, curblkid);
(void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
- &bp, dbuf_prefetch_indirect_done, dpa, prio,
+ &bp, dbuf_prefetch_indirect_done, dpa,
+ ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
&iter_aflags, &zb);
}
@@ -3404,7 +3472,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
return (1);
no_issue:
if (cb != NULL)
- cb(arg, B_FALSE);
+ cb(arg, level, blkid, B_FALSE);
return (0);
}
@@ -4984,7 +5052,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
children_ready_cb = dbuf_write_children_ready;
dr->dr_zio = arc_write(pio, os->os_spa, txg,
- &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
+ &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
&zp, dbuf_write_ready,
children_ready_cb, dbuf_write_physdone,
dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
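
The dbuf_is_l2cacheable() and dnode_level_is_l2cacheable() helpers added above share one pattern: resolve the vdev of the block's first DVA, then admit the buffer to L2ARC unless it sits on a special or dedup allocation class while the l2arc_exclude_special tunable is set. A condensed restatement of that predicate as a standalone sketch; the enum and parameter names below are simplified stand-ins, not the ZFS types:

#include <stdbool.h>

enum cache_type { CACHE_NONE, CACHE_METADATA, CACHE_ALL };
enum alloc_bias { BIAS_NONE, BIAS_SPECIAL, BIAS_DEDUP };

/* Simplified: is this buffer eligible for L2ARC caching? */
static bool
l2_eligible(enum cache_type cache, bool is_metadata,
    enum alloc_bias bias, bool vdev_known, int exclude_special)
{
	/* The secondarycache property must allow this buffer class. */
	if (cache != CACHE_ALL &&
	    !(cache == CACHE_METADATA && is_metadata))
		return (false);
	/* Without a resolved vdev we cannot exclude, so admit. */
	if (!vdev_known)
		return (true);
	/* Exclude special/dedup class only when the tunable says so. */
	if ((bias == BIAS_SPECIAL || bias == BIAS_DEDUP) &&
	    exclude_special != 0)
		return (false);
	return (true);
}

int
main(void)
{
	/* metadata-only caching, regular vdev: eligible */
	return (l2_eligible(CACHE_METADATA, true, BIAS_NONE, true, 1) ?
	    0 : 1);
}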
diff --git a/sys/contrib/openzfs/module/zfs/dbuf_stats.c b/sys/contrib/openzfs/module/zfs/dbuf_stats.c
index 037190a81bb3..12bb568a08cc 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf_stats.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf_stats.c
@@ -137,7 +137,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
if (size)
buf[0] = 0;
- rw_enter(DBUF_HASH_RWLOCK(h, dsh->idx), RW_READER);
+ mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
/*
* Returning ENOMEM will cause the data and header functions
@@ -158,7 +158,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
mutex_exit(&db->db_mtx);
}
- rw_exit(DBUF_HASH_RWLOCK(h, dsh->idx));
+ mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
return (error);
}
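
Both dbuf.c and dbuf_stats.c above switch the dbuf hash locks from rwlocks back to plain mutexes, presumably because the critical sections are short chain walks where an uncontended mutex is cheaper than a rwlock's reader accounting. The access pattern, reduced to a pthread sketch with a toy table rather than the ZFS one:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct dbuf { uint64_t key; struct dbuf *next; };

#define	NLOCKS 64
#define	NBUCKETS 1024
static pthread_mutex_t hash_locks[NLOCKS];
static struct dbuf *hash_table[NBUCKETS];

/* Short critical section: walk one chain under one plain mutex. */
static struct dbuf *
dbuf_find_toy(uint64_t key)
{
	uint32_t idx = key & (NBUCKETS - 1);
	pthread_mutex_t *m = &hash_locks[idx & (NLOCKS - 1)];
	struct dbuf *db;

	pthread_mutex_lock(m);
	for (db = hash_table[idx]; db != NULL; db = db->next) {
		if (db->key == key)
			break;
	}
	pthread_mutex_unlock(m);
	return (db);
}

int
main(void)
{
	struct dbuf d = { .key = 7, .next = NULL };

	for (int i = 0; i < NLOCKS; i++)
		pthread_mutex_init(&hash_locks[i], NULL);
	hash_table[7] = &d;
	printf("%s\n", dbuf_find_toy(7) != NULL ? "found" : "missing");
	return (0);
}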
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 4e7127bd1bab..e38c9b452a28 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -1846,7 +1846,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
- zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
+ zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index b9380890230c..a8975797e8af 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -63,6 +63,8 @@
#include <sys/dmu_recv.h>
#include <sys/zfs_project.h>
#include "zfs_namecheck.h"
+#include <sys/vdev_impl.h>
+#include <sys/arc.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -411,6 +413,34 @@ dnode_multilist_index_func(multilist_t *ml, void *obj)
multilist_get_num_sublists(ml));
}
+static inline boolean_t
+dmu_os_is_l2cacheable(objset_t *os)
+{
+ vdev_t *vd = NULL;
+ zfs_cache_type_t cache = os->os_secondary_cache;
+ blkptr_t *bp = os->os_rootbp;
+
+ if (bp != NULL && !BP_IS_HOLE(bp)) {
+ uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+ vdev_t *rvd = os->os_spa->spa_root_vdev;
+
+ if (vdev < rvd->vdev_children)
+ vd = rvd->vdev_child[vdev];
+
+ if (cache == ZFS_CACHE_ALL || cache == ZFS_CACHE_METADATA) {
+ if (vd == NULL)
+ return (B_TRUE);
+
+ if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+ vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
+ l2arc_exclude_special == 0)
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+}
+
/*
* Instantiates the objset_t in-memory structure corresponding to the
* objset_phys_t that's pointed to by the specified blkptr_t.
@@ -453,7 +483,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
- if (DMU_OS_IS_L2CACHEABLE(os))
+ if (dmu_os_is_l2cacheable(os))
aflags |= ARC_FLAG_L2CACHE;
if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
@@ -1661,7 +1691,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
}
zio = arc_write(pio, os->os_spa, tx->tx_txg,
- blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
+ blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c
index 7efe423d35f0..5184ef6888df 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_redact.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c
@@ -141,7 +141,7 @@ record_merge_enqueue(bqueue_t *q, struct redact_record **build,
{
if (new->eos_marker) {
if (*build != NULL)
- bqueue_enqueue(q, *build, sizeof (*build));
+ bqueue_enqueue(q, *build, sizeof (**build));
bqueue_enqueue_flush(q, new, sizeof (*new));
return;
}
@@ -823,7 +823,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
avl_destroy(&end_tree);
kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
if (current_record != NULL)
- bqueue_enqueue(q, current_record, sizeof (current_record));
+ bqueue_enqueue(q, current_record, sizeof (*current_record));
return (err);
}
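
The two one-character fixes above are a classic sizeof pitfall: build is a struct redact_record **, so sizeof (*build) yields the size of a pointer, not of the record being enqueued. A tiny illustration with toy types, not the ZFS ones:

#include <stdio.h>

struct record { char payload[128]; };

int
main(void)
{
	struct record r, *p = &r, **build = &p;

	/* sizeof (*build) is sizeof (struct record *): 8 on LP64. */
	printf("%zu\n", sizeof (*build));
	/* sizeof (**build) is the intended sizeof (struct record): 128. */
	printf("%zu\n", sizeof (**build));
	return (0);
}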
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
index 0beb983f992f..1eed0526b51d 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -54,6 +54,7 @@ dmu_tx_stats_t dmu_tx_stats = {
{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
+ { "dmu_tx_wrlog_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
};
@@ -780,34 +781,49 @@ static void
dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
{
dsl_pool_t *dp = tx->tx_pool;
- uint64_t delay_min_bytes =
+ uint64_t delay_min_bytes, wrlog;
+ hrtime_t wakeup, tx_time = 0, now;
+
+ /* Calculate minimum transaction time for the dirty data amount. */
+ delay_min_bytes =
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
- hrtime_t wakeup, min_tx_time, now;
+ if (dirty > delay_min_bytes) {
+ /*
+ * The caller has already waited until we are under the max.
+ * We make them pass us the amount of dirty data so we don't
+ * have to handle the case of it being >= the max, which
+ * could cause a divide-by-zero if it's == the max.
+ */
+ ASSERT3U(dirty, <, zfs_dirty_data_max);
- if (dirty <= delay_min_bytes)
- return;
+ tx_time = zfs_delay_scale * (dirty - delay_min_bytes) /
+ (zfs_dirty_data_max - dirty);
+ }
- /*
- * The caller has already waited until we are under the max.
- * We make them pass us the amount of dirty data so we don't
- * have to handle the case of it being >= the max, which could
- * cause a divide-by-zero if it's == the max.
- */
- ASSERT3U(dirty, <, zfs_dirty_data_max);
+ /* Calculate minimum transaction time for the TX_WRITE log size. */
+ wrlog = aggsum_upper_bound(&dp->dp_wrlog_total);
+ delay_min_bytes =
+ zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100;
+ if (wrlog >= zfs_wrlog_data_max) {
+ tx_time = zfs_delay_max_ns;
+ } else if (wrlog > delay_min_bytes) {
+ tx_time = MAX(zfs_delay_scale * (wrlog - delay_min_bytes) /
+ (zfs_wrlog_data_max - wrlog), tx_time);
+ }
+ if (tx_time == 0)
+ return;
+
+ tx_time = MIN(tx_time, zfs_delay_max_ns);
now = gethrtime();
- min_tx_time = zfs_delay_scale *
- (dirty - delay_min_bytes) / (zfs_dirty_data_max - dirty);
- min_tx_time = MIN(min_tx_time, zfs_delay_max_ns);
- if (now > tx->tx_start + min_tx_time)
+ if (now > tx->tx_start + tx_time)
return;
DTRACE_PROBE3(delay__mintime, dmu_tx_t *, tx, uint64_t, dirty,
- uint64_t, min_tx_time);
+ uint64_t, tx_time);
mutex_enter(&dp->dp_lock);
- wakeup = MAX(tx->tx_start + min_tx_time,
- dp->dp_last_wakeup + min_tx_time);
+ wakeup = MAX(tx->tx_start + tx_time, dp->dp_last_wakeup + tx_time);
dp->dp_last_wakeup = wakeup;
mutex_exit(&dp->dp_lock);
@@ -885,6 +901,13 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
}
if (!tx->tx_dirty_delayed &&
+ dsl_pool_need_wrlog_delay(tx->tx_pool)) {
+ tx->tx_wait_dirty = B_TRUE;
+ DMU_TX_STAT_BUMP(dmu_tx_wrlog_delay);
+ return (SET_ERROR(ERESTART));
+ }
+
+ if (!tx->tx_dirty_delayed &&
dsl_pool_need_dirty_delay(tx->tx_pool)) {
tx->tx_wait_dirty = B_TRUE;
DMU_TX_STAT_BUMP(dmu_tx_dirty_delay);
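
The refactored dmu_tx_delay() above computes one candidate delay from dirty data and another from the TX_WRITE log, then takes the larger and clamps it. A standalone sketch of that combination using the same hyperbolic formula; the zfs_delay_scale (500,000 ns), zfs_delay_max_ns (100 ms), and 60% threshold defaults below are assumptions for illustration only:

#include <stdint.h>
#include <stdio.h>

#define	MAXV(a, b) ((a) > (b) ? (a) : (b))
#define	MINV(a, b) ((a) < (b) ? (a) : (b))

static uint64_t delay_scale = 500000;		/* assumed, ns */
static uint64_t delay_max_ns = 100000000;	/* assumed, 100 ms */
static uint64_t min_dirty_pct = 60;		/* assumed */

/* Hyperbolic delay: grows without bound as 'used' nears 'max'. */
static uint64_t
curve(uint64_t used, uint64_t max)
{
	uint64_t floor = max * min_dirty_pct / 100;

	if (used >= max)
		return (delay_max_ns);
	if (used <= floor)
		return (0);
	return (delay_scale * (used - floor) / (max - used));
}

int
main(void)
{
	uint64_t dirty_max = 4096ULL << 20, wrlog_max = 8192ULL << 20;
	uint64_t dirty = 3500ULL << 20, wrlog = 1000ULL << 20;
	/* As in the patch: take the larger delay, clamp at the max. */
	uint64_t tx_time = MAXV(curve(dirty, dirty_max),
	    curve(wrlog, wrlog_max));

	tx_time = MINV(tx_time, delay_max_ns);
	printf("delay = %llu ns\n", (unsigned long long)tx_time);
	return (0);
}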
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 7566663bd3ad..bca881d82f87 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -48,9 +48,13 @@ int zfs_prefetch_disable = B_FALSE;
/* max # of streams per zfetch */
unsigned int zfetch_max_streams = 8;
/* min time before stream reclaim */
-unsigned int zfetch_min_sec_reap = 2;
-/* max bytes to prefetch per stream (default 8MB) */
-unsigned int zfetch_max_distance = 8 * 1024 * 1024;
+static unsigned int zfetch_min_sec_reap = 1;
+/* max time before stream delete */
+static unsigned int zfetch_max_sec_reap = 2;
+/* min bytes to prefetch per stream (default 4MB) */
+static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
+/* max bytes to prefetch per stream (default 64MB) */
+unsigned int zfetch_max_distance = 64 * 1024 * 1024;
/* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
/* max number of bytes in an array_read in which we allow prefetching (1MB) */
@@ -195,74 +199,99 @@ dmu_zfetch_fini(zfetch_t *zf)
}
/*
- * If there aren't too many streams already, create a new stream.
+ * If there aren't too many active streams already, create one more.
+ * In the process, delete or reuse any streams without hits for
+ * zfetch_max_sec_reap.  If needed, reuse the oldest stream without hits
+ * for at least zfetch_min_sec_reap (possibly one that never had a hit).

* The "blkid" argument is the next block that we expect this stream to access.
- * While we're here, clean up old streams (which haven't been
- * accessed for at least zfetch_min_sec_reap seconds).
*/
static void
dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
{
- zstream_t *zs_next;
- hrtime_t now = gethrtime();
+ zstream_t *zs, *zs_next, *zs_old = NULL;
+ hrtime_t now = gethrtime(), t;
ASSERT(MUTEX_HELD(&zf->zf_lock));
/*
- * Clean up old streams.
+ * Delete too old streams, reusing the first found one.
*/
- for (zstream_t *zs = list_head(&zf->zf_stream);
- zs != NULL; zs = zs_next) {
+ t = now - SEC2NSEC(zfetch_max_sec_reap);
+ for (zs = list_head(&zf->zf_stream); zs != NULL; zs = zs_next) {
zs_next = list_next(&zf->zf_stream, zs);
/*
* Skip if still active. 1 -- zf_stream reference.
*/
if (zfs_refcount_count(&zs->zs_refs) != 1)
continue;
- if (((now - zs->zs_atime) / NANOSEC) >
- zfetch_min_sec_reap)
+ if (zs->zs_atime > t)
+ continue;
+ if (zs_old)
dmu_zfetch_stream_remove(zf, zs);
+ else
+ zs_old = zs;
+ }
+ if (zs_old) {
+ zs = zs_old;
+ goto reuse;
}
/*
* The maximum number of streams is normally zfetch_max_streams,
* but for small files we lower it such that it's at least possible
* for all the streams to be non-overlapping.
- *
- * If we are already at the maximum number of streams for this file,
- * even after removing old streams, then don't create this stream.
*/
uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
zfetch_max_distance));
if (zf->zf_numstreams >= max_streams) {
+ t = now - SEC2NSEC(zfetch_min_sec_reap);
+ for (zs = list_head(&zf->zf_stream); zs != NULL;
+ zs = list_next(&zf->zf_stream, zs)) {
+ if (zfs_refcount_count(&zs->zs_refs) != 1)
+ continue;
+ if (zs->zs_atime > t)
+ continue;
+ if (zs_old == NULL || zs->zs_atime < zs_old->zs_atime)
+ zs_old = zs;
+ }
+ if (zs_old) {
+ zs = zs_old;
+ goto reuse;
+ }
ZFETCHSTAT_BUMP(zfetchstat_max_streams);
return;
}
- zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
- zs->zs_blkid = blkid;
- zs->zs_pf_blkid1 = blkid;
- zs->zs_pf_blkid = blkid;
- zs->zs_ipf_blkid1 = blkid;
- zs->zs_ipf_blkid = blkid;
- zs->zs_atime = now;
+ zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
zs->zs_fetch = zf;
- zs->zs_missed = B_FALSE;
zfs_refcount_create(&zs->zs_callers);
zfs_refcount_create(&zs->zs_refs);
/* One reference for zf_stream. */
zfs_refcount_add(&zs->zs_refs, NULL);
zf->zf_numstreams++;
list_insert_head(&zf->zf_stream, zs);
+
+reuse:
+ zs->zs_blkid = blkid;
+ zs->zs_pf_dist = 0;
+ zs->zs_pf_start = blkid;
+ zs->zs_pf_end = blkid;
+ zs->zs_ipf_dist = 0;
+ zs->zs_ipf_start = blkid;
+ zs->zs_ipf_end = blkid;
+ /* Allow immediate stream reuse until first hit. */
+ zs->zs_atime = now - SEC2NSEC(zfetch_min_sec_reap);
+ zs->zs_missed = B_FALSE;
+ zs->zs_more = B_FALSE;
}
static void
-dmu_zfetch_stream_done(void *arg, boolean_t io_issued)
+dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued)
{
- (void) io_issued;
zstream_t *zs = arg;
+ if (io_issued && level == 0 && blkid < zs->zs_blkid)
+ zs->zs_more = B_TRUE;
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
dmu_zfetch_stream_fini(zs);
}
@@ -284,11 +313,6 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
boolean_t fetch_data, boolean_t have_lock)
{
zstream_t *zs;
- int64_t pf_start, ipf_start;
- int64_t pf_ahead_blks, max_blks;
- int max_dist_blks, pf_nblks, ipf_nblks;
- uint64_t end_of_access_blkid, maxblkid;
- end_of_access_blkid = blkid + nblks;
spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
if (zfs_prefetch_disable)
@@ -317,7 +341,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
* A fast path for small files for which no prefetch will
* happen.
*/
- maxblkid = zf->zf_dnode->dn_maxblkid;
+ uint64_t maxblkid = zf->zf_dnode->dn_maxblkid;
if (maxblkid < 2) {
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
@@ -345,6 +369,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
* If the file is ending, remove the matching stream if found.
* If not found then it is too late to create a new one now.
*/
+ uint64_t end_of_access_blkid = blkid + nblks;
if (end_of_access_blkid >= maxblkid) {
if (zs != NULL)
dmu_zfetch_stream_remove(zf, zs);
@@ -377,60 +402,48 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
/*
* This access was to a block that we issued a prefetch for on
- * behalf of this stream. Issue further prefetches for this stream.
+ * behalf of this stream. Calculate further prefetch distances.
*
- * Normally, we start prefetching where we stopped
- * prefetching last (zs_pf_blkid). But when we get our first
- * hit on this stream, zs_pf_blkid == zs_blkid, we don't
- * want to prefetch the block we just accessed. In this case,
- * start just after the block we just accessed.
- */
- pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);
- if (zs->zs_pf_blkid1 < end_of_access_blkid)
- zs->zs_pf_blkid1 = end_of_access_blkid;
- if (zs->zs_ipf_blkid1 < end_of_access_blkid)
- zs->zs_ipf_blkid1 = end_of_access_blkid;
-
- /*
- * Double our amount of prefetched data, but don't let the
- * prefetch get further ahead than zfetch_max_distance.
+ * Start prefetch from the demand access size (nblks).  Double the
+ * distance on every access until it reaches zfetch_min_distance.  After
+ * that, grow the distance by 1/8 only when needed, up to zfetch_max_distance.
*/
+ unsigned int nbytes = nblks << zf->zf_dnode->dn_datablkshift;
+ unsigned int pf_nblks;
if (fetch_data) {
- max_dist_blks =
- zfetch_max_distance >> zf->zf_dnode->dn_datablkshift;
- /*
- * Previously, we were (zs_pf_blkid - blkid) ahead. We
- * want to now be double that, so read that amount again,
- * plus the amount we are catching up by (i.e. the amount
- * read just now).
- */
- pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks;
- max_blks = max_dist_blks - (pf_start - end_of_access_blkid);
- pf_nblks = MIN(pf_ahead_blks, max_blks);
+ if (unlikely(zs->zs_pf_dist < nbytes))
+ zs->zs_pf_dist = nbytes;
+ else if (zs->zs_pf_dist < zfetch_min_distance)
+ zs->zs_pf_dist *= 2;
+ else if (zs->zs_more)
+ zs->zs_pf_dist += zs->zs_pf_dist / 8;
+ zs->zs_more = B_FALSE;
+ if (zs->zs_pf_dist > zfetch_max_distance)
+ zs->zs_pf_dist = zfetch_max_distance;
+ pf_nblks = zs->zs_pf_dist >> zf->zf_dnode->dn_datablkshift;
} else {
pf_nblks = 0;
}
+ if (zs->zs_pf_start < end_of_access_blkid)
+ zs->zs_pf_start = end_of_access_blkid;
+ if (zs->zs_pf_end < end_of_access_blkid + pf_nblks)
+ zs->zs_pf_end = end_of_access_blkid + pf_nblks;
- zs->zs_pf_blkid = pf_start + pf_nblks;
-
- /*
- * Do the same for indirects, starting from where we stopped last,
- * or where we will stop reading data blocks (and the indirects
- * that point to them).
- */
- ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid);
- max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift;
/*
- * We want to double our distance ahead of the data prefetch
- * (or reader, if we are not prefetching data). Previously, we
- * were (zs_ipf_blkid - blkid) ahead. To double that, we read
- * that amount again, plus the amount we are catching up by
- * (i.e. the amount read now + the amount of data prefetched now).
+ * Do the same for indirects, starting where we will stop reading
+ * data blocks (and the indirects that point to them).
*/
- pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks;
- max_blks = max_dist_blks - (ipf_start - zs->zs_pf_blkid);
- ipf_nblks = MIN(pf_ahead_blks, max_blks);
- zs->zs_ipf_blkid = ipf_start + ipf_nblks;
+ if (unlikely(zs->zs_ipf_dist < nbytes))
+ zs->zs_ipf_dist = nbytes;
+ else
+ zs->zs_ipf_dist *= 2;
+ if (zs->zs_ipf_dist > zfetch_max_idistance)
+ zs->zs_ipf_dist = zfetch_max_idistance;
+ pf_nblks = zs->zs_ipf_dist >> zf->zf_dnode->dn_datablkshift;
+ if (zs->zs_ipf_start < zs->zs_pf_end)
+ zs->zs_ipf_start = zs->zs_pf_end;
+ if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks)
+ zs->zs_ipf_end = zs->zs_pf_end + pf_nblks;
zs->zs_blkid = end_of_access_blkid;
/* Protect the stream from reclamation. */
@@ -471,13 +484,13 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
mutex_enter(&zf->zf_lock);
if (zs->zs_missed) {
- pf_start = zs->zs_pf_blkid1;
- pf_end = zs->zs_pf_blkid1 = zs->zs_pf_blkid;
+ pf_start = zs->zs_pf_start;
+ pf_end = zs->zs_pf_start = zs->zs_pf_end;
} else {
pf_start = pf_end = 0;
}
- ipf_start = MAX(zs->zs_pf_blkid1, zs->zs_ipf_blkid1);
- ipf_end = zs->zs_ipf_blkid1 = zs->zs_ipf_blkid;
+ ipf_start = zs->zs_ipf_start;
+ ipf_end = zs->zs_ipf_start = zs->zs_ipf_end;
mutex_exit(&zf->zf_lock);
ASSERT3S(pf_start, <=, pf_end);
ASSERT3S(ipf_start, <=, ipf_end);
@@ -504,12 +517,12 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
for (int64_t blk = pf_start; blk < pf_end; blk++) {
issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk,
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
- dmu_zfetch_stream_done, zs);
+ dmu_zfetch_done, zs);
}
for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk,
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
- dmu_zfetch_stream_done, zs);
+ dmu_zfetch_done, zs);
}
if (!have_lock)
@@ -540,6 +553,12 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_streams, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_sec_reap, UINT, ZMOD_RW,
"Min time before stream reclaim");
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_sec_reap, UINT, ZMOD_RW,
+ "Max time before stream delete");
+
+ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_distance, UINT, ZMOD_RW,
+ "Min bytes to prefetch per stream");
+
ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
"Max bytes to prefetch per stream");
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index b8e3523ffc2d..861dd9239131 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -347,6 +347,8 @@ dsl_bookmark_set_phys(zfs_bookmark_phys_t *zbm, dsl_dataset_t *snap)
spa_t *spa = dsl_dataset_get_spa(snap);
objset_t *mos = spa_get_dsl(spa)->dp_meta_objset;
dsl_dataset_phys_t *dsp = dsl_dataset_phys(snap);
+
+ memset(zbm, 0, sizeof (zfs_bookmark_phys_t));
zbm->zbm_guid = dsp->ds_guid;
zbm->zbm_creation_txg = dsp->ds_creation_txg;
zbm->zbm_creation_time = dsp->ds_creation_time;
@@ -380,10 +382,6 @@ dsl_bookmark_set_phys(zfs_bookmark_phys_t *zbm, dsl_dataset_t *snap)
&zbm->zbm_compressed_freed_before_next_snap,
&zbm->zbm_uncompressed_freed_before_next_snap);
dsl_dataset_rele(nextds, FTAG);
- } else {
- bzero(&zbm->zbm_flags,
- sizeof (zfs_bookmark_phys_t) -
- offsetof(zfs_bookmark_phys_t, zbm_flags));
}
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index af7fa5af4e1e..4036c8671f2d 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -105,6 +105,13 @@ int zfs_dirty_data_max_percent = 10;
int zfs_dirty_data_max_max_percent = 25;
/*
+ * The upper limit of TX_WRITE log data. Write operations are throttled
+ * when approaching the limit until log data is cleared out after txg sync.
+ * It only counts TX_WRITE log records with WR_COPIED or WR_NEED_COPY.
+ */
+unsigned long zfs_wrlog_data_max = 0;
+
+/*
* If there's at least this much dirty data (as a percentage of
* zfs_dirty_data_max), push out a txg. This should be less than
* zfs_vdev_async_write_active_min_dirty_percent.
@@ -220,6 +227,11 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
+ aggsum_init(&dp->dp_wrlog_total, 0);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
+ }
+
dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
TASKQ_THREADS_CPU_PCT);
@@ -416,12 +428,18 @@ dsl_pool_close(dsl_pool_t *dp)
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
cv_destroy(&dp->dp_spaceavail_cv);
+
+ ASSERT0(aggsum_value(&dp->dp_wrlog_total));
+ aggsum_fini(&dp->dp_wrlog_total);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
+ aggsum_fini(&dp->dp_wrlog_pertxg[i]);
+ }
+
taskq_destroy(dp->dp_unlinked_drain_taskq);
taskq_destroy(dp->dp_zrele_taskq);
- if (dp->dp_blkstats != NULL) {
- mutex_destroy(&dp->dp_blkstats->zab_lock);
+ if (dp->dp_blkstats != NULL)
vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
- }
kmem_free(dp, sizeof (dsl_pool_t));
}
@@ -592,6 +610,42 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
cv_signal(&dp->dp_spaceavail_cv);
}
+void
+dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
+{
+ ASSERT3S(size, >=, 0);
+
+ aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], size);
+ aggsum_add(&dp->dp_wrlog_total, size);
+
+ /* Choose a value slightly bigger than min dirty sync bytes */
+ uint64_t sync_min =
+ zfs_wrlog_data_max * (zfs_dirty_data_sync_percent + 10) / 200;
+ if (aggsum_compare(&dp->dp_wrlog_pertxg[txg & TXG_MASK], sync_min) > 0)
+ txg_kick(dp, txg);
+}
+
+boolean_t
+dsl_pool_need_wrlog_delay(dsl_pool_t *dp)
+{
+ uint64_t delay_min_bytes =
+ zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100;
+
+ return (aggsum_compare(&dp->dp_wrlog_total, delay_min_bytes) > 0);
+}
+
+static void
+dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
+{
+ int64_t delta;
+ delta = -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]);
+ aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], delta);
+ aggsum_add(&dp->dp_wrlog_total, delta);
+ /* Compact per-CPU sums after the big change. */
+ (void) aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]);
+ (void) aggsum_value(&dp->dp_wrlog_total);
+}
+
#ifdef ZFS_DEBUG
static boolean_t
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
@@ -816,6 +870,9 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
dmu_buf_rele(ds->ds_dbuf, zilog);
}
+
+ dsl_pool_wrlog_clear(dp, txg);
+
ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}
@@ -904,18 +961,26 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
{
uint64_t delay_min_bytes =
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
- uint64_t dirty_min_bytes =
- zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
- uint64_t dirty;
mutex_enter(&dp->dp_lock);
- dirty = dp->dp_dirty_total;
+ uint64_t dirty = dp->dp_dirty_total;
mutex_exit(&dp->dp_lock);
- if (dirty > dirty_min_bytes)
- txg_kick(dp);
+
return (dirty > delay_min_bytes);
}
+static boolean_t
+dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
+{
+ ASSERT(MUTEX_HELD(&dp->dp_lock));
+
+ uint64_t dirty_min_bytes =
+ zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
+ uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
+
+ return (dirty > dirty_min_bytes);
+}
+
void
dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
{
@@ -923,7 +988,12 @@ dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
mutex_enter(&dp->dp_lock);
dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space;
dsl_pool_dirty_delta(dp, space);
+ boolean_t needsync = !dmu_tx_is_syncing(tx) &&
+ dsl_pool_need_dirty_sync(dp, tx->tx_txg);
mutex_exit(&dp->dp_lock);
+
+ if (needsync)
+ txg_kick(dp, tx->tx_txg);
}
}
@@ -1398,6 +1468,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
"Determines the dirty space limit");
+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+ "The size limit of write-transaction zil log data");
+
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index d7050487ff82..603fe84ecd04 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -129,6 +129,7 @@ static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
static uint64_t dsl_scan_count_data_disks(vdev_t *vd);
extern int zfs_vdev_async_write_active_min_dirty_percent;
+static int zfs_scan_blkstats = 0;
/*
* By default zfs will check to ensure it is not over the hard memory
@@ -219,9 +220,9 @@ typedef struct {
/*
* This controls what conditions are placed on dsl_scan_sync_state():
- * SYNC_OPTIONAL) write out scn_phys iff scn_bytes_pending == 0
- * SYNC_MANDATORY) write out scn_phys always. scn_bytes_pending must be 0.
- * SYNC_CACHED) if scn_bytes_pending == 0, write out scn_phys. Otherwise
+ * SYNC_OPTIONAL) write out scn_phys iff scn_queues_pending == 0
+ * SYNC_MANDATORY) write out scn_phys always. scn_queues_pending must be 0.
+ * SYNC_CACHED) if scn_queues_pending == 0, write out scn_phys. Otherwise
* write out the scn_phys_cached version.
* See dsl_scan_sync_state for details.
*/
@@ -279,12 +280,14 @@ typedef struct scan_io {
struct dsl_scan_io_queue {
dsl_scan_t *q_scn; /* associated dsl_scan_t */
vdev_t *q_vd; /* top-level vdev that this queue represents */
+ zio_t *q_zio; /* scn_zio_root child for waiting on IO */
/* trees used for sorting I/Os and extents of I/Os */
range_tree_t *q_exts_by_addr;
- zfs_btree_t q_exts_by_size;
+ zfs_btree_t q_exts_by_size;
avl_tree_t q_sios_by_addr;
uint64_t q_sio_memused;
+ uint64_t q_last_ext_addr;
/* members for zio rate limiting */
uint64_t q_maxinflight_bytes;
@@ -638,7 +641,7 @@ dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
* Because we can be running in the block sorting algorithm, we do not always
* want to write out the record, only when it is "safe" to do so. This safety
* condition is achieved by making sure that the sorting queues are empty
- * (scn_bytes_pending == 0). When this condition is not true, the sync'd state
+ * (scn_queues_pending == 0). When this condition is not true, the sync'd state
* is inconsistent with how much actual scanning progress has been made. The
* kind of sync to be performed is specified by the sync_type argument. If the
* sync is optional, we only sync if the queues are empty. If the sync is
@@ -661,8 +664,8 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx, state_sync_type_t sync_type)
int i;
spa_t *spa = scn->scn_dp->dp_spa;
- ASSERT(sync_type != SYNC_MANDATORY || scn->scn_bytes_pending == 0);
- if (scn->scn_bytes_pending == 0) {
+ ASSERT(sync_type != SYNC_MANDATORY || scn->scn_queues_pending == 0);
+ if (scn->scn_queues_pending == 0) {
for (i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
dsl_scan_io_queue_t *q = vd->vdev_scan_io_queue;
@@ -786,13 +789,19 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
/* back to the generic stuff */
- if (dp->dp_blkstats == NULL) {
- dp->dp_blkstats =
- vmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
- mutex_init(&dp->dp_blkstats->zab_lock, NULL,
- MUTEX_DEFAULT, NULL);
+ if (zfs_scan_blkstats) {
+ if (dp->dp_blkstats == NULL) {
+ dp->dp_blkstats =
+ vmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
+ }
+ memset(&dp->dp_blkstats->zab_type, 0,
+ sizeof (dp->dp_blkstats->zab_type));
+ } else {
+ if (dp->dp_blkstats) {
+ vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
+ dp->dp_blkstats = NULL;
+ }
}
- bzero(&dp->dp_blkstats->zab_type, sizeof (dp->dp_blkstats->zab_type));
if (spa_version(spa) < SPA_VERSION_DSL_SCRUB)
ot = DMU_OT_ZAP_OTHER;
@@ -1197,7 +1206,7 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx)
dmu_object_type_t ot = (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) ?
DMU_OT_SCAN_QUEUE : DMU_OT_ZAP_OTHER;
- ASSERT0(scn->scn_bytes_pending);
+ ASSERT0(scn->scn_queues_pending);
ASSERT(scn->scn_phys.scn_queue_obj != 0);
VERIFY0(dmu_object_free(dp->dp_meta_objset,
@@ -1269,11 +1278,12 @@ dsl_scan_should_clear(dsl_scan_t *scn)
queue = tvd->vdev_scan_io_queue;
if (queue != NULL) {
/*
- * # of extents in exts_by_size = # in exts_by_addr.
+ * # of extents in exts_by_addr = # in exts_by_size.
* B-tree efficiency is ~75%, but can be as low as 50%.
*/
mused += zfs_btree_numnodes(&queue->q_exts_by_size) *
- 3 * sizeof (range_seg_gap_t) + queue->q_sio_memused;
+ ((sizeof (range_seg_gap_t) + sizeof (uint64_t)) *
+ 3 / 2) + queue->q_sio_memused;
}
mutex_exit(&tvd->vdev_scan_io_queue_lock);
}
@@ -1281,7 +1291,7 @@ dsl_scan_should_clear(dsl_scan_t *scn)
dprintf("current scan memory usage: %llu bytes\n", (longlong_t)mused);
if (mused == 0)
- ASSERT0(scn->scn_bytes_pending);
+ ASSERT0(scn->scn_queues_pending);
/*
* If we are above our hard limit, we need to clear out memory.
@@ -1331,12 +1341,13 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
uint64_t sync_time_ns = curr_time_ns -
scn->scn_dp->dp_spa->spa_sync_starttime;
- int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max;
+ uint64_t dirty_min_bytes = zfs_dirty_data_max *
+ zfs_vdev_async_write_active_min_dirty_percent / 100;
int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
zfs_resilver_min_time_ms : zfs_scrub_min_time_ms;
if ((NSEC2MSEC(scan_time_ns) > mintime &&
- (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent ||
+ (scn->scn_dp->dp_dirty_total >= dirty_min_bytes ||
txg_sync_waiting(scn->scn_dp) ||
NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) ||
spa_shutting_down(scn->scn_dp->dp_spa) ||
@@ -1784,12 +1795,11 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
/*
* If we found the block we're trying to resume from, or
- * we went past it to a different object, zero it out to
- * indicate that it's OK to start checking for suspending
- * again.
+ * we went past it, zero it out to indicate that it's OK
+ * to start checking for suspending again.
*/
- if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
- zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) {
+ if (zbookmark_subtree_tbd(dnp, zb,
+ &scn->scn_phys.scn_bookmark)) {
dprintf("resuming at %llx/%llx/%llx/%llx\n",
(longlong_t)zb->zb_objset,
(longlong_t)zb->zb_object,
@@ -2815,12 +2825,13 @@ scan_io_queue_check_suspend(dsl_scan_t *scn)
uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
uint64_t sync_time_ns = curr_time_ns -
scn->scn_dp->dp_spa->spa_sync_starttime;
- int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max;
+ uint64_t dirty_min_bytes = zfs_dirty_data_max *
+ zfs_vdev_async_write_active_min_dirty_percent / 100;
int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
zfs_resilver_min_time_ms : zfs_scrub_min_time_ms;
return ((NSEC2MSEC(scan_time_ns) > mintime &&
- (dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent ||
+ (scn->scn_dp->dp_dirty_total >= dirty_min_bytes ||
txg_sync_waiting(scn->scn_dp) ||
NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) ||
spa_shutting_down(scn->scn_dp->dp_spa));
@@ -2839,7 +2850,6 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list)
{
dsl_scan_t *scn = queue->q_scn;
scan_io_t *sio;
- int64_t bytes_issued = 0;
boolean_t suspended = B_FALSE;
while ((sio = list_head(io_list)) != NULL) {
@@ -2851,16 +2861,12 @@ scan_io_queue_issue(dsl_scan_io_queue_t *queue, list_t *io_list)
}
sio2bp(sio, &bp);
- bytes_issued += SIO_GET_ASIZE(sio);
scan_exec_io(scn->scn_dp, &bp, sio->sio_flags,
&sio->sio_zb, queue);
(void) list_remove_head(io_list);
scan_io_queues_update_zio_stats(queue, &bp);
sio_free(sio);
}
-
- atomic_add_64(&scn->scn_bytes_pending, -bytes_issued);
-
return (suspended);
}
@@ -2905,6 +2911,8 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list)
next_sio = AVL_NEXT(&queue->q_sios_by_addr, sio);
avl_remove(&queue->q_sios_by_addr, sio);
+ if (avl_is_empty(&queue->q_sios_by_addr))
+ atomic_add_64(&queue->q_scn->scn_queues_pending, -1);
queue->q_sio_memused -= SIO_GET_MUSED(sio);
bytes_issued += SIO_GET_ASIZE(sio);
@@ -2926,12 +2934,13 @@ scan_io_queue_gather(dsl_scan_io_queue_t *queue, range_seg_t *rs, list_t *list)
range_tree_resize_segment(queue->q_exts_by_addr, rs,
SIO_GET_OFFSET(sio), rs_get_end(rs,
queue->q_exts_by_addr) - SIO_GET_OFFSET(sio));
-
+ queue->q_last_ext_addr = SIO_GET_OFFSET(sio);
return (B_TRUE);
} else {
uint64_t rstart = rs_get_start(rs, queue->q_exts_by_addr);
uint64_t rend = rs_get_end(rs, queue->q_exts_by_addr);
range_tree_remove(queue->q_exts_by_addr, rstart, rend - rstart);
+ queue->q_last_ext_addr = -1;
return (B_FALSE);
}
}
@@ -2956,31 +2965,8 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue)
ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
ASSERT(scn->scn_is_sorted);
- /* handle tunable overrides */
- if (scn->scn_checkpointing || scn->scn_clearing) {
- if (zfs_scan_issue_strategy == 1) {
- return (range_tree_first(rt));
- } else if (zfs_scan_issue_strategy == 2) {
- /*
- * We need to get the original entry in the by_addr
- * tree so we can modify it.
- */
- range_seg_t *size_rs =
- zfs_btree_first(&queue->q_exts_by_size, NULL);
- if (size_rs == NULL)
- return (NULL);
- uint64_t start = rs_get_start(size_rs, rt);
- uint64_t size = rs_get_end(size_rs, rt) - start;
- range_seg_t *addr_rs = range_tree_find(rt, start,
- size);
- ASSERT3P(addr_rs, !=, NULL);
- ASSERT3U(rs_get_start(size_rs, rt), ==,
- rs_get_start(addr_rs, rt));
- ASSERT3U(rs_get_end(size_rs, rt), ==,
- rs_get_end(addr_rs, rt));
- return (addr_rs);
- }
- }
+ if (!scn->scn_checkpointing && !scn->scn_clearing)
+ return (NULL);
/*
* During normal clearing, we want to issue our largest segments
@@ -2991,28 +2977,42 @@ scan_io_queue_fetch_ext(dsl_scan_io_queue_t *queue)
* so the way we are sorted now is as good as it will ever get.
* In this case, we instead switch to issuing extents in LBA order.
*/
- if (scn->scn_checkpointing) {
+ if ((zfs_scan_issue_strategy < 1 && scn->scn_checkpointing) ||
+ zfs_scan_issue_strategy == 1)
return (range_tree_first(rt));
- } else if (scn->scn_clearing) {
- /*
- * We need to get the original entry in the by_addr
- * tree so we can modify it.
- */
- range_seg_t *size_rs = zfs_btree_first(&queue->q_exts_by_size,
- NULL);
- if (size_rs == NULL)
- return (NULL);
- uint64_t start = rs_get_start(size_rs, rt);
- uint64_t size = rs_get_end(size_rs, rt) - start;
- range_seg_t *addr_rs = range_tree_find(rt, start, size);
- ASSERT3P(addr_rs, !=, NULL);
- ASSERT3U(rs_get_start(size_rs, rt), ==, rs_get_start(addr_rs,
- rt));
- ASSERT3U(rs_get_end(size_rs, rt), ==, rs_get_end(addr_rs, rt));
- return (addr_rs);
- } else {
- return (NULL);
+
+ /*
+ * Try to continue the previous extent if it has not been completed yet.
+ * After shrinking in scan_io_queue_gather() it may no longer be the best,
+ * but otherwise we would leave a shorter remnant behind every txg.
+ */
+ uint64_t start;
+ uint64_t size = 1 << rt->rt_shift;
+ range_seg_t *addr_rs;
+ if (queue->q_last_ext_addr != -1) {
+ start = queue->q_last_ext_addr;
+ addr_rs = range_tree_find(rt, start, size);
+ if (addr_rs != NULL)
+ return (addr_rs);
}
+
+ /*
+ * Nothing to continue, so find new best extent.
+ */
+ uint64_t *v = zfs_btree_first(&queue->q_exts_by_size, NULL);
+ if (v == NULL)
+ return (NULL);
+ queue->q_last_ext_addr = start = *v << rt->rt_shift;
+
+ /*
+ * We need to get the original entry in the by_addr tree so we can
+ * modify it.
+ */
+ addr_rs = range_tree_find(rt, start, size);
+ ASSERT3P(addr_rs, !=, NULL);
+ ASSERT3U(rs_get_start(addr_rs, rt), ==, start);
+ ASSERT3U(rs_get_end(addr_rs, rt), >, start);
+ return (addr_rs);
}
static void
@@ -3021,15 +3021,19 @@ scan_io_queues_run_one(void *arg)
dsl_scan_io_queue_t *queue = arg;
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
boolean_t suspended = B_FALSE;
- range_seg_t *rs = NULL;
- scan_io_t *sio = NULL;
+ range_seg_t *rs;
+ scan_io_t *sio;
+ zio_t *zio;
list_t sio_list;
ASSERT(queue->q_scn->scn_is_sorted);
list_create(&sio_list, sizeof (scan_io_t),
offsetof(scan_io_t, sio_nodes.sio_list_node));
+ zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
+ NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
mutex_enter(q_lock);
+ queue->q_zio = zio;
/* Calculate maximum in-flight bytes for this vdev. */
queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
@@ -3044,12 +3048,12 @@ scan_io_queues_run_one(void *arg)
/* loop until we run out of time or sios */
while ((rs = scan_io_queue_fetch_ext(queue)) != NULL) {
uint64_t seg_start = 0, seg_end = 0;
- boolean_t more_left = B_TRUE;
+ boolean_t more_left;
ASSERT(list_is_empty(&sio_list));
/* loop while we still have sios left to process in this rs */
- while (more_left) {
+ do {
scan_io_t *first_sio, *last_sio;
/*
@@ -3078,7 +3082,7 @@ scan_io_queues_run_one(void *arg)
if (suspended)
break;
- }
+ } while (more_left);
/* update statistics for debugging purposes */
scan_io_queues_update_seg_stats(queue, seg_start, seg_end);
@@ -3096,7 +3100,9 @@ scan_io_queues_run_one(void *arg)
scan_io_queue_insert_impl(queue, sio);
}
+ queue->q_zio = NULL;
mutex_exit(q_lock);
+ zio_nowait(zio);
list_destroy(&sio_list);
}
@@ -3117,7 +3123,7 @@ scan_io_queues_run(dsl_scan_t *scn)
ASSERT(scn->scn_is_sorted);
ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
- if (scn->scn_bytes_pending == 0)
+ if (scn->scn_queues_pending == 0)
return;
if (scn->scn_taskq == NULL) {
@@ -3742,7 +3748,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
zfs_dbgmsg("scan complete txg %llu",
(longlong_t)tx->tx_txg);
}
- } else if (scn->scn_is_sorted && scn->scn_bytes_pending != 0) {
+ } else if (scn->scn_is_sorted && scn->scn_queues_pending != 0) {
ASSERT(scn->scn_clearing);
/* need to issue scrubbing IOs from per-vdev queues */
@@ -3770,7 +3776,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)tx->tx_txg);
ASSERT3U(scn->scn_done_txg, !=, 0);
ASSERT0(spa->spa_scrub_inflight);
- ASSERT0(scn->scn_bytes_pending);
+ ASSERT0(scn->scn_queues_pending);
dsl_scan_done(scn, B_TRUE, tx);
sync_type = SYNC_MANDATORY;
}
@@ -3779,10 +3785,8 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
}
static void
-count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp)
+count_block_issued(spa_t *spa, const blkptr_t *bp, boolean_t all)
{
- int i;
-
/*
* Don't count embedded bp's, since we already did the work of
* scanning these when we scanned the containing block.
@@ -3797,18 +3801,13 @@ count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp)
* zio code will only try the first one unless there is an issue.
* Therefore, we should only count the first DVA for these IOs.
*/
- if (scn->scn_is_sorted) {
- atomic_add_64(&scn->scn_dp->dp_spa->spa_scan_pass_issued,
- DVA_GET_ASIZE(&bp->blk_dva[0]));
- } else {
- spa_t *spa = scn->scn_dp->dp_spa;
-
- for (i = 0; i < BP_GET_NDVAS(bp); i++) {
- atomic_add_64(&spa->spa_scan_pass_issued,
- DVA_GET_ASIZE(&bp->blk_dva[i]));
- }
- }
+ atomic_add_64(&spa->spa_scan_pass_issued,
+ all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
+}
+static void
+count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
+{
/*
* If we resume after a reboot, zab will be NULL; don't record
* incomplete stats in that case.
@@ -3816,9 +3815,7 @@ count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp)
if (zab == NULL)
return;
- mutex_enter(&zab->zab_lock);
-
- for (i = 0; i < 4; i++) {
+ for (int i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
@@ -3853,28 +3850,27 @@ count_block(dsl_scan_t *scn, zfs_all_blkstats_t *zab, const blkptr_t *bp)
break;
}
}
-
- mutex_exit(&zab->zab_lock);
}
static void
scan_io_queue_insert_impl(dsl_scan_io_queue_t *queue, scan_io_t *sio)
{
avl_index_t idx;
- int64_t asize = SIO_GET_ASIZE(sio);
dsl_scan_t *scn = queue->q_scn;
ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
+ if (unlikely(avl_is_empty(&queue->q_sios_by_addr)))
+ atomic_add_64(&scn->scn_queues_pending, 1);
if (avl_find(&queue->q_sios_by_addr, sio, &idx) != NULL) {
/* block is already scheduled for reading */
- atomic_add_64(&scn->scn_bytes_pending, -asize);
sio_free(sio);
return;
}
avl_insert(&queue->q_sios_by_addr, sio, idx);
queue->q_sio_memused += SIO_GET_MUSED(sio);
- range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio), asize);
+ range_tree_add(queue->q_exts_by_addr, SIO_GET_OFFSET(sio),
+ SIO_GET_ASIZE(sio));
}
/*
@@ -3887,7 +3883,6 @@ static void
scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
int zio_flags, const zbookmark_phys_t *zb)
{
- dsl_scan_t *scn = queue->q_scn;
scan_io_t *sio = sio_alloc(BP_GET_NDVAS(bp));
ASSERT0(BP_IS_GANG(bp));
@@ -3897,13 +3892,7 @@ scan_io_queue_insert(dsl_scan_io_queue_t *queue, const blkptr_t *bp, int dva_i,
sio->sio_flags = zio_flags;
sio->sio_zb = *zb;
- /*
- * Increment the bytes pending counter now so that we can't
- * get an integer underflow in case the worker processes the
- * zio before we get to incrementing this counter.
- */
- atomic_add_64(&scn->scn_bytes_pending, SIO_GET_ASIZE(sio));
-
+ queue->q_last_ext_addr = -1;
scan_io_queue_insert_impl(queue, sio);
}
@@ -3958,10 +3947,10 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
boolean_t needs_io = B_FALSE;
int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
-
+ count_block(dp->dp_blkstats, bp);
if (phys_birth <= scn->scn_phys.scn_min_txg ||
phys_birth >= scn->scn_phys.scn_max_txg) {
- count_block(scn, dp->dp_blkstats, bp);
+ count_block_issued(spa, bp, B_TRUE);
return (0);
}
@@ -3989,8 +3978,9 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
* Keep track of how much data we've examined so that
* zpool(8) status can make useful progress reports.
*/
- scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva);
- spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva);
+ uint64_t asize = DVA_GET_ASIZE(dva);
+ scn->scn_phys.scn_examined += asize;
+ spa->spa_scan_pass_exam += asize;
/* if it's a resilver, this may not be in the target range */
if (!needs_io)
@@ -4001,7 +3991,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
if (needs_io && !zfs_no_scrub_io) {
dsl_scan_enqueue(dp, bp, zio_flags, zb);
} else {
- count_block(scn, dp->dp_blkstats, bp);
+ count_block_issued(spa, bp, B_TRUE);
}
/* do not relocate this block */
@@ -4052,6 +4042,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
dsl_scan_t *scn = dp->dp_scan;
size_t size = BP_GET_PSIZE(bp);
abd_t *data = abd_alloc_for_io(size, B_FALSE);
+ zio_t *pio;
if (queue == NULL) {
ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
@@ -4060,6 +4051,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
mutex_exit(&spa->spa_scrub_lock);
+ pio = scn->scn_zio_root;
} else {
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
@@ -4068,12 +4060,14 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
cv_wait(&queue->q_zio_cv, q_lock);
queue->q_inflight_bytes += BP_GET_PSIZE(bp);
+ pio = queue->q_zio;
mutex_exit(q_lock);
}
- count_block(scn, dp->dp_blkstats, bp);
- zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
- dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
+ ASSERT(pio != NULL);
+ count_block_issued(spa, bp, queue == NULL);
+ zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
+ queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
}
/*
@@ -4107,33 +4101,88 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
* extents that are more completely filled (in a 3:2 ratio) vs just larger.
* Note that as an optimization, we replace multiplication and division by
* 100 with bitshifting by 7 (which effectively multiplies and divides by 128).
+ *
+ * Since we do not care whether one extent is only a few percent better
+ * than another, compress the score into 6 bits via binary logarithm
+ * (highbit64()) and store it in the high bits of the offset, which are
+ * otherwise unused due to ashift. This reduces each q_exts_by_size
+ * B-tree element to a single 64-bit value that can be compared in one
+ * operation. It also makes scrubs more sequential and reduces the chance
+ * that a minor change to an extent moves it within the B-tree. (A
+ * standalone sketch of this key packing follows ext_size_ops below.)
*/
static int
ext_size_compare(const void *x, const void *y)
{
- const range_seg_gap_t *rsa = x, *rsb = y;
+ const uint64_t *a = x, *b = y;
- uint64_t sa = rsa->rs_end - rsa->rs_start;
- uint64_t sb = rsb->rs_end - rsb->rs_start;
- uint64_t score_a, score_b;
+ return (TREE_CMP(*a, *b));
+}
- score_a = rsa->rs_fill + ((((rsa->rs_fill << 7) / sa) *
- fill_weight * rsa->rs_fill) >> 7);
- score_b = rsb->rs_fill + ((((rsb->rs_fill << 7) / sb) *
- fill_weight * rsb->rs_fill) >> 7);
+static void
+ext_size_create(range_tree_t *rt, void *arg)
+{
+ (void) rt;
+ zfs_btree_t *size_tree = arg;
- if (score_a > score_b)
- return (-1);
- if (score_a == score_b) {
- if (rsa->rs_start < rsb->rs_start)
- return (-1);
- if (rsa->rs_start == rsb->rs_start)
- return (0);
- return (1);
- }
- return (1);
+ zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
+}
+
+static void
+ext_size_destroy(range_tree_t *rt, void *arg)
+{
+ (void) rt;
+ zfs_btree_t *size_tree = arg;
+ ASSERT0(zfs_btree_numnodes(size_tree));
+
+ zfs_btree_destroy(size_tree);
+}
+
+static uint64_t
+ext_size_value(range_tree_t *rt, range_seg_gap_t *rsg)
+{
+ (void) rt;
+ uint64_t size = rsg->rs_end - rsg->rs_start;
+ uint64_t score = rsg->rs_fill + ((((rsg->rs_fill << 7) / size) *
+ fill_weight * rsg->rs_fill) >> 7);
+ ASSERT3U(rt->rt_shift, >=, 8);
+ return (((uint64_t)(64 - highbit64(score)) << 56) | rsg->rs_start);
+}
+
+static void
+ext_size_add(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+ zfs_btree_t *size_tree = arg;
+ ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+ uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+ zfs_btree_add(size_tree, &v);
}
+static void
+ext_size_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
+{
+ zfs_btree_t *size_tree = arg;
+ ASSERT3U(rt->rt_type, ==, RANGE_SEG_GAP);
+ uint64_t v = ext_size_value(rt, (range_seg_gap_t *)rs);
+ zfs_btree_remove(size_tree, &v);
+}
+
+static void
+ext_size_vacate(range_tree_t *rt, void *arg)
+{
+ zfs_btree_t *size_tree = arg;
+ zfs_btree_clear(size_tree);
+ zfs_btree_destroy(size_tree);
+
+ ext_size_create(rt, arg);
+}
+
+static const range_tree_ops_t ext_size_ops = {
+ .rtop_create = ext_size_create,
+ .rtop_destroy = ext_size_destroy,
+ .rtop_add = ext_size_add,
+ .rtop_remove = ext_size_remove,
+ .rtop_vacate = ext_size_vacate
+};
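For reference, the key packing described above ext_size_compare() can be modeled in user space. The sketch below is illustrative only: highbit64() is reimplemented locally, fill_weight is assumed to be its default of 3, and the extents are invented. It shows how a fuller extent receives a smaller 8-bit prefix and therefore sorts earlier as a bare uint64_t key.

#include <stdint.h>
#include <stdio.h>

/*
 * Local stand-in for the kernel's highbit64(): 1-based index of the
 * highest set bit, or 0 when v is 0.
 */
static int
highbit64(uint64_t v)
{
	int h = 0;
	while (v != 0) {
		v >>= 1;
		h++;
	}
	return (h);
}

/* Mirror of ext_size_value() above, with fill_weight assumed to be 3. */
static uint64_t
ext_key(uint64_t start, uint64_t end, uint64_t fill)
{
	uint64_t size = end - start;
	uint64_t score = fill + ((((fill << 7) / size) * 3 * fill) >> 7);
	/* Higher scores get a smaller prefix and thus sort first. */
	return (((uint64_t)(64 - highbit64(score)) << 56) | start);
}

int
main(void)
{
	uint64_t full = ext_key(1 << 20, 2 << 20, 1 << 20);	/* 100% full */
	uint64_t thin = ext_key(4 << 20, 6 << 20, 1 << 19);	/*  25% full */
	/* The fuller extent gets the smaller key and is scanned first. */
	printf("full=%016jx thin=%016jx full<thin=%d\n",
	    (uintmax_t)full, (uintmax_t)thin, full < thin);
	return (0);
}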
+
/*
* Comparator for the q_sios_by_addr tree. Sorting is simply performed
* based on LBA-order (from lowest to highest).
@@ -4156,9 +4205,10 @@ scan_io_queue_create(vdev_t *vd)
q->q_scn = scn;
q->q_vd = vd;
q->q_sio_memused = 0;
+ q->q_last_ext_addr = -1;
cv_init(&q->q_zio_cv, NULL, CV_DEFAULT, NULL);
- q->q_exts_by_addr = range_tree_create_impl(&rt_btree_ops, RANGE_SEG_GAP,
- &q->q_exts_by_size, 0, 0, ext_size_compare, zfs_scan_max_ext_gap);
+ q->q_exts_by_addr = range_tree_create_gap(&ext_size_ops, RANGE_SEG_GAP,
+ &q->q_exts_by_size, 0, vd->vdev_ashift, zfs_scan_max_ext_gap);
avl_create(&q->q_sios_by_addr, sio_addr_compare,
sizeof (scan_io_t), offsetof(scan_io_t, sio_nodes.sio_addr_node));
@@ -4176,21 +4226,20 @@ dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue)
dsl_scan_t *scn = queue->q_scn;
scan_io_t *sio;
void *cookie = NULL;
- int64_t bytes_dequeued = 0;
ASSERT(MUTEX_HELD(&queue->q_vd->vdev_scan_io_queue_lock));
+ if (!avl_is_empty(&queue->q_sios_by_addr))
+ atomic_add_64(&scn->scn_queues_pending, -1);
while ((sio = avl_destroy_nodes(&queue->q_sios_by_addr, &cookie)) !=
NULL) {
ASSERT(range_tree_contains(queue->q_exts_by_addr,
SIO_GET_OFFSET(sio), SIO_GET_ASIZE(sio)));
- bytes_dequeued += SIO_GET_ASIZE(sio);
queue->q_sio_memused -= SIO_GET_MUSED(sio);
sio_free(sio);
}
ASSERT0(queue->q_sio_memused);
- atomic_add_64(&scn->scn_bytes_pending, -bytes_dequeued);
range_tree_vacate(queue->q_exts_by_addr, NULL, queue);
range_tree_destroy(queue->q_exts_by_addr);
avl_destroy(&queue->q_sios_by_addr);
@@ -4286,28 +4335,22 @@ dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i)
sio_free(srch_sio);
if (sio != NULL) {
- int64_t asize = SIO_GET_ASIZE(sio);
blkptr_t tmpbp;
/* Got it while it was cold in the queue */
ASSERT3U(start, ==, SIO_GET_OFFSET(sio));
- ASSERT3U(size, ==, asize);
+ ASSERT3U(size, ==, SIO_GET_ASIZE(sio));
avl_remove(&queue->q_sios_by_addr, sio);
+ if (avl_is_empty(&queue->q_sios_by_addr))
+ atomic_add_64(&scn->scn_queues_pending, -1);
queue->q_sio_memused -= SIO_GET_MUSED(sio);
ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
range_tree_remove_fill(queue->q_exts_by_addr, start, size);
- /*
- * We only update scn_bytes_pending in the cold path,
- * otherwise it will already have been accounted for as
- * part of the zio's execution.
- */
- atomic_add_64(&scn->scn_bytes_pending, -asize);
-
/* count the block as though we issued it */
sio2bp(sio, &tmpbp);
- count_block(scn, dp->dp_blkstats, &tmpbp);
+ count_block_issued(spa, &tmpbp, B_FALSE);
sio_free(sio);
}
@@ -4399,6 +4442,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, free_bpobj_enabled, INT, ZMOD_RW,
"Enable processing of the free_bpobj");
+ZFS_MODULE_PARAM(zfs, zfs_, scan_blkstats, INT, ZMOD_RW,
+ "Enable block statistics calculation during scrub");
+
ZFS_MODULE_PARAM(zfs, zfs_, scan_mem_lim_fact, INT, ZMOD_RW,
"Fraction of RAM for scan hard limit");
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 9e216c38d954..ecc70298dc79 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -48,10 +48,10 @@
/*
* Metaslab granularity, in bytes. This is roughly similar to what would be
* referred to as the "stripe size" in traditional RAID arrays. In normal
- * operation, we will try to write this amount of data to a top-level vdev
- * before moving on to the next one.
+ * operation, we will try to write this amount of data to each disk before
+ * moving on to the next top-level vdev.
*/
-unsigned long metaslab_aliquot = 512 << 10;
+static unsigned long metaslab_aliquot = 1024 * 1024;
/*
* For testing, make some blocks above a certain size be gang blocks.
@@ -899,7 +899,8 @@ metaslab_group_activate(metaslab_group_t *mg)
if (++mg->mg_activation_count <= 0)
return;
- mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children);
+ mg->mg_aliquot = metaslab_aliquot * MAX(1,
+ vdev_get_ndisks(mg->mg_vd) - vdev_get_nparity(mg->mg_vd));
metaslab_group_alloc_update(mg);
if ((mgprev = mc->mc_allocator[0].mca_rotor) == NULL) {
@@ -2756,7 +2757,8 @@ metaslab_fini_flush_data(metaslab_t *msp)
mutex_exit(&spa->spa_flushed_ms_lock);
spa_log_sm_decrement_mscount(spa, metaslab_unflushed_txg(msp));
- spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp));
+ spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp),
+ metaslab_unflushed_dirty(msp));
}
uint64_t
@@ -3734,50 +3736,45 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
metaslab_flush_update(msp, tx);
}
-/*
- * Called when the metaslab has been flushed (its own spacemap now reflects
- * all the contents of the pool-wide spacemap log). Updates the metaslab's
- * metadata and any pool-wide related log space map data (e.g. summary,
- * obsolete logs, etc..) to reflect that.
- */
static void
-metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
+metaslab_unflushed_add(metaslab_t *msp, dmu_tx_t *tx)
{
- metaslab_group_t *mg = msp->ms_group;
- spa_t *spa = mg->mg_vd->vdev_spa;
-
- ASSERT(MUTEX_HELD(&msp->ms_lock));
-
- ASSERT3U(spa_sync_pass(spa), ==, 1);
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ ASSERT(spa_syncing_log_sm(spa) != NULL);
+ ASSERT(msp->ms_sm != NULL);
ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
- /*
- * Just because a metaslab got flushed, that doesn't mean that
- * it will pass through metaslab_sync_done(). Thus, make sure to
- * update ms_synced_length here in case it doesn't.
- */
- msp->ms_synced_length = space_map_length(msp->ms_sm);
+ mutex_enter(&spa->spa_flushed_ms_lock);
+ metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+ metaslab_set_unflushed_dirty(msp, B_TRUE);
+ avl_add(&spa->spa_metaslabs_by_flushed, msp);
+ mutex_exit(&spa->spa_flushed_ms_lock);
- /*
- * We may end up here from metaslab_condense() without the
- * feature being active. In that case this is a no-op.
- */
- if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
- return;
+ spa_log_sm_increment_current_mscount(spa);
+ spa_log_summary_add_flushed_metaslab(spa, B_TRUE);
+}
+void
+metaslab_unflushed_bump(metaslab_t *msp, dmu_tx_t *tx, boolean_t dirty)
+{
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
ASSERT(spa_syncing_log_sm(spa) != NULL);
ASSERT(msp->ms_sm != NULL);
ASSERT(metaslab_unflushed_txg(msp) != 0);
ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL), ==, msp);
+ ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
+ ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(spa));
/* update metaslab's position in our flushing tree */
uint64_t ms_prev_flushed_txg = metaslab_unflushed_txg(msp);
+ boolean_t ms_prev_flushed_dirty = metaslab_unflushed_dirty(msp);
mutex_enter(&spa->spa_flushed_ms_lock);
avl_remove(&spa->spa_metaslabs_by_flushed, msp);
metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+ metaslab_set_unflushed_dirty(msp, dirty);
avl_add(&spa->spa_metaslabs_by_flushed, msp);
mutex_exit(&spa->spa_flushed_ms_lock);
@@ -3785,17 +3782,47 @@ metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
spa_log_sm_decrement_mscount(spa, ms_prev_flushed_txg);
spa_log_sm_increment_current_mscount(spa);
+ /* update log space map summary */
+ spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg,
+ ms_prev_flushed_dirty);
+ spa_log_summary_add_flushed_metaslab(spa, dirty);
+
/* cleanup obsolete logs if any */
- uint64_t log_blocks_before = spa_log_sm_nblocks(spa);
spa_cleanup_old_sm_logs(spa, tx);
- uint64_t log_blocks_after = spa_log_sm_nblocks(spa);
- VERIFY3U(log_blocks_after, <=, log_blocks_before);
+}
- /* update log space map summary */
- uint64_t blocks_gone = log_blocks_before - log_blocks_after;
- spa_log_summary_add_flushed_metaslab(spa);
- spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg);
- spa_log_summary_decrement_blkcount(spa, blocks_gone);
+/*
+ * Called when the metaslab has been flushed (its own spacemap now reflects
+ * all the contents of the pool-wide spacemap log). Updates the metaslab's
+ * metadata and any pool-wide related log space map data (e.g. summary,
+ * obsolete logs, etc..) to reflect that.
+ */
+static void
+metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
+{
+ metaslab_group_t *mg = msp->ms_group;
+ spa_t *spa = mg->mg_vd->vdev_spa;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+ ASSERT3U(spa_sync_pass(spa), ==, 1);
+
+ /*
+ * Just because a metaslab got flushed, that doesn't mean that
+ * it will pass through metaslab_sync_done(). Thus, make sure to
+ * update ms_synced_length here in case it doesn't.
+ */
+ msp->ms_synced_length = space_map_length(msp->ms_sm);
+
+ /*
+ * We may end up here from metaslab_condense() without the
+ * feature being active. In that case this is a no-op.
+ */
+ if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP) ||
+ metaslab_unflushed_txg(msp) == 0)
+ return;
+
+ metaslab_unflushed_bump(msp, tx, B_FALSE);
}
boolean_t
@@ -4011,23 +4038,6 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
ASSERT0(metaslab_allocated_space(msp));
}
- if (metaslab_unflushed_txg(msp) == 0 &&
- spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
- ASSERT(spa_syncing_log_sm(spa) != NULL);
-
- metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
- spa_log_sm_increment_current_mscount(spa);
- spa_log_summary_add_flushed_metaslab(spa);
-
- ASSERT(msp->ms_sm != NULL);
- mutex_enter(&spa->spa_flushed_ms_lock);
- avl_add(&spa->spa_metaslabs_by_flushed, msp);
- mutex_exit(&spa->spa_flushed_ms_lock);
-
- ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
- ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
- }
-
if (!range_tree_is_empty(msp->ms_checkpointing) &&
vd->vdev_checkpoint_sm == NULL) {
ASSERT(spa_has_checkpoint(spa));
@@ -4075,6 +4085,10 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
space_map_t *log_sm = spa_syncing_log_sm(spa);
if (log_sm != NULL) {
ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP));
+ if (metaslab_unflushed_txg(msp) == 0)
+ metaslab_unflushed_add(msp, tx);
+ else if (!metaslab_unflushed_dirty(msp))
+ metaslab_unflushed_bump(msp, tx, B_TRUE);
space_map_write(log_sm, alloctree, SM_ALLOC,
vd->vdev_id, tx);
@@ -6137,6 +6151,12 @@ metaslab_enable(metaslab_t *msp, boolean_t sync, boolean_t unload)
mutex_exit(&mg->mg_ms_disabled_lock);
}
+void
+metaslab_set_unflushed_dirty(metaslab_t *ms, boolean_t dirty)
+{
+ ms->ms_unflushed_dirty = dirty;
+}
+
static void
metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
{
@@ -6173,15 +6193,16 @@ metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
void
metaslab_set_unflushed_txg(metaslab_t *ms, uint64_t txg, dmu_tx_t *tx)
{
- spa_t *spa = ms->ms_group->mg_vd->vdev_spa;
-
- if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
- return;
-
ms->ms_unflushed_txg = txg;
metaslab_update_ondisk_flush_data(ms, tx);
}
+boolean_t
+metaslab_unflushed_dirty(metaslab_t *ms)
+{
+ return (ms->ms_unflushed_dirty);
+}
+
uint64_t
metaslab_unflushed_txg(metaslab_t *ms)
{
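To illustrate the mg_aliquot change in metaslab_group_activate() above: the aliquot now scales with the number of data-bearing disks rather than the raw child count, so a wide raidz group is striped in proportion to what actually absorbs data. A standalone sketch with made-up geometry (a 6-wide raidz2 top-level vdev):

#include <stdio.h>

int
main(void)
{
	unsigned long metaslab_aliquot = 1024 * 1024;	/* new 1 MiB default */
	int ndisks = 6, nparity = 2;			/* assumed raidz2 vdev */

	/* mg_aliquot = metaslab_aliquot * MAX(1, ndisks - nparity) */
	int data_disks = ndisks - nparity;
	if (data_disks < 1)
		data_disks = 1;
	printf("mg_aliquot = %lu bytes across %d data disks\n",
	    metaslab_aliquot * data_disks, data_disks);
	return (0);
}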
diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c
index 67910f9ffde0..a1a5f7985b5a 100644
--- a/sys/contrib/openzfs/module/zfs/range_tree.c
+++ b/sys/contrib/openzfs/module/zfs/range_tree.c
@@ -188,10 +188,8 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
}
range_tree_t *
-range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
- uint64_t start, uint64_t shift,
- int (*zfs_btree_compare) (const void *, const void *),
- uint64_t gap)
+range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
+ void *arg, uint64_t start, uint64_t shift, uint64_t gap)
{
range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
@@ -223,7 +221,6 @@ range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
rt->rt_type = type;
rt->rt_start = start;
rt->rt_shift = shift;
- rt->rt_btree_compare = zfs_btree_compare;
if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
rt->rt_ops->rtop_create(rt, rt->rt_arg);
@@ -232,10 +229,10 @@ range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
}
range_tree_t *
-range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
+range_tree_create(const range_tree_ops_t *ops, range_seg_type_t type,
void *arg, uint64_t start, uint64_t shift)
{
- return (range_tree_create_impl(ops, type, arg, start, shift, NULL, 0));
+ return (range_tree_create_gap(ops, type, arg, start, shift, 0));
}
void
@@ -741,74 +738,6 @@ range_tree_is_empty(range_tree_t *rt)
return (range_tree_space(rt) == 0);
}
-void
-rt_btree_create(range_tree_t *rt, void *arg)
-{
- zfs_btree_t *size_tree = arg;
-
- size_t size;
- switch (rt->rt_type) {
- case RANGE_SEG32:
- size = sizeof (range_seg32_t);
- break;
- case RANGE_SEG64:
- size = sizeof (range_seg64_t);
- break;
- case RANGE_SEG_GAP:
- size = sizeof (range_seg_gap_t);
- break;
- default:
- panic("Invalid range seg type %d", rt->rt_type);
- }
- zfs_btree_create(size_tree, rt->rt_btree_compare, size);
-}
-
-void
-rt_btree_destroy(range_tree_t *rt, void *arg)
-{
- (void) rt;
- zfs_btree_t *size_tree = arg;
- ASSERT0(zfs_btree_numnodes(size_tree));
-
- zfs_btree_destroy(size_tree);
-}
-
-void
-rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
- (void) rt;
- zfs_btree_t *size_tree = arg;
-
- zfs_btree_add(size_tree, rs);
-}
-
-void
-rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
-{
- (void) rt;
- zfs_btree_t *size_tree = arg;
-
- zfs_btree_remove(size_tree, rs);
-}
-
-void
-rt_btree_vacate(range_tree_t *rt, void *arg)
-{
- zfs_btree_t *size_tree = arg;
- zfs_btree_clear(size_tree);
- zfs_btree_destroy(size_tree);
-
- rt_btree_create(rt, arg);
-}
-
-range_tree_ops_t rt_btree_ops = {
- .rtop_create = rt_btree_create,
- .rtop_destroy = rt_btree_destroy,
- .rtop_add = rt_btree_add,
- .rtop_remove = rt_btree_remove,
- .rtop_vacate = rt_btree_vacate
-};
-
/*
* Remove any overlapping ranges between the given segment [start, end)
* from removefrom. Add non-overlapping leftovers to addto.
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index dfa73483accf..aa65fb658113 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -283,15 +283,15 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
const char *propname = zpool_prop_to_name(prop);
nvlist_t *propval;
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
+ propval = fnvlist_alloc();
+ fnvlist_add_uint64(propval, ZPROP_SOURCE, src);
if (strval != NULL)
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
+ fnvlist_add_string(propval, ZPROP_VALUE, strval);
else
- VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
+ fnvlist_add_uint64(propval, ZPROP_VALUE, intval);
- VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
+ fnvlist_add_nvlist(nvl, propname, propval);
nvlist_free(propval);
}
@@ -1787,8 +1787,8 @@ spa_load_spares(spa_t *spa)
if (spa->spa_spares.sav_config == NULL)
nspares = 0;
else
- VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+ VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES, &spares, &nspares));
spa->spa_spares.sav_count = (int)nspares;
spa->spa_spares.sav_vdevs = NULL;
@@ -1850,16 +1850,15 @@ spa_load_spares(spa_t *spa)
* Recompute the stashed list of spares, with status information
* this time.
*/
- VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
- DATA_TYPE_NVLIST_ARRAY) == 0);
+ fnvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES);
spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
KM_SLEEP);
for (i = 0; i < spa->spa_spares.sav_count; i++)
spares[i] = vdev_config_generate(spa,
spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE);
- VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
+ fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count);
for (i = 0; i < spa->spa_spares.sav_count; i++)
nvlist_free(spares[i]);
kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
@@ -1909,16 +1908,15 @@ spa_load_l2cache(spa_t *spa)
goto out;
}
- VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
- ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+ VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config,
+ ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache));
newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
/*
* Process new nvlist of vdevs.
*/
for (i = 0; i < nl2cache; i++) {
- VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
- &guid) == 0);
+ guid = fnvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID);
newvdevs[i] = NULL;
for (j = 0; j < oldnvdevs; j++) {
@@ -1979,8 +1977,7 @@ spa_load_l2cache(spa_t *spa)
* Recompute the stashed list of l2cache devices, with status
* information this time.
*/
- VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
- DATA_TYPE_NVLIST_ARRAY) == 0);
+ fnvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE);
if (sav->sav_count > 0)
l2cache = kmem_alloc(sav->sav_count * sizeof (void *),
@@ -1988,8 +1985,8 @@ spa_load_l2cache(spa_t *spa)
for (i = 0; i < sav->sav_count; i++)
l2cache[i] = vdev_config_generate(spa,
sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
- VERIFY(nvlist_add_nvlist_array(sav->sav_config,
- ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
+ fnvlist_add_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, l2cache,
+ sav->sav_count);
out:
/*
@@ -2099,7 +2096,7 @@ spa_check_for_missing_logs(spa_t *spa)
child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *),
KM_SLEEP);
- VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ nv = fnvlist_alloc();
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
@@ -2259,6 +2256,7 @@ spa_claim_notify(zio_t *zio)
}
typedef struct spa_load_error {
+ boolean_t sle_verify_data;
uint64_t sle_meta_count;
uint64_t sle_data_count;
} spa_load_error_t;
@@ -2299,6 +2297,9 @@ static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
+ zio_t *rio = arg;
+ spa_load_error_t *sle = rio->io_private;
+
(void) zilog, (void) dnp;
if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
@@ -2311,12 +2312,12 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
if (!spa_load_verify_metadata)
return (0);
- if (!BP_IS_METADATA(bp) && !spa_load_verify_data)
+ if (!BP_IS_METADATA(bp) &&
+ (!spa_load_verify_data || !sle->sle_verify_data))
return (0);
uint64_t maxinflight_bytes =
arc_target_bytes() >> spa_load_verify_shift;
- zio_t *rio = arg;
size_t size = BP_GET_PSIZE(bp);
mutex_enter(&spa->spa_scrub_lock);
@@ -2354,7 +2355,8 @@ spa_load_verify(spa_t *spa)
zpool_get_load_policy(spa->spa_config, &policy);
- if (policy.zlp_rewind & ZPOOL_NEVER_REWIND)
+ if (policy.zlp_rewind & ZPOOL_NEVER_REWIND ||
+ policy.zlp_maxmeta == UINT64_MAX)
return (0);
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
@@ -2365,6 +2367,13 @@ spa_load_verify(spa_t *spa)
if (error != 0)
return (error);
+ /*
+	 * Verify data only if we are rewinding or an error limit was set.
+	 * Otherwise nothing but dbgmsg cares about it, so don't waste the time.
+ */
+ sle.sle_verify_data = (policy.zlp_rewind & ZPOOL_REWIND_MASK) ||
+ (policy.zlp_maxdata < UINT64_MAX);
+
rio = zio_root(spa, NULL, &sle,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
@@ -2404,12 +2413,14 @@ spa_load_verify(spa_t *spa)
spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
- VERIFY(nvlist_add_uint64(spa->spa_load_info,
- ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0);
- VERIFY(nvlist_add_int64(spa->spa_load_info,
- ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
- VERIFY(nvlist_add_uint64(spa->spa_load_info,
- ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0);
+ fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_TIME,
+ spa->spa_load_txg_ts);
+ fnvlist_add_int64(spa->spa_load_info, ZPOOL_CONFIG_REWIND_TIME,
+ loss);
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_LOAD_META_ERRORS, sle.sle_meta_count);
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count);
} else {
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
}
@@ -3662,7 +3673,7 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
* from the label.
*/
nvlist_free(spa->spa_label_features);
- VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
+ spa->spa_label_features = fnvlist_dup(features);
}
nvlist_free(label);
@@ -3675,21 +3686,20 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
if (ub->ub_version >= SPA_VERSION_FEATURES) {
nvlist_t *unsup_feat;
- VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
- 0);
+ unsup_feat = fnvlist_alloc();
for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
NULL); nvp != NULL;
nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
if (!zfeature_is_supported(nvpair_name(nvp))) {
- VERIFY(nvlist_add_string(unsup_feat,
- nvpair_name(nvp), "") == 0);
+ fnvlist_add_string(unsup_feat,
+ nvpair_name(nvp), "");
}
}
if (!nvlist_empty(unsup_feat)) {
- VERIFY(nvlist_add_nvlist(spa->spa_load_info,
- ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+ fnvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat);
nvlist_free(unsup_feat);
spa_load_failed(spa, "some features are unsupported");
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
@@ -4335,7 +4345,7 @@ spa_ld_load_vdev_metadata(spa_t *spa)
error = spa_ld_log_spacemaps(spa);
if (error != 0) {
- spa_load_failed(spa, "spa_ld_log_sm_data failed [error=%d]",
+ spa_load_failed(spa, "spa_ld_log_spacemaps failed [error=%d]",
error);
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
}
@@ -5196,11 +5206,10 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
* attempted vdev_open(). Return this to the user.
*/
if (config != NULL && spa->spa_config) {
- VERIFY(nvlist_dup(spa->spa_config, config,
- KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist(*config,
+ *config = fnvlist_dup(spa->spa_config);
+ fnvlist_add_nvlist(*config,
ZPOOL_CONFIG_LOAD_INFO,
- spa->spa_load_info) == 0);
+ spa->spa_load_info);
}
spa_unload(spa);
spa_deactivate(spa);
@@ -5222,8 +5231,8 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
* gathered while doing the load.
*/
if (state == SPA_LOAD_RECOVER) {
- VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
- spa->spa_load_info) == 0);
+ fnvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
+ spa->spa_load_info);
}
if (locked) {
@@ -5301,15 +5310,14 @@ spa_add_spares(spa_t *spa, nvlist_t *config)
if (spa->spa_spares.sav_count == 0)
return;
- VERIFY(nvlist_lookup_nvlist(config,
- ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
- VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+ nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+ VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES, &spares, &nspares));
if (nspares != 0) {
- VERIFY(nvlist_add_nvlist_array(nvroot,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
- VERIFY(nvlist_lookup_nvlist_array(nvroot,
- ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+ fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, spares,
+ nspares);
+ VERIFY0(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares));
/*
* Go through and find any spares which have since been
@@ -5317,13 +5325,13 @@ spa_add_spares(spa_t *spa, nvlist_t *config)
* their status appropriately.
*/
for (i = 0; i < nspares; i++) {
- VERIFY(nvlist_lookup_uint64(spares[i],
- ZPOOL_CONFIG_GUID, &guid) == 0);
+ guid = fnvlist_lookup_uint64(spares[i],
+ ZPOOL_CONFIG_GUID);
if (spa_spare_exists(guid, &pool, NULL) &&
pool != 0ULL) {
- VERIFY(nvlist_lookup_uint64_array(
- spares[i], ZPOOL_CONFIG_VDEV_STATS,
- (uint64_t **)&vs, &vsc) == 0);
+ VERIFY0(nvlist_lookup_uint64_array(spares[i],
+ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs,
+ &vsc));
vs->vs_state = VDEV_STATE_CANT_OPEN;
vs->vs_aux = VDEV_AUX_SPARED;
}
@@ -5350,23 +5358,22 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
if (spa->spa_l2cache.sav_count == 0)
return;
- VERIFY(nvlist_lookup_nvlist(config,
- ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
- VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
- ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+ nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
+ VERIFY0(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
+ ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache));
if (nl2cache != 0) {
- VERIFY(nvlist_add_nvlist_array(nvroot,
- ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
- VERIFY(nvlist_lookup_nvlist_array(nvroot,
- ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+ fnvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, l2cache,
+ nl2cache);
+ VERIFY0(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+ &l2cache, &nl2cache));
/*
* Update level 2 cache device stats.
*/
for (i = 0; i < nl2cache; i++) {
- VERIFY(nvlist_lookup_uint64(l2cache[i],
- ZPOOL_CONFIG_GUID, &guid) == 0);
+ guid = fnvlist_lookup_uint64(l2cache[i],
+ ZPOOL_CONFIG_GUID);
vd = NULL;
for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
@@ -5378,9 +5385,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
}
ASSERT(vd != NULL);
- VERIFY(nvlist_lookup_uint64_array(l2cache[i],
- ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
- == 0);
+ VERIFY0(nvlist_lookup_uint64_array(l2cache[i],
+ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc));
vdev_get_stats(vd, vs);
vdev_config_generate_stats(vd, l2cache[i]);
@@ -5495,20 +5501,20 @@ spa_get_stats(const char *name, nvlist_t **config,
loadtimes[0] = spa->spa_loaded_ts.tv_sec;
loadtimes[1] = spa->spa_loaded_ts.tv_nsec;
- VERIFY(nvlist_add_uint64_array(*config,
- ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0);
+ fnvlist_add_uint64_array(*config,
+ ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2);
- VERIFY(nvlist_add_uint64(*config,
+ fnvlist_add_uint64(*config,
ZPOOL_CONFIG_ERRCOUNT,
- spa_get_errlog_size(spa)) == 0);
+ spa_get_errlog_size(spa));
if (spa_suspended(spa)) {
- VERIFY(nvlist_add_uint64(*config,
+ fnvlist_add_uint64(*config,
ZPOOL_CONFIG_SUSPENDED,
- spa->spa_failmode) == 0);
- VERIFY(nvlist_add_uint64(*config,
+ spa->spa_failmode);
+ fnvlist_add_uint64(*config,
ZPOOL_CONFIG_SUSPENDED_REASON,
- spa->spa_suspended) == 0);
+ spa->spa_suspended);
}
spa_add_spares(spa, *config);
@@ -5600,8 +5606,8 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
if ((error = vdev_open(vd)) == 0 &&
(error = vdev_label_init(vd, crtxg, label)) == 0) {
- VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
- vd->vdev_guid) == 0);
+ fnvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
+ vd->vdev_guid);
}
vdev_free(vd);
@@ -5652,23 +5658,20 @@ spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
* Generate new dev list by concatenating with the
* current dev list.
*/
- VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
- &olddevs, &oldndevs) == 0);
+ VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config, config,
+ &olddevs, &oldndevs));
newdevs = kmem_alloc(sizeof (void *) *
(ndevs + oldndevs), KM_SLEEP);
for (i = 0; i < oldndevs; i++)
- VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
- KM_SLEEP) == 0);
+ newdevs[i] = fnvlist_dup(olddevs[i]);
for (i = 0; i < ndevs; i++)
- VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
- KM_SLEEP) == 0);
+ newdevs[i + oldndevs] = fnvlist_dup(devs[i]);
- VERIFY(nvlist_remove(sav->sav_config, config,
- DATA_TYPE_NVLIST_ARRAY) == 0);
+ fnvlist_remove(sav->sav_config, config);
- VERIFY(nvlist_add_nvlist_array(sav->sav_config,
- config, newdevs, ndevs + oldndevs) == 0);
+ fnvlist_add_nvlist_array(sav->sav_config, config, newdevs,
+ ndevs + oldndevs);
for (i = 0; i < oldndevs + ndevs; i++)
nvlist_free(newdevs[i]);
kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
@@ -5676,10 +5679,8 @@ spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
/*
* Generate a new dev list.
*/
- VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
- devs, ndevs) == 0);
+ sav->sav_config = fnvlist_alloc();
+ fnvlist_add_nvlist_array(sav->sav_config, config, devs, ndevs);
}
}
@@ -5888,10 +5889,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
*/
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
&spares, &nspares) == 0) {
- VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+ spa->spa_spares.sav_config = fnvlist_alloc();
+ fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES, spares, nspares);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
spa_load_spares(spa);
spa_config_exit(spa, SCL_ALL, FTAG);
@@ -5903,10 +5903,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
*/
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
&l2cache, &nl2cache) == 0) {
- VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
- ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+ spa->spa_l2cache.sav_config = fnvlist_alloc();
+ fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+ ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
spa_load_l2cache(spa);
spa_config_exit(spa, SCL_ALL, FTAG);
@@ -6107,8 +6106,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
* Propagate anything learned while loading the pool and pass it
* back to caller (i.e. rewind info, missing devices, etc).
*/
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
- spa->spa_load_info) == 0);
+ fnvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
/*
@@ -6126,8 +6124,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_load_l2cache(spa);
}
- VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
+ nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
spa_config_exit(spa, SCL_ALL, FTAG);
if (props != NULL)
@@ -6151,13 +6148,12 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
&spares, &nspares) == 0) {
if (spa->spa_spares.sav_config)
- VERIFY(nvlist_remove(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+ fnvlist_remove(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES);
else
- VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+ spa->spa_spares.sav_config = fnvlist_alloc();
+ fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
+ ZPOOL_CONFIG_SPARES, spares, nspares);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
spa_load_spares(spa);
spa_config_exit(spa, SCL_ALL, FTAG);
@@ -6166,13 +6162,12 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
&l2cache, &nl2cache) == 0) {
if (spa->spa_l2cache.sav_config)
- VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
- ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
+ fnvlist_remove(spa->spa_l2cache.sav_config,
+ ZPOOL_CONFIG_L2CACHE);
else
- VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
- ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+ spa->spa_l2cache.sav_config = fnvlist_alloc();
+ fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+ ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
spa_load_l2cache(spa);
spa_config_exit(spa, SCL_ALL, FTAG);
@@ -6262,16 +6257,14 @@ spa_tryimport(nvlist_t *tryconfig)
*/
if (spa->spa_root_vdev != NULL) {
config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
- VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
- poolname) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- state) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
- spa->spa_uberblock.ub_timestamp) == 0);
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
- spa->spa_load_info) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
- spa->spa_errata) == 0);
+ fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, poolname);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
+ spa->spa_uberblock.ub_timestamp);
+ fnvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+ spa->spa_load_info);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
+ spa->spa_errata);
/*
* If the bootfs property exists on this pool then we
@@ -6300,8 +6293,8 @@ spa_tryimport(nvlist_t *tryconfig)
(void) snprintf(dsname, MAXPATHLEN,
"%s/%s", poolname, ++cp);
}
- VERIFY(nvlist_add_string(config,
- ZPOOL_CONFIG_BOOTFS, dsname) == 0);
+ fnvlist_add_string(config, ZPOOL_CONFIG_BOOTFS,
+ dsname);
kmem_free(dsname, MAXPATHLEN);
}
kmem_free(tmpname, MAXPATHLEN);
@@ -6468,7 +6461,7 @@ export_spa:
}
if (oldconfig && spa->spa_config)
- VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);
+ *oldconfig = fnvlist_dup(spa->spa_config);
if (new_state != POOL_STATE_UNINITIALIZED) {
if (!hardforce)
@@ -7611,14 +7604,14 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
}
/* we need certain info from the top level */
- VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
- vml[c]->vdev_top->vdev_ms_array) == 0);
- VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
- vml[c]->vdev_top->vdev_ms_shift) == 0);
- VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
- vml[c]->vdev_top->vdev_asize) == 0);
- VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
- vml[c]->vdev_top->vdev_ashift) == 0);
+ fnvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
+ vml[c]->vdev_top->vdev_ms_array);
+ fnvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
+ vml[c]->vdev_top->vdev_ms_shift);
+ fnvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
+ vml[c]->vdev_top->vdev_asize);
+ fnvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
+ vml[c]->vdev_top->vdev_ashift);
/* transfer per-vdev ZAPs */
ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0);
@@ -7648,28 +7641,24 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
* Temporarily record the splitting vdevs in the spa config. This
* will disappear once the config is regenerated.
*/
- VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
- glist, children) == 0);
+ nvl = fnvlist_alloc();
+ fnvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, glist, children);
kmem_free(glist, children * sizeof (uint64_t));
mutex_enter(&spa->spa_props_lock);
- VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT,
- nvl) == 0);
+ fnvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, nvl);
mutex_exit(&spa->spa_props_lock);
spa->spa_config_splitting = nvl;
vdev_config_dirty(spa->spa_root_vdev);
/* configure and create the new pool */
- VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
- spa_version(spa)) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- spa->spa_config_txg) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- spa_generate_guid(NULL)) == 0);
+ fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+ exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa));
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ spa_generate_guid(NULL));
VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS));
(void) nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
@@ -7731,10 +7720,9 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
/* if that worked, generate a real config for the new pool */
if (newspa->spa_root_vdev != NULL) {
- VERIFY(nvlist_alloc(&newspa->spa_config_splitting,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(newspa->spa_config_splitting,
- ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0);
+ newspa->spa_config_splitting = fnvlist_alloc();
+ fnvlist_add_uint64(newspa->spa_config_splitting,
+ ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa));
spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL,
B_TRUE));
}
@@ -8522,16 +8510,15 @@ spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
&sav->sav_object, tx) == 0);
}
- VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ nvroot = fnvlist_alloc();
if (sav->sav_count == 0) {
- VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
+ fnvlist_add_nvlist_array(nvroot, config, NULL, 0);
} else {
list = kmem_alloc(sav->sav_count*sizeof (void *), KM_SLEEP);
for (i = 0; i < sav->sav_count; i++)
list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
B_FALSE, VDEV_CONFIG_L2CACHE);
- VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
- sav->sav_count) == 0);
+ fnvlist_add_nvlist_array(nvroot, config, list, sav->sav_count);
for (i = 0; i < sav->sav_count; i++)
nvlist_free(list[i]);
kmem_free(list, sav->sav_count * sizeof (void *));
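The nvlist conversions throughout the spa.c hunks above all follow one shape: the fnvlist_* interfaces verify success internally and return values directly, so call sites shed the VERIFY(... == 0) boilerplate. A compile-only sketch of the pattern; the wrapper bodies below are an assumption for illustration, not the libnvpair implementation, hence the _sketch suffixes:

#include <assert.h>
#include <stdint.h>

typedef struct nvlist nvlist_t;

/* The fallible libnvpair-style primitives (declarations only). */
extern int nvlist_add_uint64(nvlist_t *, const char *, uint64_t);
extern int nvlist_lookup_uint64(nvlist_t *, const char *, uint64_t *);

/* "f" wrappers: assert success so callers treat the op as infallible. */
static void
fnvlist_add_uint64_sketch(nvlist_t *nvl, const char *name, uint64_t val)
{
	int err = nvlist_add_uint64(nvl, name, val);
	assert(err == 0);	/* kernel code uses VERIFY0() here */
}

static uint64_t
fnvlist_lookup_uint64_sketch(nvlist_t *nvl, const char *name)
{
	uint64_t val;
	int err = nvlist_lookup_uint64(nvl, name, &val);
	assert(err == 0);
	return (val);
}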
diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
index 8e14d9a833cd..0dfe5b8395e0 100644
--- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
+++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
@@ -257,7 +257,12 @@ unsigned long zfs_unflushed_log_block_min = 1000;
* terms of performance. Thus we have a hard limit in the size of the log in
* terms of blocks.
*/
-unsigned long zfs_unflushed_log_block_max = (1ULL << 18);
+static unsigned long zfs_unflushed_log_block_max = (1ULL << 17);
+
+/*
+ * We also have a hard limit on the size of the log in terms of dirty TXGs.
+ */
+static unsigned long zfs_unflushed_log_txg_max = 1000;
/*
* Max # of rows allowed for the log_summary. The tradeoff here is accuracy and
@@ -333,9 +338,13 @@ spa_log_sm_set_blocklimit(spa_t *spa)
return;
}
- uint64_t calculated_limit =
- (spa_total_metaslabs(spa) * zfs_unflushed_log_block_pct) / 100;
- spa->spa_unflushed_stats.sus_blocklimit = MIN(MAX(calculated_limit,
+ uint64_t msdcount = 0;
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e; e = list_next(&spa->spa_log_summary, e))
+ msdcount += e->lse_msdcount;
+
+ uint64_t limit = msdcount * zfs_unflushed_log_block_pct / 100;
+ spa->spa_unflushed_stats.sus_blocklimit = MIN(MAX(limit,
zfs_unflushed_log_block_min), zfs_unflushed_log_block_max);
}
@@ -380,8 +389,13 @@ spa_log_summary_verify_counts(spa_t *spa)
}
static boolean_t
-summary_entry_is_full(spa_t *spa, log_summary_entry_t *e)
+summary_entry_is_full(spa_t *spa, log_summary_entry_t *e, uint64_t txg)
{
+ if (e->lse_end == txg)
+	return (B_FALSE);
+ if (e->lse_txgcount >= DIV_ROUND_UP(zfs_unflushed_log_txg_max,
+ zfs_max_logsm_summary_length))
+	return (B_TRUE);
uint64_t blocks_per_row = MAX(1,
DIV_ROUND_UP(spa_log_sm_blocklimit(spa),
zfs_max_logsm_summary_length));
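With the new TXG cap, each summary row may span at most DIV_ROUND_UP(zfs_unflushed_log_txg_max, zfs_max_logsm_summary_length) TXGs before summary_entry_is_full() reports it full. A quick arithmetic check using the new default of 1000 TXGs and a summary length assumed to be the default of 10:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int
main(void)
{
	unsigned long zfs_unflushed_log_txg_max = 1000;
	unsigned long zfs_max_logsm_summary_length = 10;   /* assumed default */

	printf("TXGs per summary entry: %lu\n",
	    DIV_ROUND_UP(zfs_unflushed_log_txg_max,
	    zfs_max_logsm_summary_length));		/* prints 100 */
	return (0);
}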
@@ -401,7 +415,7 @@ summary_entry_is_full(spa_t *spa, log_summary_entry_t *e)
* the metaslab.
*/
void
-spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg)
+spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg, boolean_t dirty)
{
/*
* We don't track summary data for read-only pools and this function
@@ -429,6 +443,8 @@ spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg)
}
target->lse_mscount--;
+ if (dirty)
+ target->lse_msdcount--;
}
/*
@@ -490,8 +506,10 @@ spa_log_summary_decrement_mscount(spa_t *spa, uint64_t txg)
void
spa_log_summary_decrement_blkcount(spa_t *spa, uint64_t blocks_gone)
{
- for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
- e != NULL; e = list_head(&spa->spa_log_summary)) {
+ log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ if (e->lse_txgcount > 0)
+ e->lse_txgcount--;
+ for (; e != NULL; e = list_head(&spa->spa_log_summary)) {
if (e->lse_blkcount > blocks_gone) {
e->lse_blkcount -= blocks_gone;
blocks_gone = 0;
@@ -554,31 +572,52 @@ spa_log_sm_increment_current_mscount(spa_t *spa)
static void
summary_add_data(spa_t *spa, uint64_t txg, uint64_t metaslabs_flushed,
- uint64_t nblocks)
+ uint64_t metaslabs_dirty, uint64_t nblocks)
{
log_summary_entry_t *e = list_tail(&spa->spa_log_summary);
- if (e == NULL || summary_entry_is_full(spa, e)) {
+ if (e == NULL || summary_entry_is_full(spa, e, txg)) {
e = kmem_zalloc(sizeof (log_summary_entry_t), KM_SLEEP);
- e->lse_start = txg;
+ e->lse_start = e->lse_end = txg;
+ e->lse_txgcount = 1;
list_insert_tail(&spa->spa_log_summary, e);
}
ASSERT3U(e->lse_start, <=, txg);
+ if (e->lse_end < txg) {
+ e->lse_end = txg;
+ e->lse_txgcount++;
+ }
e->lse_mscount += metaslabs_flushed;
+ e->lse_msdcount += metaslabs_dirty;
e->lse_blkcount += nblocks;
}
static void
spa_log_summary_add_incoming_blocks(spa_t *spa, uint64_t nblocks)
{
- summary_add_data(spa, spa_syncing_txg(spa), 0, nblocks);
+ summary_add_data(spa, spa_syncing_txg(spa), 0, 0, nblocks);
}
void
-spa_log_summary_add_flushed_metaslab(spa_t *spa)
+spa_log_summary_add_flushed_metaslab(spa_t *spa, boolean_t dirty)
{
- summary_add_data(spa, spa_syncing_txg(spa), 1, 0);
+ summary_add_data(spa, spa_syncing_txg(spa), 1, dirty ? 1 : 0, 0);
+}
+
+void
+spa_log_summary_dirty_flushed_metaslab(spa_t *spa, uint64_t txg)
+{
+ log_summary_entry_t *target = NULL;
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e != NULL; e = list_next(&spa->spa_log_summary, e)) {
+ if (e->lse_start > txg)
+ break;
+ target = e;
+ }
+ ASSERT3P(target, !=, NULL);
+ ASSERT3U(target->lse_mscount, !=, 0);
+ target->lse_msdcount++;
}
/*
@@ -624,6 +663,11 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
int64_t available_blocks =
spa_log_sm_blocklimit(spa) - spa_log_sm_nblocks(spa) - incoming;
+ int64_t available_txgs = zfs_unflushed_log_txg_max;
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e; e = list_next(&spa->spa_log_summary, e))
+ available_txgs -= e->lse_txgcount;
+
/*
* This variable tells us the total number of flushes needed to
* keep the log size within the limit when we reach txgs_in_future.
@@ -631,9 +675,7 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
uint64_t total_flushes = 0;
/* Holds the current maximum of our estimates so far. */
- uint64_t max_flushes_pertxg =
- MIN(avl_numnodes(&spa->spa_metaslabs_by_flushed),
- zfs_min_metaslabs_to_flush);
+ uint64_t max_flushes_pertxg = zfs_min_metaslabs_to_flush;
/*
* For our estimations we only look as far in the future
@@ -647,11 +689,14 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
* then keep skipping TXGs accumulating more blocks
* based on the incoming rate until we exceed it.
*/
- if (available_blocks >= 0) {
- uint64_t skip_txgs = (available_blocks / incoming) + 1;
+ if (available_blocks >= 0 && available_txgs >= 0) {
+ uint64_t skip_txgs = MIN(available_txgs + 1,
+ (available_blocks / incoming) + 1);
available_blocks -= (skip_txgs * incoming);
+ available_txgs -= skip_txgs;
txgs_in_future += skip_txgs;
ASSERT3S(available_blocks, >=, -incoming);
+ ASSERT3S(available_txgs, >=, -1);
}
/*
@@ -660,9 +705,10 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
* based on the current entry in the summary, updating
* our available_blocks.
*/
- ASSERT3S(available_blocks, <, 0);
+ ASSERT(available_blocks < 0 || available_txgs < 0);
available_blocks += e->lse_blkcount;
- total_flushes += e->lse_mscount;
+ available_txgs += e->lse_txgcount;
+ total_flushes += e->lse_msdcount;
/*
* Keep the running maximum of the total_flushes that
@@ -674,8 +720,6 @@ spa_estimate_metaslabs_to_flush(spa_t *spa)
*/
max_flushes_pertxg = MAX(max_flushes_pertxg,
DIV_ROUND_UP(total_flushes, txgs_in_future));
- ASSERT3U(avl_numnodes(&spa->spa_metaslabs_by_flushed), >=,
- max_flushes_pertxg);
}
return (max_flushes_pertxg);
}
@@ -765,14 +809,11 @@ spa_flush_metaslabs(spa_t *spa, dmu_tx_t *tx)
uint64_t want_to_flush;
if (spa_flush_all_logs_requested(spa)) {
ASSERT3S(spa_state(spa), ==, POOL_STATE_EXPORTED);
- want_to_flush = avl_numnodes(&spa->spa_metaslabs_by_flushed);
+ want_to_flush = UINT64_MAX;
} else {
want_to_flush = spa_estimate_metaslabs_to_flush(spa);
}
- ASSERT3U(avl_numnodes(&spa->spa_metaslabs_by_flushed), >=,
- want_to_flush);
-
/* Used purely for verification purposes */
uint64_t visited = 0;
@@ -803,31 +844,22 @@ spa_flush_metaslabs(spa_t *spa, dmu_tx_t *tx)
if (want_to_flush == 0 && !spa_log_exceeds_memlimit(spa))
break;
- mutex_enter(&curr->ms_sync_lock);
- mutex_enter(&curr->ms_lock);
- boolean_t flushed = metaslab_flush(curr, tx);
- mutex_exit(&curr->ms_lock);
- mutex_exit(&curr->ms_sync_lock);
-
- /*
- * If we failed to flush a metaslab (because it was loading),
- * then we are done with the block heuristic as it's not
- * possible to destroy any log space maps once you've skipped
- * a metaslab. In that case we just set our counter to 0 but
- * we continue looping in case there is still memory pressure
- * due to unflushed changes. Note that, flushing a metaslab
- * that is not the oldest flushed in the pool, will never
- * destroy any log space maps [see spa_cleanup_old_sm_logs()].
- */
- if (!flushed) {
- want_to_flush = 0;
- } else if (want_to_flush > 0) {
- want_to_flush--;
- }
+ if (metaslab_unflushed_dirty(curr)) {
+ mutex_enter(&curr->ms_sync_lock);
+ mutex_enter(&curr->ms_lock);
+ metaslab_flush(curr, tx);
+ mutex_exit(&curr->ms_lock);
+ mutex_exit(&curr->ms_sync_lock);
+ if (want_to_flush > 0)
+ want_to_flush--;
+ } else
+ metaslab_unflushed_bump(curr, tx, B_FALSE);
visited++;
}
ASSERT3U(avl_numnodes(&spa->spa_metaslabs_by_flushed), >=, visited);
+
+ spa_log_sm_set_blocklimit(spa);
}
/*
@@ -898,6 +930,7 @@ spa_cleanup_old_sm_logs(spa_t *spa, dmu_tx_t *tx)
avl_remove(&spa->spa_sm_logs_by_txg, sls);
space_map_free_obj(mos, sls->sls_sm_obj, tx);
VERIFY0(zap_remove_int(mos, spacemap_zap, sls->sls_txg, tx));
+ spa_log_summary_decrement_blkcount(spa, sls->sls_nblocks);
spa->spa_unflushed_stats.sus_nblocks -= sls->sls_nblocks;
kmem_free(sls, sizeof (spa_log_sm_t));
}
@@ -957,12 +990,7 @@ spa_generate_syncing_log_sm(spa_t *spa, dmu_tx_t *tx)
VERIFY0(space_map_open(&spa->spa_syncing_log_sm, mos, sm_obj,
0, UINT64_MAX, SPA_MINBLOCKSHIFT));
- /*
- * If the log space map feature was just enabled, the blocklimit
- * has not yet been set.
- */
- if (spa_log_sm_blocklimit(spa) == 0)
- spa_log_sm_set_blocklimit(spa);
+ spa_log_sm_set_blocklimit(spa);
}
/*
@@ -1088,12 +1116,18 @@ spa_ld_log_sm_cb(space_map_entry_t *sme, void *arg)
panic("invalid maptype_t");
break;
}
+ if (!metaslab_unflushed_dirty(ms)) {
+ metaslab_set_unflushed_dirty(ms, B_TRUE);
+ spa_log_summary_dirty_flushed_metaslab(spa,
+ metaslab_unflushed_txg(ms));
+ }
return (0);
}
static int
spa_ld_log_sm_data(spa_t *spa)
{
+ spa_log_sm_t *sls, *psls;
int error = 0;
/*
@@ -1107,41 +1141,71 @@ spa_ld_log_sm_data(spa_t *spa)
ASSERT0(spa->spa_unflushed_stats.sus_memused);
hrtime_t read_logs_starttime = gethrtime();
- /* this is a no-op when we don't have space map logs */
- for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
- sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
- space_map_t *sm = NULL;
- error = space_map_open(&sm, spa_meta_objset(spa),
- sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT);
- if (error != 0) {
- spa_load_failed(spa, "spa_ld_log_sm_data(): failed at "
- "space_map_open(obj=%llu) [error %d]",
- (u_longlong_t)sls->sls_sm_obj, error);
- goto out;
+
+	/* Prefetch the log spacemap dnodes. */
+ for (sls = avl_first(&spa->spa_sm_logs_by_txg); sls;
+ sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+ dmu_prefetch(spa_meta_objset(spa), sls->sls_sm_obj,
+ 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
+ }
+
+ uint_t pn = 0;
+ uint64_t ps = 0;
+ psls = sls = avl_first(&spa->spa_sm_logs_by_txg);
+ while (sls != NULL) {
+		/* Prefetch up to 16 log spacemaps or ~2 * dmu_prefetch_max bytes ahead. */
+ if (psls != NULL && pn < 16 &&
+ (pn < 2 || ps < 2 * dmu_prefetch_max)) {
+ error = space_map_open(&psls->sls_sm,
+ spa_meta_objset(spa), psls->sls_sm_obj, 0,
+ UINT64_MAX, SPA_MINBLOCKSHIFT);
+ if (error != 0) {
+ spa_load_failed(spa, "spa_ld_log_sm_data(): "
+ "failed at space_map_open(obj=%llu) "
+ "[error %d]",
+				    (u_longlong_t)psls->sls_sm_obj, error);
+ goto out;
+ }
+ dmu_prefetch(spa_meta_objset(spa), psls->sls_sm_obj,
+ 0, 0, space_map_length(psls->sls_sm),
+ ZIO_PRIORITY_ASYNC_READ);
+ pn++;
+ ps += space_map_length(psls->sls_sm);
+ psls = AVL_NEXT(&spa->spa_sm_logs_by_txg, psls);
+ continue;
}
+		/* Load the TXG's log spacemap into ms_unflushed_allocs/frees. */
+ cond_resched();
+ ASSERT0(sls->sls_nblocks);
+ sls->sls_nblocks = space_map_nblocks(sls->sls_sm);
+ spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;
+ summary_add_data(spa, sls->sls_txg,
+ sls->sls_mscount, 0, sls->sls_nblocks);
+
struct spa_ld_log_sm_arg vla = {
.slls_spa = spa,
.slls_txg = sls->sls_txg
};
- error = space_map_iterate(sm, space_map_length(sm),
- spa_ld_log_sm_cb, &vla);
+ error = space_map_iterate(sls->sls_sm,
+ space_map_length(sls->sls_sm), spa_ld_log_sm_cb, &vla);
if (error != 0) {
- space_map_close(sm);
spa_load_failed(spa, "spa_ld_log_sm_data(): failed "
"at space_map_iterate(obj=%llu) [error %d]",
(u_longlong_t)sls->sls_sm_obj, error);
goto out;
}
- ASSERT0(sls->sls_nblocks);
- sls->sls_nblocks = space_map_nblocks(sm);
- spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;
- summary_add_data(spa, sls->sls_txg,
- sls->sls_mscount, sls->sls_nblocks);
+ pn--;
+ ps -= space_map_length(sls->sls_sm);
+ space_map_close(sls->sls_sm);
+ sls->sls_sm = NULL;
+ sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls);
- space_map_close(sm);
+		/* Update the log block limit to account for the just-loaded spacemaps. */
+ spa_log_sm_set_blocklimit(spa);
}
+
hrtime_t read_logs_endtime = gethrtime();
spa_load_note(spa,
"read %llu log space maps (%llu total blocks - blksz = %llu bytes) "
@@ -1151,6 +1215,18 @@ spa_ld_log_sm_data(spa_t *spa)
(longlong_t)((read_logs_endtime - read_logs_starttime) / 1000000));
out:
+ if (error != 0) {
+ for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
+ sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
+ if (sls->sls_sm) {
+ space_map_close(sls->sls_sm);
+ sls->sls_sm = NULL;
+ }
+ }
+ } else {
+ ASSERT0(pn);
+ ASSERT0(ps);
+ }
/*
* Now that the metaslabs contain their unflushed changes:
* [1] recalculate their actual allocated space
@@ -1231,6 +1307,9 @@ spa_ld_unflushed_txgs(vdev_t *vd)
}
ms->ms_unflushed_txg = entry.msp_unflushed_txg;
+ ms->ms_unflushed_dirty = B_FALSE;
+ ASSERT(range_tree_is_empty(ms->ms_unflushed_allocs));
+ ASSERT(range_tree_is_empty(ms->ms_unflushed_frees));
if (ms->ms_unflushed_txg != 0) {
mutex_enter(&spa->spa_flushed_ms_lock);
avl_add(&spa->spa_metaslabs_by_flushed, ms);
@@ -1294,6 +1373,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, ULONG, ZMOD_RW,
"Lower-bound limit for the maximum amount of blocks allowed in "
"log spacemap (see zfs_unflushed_log_block_max)");
+ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, ULONG, ZMOD_RW,
+ "Hard limit (upper-bound) in the size of the space map log "
+ "in terms of dirty TXGs.");
+
ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, ULONG, ZMOD_RW,
"Tunable used to determine the number of blocks that can be used for "
"the spacemap log, expressed as a percentage of the total number of "
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index bcf6360c25c3..1c93e7487dda 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -444,9 +444,9 @@ spa_config_lock_init(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
- zfs_refcount_create_untracked(&scl->scl_count);
scl->scl_writer = NULL;
scl->scl_write_wanted = 0;
+ scl->scl_count = 0;
}
}
@@ -457,9 +457,9 @@ spa_config_lock_destroy(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_destroy(&scl->scl_lock);
cv_destroy(&scl->scl_cv);
- zfs_refcount_destroy(&scl->scl_count);
ASSERT(scl->scl_writer == NULL);
ASSERT(scl->scl_write_wanted == 0);
+ ASSERT(scl->scl_count == 0);
}
}
@@ -480,7 +480,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
}
} else {
ASSERT(scl->scl_writer != curthread);
- if (!zfs_refcount_is_zero(&scl->scl_count)) {
+ if (scl->scl_count != 0) {
mutex_exit(&scl->scl_lock);
spa_config_exit(spa, locks & ((1 << i) - 1),
tag);
@@ -488,7 +488,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
}
scl->scl_writer = curthread;
}
- (void) zfs_refcount_add(&scl->scl_count, tag);
+ scl->scl_count++;
mutex_exit(&scl->scl_lock);
}
return (1);
@@ -515,14 +515,14 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
}
} else {
ASSERT(scl->scl_writer != curthread);
- while (!zfs_refcount_is_zero(&scl->scl_count)) {
+ while (scl->scl_count != 0) {
scl->scl_write_wanted++;
cv_wait(&scl->scl_cv, &scl->scl_lock);
scl->scl_write_wanted--;
}
scl->scl_writer = curthread;
}
- (void) zfs_refcount_add(&scl->scl_count, tag);
+ scl->scl_count++;
mutex_exit(&scl->scl_lock);
}
ASSERT3U(wlocks_held, <=, locks);
@@ -537,8 +537,8 @@ spa_config_exit(spa_t *spa, int locks, const void *tag)
if (!(locks & (1 << i)))
continue;
mutex_enter(&scl->scl_lock);
- ASSERT(!zfs_refcount_is_zero(&scl->scl_count));
- if (zfs_refcount_remove(&scl->scl_count, tag) == 0) {
+ ASSERT(scl->scl_count > 0);
+ if (--scl->scl_count == 0) {
ASSERT(scl->scl_writer == NULL ||
scl->scl_writer == curthread);
scl->scl_writer = NULL; /* OK in either case */
@@ -557,8 +557,7 @@ spa_config_held(spa_t *spa, int locks, krw_t rw)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
if (!(locks & (1 << i)))
continue;
- if ((rw == RW_READER &&
- !zfs_refcount_is_zero(&scl->scl_count)) ||
+ if ((rw == RW_READER && scl->scl_count != 0) ||
(rw == RW_WRITER && scl->scl_writer == curthread))
locks_held |= 1 << i;
}
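The spa_misc.c hunks above (from #12287) replace a tracked zfs_refcount_t
with a plain integer. The simplification is sound because every increment
and decrement of scl_count already happens while the lock's own mutex is
held, so no atomic or tracking machinery is needed. A hedged userland sketch
of the same pattern, with illustrative names (cfglock_t is not the kernel's
type):

#include <assert.h>
#include <pthread.h>

typedef struct cfglock {
	pthread_mutex_t cl_lock;
	int cl_count;		/* hold count, protected by cl_lock */
} cfglock_t;

static void
cfglock_enter(cfglock_t *cl)
{
	pthread_mutex_lock(&cl->cl_lock);
	cl->cl_count++;		/* plain increment; the mutex serializes it */
	pthread_mutex_unlock(&cl->cl_lock);
}

static void
cfglock_exit(cfglock_t *cl)
{
	pthread_mutex_lock(&cl->cl_lock);
	assert(cl->cl_count > 0);
	--cl->cl_count;
	pthread_mutex_unlock(&cl->cl_lock);
}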
diff --git a/sys/contrib/openzfs/module/zfs/txg.c b/sys/contrib/openzfs/module/zfs/txg.c
index c55b1d8f9601..c9eb84bbdb12 100644
--- a/sys/contrib/openzfs/module/zfs/txg.c
+++ b/sys/contrib/openzfs/module/zfs/txg.c
@@ -499,14 +499,6 @@ txg_wait_callbacks(dsl_pool_t *dp)
}
static boolean_t
-txg_is_syncing(dsl_pool_t *dp)
-{
- tx_state_t *tx = &dp->dp_tx;
- ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
- return (tx->tx_syncing_txg != 0);
-}
-
-static boolean_t
txg_is_quiescing(dsl_pool_t *dp)
{
tx_state_t *tx = &dp->dp_tx;
@@ -539,8 +531,6 @@ txg_sync_thread(void *arg)
clock_t timeout = zfs_txg_timeout * hz;
clock_t timer;
uint64_t txg;
- uint64_t dirty_min_bytes =
- zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
/*
* We sync when we're scanning, there's someone waiting
@@ -551,8 +541,7 @@ txg_sync_thread(void *arg)
while (!dsl_scan_active(dp->dp_scan) &&
!tx->tx_exiting && timer > 0 &&
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
- !txg_has_quiesced_to_sync(dp) &&
- dp->dp_dirty_total < dirty_min_bytes) {
+ !txg_has_quiesced_to_sync(dp)) {
dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
(u_longlong_t)tx->tx_synced_txg,
(u_longlong_t)tx->tx_sync_txg_waiting, dp);
@@ -566,6 +555,11 @@ txg_sync_thread(void *arg)
* prompting it to do so if necessary.
*/
while (!tx->tx_exiting && !txg_has_quiesced_to_sync(dp)) {
+ if (txg_is_quiescing(dp)) {
+ txg_thread_wait(tx, &cpr,
+ &tx->tx_quiesce_done_cv, 0);
+ continue;
+ }
if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
cv_broadcast(&tx->tx_quiesce_more_cv);
@@ -791,24 +785,22 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce)
}
/*
- * If there isn't a txg syncing or in the pipeline, push another txg through
- * the pipeline by quiescing the open txg.
+ * Pass in the txg number that should be synced.
*/
void
-txg_kick(dsl_pool_t *dp)
+txg_kick(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
ASSERT(!dsl_pool_config_held(dp));
+ if (tx->tx_sync_txg_waiting >= txg)
+ return;
+
mutex_enter(&tx->tx_sync_lock);
- if (!txg_is_syncing(dp) &&
- !txg_is_quiescing(dp) &&
- tx->tx_quiesce_txg_waiting <= tx->tx_open_txg &&
- tx->tx_sync_txg_waiting <= tx->tx_synced_txg &&
- tx->tx_quiesced_txg <= tx->tx_synced_txg) {
- tx->tx_quiesce_txg_waiting = tx->tx_open_txg + 1;
- cv_broadcast(&tx->tx_quiesce_more_cv);
+ if (tx->tx_sync_txg_waiting < txg) {
+ tx->tx_sync_txg_waiting = txg;
+ cv_broadcast(&tx->tx_sync_more_cv);
}
mutex_exit(&tx->tx_sync_lock);
}
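The new txg_kick() above takes an explicit target txg and uses an unlocked
early-out followed by a re-check under the mutex. The stale unlocked read is
safe: a caller that races and loses simply observes that the work was
already requested. A minimal sketch of that double-checked pattern, with
simplified types and illustrative names:

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t sync_more = PTHREAD_COND_INITIALIZER;
static uint64_t sync_txg_waiting;

static void
kick(uint64_t txg)
{
	if (sync_txg_waiting >= txg)	/* cheap unlocked fast path */
		return;

	pthread_mutex_lock(&sync_lock);
	if (sync_txg_waiting < txg) {	/* recheck under the lock */
		sync_txg_waiting = txg;
		pthread_cond_broadcast(&sync_more);
	}
	pthread_mutex_unlock(&sync_lock);
}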
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index a323b90a26a2..67fb5bf8f17e 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -134,7 +134,15 @@ int zfs_vdev_standard_sm_blksz = (1 << 17);
*/
int zfs_nocacheflush = 0;
-uint64_t zfs_vdev_max_auto_ashift = ASHIFT_MAX;
+/*
+ * Maximum and minimum ashift values that can be automatically set based on
+ * vdev's physical ashift (disk's physical sector size). Although ASHIFT_MAX
+ * would permit a higher ceiling, the automatic maximum is intentionally
+ * capped here so as not to excessively impact pool space efficiency. Higher
+ * ashift values may still be forced by the vdev's logical ashift or by the
+ * user via the ashift property, but won't be set automatically as a
+ * performance optimization.
+ */
+uint64_t zfs_vdev_max_auto_ashift = 14;
uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
/*PRINTFLIKE2*/
@@ -1513,13 +1521,6 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
if (txg == 0)
spa_config_exit(spa, SCL_ALLOC, FTAG);
- /*
- * Regardless whether this vdev was just added or it is being
- * expanded, the metaslab count has changed. Recalculate the
- * block limit.
- */
- spa_log_sm_set_blocklimit(spa);
-
return (0);
}
@@ -1843,6 +1844,24 @@ vdev_set_deflate_ratio(vdev_t *vd)
}
/*
+ * Choose the best of two ashifts, preferring one between logical ashift
+ * (absolute minimum) and administrator defined maximum, otherwise take
+ * the biggest of the two.
+ */
+uint64_t
+vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b)
+{
+ if (a > logical && a <= zfs_vdev_max_auto_ashift) {
+ if (b <= logical || b > zfs_vdev_max_auto_ashift)
+ return (a);
+ else
+ return (MAX(a, b));
+ } else if (b <= logical || b > zfs_vdev_max_auto_ashift)
+ return (MAX(a, b));
+ return (b);
+}
+
+/*
* Maximize performance by inflating the configured ashift for top level
* vdevs to be as close to the physical ashift as possible while maintaining
* administrator defined limits and ensuring it doesn't go below the
@@ -1853,7 +1872,8 @@ vdev_ashift_optimize(vdev_t *vd)
{
ASSERT(vd == vd->vdev_top);
- if (vd->vdev_ashift < vd->vdev_physical_ashift) {
+ if (vd->vdev_ashift < vd->vdev_physical_ashift &&
+ vd->vdev_physical_ashift <= zfs_vdev_max_auto_ashift) {
vd->vdev_ashift = MIN(
MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
MAX(zfs_vdev_min_auto_ashift,
@@ -4413,6 +4433,7 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
vs->vs_rsize = vdev_get_min_asize(vd);
if (vd->vdev_ops->vdev_op_leaf) {
+ vs->vs_pspace = vd->vdev_psize;
vs->vs_rsize += VDEV_LABEL_START_SIZE +
VDEV_LABEL_END_SIZE;
/*
@@ -4458,7 +4479,10 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
vs->vs_configured_ashift = vd->vdev_top != NULL
? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
vs->vs_logical_ashift = vd->vdev_logical_ashift;
- vs->vs_physical_ashift = vd->vdev_physical_ashift;
+ if (vd->vdev_physical_ashift <= ASHIFT_MAX)
+ vs->vs_physical_ashift = vd->vdev_physical_ashift;
+ else
+ vs->vs_physical_ashift = 0;
/*
* Report fragmentation and rebuild progress for top-level,
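To see how vdev_best_ashift() behaves at the boundaries, here is the
function from the hunk above lifted into a standalone program, with
zfs_vdev_max_auto_ashift fixed at 14 as the patch sets it; MAX is defined
locally so the sketch is self-contained.

#include <stdint.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
static const uint64_t max_auto_ashift = 14;

static uint64_t
best_ashift(uint64_t logical, uint64_t a, uint64_t b)
{
	if (a > logical && a <= max_auto_ashift) {
		if (b <= logical || b > max_auto_ashift)
			return (a);
		else
			return (MAX(a, b));
	} else if (b <= logical || b > max_auto_ashift)
		return (MAX(a, b));
	return (b);
}

int
main(void)
{
	/* Both candidates usable: take the larger. */
	printf("%llu\n", (unsigned long long)best_ashift(9, 12, 13)); /* 13 */
	/* One candidate above the auto cap: prefer the in-range one. */
	printf("%llu\n", (unsigned long long)best_ashift(9, 12, 16)); /* 12 */
	/* Neither in range: fall back to the larger of the two. */
	printf("%llu\n", (unsigned long long)best_ashift(9, 17, 16)); /* 17 */
	return (0);
}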
diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c
index db87e69f2057..10d09517effd 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@@ -541,7 +541,7 @@ vdev_draid_generate_perms(const draid_map_t *map, uint8_t **permsp)
int
vdev_draid_lookup_map(uint64_t children, const draid_map_t **mapp)
{
- for (int i = 0; i <= VDEV_DRAID_MAX_MAPS; i++) {
+ for (int i = 0; i < VDEV_DRAID_MAX_MAPS; i++) {
if (draid_maps[i].dm_children == children) {
*mapp = &draid_maps[i];
return (0);
@@ -1496,8 +1496,14 @@ vdev_draid_calculate_asize(vdev_t *vd, uint64_t *asizep, uint64_t *max_asizep,
asize = MIN(asize - 1, cvd->vdev_asize - 1) + 1;
max_asize = MIN(max_asize - 1, cvd->vdev_max_asize - 1) + 1;
logical_ashift = MAX(logical_ashift, cvd->vdev_ashift);
- physical_ashift = MAX(physical_ashift,
- cvd->vdev_physical_ashift);
+ }
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+
+ if (cvd->vdev_ops == &vdev_draid_spare_ops)
+ continue;
+ physical_ashift = vdev_best_ashift(logical_ashift,
+ physical_ashift, cvd->vdev_physical_ashift);
}
*asizep = asize;
diff --git a/sys/contrib/openzfs/module/zfs/vdev_mirror.c b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
index 45b744b2ec89..d80a767043a5 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
@@ -35,6 +35,7 @@
#include <sys/vdev_impl.h>
#include <sys/vdev_draid.h>
#include <sys/zio.h>
+#include <sys/zio_checksum.h>
#include <sys/abd.h>
#include <sys/fs/zfs.h>
@@ -102,6 +103,7 @@ vdev_mirror_stat_fini(void)
*/
typedef struct mirror_child {
vdev_t *mc_vd;
+ abd_t *mc_abd;
uint64_t mc_offset;
int mc_error;
int mc_load;
@@ -407,8 +409,14 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
- *physical_ashift = MAX(*physical_ashift,
- cvd->vdev_physical_ashift);
+ }
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+
+ if (cvd->vdev_open_error)
+ continue;
+ *physical_ashift = vdev_best_ashift(*logical_ashift,
+ *physical_ashift, cvd->vdev_physical_ashift);
}
if (numerrors == vd->vdev_children) {
@@ -439,32 +447,6 @@ vdev_mirror_child_done(zio_t *zio)
mc->mc_skipped = 0;
}
-static void
-vdev_mirror_scrub_done(zio_t *zio)
-{
- mirror_child_t *mc = zio->io_private;
-
- if (zio->io_error == 0) {
- zio_t *pio;
- zio_link_t *zl = NULL;
-
- mutex_enter(&zio->io_lock);
- while ((pio = zio_walk_parents(zio, &zl)) != NULL) {
- mutex_enter(&pio->io_lock);
- ASSERT3U(zio->io_size, >=, pio->io_size);
- abd_copy(pio->io_abd, zio->io_abd, pio->io_size);
- mutex_exit(&pio->io_lock);
- }
- mutex_exit(&zio->io_lock);
- }
-
- abd_free(zio->io_abd);
-
- mc->mc_error = zio->io_error;
- mc->mc_tried = 1;
- mc->mc_skipped = 0;
-}
-
/*
* Check the other, lower-index DVAs to see if they're on the same
* vdev as the child we picked. If they are, use them since they
@@ -637,16 +619,15 @@ vdev_mirror_io_start(zio_t *zio)
}
if (zio->io_type == ZIO_TYPE_READ) {
- if (zio->io_bp != NULL &&
- (zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering) {
+ if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering) {
/*
- * For scrubbing reads (if we can verify the
- * checksum here, as indicated by io_bp being
- * non-NULL) we need to allocate a read buffer for
- * each child and issue reads to all children. If
- * any child succeeds, it will copy its data into
- * zio->io_data in vdev_mirror_scrub_done.
+ * For scrubbing reads we need to issue reads to all
+			 * children. One child can reuse the parent's buffer,
+			 * but for the others we have to allocate separate
+			 * ones to verify checksums if io_bp is non-NULL, or
+			 * to compare them in vdev_mirror_io_done() otherwise.
*/
+ boolean_t first = B_TRUE;
for (c = 0; c < mm->mm_children; c++) {
mc = &mm->mm_child[c];
@@ -658,12 +639,15 @@ vdev_mirror_io_start(zio_t *zio)
continue;
}
- zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
- mc->mc_vd, mc->mc_offset,
+ mc->mc_abd = first ? zio->io_abd :
abd_alloc_sametype(zio->io_abd,
- zio->io_size), zio->io_size,
- zio->io_type, zio->io_priority, 0,
- vdev_mirror_scrub_done, mc));
+ zio->io_size);
+ zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
+ mc->mc_vd, mc->mc_offset, mc->mc_abd,
+ zio->io_size, zio->io_type,
+ zio->io_priority, 0,
+ vdev_mirror_child_done, mc));
+ first = B_FALSE;
}
zio_execute(zio);
return;
@@ -731,6 +715,7 @@ vdev_mirror_io_done(zio_t *zio)
int c;
int good_copies = 0;
int unexpected_errors = 0;
+ int last_good_copy = -1;
if (mm == NULL)
return;
@@ -742,6 +727,7 @@ vdev_mirror_io_done(zio_t *zio)
if (!mc->mc_skipped)
unexpected_errors++;
} else if (mc->mc_tried) {
+ last_good_copy = c;
good_copies++;
}
}
@@ -755,7 +741,6 @@ vdev_mirror_io_done(zio_t *zio)
* no non-degraded top-level vdevs left, and not update DTLs
* if we intend to reallocate.
*/
- /* XXPOLICY */
if (good_copies != mm->mm_children) {
/*
* Always require at least one good copy.
@@ -782,7 +767,6 @@ vdev_mirror_io_done(zio_t *zio)
/*
* If we don't have a good copy yet, keep trying other children.
*/
- /* XXPOLICY */
if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) {
ASSERT(c >= 0 && c < mm->mm_children);
mc = &mm->mm_child[c];
@@ -794,7 +778,80 @@ vdev_mirror_io_done(zio_t *zio)
return;
}
- /* XXPOLICY */
+ if (zio->io_flags & ZIO_FLAG_SCRUB && !mm->mm_resilvering) {
+ abd_t *best_abd = NULL;
+ if (last_good_copy >= 0)
+ best_abd = mm->mm_child[last_good_copy].mc_abd;
+
+ /*
+ * If we're scrubbing but don't have a BP available (because
+ * this vdev is under a raidz or draid vdev) then the best we
+ * can do is compare all of the copies read. If they're not
+ * identical then return a checksum error and the most likely
+ * correct data. The raidz code will issue a repair I/O if
+ * possible.
+ */
+ if (zio->io_bp == NULL) {
+ ASSERT(zio->io_vd->vdev_ops == &vdev_replacing_ops ||
+ zio->io_vd->vdev_ops == &vdev_spare_ops);
+
+ abd_t *pref_abd = NULL;
+ for (c = 0; c < last_good_copy; c++) {
+ mc = &mm->mm_child[c];
+ if (mc->mc_error || !mc->mc_tried)
+ continue;
+
+ if (abd_cmp(mc->mc_abd, best_abd) != 0)
+ zio->io_error = SET_ERROR(ECKSUM);
+
+ /*
+			 * The distributed spare is always preferred
+ * by vdev_mirror_child_select() so it's
+ * considered to be the best candidate.
+ */
+ if (pref_abd == NULL &&
+ mc->mc_vd->vdev_ops ==
+ &vdev_draid_spare_ops)
+ pref_abd = mc->mc_abd;
+
+ /*
+ * In the absence of a preferred copy, use
+ * the parent pointer to avoid a memory copy.
+ */
+ if (mc->mc_abd == zio->io_abd)
+ best_abd = mc->mc_abd;
+ }
+ if (pref_abd)
+ best_abd = pref_abd;
+ } else {
+
+ /*
+ * If we have a BP available, then checksums are
+ * already verified and we just need a buffer
+ * with valid data, preferring parent one to
+ * avoid a memory copy.
+ */
+ for (c = 0; c < last_good_copy; c++) {
+ mc = &mm->mm_child[c];
+ if (mc->mc_error || !mc->mc_tried)
+ continue;
+ if (mc->mc_abd == zio->io_abd) {
+ best_abd = mc->mc_abd;
+ break;
+ }
+ }
+ }
+
+ if (best_abd && best_abd != zio->io_abd)
+ abd_copy(zio->io_abd, best_abd, zio->io_size);
+ for (c = 0; c < mm->mm_children; c++) {
+ mc = &mm->mm_child[c];
+ if (mc->mc_abd != zio->io_abd)
+ abd_free(mc->mc_abd);
+ mc->mc_abd = NULL;
+ }
+ }
+
if (good_copies == 0) {
zio->io_error = vdev_mirror_worst_error(mm);
ASSERT(zio->io_error != 0);
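A simplified model of the BP-less scrub comparison added above (#13555):
read every mirror child, let one child reuse the parent buffer, and flag a
checksum error if any copy differs from the last good one. Plain buffers
and memcmp() stand in for abd_t and abd_cmp(); all names are illustrative,
and ECKSUM is defined locally as a placeholder.

#include <string.h>

#define ECKSUM 122	/* placeholder errno-style value for this sketch */

struct child {
	char *c_buf;	/* this child's copy of the data */
	int c_error;	/* nonzero if the child read failed */
	int c_tried;
};

static int
scrub_compare(struct child *mc, int children, int last_good, size_t size)
{
	int error = 0;
	const char *best = mc[last_good].c_buf;

	for (int c = 0; c < last_good; c++) {
		if (mc[c].c_error || !mc[c].c_tried)
			continue;
		if (memcmp(mc[c].c_buf, best, size) != 0)
			error = ECKSUM;	/* copies disagree: report it */
	}
	return (error);
}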
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index b14e995e3ce1..5c25007f17b9 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -1426,8 +1426,14 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
- *physical_ashift = MAX(*physical_ashift,
- cvd->vdev_physical_ashift);
+ }
+ for (c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+
+ if (cvd->vdev_open_error != 0)
+ continue;
+ *physical_ashift = vdev_best_ashift(*logical_ashift,
+ *physical_ashift, cvd->vdev_physical_ashift);
}
*asize *= vd->vdev_children;
@@ -1721,8 +1727,9 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
if (!rc->rc_tried || rc->rc_error != 0)
continue;
- orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size);
- abd_copy(orig[c], rc->rc_abd, rc->rc_size);
+ orig[c] = rc->rc_abd;
+ ASSERT3U(abd_get_size(rc->rc_abd), ==, rc->rc_size);
+ rc->rc_abd = abd_alloc_linear(rc->rc_size, B_FALSE);
}
/*
@@ -1791,6 +1798,9 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
} else if (c < rr->rr_firstdatacol && !rc->rc_tried) {
parity_untried++;
}
+
+ if (rc->rc_force_repair)
+ unexpected_errors++;
}
/*
@@ -2155,9 +2165,20 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
for (int c = 0; c < rr->rr_cols; c++) {
raidz_col_t *rc = &rr->rr_col[c];
- if (rc->rc_error) {
- ASSERT(rc->rc_error != ECKSUM); /* child has no bp */
+ /*
+ * If scrubbing and a replacing/sparing child vdev determined
+ * that not all of its children have an identical copy of the
+ * data, then clear the error so the column is treated like
+ * any other read and force a repair to correct the damage.
+ */
+ if (rc->rc_error == ECKSUM) {
+ ASSERT(zio->io_flags & ZIO_FLAG_SCRUB);
+ vdev_raidz_checksum_error(zio, rc, rc->rc_abd);
+ rc->rc_force_repair = 1;
+ rc->rc_error = 0;
+ }
+ if (rc->rc_error) {
if (c < rr->rr_firstdatacol)
parity_errors++;
else
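The raidz_parity_verify() change above (#13613) avoids a memory copy by
keeping the original column buffer and regenerating parity into a fresh
one, then comparing the two. A hedged sketch of that swap, with plain
buffers modeling abd_t and regen() standing in for parity generation:

#include <stdlib.h>
#include <string.h>

struct col { char *rc_buf; size_t rc_size; };

static void regen(struct col *rc) { (void)rc; /* recompute parity */ }

/* Returns 1 if the regenerated parity differs, 0 if it matches, -1 on OOM. */
static int
parity_verify(struct col *rc)
{
	char *orig = rc->rc_buf;		/* keep original; no copy made */
	char *fresh = malloc(rc->rc_size);
	if (fresh == NULL)
		return (-1);
	rc->rc_buf = fresh;
	regen(rc);				/* regenerate into new buffer */
	int mismatch = memcmp(orig, fresh, rc->rc_size) != 0;
	free(fresh);
	rc->rc_buf = orig;			/* restore the original buffer */
	return (mismatch);
}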
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index d7385cdf25de..48bd20b2169f 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -1207,7 +1207,6 @@ vdev_remove_complete(spa_t *spa)
vdev_metaslab_fini(vd);
metaslab_group_destroy(vd->vdev_mg);
vd->vdev_mg = NULL;
- spa_log_sm_set_blocklimit(spa);
}
if (vd->vdev_log_mg != NULL) {
ASSERT0(vd->vdev_ms_count);
@@ -1935,7 +1934,6 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
* metaslab_class_histogram_verify()
*/
vdev_metaslab_fini(vd);
- spa_log_sm_set_blocklimit(spa);
spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);
*txg = spa_vdev_config_enter(spa);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 3336bb783251..3d2492b9b908 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -4787,6 +4787,11 @@ extract_delay_props(nvlist_t *props)
static const zfs_prop_t delayable[] = {
ZFS_PROP_REFQUOTA,
ZFS_PROP_KEYLOCATION,
+ /*
+ * Setting ZFS_PROP_SHARESMB requires the objset type to be
+ * known, which is not possible prior to receipt of raw sends.
+ */
+ ZFS_PROP_SHARESMB,
0
};
int i;
@@ -4850,6 +4855,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
offset_t off, noff;
nvlist_t *local_delayprops = NULL;
nvlist_t *recv_delayprops = NULL;
+ nvlist_t *inherited_delayprops = NULL;
nvlist_t *origprops = NULL; /* existing properties */
nvlist_t *origrecvd = NULL; /* existing received properties */
boolean_t first_recvd_props = B_FALSE;
@@ -4964,6 +4970,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
local_delayprops = extract_delay_props(oprops);
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
oprops, *errors);
+ inherited_delayprops = extract_delay_props(xprops);
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
xprops, *errors);
@@ -5021,6 +5028,10 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
local_delayprops, *errors);
}
+ if (inherited_delayprops != NULL && error == 0) {
+ (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
+ inherited_delayprops, *errors);
+ }
}
/*
@@ -5040,6 +5051,10 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
nvlist_free(local_delayprops);
}
+ if (inherited_delayprops != NULL) {
+ ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
+ nvlist_free(inherited_delayprops);
+ }
*read_bytes = off - noff;
#ifdef ZFS_DEBUG
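The zfs_ioctl.c hunks above (#13878) extend the delayed-property mechanism:
properties that cannot be applied until the received dataset exists
(refquota, keylocation, and now sharesmb) are pulled out of the incoming
lists and set only after the receive succeeds, for inherited as well as
local sources. A simplified model of the membership test, with plain string
arrays standing in for nvlists:

#include <string.h>

static const char *delayable[] = { "refquota", "keylocation", "sharesmb" };

static int
is_delayable(const char *name)
{
	for (size_t i = 0; i < sizeof (delayable) / sizeof (delayable[0]); i++)
		if (strcmp(name, delayable[i]) == 0)
			return (1);
	return (0);
}

In the real code each matching nvpair is moved into a side list
(recv_delayprops, local_delayprops, inherited_delayprops) and replayed with
zfs_set_prop_nvlist() once the receive has completed.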
diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c
index c2f48210398c..9e52bed77a61 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_log.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_log.c
@@ -108,86 +108,81 @@ zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
static void
zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
{
- uint32_t *bitmap;
- uint64_t *attrs;
- uint64_t *crtime;
- xoptattr_t *xoap;
- void *scanstamp;
- int i;
+ xoptattr_t *xoap;
xoap = xva_getxoptattr(xvap);
ASSERT(xoap);
lrattr->lr_attr_masksize = xvap->xva_mapsize;
- bitmap = &lrattr->lr_attr_bitmap;
- for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) {
+ uint32_t *bitmap = &lrattr->lr_attr_bitmap;
+ for (int i = 0; i != xvap->xva_mapsize; i++, bitmap++)
*bitmap = xvap->xva_reqattrmap[i];
- }
- /* Now pack the attributes up in a single uint64_t */
- attrs = (uint64_t *)bitmap;
- *attrs = 0;
- crtime = attrs + 1;
- bzero(crtime, 2 * sizeof (uint64_t));
- scanstamp = (caddr_t)(crtime + 2);
- bzero(scanstamp, AV_SCANSTAMP_SZ);
+ lr_attr_end_t *end = (lr_attr_end_t *)bitmap;
+ end->lr_attr_attrs = 0;
+ end->lr_attr_crtime[0] = 0;
+ end->lr_attr_crtime[1] = 0;
+ memset(end->lr_attr_scanstamp, 0, AV_SCANSTAMP_SZ);
+
if (XVA_ISSET_REQ(xvap, XAT_READONLY))
- *attrs |= (xoap->xoa_readonly == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_readonly == 0) ? 0 :
XAT0_READONLY;
if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
- *attrs |= (xoap->xoa_hidden == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_hidden == 0) ? 0 :
XAT0_HIDDEN;
if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
- *attrs |= (xoap->xoa_system == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_system == 0) ? 0 :
XAT0_SYSTEM;
if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
- *attrs |= (xoap->xoa_archive == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_archive == 0) ? 0 :
XAT0_ARCHIVE;
if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
- *attrs |= (xoap->xoa_immutable == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_immutable == 0) ? 0 :
XAT0_IMMUTABLE;
if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
- *attrs |= (xoap->xoa_nounlink == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_nounlink == 0) ? 0 :
XAT0_NOUNLINK;
if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
- *attrs |= (xoap->xoa_appendonly == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_appendonly == 0) ? 0 :
XAT0_APPENDONLY;
if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
- *attrs |= (xoap->xoa_opaque == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_opaque == 0) ? 0 :
XAT0_APPENDONLY;
if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
- *attrs |= (xoap->xoa_nodump == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_nodump == 0) ? 0 :
XAT0_NODUMP;
if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
- *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
XAT0_AV_QUARANTINED;
if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
- *attrs |= (xoap->xoa_av_modified == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_av_modified == 0) ? 0 :
XAT0_AV_MODIFIED;
if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
- ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
+ ZFS_TIME_ENCODE(&xoap->xoa_createtime, end->lr_attr_crtime);
if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
ASSERT(!XVA_ISSET_REQ(xvap, XAT_PROJID));
- bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
+ memcpy(end->lr_attr_scanstamp, xoap->xoa_av_scanstamp,
+ AV_SCANSTAMP_SZ);
} else if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
/*
* XAT_PROJID and XAT_AV_SCANSTAMP will never be valid
* at the same time, so we can share the same space.
*/
- bcopy(&xoap->xoa_projid, scanstamp, sizeof (uint64_t));
+ memcpy(end->lr_attr_scanstamp, &xoap->xoa_projid,
+ sizeof (uint64_t));
}
if (XVA_ISSET_REQ(xvap, XAT_REPARSE))
- *attrs |= (xoap->xoa_reparse == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_reparse == 0) ? 0 :
XAT0_REPARSE;
if (XVA_ISSET_REQ(xvap, XAT_OFFLINE))
- *attrs |= (xoap->xoa_offline == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_offline == 0) ? 0 :
XAT0_OFFLINE;
if (XVA_ISSET_REQ(xvap, XAT_SPARSE))
- *attrs |= (xoap->xoa_sparse == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_sparse == 0) ? 0 :
XAT0_SPARSE;
if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT))
- *attrs |= (xoap->xoa_projinherit == 0) ? 0 :
+ end->lr_attr_attrs |= (xoap->xoa_projinherit == 0) ? 0 :
XAT0_PROJINHERIT;
}
@@ -543,6 +538,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
+ ssize_t size = resid;
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
@@ -628,6 +624,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
off += len;
resid -= len;
}
+
+ if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+ dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
+ }
}
/*
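The zfs_log_xvattr() rewrite above replaces hand-rolled pointer arithmetic
over the tail of the variable-sized bitmap with a struct overlay. The
sketch below mirrors only the fields the code uses; the real lr_attr_end_t
lives in the ZIL headers, and the cast assumes the log record buffer is
suitably aligned, as it is in the ZIL.

#include <stdint.h>
#include <string.h>

#define AV_SCANSTAMP_SZ 32

typedef struct lr_attr_end {
	uint64_t lr_attr_attrs;			/* packed attribute bits */
	uint64_t lr_attr_crtime[2];		/* creation time */
	uint8_t	 lr_attr_scanstamp[AV_SCANSTAMP_SZ];
} lr_attr_end_t;

static void
clear_tail(uint32_t *bitmap_end)
{
	/* One cast replaces the old attrs/crtime/scanstamp pointer dance. */
	lr_attr_end_t *end = (lr_attr_end_t *)bitmap_end;
	end->lr_attr_attrs = 0;
	end->lr_attr_crtime[0] = 0;
	end->lr_attr_crtime[1] = 0;
	memset(end->lr_attr_scanstamp, 0, AV_SCANSTAMP_SZ);
}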
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 1b32d3876724..918938d62823 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -261,6 +261,9 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
}
ASSERT(zfs_uio_offset(uio) < zp->z_size);
+#if defined(__linux__)
+ ssize_t start_offset = zfs_uio_offset(uio);
+#endif
ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio));
ssize_t start_resid = n;
@@ -283,6 +286,18 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
/* convert checksum errors into IO errors */
if (error == ECKSUM)
error = SET_ERROR(EIO);
+
+#if defined(__linux__)
+ /*
+ * if we actually read some bytes, bubbling EFAULT
+ * up to become EAGAIN isn't what we want here...
+ *
+ * ...on Linux, at least. On FBSD, doing this breaks.
+ */
+ if (error == EFAULT &&
+ (zfs_uio_offset(uio) - start_offset) != 0)
+ error = 0;
+#endif
break;
}
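A minimal restatement of the Linux-only EFAULT handling above (#13516):
once a read has already transferred some bytes, the short read is reported
as success instead of letting EFAULT surface and be turned into EAGAIN
higher up. Variable names are illustrative.

#include <errno.h>
#include <sys/types.h>

static int
squash_partial_efault(int error, off_t start, off_t now)
{
#if defined(__linux__)
	if (error == EFAULT && (now - start) != 0)
		error = 0;	/* some bytes were read: not a failure */
#endif
	return (error);
}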
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 04b5d121e111..c1fd2de2e586 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -5008,7 +5008,7 @@ zbookmark_subtree_completed(const dnode_phys_t *dnp,
{
zbookmark_phys_t mod_zb = *subtree_root;
mod_zb.zb_blkid++;
- ASSERT(last_block->zb_level == 0);
+ ASSERT0(last_block->zb_level);
/* The objset_phys_t isn't before anything. */
if (dnp == NULL)
@@ -5034,6 +5034,22 @@ zbookmark_subtree_completed(const dnode_phys_t *dnp,
last_block) <= 0);
}
+/*
+ * This function is similar to zbookmark_subtree_completed(), but returns true
+ * if subtree_root is equal to or ahead of last_block, i.e. still to be done.
+ */
+boolean_t
+zbookmark_subtree_tbd(const dnode_phys_t *dnp,
+ const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block)
+{
+ ASSERT0(last_block->zb_level);
+ if (dnp == NULL)
+ return (B_FALSE);
+ return (zbookmark_compare(dnp->dn_datablkszsec, dnp->dn_indblkshift,
+ 1ULL << (DNODE_BLOCK_SHIFT - SPA_MINBLOCKSHIFT), 0, subtree_root,
+ last_block) >= 0);
+}
+
EXPORT_SYMBOL(zio_type_name);
EXPORT_SYMBOL(zio_buf_alloc);
EXPORT_SYMBOL(zio_data_buf_alloc);
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 59b05b4b08d0..7d141a12288b 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -84,10 +84,8 @@
#include <sys/zfs_rlock.h>
#include <sys/spa_impl.h>
#include <sys/zvol.h>
-
#include <sys/zvol_impl.h>
-
unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
@@ -577,6 +575,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
itx_wr_state_t write_state;
+ uint64_t sz = size;
if (zil_replaying(zilog, tx))
return;
@@ -628,6 +627,10 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
offset += len;
size -= len;
}
+
+ if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+ dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
+ }
}
/*
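Both zvol_log_write() above and the matching zfs_log_write() change earlier
snapshot the request size before the itx loop, because the loop consumes
the size variable as it carves the write into block-sized log records. A
sketch of why the pre-loop copy is what must be charged to the per-txg
write log counter; wrlog_count() is a stand-in for dsl_pool_wrlog_count().

#include <stdint.h>

static void wrlog_count(uint64_t bytes) { (void)bytes; /* accounting */ }

static void
log_write(uint64_t offset, uint64_t size, uint64_t blocksize)
{
	uint64_t sz = size;		/* snapshot before the loop */

	while (size) {
		uint64_t len = size > blocksize ? blocksize : size;
		/* ... build one itx covering [offset, offset + len) ... */
		offset += len;
		size -= len;		/* the loop consumes size */
	}
	wrlog_count(sz);		/* charge the original total */
}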
diff --git a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
index 55f0f1cf5249..920b90e88912 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
@@ -31,7 +31,7 @@ Requires(post): gcc, make, perl, diffutils
%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}
Requires: kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999
Requires(post): kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999
-Obsoletes: spl-dkms
+Obsoletes: spl-dkms <= %{version}
%endif
Provides: %{module}-kmod = %{version}
AutoReqProv: no
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index 1cd3f6b520ea..8cab1c3d70bb 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -120,13 +120,13 @@ License: @ZFS_META_LICENSE@
URL: https://github.com/openzfs/zfs
Source0: %{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-Requires: libzpool5 = %{version}
-Requires: libnvpair3 = %{version}
-Requires: libuutil3 = %{version}
-Requires: libzfs5 = %{version}
+Requires: libzpool5%{?_isa} = %{version}-%{release}
+Requires: libnvpair3%{?_isa} = %{version}-%{release}
+Requires: libuutil3%{?_isa} = %{version}-%{release}
+Requires: libzfs5%{?_isa} = %{version}-%{release}
Requires: %{name}-kmod = %{version}
-Provides: %{name}-kmod-common = %{version}
-Obsoletes: spl
+Provides: %{name}-kmod-common = %{version}-%{release}
+Obsoletes: spl <= %{version}
# zfs-fuse provides the same commands and man pages that OpenZFS does.
# Renaming those on either side would conflict with all available documentation.
@@ -178,8 +178,8 @@ This package contains the core ZFS command line utilities.
%package -n libzpool5
Summary: Native ZFS pool library for Linux
Group: System Environment/Kernel
-Obsoletes: libzpool2
-Obsoletes: libzpool4
+Obsoletes: libzpool2 <= %{version}
+Obsoletes: libzpool4 <= %{version}
%description -n libzpool5
This package contains the zpool library, which provides support
@@ -195,7 +195,7 @@ for managing zpools
%package -n libnvpair3
Summary: Solaris name-value library for Linux
Group: System Environment/Kernel
-Obsoletes: libnvpair1
+Obsoletes: libnvpair1 <= %{version}
%description -n libnvpair3
This package contains routines for packing and unpacking name-value
@@ -213,7 +213,7 @@ to write self describing data structures on disk.
%package -n libuutil3
Summary: Solaris userland utility library for Linux
Group: System Environment/Kernel
-Obsoletes: libuutil1
+Obsoletes: libuutil1 <= %{version}
%description -n libuutil3
This library provides a variety of compatibility functions for OpenZFS:
@@ -239,8 +239,8 @@ This library provides a variety of compatibility functions for OpenZFS:
%package -n libzfs5
Summary: Native ZFS filesystem library for Linux
Group: System Environment/Kernel
-Obsoletes: libzfs2
-Obsoletes: libzfs4
+Obsoletes: libzfs2 <= %{version}
+Obsoletes: libzfs4 <= %{version}
%description -n libzfs5
This package provides support for managing ZFS filesystems
@@ -255,16 +255,16 @@ This package provides support for managing ZFS filesystems
%package -n libzfs5-devel
Summary: Development headers
Group: System Environment/Kernel
-Requires: libzfs5 = %{version}
-Requires: libzpool5 = %{version}
-Requires: libnvpair3 = %{version}
-Requires: libuutil3 = %{version}
-Provides: libzpool5-devel
-Provides: libnvpair3-devel
-Provides: libuutil3-devel
-Obsoletes: zfs-devel
-Obsoletes: libzfs2-devel
-Obsoletes: libzfs4-devel
+Requires: libzfs5%{?_isa} = %{version}-%{release}
+Requires: libzpool5%{?_isa} = %{version}-%{release}
+Requires: libnvpair3%{?_isa} = %{version}-%{release}
+Requires: libuutil3%{?_isa} = %{version}-%{release}
+Provides: libzpool5-devel = %{version}-%{release}
+Provides: libnvpair3-devel = %{version}-%{release}
+Provides: libuutil3-devel = %{version}-%{release}
+Obsoletes: zfs-devel <= %{version}
+Obsoletes: libzfs2-devel <= %{version}
+Obsoletes: libzfs4-devel <= %{version}
%description -n libzfs5-devel
This package contains the header files needed for building additional
@@ -313,8 +313,8 @@ Summary: Python %{python_version} wrapper for libzfs_core
Group: Development/Languages/Python
License: Apache-2.0
BuildArch: noarch
-Requires: libzfs5 = %{version}
-Requires: libnvpair3 = %{version}
+Requires: libzfs5 = %{version}-%{release}
+Requires: libnvpair3 = %{version}-%{release}
Requires: libffi
Requires: python%{__python_pkg_version}
Requires: %{__python_cffi_pkg}
@@ -339,7 +339,6 @@ This package provides a python wrapper for the libzfs_core C library.
Summary: Initramfs module
Group: System Environment/Kernel
Requires: %{name}%{?_isa} = %{version}-%{release}
-Requires: %{name} = %{version}-%{release}
Requires: initramfs-tools
%description initramfs
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 19919a00afb3..709bd2533aa6 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -747,7 +747,8 @@ tags = ['functional', 'raidz']
[tests/functional/redundancy]
tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2',
- 'redundancy_draid3', 'redundancy_draid_damaged', 'redundancy_draid_spare1',
+ 'redundancy_draid3', 'redundancy_draid_damaged1',
+ 'redundancy_draid_damaged2', 'redundancy_draid_spare1',
'redundancy_draid_spare2', 'redundancy_draid_spare3', 'redundancy_mirror',
'redundancy_raidz', 'redundancy_raidz1', 'redundancy_raidz2',
'redundancy_raidz3', 'redundancy_stripe']
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index 559e98dd07b2..71b0cc8d6483 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -244,8 +244,6 @@ maybe = {
'pyzfs/pyzfs_unittest': ['SKIP', python_deps_reason],
'pool_checkpoint/checkpoint_discard_busy': ['FAIL', '11946'],
'projectquota/setup': ['SKIP', exec_reason],
- 'redundancy/redundancy_004_neg': ['FAIL', '7290'],
- 'redundancy/redundancy_draid_spare3': ['SKIP', known_reason],
'removal/removal_condense_export': ['FAIL', known_reason],
'reservation/reservation_008_pos': ['FAIL', '7741'],
'reservation/reservation_018_pos': ['FAIL', '5642'],
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index fff43e469165..0fd2f48f2c1f 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -81,7 +81,9 @@ TRIM_TXG_BATCH trim.txg_batch zfs_trim_txg_batch
TXG_HISTORY txg.history zfs_txg_history
TXG_TIMEOUT txg.timeout zfs_txg_timeout
UNLINK_SUSPEND_PROGRESS UNSUPPORTED zfs_unlink_suspend_progress
+VDEV_FILE_LOGICAL_ASHIFT vdev.file.logical_ashift vdev_file_logical_ashift
VDEV_FILE_PHYSICAL_ASHIFT vdev.file.physical_ashift vdev_file_physical_ashift
+VDEV_MAX_AUTO_ASHIFT vdev.max_auto_ashift zfs_vdev_max_auto_ashift
VDEV_MIN_MS_COUNT vdev.min_ms_count zfs_vdev_min_ms_count
VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
index ae948bb9b755..cb88def7cc3e 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_args_neg.ksh
@@ -58,7 +58,7 @@ set -A args "create" "add" "destroy" "import fakepool" \
"setvprop" "blah blah" "-%" "--?" "-*" "-=" \
"-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" \
"-t" "-w" "-z" "-E" "-H" "-I" "-J" "-K" \
- "-N" "-Q" "-R" "-T" "-W"
+ "-Q" "-R" "-T" "-W"
log_assert "Execute zdb using invalid parameters."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh
index d23cc43c90ef..90a81007e984 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_objset_id.ksh
@@ -30,10 +30,16 @@
# 6. Confirm names
# 7. Run zdb -dddddd pool/objsetID objectID (hex)
# 8. Confirm names
-# 9. Obtain objsetID from /proc/spl/kstat/zfs/testpool/obset-0x<ID>
+# 9. Repeat with zdb -NNNNNN pool/objsetID objectID
+# 10. Obtain objsetID from /proc/spl/kstat/zfs/testpool/obset-0x<ID>
# (linux only)
-# 10. Run zdb -dddddd pool/objsetID (hex)
-# 11. Match name from zdb against proc entry
+# 11. Run zdb -dddddd pool/objsetID (hex)
+# 12. Match name from zdb against proc entry
+# 13. Create dataset with hex numeric name
+# 14. Create dataset with decimal numeric name
+# 15. zdb -d for numeric datasets succeeds
+# 16. zdb -N for numeric datasets fails
+# 17. zdb -dN for numeric datasets fails
#
function cleanup
@@ -48,6 +54,8 @@ write_count=8
blksize=131072
verify_runnable "global"
verify_disk_count "$DISKS" 2
+hex_ds=$TESTPOOL/0x400000
+num_ds=$TESTPOOL/100000
default_mirror_setup_noexit $DISKS
file_write -o create -w -f $init_data -b $blksize -c $write_count
@@ -78,6 +86,17 @@ do
(( $? != 0 )) && log_fail \
"zdb -dddddd $TESTPOOL/$id $obj failed $reason"
obj=$(printf "0x%X" $obj)
+
+ log_note "zdb -NNNNNN $TESTPOOL/$id $obj"
+ output=$(zdb -NNNNNN $TESTPOOL/$id $obj)
+ reason="($TESTPOOL/$TESTFS not in zdb output)"
+ echo $output |grep "$TESTPOOL/$TESTFS" > /dev/null
+ (( $? != 0 )) && log_fail \
+ "zdb -NNNNNN $TESTPOOL/$id $obj failed $reason"
+ reason="(file1 not in zdb output)"
+ echo $output |grep "file1" > /dev/null
+ (( $? != 0 )) && log_fail \
+ "zdb -NNNNNN $TESTPOOL/$id $obj failed $reason"
done
if is_linux; then
@@ -94,4 +113,22 @@ if is_linux; then
"zdb -dddddd $TESTPOOL/$objset_hex failed $reason"
fi
+log_must zfs create $hex_ds
+log_must zfs create $num_ds
+output=$(zdb -d $hex_ds)
+reason="($TESTPOOL/0x400 not in zdb output)"
+echo $output |grep "$hex_ds" > /dev/null
+(( $? != 0 )) && log_fail \
+ "zdb -d $hex_ds failed $reason"
+output=$(zdb -d $num_ds)
+reason="($num_ds not in zdb output)"
+echo $output |grep "$num_ds" > /dev/null
+(( $? != 0 )) && log_fail \
+	"zdb -d $num_ds failed $reason"
+
+# force numeric interpretation, should fail
+log_mustnot zdb -N $hex_ds
+log_mustnot zdb -N $num_ds
+log_mustnot zdb -Nd $hex_ds
+log_mustnot zdb -Nd $num_ds
+
log_pass "zdb -d <pool>/<objset ID> generates the correct names."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
index 89cc4b0d3082..0fa1c0055b3c 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
@@ -57,7 +57,9 @@ disk2=$TEST_BASE_DIR/disk2
log_must mkfile $SIZE $disk1
log_must mkfile $SIZE $disk2
+logical_ashift=$(get_tunable VDEV_FILE_LOGICAL_ASHIFT)
orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+max_auto_ashift=$(get_tunable VDEV_MAX_AUTO_ASHIFT)
typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
for ashift in ${ashifts[@]}
@@ -81,7 +83,8 @@ do
log_must zpool create $TESTPOOL $disk1
log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $ashift
log_must zpool add $TESTPOOL $disk2
- verify_ashift $disk2 $ashift
+ exp=$(( (ashift <= max_auto_ashift) ? ashift : logical_ashift ))
+ verify_ashift $disk2 $exp
if [[ $? -ne 0 ]]
then
log_fail "Device was added without setting ashift value to "\
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
index 6bbd46289f7c..8760f48dd2a4 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
@@ -54,6 +54,14 @@
verify_runnable "global"
+# We override $org_size and $exp_size from zpool_expand.cfg to make sure we get
+# a predictable free space value every time. Otherwise, if we left it
+# configurable, the ratio of free space to pool size would diverge too
+# much at low $org_size values.
+#
+org_size=$((1024 * 1024 * 1024))
+exp_size=$(($org_size * 2))
+
function cleanup
{
poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
@@ -68,11 +76,35 @@ function cleanup
unload_scsi_debug
}
+# Wait for the size of a pool to autoexpand to $1 and the total free space to
+# expand to $2 (both values allowing a 10% tolerance).
+#
+# Wait for up to 10 seconds for this to happen (typically takes 1-2 seconds)
+#
+function wait_for_autoexpand
+{
+ typeset exp_new_size=$1
+ typeset exp_new_free=$2
+
+ for i in $(seq 1 10) ; do
+ typeset new_size=$(get_pool_prop size $TESTPOOL1)
+ typeset new_free=$(get_prop avail $TESTPOOL1)
+ # Values need to be within 90% of each other (10% tolerance)
+ if within_percent $new_size $exp_new_size 90 > /dev/null && \
+ within_percent $new_free $exp_new_free 90 > /dev/null ; then
+ return
+ fi
+ sleep 1
+ done
+	log_fail "$TESTPOOL1 never expanded to $exp_new_size with $exp_new_free" \
+	    "free space (got $new_size with $new_free free space)"
+}
+
log_onexit cleanup
log_assert "zpool can be autoexpanded after set autoexpand=on on vdev expansion"
-for type in " " mirror raidz draid:1s; do
+for type in " " mirror raidz; do
log_note "Setting up loopback, scsi_debug, and file vdevs"
log_must truncate -s $org_size $FILE_LO
DEV1=$(losetup -f)
@@ -105,72 +137,38 @@ for type in " " mirror raidz draid:1s; do
log_note "Expanding loopback, scsi_debug, and file vdevs"
log_must truncate -s $exp_size $FILE_LO
log_must losetup -c $DEV1
- sleep 3
echo "2" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb
echo "1" > /sys/class/block/$DEV2/device/rescan
block_device_wait
- sleep 3
log_must truncate -s $exp_size $FILE_RAW
log_must zpool online -e $TESTPOOL1 $FILE_RAW
- typeset expand_size=$(get_pool_prop size $TESTPOOL1)
- typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
-
- log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
- "expanded size: $expand_size"
- # compare available pool size from zfs
- if [[ $zfs_expand_size -gt $zfs_prev_size ]]; then
- # check for zpool history for the pool size expansion
- if [[ $type == " " ]]; then
- typeset expansion_size=$(($exp_size-$org_size))
- typeset size_addition=$(zpool history -il $TESTPOOL1 |\
- grep "pool '$TESTPOOL1' size:" | \
- grep "vdev online" | \
- grep "(+${expansion_size}" | wc -l)
-
- if [[ $size_addition -ne 3 ]]; then
- log_fail "pool $TESTPOOL1 has not expanded, " \
- "$size_addition/3 vdevs expanded"
- fi
- elif [[ $type == "mirror" ]]; then
- typeset expansion_size=$(($exp_size-$org_size))
- zpool history -il $TESTPOOL1 | \
- grep "pool '$TESTPOOL1' size:" | \
- grep "vdev online" | \
- grep "(+${expansion_size})" >/dev/null 2>&1
-
- if [[ $? -ne 0 ]] ; then
- log_fail "pool $TESTPOOL1 has not expanded"
- fi
- elif [[ $type == "draid:1s" ]]; then
- typeset expansion_size=$((2*($exp_size-$org_size)))
- zpool history -il $TESTPOOL1 | \
- grep "pool '$TESTPOOL1' size:" | \
- grep "vdev online" | \
- grep "(+${expansion_size})" >/dev/null 2>&1
-
- if [[ $? -ne 0 ]]; then
- log_fail "pool $TESTPOOL has not expanded"
- fi
- else
- typeset expansion_size=$((3*($exp_size-$org_size)))
- zpool history -il $TESTPOOL1 | \
- grep "pool '$TESTPOOL1' size:" | \
- grep "vdev online" | \
- grep "(+${expansion_size})" >/dev/null 2>&1
-
- if [[ $? -ne 0 ]]; then
- log_fail "pool $TESTPOOL has not expanded"
- fi
- fi
- else
- log_fail "pool $TESTPOOL1 is not autoexpanded after vdev " \
- "expansion. Previous size: $zfs_prev_size and expanded " \
- "size: $zfs_expand_size"
+
+ # The expected free space values below were observed at the time of
+ # this commit. However, we know ZFS overhead will change over time,
+ # and thus we do not do an exact comparison to these values in
+ # wait_for_autoexpand. Rather, we make sure the free space
+ # is within some small percentage threshold of these values.
+ typeset exp_new_size=$(($prev_size * 2))
+ if [[ "$type" == " " ]] ; then
+ exp_new_free=6045892608
+ elif [[ "$type" == "mirror" ]] ; then
+ exp_new_free=1945997312
+ elif [[ "$type" == "raidz" ]] ; then
+ exp_new_free=3977637338
+	elif [[ "$type" == "draid:1s" ]]; then
+ exp_new_free=1946000384
fi
+ wait_for_autoexpand $exp_new_size $exp_new_free
+
+ expand_size=$(get_pool_prop size $TESTPOOL1)
+
+ log_note "$TESTPOOL1 '$type' grew from $prev_size -> $expand_size with" \
+ "free space from $zfs_prev_size -> $(get_prop avail $TESTPOOL1)"
+
cleanup
done
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh
index 2d2b18f8bb5b..189c11f0d64f 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh
@@ -40,7 +40,7 @@
verify_runnable "global"
-if [[ $(linux_version) -lt $(linux_version "5.1") ]]; then
+if ! grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r); then
log_unsupported "Requires io_uring support"
fi
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh
index 59f64081a977..a18e634cefa7 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_lun_expsz.ksh
@@ -48,14 +48,18 @@ log_must zpool checkpoint $NESTEDPOOL
log_must truncate -s $EXPSZ $FILEDISK1
log_must zpool online -e $NESTEDPOOL $FILEDISK1
NEWSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $2}')
+DEXPSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $6}')
nested_change_state_after_checkpoint
log_mustnot [ "$INITSZ" = "$NEWSZ" ]
+log_must [ "$DEXPSZ" = "-" ]
log_must zpool export $NESTEDPOOL
log_must zpool import -d $FILEDISKDIR --rewind-to-checkpoint $NESTEDPOOL
nested_verify_pre_checkpoint_state
FINSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $2}')
-log_must [ "$INITSZ" = "$FINSZ" ]
+DEXPSZ=$(zpool list -v | grep "$FILEDISK1" | awk '{print $6}')
+log_must [ "$EXPSZ" = "$FINSZ" ]
+log_must [ "$DEXPSZ" != "-" ]
log_pass "LUN expansion rewinded correctly."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
index 42c11c4aa957..7c1930beb743 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/Makefile.am
@@ -6,7 +6,8 @@ dist_pkgdata_SCRIPTS = \
redundancy_draid1.ksh \
redundancy_draid2.ksh \
redundancy_draid3.ksh \
- redundancy_draid_damaged.ksh \
+ redundancy_draid_damaged1.ksh \
+ redundancy_draid_damaged2.ksh \
redundancy_draid_spare1.ksh \
redundancy_draid_spare2.ksh \
redundancy_draid_spare3.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh
index 6796cc78a1bd..1c1183c09f08 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh
@@ -89,22 +89,9 @@ function test_sequential_resilver # <pool> <parity> <dir>
done
log_must zpool scrub -w $pool
+ log_must zpool status $pool
- # When only a single child was overwritten the sequential resilver
-	# can fully repair the damage from parity and the scrub will have
- # nothing to repair. When multiple children are silently damaged
- # the sequential resilver will calculate the wrong data since only
- # the parity information is used and it cannot be verified with
- # the checksum. However, since only the resilvering devices are
- # written to with the bad data a subsequent scrub will be able to
- # fully repair the pool.
- #
- if [[ $nparity == 1 ]]; then
- log_must check_pool_status $pool "scan" "repaired 0B"
- else
- log_mustnot check_pool_status $pool "scan" "repaired 0B"
- fi
-
+ log_mustnot check_pool_status $pool "scan" "repaired 0B"
log_must check_pool_status $pool "errors" "No known data errors"
log_must check_pool_status $pool "scan" "with 0 errors"
}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh
new file mode 100755
index 000000000000..8e06db9bad99
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh
@@ -0,0 +1,157 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# When sequentially resilvering a dRAID pool to a distributed spare
+# silent damage to an online vdev in a replacing or spare mirror vdev
+# is not expected to be repaired. Not only does the rebuild have no
+# reason to suspect the silent damage but even if it did there's no
+# checksum available to determine the correct copy and make the repair.
+# However, the subsequent scrub should detect and repair any damage.
+#
+# STRATEGY:
+# 1. Create block device files for the test draid pool
+# 2. For each parity value [1..3]
+# a. Create a draid pool
+# b. Fill it with some directories/files
+# c. Systematically damage and replace three devices by:
+# - Overwrite the device
+# - Replace the damaged vdev with a distributed spare
+# - Scrub the pool and verify repair IO is issued
+# d. Detach the distributed spares
+# e. Scrub the pool and verify there was nothing to repair
+# f. Destroy the draid pool
+#
+
+typeset -r devs=7
+typeset -r dev_size_mb=512
+typeset -a disks
+
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
+
+function cleanup
+{
+ poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+
+ for i in {0..$devs}; do
+ rm -f "$TEST_BASE_DIR/dev-$i"
+ done
+
+ set_tunable32 PREFETCH_DISABLE $prefetch_disable
+ set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+ device=$TEST_BASE_DIR/dev-$i
+ log_must truncate -s ${dev_size_mb}M $device
+ disks[${#disks[*]}+1]=$device
+done
+
+# Disk file which will be attached
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+dir=$TEST_BASE_DIR
+
+for nparity in 1 2 3; do
+ raid=draid${nparity}:3s
+
+ log_must zpool create -f -O compression=off -o cachefile=none \
+ $TESTPOOL $raid ${disks[@]}
+ # log_must zfs set primarycache=metadata $TESTPOOL
+
+ log_must zfs create $TESTPOOL/fs
+ log_must fill_fs /$TESTPOOL/fs 1 256 10 1024 R
+
+ log_must zfs create -o compress=on $TESTPOOL/fs2
+ log_must fill_fs /$TESTPOOL/fs2 1 256 10 1024 R
+
+ log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+ log_must fill_fs /$TESTPOOL/fs3 1 256 10 1024 R
+
+ log_must zpool export $TESTPOOL
+ log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+ log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+ for nspare in 0 1 2; do
+ damaged=$dir/dev-${nspare}
+ spare=draid${nparity}-0-${nspare}
+
+ log_must zpool export $TESTPOOL
+ log_must dd conv=notrunc if=/dev/zero of=$damaged \
+ bs=1M seek=4 count=$(($dev_size_mb-4))
+ log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+ log_must zpool replace -fsw $TESTPOOL $damaged $spare
+
+ # Scrub the pool after the sequential resilver and verify
+ # that the silent damage was repaired by the scrub.
+ log_must zpool scrub -w $TESTPOOL
+ log_must zpool status $TESTPOOL
+ log_must check_pool_status $TESTPOOL "errors" \
+ "No known data errors"
+ log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+ log_mustnot check_pool_status $TESTPOOL "scan" "repaired 0B"
+ done
+
+ for nspare in 0 1 2; do
+ log_must check_vdev_state $TESTPOOL \
+ spare-${nspare} "ONLINE"
+ log_must check_vdev_state $TESTPOOL \
+ ${dir}/dev-${nspare} "ONLINE"
+ log_must check_vdev_state $TESTPOOL \
+ draid${nparity}-0-${nspare} "ONLINE"
+ done
+
+ # Detach the distributed spares and scrub the pool again to
+ # verify no damage remained on the originally corrupted vdevs.
+ for nspare in 0 1 2; do
+ log_must zpool detach $TESTPOOL draid${nparity}-0-${nspare}
+ done
+
+ log_must zpool clear $TESTPOOL
+ log_must zpool scrub -w $TESTPOOL
+ log_must zpool status $TESTPOOL
+
+ log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+ log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+ log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+
+ log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "draid damaged device scrub test succeeded."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
index 793904db91ca..c0c7b682def9 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
@@ -133,6 +133,14 @@ recv_cksum=$(md5digest /$ds/$TESTFILE0)
log_must test "$recv_cksum" == "$cksum"
log_must zfs destroy -r $ds
+# Test that we can override sharesmb property for encrypted raw stream.
+log_note "Must be able to override sharesmb property for encrypted raw stream"
+ds=$TESTPOOL/recv
+log_must eval "zfs send -w $esnap > $sendfile"
+log_must eval "zfs recv -o sharesmb=on $ds < $sendfile"
+log_must test "$(get_prop 'sharesmb' $ds)" == "on"
+log_must zfs destroy -r $ds
+
# Test that we can override encryption properties on a properties stream
# of an unencrypted dataset, turning it into an encryption root.
log_note "Must be able to receive stream with props as encryption root"
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index a10ec47d9f45..6f54dfc699a4 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -85,6 +85,9 @@
#define HAVE_AVX512VL 1
#endif
+/* bdevname() is available */
+/* #undef HAVE_BDEVNAME */
+
/* bdev_check_media_change() exists */
/* #undef HAVE_BDEV_CHECK_MEDIA_CHANGE */
@@ -163,6 +166,9 @@
/* blk_alloc_queue_rh() expects request function */
/* #undef HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH */
+/* blk_cleanup_disk() exists */
+/* #undef HAVE_BLK_CLEANUP_DISK */
+
/* blk queue backing_dev_info is dynamic */
/* #undef HAVE_BLK_QUEUE_BDI_DYNAMIC */
@@ -341,6 +347,9 @@
/* Define if you have the iconv() function and it works. */
#define HAVE_ICONV 1
+/* Define if compiler supports -Winfinite-recursion */
+/* #undef HAVE_INFINITE_RECURSION */
+
/* yes */
/* #undef HAVE_INODE_LOCK_SHARED */
@@ -500,9 +509,6 @@
/* Noting that make_request_fn() returns void */
/* #undef HAVE_MAKE_REQUEST_FN_RET_VOID */
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
-
/* iops->mkdir() takes umode_t */
/* #undef HAVE_MKDIR_UMODE_T */
@@ -566,6 +572,9 @@
/* qat is enabled and existed */
/* #undef HAVE_QAT */
+/* register_shrinker is vararg */
+/* #undef HAVE_REGISTER_SHRINKER_VARARG */
+
/* iops->rename() wants flags */
/* #undef HAVE_RENAME_WANTS_FLAGS */
@@ -667,6 +676,9 @@
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
+/* Define to 1 if you have the <stdio.h> header file. */
+#define HAVE_STDIO_H 1
+
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
@@ -784,6 +796,9 @@
/* xattr_handler->get() wants both dentry and inode */
/* #undef HAVE_XATTR_GET_DENTRY_INODE */
+/* xattr_handler->get() wants dentry and inode and flags */
+/* #undef HAVE_XATTR_GET_DENTRY_INODE_FLAGS */
+
/* xattr_handler->get() wants xattr_handler */
/* #undef HAVE_XATTR_GET_HANDLER */
@@ -920,7 +935,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.1.5-FreeBSD_g6c3c5fcfb"
+#define ZFS_META_ALIAS "zfs-2.1.6-FreeBSD_g6a6bd4939"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -929,7 +944,7 @@
/* #undef ZFS_META_DATA */
/* Define the maximum compatible kernel version. */
-#define ZFS_META_KVER_MAX "5.18"
+#define ZFS_META_KVER_MAX "5.19"
/* Define the minimum compatible kernel version. */
#define ZFS_META_KVER_MIN "3.10"
@@ -950,10 +965,10 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g6c3c5fcfb"
+#define ZFS_META_RELEASE "FreeBSD_g6a6bd4939"
/* Define the project version. */
-#define ZFS_META_VERSION "2.1.5"
+#define ZFS_META_VERSION "2.1.6"
/* count is located in percpu_ref.data */
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 7ddbb3bedc5c..a801a27c589a 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1,4 +1 @@
-/*
- * $FreeBSD$
- */
-#define ZFS_META_GITREV "zfs-2.1.5-0-g6c3c5fcfb"
+#define ZFS_META_GITREV "zfs-2.1.6-0-g6a6bd4939"