aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Matuska <mm@FreeBSD.org>2023-10-08 07:58:51 +0000
committerMartin Matuska <mm@FreeBSD.org>2023-10-08 08:14:19 +0000
commitfdc38bc6cd28a56fbc82d6ca1d99f47569070b3a (patch)
treeaa04fe7c375b94182c88904d82d503d14765305a
parentf69181e9de1b021f4689ce50b420f9c694268ec8 (diff)
parent2407f30bda96f7d61a32fc38c638b3eb5b216284 (diff)
downloadsrc-fdc38bc6cd28a56fbc82d6ca1d99f47569070b3a.tar.gz
src-fdc38bc6cd28a56fbc82d6ca1d99f47569070b3a.zip
zfs: merge openzfs/zfs@2407f30bd (zfs-2.2-release) into stable/14
Notable upstream pull request merges: #15290 33d7c2d16 import: require force when cachefile hostid doesn't match on-disk #15319 bcd010d3a Reduce number of metaslab preload taskq threads #15339 1611b8e56 Add BTI landing pads to the AArch64 SHA2 assembly #15340 bc77a0c85 ARC: Remove b_cv from struct l1arc_buf_hdr #15347 3158b5d71 ARC: Drop different size headers for crypto #15350 ba7797c8d ARC: Remove b_bufcnt/b_ebufcnt from ARC headers #15353 9be8ddfb3 ZIL: Reduce maximum size of WR_COPIED to 7.5K #15362 8495536f7 zfsconcepts: add description of block cloning Obtained from: OpenZFS OpenZFS commit: 2407f30bda96f7d61a32fc38c638b3eb5b216284 OpenZFS tag: zfs-2.2.0-rc5
-rw-r--r--sys/contrib/openzfs/.cirrus.yml21
-rw-r--r--sys/contrib/openzfs/.gitignore2
-rw-r--r--sys/contrib/openzfs/META2
-rw-r--r--sys/contrib/openzfs/cmd/zpool/zpool_main.c23
-rw-r--r--sys/contrib/openzfs/config/zfs-build.m43
-rw-r--r--sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h12
-rw-r--r--sys/contrib/openzfs/include/sys/arc_impl.h10
-rw-r--r--sys/contrib/openzfs/include/sys/metaslab_impl.h1
-rw-r--r--sys/contrib/openzfs/include/sys/spa_impl.h4
-rw-r--r--sys/contrib/openzfs/man/man4/zfs.411
-rw-r--r--sys/contrib/openzfs/man/man7/zfsconcepts.740
-rw-r--r--sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S3
-rw-r--r--sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S2
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c22
-rw-r--r--sys/contrib/openzfs/module/zfs/arc.c322
-rw-r--r--sys/contrib/openzfs/module/zfs/metaslab.c23
-rw-r--r--sys/contrib/openzfs/module/zfs/spa.c46
-rw-r--r--sys/contrib/openzfs/module/zfs/zil.c17
-rw-r--r--sys/contrib/openzfs/rpm/generic/zfs.spec.in2
-rw-r--r--sys/contrib/openzfs/tests/runfiles/common.run4
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am4
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg5
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib1
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed.ksh59
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile.ksh65
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh75
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh70
-rw-r--r--sys/modules/zfs/zfs_config.h4
-rw-r--r--sys/modules/zfs/zfs_gitrev.h2
30 files changed, 521 insertions, 336 deletions
diff --git a/sys/contrib/openzfs/.cirrus.yml b/sys/contrib/openzfs/.cirrus.yml
new file mode 100644
index 000000000000..18b292289e20
--- /dev/null
+++ b/sys/contrib/openzfs/.cirrus.yml
@@ -0,0 +1,21 @@
+env:
+ CIRRUS_CLONE_DEPTH: 1
+ ARCH: amd64
+
+build_task:
+ matrix:
+ freebsd_instance:
+ image_family: freebsd-12-4
+ freebsd_instance:
+ image_family: freebsd-13-2
+ freebsd_instance:
+ image_family: freebsd-14-0-snap
+ prepare_script:
+ - pkg install -y autoconf automake libtool gettext-runtime gmake ksh93 py39-packaging py39-cffi py39-sysctl
+ configure_script:
+ - env MAKE=gmake ./autogen.sh
+ - env MAKE=gmake ./configure --with-config="user" --with-python=3.9
+ build_script:
+ - gmake -j `sysctl -n kern.smp.cpus`
+ install_script:
+ - gmake install
diff --git a/sys/contrib/openzfs/.gitignore b/sys/contrib/openzfs/.gitignore
index 8d91dd9466c5..1ef47d921c28 100644
--- a/sys/contrib/openzfs/.gitignore
+++ b/sys/contrib/openzfs/.gitignore
@@ -42,6 +42,7 @@
!udev/**
!.editorconfig
+!.cirrus.yml
!.gitignore
!.gitmodules
!AUTHORS
@@ -60,7 +61,6 @@
!TEST
!zfs.release.in
-
#
# Normal rules
#
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
index 9ffe90458dbd..4178f1b5daa4 100644
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -2,7 +2,7 @@ Meta: 1
Name: zfs
Branch: 1.0
Version: 2.2.0
-Release: rc4
+Release: rc5
Release-Tags: relext
License: CDDL
Author: OpenZFS
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index d64fdfa5ba4c..5507f9d3fd67 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -3122,12 +3122,21 @@ zfs_force_import_required(nvlist_t *config)
nvlist_t *nvinfo;
state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
- (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+ nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
+
+ /*
+ * The hostid on LOAD_INFO comes from the MOS label via
+ * spa_tryimport(). If its not there then we're likely talking to an
+ * older kernel, so use the top one, which will be from the label
+ * discovered in zpool_find_import(), or if a cachefile is in use, the
+ * local hostid.
+ */
+ if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0)
+ nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid())
return (B_TRUE);
- nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) {
mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo,
ZPOOL_CONFIG_MMP_STATE);
@@ -3198,7 +3207,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
time_t timestamp = 0;
uint64_t hostid = 0;
- if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
+ if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTNAME))
+ hostname = fnvlist_lookup_string(nvinfo,
+ ZPOOL_CONFIG_HOSTNAME);
+ else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
hostname = fnvlist_lookup_string(config,
ZPOOL_CONFIG_HOSTNAME);
@@ -3206,7 +3218,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
timestamp = fnvlist_lookup_uint64(config,
ZPOOL_CONFIG_TIMESTAMP);
- if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
+ if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTID))
+ hostid = fnvlist_lookup_uint64(nvinfo,
+ ZPOOL_CONFIG_HOSTID);
+ else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
hostid = fnvlist_lookup_uint64(config,
ZPOOL_CONFIG_HOSTID);
diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4
index 5ea6aa29a3de..e4197dc1424e 100644
--- a/sys/contrib/openzfs/config/zfs-build.m4
+++ b/sys/contrib/openzfs/config/zfs-build.m4
@@ -358,6 +358,9 @@ AC_DEFUN([ZFS_AC_RPM], [
AS_IF([test -n "$udevruledir" ], [
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"'
])
+ AS_IF([test -n "$bashcompletiondir" ], [
+ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_bashcompletiondir $(bashcompletiondir)"'
+ ])
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)'
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h
index c494f48bb48b..f749223daa72 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h
@@ -51,7 +51,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags)
- __field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type)
__field(uint16_t, hdr_psize)
__field(uint16_t, hdr_lsize)
@@ -70,7 +69,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
__entry->hdr_birth = ab->b_birth;
__entry->hdr_flags = ab->b_flags;
- __entry->hdr_bufcnt = ab->b_l1hdr.b_bufcnt;
__entry->hdr_psize = ab->b_psize;
__entry->hdr_lsize = ab->b_lsize;
__entry->hdr_spa = ab->b_spa;
@@ -84,12 +82,12 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
- "flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu "
+ "flags 0x%x type %u psize %u lsize %u spa %llu "
"state_type %u access %lu mru_hits %u mru_ghost_hits %u "
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags,
- __entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize,
+ __entry->hdr_type, __entry->hdr_psize,
__entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type,
__entry->hdr_access, __entry->hdr_mru_hits,
__entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
@@ -192,7 +190,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags)
- __field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type)
__field(uint16_t, hdr_psize)
__field(uint16_t, hdr_lsize)
@@ -223,7 +220,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
__entry->hdr_birth = hdr->b_birth;
__entry->hdr_flags = hdr->b_flags;
- __entry->hdr_bufcnt = hdr->b_l1hdr.b_bufcnt;
__entry->hdr_psize = hdr->b_psize;
__entry->hdr_lsize = hdr->b_lsize;
__entry->hdr_spa = hdr->b_spa;
@@ -255,7 +251,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->zb_blkid = zb->zb_blkid;
),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
- "flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u "
+ "flags 0x%x psize %u lsize %u spa %llu state_type %u "
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
"mfu_ghost_hits %u l2_hits %u refcount %lli } "
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
@@ -264,7 +260,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
"blkid %llu }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags,
- __entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize,
+ __entry->hdr_psize, __entry->hdr_lsize,
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h
index 78774792f367..adff42c55d05 100644
--- a/sys/contrib/openzfs/include/sys/arc_impl.h
+++ b/sys/contrib/openzfs/include/sys/arc_impl.h
@@ -159,10 +159,6 @@ struct arc_write_callback {
* these two allocation states.
*/
typedef struct l1arc_buf_hdr {
- /* for waiting on reads to complete */
- kcondvar_t b_cv;
- uint8_t b_byteswap;
-
/* protected by arc state mutex */
arc_state_t *b_state;
multilist_node_t b_arc_node;
@@ -173,7 +169,7 @@ typedef struct l1arc_buf_hdr {
uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits;
- uint32_t b_bufcnt;
+ uint8_t b_byteswap;
arc_buf_t *b_buf;
/* self protecting */
@@ -436,12 +432,12 @@ typedef struct l2arc_dev {
*/
typedef struct arc_buf_hdr_crypt {
abd_t *b_rabd; /* raw encrypted data */
- dmu_object_type_t b_ot; /* object type */
- uint32_t b_ebufcnt; /* count of encrypted buffers */
/* dsobj for looking up encryption key for l2arc encryption */
uint64_t b_dsobj;
+ dmu_object_type_t b_ot; /* object type */
+
/* encryption parameters */
uint8_t b_salt[ZIO_DATA_SALT_LEN];
uint8_t b_iv[ZIO_DATA_IV_LEN];
diff --git a/sys/contrib/openzfs/include/sys/metaslab_impl.h b/sys/contrib/openzfs/include/sys/metaslab_impl.h
index d328068890cc..4f434291ddbf 100644
--- a/sys/contrib/openzfs/include/sys/metaslab_impl.h
+++ b/sys/contrib/openzfs/include/sys/metaslab_impl.h
@@ -250,7 +250,6 @@ struct metaslab_group {
int64_t mg_activation_count;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
- taskq_t *mg_taskq;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;
diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h
index 588c72f6e4fa..cdf65c371337 100644
--- a/sys/contrib/openzfs/include/sys/spa_impl.h
+++ b/sys/contrib/openzfs/include/sys/spa_impl.h
@@ -423,7 +423,9 @@ struct spa {
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
+ taskq_t *spa_metaslab_taskq; /* Taskq for metaslab preload */
taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */
+ taskq_t *spa_upgrade_taskq; /* Taskq for upgrade jobs */
uint64_t spa_multihost; /* multihost aware (mmp) */
mmp_thread_t spa_mmp; /* multihost mmp thread */
list_t spa_leaf_list; /* list of leaf vdevs */
@@ -447,8 +449,6 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
zfs_refcount_t spa_refcount; /* number of opens */
-
- taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
};
extern char *spa_config_path;
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 3843419731b8..cfadd79d87f3 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -402,6 +402,12 @@ Practical upper limit of total metaslabs per top-level vdev.
.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enable metaslab group preloading.
.
+.It Sy metaslab_preload_limit Ns = Ns Sy 10 Pq uint
+Maximum number of metaslabs per group to preload
+.
+.It Sy metaslab_preload_pct Ns = Ns Sy 50 Pq uint
+Percentage of CPUs to run a metaslab preload taskq
+.
.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Give more weight to metaslabs with lower LBAs,
assuming they have greater bandwidth,
@@ -2144,6 +2150,11 @@ On very fragmented pools, lowering this
.Pq typically to Sy 36 KiB
can improve performance.
.
+.It Sy zil_maxcopied Ns = Ns Sy 7680 Ns B Po 7.5 KiB Pc Pq uint
+This sets the maximum number of write bytes logged via WR_COPIED.
+It tunes a tradeoff between additional memory copy and possibly worse log
+space efficiency vs additional range lock/unlock.
+.
.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64
This sets the minimum delay in nanoseconds ZIL care to delay block commit,
waiting for more records.
diff --git a/sys/contrib/openzfs/man/man7/zfsconcepts.7 b/sys/contrib/openzfs/man/man7/zfsconcepts.7
index 18a9e9b5cafe..1be3d961c3d7 100644
--- a/sys/contrib/openzfs/man/man7/zfsconcepts.7
+++ b/sys/contrib/openzfs/man/man7/zfsconcepts.7
@@ -28,8 +28,9 @@
.\" Copyright 2019 Richard Laager. All rights reserved.
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
+.\" Copyright 2023 Klara, Inc.
.\"
-.Dd June 30, 2019
+.Dd October 6, 2023
.Dt ZFSCONCEPTS 7
.Os
.
@@ -205,3 +206,40 @@ practices, such as regular backups.
Consider using the
.Sy compression
property as a less resource-intensive alternative.
+.Ss Block cloning
+Block cloning is a facility that allows a file (or parts of a file) to be
+.Qq cloned ,
+that is, a shallow copy made where the existing data blocks are referenced
+rather than copied.
+Later modifications to the data will cause a copy of the data block to be taken
+and that copy modified.
+This facility is used to implement
+.Qq reflinks
+or
+.Qq file-level copy-on-write .
+.Pp
+Cloned blocks are tracked in a special on-disk structure called the Block
+Reference Table
+.Po BRT
+.Pc .
+Unlike deduplication, this table has minimal overhead, so can be enabled at all
+times.
+.Pp
+Also unlike deduplication, cloning must be requested by a user program.
+Many common file copying programs, including newer versions of
+.Nm /bin/cp ,
+will try to create clones automatically.
+Look for
+.Qq clone ,
+.Qq dedupe
+or
+.Qq reflink
+in the documentation for more information.
+.Pp
+There are some limitations to block cloning.
+Only whole blocks can be cloned, and blocks can not be cloned if they are not
+yet written to disk, or if they are encrypted, or the source and destination
+.Sy recordsize
+properties differ.
+The OS may add additional restrictions;
+for example, most versions of Linux will not allow clones across datasets.
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S
index fa50c4e74d59..7ae486e4e229 100644
--- a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S
@@ -49,6 +49,7 @@
.type zfs_sha256_block_armv7,%function
.align 6
zfs_sha256_block_armv7:
+ hint #34 // bti c
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@@ -1015,6 +1016,7 @@ zfs_sha256_block_armv7:
.type zfs_sha256_block_armv8,%function
.align 6
zfs_sha256_block_armv8:
+ hint #34 // bti c
.Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@@ -1155,6 +1157,7 @@ zfs_sha256_block_armv8:
.type zfs_sha256_block_neon,%function
.align 4
zfs_sha256_block_neon:
+ hint #34 // bti c
.Lneon_entry:
stp x29, x30, [sp, #-16]!
mov x29, sp
diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S
index 1683fc1ca53c..9c61eeee4d7b 100644
--- a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S
+++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S
@@ -73,6 +73,7 @@
.type zfs_sha512_block_armv7,%function
.align 6
zfs_sha512_block_armv7:
+ hint #34 // bti c
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@@ -1040,6 +1041,7 @@ zfs_sha512_block_armv7:
.type zfs_sha512_block_armv8,%function
.align 6
zfs_sha512_block_armv8:
+ hint #34 // bti c
.Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
stp x29,x30,[sp,#-16]!
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 8ae2f23c3ecf..38ef590702cb 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -596,28 +596,6 @@ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct,
" space map to continue allocations in a first-fit fashion");
/* END CSTYLED */
-/*
- * Percentage of all cpus that can be used by the metaslab taskq.
- */
-extern int metaslab_load_pct;
-
-/* BEGIN CSTYLED */
-SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct,
- CTLFLAG_RWTUN, &metaslab_load_pct, 0,
- "Percentage of cpus that can be used by the metaslab taskq");
-/* END CSTYLED */
-
-/*
- * Max number of metaslabs per group to preload.
- */
-extern uint_t metaslab_preload_limit;
-
-/* BEGIN CSTYLED */
-SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, preload_limit,
- CTLFLAG_RWTUN, &metaslab_preload_limit, 0,
- "Max number of metaslabs per group to preload");
-/* END CSTYLED */
-
/* mmp.c */
int
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 22dc0ed5e3b6..b5946e7604c0 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -748,8 +748,7 @@ taskq_t *arc_prune_taskq;
* Other sizes
*/
-#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
-#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr))
+#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
#define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
/*
@@ -1113,7 +1112,6 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
*/
static kmem_cache_t *hdr_full_cache;
-static kmem_cache_t *hdr_full_crypt_cache;
static kmem_cache_t *hdr_l2only_cache;
static kmem_cache_t *buf_cache;
@@ -1134,7 +1132,6 @@ buf_fini(void)
for (int i = 0; i < BUF_LOCKS; i++)
mutex_destroy(BUF_HASH_LOCK(i));
kmem_cache_destroy(hdr_full_cache);
- kmem_cache_destroy(hdr_full_crypt_cache);
kmem_cache_destroy(hdr_l2only_cache);
kmem_cache_destroy(buf_cache);
}
@@ -1151,7 +1148,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
memset(hdr, 0, HDR_FULL_SIZE);
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
- cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
zfs_refcount_create(&hdr->b_l1hdr.b_refcnt);
#ifdef ZFS_DEBUG
mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -1164,19 +1160,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
}
static int
-hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
-{
- (void) unused;
- arc_buf_hdr_t *hdr = vbuf;
-
- hdr_full_cons(vbuf, unused, kmflag);
- memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr));
- arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
-
- return (0);
-}
-
-static int
hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
{
(void) unused, (void) kmflag;
@@ -1211,7 +1194,6 @@ hdr_full_dest(void *vbuf, void *unused)
arc_buf_hdr_t *hdr = vbuf;
ASSERT(HDR_EMPTY(hdr));
- cv_destroy(&hdr->b_l1hdr.b_cv);
zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt);
#ifdef ZFS_DEBUG
mutex_destroy(&hdr->b_l1hdr.b_freeze_lock);
@@ -1221,16 +1203,6 @@ hdr_full_dest(void *vbuf, void *unused)
}
static void
-hdr_full_crypt_dest(void *vbuf, void *unused)
-{
- (void) vbuf, (void) unused;
-
- hdr_full_dest(vbuf, unused);
- arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr),
- ARC_SPACE_HDRS);
-}
-
-static void
hdr_l2only_dest(void *vbuf, void *unused)
{
(void) unused;
@@ -1285,9 +1257,6 @@ retry:
hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
- hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
- HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
- NULL, NULL, NULL, 0);
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
NULL, NULL, 0);
@@ -1995,7 +1964,6 @@ arc_buf_untransform_in_place(arc_buf_t *buf)
arc_buf_size(buf));
buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
- hdr->b_crypt_hdr.b_ebufcnt -= 1;
}
/*
@@ -2230,7 +2198,6 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) {
- ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr));
@@ -2270,7 +2237,6 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) {
- ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr));
@@ -2386,7 +2352,9 @@ arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
l2hdr = &hdr->b_l2hdr;
if (l1hdr) {
- abi->abi_bufcnt = l1hdr->b_bufcnt;
+ abi->abi_bufcnt = 0;
+ for (arc_buf_t *buf = l1hdr->b_buf; buf; buf = buf->b_next)
+ abi->abi_bufcnt++;
abi->abi_access = l1hdr->b_arc_access;
abi->abi_mru_hits = l1hdr->b_mru_hits;
abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits;
@@ -2414,7 +2382,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
{
arc_state_t *old_state;
int64_t refcnt;
- uint32_t bufcnt;
boolean_t update_old, update_new;
arc_buf_contents_t type = arc_buf_type(hdr);
@@ -2428,19 +2395,16 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (HDR_HAS_L1HDR(hdr)) {
old_state = hdr->b_l1hdr.b_state;
refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt);
- bufcnt = hdr->b_l1hdr.b_bufcnt;
- update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL ||
- HDR_HAS_RABD(hdr));
+ update_old = (hdr->b_l1hdr.b_buf != NULL ||
+ hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
- IMPLY(GHOST_STATE(old_state), bufcnt == 0);
- IMPLY(GHOST_STATE(new_state), bufcnt == 0);
IMPLY(GHOST_STATE(old_state), hdr->b_l1hdr.b_buf == NULL);
IMPLY(GHOST_STATE(new_state), hdr->b_l1hdr.b_buf == NULL);
- IMPLY(old_state == arc_anon, bufcnt <= 1);
+ IMPLY(old_state == arc_anon, hdr->b_l1hdr.b_buf == NULL ||
+ ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
} else {
old_state = arc_l2c_only;
refcnt = 0;
- bufcnt = 0;
update_old = B_FALSE;
}
update_new = update_old;
@@ -2488,14 +2452,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (update_new && new_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(new_state)) {
- ASSERT0(bufcnt);
/*
* When moving a header to a ghost state, we first
- * remove all arc buffers. Thus, we'll have a
- * bufcnt of zero, and no arc buffer to use for
- * the reference. As a result, we use the arc
- * header pointer for the reference.
+ * remove all arc buffers. Thus, we'll have no arc
+ * buffer to use for the reference. As a result, we
+ * use the arc header pointer for the reference.
*/
(void) zfs_refcount_add_many(
&new_state->arcs_size[type],
@@ -2503,7 +2465,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr));
} else {
- uint32_t buffers = 0;
/*
* Each individual buffer holds a unique reference,
@@ -2512,8 +2473,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
*/
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) {
- ASSERT3U(bufcnt, !=, 0);
- buffers++;
/*
* When the arc_buf_t is sharing the data
@@ -2529,7 +2488,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
&new_state->arcs_size[type],
arc_buf_size(buf), buf);
}
- ASSERT3U(bufcnt, ==, buffers);
if (hdr->b_l1hdr.b_pabd != NULL) {
(void) zfs_refcount_add_many(
@@ -2548,7 +2506,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (update_old && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(old_state)) {
- ASSERT0(bufcnt);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(!HDR_HAS_RABD(hdr));
@@ -2564,7 +2521,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
&old_state->arcs_size[type],
HDR_GET_LSIZE(hdr), hdr);
} else {
- uint32_t buffers = 0;
/*
* Each individual buffer holds a unique reference,
@@ -2573,8 +2529,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
*/
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) {
- ASSERT3U(bufcnt, !=, 0);
- buffers++;
/*
* When the arc_buf_t is sharing the data
@@ -2590,7 +2544,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
&old_state->arcs_size[type],
arc_buf_size(buf), buf);
}
- ASSERT3U(bufcnt, ==, buffers);
ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
HDR_HAS_RABD(hdr));
@@ -2838,9 +2791,6 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
VERIFY3P(buf->b_data, !=, NULL);
hdr->b_l1hdr.b_buf = buf;
- hdr->b_l1hdr.b_bufcnt += 1;
- if (encrypted)
- hdr->b_crypt_hdr.b_ebufcnt += 1;
/*
* If the user wants the data from the hdr, we need to either copy or
@@ -3082,8 +3032,6 @@ arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf)
}
buf->b_next = NULL;
ASSERT3P(lastbuf, !=, buf);
- IMPLY(hdr->b_l1hdr.b_bufcnt > 0, lastbuf != NULL);
- IMPLY(hdr->b_l1hdr.b_bufcnt > 0, hdr->b_l1hdr.b_buf != NULL);
IMPLY(lastbuf != NULL, ARC_BUF_LAST(lastbuf));
return (lastbuf);
@@ -3122,22 +3070,20 @@ arc_buf_destroy_impl(arc_buf_t *buf)
}
buf->b_data = NULL;
- ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
- hdr->b_l1hdr.b_bufcnt -= 1;
-
- if (ARC_BUF_ENCRYPTED(buf)) {
- hdr->b_crypt_hdr.b_ebufcnt -= 1;
-
- /*
- * If we have no more encrypted buffers and we've
- * already gotten a copy of the decrypted data we can
- * free b_rabd to save some space.
- */
- if (hdr->b_crypt_hdr.b_ebufcnt == 0 &&
- HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd != NULL &&
- !HDR_IO_IN_PROGRESS(hdr)) {
- arc_hdr_free_abd(hdr, B_TRUE);
+ /*
+ * If we have no more encrypted buffers and we've already
+ * gotten a copy of the decrypted data we can free b_rabd
+ * to save some space.
+ */
+ if (ARC_BUF_ENCRYPTED(buf) && HDR_HAS_RABD(hdr) &&
+ hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) {
+ arc_buf_t *b;
+ for (b = hdr->b_l1hdr.b_buf; b; b = b->b_next) {
+ if (b != buf && ARC_BUF_ENCRYPTED(b))
+ break;
}
+ if (b == NULL)
+ arc_hdr_free_abd(hdr, B_TRUE);
}
}
@@ -3298,11 +3244,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
arc_buf_hdr_t *hdr;
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
- if (protected) {
- hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
- } else {
- hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
- }
+ hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
ASSERT(HDR_EMPTY(hdr));
#ifdef ZFS_DEBUG
@@ -3325,7 +3267,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_bufcnt = 0;
hdr->b_l1hdr.b_buf = NULL;
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
@@ -3351,16 +3292,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
(old == hdr_l2only_cache && new == hdr_full_cache));
- /*
- * if the caller wanted a new full header and the header is to be
- * encrypted we will actually allocate the header from the full crypt
- * cache instead. The same applies to freeing from the old cache.
- */
- if (HDR_PROTECTED(hdr) && new == hdr_full_cache)
- new = hdr_full_crypt_cache;
- if (HDR_PROTECTED(hdr) && old == hdr_full_cache)
- old = hdr_full_crypt_cache;
-
nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
@@ -3368,7 +3299,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
memcpy(nhdr, hdr, HDR_L2ONLY_SIZE);
- if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
+ if (new == hdr_full_cache) {
arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
/*
* arc_access and arc_change_state need to be aware that a
@@ -3382,7 +3313,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
ASSERT(!HDR_HAS_RABD(hdr));
} else {
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- ASSERT0(hdr->b_l1hdr.b_bufcnt);
#ifdef ZFS_DEBUG
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
#endif
@@ -3449,126 +3379,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
}
/*
- * This function allows an L1 header to be reallocated as a crypt
- * header and vice versa. If we are going to a crypt header, the
- * new fields will be zeroed out.
- */
-static arc_buf_hdr_t *
-arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
-{
- arc_buf_hdr_t *nhdr;
- arc_buf_t *buf;
- kmem_cache_t *ncache, *ocache;
-
- /*
- * This function requires that hdr is in the arc_anon state.
- * Therefore it won't have any L2ARC data for us to worry
- * about copying.
- */
- ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT(!HDR_HAS_L2HDR(hdr));
- ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt);
- ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
- ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
- ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node));
- ASSERT3P(hdr->b_hash_next, ==, NULL);
-
- if (need_crypt) {
- ncache = hdr_full_crypt_cache;
- ocache = hdr_full_cache;
- } else {
- ncache = hdr_full_cache;
- ocache = hdr_full_crypt_cache;
- }
-
- nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
-
- /*
- * Copy all members that aren't locks or condvars to the new header.
- * No lists are pointing to us (as we asserted above), so we don't
- * need to worry about the list nodes.
- */
- nhdr->b_dva = hdr->b_dva;
- nhdr->b_birth = hdr->b_birth;
- nhdr->b_type = hdr->b_type;
- nhdr->b_flags = hdr->b_flags;
- nhdr->b_psize = hdr->b_psize;
- nhdr->b_lsize = hdr->b_lsize;
- nhdr->b_spa = hdr->b_spa;
-#ifdef ZFS_DEBUG
- nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum;
-#endif
- nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt;
- nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap;
- nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state;
- nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access;
- nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits;
- nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
- nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
- nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
- nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
- nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
-
- /*
- * This zfs_refcount_add() exists only to ensure that the individual
- * arc buffers always point to a header that is referenced, avoiding
- * a small race condition that could trigger ASSERTs.
- */
- (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG);
- nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf;
- for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next)
- buf->b_hdr = nhdr;
-
- zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt);
- (void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG);
- ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
-
- if (need_crypt) {
- arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED);
- } else {
- arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED);
- }
-
- /* unset all members of the original hdr */
- memset(&hdr->b_dva, 0, sizeof (dva_t));
- hdr->b_birth = 0;
- hdr->b_type = 0;
- hdr->b_flags = 0;
- hdr->b_psize = 0;
- hdr->b_lsize = 0;
- hdr->b_spa = 0;
-#ifdef ZFS_DEBUG
- hdr->b_l1hdr.b_freeze_cksum = NULL;
-#endif
- hdr->b_l1hdr.b_buf = NULL;
- hdr->b_l1hdr.b_bufcnt = 0;
- hdr->b_l1hdr.b_byteswap = 0;
- hdr->b_l1hdr.b_state = NULL;
- hdr->b_l1hdr.b_arc_access = 0;
- hdr->b_l1hdr.b_mru_hits = 0;
- hdr->b_l1hdr.b_mru_ghost_hits = 0;
- hdr->b_l1hdr.b_mfu_hits = 0;
- hdr->b_l1hdr.b_mfu_ghost_hits = 0;
- hdr->b_l1hdr.b_acb = NULL;
- hdr->b_l1hdr.b_pabd = NULL;
-
- if (ocache == hdr_full_crypt_cache) {
- ASSERT(!HDR_HAS_RABD(hdr));
- hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
- hdr->b_crypt_hdr.b_ebufcnt = 0;
- hdr->b_crypt_hdr.b_dsobj = 0;
- memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN);
- memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN);
- memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN);
- }
-
- buf_discard_identity(hdr);
- kmem_cache_free(ocache, hdr);
-
- return (nhdr);
-}
-
-/*
* This function is used by the send / receive code to convert a newly
* allocated arc_buf_t to one that is suitable for a raw encrypted write. It
* is also used to allow the root objset block to be updated without altering
@@ -3587,8 +3397,7 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED);
- if (!HDR_PROTECTED(hdr))
- hdr = arc_hdr_realloc_crypt(hdr, B_TRUE);
+ arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
hdr->b_crypt_hdr.b_dsobj = dsobj;
hdr->b_crypt_hdr.b_ot = ot;
hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
@@ -3789,8 +3598,6 @@ static void
arc_hdr_destroy(arc_buf_hdr_t *hdr)
{
if (HDR_HAS_L1HDR(hdr)) {
- ASSERT(hdr->b_l1hdr.b_buf == NULL ||
- hdr->b_l1hdr.b_bufcnt > 0);
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
}
@@ -3854,12 +3661,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
#ifdef ZFS_DEBUG
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
#endif
-
- if (!HDR_PROTECTED(hdr)) {
- kmem_cache_free(hdr_full_cache, hdr);
- } else {
- kmem_cache_free(hdr_full_crypt_cache, hdr);
- }
+ kmem_cache_free(hdr_full_cache, hdr);
} else {
kmem_cache_free(hdr_l2only_cache, hdr);
}
@@ -3871,7 +3673,8 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
arc_buf_hdr_t *hdr = buf->b_hdr;
if (hdr->b_l1hdr.b_state == arc_anon) {
- ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
+ ASSERT(ARC_BUF_LAST(buf));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
VERIFY0(remove_reference(hdr, tag));
return;
@@ -3881,7 +3684,7 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
mutex_enter(hash_lock);
ASSERT3P(hdr, ==, buf->b_hdr);
- ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
+ ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
ASSERT3P(hdr->b_l1hdr.b_state, !=, arc_anon);
ASSERT3P(buf->b_data, !=, NULL);
@@ -3924,7 +3727,6 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
@@ -5586,13 +5388,6 @@ arc_read_done(zio_t *zio)
buf_hash_remove(hdr);
}
- /*
- * Broadcast before we drop the hash_lock to avoid the possibility
- * that the hdr (and hence the cv) might be freed before we get to
- * the cv_broadcast().
- */
- cv_broadcast(&hdr->b_l1hdr.b_cv);
-
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
(void) remove_reference(hdr, hdr);
@@ -5787,8 +5582,7 @@ top:
}
acb->acb_zio_head = head_zio;
acb->acb_next = hdr->b_l1hdr.b_acb;
- if (hdr->b_l1hdr.b_acb)
- hdr->b_l1hdr.b_acb->acb_prev = acb;
+ hdr->b_l1hdr.b_acb->acb_prev = acb;
hdr->b_l1hdr.b_acb = acb;
}
mutex_exit(hash_lock);
@@ -5928,8 +5722,28 @@ top:
* and so the performance impact shouldn't
* matter.
*/
- cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
+ arc_callback_t *acb = kmem_zalloc(
+ sizeof (arc_callback_t), KM_SLEEP);
+ acb->acb_wait = B_TRUE;
+ mutex_init(&acb->acb_wait_lock, NULL,
+ MUTEX_DEFAULT, NULL);
+ cv_init(&acb->acb_wait_cv, NULL, CV_DEFAULT,
+ NULL);
+ acb->acb_zio_head =
+ hdr->b_l1hdr.b_acb->acb_zio_head;
+ acb->acb_next = hdr->b_l1hdr.b_acb;
+ hdr->b_l1hdr.b_acb->acb_prev = acb;
+ hdr->b_l1hdr.b_acb = acb;
mutex_exit(hash_lock);
+ mutex_enter(&acb->acb_wait_lock);
+ while (acb->acb_wait) {
+ cv_wait(&acb->acb_wait_cv,
+ &acb->acb_wait_lock);
+ }
+ mutex_exit(&acb->acb_wait_lock);
+ mutex_destroy(&acb->acb_wait_lock);
+ cv_destroy(&acb->acb_wait_cv);
+ kmem_free(acb, sizeof (arc_callback_t));
goto top;
}
}
@@ -6310,7 +6124,8 @@ arc_release(arc_buf_t *buf, const void *tag)
ASSERT(!HDR_IN_HASH_TABLE(hdr));
ASSERT(!HDR_HAS_L2HDR(hdr));
- ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
+ ASSERT(ARC_BUF_LAST(buf));
ASSERT3S(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
@@ -6361,7 +6176,7 @@ arc_release(arc_buf_t *buf, const void *tag)
/*
* Do we have more than one buf?
*/
- if (hdr->b_l1hdr.b_bufcnt > 1) {
+ if (hdr->b_l1hdr.b_buf != buf || !ARC_BUF_LAST(buf)) {
arc_buf_hdr_t *nhdr;
uint64_t spa = hdr->b_spa;
uint64_t psize = HDR_GET_PSIZE(hdr);
@@ -6442,10 +6257,6 @@ arc_release(arc_buf_t *buf, const void *tag)
arc_buf_size(buf), buf);
}
- hdr->b_l1hdr.b_bufcnt -= 1;
- if (ARC_BUF_ENCRYPTED(buf))
- hdr->b_crypt_hdr.b_ebufcnt -= 1;
-
arc_cksum_verify(buf);
arc_buf_unwatch(buf);
@@ -6458,15 +6269,11 @@ arc_release(arc_buf_t *buf, const void *tag)
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
compress, hdr->b_complevel, type);
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
- ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
VERIFY3U(nhdr->b_type, ==, type);
ASSERT(!HDR_SHARED_DATA(nhdr));
nhdr->b_l1hdr.b_buf = buf;
- nhdr->b_l1hdr.b_bufcnt = 1;
- if (ARC_BUF_ENCRYPTED(buf))
- nhdr->b_crypt_hdr.b_ebufcnt = 1;
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
buf->b_hdr = nhdr;
@@ -6517,7 +6324,7 @@ arc_write_ready(zio_t *zio)
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!zfs_refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt));
- ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
+ ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
/*
* If we're reexecuting this zio because the pool suspended, then
@@ -6552,13 +6359,9 @@ arc_write_ready(zio_t *zio)
add_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
}
- if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr))
- hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp));
-
if (BP_IS_PROTECTED(bp)) {
/* ZIL blocks are written through zio_rewrite */
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
- ASSERT(HDR_PROTECTED(hdr));
if (BP_SHOULD_BYTESWAP(bp)) {
if (BP_GET_LEVEL(bp) > 0) {
@@ -6571,11 +6374,14 @@ arc_write_ready(zio_t *zio)
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
}
+ arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
hdr->b_crypt_hdr.b_iv);
zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
+ } else {
+ arc_hdr_clear_flags(hdr, ARC_FLAG_PROTECTED);
}
/*
@@ -6656,7 +6462,8 @@ arc_write_ready(zio_t *zio)
} else {
ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd));
ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf));
- ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, buf);
+ ASSERT(ARC_BUF_LAST(buf));
arc_share_buf(hdr, buf);
}
@@ -6737,7 +6544,8 @@ arc_write_done(zio_t *zio)
(void *)hdr, (void *)exists);
} else {
/* Dedup */
- ASSERT(hdr->b_l1hdr.b_bufcnt == 1);
+ ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
+ ASSERT(ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
ASSERT(hdr->b_l1hdr.b_state == arc_anon);
ASSERT(BP_GET_DEDUP(zio->io_bp));
ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
@@ -6778,7 +6586,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT(!HDR_IO_ERROR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
- ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
+ ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
if (uncached)
arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED);
else if (l2arc)
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index cdf599b17924..599d7ffa0cf3 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -208,11 +208,6 @@ static const uint32_t metaslab_min_search_count = 100;
static int metaslab_df_use_largest_segment = B_FALSE;
/*
- * Percentage of all cpus that can be used by the metaslab taskq.
- */
-int metaslab_load_pct = 50;
-
-/*
* These tunables control how long a metaslab will remain loaded after the
* last allocation from it. A metaslab can't be unloaded until at least
* metaslab_unload_delay TXG's and metaslab_unload_delay_ms milliseconds
@@ -856,9 +851,6 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth);
}
- mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
- maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
-
return (mg);
}
@@ -874,7 +866,6 @@ metaslab_group_destroy(metaslab_group_t *mg)
*/
ASSERT(mg->mg_activation_count <= 0);
- taskq_destroy(mg->mg_taskq);
avl_destroy(&mg->mg_metaslab_tree);
mutex_destroy(&mg->mg_lock);
mutex_destroy(&mg->mg_ms_disabled_lock);
@@ -965,7 +956,7 @@ metaslab_group_passivate(metaslab_group_t *mg)
* allocations from taking place and any changes to the vdev tree.
*/
spa_config_exit(spa, locks & ~(SCL_ZIO - 1), spa);
- taskq_wait_outstanding(mg->mg_taskq, 0);
+ taskq_wait_outstanding(spa->spa_metaslab_taskq, 0);
spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER);
metaslab_group_alloc_update(mg);
for (int i = 0; i < mg->mg_allocators; i++) {
@@ -3529,10 +3520,8 @@ metaslab_group_preload(metaslab_group_t *mg)
avl_tree_t *t = &mg->mg_metaslab_tree;
int m = 0;
- if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
- taskq_wait_outstanding(mg->mg_taskq, 0);
+ if (spa_shutting_down(spa) || !metaslab_preload_enabled)
return;
- }
mutex_enter(&mg->mg_lock);
@@ -3552,8 +3541,9 @@ metaslab_group_preload(metaslab_group_t *mg)
continue;
}
- VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
- msp, TQ_SLEEP) != TASKQID_INVALID);
+ VERIFY(taskq_dispatch(spa->spa_metaslab_taskq, metaslab_preload,
+ msp, TQ_SLEEP | (m <= mg->mg_allocators ? TQ_FRONT : 0))
+ != TASKQID_INVALID);
}
mutex_exit(&mg->mg_lock);
}
@@ -6182,6 +6172,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_unload, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_enabled, INT, ZMOD_RW,
"Preload potential metaslabs during reassessment");
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_limit, UINT, ZMOD_RW,
+ "Max number of metaslabs per group to preload");
+
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, UINT, ZMOD_RW,
"Delay in txgs after metaslab was last used before unloading");
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 88ee4ea9f458..1410651c63cc 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -169,6 +169,11 @@ static int spa_load_impl(spa_t *spa, spa_import_type_t type,
const char **ereport);
static void spa_vdev_resilver_done(spa_t *spa);
+/*
+ * Percentage of all CPUs that can be used by the metaslab preload taskq.
+ */
+static uint_t metaslab_preload_pct = 50;
+
static uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */
static uint_t zio_taskq_batch_tpq; /* threads per taskq */
static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
@@ -1398,6 +1403,13 @@ spa_activate(spa_t *spa, spa_mode_t mode)
1, INT_MAX, 0);
/*
+ * The taskq to preload metaslabs.
+ */
+ spa->spa_metaslab_taskq = taskq_create("z_metaslab",
+ metaslab_preload_pct, maxclsyspri, 1, INT_MAX,
+ TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
+
+ /*
* Taskq dedicated to prefetcher threads: this is used to prevent the
* pool traverse code from monopolizing the global (and limited)
* system_taskq by inappropriately scheduling long running tasks on it.
@@ -1432,6 +1444,11 @@ spa_deactivate(spa_t *spa)
spa->spa_zvol_taskq = NULL;
}
+ if (spa->spa_metaslab_taskq) {
+ taskq_destroy(spa->spa_metaslab_taskq);
+ spa->spa_metaslab_taskq = NULL;
+ }
+
if (spa->spa_prefetch_taskq) {
taskq_destroy(spa->spa_prefetch_taskq);
spa->spa_prefetch_taskq = NULL;
@@ -1704,13 +1721,7 @@ spa_unload(spa_t *spa)
* This ensures that there is no async metaslab prefetching
* while we attempt to unload the spa.
*/
- if (spa->spa_root_vdev != NULL) {
- for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
- vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
- if (vc->vdev_mg != NULL)
- taskq_wait(vc->vdev_mg->mg_taskq);
- }
- }
+ taskq_wait(spa->spa_metaslab_taskq);
if (spa->spa_mmp.mmp_thread)
mmp_thread_stop(spa);
@@ -3921,6 +3932,24 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type,
spa_config_exit(spa, SCL_ALL, FTAG);
/*
+ * If 'zpool import' used a cached config, then the on-disk hostid and
+ * hostname may be different from the cached config in ways that should
+ * prevent import. Userspace can't discover this without a scan, but
+ * we know, so we add these values to LOAD_INFO so the caller can know
+ * the difference.
+ *
+ * Note that we have to do this before the config is regenerated,
+ * because the new config will have the hostid and hostname for this
+ * host, in readiness for import.
+ */
+ if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTID))
+ fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_HOSTID,
+ fnvlist_lookup_uint64(mos_config, ZPOOL_CONFIG_HOSTID));
+ if (nvlist_exists(mos_config, ZPOOL_CONFIG_HOSTNAME))
+ fnvlist_add_string(spa->spa_load_info, ZPOOL_CONFIG_HOSTNAME,
+ fnvlist_lookup_string(mos_config, ZPOOL_CONFIG_HOSTNAME));
+
+ /*
* We will use spa_config if we decide to reload the spa or if spa_load
* fails and we rewind. We must thus regenerate the config using the
* MOS information with the updated paths. ZPOOL_LOAD_POLICY is used to
@@ -10132,6 +10161,9 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
/* asynchronous event notification */
EXPORT_SYMBOL(spa_event_notify);
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_pct, UINT, ZMOD_RW,
+ "Percentage of CPUs to run a metaslab preload taskq");
+
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW,
"log2 fraction of arc that can be used by inflight I/Os when "
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index 9e9c9c22549d..18c6cbf028b3 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -1958,26 +1958,28 @@ zil_max_log_data(zilog_t *zilog, size_t hdrsize)
/*
* Maximum amount of log space we agree to waste to reduce number of
- * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%).
+ * WR_NEED_COPY chunks to reduce zl_get_data() overhead (~6%).
*/
static inline uint64_t
zil_max_waste_space(zilog_t *zilog)
{
- return (zil_max_log_data(zilog, sizeof (lr_write_t)) / 8);
+ return (zil_max_log_data(zilog, sizeof (lr_write_t)) / 16);
}
/*
* Maximum amount of write data for WR_COPIED. For correctness, consumers
* must fall back to WR_NEED_COPY if we can't fit the entire record into one
* maximum sized log block, because each WR_COPIED record must fit in a
- * single log block. For space efficiency, we want to fit two records into a
- * max-sized log block.
+ * single log block. Below that it is a tradeoff of additional memory copy
+ * and possibly worse log space efficiency vs additional range lock/unlock.
*/
+static uint_t zil_maxcopied = 7680;
+
uint64_t
zil_max_copied_data(zilog_t *zilog)
{
- return ((zilog->zl_max_block_size - sizeof (zil_chain_t)) / 2 -
- sizeof (lr_write_t));
+ uint64_t max_data = zil_max_log_data(zilog, sizeof (lr_write_t));
+ return (MIN(max_data, zil_maxcopied));
}
/*
@@ -4226,3 +4228,6 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, slog_bulk, U64, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW,
"Limit in bytes of ZIL log block size");
+
+ZFS_MODULE_PARAM(zfs_zil, zil_, maxcopied, UINT, ZMOD_RW,
+ "Limit in bytes WR_COPIED size");
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index 8c538a00d203..711e6c751dc0 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -522,7 +522,7 @@ systemctl --system daemon-reload >/dev/null || true
%config(noreplace) %{_sysconfdir}/%{name}/vdev_id.conf.*.example
%attr(440, root, root) %config(noreplace) %{_sysconfdir}/sudoers.d/*
-%config(noreplace) %{_sysconfdir}/bash_completion.d/zfs
+%config(noreplace) %{_bashcompletiondir}/zfs
%files -n libzpool5
%{_libdir}/libzpool.so.*
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index ef787c65c0f9..1435c55e8fc2 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -415,6 +415,10 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
'zpool_import_rename_001_pos', 'zpool_import_all_001_pos',
'zpool_import_encrypted', 'zpool_import_encrypted_load',
'zpool_import_errata3', 'zpool_import_errata4',
+ 'zpool_import_hostid_changed',
+ 'zpool_import_hostid_changed_unclean_export',
+ 'zpool_import_hostid_changed_cachefile',
+ 'zpool_import_hostid_changed_cachefile_unclean_export',
'import_cachefile_device_added',
'import_cachefile_device_removed',
'import_cachefile_device_replaced',
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 3272a5d5816f..158401e078aa 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -1104,6 +1104,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_import/zpool_import_features_001_pos.ksh \
functional/cli_root/zpool_import/zpool_import_features_002_neg.ksh \
functional/cli_root/zpool_import/zpool_import_features_003_pos.ksh \
+ functional/cli_root/zpool_import/zpool_import_hostid_changed.ksh \
+ functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh \
+ functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile.ksh \
+ functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh \
functional/cli_root/zpool_import/zpool_import_missing_001_pos.ksh \
functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh \
functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
index 2cd2f4763a73..e52b34ec8a51 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
@@ -52,6 +52,8 @@ log_must set_tunable64 TXG_TIMEOUT 5000
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
+log_must sync_pool $TESTPOOL true
+
log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg
index 4a9fb5e7489a..cf9c6a8499af 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.cfg
@@ -26,6 +26,7 @@
#
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2023 by Klara, Inc.
#
. $STF_SUITE/include/libtest.shlib
@@ -63,3 +64,7 @@ export VDEV4=$DEVICE_DIR/${DEVICE_FILE}4
export VDEV5=$DEVICE_DIR/${DEVICE_FILE}5
export ALTER_ROOT=/alter_import-test
+
+export HOSTID_FILE="/etc/hostid"
+export HOSTID1=01234567
+export HOSTID2=89abcdef
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib
index 559810ff0e30..50157fa80578 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import.kshlib
@@ -11,6 +11,7 @@
#
# Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2023 by Klara, Inc.
#
. $STF_SUITE/include/libtest.shlib
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed.ksh
new file mode 100755
index 000000000000..bc82b7cc1ee8
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed.ksh
@@ -0,0 +1,59 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2023 by Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+# A pool that was cleanly exported should be importable without force even if
+# the local hostid doesn't match the on-disk hostid.
+#
+# STRATEGY:
+# 1. Set a hostid.
+# 2. Create a pool.
+# 3. Export the pool.
+# 4. Change the hostid.
+# 5. Verify that importing the pool without force succeeds.
+#
+
+verify_runnable "global"
+
+function custom_cleanup
+{
+ rm -f $HOSTID_FILE
+ cleanup
+}
+
+log_onexit custom_cleanup
+
+# 1. Set a hostid.
+log_must zgenhostid -f $HOSTID1
+
+# 2. Create a pool.
+log_must zpool create $TESTPOOL1 $VDEV0
+
+# 3. Export the pool.
+log_must zpool export $TESTPOOL1
+
+# 4. Change the hostid.
+log_must zgenhostid -f $HOSTID2
+
+# 5. Verify that importing the pool without force succeeds.
+log_must zpool import -d $DEVICE_DIR $TESTPOOL1
+
+log_pass "zpool import can import cleanly exported pool when hostid changes."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile.ksh
new file mode 100755
index 000000000000..07c43482d68f
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile.ksh
@@ -0,0 +1,65 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2023 by Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+# A pool that was cleanly exported should be importable from a cachefile
+# without force even if the local hostid doesn't match the on-disk hostid.
+#
+# STRATEGY:
+# 1. Set a hostid.
+# 2. Create a pool with a cachefile.
+# 3. Backup the cachfile.
+# 4. Export the pool.
+# 5. Change the hostid.
+# 6. Verify that importing the pool from the cachefile succeeds
+# without force.
+#
+
+verify_runnable "global"
+
+function custom_cleanup
+{
+ rm -f $HOSTID_FILE $CPATH $CPATHBKP
+ cleanup
+}
+
+log_onexit custom_cleanup
+
+# 1. Set a hostid.
+log_must zgenhostid -f $HOSTID1
+
+# 2. Create a pool.
+log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $VDEV0
+
+# 3. Backup the cachefile.
+log_must cp $CPATH $CPATHBKP
+
+# 4. Export the pool.
+log_must zpool export $TESTPOOL1
+
+# 5. Change the hostid.
+log_must zgenhostid -f $HOSTID2
+
+# 6. Verify that importing the pool from the cachefile succeeds without force.
+log_must zpool import -c $CPATHBKP $TESTPOOL1
+
+log_pass "zpool import can import cleanly exported pool from cachefile " \
+ "when hostid changes."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh
new file mode 100755
index 000000000000..dcb1ac1ab69f
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_cachefile_unclean_export.ksh
@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2023 by Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+# A pool that wasn't cleanly exported should not be importable from a cachefile
+# without force if the local hostid doesn't match the on-disk hostid.
+#
+# STRATEGY:
+# 1. Set a hostid.
+# 2. Create a pool.
+# 3. Backup the cachefile.
+# 4. Simulate the pool being torn down without export:
+# 4.1. Copy the underlying device state.
+# 4.2. Export the pool.
+# 4.3. Restore the device state from the copy.
+# 5. Change the hostid.
+# 6. Verify that importing the pool from the cachefile fails.
+# 7. Verify that importing the pool from the cachefile with force
+# succeeds.
+#
+
+verify_runnable "global"
+
+function custom_cleanup
+{
+ rm -f $HOSTID_FILE $CPATH $CPATHBKP $VDEV0.bak
+ cleanup
+}
+
+log_onexit custom_cleanup
+
+# 1. Set a hostid.
+log_must zgenhostid -f $HOSTID1
+
+# 2. Create a pool.
+log_must zpool create -o cachefile=$CPATH $TESTPOOL1 $VDEV0
+
+# 3. Backup the cachefile.
+log_must cp $CPATH $CPATHBKP
+
+# 4. Simulate the pool being torn down without export.
+log_must cp $VDEV0 $VDEV0.bak
+log_must zpool export $TESTPOOL1
+log_must cp -f $VDEV0.bak $VDEV0
+log_must rm -f $VDEV0.bak
+
+# 5. Change the hostid.
+log_must zgenhostid -f $HOSTID2
+
+# 6. Verify that importing the pool from the cachefile fails.
+log_mustnot zpool import -c $CPATHBKP $TESTPOOL1
+
+# 7. Verify that importing the pool from the cachefile with force succeeds.
+log_must zpool import -f -c $CPATHBKP $TESTPOOL1
+
+log_pass "zpool import from cachefile requires force if not cleanly " \
+ "exported and hostid changes."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh
new file mode 100755
index 000000000000..ad8cca642dbc
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_hostid_changed_unclean_export.ksh
@@ -0,0 +1,70 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+# Copyright (c) 2023 by Klara, Inc.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+# A pool that wasn't cleanly exported should not be importable without force if
+# the local hostid doesn't match the on-disk hostid.
+#
+# STRATEGY:
+# 1. Set a hostid.
+# 2. Create a pool.
+# 3. Simulate the pool being torn down without export:
+# 3.1. Copy the underlying device state.
+# 3.2. Export the pool.
+# 3.3. Restore the device state from the copy.
+# 4. Change the hostid.
+# 5. Verify that importing the pool fails.
+# 6. Verify that importing the pool with force succeeds.
+#
+
+verify_runnable "global"
+
+function custom_cleanup
+{
+ rm -f $HOSTID_FILE $VDEV0.bak
+ cleanup
+}
+
+log_onexit custom_cleanup
+
+# 1. Set a hostid.
+log_must zgenhostid -f $HOSTID1
+
+# 2. Create a pool.
+log_must zpool create $TESTPOOL1 $VDEV0
+
+# 3. Simulate the pool being torn down without export.
+log_must cp $VDEV0 $VDEV0.bak
+log_must zpool export $TESTPOOL1
+log_must cp -f $VDEV0.bak $VDEV0
+log_must rm -f $VDEV0.bak
+
+# 4. Change the hostid.
+log_must zgenhostid -f $HOSTID2
+
+# 5. Verify that importing the pool fails.
+log_mustnot zpool import -d $DEVICE_DIR $TESTPOOL1
+
+# 6. Verify that importing the pool with force succeeds.
+log_must zpool import -d $DEVICE_DIR -f $TESTPOOL1
+
+log_pass "zpool import requires force if not cleanly exported " \
+ "and hostid changed."
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 3d527ecbd00f..90591123450c 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1095,7 +1095,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.0-FreeBSD_g8015e2ea6"
+#define ZFS_META_ALIAS "zfs-2.2.0-FreeBSD_g2407f30bd"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1125,7 +1125,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g8015e2ea6"
+#define ZFS_META_RELEASE "FreeBSD_g2407f30bd"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.0"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 8ba7890a78e8..4a8e30bc5d7a 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.0-rc4-45-g8015e2ea6"
+#define ZFS_META_GITREV "zfs-2.2.0-rc5-0-g2407f30bd"