aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martin Matuska <mm@FreeBSD.org> 2023-02-16 21:38:51 +0000
committer Martin Matuska <mm@FreeBSD.org> 2023-02-16 21:38:51 +0000
commit c9539b89010900499a200cdd6c0265ea5d950875 (patch)
tree 70b0c96e5b8d5a881081847c8b6daf87624d3282
parent 6606096986222bac46e6f97d6ca47fc14978d172 (diff)
parent 57cfae4a2f04aaff10c45b3f7975e0fe3ef3e8b8 (diff)
zfs: merge openzfs/zfs@57cfae4a2 (master)

Notable upstream pull request merges:
  #13816 Fix a race condition in dsl_dataset_sync() when activating features
  #14402 Prefetch on deadlists merge
  #14410 Improve resilver ETAs
  #14428 Resilver performance tuning
  #14439 Resolve WS-2021-0184 vulnerability in zstd
  #14440 EIO caused by encryption + recursive gang
  #14448 Fix console progress reporting for recursive send
  #14454 Improve arc_read() error reporting
  #14460 Restore FreeBSD to use .rodata
  #14474 Reduce need for contiguous memory for ioctls

Obtained from:	OpenZFS
OpenZFS commit:	57cfae4a2f04aaff10c45b3f7975e0fe3ef3e8b8
-rw-r--r-- sys/contrib/openzfs/cmd/zdb/zdb.c | 26
-rw-r--r-- sys/contrib/openzfs/cmd/zfs/zfs_main.c | 3
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/zpool_main.c | 33
-rw-r--r-- sys/contrib/openzfs/config/kernel-filemap.m4 | 26
-rw-r--r-- sys/contrib/openzfs/config/kernel.m4 | 2
-rw-r--r-- sys/contrib/openzfs/contrib/initramfs/scripts/zfs | 58
-rw-r--r-- sys/contrib/openzfs/include/libzutil.h | 1
-rw-r--r-- sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h | 2
-rw-r--r-- sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h | 3
-rw-r--r-- sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h | 9
-rw-r--r-- sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h | 16
-rw-r--r-- sys/contrib/openzfs/include/sys/bpobj.h | 1
-rw-r--r-- sys/contrib/openzfs/include/sys/dsl_dataset.h | 1
-rw-r--r-- sys/contrib/openzfs/include/sys/spa.h | 2
-rw-r--r-- sys/contrib/openzfs/include/sys/zfs_znode.h | 1
-rw-r--r-- sys/contrib/openzfs/lib/libshare/nfs.c | 2
-rw-r--r-- sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h | 2
-rw-r--r-- sys/contrib/openzfs/lib/libzfs/libzfs_mount.c | 6
-rw-r--r-- sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c | 12
-rw-r--r-- sys/contrib/openzfs/man/man4/zfs.4 | 11
-rw-r--r-- sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S | 1
-rw-r--r-- sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 5
-rw-r--r-- sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S | 2
-rw-r--r-- sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S | 2
-rw-r--r-- sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S | 3
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c | 10
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c | 3
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/abd_os.c | 14
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c | 17
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c | 4
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c | 3
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c | 5
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c | 4
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c | 6
-rw-r--r-- sys/contrib/openzfs/module/zfs/arc.c | 24
-rw-r--r-- sys/contrib/openzfs/module/zfs/bpobj.c | 65
-rw-r--r-- sys/contrib/openzfs/module/zfs/dmu_send.c | 4
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_dataset.c | 23
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_deadlist.c | 71
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_scan.c | 76
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa.c | 58
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa_misc.c | 1
-rw-r--r-- sys/contrib/openzfs/module/zfs/vdev_rebuild.c | 27
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_replay.c | 6
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_vnops.c | 8
-rw-r--r-- sys/contrib/openzfs/module/zfs/zio.c | 16
-rw-r--r-- sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c | 4
-rw-r--r-- sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c | 6
-rw-r--r-- sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c | 12
-rw-r--r-- sys/contrib/openzfs/rpm/generic/zfs.spec.in | 8
-rw-r--r-- sys/contrib/openzfs/tests/runfiles/common.run | 2
-rwxr-xr-x sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in | 2
-rw-r--r-- sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am | 1
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh | 86
-rw-r--r-- sys/modules/zfs/zfs_config.h | 7
-rw-r--r-- sys/modules/zfs/zfs_gitrev.h | 2
56 files changed, 614 insertions, 191 deletions
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index b04b220c768e..d239da67613c 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -2377,7 +2377,8 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf), " %s", "FREE");
(void) snprintf(blkbuf + strlen(blkbuf),
- buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx",
+ buflen - strlen(blkbuf),
+ " cksum=%016llx:%016llx:%016llx:%016llx",
(u_longlong_t)bp->blk_cksum.zc_word[0],
(u_longlong_t)bp->blk_cksum.zc_word[1],
(u_longlong_t)bp->blk_cksum.zc_word[2],
@@ -7509,6 +7510,19 @@ mos_leak_log_spacemaps(spa_t *spa)
mos_obj_refd(sls->sls_sm_obj);
}
+static void
+errorlog_count_refd(objset_t *mos, uint64_t errlog)
+{
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ for (zap_cursor_init(&zc, mos, errlog);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ mos_obj_refd(za.za_first_integer);
+ }
+ zap_cursor_fini(&zc);
+}
+
static int
dump_mos_leaks(spa_t *spa)
{
@@ -7529,6 +7543,12 @@ dump_mos_leaks(spa_t *spa)
mos_obj_refd(spa->spa_history);
mos_obj_refd(spa->spa_errlog_last);
mos_obj_refd(spa->spa_errlog_scrub);
+
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ errorlog_count_refd(mos, spa->spa_errlog_last);
+ errorlog_count_refd(mos, spa->spa_errlog_scrub);
+ }
+
mos_obj_refd(spa->spa_all_vdev_zaps);
mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj);
mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj);
@@ -8364,7 +8384,9 @@ zdb_read_block(char *thing, spa_t *spa)
DVA_GET_OFFSET(&bp->blk_dva[0]);
ck_zio->io_bp = bp;
zio_checksum_compute(ck_zio, ck, pabd, lsize);
- printf("%12s\tcksum=%llx:%llx:%llx:%llx\n",
+ printf(
+ "%12s\t"
+ "cksum=%016llx:%016llx:%016llx:%016llx\n",
zio_checksum_table[ck].ci_name,
(u_longlong_t)bp->blk_cksum.zc_word[0],
(u_longlong_t)bp->blk_cksum.zc_word[1],
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 5880343a92f3..b1b00738569b 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -4532,7 +4532,7 @@ zfs_do_send(int argc, char **argv)
}
}
- if (flags.parsable && flags.verbosity == 0)
+ if ((flags.parsable || flags.progressastitle) && flags.verbosity == 0)
flags.verbosity = 1;
if (excludes.count > 0 && !flags.replicate) {
@@ -8672,7 +8672,6 @@ main(int argc, char **argv)
int i = 0;
const char *cmdname;
char **newargv;
- extern char **environ;
(void) setlocale(LC_ALL, "");
(void) setlocale(LC_NUMERIC, "C");
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index 93d6a18981cb..efb2d10e591b 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -7524,19 +7524,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));
- assert(ps->pss_func == POOL_SCAN_SCRUB ||
- ps->pss_func == POOL_SCAN_RESILVER);
+ int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
+ int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
+ assert(is_resilver || is_scrub);
/* Scan is finished or canceled. */
if (ps->pss_state == DSS_FINISHED) {
secs_to_dhms(end - start, time_buf);
- if (ps->pss_func == POOL_SCAN_SCRUB) {
+ if (is_scrub) {
(void) printf(gettext("scrub repaired %s "
"in %s with %llu errors on %s"), processed_buf,
time_buf, (u_longlong_t)ps->pss_errors,
ctime(&end));
- } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ } else if (is_resilver) {
(void) printf(gettext("resilvered %s "
"in %s with %llu errors on %s"), processed_buf,
time_buf, (u_longlong_t)ps->pss_errors,
@@ -7544,10 +7545,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
}
return;
} else if (ps->pss_state == DSS_CANCELED) {
- if (ps->pss_func == POOL_SCAN_SCRUB) {
+ if (is_scrub) {
(void) printf(gettext("scrub canceled on %s"),
ctime(&end));
- } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ } else if (is_resilver) {
(void) printf(gettext("resilver canceled on %s"),
ctime(&end));
}
@@ -7557,7 +7558,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
assert(ps->pss_state == DSS_SCANNING);
/* Scan is in progress. Resilvers can't be paused. */
- if (ps->pss_func == POOL_SCAN_SCRUB) {
+ if (is_scrub) {
if (pause == 0) {
(void) printf(gettext("scrub in progress since %s"),
ctime(&start));
@@ -7567,7 +7568,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
(void) printf(gettext("\tscrub started on %s"),
ctime(&start));
}
- } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ } else if (is_resilver) {
(void) printf(gettext("resilver in progress since %s"),
ctime(&start));
}
@@ -7609,17 +7610,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
scanned_buf, issued_buf, total_buf);
}
- if (ps->pss_func == POOL_SCAN_RESILVER) {
+ if (is_resilver) {
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
processed_buf, 100 * fraction_done);
- } else if (ps->pss_func == POOL_SCAN_SCRUB) {
+ } else if (is_scrub) {
(void) printf(gettext("\t%s repaired, %.2f%% done"),
processed_buf, 100 * fraction_done);
}
if (pause == 0) {
+ /*
+ * Only provide an estimate iff:
+ * 1) the time remaining is valid, and
+ * 2) the issue rate exceeds 10 MB/s, and
+ * 3) it's either:
+ * a) a resilver which has started repairs, or
+ * b) a scrub which has entered the issue phase.
+ */
if (total_secs_left != UINT64_MAX &&
- issue_rate >= 10 * 1024 * 1024) {
+ issue_rate >= 10 * 1024 * 1024 &&
+ ((is_resilver && ps->pss_processed > 0) ||
+ (is_scrub && issued > 0))) {
(void) printf(gettext(", %s to go\n"), time_buf);
} else {
(void) printf(gettext(", no estimated "
diff --git a/sys/contrib/openzfs/config/kernel-filemap.m4 b/sys/contrib/openzfs/config/kernel-filemap.m4
new file mode 100644
index 000000000000..745928168f92
--- /dev/null
+++ b/sys/contrib/openzfs/config/kernel-filemap.m4
@@ -0,0 +1,26 @@
+dnl #
+dnl # filemap_range_has_page was not available till 4.13
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [
+ ZFS_LINUX_TEST_SRC([filemap_range_has_page], [
+ #include <linux/fs.h>
+ ],[
+ struct address_space *mapping = NULL;
+ loff_t lstart = 0;
+ loff_t lend = 0;
+ bool ret __attribute__ ((unused));
+
+ ret = filemap_range_has_page(mapping, lstart, lend);
+ ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [
+ AC_MSG_CHECKING([whether filemap_range_has_page() is available])
+ ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1,
+ [filemap_range_has_page() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index 353988e9c867..121d73ef641a 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -150,6 +150,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
ZFS_AC_KERNEL_SRC_IDMAP_MNT_API
ZFS_AC_KERNEL_SRC_IATTR_VFSID
+ ZFS_AC_KERNEL_SRC_FILEMAP
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -273,6 +274,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
ZFS_AC_KERNEL_IDMAP_MNT_API
ZFS_AC_KERNEL_IATTR_VFSID
+ ZFS_AC_KERNEL_FILEMAP
])
dnl #
diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
index c724f0c2cf57..d1a99e4a87ba 100644
--- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
+++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
@@ -272,30 +272,46 @@ import_pool()
# with more logging etc.
load_module_initrd()
{
- [ -n "$ROOTDELAY" ] && ZFS_INITRD_PRE_MOUNTROOT_SLEEP="$ROOTDELAY"
+ ZFS_INITRD_PRE_MOUNTROOT_SLEEP=${ROOTDELAY:-0}
- if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ] 2>/dev/null
- then
- if [ "$quiet" != "y" ]; then
- zfs_log_begin_msg "Sleeping for" \
- "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP seconds..."
- fi
- sleep "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP"
- [ "$quiet" != "y" ] && zfs_log_end_msg
+ if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then
+ [ "$quiet" != "y" ] && zfs_log_begin_msg "Delaying for up to '${ZFS_INITRD_PRE_MOUNTROOT_SLEEP}' seconds."
fi
- # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear.
- if command -v wait_for_udev > /dev/null 2>&1 ; then
- wait_for_udev 10
- elif command -v wait_for_dev > /dev/null 2>&1 ; then
- wait_for_dev
- fi
+ START=$(/bin/date -u +%s)
+ END=$((START+ZFS_INITRD_PRE_MOUNTROOT_SLEEP))
+ while true; do
- # zpool import refuse to import without a valid /proc/self/mounts
- [ ! -f /proc/self/mounts ] && mount proc /proc
+ # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear.
+ if command -v wait_for_udev > /dev/null 2>&1 ; then
+ wait_for_udev 10
+ elif command -v wait_for_dev > /dev/null 2>&1 ; then
+ wait_for_dev
+ fi
- # Load the module
- load_module "zfs" || return 1
+ #
+ # zpool import refuse to import without a valid
+ # /proc/self/mounts
+ #
+ [ ! -f /proc/self/mounts ] && mount proc /proc
+
+ # Load the module
+ if load_module "zfs"; then
+ ret=0
+ break
+ else
+ ret=1
+ fi
+
+ [ "$(/bin/date -u +%s)" -gt "$END" ] && break
+ sleep 1
+
+ done
+ if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then
+ [ "$quiet" != "y" ] && zfs_log_end_msg
+ fi
+
+ [ "$ret" -ne 0 ] && return 1
if [ "$ZFS_INITRD_POST_MODPROBE_SLEEP" -gt 0 ] 2>/dev/null
then
@@ -343,9 +359,11 @@ mount_fs()
# isn't the root fs.
return 0
fi
- ZFS_CMD="mount.zfs"
# Last hail-mary: Hope 'rootmnt' is set!
mountpoint=""
+ if [ "$mountpoint" = "legacy" ]; then
+ ZFS_CMD="mount.zfs"
+ fi
else
mountpoint="$mountpoint1"
fi
diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h
index 4d4bddaad5f3..948ac08cd772 100644
--- a/sys/contrib/openzfs/include/libzutil.h
+++ b/sys/contrib/openzfs/include/libzutil.h
@@ -183,6 +183,7 @@ _LIBZUTIL_H int printf_color(const char *color, const char *format, ...);
_LIBZUTIL_H const char *zfs_basename(const char *path);
_LIBZUTIL_H ssize_t zfs_dirnamelen(const char *path);
#ifdef __linux__
+extern char **environ;
_LIBZUTIL_H void zfs_setproctitle_init(int argc, char *argv[], char *envp[]);
_LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...);
#else
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h
index 058d600007af..1ebfd8350661 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h
@@ -36,7 +36,7 @@
#define ENDBR
#define SECTION_TEXT .text
-#define SECTION_STATIC .data
+#define SECTION_STATIC .rodata
#ifdef __cplusplus
extern "C" {
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
index 41a5bb218c12..8cde33dbcbbb 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
@@ -116,7 +116,8 @@ typedef struct zfs_soft_state {
#define Z_ISLNK(type) ((type) == VLNK)
#define Z_ISDIR(type) ((type) == VDIR)
-#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp))
+#define zn_has_cached_data(zp, start, end) \
+ vn_has_cached_data(ZTOV(zp))
#define zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync)
#define zn_rlimit_fsize(zp, uio) \
vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h
index 2c734322267a..15dc77edafac 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h
@@ -62,7 +62,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__field(uint32_t, z_async_writes_cnt)
__field(mode_t, z_mode)
__field(boolean_t, z_is_sa)
- __field(boolean_t, z_is_mapped)
__field(boolean_t, z_is_ctldir)
__field(uint32_t, i_uid)
@@ -96,7 +95,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_async_writes_cnt = zn->z_async_writes_cnt;
__entry->z_mode = zn->z_mode;
__entry->z_is_sa = zn->z_is_sa;
- __entry->z_is_mapped = zn->z_is_mapped;
__entry->z_is_ctldir = zn->z_is_ctldir;
__entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
@@ -119,7 +117,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
"zn_prefetch %u blksz %u seq %u "
"mapcnt %llu size %llu pflags %llu "
"sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
- "mode 0x%x is_sa %d is_mapped %d is_ctldir %d "
+ "mode 0x%x is_sa %d is_ctldir %d "
"inode { uid %u gid %u ino %lu nlink %u size %lli "
"blkbits %u bytes %u mode 0x%x generation %x } } "
"ace { type %u flags %u access_mask %u } mask_matched %u",
@@ -128,9 +126,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_seq, __entry->z_mapcnt, __entry->z_size,
__entry->z_pflags, __entry->z_sync_cnt,
__entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
- __entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped,
- __entry->z_is_ctldir, __entry->i_uid,
- __entry->i_gid, __entry->i_ino, __entry->i_nlink,
+ __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
+ __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
__entry->i_size, __entry->i_blkbits,
__entry->i_bytes, __entry->i_mode, __entry->i_generation,
__entry->z_type, __entry->z_flags, __entry->z_access_mask,
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
index 52568781011f..81607ef2a25e 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
@@ -47,9 +47,16 @@
extern "C" {
#endif
+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
#define ZNODE_OS_FIELDS \
inode_timespec_t z_btime; /* creation/birth time (cached) */ \
struct inode z_inode;
+#else
+#define ZNODE_OS_FIELDS \
+ inode_timespec_t z_btime; /* creation/birth time (cached) */ \
+ struct inode z_inode; \
+ boolean_t z_is_mapped; /* we are mmap'ed */
+#endif
/*
* Convert between znode pointers and inode pointers
@@ -70,7 +77,14 @@ extern "C" {
#define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type))
#define Z_ISDIR(type) S_ISDIR(type)
-#define zn_has_cached_data(zp) ((zp)->z_is_mapped)
+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+#define zn_has_cached_data(zp, start, end) \
+ filemap_range_has_page(ZTOI(zp)->i_mapping, start, end)
+#else
+#define zn_has_cached_data(zp, start, end) \
+ ((zp)->z_is_mapped)
+#endif
+
#define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync)
#define zn_rlimit_fsize(zp, uio) (0)
diff --git a/sys/contrib/openzfs/include/sys/bpobj.h b/sys/contrib/openzfs/include/sys/bpobj.h
index 84f0ee76c44e..f3384f526454 100644
--- a/sys/contrib/openzfs/include/sys/bpobj.h
+++ b/sys/contrib/openzfs/include/sys/bpobj.h
@@ -87,6 +87,7 @@ int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func,
void *arg, int64_t start);
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
+void bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx);
diff --git a/sys/contrib/openzfs/include/sys/dsl_dataset.h b/sys/contrib/openzfs/include/sys/dsl_dataset.h
index 3450527af7e0..b0d8c7994c07 100644
--- a/sys/contrib/openzfs/include/sys/dsl_dataset.h
+++ b/sys/contrib/openzfs/include/sys/dsl_dataset.h
@@ -372,6 +372,7 @@ int dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive);
int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
minor_t cleanup_minor, const char *htag);
+boolean_t zfeature_active(spa_feature_t f, void *arg);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index 500eb3491a99..c9d03bf645a9 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -678,7 +678,7 @@ typedef struct blkptr {
len += func(buf + len, size - len, \
"[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
- "cksum=%llx:%llx:%llx:%llx", \
+ "cksum=%016llx:%016llx:%016llx:%016llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h
index de38f56dc32d..fcee55b0199d 100644
--- a/sys/contrib/openzfs/include/sys/zfs_znode.h
+++ b/sys/contrib/openzfs/include/sys/zfs_znode.h
@@ -188,7 +188,6 @@ typedef struct znode {
boolean_t z_atime_dirty; /* atime needs to be synced */
boolean_t z_zn_prefetch; /* Prefetch znodes? */
boolean_t z_is_sa; /* are we native sa? */
- boolean_t z_is_mapped; /* are we mmap'ed */
boolean_t z_is_ctldir; /* are we .zfs entry */
boolean_t z_suspended; /* extra ref from a suspend? */
uint_t z_blksz; /* block size in bytes */
diff --git a/sys/contrib/openzfs/lib/libshare/nfs.c b/sys/contrib/openzfs/lib/libshare/nfs.c
index 118ad7ef2209..3962c87453d4 100644
--- a/sys/contrib/openzfs/lib/libshare/nfs.c
+++ b/sys/contrib/openzfs/lib/libshare/nfs.c
@@ -97,7 +97,7 @@ nfs_init_tmpfile(const char *prefix, const char *mdir, struct tmpfile *tmpf)
}
strlcpy(tmpf->name, prefix, sizeof (tmpf->name));
- strlcat(tmpf->name, ".XXXXXXXX", sizeof (tmpf->name) - strlen(prefix));
+ strlcat(tmpf->name, ".XXXXXXXX", sizeof (tmpf->name));
int fd = mkostemp(tmpf->name, O_CLOEXEC);
if (fd == -1) {
diff --git a/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h
index 9964f183cc68..08c73037990f 100644
--- a/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h
+++ b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h
@@ -40,7 +40,7 @@
#define ENDBR
#define SECTION_TEXT .text
-#define SECTION_STATIC .data
+#define SECTION_STATIC .rodata
#ifdef __cplusplus
extern "C" {
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c
index 57737bc6c01a..8612e082ba34 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c
@@ -1422,10 +1422,10 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
* Walk through and first unshare everything.
*/
for (i = 0; i < used; i++) {
- for (enum sa_protocol i = 0; i < SA_PROTOCOL_COUNT; ++i) {
- if (sa_is_shared(sets[i].mountpoint, i) &&
+ for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) {
+ if (sa_is_shared(sets[i].mountpoint, p) &&
unshare_one(hdl, sets[i].mountpoint,
- sets[i].mountpoint, i) != 0)
+ sets[i].mountpoint, p) != 0)
goto out;
}
}
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c
index 038613a1fcfa..66a22e333663 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c
@@ -84,6 +84,7 @@ typedef struct progress_arg {
boolean_t pa_estimate;
int pa_verbosity;
boolean_t pa_astitle;
+ boolean_t pa_progress;
uint64_t pa_size;
} progress_arg_t;
@@ -940,7 +941,7 @@ send_progress_thread(void *arg)
struct tm tm;
int err;
- if (!pa->pa_parsable && pa->pa_verbosity != 0) {
+ if (!pa->pa_parsable && pa->pa_progress) {
(void) fprintf(stderr,
"TIME %s %sSNAPSHOT %s\n",
pa->pa_estimate ? "BYTES" : " SENT",
@@ -990,7 +991,7 @@ send_progress_thread(void *arg)
(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
tm.tm_hour, tm.tm_min, tm.tm_sec,
(u_longlong_t)bytes, zhp->zfs_name);
- } else if (pa->pa_verbosity != 0) {
+ } else if (pa->pa_progress) {
zfs_nicebytes(bytes, buf, sizeof (buf));
(void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
tm.tm_hour, tm.tm_min, tm.tm_sec,
@@ -1206,6 +1207,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
pa.pa_verbosity = sdd->verbosity;
pa.pa_size = sdd->size;
pa.pa_astitle = sdd->progressastitle;
+ pa.pa_progress = sdd->progress;
if ((err = pthread_create(&tid, NULL,
send_progress_thread, &pa)) != 0) {
@@ -1886,6 +1888,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
pa.pa_verbosity = flags->verbosity;
pa.pa_size = size;
pa.pa_astitle = flags->progressastitle;
+ pa.pa_progress = flags->progress;
error = pthread_create(&tid, NULL,
send_progress_thread, &pa);
@@ -2696,6 +2699,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
pa.pa_verbosity = flags->verbosity;
pa.pa_size = size;
pa.pa_astitle = flags->progressastitle;
+ pa.pa_progress = flags->progress;
err = pthread_create(&ptid, NULL,
send_progress_thread, &pa);
@@ -4586,7 +4590,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
B_FALSE, destsnap) == 0) {
*strchr(destsnap, '@') = '\0';
(void) strlcat(destsnap, suffix,
- sizeof (destsnap) - strlen(destsnap));
+ sizeof (destsnap));
}
}
} else {
@@ -4622,7 +4626,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
B_FALSE, destsnap) == 0) {
*strchr(destsnap, '@') = '\0';
(void) strlcat(destsnap, snap,
- sizeof (destsnap) - strlen(destsnap));
+ sizeof (destsnap));
}
}
}
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index e20d601340c6..88a044f63f28 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -1769,7 +1769,7 @@ completes in order to verify the checksums of all blocks which have been
resilvered.
This is enabled by default and strongly recommended.
.
-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32 MiB Pc Pq u64
+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
Maximum amount of I/O that can be concurrently issued for a sequential
resilver per leaf device, given in bytes.
.
@@ -1890,6 +1890,13 @@ I/O.
In this case (unless the metadata scan is done) we stop issuing verification I/O
and start scanning metadata again until we get to the hard limit.
.
+.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When reporting resilver throughput and estimated completion time use the
+performance observed over roughly the last
+.Sy zfs_scan_report_txgs
+TXGs.
+When set to zero performance is calculated over the time between checkpoints.
+.
.It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
Enforce tight memory limits on pool scans when a sequential scan is in progress.
When disabled, the memory limit may be exceeded by fast disks.
@@ -1898,7 +1905,7 @@ When disabled, the memory limit may be exceeded by fast disks.
Freezes a scrub/resilver in progress without actually pausing it.
Intended for testing/debugging.
.
-.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq int
+.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int
Maximum amount of data that can be concurrently issued at once for scrubs and
resilvers per leaf device, given in bytes.
.
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
index 8f9e766486f1..0ebec5c1095e 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
@@ -1791,7 +1791,6 @@ ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64)
SET_SIZE(zfs_blake3_hash_many_avx2)
SECTION_STATIC
-.section .rodata
.p2align 6
ADD0:
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index 165492a0ed76..909b2147dff9 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -53,6 +53,11 @@
/* Windows userland links with OpenSSL */
#if !defined (_WIN32) || defined (_KERNEL)
+/* Apple needs _ */
+#if defined (__APPLE__)
+#define gcm_avx_can_use_movbe _gcm_avx_can_use_movbe
+#endif
+
.extern gcm_avx_can_use_movbe
.text
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
index e40b3df32753..dec782fda33e 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
@@ -101,7 +101,7 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {
// static uint8_t byte_swap16_mask[] = {
// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 };
-.section .rodata
+SECTION_STATIC
.balign XMM_ALIGN
.Lbyte_swap16_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
index f3d701528459..f1fde51c1d69 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S
@@ -2063,7 +2063,7 @@ ENTRY_NP(SHA256TransformBlocks)
.cfi_endproc
SET_SIZE(SHA256TransformBlocks)
-.section .rodata
+SECTION_STATIC
.balign 64
SET_OBJ(K256)
K256:
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
index 520f5b6dab24..b2f7d4863d8a 100644
--- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S
@@ -2064,7 +2064,7 @@ ENTRY_NP(SHA512TransformBlocks)
.cfi_endproc
SET_SIZE(SHA512TransformBlocks)
-.section .rodata
+SECTION_STATIC
.balign 64
SET_OBJ(K512)
K512:
@@ -2113,4 +2113,3 @@ K512:
#if defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
-
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
index e4c6cf7d097d..9a268573528c 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c
@@ -142,7 +142,7 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
return (EINVAL);
uaddr = (void *)(uintptr_t)zp->zfs_cmd;
- zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+ zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
#ifdef ZFS_LEGACY_SUPPORT
/*
* Remap ioctl code for legacy user binaries
@@ -150,10 +150,10 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
if (zp->zfs_ioctl_version == ZFS_IOCVER_LEGACY) {
vecnum = zfs_ioctl_legacy_to_ozfs(vecnum);
if (vecnum < 0) {
- kmem_free(zc, sizeof (zfs_cmd_t));
+ vmem_free(zc, sizeof (zfs_cmd_t));
return (ENOTSUP);
}
- zcl = kmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
+ zcl = vmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) {
error = SET_ERROR(EFAULT);
goto out;
@@ -180,9 +180,9 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
out:
#ifdef ZFS_LEGACY_SUPPORT
if (zcl)
- kmem_free(zcl, sizeof (zfs_cmd_legacy_t));
+ vmem_free(zcl, sizeof (zfs_cmd_legacy_t));
#endif
- kmem_free(zc, sizeof (zfs_cmd_t));
+ vmem_free(zc, sizeof (zfs_cmd_t));
MPASS(tsd_get(rrw_tsd_key) == NULL);
return (error);
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
index 2820f10b5de8..a1e0595bda34 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -230,7 +230,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
vfs_unbusy(vfsp);
if (tmp != *val) {
- (void) strcpy(setpoint, "temporary");
+ if (setpoint)
+ (void) strcpy(setpoint, "temporary");
*val = tmp;
}
return (0);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 16530d82693e..13150adbe0cf 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -272,18 +272,20 @@ abd_alloc_chunks(abd_t *abd, size_t size)
struct page *page, *tmp_page = NULL;
gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
- int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
- int nr_pages = abd_chunkcnt_for_bytes(size);
- int chunks = 0, zones = 0;
+ unsigned int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
+ unsigned int nr_pages = abd_chunkcnt_for_bytes(size);
+ unsigned int chunks = 0, zones = 0;
size_t remaining_size;
int nid = NUMA_NO_NODE;
- int alloc_pages = 0;
+ unsigned int alloc_pages = 0;
INIT_LIST_HEAD(&pages);
+ ASSERT3U(alloc_pages, <, nr_pages);
+
while (alloc_pages < nr_pages) {
- unsigned chunk_pages;
- int order;
+ unsigned int chunk_pages;
+ unsigned int order;
order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
chunk_pages = (1U << order);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index 519f13212fac..dca48e1e4010 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -392,7 +392,20 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
zfsctl_snapshot_hold(se);
rw_enter(&se->se_taskqid_lock, RW_WRITER);
- ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID);
+ /*
+ * If this condition happens, we managed to:
+ * - dispatch once
+ * - want to dispatch _again_ before it returned
+ *
+ * So let's just return - if that task fails at unmounting,
+ * we'll eventually dispatch again, and if it succeeds,
+ * no problem.
+ */
+ if (se->se_taskqid != TASKQID_INVALID) {
+ rw_exit(&se->se_taskqid_lock);
+ zfsctl_snapshot_rele(se);
+ return;
+ }
se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
rw_exit(&se->se_taskqid_lock);
@@ -485,7 +498,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
zp->z_atime_dirty = B_FALSE;
zp->z_zn_prefetch = B_FALSE;
zp->z_is_sa = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
zp->z_is_mapped = B_FALSE;
+#endif
zp->z_is_ctldir = B_TRUE;
zp->z_sa_hdl = NULL;
zp->z_blksz = 0;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
index f8d1777b07a6..f068f544f0ec 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
@@ -135,7 +135,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
vecnum = cmd - ZFS_IOC_FIRST;
- zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+ zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
if (ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t), 0)) {
error = -SET_ERROR(EFAULT);
@@ -146,7 +146,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
if (error == 0 && rc != 0)
error = -SET_ERROR(EFAULT);
out:
- kmem_free(zc, sizeof (zfs_cmd_t));
+ vmem_free(zc, sizeof (zfs_cmd_t));
return (error);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index c921e587c75c..2d9b27a90884 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -608,7 +608,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
}
if (tmp != *val) {
- (void) strcpy(setpoint, "temporary");
+ if (setpoint)
+ (void) strcpy(setpoint, "temporary");
*val = tmp;
}
return (0);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index 47f132a38abe..302a88c2d353 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -987,7 +987,7 @@ top:
mutex_enter(&zp->z_lock);
may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
- !(zp->z_is_mapped);
+ !zn_has_cached_data(zp, 0, LLONG_MAX);
mutex_exit(&zp->z_lock);
/*
@@ -1075,7 +1075,8 @@ top:
&xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
delete_now = may_delete_now && !toobig &&
atomic_read(&ZTOI(zp)->i_count) == 1 &&
- !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
+ !zn_has_cached_data(zp, 0, LLONG_MAX) &&
+ xattr_obj == xattr_obj_unlinked &&
zfs_external_acl(zp) == acl_obj;
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
index 1faf25d93cc7..7b802a9bace0 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -551,7 +551,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
ASSERT3P(zp->z_xattr_cached, ==, NULL);
zp->z_unlinked = B_FALSE;
zp->z_atime_dirty = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
zp->z_is_mapped = B_FALSE;
+#endif
zp->z_is_ctldir = B_FALSE;
zp->z_suspended = B_FALSE;
zp->z_sa_hdl = NULL;
@@ -1641,7 +1643,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
* Zero partial page cache entries. This must be done under a
* range lock in order to keep the ARC and page cache in sync.
*/
- if (zp->z_is_mapped) {
+ if (zn_has_cached_data(zp, off, off + len - 1)) {
loff_t first_page, last_page, page_len;
loff_t first_page_offset, last_page_offset;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index c56e3691e70a..e42b15042a3c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -625,7 +625,6 @@ static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct inode *ip = filp->f_mapping->host;
- znode_t *zp = ITOZ(ip);
int error;
fstrans_cookie_t cookie;
@@ -640,9 +639,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
if (error)
return (error);
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+ znode_t *zp = ITOZ(ip);
mutex_enter(&zp->z_lock);
zp->z_is_mapped = B_TRUE;
mutex_exit(&zp->z_lock);
+#endif
return (error);
}
@@ -937,7 +939,7 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
case POSIX_FADV_SEQUENTIAL:
case POSIX_FADV_WILLNEED:
#ifdef HAVE_GENERIC_FADVISE
- if (zn_has_cached_data(zp))
+ if (zn_has_cached_data(zp, offset, offset + len - 1))
error = generic_fadvise(filp, offset, len, advice);
#endif
/*
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 2a52d0d24572..aa806706de29 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -5958,6 +5958,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
(zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
boolean_t embedded_bp = !!BP_IS_EMBEDDED(bp);
boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
+ arc_buf_t *buf = NULL;
int rc = 0;
ASSERT(!embedded_bp ||
@@ -5987,7 +5988,7 @@ top:
if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER,
BLK_VERIFY_LOG)) {
rc = SET_ERROR(ECKSUM);
- goto out;
+ goto done;
}
if (!embedded_bp) {
@@ -6008,14 +6009,13 @@ top:
if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) ||
(hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) {
boolean_t is_data = !HDR_ISTYPE_METADATA(hdr);
- arc_buf_t *buf = NULL;
if (HDR_IO_IN_PROGRESS(hdr)) {
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
mutex_exit(hash_lock);
ARCSTAT_BUMP(arcstat_cached_only_in_progress);
rc = SET_ERROR(ENOENT);
- goto out;
+ goto done;
}
zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head;
@@ -6144,9 +6144,7 @@ top:
ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH),
demand, prefetch, is_data, data, metadata, hits);
*arc_flags |= ARC_FLAG_CACHED;
-
- if (done)
- done(NULL, zb, bp, buf, private);
+ goto done;
} else {
uint64_t lsize = BP_GET_LSIZE(bp);
uint64_t psize = BP_GET_PSIZE(bp);
@@ -6159,10 +6157,10 @@ top:
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
- rc = SET_ERROR(ENOENT);
if (hash_lock != NULL)
mutex_exit(hash_lock);
- goto out;
+ rc = SET_ERROR(ENOENT);
+ goto done;
}
if (hdr == NULL) {
@@ -6482,6 +6480,16 @@ out:
spa_read_history_add(spa, zb, *arc_flags);
spl_fstrans_unmark(cookie);
return (rc);
+
+done:
+ if (done)
+ done(NULL, zb, bp, buf, private);
+ if (pio && rc != 0) {
+ zio_t *zio = zio_null(pio, spa, NULL, NULL, NULL, zio_flags);
+ zio->io_error = rc;
+ zio_nowait(zio);
+ }
+ goto out;
}
arc_prune_t *
diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c
index f7fded56518b..fa99f5141d4e 100644
--- a/sys/contrib/openzfs/module/zfs/bpobj.c
+++ b/sys/contrib/openzfs/module/zfs/bpobj.c
@@ -663,14 +663,13 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
}
VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
- VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
-
if (bpobj_is_empty(&subbpo)) {
/* No point in having an empty subobj. */
bpobj_close(&subbpo);
bpobj_free(bpo->bpo_os, subobj, tx);
return;
}
+ VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
mutex_enter(&bpo->bpo_lock);
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
@@ -780,6 +779,68 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
}
+/*
+ * Prefetch metadata required for bpobj_enqueue_subobj().
+ */
+void
+bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj)
+{
+ dmu_object_info_t doi;
+ bpobj_t subbpo;
+ uint64_t subsubobjs;
+ boolean_t copy_subsub = B_TRUE; /* cleared when subsubobjs' max offset exceeds one data block */
+ boolean_t copy_bps = B_TRUE; /* cleared when subobj's max offset exceeds one data block, or copy_subsub is clear */
+
+ ASSERT(bpobj_is_open(bpo));
+ ASSERT(subobj != 0);
+
+ if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj)
+ return; /* the pool's empty bpobj: nothing is prefetched */
+
+ if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0)
+ return; /* failures are not reported; the prefetch is simply skipped */
+ if (bpobj_is_empty(&subbpo)) {
+ bpobj_close(&subbpo);
+ return;
+ }
+ subsubobjs = subbpo.bpo_phys->bpo_subobjs; /* read before the close below */
+ bpobj_close(&subbpo);
+
+ if (subsubobjs != 0) {
+ if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0)
+ return;
+ if (doi.doi_max_offset > doi.doi_data_block_size)
+ copy_subsub = B_FALSE;
+ }
+
+ if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0)
+ return;
+ if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub)
+ copy_bps = B_FALSE;
+
+ if (copy_subsub && subsubobjs != 0) { /* NOTE(review): presumably mirrors the copy decisions in bpobj_enqueue_subobj() - confirm */
+ if (bpo->bpo_phys->bpo_subobjs) {
+ dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+ bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+ dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+
+ if (copy_bps) {
+ dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
+ bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ } else if (bpo->bpo_phys->bpo_subobjs) { /* otherwise prefetch only the parent's subobj array, if it has one */
+ dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+ bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+}
+
void
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
index 7f8de23f0e29..f86a0a5b1c57 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -493,6 +493,7 @@ dmu_dump_write(dmu_send_cookie_t *dscp, dmu_object_type_t type, uint64_t object,
(bp != NULL ? BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
io_compressed : lsize != psize);
if (raw || compressed) {
+ ASSERT(bp != NULL);
ASSERT(raw || dscp->dsc_featureflags &
DMU_BACKUP_FEATURE_COMPRESSED);
ASSERT(!BP_IS_EMBEDDED(bp));
@@ -3028,8 +3029,7 @@ dmu_send_estimate_fast(dsl_dataset_t *origds, dsl_dataset_t *fromds,
dsl_dataset_name(origds, dsname);
(void) strcat(dsname, "/");
- (void) strlcat(dsname, recv_clone_name,
- sizeof (dsname) - strlen(dsname));
+ (void) strlcat(dsname, recv_clone_name, sizeof (dsname));
err = dsl_dataset_hold(origds->ds_dir->dd_pool,
dsname, FTAG, &ds);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index 57a58f88cec5..0584a356f9ac 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -1039,7 +1039,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds)
return (rv);
}
-static boolean_t
+boolean_t
zfeature_active(spa_feature_t f, void *arg)
{
switch (spa_feature_table[f].fi_type) {
@@ -2121,16 +2121,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
}
dmu_objset_sync(ds->ds_objset, zio, tx);
-
- for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
- if (zfeature_active(f, ds->ds_feature_activation[f])) {
- if (zfeature_active(f, ds->ds_feature[f]))
- continue;
- dsl_dataset_activate_feature(ds->ds_object, f,
- ds->ds_feature_activation[f], tx);
- ds->ds_feature[f] = ds->ds_feature_activation[f];
- }
- }
}
/*
@@ -2303,6 +2293,17 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
dmu_buf_rele(ds->ds_dbuf, ds);
+
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (zfeature_active(f,
+ ds->ds_feature_activation[f])) {
+ if (zfeature_active(f, ds->ds_feature[f]))
+ continue;
+ dsl_dataset_activate_feature(ds->ds_object, f,
+ ds->ds_feature_activation[f], tx);
+ ds->ds_feature[f] = ds->ds_feature_activation[f];
+ }
+ }
}
int
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
index 2b33446e66af..d58820701f60 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
@@ -438,6 +438,18 @@ dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
}
}
+/*
+ * Prefetch metadata required for dle_enqueue_subobj().
+ */
+static void
+dle_prefetch_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
+ uint64_t obj)
+{
+ if (dle->dle_bpobj.bpo_object !=
+ dmu_objset_pool(dl->dl_os)->dp_empty_bpobj)
+ bpobj_prefetch_subobj(&dle->dle_bpobj, obj); /* skipped when the entry's bpobj is the pool's empty bpobj */
+}
+
void
dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
@@ -810,6 +822,27 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
dle_enqueue_subobj(dl, dle, obj, tx);
}
+/*
+ * Prefetch metadata required for dsl_deadlist_insert_bpobj().
+ */
+static void
+dsl_deadlist_prefetch_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth)
+{
+ dsl_deadlist_entry_t dle_tofind;
+ dsl_deadlist_entry_t *dle;
+ avl_index_t where;
+
+ ASSERT(MUTEX_HELD(&dl->dl_lock)); /* caller must already hold dl_lock */
+
+ dsl_deadlist_load_tree(dl);
+
+ dle_tofind.dle_mintxg = birth; /* only the lookup key field is initialized */
+ dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
+ if (dle == NULL)
+ dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); /* no exact match: fall back to the preceding entry */
+ dle_prefetch_subobj(dl, dle, obj); /* NOTE(review): dle is dereferenced without a NULL check - confirm an entry at or before birth always exists */
+}
+
static int
dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
@@ -826,12 +859,12 @@ dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
void
dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
{
- zap_cursor_t zc;
- zap_attribute_t za;
+ zap_cursor_t zc, pzc;
+ zap_attribute_t za, pza;
dmu_buf_t *bonus;
dsl_deadlist_phys_t *dlp;
dmu_object_info_t doi;
- int error;
+ int error, perror, i;
VERIFY0(dmu_object_info(dl->dl_os, obj, &doi));
if (doi.doi_type == DMU_OT_BPOBJ) {
@@ -843,15 +876,32 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
}
mutex_enter(&dl->dl_lock);
+ /*
+ * Prefetch up to 128 deadlists first and then more as we progress.
+ * The limit is a balance between ARC use and diminishing returns.
+ */
+ for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0;
+ (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128;
+ zap_cursor_advance(&pzc), i++) {
+ dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+ zfs_strtonum(pza.za_name, NULL));
+ }
for (zap_cursor_init(&zc, dl->dl_os, obj);
(error = zap_cursor_retrieve(&zc, &za)) == 0;
zap_cursor_advance(&zc)) {
uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
+ if (perror == 0) {
+ dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+ zfs_strtonum(pza.za_name, NULL));
+ zap_cursor_advance(&pzc);
+ perror = zap_cursor_retrieve(&pzc, &pza);
+ }
}
VERIFY3U(error, ==, ENOENT);
zap_cursor_fini(&zc);
+ zap_cursor_fini(&pzc);
VERIFY0(dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
dlp = bonus->db_data;
@@ -869,8 +919,9 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dmu_tx_t *tx)
{
dsl_deadlist_entry_t dle_tofind;
- dsl_deadlist_entry_t *dle;
+ dsl_deadlist_entry_t *dle, *pdle;
avl_index_t where;
+ int i;
ASSERT(!dl->dl_oldfmt);
@@ -882,11 +933,23 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER);
+ /*
+ * Prefetch up to 128 deadlists first and then more as we progress.
+ * The limit is a balance between ARC use and diminishing returns.
+ */
+ for (pdle = dle, i = 0; pdle && i < 128; i++) {
+ bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+ pdle = AVL_NEXT(&dl->dl_tree, pdle);
+ }
while (dle) {
uint64_t used, comp, uncomp;
dsl_deadlist_entry_t *dle_next;
bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
+ if (pdle) {
+ bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+ pdle = AVL_NEXT(&dl->dl_tree, pdle);
+ }
VERIFY0(bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index f9e437f0c947..f971aa211e0c 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -37,6 +37,7 @@
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/arc.h>
+#include <sys/arc_impl.h>
#include <sys/zap.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
@@ -126,12 +127,21 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj,
static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
-static uint64_t dsl_scan_count_data_disks(vdev_t *vd);
+static uint64_t dsl_scan_count_data_disks(spa_t *spa);
extern uint_t zfs_vdev_async_write_active_min_dirty_percent;
static int zfs_scan_blkstats = 0;
/*
+ * 'zpool status' uses bytes processed per pass to report throughput and
+ * estimate time remaining. We define a pass to start when the scanning
+ * phase completes for a sequential resilver. Optionally, this value
+ * may be used to reset the pass statistics every N txgs to provide an
+ * estimated completion time based on currently observed performance.
+ */
+static uint_t zfs_scan_report_txgs = 0;
+
+/*
* By default zfs will check to ensure it is not over the hard memory
* limit before each txg. If finer-grained control of this is needed
* this value can be set to 1 to enable checking before scanning each
@@ -147,7 +157,7 @@ static int zfs_scan_strict_mem_lim = B_FALSE;
* overload the drives with I/O, since that is protected by
* zfs_vdev_scrub_max_active.
*/
-static uint64_t zfs_scan_vdev_limit = 4 << 20;
+static uint64_t zfs_scan_vdev_limit = 16 << 20;
static uint_t zfs_scan_issue_strategy = 0;
@@ -466,11 +476,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
/*
* Calculate the max number of in-flight bytes for pool-wide
- * scanning operations (minimum 1MB). Limits for the issuing
- * phase are done per top-level vdev and are handled separately.
+ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
+ * Limits for the issuing phase are done per top-level vdev and
+ * are handled separately.
*/
- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
+ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
+ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
offsetof(scan_ds_t, sds_node));
@@ -604,6 +615,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
}
spa_scan_stat_init(spa);
+ vdev_scan_stat_init(spa->spa_root_vdev);
+
return (0);
}
@@ -763,6 +776,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
scn->scn_last_checkpoint = 0;
scn->scn_checkpointing = B_FALSE;
spa_scan_stat_init(spa);
+ vdev_scan_stat_init(spa->spa_root_vdev);
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
@@ -2024,6 +2038,26 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
return;
}
+ /*
+ * Check if this block contradicts any filesystem flags.
+ */
+ spa_feature_t f = SPA_FEATURE_LARGE_BLOCKS;
+ if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE)
+ ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+ f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+ if (f != SPA_FEATURE_NONE)
+ ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+ f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
+ if (f != SPA_FEATURE_NONE)
+ ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+ if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) {
+ scn->scn_lt_min_this_txg++;
+ return;
+ }
+
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) {
scn->scn_lt_min_this_txg++;
return;
@@ -2811,8 +2845,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
}
static uint64_t
-dsl_scan_count_data_disks(vdev_t *rvd)
+dsl_scan_count_data_disks(spa_t *spa)
{
+ vdev_t *rvd = spa->spa_root_vdev;
uint64_t i, leaves = 0;
for (i = 0; i < rvd->vdev_children; i++) {
@@ -3653,6 +3688,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
}
/*
+ * Disabled by default, set zfs_scan_report_txgs to report
+ * average performance over the last zfs_scan_report_txgs TXGs.
+ */
+ if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
+ tx->tx_txg % zfs_scan_report_txgs == 0) {
+ scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
+ spa_scan_stat_init(spa);
+ }
+
+ /*
* It is possible to switch from unsorted to sorted at any time,
* but afterwards the scan will remain sorted unless reloaded from
* a checkpoint after a reboot.
@@ -3711,12 +3756,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
taskqid_t prefetch_tqid;
/*
- * Recalculate the max number of in-flight bytes for pool-wide
- * scanning operations (minimum 1MB). Limits for the issuing
- * phase are done per top-level vdev and are handled separately.
+ * Calculate the max number of in-flight bytes for pool-wide
+ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
+ * Limits for the issuing phase are done per top-level vdev and
+ * are handled separately.
*/
- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
+ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
+ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
if (scnp->scn_ddt_bookmark.ddb_class <=
scnp->scn_ddt_class_max) {
@@ -3780,6 +3826,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
if (scn->scn_is_sorted) {
scn->scn_checkpointing = B_TRUE;
scn->scn_clearing = B_TRUE;
+ scn->scn_issued_before_pass +=
+ spa->spa_scan_pass_issued;
+ spa_scan_stat_init(spa);
}
zfs_dbgmsg("scan complete for %s txg %llu",
spa->spa_name,
@@ -4507,5 +4556,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, UINT, ZMOD_RW,
"Tunable to adjust bias towards more filled segments during scans");
+ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW,
+ "Tunable to report resilver performance over the last N txgs");
+
ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW,
"Process all resilvers immediately");
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 67b3a03a951a..bbb83fc610b1 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -1714,9 +1714,9 @@ spa_unload(spa_t *spa)
*/
spa_l2cache_drop(spa);
- for (int i = 0; i < spa->spa_spares.sav_count; i++)
- vdev_free(spa->spa_spares.sav_vdevs[i]);
if (spa->spa_spares.sav_vdevs) {
+ for (int i = 0; i < spa->spa_spares.sav_count; i++)
+ vdev_free(spa->spa_spares.sav_vdevs[i]);
kmem_free(spa->spa_spares.sav_vdevs,
spa->spa_spares.sav_count * sizeof (void *));
spa->spa_spares.sav_vdevs = NULL;
@@ -1727,11 +1727,11 @@ spa_unload(spa_t *spa)
}
spa->spa_spares.sav_count = 0;
- for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
- vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
- vdev_free(spa->spa_l2cache.sav_vdevs[i]);
- }
if (spa->spa_l2cache.sav_vdevs) {
+ for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+ vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
+ vdev_free(spa->spa_l2cache.sav_vdevs[i]);
+ }
kmem_free(spa->spa_l2cache.sav_vdevs,
spa->spa_l2cache.sav_count * sizeof (void *));
spa->spa_l2cache.sav_vdevs = NULL;
@@ -1789,20 +1789,21 @@ spa_load_spares(spa_t *spa)
/*
* First, close and free any existing spare vdevs.
*/
- for (i = 0; i < spa->spa_spares.sav_count; i++) {
- vd = spa->spa_spares.sav_vdevs[i];
+ if (spa->spa_spares.sav_vdevs) {
+ for (i = 0; i < spa->spa_spares.sav_count; i++) {
+ vd = spa->spa_spares.sav_vdevs[i];
- /* Undo the call to spa_activate() below */
- if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
- B_FALSE)) != NULL && tvd->vdev_isspare)
- spa_spare_remove(tvd);
- vdev_close(vd);
- vdev_free(vd);
- }
+ /* Undo the call to spa_activate() below */
+ if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
+ B_FALSE)) != NULL && tvd->vdev_isspare)
+ spa_spare_remove(tvd);
+ vdev_close(vd);
+ vdev_free(vd);
+ }
- if (spa->spa_spares.sav_vdevs)
kmem_free(spa->spa_spares.sav_vdevs,
spa->spa_spares.sav_count * sizeof (void *));
+ }
if (spa->spa_spares.sav_config == NULL)
nspares = 0;
@@ -2013,23 +2014,24 @@ out:
/*
* Purge vdevs that were dropped
*/
- for (i = 0; i < oldnvdevs; i++) {
- uint64_t pool;
+ if (oldvdevs) {
+ for (i = 0; i < oldnvdevs; i++) {
+ uint64_t pool;
- vd = oldvdevs[i];
- if (vd != NULL) {
- ASSERT(vd->vdev_isl2cache);
+ vd = oldvdevs[i];
+ if (vd != NULL) {
+ ASSERT(vd->vdev_isl2cache);
- if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
- pool != 0ULL && l2arc_vdev_present(vd))
- l2arc_remove_vdev(vd);
- vdev_clear_stats(vd);
- vdev_free(vd);
+ if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
+ pool != 0ULL && l2arc_vdev_present(vd))
+ l2arc_remove_vdev(vd);
+ vdev_clear_stats(vd);
+ vdev_free(vd);
+ }
}
- }
- if (oldvdevs)
kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
+ }
for (i = 0; i < sav->sav_count; i++)
nvlist_free(l2cache[i]);
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 822fd0ee89b6..53763e915ca8 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -2556,7 +2556,6 @@ spa_scan_stat_init(spa_t *spa)
spa->spa_scan_pass_scrub_spent_paused = 0;
spa->spa_scan_pass_exam = 0;
spa->spa_scan_pass_issued = 0;
- vdev_scan_stat_init(spa->spa_root_vdev);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
index 1f56275c853b..62aa61b3b9e7 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -34,6 +34,7 @@
#include <sys/zio.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
+#include <sys/arc_impl.h>
#include <sys/zap.h>
/*
@@ -116,13 +117,12 @@ static uint64_t zfs_rebuild_max_segment = 1024 * 1024;
* segment size is also large (zfs_rebuild_max_segment=1M). This helps keep
* the queue depth short.
*
- * 32MB was selected as the default value to achieve good performance with
- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
- * rebuild was unable to saturate all of the drives using smaller values.
- * With a value of 32MB the sequential resilver write rate was measured at
- * 800MB/s sustained while rebuilding to a distributed spare.
+ * 64MB was observed to deliver the best performance and set as the default.
+ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
+ * and a rebuild rate of 1.2GB/s was measured to the distributed spare.
+ * Smaller values were unable to fully saturate the available pool I/O.
*/
-static uint64_t zfs_rebuild_vdev_limit = 32 << 20;
+static uint64_t zfs_rebuild_vdev_limit = 64 << 20;
/*
* Automatically start a pool scrub when the last active sequential resilver
@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg)
{
vdev_t *vd = arg;
spa_t *spa = vd->vdev_spa;
+ vdev_t *rvd = spa->spa_root_vdev;
int error = 0;
/*
@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg)
vr->vr_pass_bytes_scanned = 0;
vr->vr_pass_bytes_issued = 0;
- vr->vr_bytes_inflight_max = MAX(1ULL << 20,
- zfs_rebuild_vdev_limit * vd->vdev_children);
-
uint64_t update_est_time = gethrtime();
vdev_rebuild_update_bytes_est(vd, 0);
@@ -805,6 +803,17 @@ vdev_rebuild_thread(void *arg)
vr->vr_scan_msp = msp;
/*
+ * Calculate the max number of in-flight bytes for top-level
+ * vdev scanning operations (minimum 1MB, maximum 1/4 of
+ * arc_c_max shared by all top-level vdevs). Limits for the
+ * issuing phase are done per top-level vdev and are handled
+ * separately.
+ */
+ uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
+ vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
+ zfs_rebuild_vdev_limit * vd->vdev_children));
+
+ /*
* Removal of vdevs from the vdev tree may eliminate the need
* for the rebuild, in which case it should be canceled. The
* vdev_rebuild_cancel_wanted flag is set until the sync task
diff --git a/sys/contrib/openzfs/module/zfs/zfs_replay.c b/sys/contrib/openzfs/module/zfs/zfs_replay.c
index 0293e46d5858..32be27a8ba6e 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_replay.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_replay.c
@@ -512,9 +512,9 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
*
* The _ATTR versions will grab the fuid info in their subcases.
*/
- if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
- (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
- (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
+ if (txtype != TX_SYMLINK &&
+ txtype != TX_MKDIR_ATTR &&
+ txtype != TX_CREATE_ATTR) {
start = (lr + 1);
zfsvfs->z_fuid_replay =
zfs_replay_fuid_domain(start, &start,
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 0c392b9da0fb..10677d8d9947 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -106,7 +106,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
hole = B_FALSE;
/* Flush any mmap()'d data to disk */
- if (zn_has_cached_data(zp))
+ if (zn_has_cached_data(zp, 0, file_sz - 1))
zn_flush_cached_data(zp, B_FALSE);
lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER);
@@ -288,7 +288,8 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
error = mappedread_sf(zp, nbytes, uio);
else
#endif
- if (zn_has_cached_data(zp) && !(ioflag & O_DIRECT)) {
+ if (zn_has_cached_data(zp, zfs_uio_offset(uio),
+ zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) {
error = mappedread(zp, nbytes, uio);
} else {
error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
@@ -696,7 +697,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
zfs_uioskip(uio, nbytes);
tx_bytes = nbytes;
}
- if (tx_bytes && zn_has_cached_data(zp) &&
+ if (tx_bytes &&
+ zn_has_cached_data(zp, woff, woff + tx_bytes - 1) &&
!(ioflag & O_DIRECT)) {
update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
}
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 5d7ed6d582a2..d888a584a93c 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -2778,7 +2778,7 @@ zio_write_gang_member_ready(zio_t *zio)
ASSERT3U(zio->io_prop.zp_copies, ==, gio->io_prop.zp_copies);
ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp));
ASSERT3U(pio->io_prop.zp_copies, <=, BP_GET_NDVAS(pio->io_bp));
- ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp));
+ VERIFY3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp));
mutex_enter(&pio->io_lock);
for (int d = 0; d < BP_GET_NDVAS(zio->io_bp); d++) {
@@ -2816,18 +2816,20 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
uint64_t resid = pio->io_size;
uint64_t lsize;
int copies = gio->io_prop.zp_copies;
- int gbh_copies;
zio_prop_t zp;
int error;
boolean_t has_data = !(pio->io_flags & ZIO_FLAG_NODATA);
/*
- * encrypted blocks need DVA[2] free so encrypted gang headers can't
- * have a third copy.
+ * If one copy was requested, store 2 copies of the GBH, so that we
+ * can still traverse all the data (e.g. to free or scrub) even if a
+ * block is damaged. Note that we can't store 3 copies of the GBH in
+ * all cases, e.g. with encryption, which uses DVA[2] for the IV+salt.
*/
- gbh_copies = MIN(copies + 1, spa_max_replication(spa));
- if (BP_IS_ENCRYPTED(bp) && gbh_copies >= SPA_DVAS_PER_BP)
- gbh_copies = SPA_DVAS_PER_BP - 1;
+ int gbh_copies = copies;
+ if (gbh_copies == 1) {
+ gbh_copies = MIN(2, spa_max_replication(spa));
+ }
int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c
index 27eed66cfedd..4a95c01a090d 100644
--- a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c
+++ b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c
@@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
- & (repIndex > dictStartIndex))
+ & (offset_1 < current+1 - dictStartIndex)) /* note: we are searching at current+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
- & (repIndex2 > dictStartIndex))
+ & (offset_2 < current2 - dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c
index 85a3a7a91e49..17894b85472f 100644
--- a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c
+++ b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c
@@ -416,9 +416,9 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = current; /* update hash table */
DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
- assert(offset_1 <= current +1); /* check repIndex */
- if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+ if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+ & (offset_1 < current+1 - dictStartIndex) ) /* note: we are searching at current+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
+ if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < current - dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c
index 4cf5c88b5325..22d80597ec62 100644
--- a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c
+++ b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c
@@ -975,7 +975,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const U32 repIndex = (U32)(current+1 - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+ & (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1006,7 +1007,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const U32 repIndex = (U32)(current - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
+ & (offset_1 < current - windowLow) ) /* equivalent to `current >= repIndex > windowLow` */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1037,7 +1039,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const U32 repIndex = (U32)(current - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
+ & (offset_1 < current - windowLow) ) /* equivalent to `current >= repIndex > windowLow` */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1083,7 +1086,8 @@ _storeSequence:
const U32 repIndex = repCurrent - offset_2;
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
+ & (offset_2 < repCurrent - windowLow) ) /* equivalent to `repCurrent >= repIndex > windowLow` */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index 251470a67fba..8c538a00d203 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -3,7 +3,7 @@
# Set the default udev directory based on distribution.
%if %{undefined _udevdir}
-%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
+%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler}
%global _udevdir %{_prefix}/lib/udev
%else
%global _udevdir /lib/udev
@@ -12,7 +12,7 @@
# Set the default udevrule directory based on distribution.
%if %{undefined _udevruledir}
-%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
+%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler}
%global _udevruledir %{_prefix}/lib/udev/rules.d
%else
%global _udevruledir /lib/udev/rules.d
@@ -21,7 +21,7 @@
# Set the default dracut directory based on distribution.
%if %{undefined _dracutdir}
-%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
+%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler}
%global _dracutdir %{_prefix}/lib/dracut
%else
%global _dracutdir %{_prefix}/share/dracut
@@ -110,7 +110,7 @@ BuildRequires: libblkid-devel
BuildRequires: libudev-devel
BuildRequires: libattr-devel
BuildRequires: openssl-devel
-%if 0%{?fedora}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8
+%if 0%{?fedora}%{?suse_version}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8
BuildRequires: libtirpc-devel
%endif
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 005c539fc89d..7a7cf927c77e 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -704,7 +704,7 @@ tags = ['functional', 'nestedfs']
[tests/functional/no_space]
tests = ['enospc_001_pos', 'enospc_002_pos', 'enospc_003_pos',
- 'enospc_df', 'enospc_rm']
+ 'enospc_df', 'enospc_ganging', 'enospc_rm']
tags = ['functional', 'no_space']
[tests/functional/nopwrite]
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in
index cb453b266f3c..28276ebc47e3 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in
@@ -297,7 +297,7 @@ User: %s
proc = Popen(privcmd, stdout=PIPE, stderr=PIPE)
# Allow a special timeout value of 0 to mean infinity
if int(self.timeout) == 0:
- self.timeout = sys.maxsize
+ self.timeout = sys.maxsize / (10 ** 9)
t = Timer(int(self.timeout), self.kill_cmd, [proc])
try:
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index bbe94f9177ae..ad2ec4670556 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -1539,6 +1539,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/no_space/enospc_002_pos.ksh \
functional/no_space/enospc_003_pos.ksh \
functional/no_space/enospc_df.ksh \
+ functional/no_space/enospc_ganging.ksh \
functional/no_space/enospc_rm.ksh \
functional/no_space/setup.ksh \
functional/online_offline/cleanup.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh
new file mode 100755
index 000000000000..1d35fba5dbfa
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh
@@ -0,0 +1,86 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Exercise gang block IO paths for non-encrypted and encrypted datasets.
+# Each pass writes random data, reads it back and compares sha256 digests.
+
+verify_runnable "both"
+log_assert "Verify IO when file system is full and ganging."
+# On exit: restore the saved METASLAB_FORCE_GANGING value, then run default cleanup.
+function cleanup
+{
+ log_must set_tunable64 METASLAB_FORCE_GANGING $metaslab_force_ganging
+ default_cleanup_noexit
+}
+# NOTE(review): cleanup reads $metaslab_force_ganging, which is assigned only after setup.
+log_onexit cleanup
+
+default_setup_noexit $DISKS
+# Save the tunable, then set it to 2**shift with shift from random_int_between 15 17.
+typeset metaslab_force_ganging=$(get_tunable METASLAB_FORCE_GANGING)
+shift=$(random_int_between 15 17)
+log_must set_tunable64 METASLAB_FORCE_GANGING $((2**$shift))
+# Passphrase key file for the encrypted pass; dd moves 512 blocks of 1024k (512 MiB).
+keyfile=/$TESTPOOL/keyencfods
+log_must eval "echo 'password' > $keyfile"
+bs=1024k
+count=512
+# Random source data and its reference digest, shared by both passes.
+log_must dd if=/dev/urandom of=$TESTDIR/data bs=$bs count=$count
+data_checksum=$(sha256digest $TESTDIR/data)
+
+# Pass 1: unencrypted dataset with 1m records (common large block configuration).
+log_must zfs create -o recordsize=1m -o primarycache=metadata $TESTPOOL/gang
+mntpnt=$(get_prop mountpoint $TESTPOOL/gang)
+# NOTE(review): primarycache=metadata presumably makes the read-back hit disk; confirm.
+log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count
+sync_pool $TESTPOOL
+log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count
+out_checksum=$(sha256digest $TESTDIR/out)
+
+if [[ "$data_checksum" != "$out_checksum" ]]; then
+ log_fail "checksum mismatch ($data_checksum != $out_checksum)"
+fi
+
+log_must rm -f $TESTDIR/out
+log_must zfs destroy $TESTPOOL/gang
+
+# Pass 2: same round trip with encryption on, compression off, and copies=2.
+log_must zfs create \
+ -o recordsize=1m \
+ -o primarycache=metadata \
+ -o compression=off \
+ -o encryption=on \
+ -o keyformat=passphrase \
+ -o keylocation=file://$keyfile \
+ -o copies=2 \
+ $TESTPOOL/gang
+mntpnt=$(get_prop mountpoint $TESTPOOL/gang)
+
+log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count
+sync_pool $TESTPOOL
+log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count
+out_checksum=$(sha256digest $TESTDIR/out)
+
+if [[ "$data_checksum" != "$out_checksum" ]]; then
+ log_fail "checksum mismatch ($data_checksum != $out_checksum)"
+fi
+
+log_must rm -f $TESTDIR/out
+log_must zfs destroy $TESTPOOL/gang
+
+log_pass "Verified IO when file system is full and ganging."
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 5c30abf047f4..0d4fea6ea856 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -296,6 +296,9 @@
/* fault_in_iov_iter_readable() is available */
/* #undef HAVE_FAULT_IN_IOV_ITER_READABLE */
+/* filemap_range_has_page() is available */
+/* #undef HAVE_FILEMAP_RANGE_HAS_PAGE */
+
/* fops->aio_fsync() exists */
/* #undef HAVE_FILE_AIO_FSYNC */
@@ -985,7 +988,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g9cd71c860"
+#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g57cfae4a2"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1015,7 +1018,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g9cd71c860"
+#define ZFS_META_RELEASE "FreeBSD_g57cfae4a2"
/* Define the project version. */
#define ZFS_META_VERSION "2.1.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 334a458f9647..0d4020f71888 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.1.99-1706-g9cd71c860"
+#define ZFS_META_GITREV "zfs-2.1.99-1734-g57cfae4a2"