aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Matuska <mm@FreeBSD.org>2024-02-15 09:21:13 +0000
committerMartin Matuska <mm@FreeBSD.org>2024-02-15 09:22:15 +0000
commite2257b3168fc1697518265527e5d817eca6c43b8 (patch)
tree2cb028ed04bdb43ecf67f4068b37d7dbf075e7d0
parent3733d82c4deb49035a39e18744085d1e3e9b8dc5 (diff)
parente0bd8118d04b55b7adf3d9ba256ad4bb53e66512 (diff)
downloadsrc-e2257b3168fc1697518265527e5d817eca6c43b8.tar.gz
src-e2257b3168fc1697518265527e5d817eca6c43b8.zip
zfs: merge openzfs/zfs@e0bd8118d
Notable upstream pull request merges: #15469 cbe882298 Add slow disk diagnosis to ZED #15857 d0d273320 Update zfs-snapshot.8 #15864 a5a725440 zfs list: add '-t fs' and '-t vol' options #15874 6cc93ccde BRT: Fix slop space calculation with block cloning #15882 a0635ae73 zdb: Fix false leak report for BRT objects Obtained from: OpenZFS OpenZFS commit: e0bd8118d04b55b7adf3d9ba256ad4bb53e66512
-rw-r--r--sys/contrib/openzfs/cmd/zdb/zdb.c11
-rw-r--r--sys/contrib/openzfs/cmd/zed/agents/fmd_api.c57
-rw-r--r--sys/contrib/openzfs/cmd/zed/agents/fmd_api.h3
-rw-r--r--sys/contrib/openzfs/cmd/zed/agents/fmd_serd.c3
-rw-r--r--sys/contrib/openzfs/cmd/zed/agents/fmd_serd.h2
-rw-r--r--sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c143
-rw-r--r--sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c3
-rw-r--r--sys/contrib/openzfs/cmd/zfs/zfs_main.c22
-rw-r--r--sys/contrib/openzfs/cmd/zinject/zinject.c16
-rw-r--r--sys/contrib/openzfs/cmd/zpool/zpool_main.c8
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h2
-rw-r--r--sys/contrib/openzfs/include/sys/fm/fs/zfs.h2
-rw-r--r--sys/contrib/openzfs/include/sys/fs/zfs.h2
-rw-r--r--sys/contrib/openzfs/include/sys/vdev_impl.h5
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs.abi4
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_pool.c2
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_util.c4
-rw-r--r--sys/contrib/openzfs/man/man4/spl.418
-rw-r--r--sys/contrib/openzfs/man/man7/vdevprops.74
-rw-r--r--sys/contrib/openzfs/man/man7/zpoolconcepts.74
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-list.811
-rw-r--r--sys/contrib/openzfs/man/man8/zfs-snapshot.816
-rw-r--r--sys/contrib/openzfs/man/man8/zinject.81
-rw-r--r--sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c85
-rw-r--r--sys/contrib/openzfs/module/zcommon/zpool_prop.c6
-rw-r--r--sys/contrib/openzfs/module/zfs/spa_misc.c3
-rw-r--r--sys/contrib/openzfs/module/zfs/vdev.c30
-rw-r--r--sys/contrib/openzfs/module/zfs/zfs_fm.c26
-rw-r--r--sys/contrib/openzfs/module/zfs/zio_inject.c4
-rw-r--r--sys/contrib/openzfs/tests/runfiles/linux.run3
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/include/default.cfg.in2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg2
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/events/cleanup.ksh4
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh205
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh177
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/cleanup.ksh1
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/setup.ksh1
-rw-r--r--sys/modules/zfs/zfs_config.h7
-rw-r--r--sys/modules/zfs/zfs_gitrev.h2
40 files changed, 746 insertions, 157 deletions
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index afdc5a2c8b54..b857e61bd04f 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -7952,6 +7952,17 @@ dump_mos_leaks(spa_t *spa)
}
}
+ if (spa->spa_brt != NULL) {
+ brt_t *brt = spa->spa_brt;
+ for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
+ brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
+ if (brtvd != NULL && brtvd->bv_initiated) {
+ mos_obj_refd(brtvd->bv_mos_brtvdev);
+ mos_obj_refd(brtvd->bv_mos_entries);
+ }
+ }
+ }
+
/*
* Visit all allocated objects and make sure they are referenced.
*/
diff --git a/sys/contrib/openzfs/cmd/zed/agents/fmd_api.c b/sys/contrib/openzfs/cmd/zed/agents/fmd_api.c
index 4a6cfbf8c05c..fe43e2ab971e 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/fmd_api.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/fmd_api.c
@@ -22,6 +22,7 @@
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2023, Klara Inc.
*/
/*
@@ -231,28 +232,6 @@ fmd_prop_get_int32(fmd_hdl_t *hdl, const char *name)
if (strcmp(name, "spare_on_remove") == 0)
return (1);
- if (strcmp(name, "io_N") == 0 || strcmp(name, "checksum_N") == 0)
- return (10); /* N = 10 events */
-
- return (0);
-}
-
-int64_t
-fmd_prop_get_int64(fmd_hdl_t *hdl, const char *name)
-{
- (void) hdl;
-
- /*
- * These can be looked up in mp->modinfo->fmdi_props
- * For now we just hard code for phase 2. In the
- * future, there can be a ZED based override.
- */
- if (strcmp(name, "remove_timeout") == 0)
- return (15ULL * 1000ULL * 1000ULL * 1000ULL); /* 15 sec */
-
- if (strcmp(name, "io_T") == 0 || strcmp(name, "checksum_T") == 0)
- return (1000ULL * 1000ULL * 1000ULL * 600ULL); /* 10 min */
-
return (0);
}
@@ -535,20 +514,31 @@ fmd_serd_exists(fmd_hdl_t *hdl, const char *name)
return (fmd_serd_eng_lookup(&mp->mod_serds, name) != NULL);
}
-void
-fmd_serd_reset(fmd_hdl_t *hdl, const char *name)
+int
+fmd_serd_active(fmd_hdl_t *hdl, const char *name)
{
fmd_module_t *mp = (fmd_module_t *)hdl;
fmd_serd_eng_t *sgp;
if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
zed_log_msg(LOG_ERR, "serd engine '%s' does not exist", name);
- return;
+ return (0);
}
+ return (fmd_serd_eng_fired(sgp) || !fmd_serd_eng_empty(sgp));
+}
- fmd_serd_eng_reset(sgp);
+void
+fmd_serd_reset(fmd_hdl_t *hdl, const char *name)
+{
+ fmd_module_t *mp = (fmd_module_t *)hdl;
+ fmd_serd_eng_t *sgp;
- fmd_hdl_debug(hdl, "serd_reset %s", name);
+ if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
+ zed_log_msg(LOG_ERR, "serd engine '%s' does not exist", name);
+ } else {
+ fmd_serd_eng_reset(sgp);
+ fmd_hdl_debug(hdl, "serd_reset %s", name);
+ }
}
int
@@ -556,16 +546,21 @@ fmd_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep)
{
fmd_module_t *mp = (fmd_module_t *)hdl;
fmd_serd_eng_t *sgp;
- int err;
if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
zed_log_msg(LOG_ERR, "failed to add record to SERD engine '%s'",
name);
return (0);
}
- err = fmd_serd_eng_record(sgp, ep->ev_hrt);
+ return (fmd_serd_eng_record(sgp, ep->ev_hrt));
+}
+
+void
+fmd_serd_gc(fmd_hdl_t *hdl)
+{
+ fmd_module_t *mp = (fmd_module_t *)hdl;
- return (err);
+ fmd_serd_hash_apply(&mp->mod_serds, fmd_serd_eng_gc, NULL);
}
/* FMD Timers */
@@ -579,7 +574,7 @@ _timer_notify(union sigval sv)
const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
struct itimerspec its;
- fmd_hdl_debug(hdl, "timer fired (%p)", ftp->ft_tid);
+ fmd_hdl_debug(hdl, "%s timer fired (%p)", mp->mod_name, ftp->ft_tid);
/* disarm the timer */
memset(&its, 0, sizeof (struct itimerspec));
diff --git a/sys/contrib/openzfs/cmd/zed/agents/fmd_api.h b/sys/contrib/openzfs/cmd/zed/agents/fmd_api.h
index b940d0d395ec..8471feecf33f 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/fmd_api.h
+++ b/sys/contrib/openzfs/cmd/zed/agents/fmd_api.h
@@ -151,7 +151,6 @@ extern void fmd_hdl_vdebug(fmd_hdl_t *, const char *, va_list);
extern void fmd_hdl_debug(fmd_hdl_t *, const char *, ...);
extern int32_t fmd_prop_get_int32(fmd_hdl_t *, const char *);
-extern int64_t fmd_prop_get_int64(fmd_hdl_t *, const char *);
#define FMD_STAT_NOALLOC 0x0 /* fmd should use caller's memory */
#define FMD_STAT_ALLOC 0x1 /* fmd should allocate stats memory */
@@ -195,10 +194,12 @@ extern size_t fmd_buf_size(fmd_hdl_t *, fmd_case_t *, const char *);
extern void fmd_serd_create(fmd_hdl_t *, const char *, uint_t, hrtime_t);
extern void fmd_serd_destroy(fmd_hdl_t *, const char *);
extern int fmd_serd_exists(fmd_hdl_t *, const char *);
+extern int fmd_serd_active(fmd_hdl_t *, const char *);
extern void fmd_serd_reset(fmd_hdl_t *, const char *);
extern int fmd_serd_record(fmd_hdl_t *, const char *, fmd_event_t *);
extern int fmd_serd_fired(fmd_hdl_t *, const char *);
extern int fmd_serd_empty(fmd_hdl_t *, const char *);
+extern void fmd_serd_gc(fmd_hdl_t *);
extern id_t fmd_timer_install(fmd_hdl_t *, void *, fmd_event_t *, hrtime_t);
extern void fmd_timer_remove(fmd_hdl_t *, id_t);
diff --git a/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.c b/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.c
index 0bb2c535f094..f942e62b3f48 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.c
@@ -310,8 +310,9 @@ fmd_serd_eng_reset(fmd_serd_eng_t *sgp)
}
void
-fmd_serd_eng_gc(fmd_serd_eng_t *sgp)
+fmd_serd_eng_gc(fmd_serd_eng_t *sgp, void *arg)
{
+ (void) arg;
fmd_serd_elem_t *sep, *nep;
hrtime_t hrt;
diff --git a/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.h b/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.h
index 25b6888e61f2..80ff9a3b25b8 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.h
+++ b/sys/contrib/openzfs/cmd/zed/agents/fmd_serd.h
@@ -77,7 +77,7 @@ extern int fmd_serd_eng_fired(fmd_serd_eng_t *);
extern int fmd_serd_eng_empty(fmd_serd_eng_t *);
extern void fmd_serd_eng_reset(fmd_serd_eng_t *);
-extern void fmd_serd_eng_gc(fmd_serd_eng_t *);
+extern void fmd_serd_eng_gc(fmd_serd_eng_t *, void *);
#ifdef __cplusplus
}
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c
index f6ba334a3ba3..e0ad00800add 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c
@@ -23,6 +23,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2023, Klara Inc.
*/
#include <stddef.h>
@@ -47,11 +48,16 @@
#define DEFAULT_CHECKSUM_T 600 /* seconds */
#define DEFAULT_IO_N 10 /* events */
#define DEFAULT_IO_T 600 /* seconds */
+#define DEFAULT_SLOW_IO_N 10 /* events */
+#define DEFAULT_SLOW_IO_T 30 /* seconds */
+
+#define CASE_GC_TIMEOUT_SECS 43200 /* 12 hours */
/*
- * Our serd engines are named 'zfs_<pool_guid>_<vdev_guid>_{checksum,io}'. This
- * #define reserves enough space for two 64-bit hex values plus the length of
- * the longest string.
+ * Our serd engines are named in the following format:
+ * 'zfs_<pool_guid>_<vdev_guid>_{checksum,io,slow_io}'
+ * This #define reserves enough space for two 64-bit hex values plus the
+ * length of the longest string.
*/
#define MAX_SERDLEN (16 * 2 + sizeof ("zfs___checksum"))
@@ -68,6 +74,7 @@ typedef struct zfs_case_data {
int zc_pool_state;
char zc_serd_checksum[MAX_SERDLEN];
char zc_serd_io[MAX_SERDLEN];
+ char zc_serd_slow_io[MAX_SERDLEN];
int zc_has_remove_timer;
} zfs_case_data_t;
@@ -114,7 +121,8 @@ zfs_de_stats_t zfs_stats = {
{ "resource_drops", FMD_TYPE_UINT64, "resource related ereports" }
};
-static hrtime_t zfs_remove_timeout;
+/* wait 15 seconds after a removal */
+static hrtime_t zfs_remove_timeout = SEC2NSEC(15);
uu_list_pool_t *zfs_case_pool;
uu_list_t *zfs_cases;
@@ -124,6 +132,8 @@ uu_list_t *zfs_cases;
#define ZFS_MAKE_EREPORT(type) \
FM_EREPORT_CLASS "." ZFS_ERROR_CLASS "." type
+static void zfs_purge_cases(fmd_hdl_t *hdl);
+
/*
* Write out the persistent representation of an active case.
*/
@@ -171,6 +181,42 @@ zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp)
}
/*
+ * count other unique slow-io cases in a pool
+ */
+static uint_t
+zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
+{
+ zfs_case_t *zcp;
+ uint_t cases = 0;
+ static hrtime_t next_check = 0;
+
+ /*
+ * Note that plumbing in some external GC would require adding locking,
+ * since most of this module code is not thread safe and assumes there
+ * is only one thread running against the module. So we perform GC here
+ * inline periodically so that future delay induced faults will be
+ * possible once the issue causing multiple vdev delays is resolved.
+ */
+ if (gethrestime_sec() > next_check) {
+ /* Periodically purge old SERD entries and stale cases */
+ fmd_serd_gc(hdl);
+ zfs_purge_cases(hdl);
+ next_check = gethrestime_sec() + CASE_GC_TIMEOUT_SECS;
+ }
+
+ for (zcp = uu_list_first(zfs_cases); zcp != NULL;
+ zcp = uu_list_next(zfs_cases, zcp)) {
+ if (zcp->zc_data.zc_pool_guid == zfs_case->zc_pool_guid &&
+ zcp->zc_data.zc_vdev_guid != zfs_case->zc_vdev_guid &&
+ zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
+ fmd_serd_active(hdl, zcp->zc_data.zc_serd_slow_io)) {
+ cases++;
+ }
+ }
+ return (cases);
+}
+
+/*
* Iterate over any active cases. If any cases are associated with a pool or
* vdev which is no longer present on the system, close the associated case.
*/
@@ -376,6 +422,14 @@ zfs_serd_name(char *buf, uint64_t pool_guid, uint64_t vdev_guid,
(long long unsigned int)vdev_guid, type);
}
+static void
+zfs_case_retire(fmd_hdl_t *hdl, zfs_case_t *zcp)
+{
+ fmd_hdl_debug(hdl, "retiring case");
+
+ fmd_case_close(hdl, zcp->zc_case);
+}
+
/*
* Solve a given ZFS case. This first checks to make sure the diagnosis is
* still valid, as well as cleaning up any pending timer associated with the
@@ -632,9 +686,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
if (strcmp(class,
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DATA)) == 0 ||
strcmp(class,
- ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE)) == 0 ||
- strcmp(class,
- ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY)) == 0) {
+ ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE)) == 0) {
zfs_stats.resource_drops.fmds_value.ui64++;
return;
}
@@ -702,6 +754,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
if (zcp->zc_data.zc_serd_checksum[0] != '\0')
fmd_serd_reset(hdl,
zcp->zc_data.zc_serd_checksum);
+ if (zcp->zc_data.zc_serd_slow_io[0] != '\0')
+ fmd_serd_reset(hdl,
+ zcp->zc_data.zc_serd_slow_io);
} else if (fmd_nvl_class_match(hdl, nvl,
ZFS_MAKE_RSRC(FM_RESOURCE_STATECHANGE))) {
uint64_t state = 0;
@@ -730,7 +785,11 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
if (fmd_case_solved(hdl, zcp->zc_case))
return;
- fmd_hdl_debug(hdl, "error event '%s'", class);
+ if (vdev_guid)
+ fmd_hdl_debug(hdl, "error event '%s', vdev %llu", class,
+ vdev_guid);
+ else
+ fmd_hdl_debug(hdl, "error event '%s'", class);
/*
* Determine if we should solve the case and generate a fault. We solve
@@ -779,6 +838,8 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
fmd_nvl_class_match(hdl, nvl,
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO_FAILURE)) ||
fmd_nvl_class_match(hdl, nvl,
+ ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY)) ||
+ fmd_nvl_class_match(hdl, nvl,
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) {
const char *failmode = NULL;
boolean_t checkremove = B_FALSE;
@@ -815,6 +876,51 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep))
checkremove = B_TRUE;
} else if (fmd_nvl_class_match(hdl, nvl,
+ ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY))) {
+ uint64_t slow_io_n, slow_io_t;
+
+ /*
+ * Create a slow io SERD engine when the VDEV has the
+ * 'vdev_slow_io_n' and 'vdev_slow_io_n' properties.
+ */
+ if (zcp->zc_data.zc_serd_slow_io[0] == '\0' &&
+ nvlist_lookup_uint64(nvl,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N,
+ &slow_io_n) == 0 &&
+ nvlist_lookup_uint64(nvl,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T,
+ &slow_io_t) == 0) {
+ zfs_serd_name(zcp->zc_data.zc_serd_slow_io,
+ pool_guid, vdev_guid, "slow_io");
+ fmd_serd_create(hdl,
+ zcp->zc_data.zc_serd_slow_io,
+ slow_io_n,
+ SEC2NSEC(slow_io_t));
+ zfs_case_serialize(zcp);
+ }
+ /* Pass event to SERD engine and see if this triggers */
+ if (zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
+ fmd_serd_record(hdl, zcp->zc_data.zc_serd_slow_io,
+ ep)) {
+ /*
+ * Ignore a slow io diagnosis when other
+ * VDEVs in the pool show signs of being slow.
+ */
+ if (zfs_other_slow_cases(hdl, &zcp->zc_data)) {
+ zfs_case_retire(hdl, zcp);
+ fmd_hdl_debug(hdl, "pool %llu has "
+ "multiple slow io cases -- skip "
+ "degrading vdev %llu",
+ (u_longlong_t)
+ zcp->zc_data.zc_pool_guid,
+ (u_longlong_t)
+ zcp->zc_data.zc_vdev_guid);
+ } else {
+ zfs_case_solve(hdl, zcp,
+ "fault.fs.zfs.vdev.slow_io");
+ }
+ }
+ } else if (fmd_nvl_class_match(hdl, nvl,
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
/*
* We ignore ereports for checksum errors generated by
@@ -924,6 +1030,8 @@ zfs_fm_close(fmd_hdl_t *hdl, fmd_case_t *cs)
fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_checksum);
if (zcp->zc_data.zc_serd_io[0] != '\0')
fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_io);
+ if (zcp->zc_data.zc_serd_slow_io[0] != '\0')
+ fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_slow_io);
if (zcp->zc_data.zc_has_remove_timer)
fmd_timer_remove(hdl, zcp->zc_remove_timer);
@@ -932,30 +1040,15 @@ zfs_fm_close(fmd_hdl_t *hdl, fmd_case_t *cs)
fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));
}
-/*
- * We use the fmd gc entry point to look for old cases that no longer apply.
- * This allows us to keep our set of case data small in a long running system.
- */
-static void
-zfs_fm_gc(fmd_hdl_t *hdl)
-{
- zfs_purge_cases(hdl);
-}
-
static const fmd_hdl_ops_t fmd_ops = {
zfs_fm_recv, /* fmdo_recv */
zfs_fm_timeout, /* fmdo_timeout */
zfs_fm_close, /* fmdo_close */
NULL, /* fmdo_stats */
- zfs_fm_gc, /* fmdo_gc */
+ NULL, /* fmdo_gc */
};
static const fmd_prop_t fmd_props[] = {
- { "checksum_N", FMD_TYPE_UINT32, "10" },
- { "checksum_T", FMD_TYPE_TIME, "10min" },
- { "io_N", FMD_TYPE_UINT32, "10" },
- { "io_T", FMD_TYPE_TIME, "10min" },
- { "remove_timeout", FMD_TYPE_TIME, "15sec" },
{ NULL, 0, NULL }
};
@@ -996,8 +1089,6 @@ _zfs_diagnosis_init(fmd_hdl_t *hdl)
(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (zfs_stats) /
sizeof (fmd_stat_t), (fmd_stat_t *)&zfs_stats);
-
- zfs_remove_timeout = fmd_prop_get_int64(hdl, "remove_timeout");
}
void
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
index a0e377a4a0c8..1ef5c631a438 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
@@ -524,6 +524,9 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
"fault.fs.zfs.vdev.checksum")) {
degrade_device = B_TRUE;
} else if (fmd_nvl_class_match(hdl, fault,
+ "fault.fs.zfs.vdev.slow_io")) {
+ degrade_device = B_TRUE;
+ } else if (fmd_nvl_class_match(hdl, fault,
"fault.fs.zfs.device")) {
fault_device = B_FALSE;
} else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) {
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index d7bee3615dfb..54cc5fec9579 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -3672,15 +3672,25 @@ zfs_do_list(int argc, char **argv)
for (char *tok; (tok = strsep(&optarg, ",")); ) {
static const char *const type_subopts[] = {
- "filesystem", "volume",
- "snapshot", "snap",
+ "filesystem",
+ "fs",
+ "volume",
+ "vol",
+ "snapshot",
+ "snap",
"bookmark",
- "all" };
+ "all"
+ };
static const int type_types[] = {
- ZFS_TYPE_FILESYSTEM, ZFS_TYPE_VOLUME,
- ZFS_TYPE_SNAPSHOT, ZFS_TYPE_SNAPSHOT,
+ ZFS_TYPE_FILESYSTEM,
+ ZFS_TYPE_FILESYSTEM,
+ ZFS_TYPE_VOLUME,
+ ZFS_TYPE_VOLUME,
+ ZFS_TYPE_SNAPSHOT,
+ ZFS_TYPE_SNAPSHOT,
ZFS_TYPE_BOOKMARK,
- ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK };
+ ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK
+ };
for (c = 0; c < ARRAY_SIZE(type_subopts); ++c)
if (strcmp(tok, type_subopts[c]) == 0) {
diff --git a/sys/contrib/openzfs/cmd/zinject/zinject.c b/sys/contrib/openzfs/cmd/zinject/zinject.c
index f1262ed772de..a11b6d0b7fac 100644
--- a/sys/contrib/openzfs/cmd/zinject/zinject.c
+++ b/sys/contrib/openzfs/cmd/zinject/zinject.c
@@ -1083,6 +1083,22 @@ main(int argc, char **argv)
libzfs_fini(g_zfs);
return (1);
}
+
+ if (record.zi_nlanes) {
+ switch (io_type) {
+ case ZIO_TYPE_READ:
+ case ZIO_TYPE_WRITE:
+ case ZIO_TYPES:
+ break;
+ default:
+ (void) fprintf(stderr, "I/O type for a delay "
+ "must be 'read' or 'write'\n");
+ usage();
+ libzfs_fini(g_zfs);
+ return (1);
+ }
+ }
+
if (!error)
error = ENXIO;
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index 8753d7263914..0783271f4734 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -2569,7 +2569,13 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
break;
case VDEV_AUX_ERR_EXCEEDED:
- (void) printf(gettext("too many errors"));
+ if (vs->vs_read_errors + vs->vs_write_errors +
+ vs->vs_checksum_errors == 0 && children == 0 &&
+ vs->vs_slow_ios > 0) {
+ (void) printf(gettext("too many slow I/Os"));
+ } else {
+ (void) printf(gettext("too many errors"));
+ }
break;
case VDEV_AUX_IO_FAILURE:
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
index aa5860c56e83..b73dab631e04 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
@@ -104,7 +104,7 @@ typedef struct taskq {
/* list node for the cpu hotplug callback */
struct hlist_node tq_hp_cb_node;
boolean_t tq_hp_support;
- unsigned long lastshouldstop; /* when to purge dynamic */
+ unsigned long lastspawnstop; /* when to purge dynamic */
} taskq_t;
typedef struct taskq_ent {
diff --git a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
index fb9e8649221e..c746600cd2d5 100644
--- a/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fm/fs/zfs.h
@@ -82,6 +82,8 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T "vdev_cksum_t"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N "vdev_io_n"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T "vdev_io_t"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N "vdev_slow_io_n"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T "vdev_slow_io_t"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS "vdev_delays"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index c6f7dcca78b3..025567e2183f 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -366,6 +366,8 @@ typedef enum {
VDEV_PROP_IO_N,
VDEV_PROP_IO_T,
VDEV_PROP_RAIDZ_EXPANDING,
+ VDEV_PROP_SLOW_IO_N,
+ VDEV_PROP_SLOW_IO_T,
VDEV_NUM_PROPS
} vdev_prop_t;
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
index dafab66c70f9..f39ebf031cea 100644
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2023, Klara Inc.
*/
#ifndef _SYS_VDEV_IMPL_H
@@ -454,12 +455,14 @@ struct vdev {
zfs_ratelimit_t vdev_checksum_rl;
/*
- * Checksum and IO thresholds for tuning ZED
+ * Vdev properties for tuning ZED
*/
uint64_t vdev_checksum_n;
uint64_t vdev_checksum_t;
uint64_t vdev_io_n;
uint64_t vdev_io_t;
+ uint64_t vdev_slow_io_n;
+ uint64_t vdev_slow_io_t;
};
#define VDEV_PAD_SIZE (8 << 10)
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index 7c39b134d1ef..cdd2f04c2629 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -5626,7 +5626,9 @@
<enumerator name='VDEV_PROP_IO_N' value='44'/>
<enumerator name='VDEV_PROP_IO_T' value='45'/>
<enumerator name='VDEV_PROP_RAIDZ_EXPANDING' value='46'/>
- <enumerator name='VDEV_NUM_PROPS' value='47'/>
+ <enumerator name='VDEV_PROP_SLOW_IO_N' value='47'/>
+ <enumerator name='VDEV_PROP_SLOW_IO_T' value='48'/>
+ <enumerator name='VDEV_NUM_PROPS' value='49'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
index c7b8617ef35e..402c14a6baee 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
@@ -5264,6 +5264,8 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
case VDEV_PROP_IO_T:
+ case VDEV_PROP_SLOW_IO_N:
+ case VDEV_PROP_SLOW_IO_T:
if (intval == UINT64_MAX) {
(void) strlcpy(buf, "-", len);
} else {
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
index d0a63a4daa8c..8e70af2e5830 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
@@ -1704,7 +1704,9 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
(prop == VDEV_PROP_CHECKSUM_N ||
prop == VDEV_PROP_CHECKSUM_T ||
prop == VDEV_PROP_IO_N ||
- prop == VDEV_PROP_IO_T)) {
+ prop == VDEV_PROP_IO_T ||
+ prop == VDEV_PROP_SLOW_IO_N ||
+ prop == VDEV_PROP_SLOW_IO_T)) {
*ivalp = UINT64_MAX;
}
diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4
index 414a92394858..5cc12764e18c 100644
--- a/sys/contrib/openzfs/man/man4/spl.4
+++ b/sys/contrib/openzfs/man/man4/spl.4
reading it could cause a lock-up if the list grows too large
without limiting the output.
"(truncated)" will be shown if the list is larger than the limit.
.
-.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 10000 Pq uint
-(Linux-only)
-How long a taskq has to have had no work before we tear it down.
-Previously, we would tear down a dynamic taskq worker as soon
-as we noticed it had no work, but it was observed that this led
-to a lot of churn in tearing down things we then immediately
-spawned anew.
-In practice, it seems any nonzero value will remove the vast
-majority of this churn, while the nontrivially larger value
-was chosen to help filter out the little remaining churn on
-a mostly idle system.
-Setting this value to
-.Sy 0
-will revert to the previous behavior.
+.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 5000 Pq uint
+Minimum idle threads exit interval for dynamic taskqs.
+Smaller values allow idle threads to exit more often and potentially be
+respawned again on demand, causing more churn.
.El
diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7
index 6eebfa0060de..3d3ebc072915 100644
--- a/sys/contrib/openzfs/man/man7/vdevprops.7
+++ b/sys/contrib/openzfs/man/man7/vdevprops.7
@@ -44,7 +44,7 @@ section, below.
Every vdev has a set of properties that export statistics about the vdev
as well as control various behaviors.
Properties are not inherited from top-level vdevs, with the exception of
-checksum_n, checksum_t, io_n, and io_t.
+checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
.Pp
The values of numeric properties can be specified using human-readable suffixes
.Po for example,
@@ -117,7 +117,7 @@ If this device is currently being removed from the pool
.Pp
The following native properties can be used to change the behavior of a vdev.
.Bl -tag -width "allocating"
-.It Sy checksum_n , checksum_t , io_n , io_t
+.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_n , slow_io_t
Tune the fault management daemon by specifying checksum/io thresholds of <N>
errors in <T> seconds, respectively.
These properties can be set on leaf and top-level vdevs.
diff --git a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
index 98f3ee7cd660..18dfca6dc8ac 100644
--- a/sys/contrib/openzfs/man/man7/zpoolconcepts.7
+++ b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
@@ -260,8 +260,8 @@ sufficient replicas exist to continue functioning.
The underlying conditions are as follows:
.Bl -bullet -compact
.It
-The number of checksum errors exceeds acceptable levels and the device is
-degraded as an indication that something may be wrong.
+The number of checksum errors or slow I/Os exceeds acceptable levels and the
+device is degraded as an indication that something may be wrong.
ZFS continues to use the device as necessary.
.It
The number of I/O errors exceeds acceptable levels.
diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8
index 9f6a73ab956d..85bd3fbafced 100644
--- a/sys/contrib/openzfs/man/man8/zfs-list.8
+++ b/sys/contrib/openzfs/man/man8/zfs-list.8
@@ -29,7 +29,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd March 16, 2022
+.Dd February 8, 2024
.Dt ZFS-LIST 8
.Os
.
@@ -155,6 +155,15 @@ or
For example, specifying
.Fl t Sy snapshot
displays only snapshots.
+.Sy fs ,
+.Sy snap ,
+or
+.Sy vol
+can be used as aliases for
+.Sy filesystem ,
+.Sy snapshot ,
+or
+.Sy volume .
.El
.
.Sh EXAMPLES
diff --git a/sys/contrib/openzfs/man/man8/zfs-snapshot.8 b/sys/contrib/openzfs/man/man8/zfs-snapshot.8
index 864c65c7002b..a0c164dd02cb 100644
--- a/sys/contrib/openzfs/man/man8/zfs-snapshot.8
+++ b/sys/contrib/openzfs/man/man8/zfs-snapshot.8
@@ -44,13 +44,21 @@
.Ar dataset Ns @ Ns Ar snapname Ns …
.
.Sh DESCRIPTION
-All previous modifications by successful system calls to the file system are
-part of the snapshots.
-Snapshots are taken atomically, so that all snapshots correspond to the same
-moment in time.
+Creates a snapshot of a dataset or multiple snapshots of different
+datasets.
+.Pp
+Snapshots are created atomically.
+That is, a snapshot is a consistent image of a dataset at a specific
+point in time; it includes all modifications to the dataset made by
+system calls that have successfully completed before that point in time.
+Recursive snapshots created through the
+.Fl r
+option are all created at the same time.
+.Pp
.Nm zfs Cm snap
can be used as an alias for
.Nm zfs Cm snapshot .
+.Pp
See the
.Sx Snapshots
section of
diff --git a/sys/contrib/openzfs/man/man8/zinject.8 b/sys/contrib/openzfs/man/man8/zinject.8
index 4f0bbae81212..b692f12130a8 100644
--- a/sys/contrib/openzfs/man/man8/zinject.8
+++ b/sys/contrib/openzfs/man/man8/zinject.8
@@ -69,6 +69,7 @@ Force a vdev into the DEGRADED or FAULTED state.
.Nm zinject
.Fl d Ar vdev
.Fl D Ar latency : Ns Ar lanes
+.Op Fl T Ar read|write
.Ar pool
.Xc
Add an artificial delay to I/O requests on a particular
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
index 79a1a8e5a5aa..c384b7b378c3 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -36,12 +36,12 @@ static int spl_taskq_thread_bind = 0;
module_param(spl_taskq_thread_bind, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
-static uint_t spl_taskq_thread_timeout_ms = 10000;
+static uint_t spl_taskq_thread_timeout_ms = 5000;
/* BEGIN CSTYLED */
module_param(spl_taskq_thread_timeout_ms, uint, 0644);
/* END CSTYLED */
MODULE_PARM_DESC(spl_taskq_thread_timeout_ms,
- "Time to require a dynamic thread be idle before it gets cleaned up");
+ "Minimum idle threads exit interval for dynamic taskqs");
static int spl_taskq_thread_dynamic = 1;
module_param(spl_taskq_thread_dynamic, int, 0444);
@@ -594,8 +594,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
ASSERT(tq->tq_nactive <= tq->tq_nthreads);
if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
/* Dynamic taskq may be able to spawn another thread */
- if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
- taskq_thread_spawn(tq) == 0)
+ if (taskq_thread_spawn(tq) == 0)
goto out;
}
@@ -629,11 +628,11 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
spin_unlock(&t->tqent_lock);
wake_up(&tq->tq_work_waitq);
-out:
+
/* Spawn additional taskq threads if required. */
if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
(void) taskq_thread_spawn(tq);
-
+out:
spin_unlock_irqrestore(&tq->tq_lock, irqflags);
return (rc);
}
@@ -676,10 +675,11 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
spin_unlock(&t->tqent_lock);
-out:
+
/* Spawn additional taskq threads if required. */
if (tq->tq_nactive == tq->tq_nthreads)
(void) taskq_thread_spawn(tq);
+out:
spin_unlock_irqrestore(&tq->tq_lock, irqflags);
return (rc);
}
@@ -704,9 +704,8 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
/* Dynamic taskq may be able to spawn another thread */
- if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
- taskq_thread_spawn(tq) == 0)
- goto out2;
+ if (taskq_thread_spawn(tq) == 0)
+ goto out;
flags |= TQ_FRONT;
}
@@ -742,11 +741,11 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
spin_unlock(&t->tqent_lock);
wake_up(&tq->tq_work_waitq);
-out:
+
/* Spawn additional taskq threads if required. */
if (tq->tq_nactive == tq->tq_nthreads)
(void) taskq_thread_spawn(tq);
-out2:
+out:
spin_unlock_irqrestore(&tq->tq_lock, irqflags);
}
EXPORT_SYMBOL(taskq_dispatch_ent);
@@ -825,6 +824,7 @@ taskq_thread_spawn(taskq_t *tq)
if (!(tq->tq_flags & TASKQ_DYNAMIC))
return (0);
+ tq->lastspawnstop = jiffies;
if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
(tq->tq_flags & TASKQ_ACTIVE)) {
spawning = (++tq->tq_nspawn);
@@ -836,9 +836,9 @@ taskq_thread_spawn(taskq_t *tq)
}
/*
- * Threads in a dynamic taskq should only exit once it has been completely
- * drained and no other threads are actively servicing tasks. This prevents
- * threads from being created and destroyed more than is required.
+ * Threads in a dynamic taskq may exit once there is no more work to do.
+ * To prevent threads from being created and destroyed too often limit
+ * the exit rate to one per spl_taskq_thread_timeout_ms.
*
* The first thread is the thread list is treated as the primary thread.
* There is nothing special about the primary thread but in order to avoid
@@ -847,44 +847,22 @@ taskq_thread_spawn(taskq_t *tq)
static int
taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
{
- if (!(tq->tq_flags & TASKQ_DYNAMIC))
+ ASSERT(!taskq_next_ent(tq));
+ if (!(tq->tq_flags & TASKQ_DYNAMIC) || !spl_taskq_thread_dynamic)
return (0);
-
+ if (!(tq->tq_flags & TASKQ_ACTIVE))
+ return (1);
if (list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
tqt_thread_list) == tqt)
return (0);
-
- int no_work =
- ((tq->tq_nspawn == 0) && /* No threads are being spawned */
- (tq->tq_nactive == 0) && /* No threads are handling tasks */
- (tq->tq_nthreads > 1) && /* More than 1 thread is running */
- (!taskq_next_ent(tq)) && /* There are no pending tasks */
- (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
-
- /*
- * If we would have said stop before, let's instead wait a bit, maybe
- * we'll see more work come our way soon...
- */
- if (no_work) {
- /* if it's 0, we want the old behavior. */
- /* if the taskq is being torn down, we also want to go away. */
- if (spl_taskq_thread_timeout_ms == 0 ||
- !(tq->tq_flags & TASKQ_ACTIVE))
- return (1);
- unsigned long lasttime = tq->lastshouldstop;
- if (lasttime > 0) {
- if (time_after(jiffies, lasttime +
- msecs_to_jiffies(spl_taskq_thread_timeout_ms)))
- return (1);
- else
- return (0);
- } else {
- tq->lastshouldstop = jiffies;
- }
- } else {
- tq->lastshouldstop = 0;
- }
- return (0);
+ ASSERT3U(tq->tq_nthreads, >, 1);
+ if (tq->tq_nspawn != 0)
+ return (0);
+ if (time_before(jiffies, tq->lastspawnstop +
+ msecs_to_jiffies(spl_taskq_thread_timeout_ms)))
+ return (0);
+ tq->lastspawnstop = jiffies;
+ return (1);
}
static int
@@ -935,10 +913,8 @@ taskq_thread(void *args)
if (list_empty(&tq->tq_pend_list) &&
list_empty(&tq->tq_prio_list)) {
- if (taskq_thread_should_stop(tq, tqt)) {
- wake_up_all(&tq->tq_wait_waitq);
+ if (taskq_thread_should_stop(tq, tqt))
break;
- }
add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
spin_unlock_irqrestore(&tq->tq_lock, flags);
@@ -1013,9 +989,6 @@ taskq_thread(void *args)
tqt->tqt_id = TASKQID_INVALID;
tqt->tqt_flags = 0;
wake_up_all(&tq->tq_wait_waitq);
- } else {
- if (taskq_thread_should_stop(tq, tqt))
- break;
}
set_current_state(TASK_INTERRUPTIBLE);
@@ -1122,7 +1095,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
tq->tq_flags = (flags | TASKQ_ACTIVE);
tq->tq_next_id = TASKQID_INITIAL;
tq->tq_lowest_id = TASKQID_INITIAL;
- tq->lastshouldstop = 0;
+ tq->lastspawnstop = jiffies;
INIT_LIST_HEAD(&tq->tq_free_list);
INIT_LIST_HEAD(&tq->tq_pend_list);
INIT_LIST_HEAD(&tq->tq_prio_list);
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
index e98063e8bc57..e2e3bf5be69e 100644
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -431,6 +431,12 @@ vdev_prop_init(void)
zprop_register_number(VDEV_PROP_IO_T, "io_t", UINT64_MAX,
PROP_DEFAULT, ZFS_TYPE_VDEV, "<seconds>", "IO_T", B_FALSE,
sfeatures);
+ zprop_register_number(VDEV_PROP_SLOW_IO_N, "slow_io_n", UINT64_MAX,
+ PROP_DEFAULT, ZFS_TYPE_VDEV, "<events>", "SLOW_IO_N", B_FALSE,
+ sfeatures);
+ zprop_register_number(VDEV_PROP_SLOW_IO_T, "slow_io_t", UINT64_MAX,
+ PROP_DEFAULT, ZFS_TYPE_VDEV, "<seconds>", "SLOW_IO_T", B_FALSE,
+ sfeatures);
/* default index (boolean) properties */
zprop_register_index(VDEV_PROP_REMOVING, "removing", 0,
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 1e5ab59eb4d0..68b907614196 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -1837,7 +1837,8 @@ spa_get_slop_space(spa_t *spa)
* deduplicated data, so since it's not useful to reserve more
* space with more deduplicated data, we subtract that out here.
*/
- space = spa_get_dspace(spa) - spa->spa_dedup_dspace;
+ space =
+ spa_get_dspace(spa) - spa->spa_dedup_dspace - brt_get_dspace(spa);
slop = MIN(space >> spa_slop_shift, spa_max_slop);
/*
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index d6286dc5920b..ebba453e2b14 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -677,6 +677,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
+ vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
+ vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
@@ -3755,6 +3757,18 @@ vdev_load(vdev_t *vd)
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
+
+ error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
+ &vd->vdev_slow_io_n);
+ if (error && error != ENOENT)
+ vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+ "failed [error=%d]", (u_longlong_t)zapobj, error);
+
+ error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_T,
+ &vd->vdev_slow_io_t);
+ if (error && error != ENOENT)
+ vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+ "failed [error=%d]", (u_longlong_t)zapobj, error);
}
/*
@@ -5970,6 +5984,20 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_io_t = intval;
break;
+ case VDEV_PROP_SLOW_IO_N:
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ vd->vdev_slow_io_n = intval;
+ break;
+ case VDEV_PROP_SLOW_IO_T:
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ vd->vdev_slow_io_t = intval;
+ break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@@ -6313,6 +6341,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
case VDEV_PROP_IO_T:
+ case VDEV_PROP_SLOW_IO_N:
+ case VDEV_PROP_SLOW_IO_T:
err = vdev_prop_get_int(vd, prop, &intval);
if (err && err != ENOENT)
break;
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c
index c4eb74e873db..481af2ba826b 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_fm.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c
@@ -222,6 +222,12 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
case VDEV_PROP_IO_T:
propval = vd->vdev_io_t;
break;
+ case VDEV_PROP_SLOW_IO_N:
+ propval = vd->vdev_slow_io_n;
+ break;
+ case VDEV_PROP_SLOW_IO_T:
+ propval = vd->vdev_slow_io_t;
+ break;
default:
propval = propdef;
break;
@@ -741,6 +747,26 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
NULL);
}
+ if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
+ uint64_t slow_io_n, slow_io_t;
+
+ slow_io_n = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_N);
+ if (slow_io_n != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N))
+ fm_payload_set(ereport,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N,
+ DATA_TYPE_UINT64,
+ slow_io_n,
+ NULL);
+
+ slow_io_t = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_T);
+ if (slow_io_t != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T))
+ fm_payload_set(ereport,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T,
+ DATA_TYPE_UINT64,
+ slow_io_t,
+ NULL);
+ }
+
mutex_exit(&spa->spa_errlist_lock);
*ereport_out = ereport;
diff --git a/sys/contrib/openzfs/module/zfs/zio_inject.c b/sys/contrib/openzfs/module/zfs/zio_inject.c
index 3598351c499d..609182f4a2cd 100644
--- a/sys/contrib/openzfs/module/zfs/zio_inject.c
+++ b/sys/contrib/openzfs/module/zfs/zio_inject.c
@@ -605,6 +605,10 @@ zio_handle_io_delay(zio_t *zio)
if (vd->vdev_guid != handler->zi_record.zi_guid)
continue;
+ if (handler->zi_record.zi_iotype != ZIO_TYPES &&
+ handler->zi_record.zi_iotype != zio->io_type)
+ continue;
+
/*
* Defensive; should never happen as the array allocation
* occurs prior to inserting this handler on the list.
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index 6a4cd3fe691c..a0b74ef4a8c6 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -104,7 +104,8 @@ tags = ['functional', 'devices']
[tests/functional/events:Linux]
tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
- 'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config']
+ 'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
+ 'zed_slow_io', 'zed_slow_io_many_vdevs']
tags = ['functional', 'events']
[tests/functional/fadvise:Linux]
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/default.cfg.in b/sys/contrib/openzfs/tests/zfs-tests/include/default.cfg.in
index 3aed8ebea03a..db70a02a7828 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/default.cfg.in
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/default.cfg.in
@@ -80,7 +80,7 @@ export TESTPOOL=testpool
export TESTPOOL1=testpool1
export TESTPOOL2=testpool2
export TESTPOOL3=testpool3
-export PERFPOOL=perfpool
+export PERFPOOL=${PERFPOOL:-perfpool}
# some test file system names
export TESTFS=testfs
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 01af258d59fe..fe9c92108725 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -1447,6 +1447,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/events/zed_fd_spill.ksh \
functional/events/zed_io_config.ksh \
functional/events/zed_rc_filter.ksh \
+ functional/events/zed_slow_io.ksh \
+ functional/events/zed_slow_io_many_vdevs.ksh \
functional/exec/cleanup.ksh \
functional/exec/exec_001_pos.ksh \
functional/exec/exec_002_neg.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
index 71a64d4fae7a..c3b9efd6464a 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
@@ -70,4 +70,6 @@ typeset -a properties=(
checksum_t
io_n
io_t
+ slow_io_n
+ slow_io_t
)
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/cleanup.ksh
index ef6e098cf42a..669b8ae99456 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/cleanup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/cleanup.ksh
@@ -26,8 +26,10 @@
. $STF_SUITE/include/libtest.shlib
+zed_stop
+
zed_cleanup all-debug.sh all-syslog.sh all-dumpfds
-zed_stop
+zed_events_drain
default_cleanup
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh
new file mode 100755
index 000000000000..d9fabb2c3bc9
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh
@@ -0,0 +1,205 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023, Klara Inc.
+#
+
+# DESCRIPTION:
+# Verify that vdev properties, slow_io_n and slow_io_t, work with ZED.
+#
+# STRATEGY:
+# 1. Create a pool with single vdev
+# 2. Set slow_io_n/slow_io_t to non-default values
+# 3. Inject slow io errors
+# 4. Verify that ZED degrades vdev
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+TESTDIR="$TEST_BASE_DIR/zed_slow_io"
+VDEV="$TEST_BASE_DIR/vdevfile.$$"
+TESTPOOL="slow_io_pool"
+FILEPATH="$TESTDIR/slow_io.testfile"
+
+OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
+OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
+
+verify_runnable "both"
+
+function do_setup
+{
+ log_must truncate -s 1G $VDEV
+ default_setup_noexit $VDEV
+ zed_events_drain
+ log_must zfs set compression=off $TESTPOOL
+ log_must zfs set primarycache=none $TESTPOOL
+ log_must zfs set prefetch=none $TESTPOOL
+ log_must zfs set recordsize=512 $TESTPOOL
+ for i in {1..10}; do
+ dd if=/dev/urandom of=${FILEPATH}$i bs=512 count=1 2>/dev/null
+ done
+ zpool sync
+}
+
+# intermediate cleanup
+function do_clean
+{
+ log_must zinject -c all
+ log_must zpool destroy $TESTPOOL
+ log_must rm -f $VDEV
+}
+
+# final cleanup
+function cleanup
+{
+ log_must zinject -c all
+
+ # if pool still exists then something failed so log additional info
+ if poolexists $TESTPOOL ; then
+ log_note "$(zpool status -s $TESTPOOL)"
+ echo "=================== zed log search ==================="
+ grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
+ destroy_pool $TESTPOOL
+ fi
+ log_must zed_stop
+
+ log_must rm -f $VDEV
+
+ log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
+ log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
+}
+
+function start_slow_io
+{
+ zpool sync
+ log_must set_tunable64 ZIO_SLOW_IO_MS 10
+ log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1000
+
+ log_must zinject -d $VDEV -D10:1 -T read $TESTPOOL
+ zpool sync
+}
+
+function stop_slow_io
+{
+ log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
+ log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
+
+ log_must zinject -c all
+}
+
+# Test default ZED settings:
+# inject 10 events over 2.5 seconds, should not degrade.
+function default_degrade
+{
+ do_setup
+
+ start_slow_io
+ for i in {1..10}; do
+ dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
+ sleep 0.25
+ done
+ stop_slow_io
+ log_note "$(zpool status -s $TESTPOOL)"
+
+ # give slow ZED a chance to process the delay events
+ sleep 18
+ log_note "$(zpool status -s $TESTPOOL)"
+
+ degrades=$(grep "zpool_vdev_degrade" $ZEDLET_DIR/zed.log | wc -l)
+ log_note $degrades vdev degrades in ZED log
+ [ $degrades -eq "0" ] || \
+ log_fail "expecting no degrade events, found $degrades"
+
+ do_clean
+}
+
+# change slow_io_n, slow_io_t to 5 events in 60 seconds
+# fire more than 5 events, should degrade
+function slow_io_degrade
+{
+ do_setup
+
+ zpool set slow_io_n=5 $TESTPOOL $VDEV
+ zpool set slow_io_t=60 $TESTPOOL $VDEV
+
+ start_slow_io
+ for i in {1..16}; do
+ dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
+ sleep 0.5
+ done
+ stop_slow_io
+ zpool sync
+
+ #
+ # wait up to 60 seconds for kernel to produce at least 5 delay events
+ #
+ typeset -i i=0
+ typeset -i events=0
+ while [[ $i -lt 60 ]]; do
+ events=$(zpool events | grep "ereport\.fs\.zfs.delay" | wc -l)
+ [[ $events -ge "5" ]] && break
+ i=$((i+1))
+ sleep 1
+ done
+ log_note "$events delay events found"
+
+ if [[ $events -ge "5" ]]; then
+ log_must wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 10
+ fi
+
+ do_clean
+}
+
+# change slow_io_n, slow_io_t to 10 events in 1 second
+# inject events spaced 0.5 seconds apart, should not degrade
+function slow_io_no_degrade
+{
+ do_setup
+
+ zpool set slow_io_n=10 $TESTPOOL $VDEV
+ zpool set slow_io_t=1 $TESTPOOL $VDEV
+
+ start_slow_io
+ for i in {1..16}; do
+ dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
+ sleep 0.5
+ done
+ stop_slow_io
+ zpool sync
+
+ log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
+
+ do_clean
+}
+
+log_assert "Test ZED slow io configurability"
+log_onexit cleanup
+
+log_must zed_events_drain
+log_must zed_start
+
+default_degrade
+slow_io_degrade
+slow_io_no_degrade
+
+log_pass "Test ZED slow io configurability"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh
new file mode 100755
index 000000000000..3357ae2e3510
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/events/zed_slow_io_many_vdevs.ksh
@@ -0,0 +1,177 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023, Klara Inc.
+#
+
+# DESCRIPTION:
+# Verify that delay events from multiple vdevs doesnt degrade
+#
+# STRATEGY:
+# 1. Create a pool with a 3 disk raidz vdev
+# 2. Inject slow io errors
+# 3. Verify that ZED detects slow I/Os but doesn't degrade any vdevs
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+TESTDIR="$TEST_BASE_DIR/zed_slow_io"
+VDEV1="$TEST_BASE_DIR/vdevfile1.$$"
+VDEV2="$TEST_BASE_DIR/vdevfile2.$$"
+VDEV3="$TEST_BASE_DIR/vdevfile3.$$"
+VDEV4="$TEST_BASE_DIR/vdevfile4.$$"
+VDEVS="$VDEV1 $VDEV2 $VDEV3 $VDEV4"
+TESTPOOL="slow_io_pool"
+FILEPATH="$TESTDIR/slow_io.testfile"
+
+OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
+OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+
+ # if pool still exists then something failed so log additional info
+ if poolexists $TESTPOOL ; then
+ log_note "$(zpool status -s $TESTPOOL)"
+ echo "=================== zed log search ==================="
+ grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
+ destroy_pool $TESTPOOL
+ fi
+ log_must zed_stop
+
+ log_must rm -f $VDEVS
+ log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
+ log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
+}
+
+function start_slow_io
+{
+ for vdev in $VDEVS
+ do
+ log_must zpool set slow_io_n=4 $TESTPOOL $vdev
+ log_must zpool set slow_io_t=60 $TESTPOOL $vdev
+ done
+ zpool sync
+
+ log_must set_tunable64 ZIO_SLOW_IO_MS 10
+ log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1000
+
+ for vdev in $VDEVS
+ do
+ log_must zinject -d $vdev -D10:1 $TESTPOOL
+ done
+ zpool sync
+}
+
+function stop_slow_io
+{
+ log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
+ log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
+
+ log_must zinject -c all
+}
+
+function multiple_slow_vdevs_test
+{
+ log_must truncate -s 1G $VDEVS
+ default_raidz_setup_noexit $VDEVS
+
+ log_must zpool events -c
+ log_must zfs set compression=off $TESTPOOL
+ log_must zfs set primarycache=none $TESTPOOL
+ log_must zfs set recordsize=4K $TESTPOOL
+
+ log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=20
+ zpool sync
+
+ #
+ # Read the file with slow io injected on the disks
+ # This will cause multiple errors on each disk to trip ZED SERD
+ #
+ # pool: slow_io_pool
+ # state: ONLINE
+ # config:
+ #
+ # NAME STATE READ WRITE CKSUM SLOW
+ # slow_io_pool ONLINE 0 0 0 -
+ # raidz1-0 ONLINE 0 0 0 -
+ # /var/tmp/vdevfile1.499278 ONLINE 0 0 0 113
+ # /var/tmp/vdevfile2.499278 ONLINE 0 0 0 109
+ # /var/tmp/vdevfile3.499278 ONLINE 0 0 0 96
+ # /var/tmp/vdevfile4.499278 ONLINE 0 0 0 109
+ #
+ start_slow_io
+ dd if=$FILEPATH of=/dev/null bs=1M count=20 2>/dev/null
+ stop_slow_io
+
+ # count events available for processing
+ typeset -i i=0
+ typeset -i events=0
+ while [[ $i -lt 60 ]]; do
+ events=$(zpool events | grep "ereport\.fs\.zfs.delay" | wc -l)
+ [[ $events -ge "50" ]] && break
+ i=$((i+1))
+ sleep 1
+ done
+ log_note "$events delay events found"
+ if [[ $events -lt "50" ]]; then
+ log_note "bailing: not enough events to complete the test"
+ destroy_pool $TESTPOOL
+ return
+ fi
+
+ #
+ # give slow ZED a chance to process the delay events
+ #
+ typeset -i i=0
+ typeset -i skips=0
+ while [[ $i -lt 75 ]]; do
+ skips=$(grep "retiring case" \
+ $ZEDLET_DIR/zed.log | wc -l)
+ [[ $skips -gt "0" ]] && break
+ i=$((i+1))
+ sleep 1
+ done
+
+ log_note $skips degrade skips in ZED log after $i seconds
+ [ $skips -gt "0" ] || log_fail "expecting to see skips"
+
+ degrades=$(grep "zpool_vdev_degrade" $ZEDLET_DIR/zed.log | wc -l)
+ log_note $degrades vdev degrades in ZED log
+ [ $degrades -eq "0" ] || \
+ log_fail "expecting no degrade events, found $degrades"
+
+ destroy_pool $TESTPOOL
+}
+
+log_assert "Test ZED slow io across multiple vdevs"
+log_onexit cleanup
+
+log_must zed_events_drain
+log_must zed_start
+multiple_slow_vdevs_test
+
+log_pass "Test ZED slow io across multiple vdevs"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/cleanup.ksh
index 654343c0cf00..2959236b59a3 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/cleanup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/cleanup.ksh
@@ -32,5 +32,6 @@ cleanup_devices $DISKS
zed_stop
zed_cleanup resilver_finish-start-scrub.sh
+zed_events_drain
log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/setup.ksh
index 62f1c8ab56cb..61b9206ec1a6 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/setup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/setup.ksh
@@ -28,6 +28,7 @@
verify_runnable "global"
+zed_events_drain
zed_setup resilver_finish-start-scrub.sh
zed_start
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index f452cffa20c8..5a3e421ab67e 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -162,6 +162,9 @@
/* blkdev_issue_discard() is available */
/* #undef HAVE_BLKDEV_ISSUE_DISCARD */
+/* __blkdev_issue_discard() is available */
+/* #undef HAVE_BLKDEV_ISSUE_DISCARD_ASYNC */
+
/* blkdev_issue_secure_erase() is available */
/* #undef HAVE_BLKDEV_ISSUE_SECURE_ERASE */
@@ -1152,7 +1155,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.99-338-FreeBSD_g229b9f4ed"
+#define ZFS_META_ALIAS "zfs-2.2.99-345-FreeBSD_ge0bd8118d"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1182,7 +1185,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "338-FreeBSD_g229b9f4ed"
+#define ZFS_META_RELEASE "345-FreeBSD_ge0bd8118d"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 04ced657e728..9370fe080d71 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.99-338-g229b9f4ed"
+#define ZFS_META_GITREV "zfs-2.2.99-345-ge0bd8118d"