Diffstat (limited to 'sys/contrib/openzfs/module/zfs')
-rw-r--r--  sys/contrib/openzfs/module/zfs/abd.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c | 100
-rw-r--r--  sys/contrib/openzfs/module/zfs/bpobj.c | 10
-rw-r--r--  sys/contrib/openzfs/module/zfs/brt.c | 70
-rw-r--r--  sys/contrib/openzfs/module/zfs/btree.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dataset_kstats.c | 1
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c | 156
-rw-r--r--  sys/contrib/openzfs/module/zfs/ddt.c | 23
-rw-r--r--  sys/contrib/openzfs/module/zfs/ddt_log.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu.c | 23
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_diff.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_direct.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_object.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_objset.c | 12
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_recv.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_redact.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_send.c | 10
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_traverse.c | 17
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_tx.c | 18
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode.c | 41
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode_sync.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_bookmark.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_crypt.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dataset.c | 30
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_deadlist.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_deleg.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_destroy.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dir.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_pool.c | 12
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_prop.c | 31
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_scan.c | 33
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_userhold.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/fm.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c | 216
-rw-r--r--  sys/contrib/openzfs/module/zfs/mmp.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/range_tree.c | 100
-rw-r--r--  sys/contrib/openzfs/module/zfs/rrwlock.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/sa.c | 24
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c | 260
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_errlog.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c | 30
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_stats.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/space_map.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/space_reftree.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev.c | 103
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_draid.c | 16
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_indirect.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_initialize.c | 11
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_label.c | 14
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_mirror.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_queue.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_raidz.c | 36
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_rebuild.c | 9
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_removal.c | 52
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_trim.c | 32
-rw-r--r--  sys/contrib/openzfs/module/zfs/zap.c | 10
-rw-r--r--  sys/contrib/openzfs/module/zfs/zap_micro.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/zcp.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfeature.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_chksum.c | 69
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_crrd.c | 227
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_fuid.c | 44
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 155
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_log.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_quota.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_rlock.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_sa.c | 15
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_vnops.c | 58
-rw-r--r--  sys/contrib/openzfs/module/zfs/zil.c | 556
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c | 388
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_checksum.c | 33
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_compress.c | 15
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_inject.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zrlock.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zthr.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zvol.c | 409
76 files changed, 2470 insertions(+), 1211 deletions(-)
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 826928e67350..bf9b13c30509 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -563,7 +563,7 @@ abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size)
left -= csize;
off = 0;
}
- ASSERT3U(left, ==, 0);
+ ASSERT0(left);
} else {
abd = abd_get_offset_scatter(abd, sabd, off, size);
}
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 04ca32356a6d..df41e3b49204 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -1052,7 +1052,7 @@ static arc_buf_hdr_t *
buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp)
{
const dva_t *dva = BP_IDENTITY(bp);
- uint64_t birth = BP_GET_BIRTH(bp);
+ uint64_t birth = BP_GET_PHYSICAL_BIRTH(bp);
uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
arc_buf_hdr_t *hdr;
@@ -2239,8 +2239,8 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) {
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
(void) zfs_refcount_add_many(&state->arcs_esize[type],
HDR_GET_LSIZE(hdr), hdr);
@@ -2278,8 +2278,8 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) {
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
(void) zfs_refcount_remove_many(&state->arcs_esize[type],
HDR_GET_LSIZE(hdr), hdr);
@@ -2319,7 +2319,7 @@ add_reference(arc_buf_hdr_t *hdr, const void *tag)
if (!HDR_EMPTY(hdr) && !MUTEX_HELD(HDR_LOCK(hdr))) {
ASSERT(state == arc_anon);
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
}
if ((zfs_refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) &&
@@ -2503,7 +2503,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
(void) zfs_refcount_add_many(
&new_state->arcs_size[type],
HDR_GET_LSIZE(hdr), hdr);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
} else {
@@ -2547,7 +2547,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (update_old && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(old_state)) {
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
/*
@@ -2631,7 +2631,7 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
ARCSTAT_INCR(arcstat_bonus_size, space);
break;
case ARC_SPACE_DNODE:
- ARCSTAT_INCR(arcstat_dnode_size, space);
+ aggsum_add(&arc_sums.arcstat_dnode_size, space);
break;
case ARC_SPACE_DBUF:
ARCSTAT_INCR(arcstat_dbuf_size, space);
@@ -2677,7 +2677,7 @@ arc_space_return(uint64_t space, arc_space_type_t type)
ARCSTAT_INCR(arcstat_bonus_size, -space);
break;
case ARC_SPACE_DNODE:
- ARCSTAT_INCR(arcstat_dnode_size, -space);
+ aggsum_add(&arc_sums.arcstat_dnode_size, -space);
break;
case ARC_SPACE_DBUF:
ARCSTAT_INCR(arcstat_dbuf_size, -space);
@@ -2758,7 +2758,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
VERIFY(hdr->b_type == ARC_BUFC_DATA ||
hdr->b_type == ARC_BUFC_METADATA);
ASSERT3P(ret, !=, NULL);
- ASSERT3P(*ret, ==, NULL);
+ ASSERT0P(*ret);
IMPLY(encrypted, compressed);
buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
@@ -2982,7 +2982,7 @@ static void
arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
{
ASSERT(arc_can_share(hdr, buf));
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!ARC_BUF_ENCRYPTED(buf));
ASSERT(HDR_EMPTY_OR_LOCKED(hdr));
@@ -3201,14 +3201,14 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
if (alloc_rdata) {
size = HDR_GET_PSIZE(hdr);
- ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
+ ASSERT0P(hdr->b_crypt_hdr.b_rabd);
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
alloc_flags);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
ARCSTAT_INCR(arcstat_raw_size, size);
} else {
size = arc_hdr_size(hdr);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
alloc_flags);
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
@@ -3290,7 +3290,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
ASSERT(HDR_EMPTY(hdr));
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
HDR_SET_PSIZE(hdr, psize);
HDR_SET_LSIZE(hdr, lsize);
@@ -3351,12 +3351,12 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
nhdr->b_l1hdr.b_state = arc_l2c_only;
/* Verify previous threads set to NULL before freeing */
- ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(nhdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
} else {
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
/*
@@ -3375,7 +3375,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
* might try to be accessed, even though it was removed.
*/
VERIFY(!HDR_L2_WRITING(hdr));
- VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ VERIFY0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
arc_hdr_clear_flags(nhdr, ARC_FLAG_HAS_L1HDR);
@@ -3698,12 +3698,12 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
arc_hdr_free_abd(hdr, B_TRUE);
}
- ASSERT3P(hdr->b_hash_next, ==, NULL);
+ ASSERT0P(hdr->b_hash_next);
if (HDR_HAS_L1HDR(hdr)) {
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
kmem_cache_free(hdr_full_cache, hdr);
} else {
@@ -3771,7 +3771,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
*real_evicted = 0;
@@ -3796,7 +3796,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr);
if (HDR_HAS_L2HDR(hdr)) {
- ASSERT(hdr->b_l1hdr.b_pabd == NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
/*
* This buffer is cached on the 2nd Level ARC;
@@ -4490,7 +4490,7 @@ arc_evict(void)
* target is not evictable or if they go over arc_dnode_limit.
*/
int64_t prune = 0;
- int64_t dn = wmsum_value(&arc_sums.arcstat_dnode_size);
+ int64_t dn = aggsum_value(&arc_sums.arcstat_dnode_size);
int64_t nem = zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA])
+ zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA])
- zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA])
@@ -5082,11 +5082,13 @@ arc_is_overflowing(boolean_t lax, boolean_t use_reserve)
* in the ARC. In practice, that's in the tens of MB, which is low
* enough to be safe.
*/
- int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) - arc_c -
+ int64_t arc_over = aggsum_lower_bound(&arc_sums.arcstat_size) - arc_c -
zfs_max_recordsize;
+ int64_t dn_over = aggsum_lower_bound(&arc_sums.arcstat_dnode_size) -
+ arc_dnode_limit;
/* Always allow at least one block of overflow. */
- if (over < 0)
+ if (arc_over < 0 && dn_over <= 0)
return (ARC_OVF_NONE);
/* If we are under memory pressure, report severe overflow. */
@@ -5097,7 +5099,7 @@ arc_is_overflowing(boolean_t lax, boolean_t use_reserve)
int64_t overflow = (arc_c >> zfs_arc_overflow_shift) / 2;
if (use_reserve)
overflow *= 3;
- return (over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
+ return (arc_over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
}
static abd_t *
@@ -5552,7 +5554,7 @@ static void
arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp)
{
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) {
- ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0);
+ ASSERT0(HDR_GET_PSIZE(hdr));
ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF);
} else {
if (HDR_COMPRESSION_ENABLED(hdr)) {
@@ -5585,7 +5587,7 @@ arc_read_done(zio_t *zio)
if (HDR_IN_HASH_TABLE(hdr)) {
arc_buf_hdr_t *found;
- ASSERT3U(hdr->b_birth, ==, BP_GET_BIRTH(zio->io_bp));
+ ASSERT3U(hdr->b_birth, ==, BP_GET_PHYSICAL_BIRTH(zio->io_bp));
ASSERT3U(hdr->b_dva.dva_word[0], ==,
BP_IDENTITY(zio->io_bp)->dva_word[0]);
ASSERT3U(hdr->b_dva.dva_word[1], ==,
@@ -5688,7 +5690,7 @@ arc_read_done(zio_t *zio)
error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(zio->io_spa, &acb->acb_zb,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION,
zio->io_spa, NULL, &acb->acb_zb, zio, 0);
@@ -6107,7 +6109,7 @@ top:
if (!embedded_bp) {
hdr->b_dva = *BP_IDENTITY(bp);
- hdr->b_birth = BP_GET_BIRTH(bp);
+ hdr->b_birth = BP_GET_PHYSICAL_BIRTH(bp);
exists = buf_hash_insert(hdr, &hash_lock);
}
if (exists != NULL) {
@@ -6130,14 +6132,14 @@ top:
}
if (GHOST_STATE(hdr->b_l1hdr.b_state)) {
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT0(zfs_refcount_count(
&hdr->b_l1hdr.b_refcnt));
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
} else if (HDR_IO_IN_PROGRESS(hdr)) {
/*
@@ -6231,7 +6233,7 @@ top:
acb->acb_nobuf = no_buf;
acb->acb_zb = *zb;
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
hdr->b_l1hdr.b_acb = acb;
if (HDR_HAS_L2HDR(hdr) &&
@@ -6715,7 +6717,7 @@ arc_release(arc_buf_t *buf, const void *tag)
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
compress, hdr->b_complevel, type);
- ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(nhdr->b_l1hdr.b_buf);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
VERIFY3U(nhdr->b_type, ==, type);
ASSERT(!HDR_SHARED_DATA(nhdr));
@@ -6802,7 +6804,7 @@ arc_write_ready(zio_t *zio)
if (HDR_HAS_RABD(hdr))
arc_hdr_free_abd(hdr, B_TRUE);
}
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
ASSERT(!HDR_SHARED_DATA(hdr));
ASSERT(!arc_buf_is_shared(buf));
@@ -6946,7 +6948,7 @@ arc_write_done(zio_t *zio)
arc_buf_t *buf = callback->awcb_buf;
arc_buf_hdr_t *hdr = buf->b_hdr;
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
if (zio->io_error == 0) {
arc_hdr_verify(hdr, zio->io_bp);
@@ -6955,7 +6957,7 @@ arc_write_done(zio_t *zio)
buf_discard_identity(hdr);
} else {
hdr->b_dva = *BP_IDENTITY(zio->io_bp);
- hdr->b_birth = BP_GET_BIRTH(zio->io_bp);
+ hdr->b_birth = BP_GET_PHYSICAL_BIRTH(zio->io_bp);
}
} else {
ASSERT(HDR_EMPTY(hdr));
@@ -6971,7 +6973,7 @@ arc_write_done(zio_t *zio)
arc_buf_hdr_t *exists;
kmutex_t *hash_lock;
- ASSERT3U(zio->io_error, ==, 0);
+ ASSERT0(zio->io_error);
arc_cksum_verify(buf);
@@ -6992,7 +6994,7 @@ arc_write_done(zio_t *zio)
arc_hdr_destroy(exists);
mutex_exit(hash_lock);
exists = buf_hash_insert(hdr, &hash_lock);
- ASSERT3P(exists, ==, NULL);
+ ASSERT0P(exists);
} else if (zio->io_flags & ZIO_FLAG_NOPWRITE) {
/* nopwrite */
ASSERT(zio->io_prop.zp_nopwrite);
@@ -7005,7 +7007,7 @@ arc_write_done(zio_t *zio)
ASSERT(ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
ASSERT(hdr->b_l1hdr.b_state == arc_anon);
ASSERT(BP_GET_DEDUP(zio->io_bp));
- ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
+ ASSERT0(BP_GET_LEVEL(zio->io_bp));
}
}
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
@@ -7042,7 +7044,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT3P(done, !=, NULL);
ASSERT(!HDR_IO_ERROR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
if (uncached)
arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED);
@@ -7111,7 +7113,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF);
ASSERT(!arc_buf_is_shared(buf));
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
zio = zio_write(pio, spa, txg, bp,
abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)),
@@ -7326,7 +7328,7 @@ arc_kstat_update(kstat_t *ksp, int rw)
#if defined(COMPAT_FREEBSD11)
as->arcstat_other_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_bonus_size) +
- wmsum_value(&arc_sums.arcstat_dnode_size) +
+ aggsum_value(&arc_sums.arcstat_dnode_size) +
wmsum_value(&arc_sums.arcstat_dbuf_size);
#endif
@@ -7368,7 +7370,7 @@ arc_kstat_update(kstat_t *ksp, int rw)
&as->arcstat_uncached_evictable_metadata);
as->arcstat_dnode_size.value.ui64 =
- wmsum_value(&arc_sums.arcstat_dnode_size);
+ aggsum_value(&arc_sums.arcstat_dnode_size);
as->arcstat_bonus_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_bonus_size);
as->arcstat_l2_hits.value.ui64 =
@@ -7738,7 +7740,7 @@ arc_state_init(void)
wmsum_init(&arc_sums.arcstat_data_size, 0);
wmsum_init(&arc_sums.arcstat_metadata_size, 0);
wmsum_init(&arc_sums.arcstat_dbuf_size, 0);
- wmsum_init(&arc_sums.arcstat_dnode_size, 0);
+ aggsum_init(&arc_sums.arcstat_dnode_size, 0);
wmsum_init(&arc_sums.arcstat_bonus_size, 0);
wmsum_init(&arc_sums.arcstat_l2_hits, 0);
wmsum_init(&arc_sums.arcstat_l2_misses, 0);
@@ -7897,7 +7899,7 @@ arc_state_fini(void)
wmsum_fini(&arc_sums.arcstat_data_size);
wmsum_fini(&arc_sums.arcstat_metadata_size);
wmsum_fini(&arc_sums.arcstat_dbuf_size);
- wmsum_fini(&arc_sums.arcstat_dnode_size);
+ aggsum_fini(&arc_sums.arcstat_dnode_size);
wmsum_fini(&arc_sums.arcstat_bonus_size);
wmsum_fini(&arc_sums.arcstat_l2_hits);
wmsum_fini(&arc_sums.arcstat_l2_misses);
diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c
index 8c19de93f12f..ea9fbd036c6e 100644
--- a/sys/contrib/openzfs/module/zfs/bpobj.c
+++ b/sys/contrib/openzfs/module/zfs/bpobj.c
@@ -160,8 +160,8 @@ bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
memset(bpo, 0, sizeof (*bpo));
mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
- ASSERT(bpo->bpo_dbuf == NULL);
- ASSERT(bpo->bpo_phys == NULL);
+ ASSERT0P(bpo->bpo_dbuf);
+ ASSERT0P(bpo->bpo_phys);
ASSERT(object != 0);
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
@@ -478,7 +478,7 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
* We have unprocessed subobjs. Process the next one.
*/
ASSERT(bpo->bpo_havecomp);
- ASSERT3P(bpobj_size, ==, NULL);
+ ASSERT0P(bpobj_size);
/* Add the last subobj to stack. */
int64_t i = bpi->bpi_unprocessed_subobjs - 1;
@@ -954,8 +954,8 @@ space_range_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
(void) bp_freed, (void) tx;
struct space_range_arg *sra = arg;
- if (BP_GET_LOGICAL_BIRTH(bp) > sra->mintxg &&
- BP_GET_LOGICAL_BIRTH(bp) <= sra->maxtxg) {
+ if (BP_GET_BIRTH(bp) > sra->mintxg &&
+ BP_GET_BIRTH(bp) <= sra->maxtxg) {
if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
sra->used += bp_get_dsize_sync(sra->spa, bp);
else
diff --git a/sys/contrib/openzfs/module/zfs/brt.c b/sys/contrib/openzfs/module/zfs/brt.c
index 27d9ed7ea2b0..40664354aa73 100644
--- a/sys/contrib/openzfs/module/zfs/brt.c
+++ b/sys/contrib/openzfs/module/zfs/brt.c
@@ -478,6 +478,18 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
sizeof (uint64_t), 1, &brtvd->bv_mos_brtvdev, tx));
BRT_DEBUG("Pool directory object created, object=%s", name);
+ /*
+ * Activate the endian-fixed feature if this is the first BRT ZAP
+ * (i.e., BLOCK_CLONING is not yet active) and the feature is enabled.
+ */
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN) &&
+ !spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
+ spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
+ } else if (spa_feature_is_active(spa,
+ SPA_FEATURE_BLOCK_CLONING_ENDIAN)) {
+ spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
+ }
+
spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
}
@@ -658,6 +670,8 @@ brt_vdev_destroy(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
rw_exit(&brtvd->bv_lock);
spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
+ if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN))
+ spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
}
static void
@@ -855,16 +869,29 @@ brt_entry_fill(const blkptr_t *bp, brt_entry_t *bre, uint64_t *vdevidp)
*vdevidp = DVA_GET_VDEV(&bp->blk_dva[0]);
}
+static boolean_t
+brt_has_endian_fixed(spa_t *spa)
+{
+ return (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN));
+}
+
static int
-brt_entry_lookup(brt_vdev_t *brtvd, brt_entry_t *bre)
+brt_entry_lookup(spa_t *spa, brt_vdev_t *brtvd, brt_entry_t *bre)
{
uint64_t off = BRE_OFFSET(bre);
if (brtvd->bv_mos_entries == 0)
return (SET_ERROR(ENOENT));
- return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
- &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count), &bre->bre_count));
+ if (brt_has_endian_fixed(spa)) {
+ return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
+ &off, BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+ &bre->bre_count));
+ } else {
+ return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
+ &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+ &bre->bre_count));
+ }
}
/*
@@ -1056,7 +1083,7 @@ brt_entry_decref(spa_t *spa, const blkptr_t *bp)
}
rw_exit(&brtvd->bv_lock);
- error = brt_entry_lookup(brtvd, &bre_search);
+ error = brt_entry_lookup(spa, brtvd, &bre_search);
/* bre_search now contains correct bre_count */
if (error == ENOENT) {
BRTSTAT_BUMP(brt_decref_no_entry);
@@ -1118,7 +1145,7 @@ brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
if (bre == NULL) {
rw_exit(&brtvd->bv_lock);
- error = brt_entry_lookup(brtvd, &bre_search);
+ error = brt_entry_lookup(spa, brtvd, &bre_search);
if (error == ENOENT) {
refcnt = 0;
} else {
@@ -1270,10 +1297,18 @@ brt_pending_apply_vdev(spa_t *spa, brt_vdev_t *brtvd, uint64_t txg)
uint64_t off = BRE_OFFSET(bre);
if (brtvd->bv_mos_entries != 0 &&
brt_vdev_lookup(spa, brtvd, off)) {
- int error = zap_lookup_uint64_by_dnode(
- brtvd->bv_mos_entries_dnode, &off,
- BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
- &bre->bre_count);
+ int error;
+ if (brt_has_endian_fixed(spa)) {
+ error = zap_lookup_uint64_by_dnode(
+ brtvd->bv_mos_entries_dnode, &off,
+ BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+ &bre->bre_count);
+ } else {
+ error = zap_lookup_uint64_by_dnode(
+ brtvd->bv_mos_entries_dnode, &off,
+ BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+ &bre->bre_count);
+ }
if (error == 0) {
BRTSTAT_BUMP(brt_addref_entry_on_disk);
} else {
@@ -1326,7 +1361,7 @@ brt_pending_apply(spa_t *spa, uint64_t txg)
}
static void
-brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
+brt_sync_entry(spa_t *spa, dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
{
uint64_t off = BRE_OFFSET(bre);
@@ -1337,9 +1372,15 @@ brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
BRT_KEY_WORDS, tx);
VERIFY(error == 0 || error == ENOENT);
} else {
- VERIFY0(zap_update_uint64_by_dnode(dn, &off,
- BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
- &bre->bre_count, tx));
+ if (brt_has_endian_fixed(spa)) {
+ VERIFY0(zap_update_uint64_by_dnode(dn, &off,
+ BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+ &bre->bre_count, tx));
+ } else {
+ VERIFY0(zap_update_uint64_by_dnode(dn, &off,
+ BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+ &bre->bre_count, tx));
+ }
}
}
@@ -1368,7 +1409,8 @@ brt_sync_table(spa_t *spa, dmu_tx_t *tx)
void *c = NULL;
while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) {
- brt_sync_entry(brtvd->bv_mos_entries_dnode, bre, tx);
+ brt_sync_entry(spa, brtvd->bv_mos_entries_dnode, bre,
+ tx);
kmem_cache_free(brt_entry_cache, bre);
}
diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c
index aa282f711bc3..725b96a3b2c7 100644
--- a/sys/contrib/openzfs/module/zfs/btree.c
+++ b/sys/contrib/openzfs/module/zfs/btree.c
@@ -1110,7 +1110,7 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value,
if (where->bti_node == NULL) {
ASSERT3U(tree->bt_num_elems, ==, 1);
ASSERT3S(tree->bt_height, ==, -1);
- ASSERT3P(tree->bt_root, ==, NULL);
+ ASSERT0P(tree->bt_root);
ASSERT0(where->bti_offset);
tree->bt_num_nodes++;
@@ -1947,7 +1947,7 @@ void
zfs_btree_destroy(zfs_btree_t *tree)
{
ASSERT0(tree->bt_num_elems);
- ASSERT3P(tree->bt_root, ==, NULL);
+ ASSERT0P(tree->bt_root);
}
/* Verify that every child of this node has the correct parent pointer. */
@@ -1969,10 +1969,10 @@ static void
zfs_btree_verify_pointers(zfs_btree_t *tree)
{
if (tree->bt_height == -1) {
- VERIFY3P(tree->bt_root, ==, NULL);
+ VERIFY0P(tree->bt_root);
return;
}
- VERIFY3P(tree->bt_root->bth_parent, ==, NULL);
+ VERIFY0P(tree->bt_root->bth_parent);
zfs_btree_verify_pointers_helper(tree, tree->bt_root);
}
diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
index d3baabd6169f..e5abcd2044cf 100644
--- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c
+++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
@@ -44,6 +44,7 @@ static dataset_kstat_values_t empty_dataset_kstats = {
{ "zil_commit_error_count", KSTAT_DATA_UINT64 },
{ "zil_commit_stall_count", KSTAT_DATA_UINT64 },
{ "zil_commit_suspend_count", KSTAT_DATA_UINT64 },
+ { "zil_commit_crash_count", KSTAT_DATA_UINT64 },
{ "zil_itx_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 },
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index f1b5a17f337e..3d0f88b36336 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -523,7 +523,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
return;
/* Only data blocks support the attachment of user data. */
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
/* Clients must resolve a dbuf before attaching user data. */
ASSERT(db->db.db_data != NULL);
@@ -866,8 +866,16 @@ dbuf_evict_notify(uint64_t size)
* and grabbing the lock results in massive lock contention.
*/
if (size > dbuf_cache_target_bytes()) {
- if (size > dbuf_cache_hiwater_bytes())
+ /*
+ * Avoid calling dbuf_evict_one() from memory reclaim context
+ * (e.g. Linux kswapd, FreeBSD pagedaemon) to prevent deadlocks.
+ * Memory reclaim threads can get stuck waiting for the dbuf
+ * hash lock.
+ */
+ if (size > dbuf_cache_hiwater_bytes() &&
+ !current_is_reclaim_thread()) {
dbuf_evict_one();
+ }
cv_signal(&dbuf_evict_cv);
}
}
@@ -1120,8 +1128,8 @@ dbuf_verify(dmu_buf_impl_t *db)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
if (dn == NULL) {
- ASSERT(db->db_parent == NULL);
- ASSERT(db->db_blkptr == NULL);
+ ASSERT0P(db->db_parent);
+ ASSERT0P(db->db_blkptr);
} else {
ASSERT3U(db->db.db_object, ==, dn->dn_object);
ASSERT3P(db->db_objset, ==, dn->dn_objset);
@@ -1172,7 +1180,7 @@ dbuf_verify(dmu_buf_impl_t *db)
/* db is pointed to by the dnode */
/* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
if (DMU_OBJECT_IS_SPECIAL(db->db.db_object))
- ASSERT(db->db_parent == NULL);
+ ASSERT0P(db->db_parent);
else
ASSERT(db->db_parent != NULL);
if (db->db_blkid != DMU_SPILL_BLKID)
@@ -1211,7 +1219,7 @@ dbuf_verify(dmu_buf_impl_t *db)
int i;
for (i = 0; i < db->db.db_size >> 3; i++) {
- ASSERT(buf[i] == 0);
+ ASSERT0(buf[i]);
}
} else {
blkptr_t *bps = db->db.db_data;
@@ -1235,11 +1243,9 @@ dbuf_verify(dmu_buf_impl_t *db)
DVA_IS_EMPTY(&bp->blk_dva[1]) &&
DVA_IS_EMPTY(&bp->blk_dva[2]));
ASSERT0(bp->blk_fill);
- ASSERT0(bp->blk_pad[0]);
- ASSERT0(bp->blk_pad[1]);
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(BP_IS_HOLE(bp));
- ASSERT0(BP_GET_PHYSICAL_BIRTH(bp));
+ ASSERT0(BP_GET_RAW_PHYSICAL_BIRTH(bp));
}
}
}
@@ -1253,7 +1259,7 @@ dbuf_clear_data(dmu_buf_impl_t *db)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
dbuf_evict_user(db);
- ASSERT3P(db->db_buf, ==, NULL);
+ ASSERT0P(db->db_buf);
db->db.db_data = NULL;
if (db->db_state != DB_NOFILL) {
db->db_state = DB_UNCACHED;
@@ -1378,13 +1384,13 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
* All reads are synchronous, so we must have a hold on the dbuf
*/
ASSERT(zfs_refcount_count(&db->db_holds) > 0);
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
if (buf == NULL) {
/* i/o error */
ASSERT(zio == NULL || zio->io_error != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- ASSERT3P(db->db_buf, ==, NULL);
+ ASSERT0P(db->db_buf);
db->db_state = DB_UNCACHED;
DTRACE_SET_STATE(db, "i/o error");
} else if (db->db_level == 0 && db->db_freed_in_flight) {
@@ -1578,7 +1584,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags,
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
- ASSERT(db->db_buf == NULL);
+ ASSERT0P(db->db_buf);
ASSERT(db->db_parent == NULL ||
RW_LOCK_HELD(&db->db_parent->db_rwlock));
@@ -1615,7 +1621,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags,
*/
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bp)) {
spa_log_error(db->db_objset->os_spa, &zb,
- BP_GET_LOGICAL_BIRTH(bp));
+ BP_GET_PHYSICAL_BIRTH(bp));
err = SET_ERROR(EIO);
goto early_unlock;
}
@@ -1676,7 +1682,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db.db_data != NULL);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
if (dr == NULL ||
@@ -1895,8 +1901,8 @@ dbuf_noread(dmu_buf_impl_t *db, dmu_flags_t flags)
while (db->db_state == DB_READ || db->db_state == DB_FILL)
cv_wait(&db->db_changed, &db->db_mtx);
if (db->db_state == DB_UNCACHED) {
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
dbuf_set_data(db, dbuf_alloc_arcbuf(db));
db->db_state = DB_FILL;
DTRACE_SET_STATE(db, "assigning filled buffer");
@@ -1923,7 +1929,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
* comes from dbuf_dirty() callers who must also hold a range lock.
*/
ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
if (db->db_blkid == DMU_BONUS_BLKID ||
dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN)
@@ -1988,7 +1994,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
mutex_enter(&dn->dn_dbufs_mtx);
db = avl_find(&dn->dn_dbufs, db_search, &where);
- ASSERT3P(db, ==, NULL);
+ ASSERT0P(db);
db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
@@ -2011,7 +2017,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
if (db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL ||
db->db_state == DB_EVICTING) {
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db.db_data);
mutex_exit(&db->db_mtx);
continue;
}
@@ -2154,6 +2160,12 @@ dbuf_redirty(dbuf_dirty_record_t *dr)
ASSERT(arc_released(db->db_buf));
arc_buf_thaw(db->db_buf);
}
+
+ /*
+ * Clear the rewrite flag since this is now a logical
+ * modification.
+ */
+ dr->dt.dl.dr_rewrite = B_FALSE;
}
}
@@ -2701,6 +2713,38 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
}
+void
+dmu_buf_will_rewrite(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+
+ ASSERT(tx->tx_txg != 0);
+ ASSERT(!zfs_refcount_is_zero(&db->db_holds));
+
+ /*
+ * If the dbuf is already dirty in this txg, it will be written
+ * anyway, so there's nothing to do.
+ */
+ mutex_enter(&db->db_mtx);
+ if (dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
+ mutex_exit(&db->db_mtx);
+ return;
+ }
+ mutex_exit(&db->db_mtx);
+
+ /*
+ * The dbuf is not dirty, so we need to make it dirty and
+ * mark it for rewrite (preserve logical birth time).
+ */
+ dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
+
+ mutex_enter(&db->db_mtx);
+ dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
+ if (dr != NULL && db->db_level == 0)
+ dr->dt.dl.dr_rewrite = B_TRUE;
+ mutex_exit(&db->db_mtx);
+}
+
boolean_t
dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
@@ -2852,8 +2896,8 @@ dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx)
dbuf_clear_data(db);
}
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
db->db_state = DB_NOFILL;
DTRACE_SET_STATE(db,
@@ -2888,7 +2932,7 @@ dmu_buf_will_fill_flags(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail,
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT(tx->tx_txg != 0);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||
@@ -3100,7 +3144,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
{
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
@@ -3165,7 +3209,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
VERIFY(!dbuf_undirty(db, tx));
db->db_state = DB_UNCACHED;
}
- ASSERT(db->db_buf == NULL);
+ ASSERT0P(db->db_buf);
dbuf_set_data(db, buf);
db->db_state = DB_FILL;
DTRACE_SET_STATE(db, "filling assigned arcbuf");
@@ -3225,7 +3269,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
}
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
- ASSERT(db->db_data_pending == NULL);
+ ASSERT0P(db->db_data_pending);
ASSERT(list_is_empty(&db->db_dirty_records));
db->db_state = DB_EVICTING;
@@ -3277,11 +3321,11 @@ dbuf_destroy(dmu_buf_impl_t *db)
db->db_parent = NULL;
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
- ASSERT(db->db_hash_next == NULL);
- ASSERT(db->db_blkptr == NULL);
- ASSERT(db->db_data_pending == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
+ ASSERT0P(db->db_hash_next);
+ ASSERT0P(db->db_blkptr);
+ ASSERT0P(db->db_data_pending);
ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
ASSERT(!multilist_link_active(&db->db_cache_link));
@@ -3916,7 +3960,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
if (fail_uncached)
return (SET_ERROR(ENOENT));
- ASSERT3P(parent, ==, NULL);
+ ASSERT0P(parent);
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
if (fail_sparse) {
if (err == 0 && bp && BP_IS_HOLE(bp))
@@ -4020,7 +4064,7 @@ dbuf_create_bonus(dnode_t *dn)
{
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
- ASSERT(dn->dn_bonus == NULL);
+ ASSERT0P(dn->dn_bonus);
dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL,
dbuf_hash(dn->dn_objset, dn->dn_object, 0, DMU_BONUS_BLKID));
dn->dn_bonus->db_pending_evict = FALSE;
@@ -4372,7 +4416,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
* inappropriate to hook it in (i.e., nlevels mismatch).
*/
ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr);
- ASSERT(db->db_parent == NULL);
+ ASSERT0P(db->db_parent);
db->db_parent = dn->dn_dbuf;
db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid];
DBUF_VERIFY(db);
@@ -4433,7 +4477,7 @@ dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr)
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
- ASSERT3U(db->db_level, ==, 0);
+ ASSERT0(db->db_level);
if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) {
zbookmark_phys_t zb;
@@ -4544,7 +4588,7 @@ dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr)
/* ensure that everything is zero after our data */
for (; datap_end < datap_max; datap_end++)
- ASSERT(*datap_end == 0);
+ ASSERT0(*datap_end);
#endif
}
@@ -4552,7 +4596,7 @@ static blkptr_t *
dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
{
/* This must be a lightweight dirty record. */
- ASSERT3P(dr->dr_dbuf, ==, NULL);
+ ASSERT0P(dr->dr_dbuf);
dnode_t *dn = dr->dr_dnode;
if (dn->dn_phys->dn_nlevels == 1) {
@@ -4695,7 +4739,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
*/
if (db->db_state == DB_UNCACHED) {
/* This buffer has been freed since it was dirtied */
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db.db_data);
} else if (db->db_state == DB_FILL) {
/* This buffer was freed and is now being re-filled */
ASSERT(db->db.db_data != dr->dt.dl.dr_data);
@@ -4712,9 +4756,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
*/
dbuf_dirty_record_t *dr_head =
list_head(&db->db_dirty_records);
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
- ASSERT3P(dr_head->dt.dl.dr_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
+ ASSERT0P(dr_head->dt.dl.dr_data);
ASSERT3U(dr_head->dt.dl.dr_override_state, ==, DR_OVERRIDDEN);
} else {
ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL);
@@ -4899,7 +4943,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
zio->io_prev_space_delta = delta;
- if (BP_GET_LOGICAL_BIRTH(bp) != 0) {
+ if (BP_GET_BIRTH(bp) != 0) {
ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
BP_GET_TYPE(bp) == dn->dn_type) ||
(db->db_blkid == DMU_SPILL_BLKID &&
@@ -5186,7 +5230,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)));
drica.drica_os = dn->dn_objset;
- drica.drica_blk_birth = BP_GET_LOGICAL_BIRTH(bp);
+ drica.drica_blk_birth = BP_GET_BIRTH(bp);
drica.drica_tx = tx;
if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback,
&drica)) {
@@ -5201,8 +5245,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
if (dn->dn_objset != spa_meta_objset(spa)) {
dsl_dataset_t *ds = dmu_objset_ds(dn->dn_objset);
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
- BP_GET_LOGICAL_BIRTH(bp) >
- ds->ds_dir->dd_origin_txg) {
+ BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa,
@@ -5320,7 +5363,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
}
ASSERT(db->db_level == 0 || data == db->db_buf);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(db->db_blkptr), <=, txg);
+ ASSERT3U(BP_GET_BIRTH(db->db_blkptr), <=, txg);
ASSERT(pio);
SET_BOOKMARK(&zb, os->os_dsl_dataset ?
@@ -5334,6 +5377,24 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
/*
+ * Set rewrite properties for zfs_rewrite() operations.
+ */
+ if (db->db_level == 0 && dr->dt.dl.dr_rewrite) {
+ zp.zp_rewrite = B_TRUE;
+
+ /*
+ * Mark physical rewrite feature for activation.
+ * This will be activated automatically during dataset sync.
+ */
+ dsl_dataset_t *ds = os->os_dsl_dataset;
+ if (!dsl_dataset_feature_is_active(ds,
+ SPA_FEATURE_PHYSICAL_REWRITE)) {
+ ds->ds_feature_activation[
+ SPA_FEATURE_PHYSICAL_REWRITE] = (void *)B_TRUE;
+ }
+ }
+
+ /*
* We copy the blkptr now (rather than when we instantiate the dirty
* record), because its value can change between open context and
* syncing context. We do not need to hold dn_struct_rwlock to read
@@ -5403,6 +5464,7 @@ EXPORT_SYMBOL(dbuf_release_bp);
EXPORT_SYMBOL(dbuf_dirty);
EXPORT_SYMBOL(dmu_buf_set_crypt_params);
EXPORT_SYMBOL(dmu_buf_will_dirty);
+EXPORT_SYMBOL(dmu_buf_will_rewrite);
EXPORT_SYMBOL(dmu_buf_is_dirty);
EXPORT_SYMBOL(dmu_buf_will_clone_or_dio);
EXPORT_SYMBOL(dmu_buf_will_not_fill);
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
index 60cbb7755a7e..d6658375f810 100644
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -397,7 +397,7 @@ ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_object_name(ddt, type, class, name);
- ASSERT3U(*objectp, ==, 0);
+ ASSERT0(*objectp);
VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash));
ASSERT3U(*objectp, !=, 0);
@@ -724,10 +724,13 @@ ddt_phys_extend(ddt_univ_phys_t *ddp, ddt_phys_variant_t v, const blkptr_t *bp)
dvas[2] = bp->blk_dva[2];
if (ddt_phys_birth(ddp, v) == 0) {
- if (v == DDT_PHYS_FLAT)
- ddp->ddp_flat.ddp_phys_birth = BP_GET_BIRTH(bp);
- else
- ddp->ddp_trad[v].ddp_phys_birth = BP_GET_BIRTH(bp);
+ if (v == DDT_PHYS_FLAT) {
+ ddp->ddp_flat.ddp_phys_birth =
+ BP_GET_PHYSICAL_BIRTH(bp);
+ } else {
+ ddp->ddp_trad[v].ddp_phys_birth =
+ BP_GET_PHYSICAL_BIRTH(bp);
+ }
}
}
@@ -891,14 +894,14 @@ ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, const blkptr_t *bp)
if (ddt->ddt_flags & DDT_FLAG_FLAT) {
if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_flat.ddp_dva[0]) &&
- BP_GET_BIRTH(bp) == ddp->ddp_flat.ddp_phys_birth) {
+ BP_GET_PHYSICAL_BIRTH(bp) == ddp->ddp_flat.ddp_phys_birth) {
return (DDT_PHYS_FLAT);
}
} else /* traditional phys */ {
for (int p = 0; p < DDT_PHYS_MAX; p++) {
if (DVA_EQUAL(BP_IDENTITY(bp),
&ddp->ddp_trad[p].ddp_dva[0]) &&
- BP_GET_BIRTH(bp) ==
+ BP_GET_PHYSICAL_BIRTH(bp) ==
ddp->ddp_trad[p].ddp_phys_birth) {
return (p);
}
@@ -1008,7 +1011,7 @@ ddt_free(const ddt_t *ddt, ddt_entry_t *dde)
{
if (dde->dde_io != NULL) {
for (int p = 0; p < DDT_NPHYS(ddt); p++)
- ASSERT3P(dde->dde_io->dde_lead_zio[p], ==, NULL);
+ ASSERT0P(dde->dde_io->dde_lead_zio[p]);
if (dde->dde_io->dde_repair_abd != NULL)
abd_free(dde->dde_io->dde_repair_abd);
@@ -1418,7 +1421,7 @@ ddt_key_compare(const void *x1, const void *x2)
static void
ddt_create_dir(ddt_t *ddt, dmu_tx_t *tx)
{
- ASSERT3U(ddt->ddt_dir_object, ==, 0);
+ ASSERT0(ddt->ddt_dir_object);
ASSERT3U(ddt->ddt_version, ==, DDT_VERSION_FDT);
char name[DDT_NAMELEN];
@@ -2392,7 +2395,7 @@ ddt_sync(spa_t *spa, uint64_t txg)
* scan's root zio here so that we can wait for any scan IOs in
* addition to the regular ddt IOs.
*/
- ASSERT3P(scn->scn_zio_root, ==, NULL);
+ ASSERT0P(scn->scn_zio_root);
scn->scn_zio_root = rio;
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c
index dbd381aa9609..3d30e244c1f7 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_log.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_log.c
@@ -116,7 +116,7 @@ static void
ddt_log_create_one(ddt_t *ddt, ddt_log_t *ddl, uint_t n, dmu_tx_t *tx)
{
ASSERT3U(ddt->ddt_dir_object, >, 0);
- ASSERT3U(ddl->ddl_object, ==, 0);
+ ASSERT0(ddl->ddl_object);
char name[DDT_NAMELEN];
ddt_log_name(ddt, name, n);
@@ -194,7 +194,7 @@ void
ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu)
{
ASSERT3U(nentries, >, 0);
- ASSERT3P(dlu->dlu_dbp, ==, NULL);
+ ASSERT0P(dlu->dlu_dbp);
if (ddt->ddt_log_active->ddl_object == 0)
ddt_log_create(ddt, tx);
@@ -748,8 +748,8 @@ ddt_log_load(ddt_t *ddt)
void
ddt_log_alloc(ddt_t *ddt)
{
- ASSERT3P(ddt->ddt_log_active, ==, NULL);
- ASSERT3P(ddt->ddt_log_flushing, ==, NULL);
+ ASSERT0P(ddt->ddt_log_active);
+ ASSERT0P(ddt->ddt_log_flushing);
avl_create(&ddt->ddt_log[0].ddl_tree, ddt_key_compare,
sizeof (ddt_log_entry_t), offsetof(ddt_log_entry_t, ddle_node));
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 21c465328134..f7f808d5b8f7 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -1343,7 +1343,7 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
if (size == 0)
return;
- VERIFY(0 == dmu_buf_hold_array(os, object, offset, size,
+ VERIFY0(dmu_buf_hold_array(os, object, offset, size,
FALSE, FTAG, &numbufs, &dbp));
for (i = 0; i < numbufs; i++) {
@@ -1872,7 +1872,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
*/
BP_SET_LSIZE(bp, db->db_size);
} else if (!BP_IS_EMBEDDED(bp)) {
- ASSERT(BP_GET_LEVEL(bp) == 0);
+ ASSERT0(BP_GET_LEVEL(bp));
BP_SET_FILL(bp, 1);
}
}
@@ -1966,7 +1966,7 @@ dmu_sync_late_arrival_done(zio_t *zio)
blkptr_t *bp_orig __maybe_unused = &zio->io_bp_orig;
ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
- ASSERT(BP_GET_LOGICAL_BIRTH(zio->io_bp) == zio->io_txg);
+ ASSERT(BP_GET_BIRTH(zio->io_bp) == zio->io_txg);
ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
}
@@ -2405,7 +2405,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
}
}
} else if (wp & WP_NOFILL) {
- ASSERT(level == 0);
+ ASSERT0(level);
/*
* If we're writing preallocated blocks, we aren't actually
@@ -2508,6 +2508,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
zp->zp_encrypt = encrypt;
zp->zp_byteorder = ZFS_HOST_BYTEORDER;
zp->zp_direct_write = (wp & WP_DIRECT_WR) ? B_TRUE : B_FALSE;
+ zp->zp_rewrite = B_FALSE;
memset(zp->zp_salt, 0, ZIO_DATA_SALT_LEN);
memset(zp->zp_iv, 0, ZIO_DATA_IV_LEN);
memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN);
@@ -2655,11 +2656,12 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
* operation into ZIL, or it may be impossible to replay, since
* the block may appear not yet allocated at that point.
*/
- if (BP_GET_BIRTH(bp) > spa_freeze_txg(os->os_spa)) {
+ if (BP_GET_PHYSICAL_BIRTH(bp) > spa_freeze_txg(os->os_spa)) {
error = SET_ERROR(EINVAL);
goto out;
}
- if (BP_GET_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) {
+ if (BP_GET_PHYSICAL_BIRTH(bp) >
+ spa_last_synced_txg(os->os_spa)) {
error = SET_ERROR(EAGAIN);
goto out;
}
@@ -2731,7 +2733,8 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) {
if (!BP_IS_EMBEDDED(bp)) {
BP_SET_BIRTH(&dl->dr_overridden_by, dr->dr_txg,
- BP_GET_BIRTH(bp));
+ BP_GET_PHYSICAL_BIRTH(bp));
+ BP_SET_REWRITE(&dl->dr_overridden_by, 0);
} else {
BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by,
dr->dr_txg);
@@ -2862,7 +2865,7 @@ byteswap_uint64_array(void *vbuf, size_t size)
size_t count = size >> 3;
int i;
- ASSERT((size & 7) == 0);
+ ASSERT0((size & 7));
for (i = 0; i < count; i++)
buf[i] = BSWAP_64(buf[i]);
@@ -2875,7 +2878,7 @@ byteswap_uint32_array(void *vbuf, size_t size)
size_t count = size >> 2;
int i;
- ASSERT((size & 3) == 0);
+ ASSERT0((size & 3));
for (i = 0; i < count; i++)
buf[i] = BSWAP_32(buf[i]);
@@ -2888,7 +2891,7 @@ byteswap_uint16_array(void *vbuf, size_t size)
size_t count = size >> 1;
int i;
- ASSERT((size & 1) == 0);
+ ASSERT0((size & 1));
for (i = 0; i < count; i++)
buf[i] = BSWAP_16(buf[i]);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_diff.c b/sys/contrib/openzfs/module/zfs/dmu_diff.c
index 86f751e886c9..fb13b2f87f57 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_diff.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_diff.c
@@ -224,8 +224,8 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name,
* call the ZFS_IOC_OBJ_TO_STATS ioctl.
*/
error = traverse_dataset(tosnap, fromtxg,
- TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT,
- diff_cb, &da);
+ TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT |
+ TRAVERSE_LOGICAL, diff_cb, &da);
if (error != 0) {
da.da_err = error;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_direct.c b/sys/contrib/openzfs/module/zfs/dmu_direct.c
index 12b0ffa2c99b..d44c686088fc 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_direct.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_direct.c
@@ -95,16 +95,16 @@ dmu_write_direct_done(zio_t *zio)
abd_free(zio->io_abd);
mutex_enter(&db->db_mtx);
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(dr->dt.dl.dr_data, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(dr->dt.dl.dr_data);
+ ASSERT0P(db->db.db_data);
db->db_state = DB_UNCACHED;
mutex_exit(&db->db_mtx);
dmu_sync_done(zio, NULL, zio->io_private);
if (zio->io_error != 0) {
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)
ASSERT3U(zio->io_error, ==, EIO);
/*
diff --git a/sys/contrib/openzfs/module/zfs/dmu_object.c b/sys/contrib/openzfs/module/zfs/dmu_object.c
index b4ff7d224cc9..207cc6d0e713 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_object.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_object.c
@@ -90,7 +90,7 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
if (allocated_dnode != NULL) {
ASSERT3P(tag, !=, NULL);
} else {
- ASSERT3P(tag, ==, NULL);
+ ASSERT0P(tag);
tag = FTAG;
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index b3f792e4ae6b..a77f338bdfd3 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -345,12 +345,6 @@ smallblk_changed_cb(void *arg, uint64_t newval)
{
objset_t *os = arg;
- /*
- * Inheritance and range checking should have been done by now.
- */
- ASSERT(newval <= SPA_MAXBLOCKSIZE);
- ASSERT(ISP2(newval));
-
os->os_zpl_special_smallblock = newval;
}
@@ -730,7 +724,7 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
if (err == 0) {
mutex_enter(&ds->ds_lock);
- ASSERT(ds->ds_objset == NULL);
+ ASSERT0P(ds->ds_objset);
ds->ds_objset = os;
mutex_exit(&ds->ds_lock);
}
@@ -1376,7 +1370,7 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
6, ZFS_SPACE_CHECK_NORMAL);
if (rv == 0)
- zvol_create_minor(name);
+ zvol_create_minors(name);
crfree(cr);
@@ -2232,7 +2226,7 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
rf |= DB_RF_HAVESTRUCT;
error = dmu_spill_hold_by_dnode(dn, rf,
FTAG, (dmu_buf_t **)&db);
- ASSERT(error == 0);
+ ASSERT0(error);
mutex_enter(&db->db_mtx);
data = (before) ? db->db.db_data :
dmu_objset_userquota_find_data(db, tx);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
index 3a4bd7a1cea9..45c7af2bdcd2 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -866,7 +866,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
*/
if (dcp == NULL && drrb->drr_fromguid == 0 &&
drba->drba_origin == NULL) {
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
dcp = &dummy_dcp;
if (featureflags & DMU_BACKUP_FEATURE_RAW)
@@ -881,7 +881,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
if (drba->drba_cookie->drc_fromsnapobj != 0) {
VERIFY0(dsl_dataset_hold_obj(dp,
drba->drba_cookie->drc_fromsnapobj, FTAG, &snap));
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
}
if (drc->drc_heal) {
/* When healing we want to use the provided snapshot */
@@ -905,7 +905,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
if (drba->drba_origin != NULL) {
VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
FTAG, &origin));
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
}
/* Create new dataset. */
@@ -1403,7 +1403,7 @@ corrective_read_done(zio_t *zio)
/* Corruption corrected; update error log if needed */
if (zio->io_error == 0) {
spa_remove_error(data->spa, &data->zb,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
}
kmem_free(data, sizeof (cr_cb_data_t));
abd_free(zio->io_abd);
@@ -1530,7 +1530,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
}
rrd->abd = abd;
- io = zio_rewrite(NULL, rwa->os->os_spa, BP_GET_LOGICAL_BIRTH(bp), bp,
+ io = zio_rewrite(NULL, rwa->os->os_spa, BP_GET_BIRTH(bp), bp,
abd, BP_GET_PSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, flags,
&zb);
@@ -2792,7 +2792,7 @@ receive_read_payload_and_next_header(dmu_recv_cookie_t *drc, int len, void *buf)
drc->drc_rrd->bytes_read = drc->drc_bytes_read;
}
} else {
- ASSERT3P(buf, ==, NULL);
+ ASSERT0P(buf);
}
drc->drc_prev_cksum = drc->drc_cksum;
@@ -3450,7 +3450,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
break;
}
- ASSERT3P(drc->drc_rrd, ==, NULL);
+ ASSERT0P(drc->drc_rrd);
drc->drc_rrd = drc->drc_next_rrd;
drc->drc_next_rrd = NULL;
/* Allocates and loads header into drc->drc_next_rrd */
@@ -3468,7 +3468,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
drc->drc_rrd = NULL;
}
- ASSERT3P(drc->drc_rrd, ==, NULL);
+ ASSERT0P(drc->drc_rrd);
drc->drc_rrd = kmem_zalloc(sizeof (*drc->drc_rrd), KM_SLEEP);
drc->drc_rrd->eos_marker = B_TRUE;
bqueue_enqueue_flush(&rwa->q, drc->drc_rrd, 1);
@@ -3831,11 +3831,11 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
nvlist_free(drc->drc_keynvl);
} else if (!drc->drc_heal) {
if (drc->drc_newfs) {
- zvol_create_minor(drc->drc_tofs);
+ zvol_create_minors(drc->drc_tofs);
}
char *snapname = kmem_asprintf("%s@%s",
drc->drc_tofs, drc->drc_tosnap);
- zvol_create_minor(snapname);
+ zvol_create_minors(snapname);
kmem_strfree(snapname);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c
index 65443d112f27..5a22ed71a5fe 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_redact.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c
@@ -370,8 +370,8 @@ redact_traverse_thread(void *arg)
#endif
err = traverse_dataset_resume(rt_arg->ds, rt_arg->txg,
- &rt_arg->resume, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
- redact_cb, rt_arg);
+ &rt_arg->resume, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
+ TRAVERSE_LOGICAL, redact_cb, rt_arg);
if (err != EINTR)
rt_arg->error_code = err;
@@ -1067,7 +1067,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
}
if (err != 0)
goto out;
- VERIFY3P(nvlist_next_nvpair(redactnvl, pair), ==, NULL);
+ VERIFY0P(nvlist_next_nvpair(redactnvl, pair));
boolean_t resuming = B_FALSE;
zfs_bookmark_phys_t bookmark;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
index 4f27f3df0e55..8ecb99d5f57c 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -962,7 +962,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
char *data = NULL;
if (srdp->abd != NULL) {
data = abd_to_buf(srdp->abd);
- ASSERT3P(srdp->abuf, ==, NULL);
+ ASSERT0P(srdp->abuf);
} else if (srdp->abuf != NULL) {
data = srdp->abuf->b_data;
}
@@ -1084,7 +1084,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
if (sta->os->os_encrypted &&
!BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
- spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
+ spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp));
return (SET_ERROR(EIO));
}
@@ -1210,7 +1210,7 @@ send_traverse_thread(void *arg)
err = traverse_dataset_resume(st_arg->os->os_dsl_dataset,
st_arg->fromtxg, &st_arg->resume,
- st_arg->flags, send_cb, st_arg);
+ st_arg->flags | TRAVERSE_LOGICAL, send_cb, st_arg);
if (err != EINTR)
st_arg->error_code = err;
@@ -2514,7 +2514,7 @@ dmu_send_impl(struct dmu_send_params *dspp)
* list in the stream.
*/
if (dspp->numfromredactsnaps != NUM_SNAPS_NOT_REDACTED) {
- ASSERT3P(from_rl, ==, NULL);
+ ASSERT0P(from_rl);
fnvlist_add_uint64_array(nvl, BEGINNV_REDACT_FROM_SNAPS,
dspp->fromredactsnaps, (uint_t)dspp->numfromredactsnaps);
if (dspp->numfromredactsnaps > 0) {
@@ -2891,7 +2891,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
&fromds);
if (err != 0) {
- ASSERT3P(fromds, ==, NULL);
+ ASSERT0P(fromds);
} else {
/*
* We need to make a deep copy of the redact
diff --git a/sys/contrib/openzfs/module/zfs/dmu_traverse.c b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
index f534a7dd64e3..dd1df1705040 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_traverse.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
@@ -74,6 +74,15 @@ static int traverse_dnode(traverse_data_t *td, const blkptr_t *bp,
static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
uint64_t objset, uint64_t object);
+static inline uint64_t
+get_birth_time(traverse_data_t *td, const blkptr_t *bp)
+{
+ if (td->td_flags & TRAVERSE_LOGICAL)
+ return (BP_GET_LOGICAL_BIRTH(bp));
+ else
+ return (BP_GET_BIRTH(bp));
+}
+
static int
traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
uint64_t claim_txg)
@@ -85,7 +94,7 @@ traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
return (0);
if (claim_txg == 0 &&
- BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(td->td_spa))
+ get_birth_time(td, bp) >= spa_min_claim_txg(td->td_spa))
return (-1);
SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
@@ -110,7 +119,7 @@ traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
if (BP_IS_HOLE(bp))
return (0);
- if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg)
+ if (claim_txg == 0 || get_birth_time(td, bp) < claim_txg)
return (0);
ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
@@ -194,7 +203,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
*/
if (resume_skip_check(td, dnp, zb) != RESUME_SKIP_NONE)
return (B_FALSE);
- if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg)
+ if (BP_IS_HOLE(bp) || get_birth_time(td, bp) <= td->td_min_txg)
return (B_FALSE);
if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
return (B_FALSE);
@@ -265,7 +274,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_object == DMU_META_DNODE_OBJECT) &&
td->td_hole_birth_enabled_txg <= td->td_min_txg)
return (0);
- } else if (BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg) {
+ } else if (get_birth_time(td, bp) <= td->td_min_txg) {
return (0);
}
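The new get_birth_time() helper lets a traversal prune on either birth time: with TRAVERSE_LOGICAL (now passed by the redact and send traversal threads earlier in this diff) pruning uses the logical birth, otherwise it uses the block's on-disk birth. A standalone sketch of the effect, using simplified stand-in types and an illustrative flag value rather than the real blkptr/traverse definitions:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins; the real definitions live in the OpenZFS headers. */
#define	TRAVERSE_LOGICAL	0x40	/* illustrative flag value only */

typedef struct {
	uint64_t logical_birth;		/* txg the contents last changed */
	uint64_t physical_birth;	/* txg the block was last written out */
} fake_bp_t;

typedef struct {
	int td_flags;
	uint64_t td_min_txg;
} fake_td_t;

/* Same shape as the get_birth_time() helper added to dmu_traverse.c. */
static uint64_t
get_birth_time(const fake_td_t *td, const fake_bp_t *bp)
{
	if (td->td_flags & TRAVERSE_LOGICAL)
		return (bp->logical_birth);
	return (bp->physical_birth);
}

int
main(void)
{
	/* A block rewritten in txg 200 whose contents last changed in txg 50. */
	fake_bp_t bp = { .logical_birth = 50, .physical_birth = 200 };
	fake_td_t send = { .td_flags = TRAVERSE_LOGICAL, .td_min_txg = 100 };
	fake_td_t scrub = { .td_flags = 0, .td_min_txg = 100 };

	/* A logical walk (incremental send) from txg 100 can skip it... */
	(void) printf("logical walk visits: %d\n",
	    get_birth_time(&send, &bp) > send.td_min_txg);
	/* ...while a physical walk still sees the rewrite. */
	(void) printf("physical walk visits: %d\n",
	    get_birth_time(&scrub, &bp) > scrub.td_min_txg);
	return (0);
}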
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
index d85d8b89423e..40c0b3402a05 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -126,7 +126,7 @@ dmu_tx_hold_dnode_impl(dmu_tx_t *tx, dnode_t *dn, enum dmu_tx_hold_type type,
* problem, but there's no way for it to happen (for
* now, at least).
*/
- ASSERT(dn->dn_assigned_txg == 0);
+ ASSERT0(dn->dn_assigned_txg);
dn->dn_assigned_txg = tx->tx_txg;
(void) zfs_refcount_add(&dn->dn_tx_holds, tx);
mutex_exit(&dn->dn_mtx);
@@ -443,7 +443,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
dnode_t *dn = txh->txh_dnode;
int err;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
if (off >= (dn->dn_maxblkid + 1) * dn->dn_datablksz)
return;
@@ -607,7 +607,7 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name)
dnode_t *dn = txh->txh_dnode;
int err;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
dmu_tx_count_dnode(txh);
@@ -681,7 +681,7 @@ dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
dmu_tx_hold_t *txh;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
object, THT_BONUS, 0, 0);
@@ -706,7 +706,7 @@ dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
dmu_tx_hold_t *txh;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
DMU_NEW_OBJECT, THT_SPACE, space, 0);
@@ -1232,7 +1232,7 @@ dmu_tx_assign(dmu_tx_t *tx, dmu_tx_flag_t flags)
{
int err;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
ASSERT0(flags & ~(DMU_TX_WAIT | DMU_TX_NOTHROTTLE | DMU_TX_SUSPEND));
IMPLY(flags & DMU_TX_SUSPEND, flags & DMU_TX_WAIT);
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
@@ -1328,7 +1328,7 @@ dmu_tx_wait(dmu_tx_t *tx)
dsl_pool_t *dp = tx->tx_pool;
hrtime_t before;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
ASSERT(!dsl_pool_config_held(tx->tx_pool));
/*
@@ -1644,12 +1644,12 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
if (sa->sa_force_spill || may_grow || hdl->sa_spill) {
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
dmu_tx_hold_spill(tx, object);
} else {
DB_DNODE_ENTER(db);
if (DB_DNODE(db)->dn_have_spill) {
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
dmu_tx_hold_spill(tx, object);
}
DB_DNODE_EXIT(db);
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c
index 904a039edf95..963ff41232a3 100644
--- a/sys/contrib/openzfs/module/zfs/dnode.c
+++ b/sys/contrib/openzfs/module/zfs/dnode.c
@@ -86,6 +86,19 @@ int zfs_default_ibs = DN_MAX_INDBLKSHIFT;
static kmem_cbrc_t dnode_move(void *, void *, size_t, void *);
#endif /* _KERNEL */
+static char *
+rt_name(dnode_t *dn, const char *name)
+{
+ struct objset *os = dn->dn_objset;
+
+ return (kmem_asprintf("{spa=%s objset=%llu obj=%llu %s}",
+ spa_name(os->os_spa),
+ (u_longlong_t)(os->os_dsl_dataset ?
+ os->os_dsl_dataset->ds_object : DMU_META_OBJSET),
+ (u_longlong_t)dn->dn_object,
+ name));
+}
+
static int
dbuf_compare(const void *x1, const void *x2)
{
@@ -201,7 +214,7 @@ dnode_dest(void *arg, void *unused)
for (int i = 0; i < TXG_SIZE; i++) {
ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
- ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
+ ASSERT0P(dn->dn_free_ranges[i]);
list_destroy(&dn->dn_dirty_records[i]);
ASSERT0(dn->dn_next_nblkptr[i]);
ASSERT0(dn->dn_next_nlevels[i]);
@@ -218,10 +231,10 @@ dnode_dest(void *arg, void *unused)
ASSERT0(dn->dn_assigned_txg);
ASSERT0(dn->dn_dirty_txg);
ASSERT0(dn->dn_dirtyctx);
- ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
- ASSERT3P(dn->dn_bonus, ==, NULL);
+ ASSERT0P(dn->dn_dirtyctx_firstset);
+ ASSERT0P(dn->dn_bonus);
ASSERT(!dn->dn_have_spill);
- ASSERT3P(dn->dn_zio, ==, NULL);
+ ASSERT0P(dn->dn_zio);
ASSERT0(dn->dn_oldused);
ASSERT0(dn->dn_oldflags);
ASSERT0(dn->dn_olduid);
@@ -305,7 +318,7 @@ dnode_kstats_update(kstat_t *ksp, int rw)
void
dnode_init(void)
{
- ASSERT(dnode_cache == NULL);
+ ASSERT0P(dnode_cache);
dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t),
0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_RECLAIMABLE);
kmem_cache_set_move(dnode_cache, dnode_move);
@@ -496,7 +509,7 @@ dnode_buf_byteswap(void *vbuf, size_t size)
int i = 0;
ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
- ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0);
+ ASSERT0((size & (sizeof (dnode_phys_t)-1)));
while (i < size) {
dnode_phys_t *dnp = (void *)(((char *)vbuf) + i);
@@ -660,7 +673,7 @@ dnode_destroy(dnode_t *dn)
objset_t *os = dn->dn_objset;
boolean_t complete_os_eviction = B_FALSE;
- ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
+ ASSERT0((dn->dn_id_flags & DN_ID_NEW_EXIST));
mutex_enter(&os->os_lock);
POINTER_INVALIDATE(&dn->dn_objset);
@@ -767,7 +780,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT0(dn->dn_next_maxblkid[i]);
ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
- ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
+ ASSERT0P(dn->dn_free_ranges[i]);
}
dn->dn_type = ot;
@@ -945,7 +958,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ndn->dn_dirty_txg = odn->dn_dirty_txg;
ndn->dn_dirtyctx = odn->dn_dirtyctx;
ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
- ASSERT(zfs_refcount_count(&odn->dn_tx_holds) == 0);
+ ASSERT0(zfs_refcount_count(&odn->dn_tx_holds));
zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds);
ASSERT(avl_is_empty(&ndn->dn_dbufs));
avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs);
@@ -2291,7 +2304,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
if ((off >> blkshift) > dn->dn_maxblkid)
return;
} else {
- ASSERT(dn->dn_maxblkid == 0);
+ ASSERT0(dn->dn_maxblkid);
if (off == 0 && len >= blksz) {
/*
* Freeing the whole block; fast-track this request.
@@ -2436,8 +2449,10 @@ done:
{
int txgoff = tx->tx_txg & TXG_MASK;
if (dn->dn_free_ranges[txgoff] == NULL) {
- dn->dn_free_ranges[txgoff] = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ dn->dn_free_ranges[txgoff] =
+ zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, rt_name(dn, "dn_free_ranges"));
}
zfs_range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
zfs_range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
@@ -2509,7 +2524,7 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
}
space += delta;
if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) {
- ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
+ ASSERT0((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES));
ASSERT0(P2PHASE(space, 1<<DEV_BSHIFT));
dn->dn_phys->dn_used = space >> DEV_BSHIFT;
} else {
diff --git a/sys/contrib/openzfs/module/zfs/dnode_sync.c b/sys/contrib/openzfs/module/zfs/dnode_sync.c
index 4067f221f1bf..046ceddb3609 100644
--- a/sys/contrib/openzfs/module/zfs/dnode_sync.c
+++ b/sys/contrib/openzfs/module/zfs/dnode_sync.c
@@ -209,8 +209,8 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
rw_exit(&dn->dn_struct_rwlock);
if (err == ENOENT)
continue;
- ASSERT(err == 0);
- ASSERT(child->db_level == 0);
+ ASSERT0(err);
+ ASSERT0(child->db_level);
dr = dbuf_find_dirty_eq(child, txg);
/* data_old better be zeroed */
@@ -868,7 +868,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dbuf_sync_list(list, dn->dn_phys->dn_nlevels - 1, tx);
if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
- ASSERT3P(list_head(list), ==, NULL);
+ ASSERT0P(list_head(list));
dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index e301fe19f645..ee574c499f9f 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -243,7 +243,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp,
/* error is retval of the following if-cascade */
if (strchr(source, '@') != NULL) {
dsl_dataset_t *source_snap_ds;
- ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0);
+ ASSERT0(snapshot_namecheck(source, NULL, NULL));
error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds);
if (error == 0) {
VERIFY(source_snap_ds->ds_is_snapshot);
@@ -258,7 +258,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp,
}
} else if (strchr(source, '#') != NULL) {
zfs_bookmark_phys_t source_phys;
- ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0);
+ ASSERT0(bookmark_namecheck(source, NULL, NULL));
/*
* Source must exist and be an earlier point in newbm_ds's
* timeline (newbm_ds's origin may be a snap of source's ds)
@@ -501,7 +501,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
if (bookmark_redacted) {
- ASSERT3P(redaction_list, ==, NULL);
+ ASSERT0P(redaction_list);
local_rl->rl_phys->rlp_last_blkid = UINT64_MAX;
local_rl->rl_phys->rlp_last_object = UINT64_MAX;
dsl_redaction_list_long_rele(local_rl, tag);
@@ -1523,7 +1523,7 @@ dsl_bookmark_block_killed(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
* If the block was live (referenced) at the time of this
* bookmark, add its space to the bookmark's FBN.
*/
- if (BP_GET_LOGICAL_BIRTH(bp) <=
+ if (BP_GET_BIRTH(bp) <=
dbn->dbn_phys.zbm_creation_txg &&
(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) {
mutex_enter(&dbn->dbn_lock);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
index db568f42d24e..f519b937edc0 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
@@ -534,7 +534,7 @@ out:
static void
dsl_crypto_key_free(dsl_crypto_key_t *dck)
{
- ASSERT(zfs_refcount_count(&dck->dck_holds) == 0);
+ ASSERT0(zfs_refcount_count(&dck->dck_holds));
/* destroy the zio_crypt_key_t */
zio_crypt_key_destroy(&dck->dck_key);
@@ -866,7 +866,7 @@ spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
dsl_pool_rele(dp, FTAG);
/* create any zvols under this ds */
- zvol_create_minors_recursive(dsname);
+ zvol_create_minors(dsname);
return (0);
@@ -1912,7 +1912,7 @@ dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
/* clones always use their origin's wrapping key */
if (dsl_dir_is_clone(dd)) {
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
/*
* If this is an encrypted clone we just need to clone the
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index c0a7872c40ad..420687480a76 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -159,7 +159,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
return;
}
- ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,
+ ASSERT3U(BP_GET_BIRTH(bp), >,
dsl_dataset_phys(ds)->ds_prev_snap_txg);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
mutex_enter(&ds->ds_lock);
@@ -194,7 +194,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
* they do not need to be freed.
*/
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
- BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
+ BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
!(BP_IS_EMBEDDED(bp))) {
ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa,
@@ -263,7 +263,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
return (0);
ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) <= tx->tx_txg);
+ ASSERT(BP_GET_BIRTH(bp) <= tx->tx_txg);
if (ds == NULL) {
dsl_free(tx->tx_pool, tx->tx_txg, bp);
@@ -281,7 +281,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
* they do not need to be freed.
*/
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
- BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
+ BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
!(BP_IS_EMBEDDED(bp))) {
ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa,
@@ -289,7 +289,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
bplist_append(&ds->ds_dir->dd_pending_frees, bp);
}
- if (BP_GET_LOGICAL_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
+ if (BP_GET_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
int64_t delta;
/*
@@ -346,14 +346,14 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0);
/* if (logical birth > prev prev snap txg) prev unique += bs */
if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
- ds->ds_object && BP_GET_LOGICAL_BIRTH(bp) >
+ ds->ds_object && BP_GET_BIRTH(bp) >
dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) {
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
mutex_enter(&ds->ds_prev->ds_lock);
dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used;
mutex_exit(&ds->ds_prev->ds_lock);
}
- if (BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
+ if (BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
dsl_dir_transfer_space(ds->ds_dir, used,
DD_USED_HEAD, DD_USED_SNAP, tx);
}
@@ -450,7 +450,7 @@ dsl_dataset_evict_sync(void *dbu)
{
dsl_dataset_t *ds = dbu;
- ASSERT(ds->ds_owner == NULL);
+ ASSERT0P(ds->ds_owner);
unique_remove(ds->ds_fsid_guid);
}
@@ -460,7 +460,7 @@ dsl_dataset_evict_async(void *dbu)
{
dsl_dataset_t *ds = dbu;
- ASSERT(ds->ds_owner == NULL);
+ ASSERT0P(ds->ds_owner);
ds->ds_dbuf = NULL;
@@ -1187,7 +1187,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0);
ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
+ ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);
dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
@@ -2005,7 +2005,7 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
if (error == 0) {
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
- zvol_create_minor(nvpair_name(pair));
+ zvol_create_minors(nvpair_name(pair));
}
}
@@ -2112,7 +2112,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *rio, dmu_tx_t *tx)
{
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(ds->ds_objset != NULL);
- ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0);
+ ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);
/*
* in case we had to change ds_fsid_guid when we opened it,
@@ -2944,7 +2944,7 @@ dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
if (snap == NULL)
return (B_FALSE);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- birth = BP_GET_LOGICAL_BIRTH(dsl_dataset_get_blkptr(ds));
+ birth = BP_GET_BIRTH(dsl_dataset_get_blkptr(ds));
rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {
objset_t *os, *os_snap;
@@ -3413,7 +3413,7 @@ dsl_dataset_clone(const char *clone, const char *origin)
6, ZFS_SPACE_CHECK_NORMAL);
if (rv == 0)
- zvol_create_minor(clone);
+ zvol_create_minors(clone);
crfree(cr);
@@ -4180,7 +4180,7 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_pool_t *dp = dmu_tx_pool(tx);
int64_t unused_refres_delta;
- ASSERT(clone->ds_reserved == 0);
+ ASSERT0(clone->ds_reserved);
/*
* NOTE: On DEBUG kernels there could be a race between this and
* the check function if spa_asize_inflation is adjusted...
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
index 3113d932fb68..475db3c89508 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
@@ -484,7 +484,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
dl->dl_phys->dl_comp += sign * BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += sign * BP_GET_UCSIZE(bp);
- dle_tofind.dle_mintxg = BP_GET_LOGICAL_BIRTH(bp);
+ dle_tofind.dle_mintxg = BP_GET_BIRTH(bp);
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
@@ -493,7 +493,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
if (dle == NULL) {
zfs_panic_recover("blkptr at %p has invalid BLK_BIRTH %llu",
- bp, (longlong_t)BP_GET_LOGICAL_BIRTH(bp));
+ bp, (longlong_t)BP_GET_BIRTH(bp));
dle = avl_first(&dl->dl_tree);
}
@@ -1037,7 +1037,7 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
avl_tree_t *avl = lia->avl;
bplist_t *to_free = lia->to_free;
zthr_t *t = lia->t;
- ASSERT(tx == NULL);
+ ASSERT0P(tx);
if ((t != NULL) && (zthr_has_waiters(t) || zthr_iscancelled(t)))
return (SET_ERROR(EINTR));
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deleg.c b/sys/contrib/openzfs/module/zfs/dsl_deleg.c
index c01a06e98340..fdd37b36e280 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deleg.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deleg.c
@@ -102,7 +102,7 @@ dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr)
nvlist_t *perms;
nvpair_t *permpair = NULL;
- VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
+ VERIFY0(nvpair_value_nvlist(whopair, &perms));
while ((permpair = nvlist_next_nvpair(perms, permpair))) {
const char *perm = nvpair_name(permpair);
@@ -189,8 +189,7 @@ dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
const char *perm = nvpair_name(permpair);
uint64_t n = 0;
- VERIFY(zap_update(mos, jumpobj,
- perm, 8, 1, &n, tx) == 0);
+ VERIFY0(zap_update(mos, jumpobj, perm, 8, 1, &n, tx));
spa_history_log_internal_dd(dd, "permission update", tx,
"%s %s", whokey, perm);
}
@@ -225,7 +224,7 @@ dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
if (zap_lookup(mos, zapobj, whokey, 8,
1, &jumpobj) == 0) {
(void) zap_remove(mos, zapobj, whokey, tx);
- VERIFY(0 == zap_destroy(mos, jumpobj, tx));
+ VERIFY0(zap_destroy(mos, jumpobj, tx));
}
spa_history_log_internal_dd(dd, "permission who remove",
tx, "%s", whokey);
@@ -243,7 +242,7 @@ dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
if (zap_count(mos, jumpobj, &n) == 0 && n == 0) {
(void) zap_remove(mos, zapobj,
whokey, tx);
- VERIFY(0 == zap_destroy(mos,
+ VERIFY0(zap_destroy(mos,
jumpobj, tx));
}
spa_history_log_internal_dd(dd, "permission remove", tx,
@@ -332,7 +331,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
baseza = zap_attribute_alloc();
source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
nvlist_t *sp_nvp;
@@ -706,7 +705,7 @@ copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj,
ZFS_DELEG_LOCAL, &uid);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) {
jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
- VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0);
+ VERIFY0(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx));
}
za = zap_attribute_alloc();
@@ -716,8 +715,7 @@ copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj,
uint64_t zero = 0;
ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1);
- VERIFY(zap_update(mos, jumpobj, za->za_name,
- 8, 1, &zero, tx) == 0);
+ VERIFY0(zap_update(mos, jumpobj, za->za_name, 8, 1, &zero, tx));
}
zap_cursor_fini(&zc);
zap_attribute_free(za);
@@ -761,10 +759,10 @@ dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx)
zap_cursor_retrieve(&zc, za) == 0;
zap_cursor_advance(&zc)) {
ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1);
- VERIFY(0 == zap_destroy(mos, za->za_first_integer, tx));
+ VERIFY0(zap_destroy(mos, za->za_first_integer, tx));
}
zap_cursor_fini(&zc);
- VERIFY(0 == zap_destroy(mos, zapobj, tx));
+ VERIFY0(zap_destroy(mos, zapobj, tx));
zap_attribute_free(za);
return (0);
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
index f5ec93b2dc5c..ea01ee586f8b 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
@@ -133,11 +133,11 @@ process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
ASSERT(!BP_IS_HOLE(bp));
- if (BP_GET_LOGICAL_BIRTH(bp) <=
+ if (BP_GET_BIRTH(bp) <=
dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx);
if (poa->ds_prev && !poa->after_branch_point &&
- BP_GET_LOGICAL_BIRTH(bp) >
+ BP_GET_BIRTH(bp) >
dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
bp_get_dsize_sync(dp->dp_spa, bp);
@@ -315,8 +315,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
- tx->tx_txg);
+ ASSERT3U(BP_GET_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(zfs_refcount_is_zero(&ds->ds_longholds));
@@ -351,7 +350,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
dsl_dataset_deactivate_feature(ds, f, tx);
}
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
- ASSERT3P(ds->ds_prev, ==, NULL);
+ ASSERT0P(ds->ds_prev);
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
after_branch_point =
@@ -466,7 +465,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
&used, &comp, &uncomp);
dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
dsl_dataset_rele(ds_nextnext, FTAG);
- ASSERT3P(ds_next->ds_prev, ==, NULL);
+ ASSERT0P(ds_next->ds_prev);
/* Collapse range in this head. */
dsl_dataset_t *hds;
@@ -526,7 +525,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
/* remove from snapshot namespace */
dsl_dataset_t *ds_head;
- ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
+ ASSERT0(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
VERIFY0(dsl_dataset_get_snapname(ds));
@@ -729,8 +728,8 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
} else {
- ASSERT(zilog == NULL);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,
+ ASSERT0P(zilog);
+ ASSERT3U(BP_GET_BIRTH(bp), >,
dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
}
@@ -1020,8 +1019,7 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT(ds->ds_prev == NULL ||
dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
- tx->tx_txg);
+ ASSERT3U(BP_GET_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
index f24cd2049533..6ce1890cfea1 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -151,8 +151,8 @@ dsl_dir_evict_async(void *dbu)
for (t = 0; t < TXG_SIZE; t++) {
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
- ASSERT(dd->dd_tempreserved[t] == 0);
- ASSERT(dd->dd_space_towrite[t] == 0);
+ ASSERT0(dd->dd_tempreserved[t]);
+ ASSERT0(dd->dd_space_towrite[t]);
}
if (dd->dd_parent)
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index f1088d87208b..f47822df8b53 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -522,8 +522,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops __attribute__((unused)),
/* create and open the free_bplist */
obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx);
- VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
+ VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@@ -1056,7 +1056,7 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
* will be wrong.
*/
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- ASSERT0(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(prev)->ds_bp));
+ ASSERT0(BP_GET_BIRTH(&dsl_dataset_phys(prev)->ds_bp));
rrw_exit(&ds->ds_bp_rwlock, FTAG);
/* The origin doesn't get attached to itself */
@@ -1077,7 +1077,7 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
dsl_dataset_phys(prev)->ds_num_children++;
if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) {
- ASSERT(ds->ds_prev == NULL);
+ ASSERT0P(ds->ds_prev);
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dataset_phys(ds)->ds_prev_snap_obj,
ds, &ds->ds_prev));
@@ -1173,7 +1173,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
dsl_dataset_t *ds;
ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(dp->dp_origin_snap == NULL);
+ ASSERT0P(dp->dp_origin_snap);
ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
/* create the origin dir, ds, & snap-ds */
@@ -1250,7 +1250,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
{
objset_t *mos = dp->dp_meta_objset;
- ASSERT(dp->dp_tmp_userrefs_obj == 0);
+ ASSERT0(dp->dp_tmp_userrefs_obj);
ASSERT(dmu_tx_is_syncing(tx));
dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c
index b76f22df61e2..51f624da5689 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_prop.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c
@@ -815,7 +815,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
*/
err = zap_update(mos, zapobj, recvdstr,
intsz, numints, value, tx);
- ASSERT(err == 0);
+ ASSERT0(err);
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED):
/*
@@ -1166,7 +1166,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
if (nvlist_exists(nv, propname))
continue;
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
if (za->za_integer_length == 1) {
/*
* String property
@@ -1179,8 +1179,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
kmem_free(tmp, za->za_num_integers);
break;
}
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE,
- tmp) == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, tmp));
kmem_free(tmp, za->za_num_integers);
} else {
/*
@@ -1191,8 +1190,8 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
za->za_first_integer);
}
- VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, source) == 0);
- VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_SOURCE, source));
+ VERIFY0(nvlist_add_nvlist(nv, propname, propval));
nvlist_free(propval);
}
zap_cursor_fini(&zc);
@@ -1215,7 +1214,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
int err = 0;
char setpoint[ZFS_MAX_DATASET_NAME_LEN];
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
if (ds->ds_is_snapshot)
flags |= DSL_PROP_GET_SNAPSHOT;
@@ -1333,18 +1332,18 @@ dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value)
uint64_t default_value;
if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) {
- VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0);
+ VERIFY0(nvlist_add_uint64(propval, ZPROP_VALUE, value));
return;
}
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_uint64(propval, ZPROP_VALUE, value));
/* Indicate the default source if we can. */
if (dodefault(prop, 8, 1, &default_value) == 0 &&
value == default_value) {
- VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, "") == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_SOURCE, ""));
}
- VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0);
+ VERIFY0(nvlist_add_nvlist(nv, propname, propval));
nvlist_free(propval);
}
@@ -1355,13 +1354,13 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value)
const char *propname = zfs_prop_to_name(prop);
if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) {
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, value));
return;
}
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0);
- VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, value));
+ VERIFY0(nvlist_add_nvlist(nv, propname, propval));
nvlist_free(propval);
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index 1b2cd3e361d1..fcd50c459d07 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -454,7 +454,7 @@ static inline void
bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i)
{
sio->sio_blk_prop = bp->blk_prop;
- sio->sio_phys_birth = BP_GET_PHYSICAL_BIRTH(bp);
+ sio->sio_phys_birth = BP_GET_RAW_PHYSICAL_BIRTH(bp);
sio->sio_birth = BP_GET_LOGICAL_BIRTH(bp);
sio->sio_cksum = bp->blk_cksum;
sio->sio_nr_dvas = BP_GET_NDVAS(bp);
@@ -1768,7 +1768,7 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) ||
- BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
+ BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
return (0);
/*
@@ -1778,13 +1778,13 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
* scrub there's nothing to do to it).
*/
if (claim_txg == 0 &&
- BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(dp->dp_spa))
+ BP_GET_BIRTH(bp) >= spa_min_claim_txg(dp->dp_spa))
return (0);
SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
- VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+ VERIFY0(scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
return (0);
}
@@ -1804,7 +1804,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) ||
- BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
+ BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
return (0);
/*
@@ -1812,7 +1812,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
* already txg sync'ed (but this log block contains
* other records that are not synced)
*/
- if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg)
+ if (claim_txg == 0 || BP_GET_BIRTH(bp) < claim_txg)
return (0);
ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
@@ -1820,7 +1820,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));
- VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+ VERIFY0(scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
}
return (0);
}
@@ -1952,7 +1952,7 @@ dsl_scan_prefetch(scan_prefetch_ctx_t *spc, blkptr_t *bp, zbookmark_phys_t *zb)
return;
if (BP_IS_HOLE(bp) ||
- BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg ||
+ BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE &&
BP_GET_TYPE(bp) != DMU_OT_OBJSET))
return;
@@ -2223,7 +2223,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
if (dnp != NULL &&
dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) {
scn->scn_phys.scn_errors++;
- spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
+ spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp));
return (SET_ERROR(EINVAL));
}
@@ -2319,7 +2319,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
* by arc_read() for the cases above.
*/
scn->scn_phys.scn_errors++;
- spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
+ spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp));
return (SET_ERROR(EINVAL));
}
@@ -2396,7 +2396,12 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
if (f != SPA_FEATURE_NONE)
ASSERT(dsl_dataset_feature_is_active(ds, f));
- if (BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) {
+ /*
+ * Recurse any blocks that were written either logically or physically
+ * at or after cur_min_txg. The logical birth matters for traversal
+ * (finding any changes), while the physical birth matters for the
+ * actual scan.
+ */
+ if (BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) {
scn->scn_lt_min_this_txg++;
return;
}
@@ -2422,7 +2427,7 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
* Don't scan it now unless we need to because something
* under it was modified.
*/
- if (BP_GET_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) {
+ if (BP_GET_PHYSICAL_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) {
scn->scn_gt_max_this_txg++;
return;
}
@@ -4806,7 +4811,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
{
dsl_scan_t *scn = dp->dp_scan;
spa_t *spa = dp->dp_spa;
- uint64_t phys_birth = BP_GET_BIRTH(bp);
+ uint64_t phys_birth = BP_GET_PHYSICAL_BIRTH(bp);
size_t psize = BP_GET_PSIZE(bp);
boolean_t needs_io = B_FALSE;
int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
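The scan changes separate the two birth times: whether to recurse into a block is decided from its combined birth (per the comment above, anything written either logically or physically after cur_min_txg), while whether the on-disk data needs scrub/resilver I/O is decided from the physical birth alone. A standalone sketch of that split, under the stated assumption that the combined birth is simply the later of the two times:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a block pointer's two birth times. */
typedef struct {
	uint64_t logical_birth;		/* txg the contents last changed */
	uint64_t physical_birth;	/* txg the block was last written out */
} fake_bp_t;

/*
 * Recurse if the block changed logically or physically after cur_min_txg.
 * Assumption: the combined birth is the later of the two times.
 */
static bool
scan_should_recurse(const fake_bp_t *bp, uint64_t cur_min_txg)
{
	uint64_t birth = bp->physical_birth > bp->logical_birth ?
	    bp->physical_birth : bp->logical_birth;
	return (birth > cur_min_txg);
}

/* Only the physical birth decides whether the on-disk data needs I/O. */
static bool
scan_needs_io(const fake_bp_t *bp, uint64_t cur_min_txg, uint64_t cur_max_txg)
{
	return (bp->physical_birth > cur_min_txg &&
	    bp->physical_birth <= cur_max_txg);
}

int
main(void)
{
	/* Rewritten in txg 800; contents unchanged since txg 300. */
	fake_bp_t bp = { .logical_birth = 300, .physical_birth = 800 };

	(void) printf("recurse (min_txg 500): %d\n",
	    scan_should_recurse(&bp, 500));
	(void) printf("needs io in (500, 1000]: %d\n",
	    scan_needs_io(&bp, 500, 1000));
	return (0);
}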
@@ -5136,7 +5141,7 @@ dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd)
mutex_enter(&svd->vdev_scan_io_queue_lock);
mutex_enter(&tvd->vdev_scan_io_queue_lock);
- VERIFY3P(tvd->vdev_scan_io_queue, ==, NULL);
+ VERIFY0P(tvd->vdev_scan_io_queue);
tvd->vdev_scan_io_queue = svd->vdev_scan_io_queue;
svd->vdev_scan_io_queue = NULL;
if (tvd->vdev_scan_io_queue != NULL)
diff --git a/sys/contrib/openzfs/module/zfs/dsl_userhold.c b/sys/contrib/openzfs/module/zfs/dsl_userhold.c
index 57c70e4ce3d2..f91b7a1eb69a 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_userhold.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_userhold.c
@@ -335,7 +335,7 @@ dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
dduha.dduha_holds = holds;
/* chkholds can have non-unique name */
- VERIFY(0 == nvlist_alloc(&dduha.dduha_chkholds, 0, KM_SLEEP));
+ VERIFY0(nvlist_alloc(&dduha.dduha_chkholds, 0, KM_SLEEP));
dduha.dduha_errlist = errlist;
dduha.dduha_minor = cleanup_minor;
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
index a092817efedd..ae788b2310d8 100644
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -337,7 +337,7 @@ zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
}
}
- VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
+ VERIFY0(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE));
if (size > *event_size) {
*event_size = size;
error = ENOMEM;
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 43b94eba2d58..9f4399af56bd 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -375,13 +375,23 @@ static metaslab_stats_t metaslab_stats = {
#define METASLABSTAT_BUMP(stat) \
atomic_inc_64(&metaslab_stats.stat.value.ui64);
+char *
+metaslab_rt_name(metaslab_group_t *mg, metaslab_t *ms, const char *name)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu ms_id=%llu %s}",
+ spa_name(mg->mg_vd->vdev_spa),
+ (u_longlong_t)mg->mg_vd->vdev_guid,
+ (u_longlong_t)ms->ms_id,
+ name));
+}
+
static kstat_t *metaslab_ksp;
void
metaslab_stat_init(void)
{
- ASSERT(metaslab_alloc_trace_cache == NULL);
+ ASSERT0P(metaslab_alloc_trace_cache);
metaslab_alloc_trace_cache = kmem_cache_create(
"metaslab_alloc_trace_cache", sizeof (metaslab_alloc_trace_t),
0, NULL, NULL, NULL, NULL, NULL, 0);
@@ -446,16 +456,16 @@ metaslab_class_destroy(metaslab_class_t *mc)
{
spa_t *spa = mc->mc_spa;
- ASSERT(mc->mc_alloc == 0);
- ASSERT(mc->mc_deferred == 0);
- ASSERT(mc->mc_space == 0);
- ASSERT(mc->mc_dspace == 0);
+ ASSERT0(mc->mc_alloc);
+ ASSERT0(mc->mc_deferred);
+ ASSERT0(mc->mc_space);
+ ASSERT0(mc->mc_dspace);
for (int i = 0; i < spa->spa_alloc_count; i++) {
metaslab_class_allocator_t *mca = &mc->mc_allocator[i];
avl_destroy(&mca->mca_tree);
mutex_destroy(&mca->mca_lock);
- ASSERT(mca->mca_rotor == NULL);
+ ASSERT0P(mca->mca_rotor);
ASSERT0(mca->mca_reserved);
}
mutex_destroy(&mc->mc_lock);
@@ -750,7 +760,8 @@ metaslab_class_histogram_verify(metaslab_class_t *mc)
}
IMPLY(mg == mg->mg_vd->vdev_log_mg,
- mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(mg->mg_vd->vdev_spa));
for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++)
mc_hist[i] += mg->mg_histogram[i];
@@ -1076,8 +1087,8 @@ metaslab_group_destroy(metaslab_group_t *mg)
{
spa_t *spa = mg->mg_class->mc_spa;
- ASSERT(mg->mg_prev == NULL);
- ASSERT(mg->mg_next == NULL);
+ ASSERT0P(mg->mg_prev);
+ ASSERT0P(mg->mg_next);
/*
* We may have gone below zero with the activation count
* either because we never activated in the first place or
@@ -1107,8 +1118,8 @@ metaslab_group_activate(metaslab_group_t *mg)
ASSERT3U(spa_config_held(spa, SCL_ALLOC, RW_WRITER), !=, 0);
- ASSERT(mg->mg_prev == NULL);
- ASSERT(mg->mg_next == NULL);
+ ASSERT0P(mg->mg_prev);
+ ASSERT0P(mg->mg_next);
ASSERT(mg->mg_activation_count <= 0);
if (++mg->mg_activation_count <= 0)
@@ -1153,8 +1164,8 @@ metaslab_group_passivate(metaslab_group_t *mg)
if (--mg->mg_activation_count != 0) {
for (int i = 0; i < spa->spa_alloc_count; i++)
ASSERT(mc->mc_allocator[i].mca_rotor != mg);
- ASSERT(mg->mg_prev == NULL);
- ASSERT(mg->mg_next == NULL);
+ ASSERT0P(mg->mg_prev);
+ ASSERT0P(mg->mg_next);
ASSERT(mg->mg_activation_count < 0);
return;
}
@@ -1183,14 +1194,16 @@ metaslab_group_passivate(metaslab_group_t *mg)
if (msp != NULL) {
mutex_enter(&msp->ms_lock);
metaslab_passivate(msp,
- metaslab_weight_from_range_tree(msp));
+ metaslab_weight(msp, B_TRUE) &
+ ~METASLAB_ACTIVE_MASK);
mutex_exit(&msp->ms_lock);
}
msp = mga->mga_secondary;
if (msp != NULL) {
mutex_enter(&msp->ms_lock);
metaslab_passivate(msp,
- metaslab_weight_from_range_tree(msp));
+ metaslab_weight(msp, B_TRUE) &
+ ~METASLAB_ACTIVE_MASK);
mutex_exit(&msp->ms_lock);
}
}
@@ -1288,7 +1301,8 @@ metaslab_group_histogram_add(metaslab_group_t *mg, metaslab_t *msp)
mutex_enter(&mc->mc_lock);
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
IMPLY(mg == mg->mg_vd->vdev_log_mg,
- mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(mg->mg_vd->vdev_spa));
mg->mg_histogram[i + ashift] +=
msp->ms_sm->sm_phys->smp_histogram[i];
mc->mc_histogram[i + ashift] +=
@@ -1316,7 +1330,8 @@ metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp)
ASSERT3U(mc->mc_histogram[i + ashift], >=,
msp->ms_sm->sm_phys->smp_histogram[i]);
IMPLY(mg == mg->mg_vd->vdev_log_mg,
- mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(mg->mg_vd->vdev_spa));
mg->mg_histogram[i + ashift] -=
msp->ms_sm->sm_phys->smp_histogram[i];
@@ -1330,7 +1345,7 @@ metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp)
static void
metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp)
{
- ASSERT(msp->ms_group == NULL);
+ ASSERT0P(msp->ms_group);
mutex_enter(&mg->mg_lock);
msp->ms_group = mg;
msp->ms_weight = 0;
@@ -2895,30 +2910,43 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object,
zfs_range_seg_type_t type =
metaslab_calculate_range_tree_type(vd, ms, &start, &shift);
- ms->ms_allocatable = zfs_range_tree_create(NULL, type, NULL, start,
- shift);
+ ms->ms_allocatable = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_allocatable"));
for (int t = 0; t < TXG_SIZE; t++) {
- ms->ms_allocating[t] = zfs_range_tree_create(NULL, type,
- NULL, start, shift);
- }
- ms->ms_freeing = zfs_range_tree_create(NULL, type, NULL, start, shift);
- ms->ms_freed = zfs_range_tree_create(NULL, type, NULL, start, shift);
+ ms->ms_allocating[t] = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(mg, ms, "ms_allocating"));
+ }
+ ms->ms_freeing = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_freeing"));
+ ms->ms_freed = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_freed"));
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
- ms->ms_defer[t] = zfs_range_tree_create(NULL, type, NULL,
- start, shift);
+ ms->ms_defer[t] = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_defer"));
}
- ms->ms_checkpointing =
- zfs_range_tree_create(NULL, type, NULL, start, shift);
- ms->ms_unflushed_allocs =
- zfs_range_tree_create(NULL, type, NULL, start, shift);
+ ms->ms_checkpointing = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_checkpointing"));
+ ms->ms_unflushed_allocs = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_unflushed_allocs"));
metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
mrap->mra_bt = &ms->ms_unflushed_frees_by_size;
mrap->mra_floor_shift = metaslab_by_size_min_shift;
- ms->ms_unflushed_frees = zfs_range_tree_create(&metaslab_rt_ops,
- type, mrap, start, shift);
+ ms->ms_unflushed_frees = zfs_range_tree_create_flags(
+ &metaslab_rt_ops, type, mrap, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_unflushed_frees"));
- ms->ms_trim = zfs_range_tree_create(NULL, type, NULL, start, shift);
+ ms->ms_trim = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_trim"));
metaslab_group_add(mg, ms);
metaslab_set_fragmentation(ms, B_FALSE);
@@ -2989,7 +3017,7 @@ metaslab_fini(metaslab_t *msp)
metaslab_group_remove(mg, msp);
mutex_enter(&msp->ms_lock);
- VERIFY(msp->ms_group == NULL);
+ VERIFY0P(msp->ms_group);
/*
* If this metaslab hasn't been through metaslab_sync_done() yet its
@@ -3892,7 +3920,10 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
type = metaslab_calculate_range_tree_type(msp->ms_group->mg_vd, msp,
&start, &shift);
- condense_tree = zfs_range_tree_create(NULL, type, NULL, start, shift);
+ condense_tree = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(msp->ms_group, msp, "condense_tree"));
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
zfs_range_tree_walk(msp->ms_defer[t],
@@ -3949,8 +3980,10 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
* followed by FREES (due to space_map_write() in metaslab_sync()) for
* sync pass 1.
*/
- zfs_range_tree_t *tmp_tree = zfs_range_tree_create(NULL, type, NULL,
- start, shift);
+ zfs_range_tree_t *tmp_tree = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(msp->ms_group, msp, "tmp_tree"));
zfs_range_tree_add(tmp_tree, msp->ms_start, msp->ms_size);
space_map_write(sm, tmp_tree, SM_ALLOC, SM_NO_VDEVID, tx);
space_map_write(sm, msp->ms_allocatable, SM_FREE, SM_NO_VDEVID, tx);
@@ -5199,29 +5232,16 @@ next:
/*
* We were unable to allocate from this metaslab so determine
- * a new weight for this metaslab. Now that we have loaded
- * the metaslab we can provide a better hint to the metaslab
- * selector.
- *
- * For space-based metaslabs, we use the maximum block size.
- * This information is only available when the metaslab
- * is loaded and is more accurate than the generic free
- * space weight that was calculated by metaslab_weight().
- * This information allows us to quickly compare the maximum
- * available allocation in the metaslab to the allocation
- * size being requested.
- *
- * For segment-based metaslabs, determine the new weight
- * based on the highest bucket in the range tree. We
- * explicitly use the loaded segment weight (i.e. the range
- * tree histogram) since it contains the space that is
- * currently available for allocation and is accurate
- * even within a sync pass.
+ * a new weight for this metaslab. The weight was last
+ * recalculated either when we loaded it (if this is the first
+ * TXG it's been loaded in), or the last time a txg was synced
+ * out.
*/
uint64_t weight;
if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
- weight = metaslab_largest_allocatable(msp);
- WEIGHT_SET_SPACEBASED(weight);
+ metaslab_set_fragmentation(msp, B_TRUE);
+ weight = metaslab_space_weight(msp) &
+ ~METASLAB_ACTIVE_MASK;
} else {
weight = metaslab_weight_from_range_tree(msp);
}
@@ -5233,13 +5253,6 @@ next:
* For the case where we use the metaslab that is
* active for another allocator we want to make
* sure that we retain the activation mask.
- *
- * Note that we could attempt to use something like
- * metaslab_recalculate_weight_and_sort() that
- * retains the activation mask here. That function
- * uses metaslab_weight() to set the weight though
- * which is not as accurate as the calculations
- * above.
*/
weight |= msp->ms_weight & METASLAB_ACTIVE_MASK;
metaslab_group_sort(mg, msp, weight);
@@ -5590,7 +5603,21 @@ remap_blkptr_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
vdev_indirect_births_t *vib = oldvd->vdev_indirect_births;
uint64_t physical_birth = vdev_indirect_births_physbirth(vib,
DVA_GET_OFFSET(&bp->blk_dva[0]), DVA_GET_ASIZE(&bp->blk_dva[0]));
- BP_SET_PHYSICAL_BIRTH(bp, physical_birth);
+
+ /*
+ * For rewritten blocks, use the old physical birth as the new logical
+ * birth (representing when the space was allocated) and the removal
+ * time as the new physical birth (representing when it was actually
+ * written).
+ */
+ if (BP_GET_REWRITE(bp)) {
+ uint64_t old_physical_birth = BP_GET_PHYSICAL_BIRTH(bp);
+ ASSERT3U(old_physical_birth, <, physical_birth);
+ BP_SET_BIRTH(bp, old_physical_birth, physical_birth);
+ BP_SET_REWRITE(bp, 0);
+ } else {
+ BP_SET_PHYSICAL_BIRTH(bp, physical_birth);
+ }
DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id);
DVA_SET_OFFSET(&bp->blk_dva[0], offset);
@@ -5712,7 +5739,7 @@ metaslab_unalloc_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
ASSERT(!vd->vdev_removing);
ASSERT(vdev_is_concrete(vd));
ASSERT0(vd->vdev_indirect_config.vic_mapping_object);
- ASSERT3P(vd->vdev_indirect_mapping, ==, NULL);
+ ASSERT0P(vd->vdev_indirect_mapping);
if (DVA_GET_GANG(dva))
size = vdev_gang_header_asize(vd);
@@ -5757,21 +5784,21 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, boolean_t checkpoint)
}
/*
- * Reserve some allocation slots. The reservation system must be called
- * before we call into the allocator. If there aren't any available slots
- * then the I/O will be throttled until an I/O completes and its slots are
- * freed up. The function returns true if it was successful in placing
- * the reservation.
+ * Reserve some space for a future allocation. The reservation system must be
+ * called before we call into the allocator. If there isn't enough space
+ * available, the calling I/O will be throttled until another I/O completes and
+ * its reservation is released. The function returns true if it was successful
+ * in placing the reservation.
*/
boolean_t
-metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
- boolean_t must, boolean_t *more)
+metaslab_class_throttle_reserve(metaslab_class_t *mc, int allocator,
+ int copies, uint64_t io_size, boolean_t must, boolean_t *more)
{
- metaslab_class_allocator_t *mca = &mc->mc_allocator[zio->io_allocator];
+ metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
ASSERT(mc->mc_alloc_throttle_enabled);
- if (mc->mc_alloc_io_size < zio->io_size) {
- mc->mc_alloc_io_size = zio->io_size;
+ if (mc->mc_alloc_io_size < io_size) {
+ mc->mc_alloc_io_size = io_size;
metaslab_class_balance(mc, B_FALSE);
}
if (must || mca->mca_reserved <= mc->mc_alloc_max) {
@@ -5782,10 +5809,9 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
* worst that can happen is few more I/Os get to allocation
* earlier, that is not a problem.
*/
- int64_t delta = slots * zio->io_size;
+ int64_t delta = copies * io_size;
*more = (atomic_add_64_nv(&mca->mca_reserved, delta) <=
mc->mc_alloc_max);
- zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
return (B_TRUE);
}
*more = B_FALSE;
@@ -5793,13 +5819,13 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
}
boolean_t
-metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots,
- zio_t *zio)
+metaslab_class_throttle_unreserve(metaslab_class_t *mc, int allocator,
+ int copies, uint64_t io_size)
{
- metaslab_class_allocator_t *mca = &mc->mc_allocator[zio->io_allocator];
+ metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
ASSERT(mc->mc_alloc_throttle_enabled);
- int64_t delta = slots * zio->io_size;
+ int64_t delta = copies * io_size;
return (atomic_add_64_nv(&mca->mca_reserved, -delta) <=
mc->mc_alloc_max);
}
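metaslab_class_throttle_reserve() and _unreserve() now take the allocator index, copy count, and I/O size directly instead of a zio_t. The accounting itself keeps the same shape: the reservation is copies * io_size bytes, a "must" reservation always succeeds, and *more reports whether the allocator still has headroom. A single-threaded userland sketch of that accounting (the real code updates mca_reserved with atomic_add_64_nv):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for one allocator's reservation bucket (mca_reserved / ceiling). */
typedef struct {
	uint64_t reserved;	/* bytes currently reserved */
	uint64_t max;		/* throttle ceiling */
} fake_allocator_t;

static bool
throttle_reserve(fake_allocator_t *mca, int copies, uint64_t io_size,
    bool must, bool *more)
{
	if (must || mca->reserved <= mca->max) {
		/* Reserve space for every DVA copy of this I/O. */
		mca->reserved += (uint64_t)copies * io_size;
		*more = (mca->reserved <= mca->max);
		return (true);
	}
	*more = false;
	return (false);
}

static bool
throttle_unreserve(fake_allocator_t *mca, int copies, uint64_t io_size)
{
	/* Release the reservation once the allocation has completed. */
	mca->reserved -= (uint64_t)copies * io_size;
	return (mca->reserved <= mca->max);
}

int
main(void)
{
	fake_allocator_t mca = { .reserved = 0, .max = 1 << 20 };
	bool more;

	if (throttle_reserve(&mca, 3, 128 * 1024, false, &more))
		(void) printf("reserved 384K, more allowed: %d\n", more);
	(void) printf("back under the ceiling after unreserve: %d\n",
	    throttle_unreserve(&mca, 3, 128 * 1024));
	return (0);
}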
@@ -5960,7 +5986,7 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
int error = 0;
ASSERT0(BP_GET_LOGICAL_BIRTH(bp));
- ASSERT0(BP_GET_PHYSICAL_BIRTH(bp));
+ ASSERT0(BP_GET_RAW_PHYSICAL_BIRTH(bp));
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
@@ -5971,16 +5997,16 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
}
ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
- ASSERT(BP_GET_NDVAS(bp) == 0);
+ ASSERT0(BP_GET_NDVAS(bp));
ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
ASSERT3P(zal, !=, NULL);
- uint64_t cur_psize = 0;
-
+ uint64_t smallest_psize = UINT64_MAX;
for (int d = 0; d < ndvas; d++) {
- error = metaslab_alloc_dva_range(spa, mc, psize, max_psize,
- dva, d, hintdva, txg, flags, zal, allocator,
- actual_psize ? &cur_psize : NULL);
+ uint64_t cur_psize = 0;
+ error = metaslab_alloc_dva_range(spa, mc, psize,
+ MIN(smallest_psize, max_psize), dva, d, hintdva, txg,
+ flags, zal, allocator, actual_psize ? &cur_psize : NULL);
if (error != 0) {
for (d--; d >= 0; d--) {
metaslab_unalloc_dva(spa, &dva[d], txg);
@@ -6000,13 +6026,13 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
DVA_GET_VDEV(&dva[d]), allocator, flags, psize,
tag);
if (actual_psize)
- max_psize = MIN(cur_psize, max_psize);
+ smallest_psize = MIN(cur_psize, smallest_psize);
}
}
- ASSERT(error == 0);
+ ASSERT0(error);
ASSERT(BP_GET_NDVAS(bp) == ndvas);
if (actual_psize)
- *actual_psize = max_psize;
+ *actual_psize = smallest_psize;
spa_config_exit(spa, SCL_ALLOC, FTAG);
@@ -6022,7 +6048,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
int ndvas = BP_GET_NDVAS(bp);
ASSERT(!BP_IS_HOLE(bp));
- ASSERT(!now || BP_GET_LOGICAL_BIRTH(bp) >= spa_syncing_txg(spa));
+ ASSERT(!now || BP_GET_BIRTH(bp) >= spa_syncing_txg(spa));
/*
* If we have a checkpoint for the pool we need to make sure that
@@ -6040,7 +6066,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
* normally as they will be referenced by the checkpointed uberblock.
*/
boolean_t checkpoint = B_FALSE;
- if (BP_GET_LOGICAL_BIRTH(bp) <= spa->spa_checkpoint_txg &&
+ if (BP_GET_BIRTH(bp) <= spa->spa_checkpoint_txg &&
spa_syncing_txg(spa) > spa->spa_checkpoint_txg) {
/*
* At this point, if the block is part of the checkpoint
diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c
index f3665d29b8b4..7db72b9b04b0 100644
--- a/sys/contrib/openzfs/module/zfs/mmp.c
+++ b/sys/contrib/openzfs/module/zfs/mmp.c
@@ -260,7 +260,7 @@ mmp_thread_stop(spa_t *spa)
zfs_dbgmsg("MMP thread stopped pool '%s' gethrtime %llu",
spa_name(spa), gethrtime());
- ASSERT(mmp->mmp_thread == NULL);
+ ASSERT0P(mmp->mmp_thread);
mmp->mmp_thread_exiting = 0;
}
diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c
index 373636c69254..ea2d2c7227c8 100644
--- a/sys/contrib/openzfs/module/zfs/range_tree.c
+++ b/sys/contrib/openzfs/module/zfs/range_tree.c
@@ -201,10 +201,10 @@ ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg64_find_in_buf, zfs_range_seg64_t,
ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg_gap_find_in_buf,
zfs_range_seg_gap_t, zfs_range_tree_seg_gap_compare)
-zfs_range_tree_t *
-zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
+static zfs_range_tree_t *
+zfs_range_tree_create_impl(const zfs_range_tree_ops_t *ops,
zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
- uint64_t gap)
+ uint64_t gap, uint64_t flags, const char *name)
{
zfs_range_tree_t *rt = kmem_zalloc(sizeof (zfs_range_tree_t), KM_SLEEP);
@@ -236,6 +236,8 @@ zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
rt->rt_ops = ops;
rt->rt_gap = gap;
+ rt->rt_flags = flags;
+ rt->rt_name = name;
rt->rt_arg = arg;
rt->rt_type = type;
rt->rt_start = start;
@@ -248,10 +250,29 @@ zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
}
zfs_range_tree_t *
+zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
+ zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+ uint64_t gap)
+{
+ return (zfs_range_tree_create_impl(ops, type, arg, start, shift, gap,
+ 0, NULL));
+}
+
+zfs_range_tree_t *
zfs_range_tree_create(const zfs_range_tree_ops_t *ops,
zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift)
{
- return (zfs_range_tree_create_gap(ops, type, arg, start, shift, 0));
+ return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0,
+ 0, NULL));
+}
+
+zfs_range_tree_t *
+zfs_range_tree_create_flags(const zfs_range_tree_ops_t *ops,
+ zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+ uint64_t flags, const char *name)
+{
+ return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0,
+ flags, name));
}
void
@@ -262,6 +283,9 @@ zfs_range_tree_destroy(zfs_range_tree_t *rt)
if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
+ if (rt->rt_name != NULL && (rt->rt_flags & ZFS_RT_F_DYN_NAME))
+ kmem_strfree((char *)(uintptr_t)rt->rt_name);
+
zfs_btree_destroy(&rt->rt_root);
kmem_free(rt, sizeof (*rt));
}
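zfs_range_tree_create_flags() threads an optional name into every range tree; with ZFS_RT_F_DYN_NAME the tree owns the string and frees it on destroy, and ZFS_RT_NAME() drops the name into the panic/recover messages further down. A userland sketch of the ownership rule and of roughly what a named message looks like (the flag value, pool name, and object numbers are made up for illustration):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-ins mirroring the naming plumbing added to range_tree.c. */
#define	ZFS_RT_F_DYN_NAME	(1ULL << 0)	/* illustrative flag value */
#define	ZFS_RT_NAME(rt)		((rt)->rt_name != NULL ? (rt)->rt_name : "")

typedef struct {
	uint64_t rt_flags;
	const char *rt_name;
} fake_rt_t;

static fake_rt_t *
rt_create_flags(uint64_t flags, const char *name)
{
	fake_rt_t *rt = calloc(1, sizeof (*rt));

	rt->rt_flags = flags;
	rt->rt_name = name;
	return (rt);
}

static void
rt_destroy(fake_rt_t *rt)
{
	/* With ZFS_RT_F_DYN_NAME the tree owns the string and frees it. */
	if (rt->rt_name != NULL && (rt->rt_flags & ZFS_RT_F_DYN_NAME))
		free((char *)(uintptr_t)rt->rt_name);
	free(rt);
}

int
main(void)
{
	char label[64];

	/* Roughly what rt_name()/metaslab_rt_name() build via kmem_asprintf. */
	(void) snprintf(label, sizeof (label),
	    "{spa=%s objset=%llu obj=%llu %s}", "tank",
	    (unsigned long long)54, (unsigned long long)1234, "dn_free_ranges");
	fake_rt_t *rt = rt_create_flags(ZFS_RT_F_DYN_NAME, strdup(label));

	/* The payoff: recovery messages now say which tree misbehaved. */
	(void) printf("zfs: rt=%s: adding segment (offset=%llx size=%llx) "
	    "overlapping with existing one\n", ZFS_RT_NAME(rt),
	    (unsigned long long)0x1000, (unsigned long long)0x2000);

	rt_destroy(rt);
	return (0);
}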
@@ -271,15 +295,17 @@ zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs,
int64_t delta)
{
if (delta < 0 && delta * -1 >= zfs_rs_get_fill(rs, rt)) {
- zfs_panic_recover("zfs: attempting to decrease fill to or "
- "below 0; probable double remove in segment [%llx:%llx]",
+ zfs_panic_recover("zfs: rt=%s: attempting to decrease fill to "
+ "or below 0; probable double remove in segment [%llx:%llx]",
+ ZFS_RT_NAME(rt),
(longlong_t)zfs_rs_get_start(rs, rt),
(longlong_t)zfs_rs_get_end(rs, rt));
}
if (zfs_rs_get_fill(rs, rt) + delta > zfs_rs_get_end(rs, rt) -
zfs_rs_get_start(rs, rt)) {
- zfs_panic_recover("zfs: attempting to increase fill beyond "
- "max; probable double add in segment [%llx:%llx]",
+ zfs_panic_recover("zfs: rt=%s: attempting to increase fill "
+ "beyond max; probable double add in segment [%llx:%llx]",
+ ZFS_RT_NAME(rt),
(longlong_t)zfs_rs_get_start(rs, rt),
(longlong_t)zfs_rs_get_end(rs, rt));
}
@@ -319,14 +345,17 @@ zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
* the normal code paths.
*/
if (rs != NULL) {
+ uint64_t rstart = zfs_rs_get_start(rs, rt);
+ uint64_t rend = zfs_rs_get_end(rs, rt);
if (gap == 0) {
- zfs_panic_recover("zfs: adding existent segment to "
- "range tree (offset=%llx size=%llx)",
- (longlong_t)start, (longlong_t)size);
+ zfs_panic_recover("zfs: rt=%s: adding segment "
+ "(offset=%llx size=%llx) overlapping with existing "
+ "one (offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt),
+ (longlong_t)start, (longlong_t)size,
+ (longlong_t)rstart, (longlong_t)(rend - rstart));
return;
}
- uint64_t rstart = zfs_rs_get_start(rs, rt);
- uint64_t rend = zfs_rs_get_end(rs, rt);
if (rstart <= start && rend >= end) {
zfs_range_tree_adjust_fill(rt, rs, fill);
return;
@@ -348,7 +377,7 @@ zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
return;
}
- ASSERT3P(rs, ==, NULL);
+ ASSERT0P(rs);
/*
* Determine whether or not we will have to merge with our neighbors.
@@ -451,6 +480,7 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
zfs_range_seg_t *rs;
zfs_range_seg_max_t rsearch, rs_tmp;
uint64_t end = start + size;
+ uint64_t rstart, rend;
boolean_t left_over, right_over;
VERIFY3U(size, !=, 0);
@@ -464,12 +494,15 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
/* Make sure we completely overlap with someone */
if (rs == NULL) {
- zfs_panic_recover("zfs: removing nonexistent segment from "
- "range tree (offset=%llx size=%llx)",
- (longlong_t)start, (longlong_t)size);
+ zfs_panic_recover("zfs: rt=%s: removing nonexistent segment "
+ "from range tree (offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt), (longlong_t)start, (longlong_t)size);
return;
}
+ rstart = zfs_rs_get_start(rs, rt);
+ rend = zfs_rs_get_end(rs, rt);
+
/*
* Range trees with gap support must only remove complete segments
* from the tree. This allows us to maintain accurate fill accounting
@@ -479,31 +512,36 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
if (rt->rt_gap != 0) {
if (do_fill) {
if (zfs_rs_get_fill(rs, rt) == size) {
- start = zfs_rs_get_start(rs, rt);
- end = zfs_rs_get_end(rs, rt);
+ start = rstart;
+ end = rend;
size = end - start;
} else {
zfs_range_tree_adjust_fill(rt, rs, -size);
return;
}
- } else if (zfs_rs_get_start(rs, rt) != start ||
- zfs_rs_get_end(rs, rt) != end) {
- zfs_panic_recover("zfs: freeing partial segment of "
- "gap tree (offset=%llx size=%llx) of "
+ } else if (rstart != start || rend != end) {
+ zfs_panic_recover("zfs: rt=%s: freeing partial segment "
+ "of gap tree (offset=%llx size=%llx) of "
"(offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt),
(longlong_t)start, (longlong_t)size,
- (longlong_t)zfs_rs_get_start(rs, rt),
- (longlong_t)zfs_rs_get_end(rs, rt) -
- zfs_rs_get_start(rs, rt));
+ (longlong_t)rstart, (longlong_t)(rend - rstart));
return;
}
}
- VERIFY3U(zfs_rs_get_start(rs, rt), <=, start);
- VERIFY3U(zfs_rs_get_end(rs, rt), >=, end);
+ if (!(rstart <= start && rend >= end)) {
+ panic("zfs: rt=%s: removing segment "
+ "(offset=%llx size=%llx) not completely overlapped by "
+ "existing one (offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt),
+ (longlong_t)start, (longlong_t)size,
+ (longlong_t)rstart, (longlong_t)(rend - rstart));
+ return;
+ }
- left_over = (zfs_rs_get_start(rs, rt) != start);
- right_over = (zfs_rs_get_end(rs, rt) != end);
+ left_over = (rstart != start);
+ right_over = (rend != end);
zfs_range_tree_stat_decr(rt, rs);
@@ -829,7 +867,7 @@ zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
next = zfs_btree_next(&removefrom->rt_root, &where, &where);
}
- VERIFY3P(curr, ==, NULL);
+ VERIFY0P(curr);
if (start != end) {
VERIFY3U(start, <, end);
diff --git a/sys/contrib/openzfs/module/zfs/rrwlock.c b/sys/contrib/openzfs/module/zfs/rrwlock.c
index 8ee784619839..d0df39b93560 100644
--- a/sys/contrib/openzfs/module/zfs/rrwlock.c
+++ b/sys/contrib/openzfs/module/zfs/rrwlock.c
@@ -108,7 +108,7 @@ rrn_add(rrwlock_t *rrl, const void *tag)
rn->rn_rrl = rrl;
rn->rn_next = tsd_get(rrw_tsd_key);
rn->rn_tag = tag;
- VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
+ VERIFY0(tsd_set(rrw_tsd_key, rn));
}
/*
@@ -129,7 +129,7 @@ rrn_find_and_remove(rrwlock_t *rrl, const void *tag)
if (prev)
prev->rn_next = rn->rn_next;
else
- VERIFY(tsd_set(rrw_tsd_key, rn->rn_next) == 0);
+ VERIFY0(tsd_set(rrw_tsd_key, rn->rn_next));
kmem_free(rn, sizeof (*rn));
return (B_TRUE);
}
@@ -155,7 +155,7 @@ rrw_destroy(rrwlock_t *rrl)
{
mutex_destroy(&rrl->rr_lock);
cv_destroy(&rrl->rr_cv);
- ASSERT(rrl->rr_writer == NULL);
+ ASSERT0P(rrl->rr_writer);
zfs_refcount_destroy(&rrl->rr_anon_rcount);
zfs_refcount_destroy(&rrl->rr_linked_rcount);
}
@@ -188,7 +188,7 @@ rrw_enter_read_impl(rrwlock_t *rrl, boolean_t prio, const void *tag)
} else {
(void) zfs_refcount_add(&rrl->rr_anon_rcount, tag);
}
- ASSERT(rrl->rr_writer == NULL);
+ ASSERT0P(rrl->rr_writer);
mutex_exit(&rrl->rr_lock);
}
diff --git a/sys/contrib/openzfs/module/zfs/sa.c b/sys/contrib/openzfs/module/zfs/sa.c
index 5db470ce6242..7ad25d4d85ba 100644
--- a/sys/contrib/openzfs/module/zfs/sa.c
+++ b/sys/contrib/openzfs/module/zfs/sa.c
@@ -304,7 +304,7 @@ sa_get_spill(sa_handle_t *hdl)
if (hdl->sa_spill == NULL) {
if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
&hdl->sa_spill)) == 0)
- VERIFY(0 == sa_build_index(hdl, SA_SPILL));
+ VERIFY0(sa_build_index(hdl, SA_SPILL));
} else {
rc = 0;
}
@@ -432,7 +432,7 @@ sa_add_layout_entry(objset_t *os, const sa_attr_type_t *attrs, int attr_count,
(void) snprintf(attr_name, sizeof (attr_name),
"%d", (int)lot_num);
- VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
+ VERIFY0(zap_update(os, os->os_sa->sa_layout_attr_obj,
attr_name, 2, attr_count, attrs, tx));
}
@@ -505,7 +505,7 @@ sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
}
error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
return (error);
}
@@ -717,7 +717,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
hdl->sa_spill->db_size)
- VERIFY(0 == sa_resize_spill(hdl,
+ VERIFY0(sa_resize_spill(hdl,
BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
}
@@ -791,7 +791,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
hdl->sa_bonus_tab = NULL;
}
if (!sa->sa_force_spill)
- VERIFY(0 == sa_build_index(hdl, SA_BONUS));
+ VERIFY0(sa_build_index(hdl, SA_BONUS));
if (hdl->sa_spill) {
sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
if (!spilling) {
@@ -801,10 +801,10 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
dmu_buf_rele(hdl->sa_spill, NULL);
hdl->sa_spill = NULL;
hdl->sa_spill_tab = NULL;
- VERIFY(0 == dmu_rm_spill(hdl->sa_os,
+ VERIFY0(dmu_rm_spill(hdl->sa_os,
sa_handle_object(hdl), tx));
} else {
- VERIFY(0 == sa_build_index(hdl, SA_SPILL));
+ VERIFY0(sa_build_index(hdl, SA_SPILL));
}
}
@@ -1733,10 +1733,10 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)
NULL, dxattr_obj, dxattr_size);
}
- VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
- VERIFY(sa_replace_all_by_template_locked(hdl, attrs, count, tx) == 0);
+ VERIFY0(dmu_set_bonustype(db, DMU_OT_SA, tx));
+ VERIFY0(sa_replace_all_by_template_locked(hdl, attrs, count, tx));
if (znode_acl.z_acl_extern_obj) {
- VERIFY(0 == dmu_object_free(zfsvfs->z_os,
+ VERIFY0(dmu_object_free(zfsvfs->z_os,
znode_acl.z_acl_extern_obj, tx));
}
@@ -1858,7 +1858,7 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
continue;
ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
tb[i].sa_byteswap);
- VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
+ VERIFY0(zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
tb[i].sa_name, 8, 1, &attr_value, tx));
tb[i].sa_registered = B_TRUE;
}
@@ -2013,7 +2013,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
* Only a variable-sized attribute can be
* replaced here, and its size must be changing.
*/
- ASSERT3U(reg_length, ==, 0);
+ ASSERT0(reg_length);
ASSERT3U(length, !=, buflen);
SA_ADD_BULK_ATTR(attr_desc, j, attr,
locator, datastart, buflen);
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 6b52c6cb1f9e..b3bb46da263b 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -100,6 +100,7 @@
#include <sys/vmsystm.h>
#endif /* _KERNEL */
+#include "zfs_crrd.h"
#include "zfs_prop.h"
#include "zfs_comutil.h"
#include <cityhash.h>
@@ -311,6 +312,41 @@ static int zfs_livelist_condense_zthr_cancel = 0;
static int zfs_livelist_condense_new_alloc = 0;
/*
+ * Interval (in seconds) that controls how often the current txg is recorded
+ * in the database.
+ * The finest available resolution is minutes, which means an update occurs
+ * each time `spa_note_txg_time` seconds have elapsed and the txg has changed.
+ * We provide a 256-slot ring buffer for minute-level resolution. The slot count
+ * is limited by the size of the structure we use and the maximum number of
+ * bytes we can write into ZAP. Setting `spa_note_txg_time` to 10 minutes results
+ * in approximately 144 records per day; given the 256 slots, this provides
+ * roughly 1.5 days of high-resolution data.
+ *
+ * The user can decrease `spa_note_txg_time` to increase resolution within
+ * a day, at the cost of retaining fewer days of data. Alternatively, increasing
+ * the interval allows storing data over a longer period, but with lower
+ * frequency.
+ *
+ * This parameter does not affect the daily or monthly databases, as those only
+ * store one record per day and per month, respectively.
+ */
+static uint_t spa_note_txg_time = 10 * 60;
+
+/*
+ * How often to flush the txg database to disk (in seconds).
+ * By default we flush the database every time we update it, making it the most
+ * reliable option.
+ * Since this happens every 10 minutes, it shouldn't introduce any noticeable
+ * overhead for the system. In case of failure, we will always have an
+ * up-to-date version of the database.
+ *
+ * The user can adjust the flush interval to a lower value, but it probably
+ * doesn't make sense to flush more often than the database is updated.
+ * The user can also increase the interval if they're concerned about the
+ * performance of writing the entire database to disk.
+ */
+static uint_t spa_flush_txg_time = 10 * 60;
+
+/*
* ==========================================================================
* SPA properties routines
* ==========================================================================
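
[Editor's note] To make the retention arithmetic in the comment above concrete: the minute-resolution ring covers (slot count x note interval) seconds of history. The snippet below is an illustrative userland calculation only, not part of this patch; the constants simply mirror the values the comment quotes (256 slots, 10-minute interval).

#include <stdio.h>

int
main(void)
{
	const unsigned slots = 256;		/* minute-resolution ring slots */
	const unsigned note_interval = 10 * 60;	/* default spa_note_txg_time, seconds */

	/* 86400 / 600 = 144 records per day, as the comment states. */
	printf("records per day: %u\n", 86400 / note_interval);
	/* 256 * 600 s = 153600 s, about 1.8 days of minute-level history. */
	printf("coverage: about %.1f days\n",
	    (double)slots * note_interval / 86400.0);
	return (0);
}

At the default interval this works out to roughly 1.8 days of usable slots, the same order as the "roughly 1.5 days" figure given above; shrinking the interval trades coverage for resolution, exactly as described for spa_note_txg_time.
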
@@ -390,10 +426,10 @@ spa_prop_add_user(nvlist_t *nvl, const char *propname, char *strval,
{
nvlist_t *propval;
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
- VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_uint64(propval, ZPROP_SOURCE, src));
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, strval));
+ VERIFY0(nvlist_add_nvlist(nvl, propname, propval));
nvlist_free(propval);
}
@@ -417,11 +453,15 @@ spa_prop_get_config(spa_t *spa, nvlist_t *nv)
alloc += metaslab_class_get_alloc(spa_special_class(spa));
alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
alloc += metaslab_class_get_alloc(spa_embedded_log_class(spa));
+ alloc += metaslab_class_get_alloc(
+ spa_special_embedded_log_class(spa));
size = metaslab_class_get_space(mc);
size += metaslab_class_get_space(spa_special_class(spa));
size += metaslab_class_get_space(spa_dedup_class(spa));
size += metaslab_class_get_space(spa_embedded_log_class(spa));
+ size += metaslab_class_get_space(
+ spa_special_embedded_log_class(spa));
spa_prop_add_list(nv, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
spa_prop_add_list(nv, ZPOOL_PROP_SIZE, NULL, size, src);
@@ -925,7 +965,7 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
uint64_t ver = 0;
if (prop == ZPOOL_PROP_VERSION) {
- VERIFY(nvpair_value_uint64(elem, &ver) == 0);
+ VERIFY0(nvpair_value_uint64(elem, &ver));
} else {
ASSERT(zpool_prop_feature(nvpair_name(elem)));
ver = SPA_VERSION_FEATURES;
@@ -1255,7 +1295,7 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
if (tqs->stqs_taskq == NULL) {
- ASSERT3U(tqs->stqs_count, ==, 0);
+ ASSERT0(tqs->stqs_count);
return;
}
@@ -1679,6 +1719,8 @@ spa_activate(spa_t *spa, spa_mode_t mode)
"embedded_log", msp, B_TRUE);
spa->spa_special_class = metaslab_class_create(spa, "special",
msp, B_FALSE);
+ spa->spa_special_embedded_log_class = metaslab_class_create(spa,
+ "special_embedded_log", msp, B_TRUE);
spa->spa_dedup_class = metaslab_class_create(spa, "dedup",
msp, B_FALSE);
@@ -1794,9 +1836,9 @@ static void
spa_deactivate(spa_t *spa)
{
ASSERT(spa->spa_sync_on == B_FALSE);
- ASSERT(spa->spa_dsl_pool == NULL);
- ASSERT(spa->spa_root_vdev == NULL);
- ASSERT(spa->spa_async_zio_root == NULL);
+ ASSERT0P(spa->spa_dsl_pool);
+ ASSERT0P(spa->spa_root_vdev);
+ ASSERT0P(spa->spa_async_zio_root);
ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
spa_evicting_os_wait(spa);
@@ -1853,6 +1895,9 @@ spa_deactivate(spa_t *spa)
metaslab_class_destroy(spa->spa_special_class);
spa->spa_special_class = NULL;
+ metaslab_class_destroy(spa->spa_special_embedded_log_class);
+ spa->spa_special_embedded_log_class = NULL;
+
metaslab_class_destroy(spa->spa_dedup_class);
spa->spa_dedup_class = NULL;
@@ -1976,7 +2021,7 @@ spa_unload_log_sm_flush_all(spa_t *spa)
dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND));
- ASSERT3U(spa->spa_log_flushall_txg, ==, 0);
+ ASSERT0(spa->spa_log_flushall_txg);
spa->spa_log_flushall_txg = dmu_tx_get_txg(tx);
dmu_tx_commit(tx);
@@ -2031,6 +2076,111 @@ spa_destroy_aux_threads(spa_t *spa)
}
}
+static void
+spa_sync_time_logger(spa_t *spa, uint64_t txg)
+{
+ uint64_t curtime;
+ dmu_tx_t *tx;
+
+ if (!spa_writeable(spa)) {
+ return;
+ }
+ curtime = gethrestime_sec();
+ if (curtime < spa->spa_last_noted_txg_time + spa_note_txg_time) {
+ return;
+ }
+
+ if (txg > spa->spa_last_noted_txg) {
+ spa->spa_last_noted_txg_time = curtime;
+ spa->spa_last_noted_txg = txg;
+
+ mutex_enter(&spa->spa_txg_log_time_lock);
+ dbrrd_add(&spa->spa_txg_log_time, curtime, txg);
+ mutex_exit(&spa->spa_txg_log_time_lock);
+ }
+
+ if (curtime < spa->spa_last_flush_txg_time + spa_flush_txg_time) {
+ return;
+ }
+ spa->spa_last_flush_txg_time = curtime;
+
+ tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
+
+ VERIFY0(zap_update(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MINUTES, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_minutes, tx));
+ VERIFY0(zap_update(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_DAYS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_days, tx));
+ VERIFY0(zap_update(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MONTHS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_months, tx));
+ dmu_tx_commit(tx);
+}
+
+static void
+spa_unload_sync_time_logger(spa_t *spa)
+{
+ uint64_t txg;
+ dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+ VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT));
+
+ txg = dmu_tx_get_txg(tx);
+ spa->spa_last_noted_txg_time = 0;
+ spa->spa_last_flush_txg_time = 0;
+ spa_sync_time_logger(spa, txg);
+
+ dmu_tx_commit(tx);
+}
+
+static void
+spa_load_txg_log_time(spa_t *spa)
+{
+ int error;
+
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MINUTES, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_minutes);
+ if (error != 0 && error != ENOENT) {
+ spa_load_note(spa, "unable to load a txg time database with "
+ "minute resolution [error=%d]", error);
+ }
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_DAYS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_days);
+ if (error != 0 && error != ENOENT) {
+ spa_load_note(spa, "unable to load a txg time database with "
+ "day resolution [error=%d]", error);
+ }
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MONTHS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_months);
+ if (error != 0 && error != ENOENT) {
+ spa_load_note(spa, "unable to load a txg time database with "
+ "month resolution [error=%d]", error);
+ }
+}
+
+static boolean_t
+spa_should_sync_time_logger_on_unload(spa_t *spa)
+{
+ if (!spa_writeable(spa))
+ return (B_FALSE);
+
+ if (!spa->spa_sync_on)
+ return (B_FALSE);
+
+ if (spa_state(spa) != POOL_STATE_EXPORTED)
+ return (B_FALSE);
+
+ if (spa->spa_last_noted_txg == 0)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
/*
* Opposite of spa_load().
*/
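
[Editor's note] The helpers above feed dbrrd_add() from the new zfs_crrd.c, which the diffstat lists but this excerpt does not show. As a rough mental model only (the names below are hypothetical and not from the patch), a round-robin time database can be pictured as a fixed-size ring where the timestamp selects the slot, so stale intervals are overwritten rather than freed.

/*
 * Hypothetical sketch of a time-indexed ring buffer; this is NOT the layout
 * used by zfs_crrd.c, only a mental model of how a round-robin database
 * ages out old samples.
 */
#include <stdint.h>

#define	RING_SLOTS	256

typedef struct txg_time_ring {
	uint64_t r_time[RING_SLOTS];	/* wall-clock second of each sample */
	uint64_t r_txg[RING_SLOTS];	/* txg observed at that time */
} txg_time_ring_t;

/*
 * Record (time, txg).  Each interval owns one slot, so a newer sample for
 * the same interval overwrites the older one and the ring never grows.
 */
static void
ring_add(txg_time_ring_t *r, uint64_t time_sec, uint64_t txg, uint64_t interval)
{
	uint64_t slot = (time_sec / interval) % RING_SLOTS;

	r->r_time[slot] = time_sec;
	r->r_txg[slot] = txg;
}

On import, spa_load_txg_log_time() (added further down in this hunk series) simply reads the three persisted rings (minutes, days, months) back out of the MOS directory ZAP.
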
@@ -2052,6 +2202,9 @@ spa_unload(spa_t *spa)
* we delay the final TXGs beyond what spa_final_txg is set at.
*/
if (spa->spa_final_txg == UINT64_MAX) {
+ if (spa_should_sync_time_logger_on_unload(spa))
+ spa_unload_sync_time_logger(spa);
+
/*
* If the log space map feature is enabled and the pool is
* getting exported (but not destroyed), we want to spend some
@@ -2127,7 +2280,7 @@ spa_unload(spa_t *spa)
*/
if (spa->spa_root_vdev)
vdev_free(spa->spa_root_vdev);
- ASSERT(spa->spa_root_vdev == NULL);
+ ASSERT0P(spa->spa_root_vdev);
/*
* Close the dsl pool.
@@ -2265,8 +2418,8 @@ spa_load_spares(spa_t *spa)
spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *),
KM_SLEEP);
for (i = 0; i < spa->spa_spares.sav_count; i++) {
- VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
- VDEV_ALLOC_SPARE) == 0);
+ VERIFY0(spa_config_parse(spa, &vd, spares[i], NULL, 0,
+ VDEV_ALLOC_SPARE));
ASSERT(vd != NULL);
spa->spa_spares.sav_vdevs[i] = vd;
@@ -2393,8 +2546,8 @@ spa_load_l2cache(spa_t *spa)
/*
* Create new vdev
*/
- VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
- VDEV_ALLOC_L2CACHE) == 0);
+ VERIFY0(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
+ VDEV_ALLOC_L2CACHE));
ASSERT(vd != NULL);
newvdevs[i] = vd;
@@ -2646,7 +2799,7 @@ spa_passivate_log(spa_t *spa)
vdev_t *tvd = rvd->vdev_child[c];
if (tvd->vdev_islog) {
- ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ ASSERT0P(tvd->vdev_log_mg);
metaslab_group_passivate(tvd->vdev_mg);
slog_found = B_TRUE;
}
@@ -2669,7 +2822,7 @@ spa_activate_log(spa_t *spa)
vdev_t *tvd = rvd->vdev_child[c];
if (tvd->vdev_islog) {
- ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ ASSERT0P(tvd->vdev_log_mg);
metaslab_group_activate(tvd->vdev_mg);
}
}
@@ -2709,8 +2862,8 @@ spa_claim_notify(zio_t *zio)
return;
mutex_enter(&spa->spa_props_lock); /* any mutex will do */
- if (spa->spa_claim_max_txg < BP_GET_LOGICAL_BIRTH(zio->io_bp))
- spa->spa_claim_max_txg = BP_GET_LOGICAL_BIRTH(zio->io_bp);
+ if (spa->spa_claim_max_txg < BP_GET_BIRTH(zio->io_bp))
+ spa->spa_claim_max_txg = BP_GET_BIRTH(zio->io_bp);
mutex_exit(&spa->spa_props_lock);
}
@@ -3106,7 +3259,7 @@ spa_livelist_delete_cb(void *arg, zthr_t *z)
static void
spa_start_livelist_destroy_thread(spa_t *spa)
{
- ASSERT3P(spa->spa_livelist_delete_zthr, ==, NULL);
+ ASSERT0P(spa->spa_livelist_delete_zthr);
spa->spa_livelist_delete_zthr =
zthr_create("z_livelist_destroy",
spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa,
@@ -3122,7 +3275,7 @@ static int
livelist_track_new_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
{
- ASSERT(tx == NULL);
+ ASSERT0P(tx);
livelist_new_arg_t *lna = arg;
if (bp_freed) {
bplist_append(lna->frees, bp);
@@ -3316,7 +3469,7 @@ spa_start_livelist_condensing_thread(spa_t *spa)
spa->spa_to_condense.syncing = B_FALSE;
spa->spa_to_condense.cancelled = B_FALSE;
- ASSERT3P(spa->spa_livelist_condense_zthr, ==, NULL);
+ ASSERT0P(spa->spa_livelist_condense_zthr);
spa->spa_livelist_condense_zthr =
zthr_create("z_livelist_condense",
spa_livelist_condense_cb_check,
@@ -3333,7 +3486,7 @@ spa_spawn_aux_threads(spa_t *spa)
spa_start_livelist_destroy_thread(spa);
spa_start_livelist_condensing_thread(spa);
- ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL);
+ ASSERT0P(spa->spa_checkpoint_discard_zthr);
spa->spa_checkpoint_discard_zthr =
zthr_create("z_checkpoint_discard",
spa_checkpoint_discard_thread_check,
@@ -3768,20 +3921,17 @@ out:
* ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool
*/
if (error == EREMOTEIO) {
- const char *hostname = "<unknown>";
- uint64_t hostid = 0;
-
if (mmp_label) {
if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) {
- hostname = fnvlist_lookup_string(mmp_label,
- ZPOOL_CONFIG_HOSTNAME);
+ const char *hostname = fnvlist_lookup_string(
+ mmp_label, ZPOOL_CONFIG_HOSTNAME);
fnvlist_add_string(spa->spa_load_info,
ZPOOL_CONFIG_MMP_HOSTNAME, hostname);
}
if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) {
- hostid = fnvlist_lookup_uint64(mmp_label,
- ZPOOL_CONFIG_HOSTID);
+ uint64_t hostid = fnvlist_lookup_uint64(
+ mmp_label, ZPOOL_CONFIG_HOSTID);
fnvlist_add_uint64(spa->spa_load_info,
ZPOOL_CONFIG_MMP_HOSTID, hostid);
}
@@ -3941,11 +4091,11 @@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type)
nvlist_free(spa->spa_load_info);
spa->spa_load_info = fnvlist_alloc();
- ASSERT(spa->spa_comment == NULL);
+ ASSERT0P(spa->spa_comment);
if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
spa->spa_comment = spa_strdup(comment);
- ASSERT(spa->spa_compatibility == NULL);
+ ASSERT0P(spa->spa_compatibility);
if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMPATIBILITY,
&compatibility) == 0)
spa->spa_compatibility = spa_strdup(compatibility);
@@ -4711,6 +4861,9 @@ spa_ld_get_props(spa_t *spa)
if (error != 0 && error != ENOENT)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ /* Load time log */
+ spa_load_txg_log_time(spa);
+
/*
* Load the persistent error log. If we have an older pool, this will
* not be present.
@@ -5760,7 +5913,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
nvlist_free(config);
if (state == SPA_LOAD_RECOVER) {
- ASSERT3P(loadinfo, ==, NULL);
+ ASSERT0P(loadinfo);
spa_import_progress_remove(spa_guid(spa));
return (rewind_error);
} else {
@@ -5899,7 +6052,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
}
if (firstopen)
- zvol_create_minors_recursive(spa_name(spa));
+ zvol_create_minors(spa_name(spa));
*spapp = spa;
@@ -6877,7 +7030,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
mutex_exit(&spa_namespace_lock);
- zvol_create_minors_recursive(pool);
+ zvol_create_minors(pool);
spa_import_os(spa);
@@ -7134,6 +7287,9 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
spa_config_exit(spa, SCL_ALL, FTAG);
}
+ if (spa_should_sync_time_logger_on_unload(spa))
+ spa_unload_sync_time_logger(spa);
+
/*
* If the log space map feature is enabled and the pool is
* getting exported (but not destroyed), we want to spend some
@@ -8935,7 +9091,7 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
int
spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
{
- ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+ ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER));
if (dsl_scan_resilvering(spa->spa_dsl_pool))
return (SET_ERROR(EBUSY));
@@ -8946,7 +9102,7 @@ spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
int
spa_scan_stop(spa_t *spa)
{
- ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+ ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER));
if (dsl_scan_resilvering(spa->spa_dsl_pool))
return (SET_ERROR(EBUSY));
@@ -8963,7 +9119,7 @@ int
spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart,
uint64_t txgend)
{
- ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+ ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER));
if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE)
return (SET_ERROR(ENOTSUP));
@@ -9092,6 +9248,8 @@ spa_async_thread(void *arg)
old_space += metaslab_class_get_space(spa_dedup_class(spa));
old_space += metaslab_class_get_space(
spa_embedded_log_class(spa));
+ old_space += metaslab_class_get_space(
+ spa_special_embedded_log_class(spa));
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
@@ -9100,6 +9258,8 @@ spa_async_thread(void *arg)
new_space += metaslab_class_get_space(spa_dedup_class(spa));
new_space += metaslab_class_get_space(
spa_embedded_log_class(spa));
+ new_space += metaslab_class_get_space(
+ spa_special_embedded_log_class(spa));
mutex_exit(&spa_namespace_lock);
/*
@@ -9388,7 +9548,7 @@ spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx)
{
zio_t *zio = zio_root(spa, NULL, NULL, 0);
bplist_iterate(bpl, spa_free_sync_cb, zio, tx);
- VERIFY(zio_wait(zio) == 0);
+ VERIFY0(zio_wait(zio));
}
/*
@@ -9427,7 +9587,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
size_t nvsize = 0;
dmu_buf_t *db;
- VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
+ VERIFY0(nvlist_size(nv, &nvsize, NV_ENCODE_XDR));
/*
* Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
@@ -9437,15 +9597,15 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
packed = vmem_alloc(bufsize, KM_SLEEP);
- VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
- KM_SLEEP) == 0);
+ VERIFY0(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
+ KM_SLEEP));
memset(packed + nvsize, 0, bufsize - nvsize);
dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
vmem_free(packed, bufsize);
- VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
+ VERIFY0(dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
dmu_buf_will_dirty(db, tx);
*(uint64_t *)db->db_data = nvsize;
dmu_buf_rele(db, FTAG);
@@ -10180,6 +10340,8 @@ spa_sync(spa_t *spa, uint64_t txg)
*/
brt_pending_apply(spa, txg);
+ spa_sync_time_logger(spa, txg);
+
/*
* Lock out configuration changes.
*/
@@ -10222,6 +10384,7 @@ spa_sync(spa_t *spa, uint64_t txg)
dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
spa->spa_sync_starttime = gethrtime();
+
taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
@@ -10309,7 +10472,7 @@ spa_sync(spa_t *spa, uint64_t txg)
metaslab_class_evict_old(spa->spa_normal_class, txg);
metaslab_class_evict_old(spa->spa_log_class, txg);
- /* spa_embedded_log_class has only one metaslab per vdev. */
+ /* Embedded log classes have only one metaslab per vdev. */
metaslab_class_evict_old(spa->spa_special_class, txg);
metaslab_class_evict_old(spa->spa_dedup_class, txg);
@@ -10378,7 +10541,7 @@ spa_sync_tq_create(spa_t *spa, const char *name)
{
kthread_t **kthreads;
- ASSERT(spa->spa_sync_tq == NULL);
+ ASSERT0P(spa->spa_sync_tq);
ASSERT3S(spa->spa_alloc_count, <=, boot_ncpus);
/*
@@ -11095,6 +11258,13 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT,
"Whether extra ALLOC blkptrs were added to a livelist entry while it "
"was being condensed");
+ZFS_MODULE_PARAM(zfs_spa, spa_, note_txg_time, UINT, ZMOD_RW,
+ "How frequently TXG timestamps are stored internally (in seconds)");
+
+ZFS_MODULE_PARAM(zfs_spa, spa_, flush_txg_time, UINT, ZMOD_RW,
+ "How frequently the TXG timestamps database should be flushed "
+ "to disk (in seconds)");
+
#ifdef _KERNEL
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/spa_errlog.c b/sys/contrib/openzfs/module/zfs/spa_errlog.c
index 3e08f261fda1..7252fd534bdf 100644
--- a/sys/contrib/openzfs/module/zfs/spa_errlog.c
+++ b/sys/contrib/openzfs/module/zfs/spa_errlog.c
@@ -253,7 +253,7 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT);
- *birth_txg = BP_GET_LOGICAL_BIRTH(&bp);
+ *birth_txg = BP_GET_PHYSICAL_BIRTH(&bp);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (error);
@@ -885,7 +885,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
if (error == EACCES)
error = 0;
else if (!error)
- zep.zb_birth = BP_GET_LOGICAL_BIRTH(&bp);
+ zep.zb_birth = BP_GET_PHYSICAL_BIRTH(&bp);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index f054e4290bbf..cce772eae598 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -471,9 +471,9 @@ spa_config_lock_destroy(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_destroy(&scl->scl_lock);
cv_destroy(&scl->scl_cv);
- ASSERT(scl->scl_writer == NULL);
- ASSERT(scl->scl_write_wanted == 0);
- ASSERT(scl->scl_count == 0);
+ ASSERT0P(scl->scl_writer);
+ ASSERT0(scl->scl_write_wanted);
+ ASSERT0(scl->scl_count);
}
}
@@ -715,6 +715,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&spa->spa_txg_log_time_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
@@ -783,24 +784,23 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path);
list_insert_head(&spa->spa_config_list, dp);
- VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, KM_SLEEP));
if (config != NULL) {
nvlist_t *features;
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
&features) == 0) {
- VERIFY(nvlist_dup(features, &spa->spa_label_features,
- 0) == 0);
+ VERIFY0(nvlist_dup(features,
+ &spa->spa_label_features, 0));
}
- VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
+ VERIFY0(nvlist_dup(config, &spa->spa_config, 0));
}
if (spa->spa_label_features == NULL) {
- VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
+ KM_SLEEP));
}
spa->spa_min_ashift = INT_MAX;
@@ -903,6 +903,7 @@ spa_remove(spa_t *spa)
mutex_destroy(&spa->spa_vdev_top_lock);
mutex_destroy(&spa->spa_feat_stats_lock);
mutex_destroy(&spa->spa_activities_lock);
+ mutex_destroy(&spa->spa_txg_log_time_lock);
kmem_free(spa, sizeof (spa_t));
}
@@ -1308,6 +1309,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
metaslab_class_validate(spa_log_class(spa));
metaslab_class_validate(spa_embedded_log_class(spa));
metaslab_class_validate(spa_special_class(spa));
+ metaslab_class_validate(spa_special_embedded_log_class(spa));
metaslab_class_validate(spa_dedup_class(spa));
spa_config_exit(spa, SCL_ALL, spa);
@@ -1896,6 +1898,8 @@ spa_get_slop_space(spa_t *spa)
*/
uint64_t embedded_log =
metaslab_class_get_dspace(spa_embedded_log_class(spa));
+ embedded_log += metaslab_class_get_dspace(
+ spa_special_embedded_log_class(spa));
slop -= MIN(embedded_log, slop >> 1);
/*
@@ -2001,6 +2005,12 @@ spa_special_class(spa_t *spa)
}
metaslab_class_t *
+spa_special_embedded_log_class(spa_t *spa)
+{
+ return (spa->spa_special_embedded_log_class);
+}
+
+metaslab_class_t *
spa_dedup_class(spa_t *spa)
{
return (spa->spa_dedup_class);
diff --git a/sys/contrib/openzfs/module/zfs/spa_stats.c b/sys/contrib/openzfs/module/zfs/spa_stats.c
index 6d7cabcf766d..2c87122a0aa9 100644
--- a/sys/contrib/openzfs/module/zfs/spa_stats.c
+++ b/sys/contrib/openzfs/module/zfs/spa_stats.c
@@ -718,7 +718,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
smh = list_prev(&shl->procfs_list.pl_list, smh)) {
if (smh->mmp_node_id == mmp_node_id) {
- ASSERT(smh->io_error == 0);
+ ASSERT0(smh->io_error);
smh->io_error = io_error;
smh->duration = duration;
error = 0;
diff --git a/sys/contrib/openzfs/module/zfs/space_map.c b/sys/contrib/openzfs/module/zfs/space_map.c
index c429e0edd168..5f24963f2291 100644
--- a/sys/contrib/openzfs/module/zfs/space_map.c
+++ b/sys/contrib/openzfs/module/zfs/space_map.c
@@ -817,7 +817,7 @@ space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
space_map_t *sm;
int error;
- ASSERT(*smp == NULL);
+ ASSERT0P(*smp);
ASSERT(os != NULL);
ASSERT(object != 0);
diff --git a/sys/contrib/openzfs/module/zfs/space_reftree.c b/sys/contrib/openzfs/module/zfs/space_reftree.c
index 9b2d5ed31dc9..889980e08c06 100644
--- a/sys/contrib/openzfs/module/zfs/space_reftree.c
+++ b/sys/contrib/openzfs/module/zfs/space_reftree.c
@@ -149,6 +149,6 @@ space_reftree_generate_map(avl_tree_t *t, zfs_range_tree_t *rt, int64_t minref)
}
}
}
- ASSERT(refcnt == 0);
+ ASSERT0(refcnt);
ASSERT(start == -1ULL);
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 01758b0c54c0..9cf35e379000 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -243,6 +243,25 @@ vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
vdev_dbgmsg_print_tree(vd->vdev_child[i], indent + 2);
}
+char *
+vdev_rt_name(vdev_t *vd, const char *name)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu %s}",
+ spa_name(vd->vdev_spa),
+ (u_longlong_t)vd->vdev_guid,
+ name));
+}
+
+static char *
+vdev_rt_name_dtl(vdev_t *vd, const char *name, vdev_dtl_type_t dtl_type)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu %s[%d]}",
+ spa_name(vd->vdev_spa),
+ (u_longlong_t)vd->vdev_guid,
+ name,
+ dtl_type));
+}
+
/*
* Virtual device management.
*/
@@ -282,12 +301,15 @@ vdev_getops(const char *type)
* Given a vdev and a metaslab class, find which metaslab group we're
* interested in. All vdevs may belong to two different metaslab classes.
* Dedicated slog devices use only the primary metaslab group, rather than a
- * separate log group. For embedded slogs, the vdev_log_mg will be non-NULL.
+ * separate log group. For embedded slogs, vdev_log_mg will be non-NULL and
+ * will point to a metaslab group of either embedded_log_class (for normal
+ * vdevs) or special_embedded_log_class (for special vdevs).
*/
metaslab_group_t *
vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
{
- if (mc == spa_embedded_log_class(vd->vdev_spa) &&
+ if ((mc == spa_embedded_log_class(vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(vd->vdev_spa)) &&
vd->vdev_log_mg != NULL)
return (vd->vdev_log_mg);
else
@@ -532,7 +554,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd)
vdev_t **newchild;
ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
- ASSERT(cvd->vdev_parent == NULL);
+ ASSERT0P(cvd->vdev_parent);
cvd->vdev_parent = pvd;
@@ -556,7 +578,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd)
pvd->vdev_nonrot &= cvd->vdev_nonrot;
cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd);
- ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL);
+ ASSERT0P(cvd->vdev_top->vdev_parent->vdev_parent);
/*
* Walk up all ancestors to update guid sum.
@@ -692,8 +714,9 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
rw_init(&vd->vdev_indirect_rwlock, NULL, RW_DEFAULT, NULL);
mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL);
- vd->vdev_obsolete_segments = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ vd->vdev_obsolete_segments = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vdev_obsolete_segments"));
/*
* Initialize rate limit structs for events. We rate limit ZIO delay
@@ -747,8 +770,9 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL);
for (int t = 0; t < DTL_TYPES; t++) {
- vd->vdev_dtl[t] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ vd->vdev_dtl[t] = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name_dtl(vd, "vdev_dtl", t));
}
txg_list_create(&vd->vdev_ms_list, spa,
@@ -1077,10 +1101,10 @@ vdev_free(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
- ASSERT3P(vd->vdev_autotrim_thread, ==, NULL);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
+ ASSERT0P(vd->vdev_trim_thread);
+ ASSERT0P(vd->vdev_autotrim_thread);
+ ASSERT0P(vd->vdev_rebuild_thread);
/*
* Scan queues are normally destroyed at the end of a scan. If the
@@ -1109,7 +1133,7 @@ vdev_free(vdev_t *vd)
for (int c = 0; c < vd->vdev_children; c++)
vdev_free(vd->vdev_child[c]);
- ASSERT(vd->vdev_child == NULL);
+ ASSERT0P(vd->vdev_child);
ASSERT(vd->vdev_guid_sum == vd->vdev_guid);
if (vd->vdev_ops->vdev_op_fini != NULL)
@@ -1138,7 +1162,7 @@ vdev_free(vdev_t *vd)
*/
vdev_remove_child(vd->vdev_parent, vd);
- ASSERT(vd->vdev_parent == NULL);
+ ASSERT0P(vd->vdev_parent);
ASSERT(!list_link_active(&vd->vdev_leaf_node));
/*
@@ -1285,9 +1309,9 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
ASSERT0(tvd->vdev_indirect_config.vic_births_object);
ASSERT0(tvd->vdev_indirect_config.vic_mapping_object);
ASSERT3U(tvd->vdev_indirect_config.vic_prev_indirect_vdev, ==, -1ULL);
- ASSERT3P(tvd->vdev_indirect_mapping, ==, NULL);
- ASSERT3P(tvd->vdev_indirect_births, ==, NULL);
- ASSERT3P(tvd->vdev_obsolete_sm, ==, NULL);
+ ASSERT0P(tvd->vdev_indirect_mapping);
+ ASSERT0P(tvd->vdev_indirect_births);
+ ASSERT0P(tvd->vdev_obsolete_sm);
ASSERT0(tvd->vdev_noalloc);
ASSERT0(tvd->vdev_removing);
ASSERT0(tvd->vdev_rebuilding);
@@ -1440,7 +1464,7 @@ vdev_remove_parent(vdev_t *cvd)
if (cvd == cvd->vdev_top)
vdev_top_transfer(mvd, cvd);
- ASSERT(mvd->vdev_children == 0);
+ ASSERT0(mvd->vdev_children);
vdev_free(mvd);
}
@@ -1508,8 +1532,13 @@ vdev_metaslab_group_create(vdev_t *vd)
vd->vdev_mg = metaslab_group_create(mc, vd);
if (!vd->vdev_islog) {
- vd->vdev_log_mg = metaslab_group_create(
- spa_embedded_log_class(spa), vd);
+ if (mc == spa_special_class(spa)) {
+ vd->vdev_log_mg = metaslab_group_create(
+ spa_special_embedded_log_class(spa), vd);
+ } else {
+ vd->vdev_log_mg = metaslab_group_create(
+ spa_embedded_log_class(spa), vd);
+ }
}
/*
@@ -1624,9 +1653,10 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
/*
* Find the emptiest metaslab on the vdev and mark it for use for
* embedded slog by moving it from the regular to the log metaslab
- * group.
+ * group. This works for normal and special vdevs.
*/
- if (vd->vdev_mg->mg_class == spa_normal_class(spa) &&
+ if ((vd->vdev_mg->mg_class == spa_normal_class(spa) ||
+ vd->vdev_mg->mg_class == spa_special_class(spa)) &&
vd->vdev_ms_count > zfs_embedded_slog_min_ms &&
avl_is_empty(&vd->vdev_log_mg->mg_metaslab_tree)) {
uint64_t slog_msid = 0;
@@ -2104,14 +2134,14 @@ vdev_open(vdev_t *vd)
* faulted, bail out of the open.
*/
if (!vd->vdev_removed && vd->vdev_faulted) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED ||
vd->vdev_label_aux == VDEV_AUX_EXTERNAL);
vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
vd->vdev_label_aux);
return (SET_ERROR(ENXIO));
} else if (vd->vdev_offline) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE);
return (SET_ERROR(ENXIO));
}
@@ -2167,7 +2197,7 @@ vdev_open(vdev_t *vd)
* the vdev is accessible. If we're faulted, bail.
*/
if (vd->vdev_faulted) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED ||
vd->vdev_label_aux == VDEV_AUX_EXTERNAL);
vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
@@ -2176,7 +2206,7 @@ vdev_open(vdev_t *vd)
}
if (vd->vdev_degraded) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
VDEV_AUX_ERR_EXCEEDED);
} else {
@@ -3449,7 +3479,9 @@ vdev_dtl_load(vdev_t *vd)
return (error);
ASSERT(vd->vdev_dtl_sm != NULL);
- rt = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ rt = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vdev_dtl_load:rt"));
error = space_map_load(vd->vdev_dtl_sm, rt, SM_ALLOC);
if (error == 0) {
mutex_enter(&vd->vdev_dtl_lock);
@@ -3597,7 +3629,8 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
ASSERT(vd->vdev_dtl_sm != NULL);
}
- rtsync = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ rtsync = zfs_range_tree_create_flags(NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "rtsync"));
mutex_enter(&vd->vdev_dtl_lock);
zfs_range_tree_walk(rt, zfs_range_tree_add, rtsync);
@@ -3912,7 +3945,7 @@ vdev_load(vdev_t *vd)
if (error == 0 && checkpoint_sm_obj != 0) {
objset_t *mos = spa_meta_objset(vd->vdev_spa);
ASSERT(vd->vdev_asize != 0);
- ASSERT3P(vd->vdev_checkpoint_sm, ==, NULL);
+ ASSERT0P(vd->vdev_checkpoint_sm);
error = space_map_open(&vd->vdev_checkpoint_sm,
mos, checkpoint_sm_obj, 0, vd->vdev_asize,
@@ -3960,7 +3993,7 @@ vdev_load(vdev_t *vd)
if (error == 0 && obsolete_sm_object != 0) {
objset_t *mos = vd->vdev_spa->spa_meta_objset;
ASSERT(vd->vdev_asize != 0);
- ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
+ ASSERT0P(vd->vdev_obsolete_sm);
if ((error = space_map_open(&vd->vdev_obsolete_sm, mos,
obsolete_sm_object, 0, vd->vdev_asize, 0))) {
@@ -4488,7 +4521,7 @@ top:
/*
* Prevent any future allocations.
*/
- ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ ASSERT0P(tvd->vdev_log_mg);
metaslab_group_passivate(mg);
(void) spa_vdev_state_exit(spa, vd, 0);
@@ -5161,7 +5194,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
int64_t
vdev_deflated_space(vdev_t *vd, int64_t space)
{
- ASSERT((space & (SPA_MINBLOCKSIZE-1)) == 0);
+ ASSERT0((space & (SPA_MINBLOCKSIZE-1)));
ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
return ((space >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio);
@@ -5253,8 +5286,8 @@ vdev_config_dirty(vdev_t *vd)
if (nvlist_lookup_nvlist_array(sav->sav_config,
ZPOOL_CONFIG_L2CACHE, &aux, &naux) != 0) {
- VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
- ZPOOL_CONFIG_SPARES, &aux, &naux) == 0);
+ VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config,
+ ZPOOL_CONFIG_SPARES, &aux, &naux));
}
ASSERT(c < naux);
@@ -5642,7 +5675,7 @@ vdev_expand(vdev_t *vd, uint64_t txg)
(vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count &&
vdev_is_concrete(vd)) {
vdev_metaslab_group_create(vd);
- VERIFY(vdev_metaslab_init(vd, txg) == 0);
+ VERIFY0(vdev_metaslab_init(vd, txg));
vdev_config_dirty(vd);
}
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c
index feec5fd3ce17..a05289102af2 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@@ -477,7 +477,7 @@ vdev_draid_generate_perms(const draid_map_t *map, uint8_t **permsp)
VERIFY3U(map->dm_children, <=, VDEV_DRAID_MAX_CHILDREN);
VERIFY3U(map->dm_seed, !=, 0);
VERIFY3U(map->dm_nperms, !=, 0);
- VERIFY3P(map->dm_perms, ==, NULL);
+ VERIFY0P(map->dm_perms);
#ifdef _KERNEL
/*
@@ -590,7 +590,7 @@ vdev_draid_psize_to_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
uint64_t asize = (rows * vdc->vdc_groupwidth) << ashift;
ASSERT3U(asize, !=, 0);
- ASSERT3U(asize % (vdc->vdc_groupwidth), ==, 0);
+ ASSERT0(asize % (vdc->vdc_groupwidth));
return (asize);
}
@@ -704,7 +704,7 @@ vdev_draid_map_alloc_scrub(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
uint64_t skip_off = 0;
ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
- ASSERT3P(rr->rr_abd_empty, ==, NULL);
+ ASSERT0P(rr->rr_abd_empty);
if (rr->rr_nempty > 0) {
rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size,
@@ -793,7 +793,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
uint64_t skip_off = 0;
ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
- ASSERT3P(rr->rr_abd_empty, ==, NULL);
+ ASSERT0P(rr->rr_abd_empty);
if (rr->rr_nempty > 0) {
rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size,
@@ -807,7 +807,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
/* empty data column (small read), add a skip sector */
ASSERT3U(skip_size, ==, parity_size);
ASSERT3U(rr->rr_nempty, !=, 0);
- ASSERT3P(rc->rc_abd, ==, NULL);
+ ASSERT0P(rc->rc_abd);
rc->rc_abd = abd_get_offset_size(rr->rr_abd_empty,
skip_off, skip_size);
skip_off += skip_size;
@@ -1623,7 +1623,7 @@ vdev_draid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize,
SPA_MAXBLOCKSIZE);
ASSERT3U(vdev_draid_get_astart(vd, start), ==, start);
- ASSERT3U(asize % (vdc->vdc_groupwidth << ashift), ==, 0);
+ ASSERT0(asize % (vdc->vdc_groupwidth << ashift));
/* Chunks must evenly span all data columns in the group. */
psize = (((psize >> ashift) / ndata) * ndata) << ashift;
@@ -1634,7 +1634,7 @@ vdev_draid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize,
uint64_t left = vdev_draid_group_to_offset(vd, group + 1) - start;
chunk_size = MIN(chunk_size, left);
- ASSERT3U(chunk_size % (vdc->vdc_groupwidth << ashift), ==, 0);
+ ASSERT0(chunk_size % (vdc->vdc_groupwidth << ashift));
ASSERT3U(vdev_draid_offset_to_group(vd, start), ==,
vdev_draid_offset_to_group(vd, start + chunk_size - 1));
@@ -2272,7 +2272,7 @@ vdev_draid_init(spa_t *spa, nvlist_t *nv, void **tsd)
ASSERT3U(vdc->vdc_groupwidth, <=, vdc->vdc_ndisks);
ASSERT3U(vdc->vdc_groupsz, >=, 2 * VDEV_DRAID_ROWHEIGHT);
ASSERT3U(vdc->vdc_devslicesz, >=, VDEV_DRAID_ROWHEIGHT);
- ASSERT3U(vdc->vdc_devslicesz % VDEV_DRAID_ROWHEIGHT, ==, 0);
+ ASSERT0(vdc->vdc_devslicesz % VDEV_DRAID_ROWHEIGHT);
ASSERT3U((vdc->vdc_groupwidth * vdc->vdc_ngroups) %
vdc->vdc_ndisks, ==, 0);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
index fac2c3a5f154..7538f471e63c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -792,7 +792,7 @@ spa_condense_indirect_start_sync(vdev_t *vd, dmu_tx_t *tx)
DMU_POOL_CONDENSING_INDIRECT, sizeof (uint64_t),
sizeof (*scip) / sizeof (uint64_t), scip, tx));
- ASSERT3P(spa->spa_condensing_indirect, ==, NULL);
+ ASSERT0P(spa->spa_condensing_indirect);
spa->spa_condensing_indirect = spa_condensing_indirect_create(spa);
zfs_dbgmsg("starting condense of vdev %llu in txg %llu: "
@@ -882,7 +882,7 @@ spa_condense_fini(spa_t *spa)
void
spa_start_indirect_condensing_thread(spa_t *spa)
{
- ASSERT3P(spa->spa_condense_zthr, ==, NULL);
+ ASSERT0P(spa->spa_condense_zthr);
spa->spa_condense_zthr = zthr_create("z_indirect_condense",
spa_condense_indirect_thread_check,
spa_condense_indirect_thread, spa, minclsyspri);
@@ -1504,7 +1504,7 @@ vdev_indirect_splits_checksum_validate(indirect_vsd_t *iv, zio_t *zio)
is != NULL; is = list_next(&iv->iv_splits, is)) {
ASSERT3P(is->is_good_child->ic_data, !=, NULL);
- ASSERT3P(is->is_good_child->ic_duplicate, ==, NULL);
+ ASSERT0P(is->is_good_child->ic_duplicate);
abd_copy_off(zio->io_abd, is->is_good_child->ic_data,
is->is_split_offset, 0, is->is_size);
@@ -1842,7 +1842,7 @@ vdev_indirect_io_done(zio_t *zio)
*/
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
zio->io_error = ret;
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
ret = 0;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
index 4274728578ad..27188c46e561 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
@@ -541,8 +541,9 @@ vdev_initialize_thread(void *arg)
abd_t *deadbeef = vdev_initialize_block_alloc();
- vd->vdev_initialize_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ vd->vdev_initialize_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vdev_initialize_tree"));
for (uint64_t i = 0; !vd->vdev_detached &&
i < vd->vdev_top->vdev_ms_count; i++) {
@@ -631,7 +632,7 @@ vdev_initialize(vdev_t *vd)
ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_initialize_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -652,7 +653,7 @@ vdev_uninitialize(vdev_t *vd)
ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_initialize_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -671,7 +672,7 @@ vdev_initialize_stop_wait_impl(vdev_t *vd)
while (vd->vdev_initialize_thread != NULL)
cv_wait(&vd->vdev_initialize_cv, &vd->vdev_initialize_lock);
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
vd->vdev_initialize_exit_wanted = B_FALSE;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
index 6baa6236aac2..c44f654b0261 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -163,7 +163,7 @@ uint64_t
vdev_label_offset(uint64_t psize, int l, uint64_t offset)
{
ASSERT(offset < sizeof (vdev_label_t));
- ASSERT(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t) == 0);
+ ASSERT0(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t));
return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
@@ -768,12 +768,12 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config)
}
if (idx) {
- VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
- array, idx) == 0);
+ VERIFY0(nvlist_add_uint64_array(config,
+ ZPOOL_CONFIG_HOLE_ARRAY, array, idx));
}
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
- rvd->vdev_children) == 0);
+ VERIFY0(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
+ rvd->vdev_children));
kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
}
@@ -1189,8 +1189,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
* vdev uses as described above, and automatically expires if we
* fail.
*/
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
- crtxg) == 0);
+ VERIFY0(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
+ crtxg));
}
buf = vp->vp_nvlist;
diff --git a/sys/contrib/openzfs/module/zfs/vdev_mirror.c b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
index a6aee9437066..18efdaac006f 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
@@ -532,7 +532,7 @@ vdev_mirror_child_select(zio_t *zio)
uint64_t txg = zio->io_txg;
int c, lowest_load;
- ASSERT(zio->io_bp == NULL || BP_GET_BIRTH(zio->io_bp) == txg);
+ ASSERT(zio->io_bp == NULL || BP_GET_PHYSICAL_BIRTH(zio->io_bp) == txg);
lowest_load = INT_MAX;
mm->mm_preferred_cnt = 0;
@@ -779,7 +779,7 @@ vdev_mirror_io_done(zio_t *zio)
* being written out during self healing.
*/
if ((zio->io_flags & ZIO_FLAG_DIO_READ) &&
- (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
+ (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) {
zio_dio_chksum_verify_error_report(zio);
zio->io_error = vdev_mirror_worst_error(mm);
ASSERT3U(zio->io_error, ==, ECKSUM);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index aa41f7066036..c12713b107bf 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -780,7 +780,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
if (dio->io_flags & ZIO_FLAG_NODATA) {
/* allocate a buffer for a write gap */
ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE);
- ASSERT3P(dio->io_abd, ==, NULL);
+ ASSERT0P(dio->io_abd);
abd_gang_add(aio->io_abd,
abd_get_zeros(dio->io_size), B_TRUE);
} else {
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index 71c4bfbdaf00..b597d6daefde 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -412,7 +412,7 @@ vdev_raidz_map_free(raidz_map_t *rm)
rm->rm_nphys_cols);
}
- ASSERT3P(rm->rm_lr, ==, NULL);
+ ASSERT0P(rm->rm_lr);
kmem_free(rm, offsetof(raidz_map_t, rm_row[rm->rm_nrows]));
}
@@ -2206,11 +2206,7 @@ vdev_raidz_close(vdev_t *vd)
/*
* Return the logical width to use, given the txg in which the allocation
- * happened. Note that BP_GET_BIRTH() is usually the txg in which the
- * BP was allocated. Remapped BP's (that were relocated due to device
- * removal, see remap_blkptr_cb()), will have a more recent physical birth
- * which reflects when the BP was relocated, but we can ignore these because
- * they can't be on RAIDZ (device removal doesn't support RAIDZ).
+ * happened.
*/
static uint64_t
vdev_raidz_get_logical_width(vdev_raidz_t *vdrz, uint64_t txg)
@@ -2249,10 +2245,9 @@ vdev_raidz_asize_to_psize(vdev_t *vd, uint64_t asize, uint64_t txg)
vdev_raidz_t *vdrz = vd->vdev_tsd;
uint64_t psize;
uint64_t ashift = vd->vdev_top->vdev_ashift;
- uint64_t cols = vdrz->vd_original_width;
uint64_t nparity = vdrz->vd_nparity;
- cols = vdev_raidz_get_logical_width(vdrz, txg);
+ uint64_t cols = vdev_raidz_get_logical_width(vdrz, txg);
ASSERT0(asize % (1 << ashift));
@@ -2285,10 +2280,9 @@ vdev_raidz_psize_to_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
vdev_raidz_t *vdrz = vd->vdev_tsd;
uint64_t asize;
uint64_t ashift = vd->vdev_top->vdev_ashift;
- uint64_t cols = vdrz->vd_original_width;
uint64_t nparity = vdrz->vd_nparity;
- cols = vdev_raidz_get_logical_width(vdrz, txg);
+ uint64_t cols = vdev_raidz_get_logical_width(vdrz, txg);
asize = ((psize - 1) >> ashift) + 1;
asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity));
@@ -2345,7 +2339,7 @@ vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, raidz_row_t *rr, int col)
logical_rs.rs_start = rr->rr_offset;
logical_rs.rs_end = logical_rs.rs_start +
vdev_raidz_psize_to_asize(zio->io_vd, rr->rr_size,
- BP_GET_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
raidz_col_t *rc = &rr->rr_col[col];
vdev_t *cvd = zio->io_vd->vdev_child[rc->rc_devidx];
@@ -2437,7 +2431,7 @@ raidz_start_skip_writes(zio_t *zio)
vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
if (rc->rc_size != 0)
continue;
- ASSERT3P(rc->rc_abd, ==, NULL);
+ ASSERT0P(rc->rc_abd);
ASSERT3U(rc->rc_offset, <,
cvd->vdev_psize - VDEV_LABEL_END_SIZE);
@@ -2568,7 +2562,7 @@ vdev_raidz_io_start(zio_t *zio)
raidz_map_t *rm;
uint64_t logical_width = vdev_raidz_get_logical_width(vdrz,
- BP_GET_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
if (logical_width != vdrz->vd_physical_width) {
zfs_locked_range_t *lr = NULL;
uint64_t synced_offset = UINT64_MAX;
@@ -2691,7 +2685,7 @@ raidz_checksum_verify(zio_t *zio)
*/
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
zio->io_error = ret;
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
zio_checksum_verified(zio);
return (0);
@@ -3048,7 +3042,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
/* Check for success */
if (raidz_checksum_verify(zio) == 0) {
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)
return (0);
/* Reconstruction succeeded - report errors */
@@ -3369,7 +3363,7 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
* also have been fewer parity errors than parity
* columns or, again, we wouldn't be in this code path.
*/
- ASSERT(parity_untried == 0);
+ ASSERT0(parity_untried);
ASSERT(parity_errors < rr->rr_firstdatacol);
/*
@@ -3514,7 +3508,7 @@ vdev_raidz_io_done(zio_t *zio)
}
if (raidz_checksum_verify(zio) == 0) {
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)
goto done;
for (int i = 0; i < rm->rm_nrows; i++) {
@@ -4591,8 +4585,10 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr)
uint64_t shift, start;
zfs_range_seg_type_t type = metaslab_calculate_range_tree_type(
raidvd, msp, &start, &shift);
- zfs_range_tree_t *rt = zfs_range_tree_create(NULL, type, NULL,
- start, shift);
+ zfs_range_tree_t *rt = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift, ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(msp->ms_group, msp,
+ "spa_raidz_expand_thread:rt"));
zfs_range_tree_add(rt, msp->ms_start, msp->ms_size);
zfs_range_tree_walk(msp->ms_allocatable, zfs_range_tree_remove,
rt);
@@ -4747,7 +4743,7 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr)
void
spa_start_raidz_expansion_thread(spa_t *spa)
{
- ASSERT3P(spa->spa_raidz_expand_zthr, ==, NULL);
+ ASSERT0P(spa->spa_raidz_expand_zthr);
spa->spa_raidz_expand_zthr = zthr_create("raidz_expand",
spa_raidz_expand_thread_check, spa_raidz_expand_thread,
spa, defclsyspri);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
index 0e296606d037..47b3b9921abe 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -256,7 +256,7 @@ vdev_rebuild_initiate_sync(void *arg, dmu_tx_t *tx)
"vdev_id=%llu vdev_guid=%llu started",
(u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_rebuild_thread);
vd->vdev_rebuild_thread = thread_create(NULL, 0,
vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
@@ -413,7 +413,7 @@ vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx)
mutex_enter(&vd->vdev_rebuild_lock);
ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_rebuild_thread);
vrp->vrp_last_offset = 0;
vrp->vrp_min_txg = 0;
@@ -787,8 +787,9 @@ vdev_rebuild_thread(void *arg)
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
vr->vr_top_vdev = vd;
vr->vr_scan_msp = NULL;
- vr->vr_scan_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL,
- 0, 0);
+ vr->vr_scan_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vr_scan_tree"));
mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index db79ded6dce4..2f7a739da241 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -344,10 +344,10 @@ spa_vdev_remove_aux(nvlist_t *config, const char *name, nvlist_t **dev,
for (int i = 0, j = 0; i < count; i++) {
if (dev[i] == dev_to_remove)
continue;
- VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
+ VERIFY0(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP));
}
- VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
+ VERIFY0(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY));
fnvlist_add_nvlist_array(config, name, (const nvlist_t * const *)newdev,
count - 1);
@@ -364,13 +364,15 @@ spa_vdev_removal_create(vdev_t *vd)
spa_vdev_removal_t *svr = kmem_zalloc(sizeof (*svr), KM_SLEEP);
mutex_init(&svr->svr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&svr->svr_cv, NULL, CV_DEFAULT, NULL);
- svr->svr_allocd_segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ svr->svr_allocd_segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "svr_allocd_segs"));
svr->svr_vdev_id = vd->vdev_id;
for (int i = 0; i < TXG_SIZE; i++) {
- svr->svr_frees[i] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ svr->svr_frees[i] = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "svr_frees"));
list_create(&svr->svr_new_segments[i],
sizeof (vdev_indirect_mapping_entry_t),
offsetof(vdev_indirect_mapping_entry_t, vime_node));
@@ -421,7 +423,7 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx)
svr = spa_vdev_removal_create(vd);
ASSERT(vd->vdev_removing);
- ASSERT3P(vd->vdev_indirect_mapping, ==, NULL);
+ ASSERT0P(vd->vdev_indirect_mapping);
spa_feature_incr(spa, SPA_FEATURE_DEVICE_REMOVAL, tx);
if (spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS)) {
@@ -527,7 +529,7 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx)
* but in any case only when there are outstanding free i/os, which
* there are not).
*/
- ASSERT3P(spa->spa_vdev_removal, ==, NULL);
+ ASSERT0P(spa->spa_vdev_removal);
spa->spa_vdev_removal = svr;
svr->svr_thread = thread_create(NULL, 0,
spa_vdev_remove_thread, spa, 0, &p0, TS_RUN, minclsyspri);
@@ -1179,8 +1181,9 @@ spa_vdev_copy_segment(vdev_t *vd, zfs_range_tree_t *segs,
* relative to the start of the range to be copied (i.e. relative to the
* local variable "start").
*/
- zfs_range_tree_t *obsolete_segs = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ zfs_range_tree_t *obsolete_segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "obsolete_segs"));
zfs_btree_index_t where;
zfs_range_seg_t *rs = zfs_btree_first(&segs->rt_root, &where);
@@ -1359,11 +1362,11 @@ vdev_remove_complete(spa_t *spa)
txg_wait_synced(spa->spa_dsl_pool, 0);
txg = spa_vdev_enter(spa);
vdev_t *vd = vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id);
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
- ASSERT3P(vd->vdev_autotrim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
+ ASSERT0P(vd->vdev_trim_thread);
+ ASSERT0P(vd->vdev_autotrim_thread);
vdev_rebuild_stop_wait(vd);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_rebuild_thread);
sysevent_t *ev = spa_event_create(spa, vd, NULL,
ESC_ZFS_VDEV_REMOVE_DEV);
@@ -1448,8 +1451,9 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca,
* allocated segments that we are copying. We may also be copying
* free segments (of up to vdev_removal_max_span bytes).
*/
- zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ zfs_range_tree_t *segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "spa_vdev_copy_impl:segs"));
for (;;) {
zfs_range_tree_t *rt = svr->svr_allocd_segs;
zfs_range_seg_t *rs = zfs_range_tree_first(rt);
@@ -1610,8 +1614,9 @@ spa_vdev_remove_thread(void *arg)
vca.vca_read_error_bytes = 0;
vca.vca_write_error_bytes = 0;
- zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ zfs_range_tree_t *segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "spa_vdev_remove_thread:segs"));
mutex_enter(&svr->svr_lock);
@@ -1863,7 +1868,7 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
objset_t *mos = spa->spa_meta_objset;
- ASSERT3P(svr->svr_thread, ==, NULL);
+ ASSERT0P(svr->svr_thread);
spa_feature_decr(spa, SPA_FEATURE_DEVICE_REMOVAL, tx);
@@ -1895,8 +1900,9 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
vdev_indirect_mapping_max_offset(vim));
}
- zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ zfs_range_tree_t *segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0, ZFS_RT_F_DYN_NAME,
+ vdev_rt_name(vd, "spa_vdev_remove_cancel_sync:segs"));
for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
metaslab_t *msp = vd->vdev_ms[msi];
@@ -2070,7 +2076,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
ASSERT(vd->vdev_islog);
ASSERT(vd == vd->vdev_top);
- ASSERT3P(vd->vdev_log_mg, ==, NULL);
+ ASSERT0P(vd->vdev_log_mg);
ASSERT(MUTEX_HELD(&spa_namespace_lock));
/*
@@ -2106,7 +2112,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
if (error != 0) {
metaslab_group_activate(mg);
- ASSERT3P(vd->vdev_log_mg, ==, NULL);
+ ASSERT0P(vd->vdev_log_mg);
return (error);
}
ASSERT0(vd->vdev_stat.vs_alloc);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c
index 842bb3e690d4..eee18b367909 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_trim.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c
@@ -902,7 +902,9 @@ vdev_trim_thread(void *arg)
ta.trim_vdev = vd;
ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
ta.trim_extent_bytes_min = zfs_trim_extent_bytes_min;
- ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta.trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "trim_tree"));
ta.trim_type = TRIM_TYPE_MANUAL;
ta.trim_flags = 0;
@@ -1008,7 +1010,7 @@ vdev_trim(vdev_t *vd, uint64_t rate, boolean_t partial, boolean_t secure)
ASSERT(MUTEX_HELD(&vd->vdev_trim_lock));
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_trim_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_trim_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -1030,7 +1032,7 @@ vdev_trim_stop_wait_impl(vdev_t *vd)
while (vd->vdev_trim_thread != NULL)
cv_wait(&vd->vdev_trim_cv, &vd->vdev_trim_lock);
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_trim_thread);
vd->vdev_trim_exit_wanted = B_FALSE;
}
@@ -1305,8 +1307,10 @@ vdev_autotrim_thread(void *arg)
* Allocate an empty range tree which is swapped in
* for the existing ms_trim tree while it is processed.
*/
- trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME,
+ vdev_rt_name(vd, "autotrim_tree"));
zfs_range_tree_swap(&msp->ms_trim, &trim_tree);
ASSERT(zfs_range_tree_is_empty(msp->ms_trim));
@@ -1360,8 +1364,10 @@ vdev_autotrim_thread(void *arg)
if (!cvd->vdev_ops->vdev_op_leaf)
continue;
- ta->trim_tree = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta->trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME,
+ vdev_rt_name(vd, "autotrim_tree"));
zfs_range_tree_walk(trim_tree,
vdev_trim_range_add, ta);
}
@@ -1533,7 +1539,7 @@ vdev_autotrim_stop_wait(vdev_t *tvd)
cv_wait(&tvd->vdev_autotrim_cv,
&tvd->vdev_autotrim_lock);
- ASSERT3P(tvd->vdev_autotrim_thread, ==, NULL);
+ ASSERT0P(tvd->vdev_autotrim_thread);
tvd->vdev_autotrim_exit_wanted = B_FALSE;
}
mutex_exit(&tvd->vdev_autotrim_lock);
@@ -1600,7 +1606,9 @@ vdev_trim_l2arc_thread(void *arg)
vd->vdev_trim_secure = 0;
ta.trim_vdev = vd;
- ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta.trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "trim_tree"));
ta.trim_type = TRIM_TYPE_MANUAL;
ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
@@ -1704,7 +1712,7 @@ vdev_trim_l2arc(spa_t *spa)
mutex_enter(&vd->vdev_trim_lock);
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_trim_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_trim_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -1735,7 +1743,9 @@ vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
ASSERT(!vd->vdev_top->vdev_rz_expanding);
ta.trim_vdev = vd;
- ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta.trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "trim_tree"));
ta.trim_type = TRIM_TYPE_SIMPLE;
ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
diff --git a/sys/contrib/openzfs/module/zfs/zap.c b/sys/contrib/openzfs/module/zfs/zap.c
index 9711c91d7e4e..3e4e997798a3 100644
--- a/sys/contrib/openzfs/module/zfs/zap.c
+++ b/sys/contrib/openzfs/module/zfs/zap.c
@@ -921,7 +921,7 @@ fzap_add_cd(zap_name_t *zn,
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
ASSERT(!zap->zap_ismicro);
- ASSERT(fzap_check(zn, integer_size, num_integers) == 0);
+ ASSERT0(fzap_check(zn, integer_size, num_integers));
err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
if (err != 0)
@@ -1304,7 +1304,7 @@ zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
int
fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
{
- int err = ENOENT;
+ int err;
zap_entry_handle_t zeh;
zap_leaf_t *l;
@@ -1386,7 +1386,7 @@ again:
}
err = zap_entry_read_name(zap, &zeh,
za->za_name_len, za->za_name);
- ASSERT(err == 0);
+ ASSERT0(err);
za->za_normalization_conflict =
zap_entry_normalization_conflict(&zeh,
@@ -1546,7 +1546,7 @@ zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
boolean_t trunc = B_FALSE;
int err = 0;
- ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0);
+ ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
ASSERT3U(prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
ASSERT3U(ZAP_HASH_IDX(hash, prefix_len), ==, prefix);
@@ -1564,7 +1564,7 @@ zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
uint64_t sl_hash = ZAP_PREFIX_HASH(sl_prefix, prefix_len);
int slbit = prefix & 1;
- ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0);
+ ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
/*
* Check if there is a sibling by reading ptrtbl ptrs.
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
index 411b1a9db5ab..ea4e3117a8b9 100644
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -346,7 +346,7 @@ zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
{
zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
- ASSERT(zap->zap_normflags == 0);
+ ASSERT0(zap->zap_normflags);
zn->zn_zap = zap;
zn->zn_key_intlen = sizeof (*key);
zn->zn_key_orig = zn->zn_key_norm = key;
@@ -1876,7 +1876,7 @@ zap_cursor_serialize(zap_cursor_t *zc)
return (-1ULL);
if (zc->zc_zap == NULL)
return (zc->zc_serialized);
- ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
+ ASSERT0((zc->zc_hash & zap_maxcd(zc->zc_zap)));
ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
/*
@@ -1911,7 +1911,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
* we must add to the existing zc_cd, which may already
* be 1 due to the zap_cursor_advance.
*/
- ASSERT(zc->zc_hash == 0);
+ ASSERT0(zc->zc_hash);
hb = zap_hashbits(zc->zc_zap);
zc->zc_hash = zc->zc_serialized << (64 - hb);
zc->zc_cd += zc->zc_serialized >> hb;
diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c
index 6960ea360b15..c6684f453e95 100644
--- a/sys/contrib/openzfs/module/zfs/zcp.c
+++ b/sys/contrib/openzfs/module/zfs/zcp.c
@@ -765,7 +765,7 @@ zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
return (NULL);
}
(void) memcpy(luabuf, ptr, osize);
- VERIFY3P(zcp_lua_alloc(ud, ptr, osize, 0), ==, NULL);
+ VERIFY0P(zcp_lua_alloc(ud, ptr, osize, 0));
return (luabuf);
}
}
@@ -1175,7 +1175,7 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync,
for (nvpair_t *pair = nvlist_next_nvpair(runinfo.zri_new_zvols, NULL);
pair != NULL;
pair = nvlist_next_nvpair(runinfo.zri_new_zvols, pair)) {
- zvol_create_minor(nvpair_name(pair));
+ zvol_create_minors(nvpair_name(pair));
}
fnvlist_free(runinfo.zri_new_zvols);
diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c
index 7dfe00d42a08..0816ea134bf3 100644
--- a/sys/contrib/openzfs/module/zfs/zfeature.c
+++ b/sys/contrib/openzfs/module/zfs/zfeature.c
@@ -210,8 +210,8 @@ spa_features_check(spa_t *spa, boolean_t for_write,
za->za_name, 1, MAXPATHLEN, buf) == 0)
desc = buf;
- VERIFY(nvlist_add_string(unsup_feat,
- za->za_name, desc) == 0);
+ VERIFY0(nvlist_add_string(unsup_feat,
+ za->za_name, desc));
}
}
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_chksum.c b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
index 5c92be21c0c8..21852bf3d865 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_chksum.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
@@ -32,9 +32,6 @@
#include <sys/blake3.h>
#include <sys/sha2.h>
-/* limit benchmarking to max 256KiB, when EdonR is slower then this: */
-#define LIMIT_PERF_MBS 300
-
typedef struct {
const char *name;
const char *impl;
@@ -52,9 +49,15 @@ typedef struct {
zio_checksum_tmpl_free_t *(free);
} chksum_stat_t;
+#define AT_STARTUP 0
+#define AT_BENCHMARK 1
+#define AT_DONE 2
+
static chksum_stat_t *chksum_stat_data = 0;
-static int chksum_stat_cnt = 0;
static kstat_t *chksum_kstat = NULL;
+static int chksum_stat_limit = AT_STARTUP;
+static int chksum_stat_cnt = 0;
+static void chksum_benchmark(void);
/*
* Sample output on i3-1005G1 System:
@@ -129,6 +132,9 @@ chksum_kstat_data(char *buf, size_t size, void *data)
static void *
chksum_kstat_addr(kstat_t *ksp, loff_t n)
{
+	/* run the full benchmark on first kstat access */
+ chksum_benchmark();
+
if (n < chksum_stat_cnt)
ksp->ks_private = (void *)(chksum_stat_data + n);
else
@@ -176,47 +182,36 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
kpreempt_enable();
run_bw = size * run_count * NANOSEC;
- run_bw /= run_time_ns; /* B/s */
+ run_bw /= run_time_ns; /* B/s */
*result = run_bw/1024/1024; /* MiB/s */
}
-#define LIMIT_INIT 0
-#define LIMIT_NEEDED 1
-#define LIMIT_NOLIMIT 2
-
static void
chksum_benchit(chksum_stat_t *cs)
{
abd_t *abd;
void *ctx = 0;
void *salt = &cs->salt.zcs_bytes;
- static int chksum_stat_limit = LIMIT_INIT;
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
if (cs->init)
ctx = cs->init(&cs->salt);
+ /* benchmarks in startup mode */
+ if (chksum_stat_limit == AT_STARTUP) {
+ abd = abd_alloc_linear(1<<18, B_FALSE);
+ chksum_run(cs, abd, ctx, 5, &cs->bs256k);
+ goto done;
+ }
+
/* allocate test memory via abd linear interface */
abd = abd_alloc_linear(1<<20, B_FALSE);
+
+ /* benchmarks when requested */
chksum_run(cs, abd, ctx, 1, &cs->bs1k);
chksum_run(cs, abd, ctx, 2, &cs->bs4k);
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
- chksum_run(cs, abd, ctx, 5, &cs->bs256k);
-
- /* check if we ran on a slow cpu */
- if (chksum_stat_limit == LIMIT_INIT) {
- if (cs->bs1k < LIMIT_PERF_MBS) {
- chksum_stat_limit = LIMIT_NEEDED;
- } else {
- chksum_stat_limit = LIMIT_NOLIMIT;
- }
- }
-
- /* skip benchmarks >= 1MiB when the CPU is to slow */
- if (chksum_stat_limit == LIMIT_NEEDED)
- goto abort;
-
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
abd_free(abd);
@@ -225,7 +220,7 @@ chksum_benchit(chksum_stat_t *cs)
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
chksum_run(cs, abd, ctx, 8, &cs->bs16m);
-abort:
+done:
abd_free(abd);
/* free up temp memory */
@@ -243,7 +238,6 @@ chksum_benchmark(void)
/* we need the benchmark only for the kernel module */
return;
#endif
-
chksum_stat_t *cs;
uint64_t max;
uint32_t id, cbid = 0, id_save;
@@ -251,8 +245,14 @@ chksum_benchmark(void)
const zfs_impl_t *sha256 = zfs_impl_get_ops("sha256");
const zfs_impl_t *sha512 = zfs_impl_get_ops("sha512");
+ /* benchmarks are done */
+ if (chksum_stat_limit == AT_DONE)
+ return;
+
+
/* count implementations */
- chksum_stat_cnt = 2;
+ chksum_stat_cnt = 1; /* edonr */
+ chksum_stat_cnt += 1; /* skein */
chksum_stat_cnt += sha256->getcnt();
chksum_stat_cnt += sha512->getcnt();
chksum_stat_cnt += blake3->getcnt();
@@ -332,6 +332,17 @@ chksum_benchmark(void)
}
}
blake3->setid(id_save);
+
+ switch (chksum_stat_limit) {
+ case AT_STARTUP:
+ /* next time we want a full benchmark */
+ chksum_stat_limit = AT_BENCHMARK;
+ break;
+ case AT_BENCHMARK:
+ /* no further benchmarks */
+ chksum_stat_limit = AT_DONE;
+ break;
+ }
}
void
@@ -341,7 +352,7 @@ chksum_init(void)
blake3_per_cpu_ctx_init();
#endif
- /* Benchmark supported implementations */
+	/* quick 256 KiB benchmark; the full run happens on first kstat read */
chksum_benchmark();
/* Install kstats for all implementations */
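The reworked benchmark above is staged: chksum_init() measures only the 256 KiB case at module load (AT_STARTUP), the remaining block sizes are measured lazily the first time the kstat is read (AT_BENCHMARK), and any later read is a no-op (AT_DONE). This replaces the removed LIMIT_PERF_MBS heuristic by deferring the expensive runs until someone actually asks for the numbers. A minimal standalone sketch of that state machine, illustrative only and using hypothetical names in place of the real chksum_stat_t plumbing:

#include <stdio.h>

/* Benchmark stages, mirroring the AT_* values introduced above. */
enum bench_stage { BENCH_STARTUP, BENCH_FULL, BENCH_DONE };

static enum bench_stage stage = BENCH_STARTUP;

/* Placeholder for a single timed checksum run (hypothetical). */
static void
bench_one(const char *label, unsigned kib)
{
	printf("  benchmarking %s at %u KiB\n", label, kib);
}

/*
 * Run either the quick startup benchmark (256 KiB only) or the full
 * sweep, then advance the stage so later calls become no-ops.
 */
static void
bench_run(void)
{
	if (stage == BENCH_DONE)
		return;

	if (stage == BENCH_STARTUP) {
		bench_one("quick", 256);
		stage = BENCH_FULL;	/* full sweep on next request */
	} else {
		unsigned sizes[] = { 1, 4, 16, 64, 1024, 4096, 16384 };
		for (unsigned i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++)
			bench_one("full", sizes[i]);
		stage = BENCH_DONE;	/* never benchmark again */
	}
}

int
main(void)
{
	printf("module load:\n");
	bench_run();		/* quick 256 KiB pass */
	printf("first kstat read:\n");
	bench_run();		/* full sweep, on demand */
	printf("later kstat reads:\n");
	bench_run();		/* no-op */
	return (0);
}
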
diff --git a/sys/contrib/openzfs/module/zfs/zfs_crrd.c b/sys/contrib/openzfs/module/zfs/zfs_crrd.c
new file mode 100644
index 000000000000..f9267ed41d71
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/zfs_crrd.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2024 Klara Inc.
+ *
+ * This software was developed by
+ * Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
+ * Fred Weigel <fred.weigel@klarasystems.com>
+ * under sponsorship from Wasabi Technology, Inc. and Klara Inc.
+ */
+/*
+ * This file implements a round-robin database that stores timestamps and txg
+ * numbers. Due to limited space, we use a round-robin approach, where
+ * the oldest records are overwritten when there is no longer enough room.
+ * This is a best-effort mechanism, and the database should be treated as
+ * an approximation. Consider this before consuming it.
+ *
+ * The database is linear, meaning we assume each new entry is newer than the
+ * ones already stored. Because of this, if time is manipulated, the database
+ * will only accept records that are newer than the existing ones.
+ * (For example, jumping 10 years into the future and then back can lead to
+ * a situation where we won't write anything to the database for 10 years.)
+ *
+ * All times stored in the database use UTC, which makes it easy to convert to
+ * and from local time.
+ *
+ * Each database holds 256 records (as defined in the `RRD_MAX_ENTRIES` macro).
+ * This limit comes from the maximum size of a ZAP object, where we store the
+ * binary blob.
+ *
+ * We've split the database into three smaller ones.
+ * The `minute database` provides high resolution (default: every 10 minutes),
+ * but only covers approximately 1.5 days. This gives a detailed view of recent
+ * activity, useful, for example, when performing a scrub of the last hour.
+ * The `daily database` records one txg per day. With 256 entries, it retains
+ * roughly 8 months of data. This allows users to scrub or analyze txgs across
+ * a range of days.
+ * The `monthly database` stores one record per month, giving approximately
+ * 21 years of history.
+ * All these calculations assume the worst-case scenario: the pool is always
+ * online and actively written to.
+ *
+ * A potential source of confusion is that the database does not store data
+ * while the pool is offline, leading to potential gaps in timeline. Also,
+ * the database contains no records from before this feature was enabled.
+ * Both, upon reflection, are expected.
+ */
+#include <sys/zfs_context.h>
+
+#include "zfs_crrd.h"
+
+rrd_data_t *
+rrd_tail_entry(rrd_t *rrd)
+{
+ size_t n;
+
+ if (rrd_len(rrd) == 0)
+ return (NULL);
+
+ if (rrd->rrd_tail == 0)
+ n = RRD_MAX_ENTRIES - 1;
+ else
+ n = rrd->rrd_tail - 1;
+
+ return (&rrd->rrd_entries[n]);
+}
+
+uint64_t
+rrd_tail(rrd_t *rrd)
+{
+ const rrd_data_t *tail;
+
+ tail = rrd_tail_entry(rrd);
+
+ return (tail == NULL ? 0 : tail->rrdd_time);
+}
+
+/*
+ * Return length of data in the rrd.
+ * rrd_get works from 0..rrd_len()-1.
+ */
+size_t
+rrd_len(rrd_t *rrd)
+{
+
+ return (rrd->rrd_length);
+}
+
+const rrd_data_t *
+rrd_entry(rrd_t *rrd, size_t i)
+{
+ size_t n;
+
+ if (i >= rrd_len(rrd)) {
+ return (0);
+ }
+
+ n = (rrd->rrd_head + i) % RRD_MAX_ENTRIES;
+ return (&rrd->rrd_entries[n]);
+}
+
+uint64_t
+rrd_get(rrd_t *rrd, size_t i)
+{
+ const rrd_data_t *data = rrd_entry(rrd, i);
+
+ return (data == NULL ? 0 : data->rrdd_txg);
+}
+
+/* Add value to database. */
+void
+rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg)
+{
+ rrd_data_t *tail;
+
+ tail = rrd_tail_entry(rrd);
+ if (tail != NULL && tail->rrdd_time == time) {
+ if (tail->rrdd_txg < txg) {
+ tail->rrdd_txg = txg;
+ } else {
+ return;
+ }
+ }
+
+ rrd->rrd_entries[rrd->rrd_tail].rrdd_time = time;
+ rrd->rrd_entries[rrd->rrd_tail].rrdd_txg = txg;
+
+ rrd->rrd_tail = (rrd->rrd_tail + 1) % RRD_MAX_ENTRIES;
+
+ if (rrd->rrd_length < RRD_MAX_ENTRIES) {
+ rrd->rrd_length++;
+ } else {
+ rrd->rrd_head = (rrd->rrd_head + 1) % RRD_MAX_ENTRIES;
+ }
+}
+
+void
+dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg)
+{
+ hrtime_t daydiff, monthdiff, minutedif;
+
+ minutedif = time - rrd_tail(&db->dbr_minutes);
+ daydiff = time - rrd_tail(&db->dbr_days);
+ monthdiff = time - rrd_tail(&db->dbr_months);
+
+ if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60))
+ rrd_add(&db->dbr_months, time, txg);
+ else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60))
+ rrd_add(&db->dbr_days, time, txg);
+ else if (minutedif >= 0)
+ rrd_add(&db->dbr_minutes, time, txg);
+}
+
+/*
+ * We could do a binary search here, but the routine isn't frequently
+ * called and the data is small so we stick to a simple loop.
+ */
+static const rrd_data_t *
+rrd_query(rrd_t *rrd, hrtime_t tv, dbrrd_rounding_t rounding)
+{
+ const rrd_data_t *data = NULL;
+
+ for (size_t i = 0; i < rrd_len(rrd); i++) {
+ const rrd_data_t *cur = rrd_entry(rrd, i);
+
+ if (rounding == DBRRD_FLOOR) {
+ if (tv < cur->rrdd_time) {
+ break;
+ }
+ data = cur;
+ } else {
+ /* DBRRD_CEILING */
+ if (tv <= cur->rrdd_time) {
+ data = cur;
+ break;
+ }
+ }
+ }
+
+ return (data);
+}
+
+static const rrd_data_t *
+dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2)
+{
+
+ if (r1 == NULL)
+ return (r2);
+ if (r2 == NULL)
+ return (r1);
+
+ return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2);
+}
+
+uint64_t
+dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rounding)
+{
+ const rrd_data_t *data, *dm, *dd, *dy;
+
+ data = NULL;
+ dm = rrd_query(&r->dbr_minutes, tv, rounding);
+ dd = rrd_query(&r->dbr_days, tv, rounding);
+ dy = rrd_query(&r->dbr_months, tv, rounding);
+
+ data = dbrrd_closest(tv, dbrrd_closest(tv, dd, dm), dy);
+
+ return (data == NULL ? 0 : data->rrdd_txg);
+}
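For orientation, the retention figures in the header comment follow directly from the 256-entry limit: one record every 10 minutes covers about 42.7 hours (~1.8 days), one per day about 8.4 months, and one per month about 21 years. The wrap-around behaviour itself can be modelled with a scaled-down, standalone ring (4 entries instead of RRD_MAX_ENTRIES) that uses the same head/tail arithmetic as rrd_add() and rrd_entry(); this is an illustration only, not code from the change:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define RING_MAX	4	/* stand-in for RRD_MAX_ENTRIES (256) */

typedef struct {
	uint64_t time;
	uint64_t txg;
} entry_t;

typedef struct {
	entry_t	entries[RING_MAX];
	size_t	head;		/* oldest record */
	size_t	tail;		/* next slot to write */
	size_t	length;
} ring_t;

/* Append a record, overwriting the oldest one once the ring is full. */
static void
ring_add(ring_t *r, uint64_t time, uint64_t txg)
{
	r->entries[r->tail].time = time;
	r->entries[r->tail].txg = txg;
	r->tail = (r->tail + 1) % RING_MAX;

	if (r->length < RING_MAX)
		r->length++;
	else
		r->head = (r->head + 1) % RING_MAX;	/* drop oldest */
}

/* i-th oldest record, 0 .. length-1, as in rrd_entry(). */
static const entry_t *
ring_get(const ring_t *r, size_t i)
{
	if (i >= r->length)
		return (NULL);
	return (&r->entries[(r->head + i) % RING_MAX]);
}

int
main(void)
{
	ring_t r = { 0 };

	/* Six inserts into a 4-slot ring: the two oldest are overwritten. */
	for (uint64_t t = 1; t <= 6; t++)
		ring_add(&r, t * 600, t * 100);	/* one record per 10 minutes */

	for (size_t i = 0; i < r.length; i++) {
		const entry_t *e = ring_get(&r, i);
		printf("slot %zu: time=%llu txg=%llu\n", i,
		    (unsigned long long)e->time, (unsigned long long)e->txg);
	}
	return (0);
}
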
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
index 10a6d289fbf8..2af1efe82e62 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
@@ -112,8 +112,7 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
uint64_t fuid_size;
ASSERT(fuid_obj != 0);
- VERIFY(0 == dmu_bonus_hold(os, fuid_obj,
- FTAG, &db));
+ VERIFY0(dmu_bonus_hold(os, fuid_obj, FTAG, &db));
fuid_size = *(uint64_t *)db->db_data;
dmu_buf_rele(db, FTAG);
@@ -125,22 +124,21 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
int i;
packed = kmem_alloc(fuid_size, KM_SLEEP);
- VERIFY(dmu_read(os, fuid_obj, 0,
- fuid_size, packed, DMU_READ_PREFETCH) == 0);
- VERIFY(nvlist_unpack(packed, fuid_size,
- &nvp, 0) == 0);
- VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
- &fuidnvp, &count) == 0);
+ VERIFY0(dmu_read(os, fuid_obj, 0,
+ fuid_size, packed, DMU_READ_PREFETCH));
+ VERIFY0(nvlist_unpack(packed, fuid_size, &nvp, 0));
+ VERIFY0(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
+ &fuidnvp, &count));
for (i = 0; i != count; i++) {
fuid_domain_t *domnode;
const char *domain;
uint64_t idx;
- VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
- &domain) == 0);
- VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
- &idx) == 0);
+ VERIFY0(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
+ &domain));
+ VERIFY0(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
+ &idx));
domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
@@ -246,35 +244,33 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
&zfsvfs->z_fuid_obj, tx) == 0);
}
- VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP));
numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
- VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
- domnode->f_idx) == 0);
- VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
- VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
- domnode->f_ksid->kd_name) == 0);
+ VERIFY0(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_uint64(fuids[i], FUID_IDX,
+ domnode->f_idx));
+ VERIFY0(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0));
+ VERIFY0(nvlist_add_string(fuids[i], FUID_DOMAIN,
+ domnode->f_ksid->kd_name));
}
fnvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
(const nvlist_t * const *)fuids, numnodes);
for (i = 0; i != numnodes; i++)
nvlist_free(fuids[i]);
kmem_free(fuids, numnodes * sizeof (void *));
- VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
+ VERIFY0(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR));
packed = kmem_alloc(nvsize, KM_SLEEP);
- VERIFY(nvlist_pack(nvp, &packed, &nvsize,
- NV_ENCODE_XDR, KM_SLEEP) == 0);
+ VERIFY0(nvlist_pack(nvp, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP));
nvlist_free(nvp);
zfsvfs->z_fuid_size = nvsize;
dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
zfsvfs->z_fuid_size, packed, tx);
kmem_free(packed, zfsvfs->z_fuid_size);
- VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
- FTAG, &db));
+ VERIFY0(dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db));
dmu_buf_will_dirty(db, tx);
*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
dmu_buf_rele(db, FTAG);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index ebb1cfd07125..121b966b9864 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -1493,7 +1493,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
goto pool_props_bad;
(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
- VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP));
error = zfs_fill_zplprops_root(version, rootprops,
zplprops, NULL);
if (error != 0)
@@ -1704,6 +1704,8 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
static const zfs_ioc_key_t zfs_keys_pool_scrub[] = {
{"scan_type", DATA_TYPE_UINT64, 0},
{"scan_command", DATA_TYPE_UINT64, 0},
+ {"scan_date_start", DATA_TYPE_UINT64, ZK_OPTIONAL},
+ {"scan_date_end", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
static int
@@ -1712,6 +1714,7 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
spa_t *spa;
int error;
uint64_t scan_type, scan_cmd;
+ uint64_t date_start, date_end;
if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0)
return (SET_ERROR(EINVAL));
@@ -1721,6 +1724,11 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
if (scan_cmd >= POOL_SCRUB_FLAGS_END)
return (SET_ERROR(EINVAL));
+ if (nvlist_lookup_uint64(innvl, "scan_date_start", &date_start) != 0)
+ date_start = 0;
+ if (nvlist_lookup_uint64(innvl, "scan_date_end", &date_end) != 0)
+ date_end = 0;
+
if ((error = spa_open(poolname, &spa, FTAG)) != 0)
return (error);
@@ -1732,7 +1740,24 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
error = spa_scan_range(spa, scan_type,
spa_get_last_scrubbed_txg(spa), 0);
} else {
- error = spa_scan(spa, scan_type);
+ uint64_t txg_start, txg_end;
+
+ txg_start = txg_end = 0;
+ if (date_start != 0 || date_end != 0) {
+ mutex_enter(&spa->spa_txg_log_time_lock);
+ if (date_start != 0) {
+ txg_start = dbrrd_query(&spa->spa_txg_log_time,
+ date_start, DBRRD_FLOOR);
+ }
+
+ if (date_end != 0) {
+ txg_end = dbrrd_query(&spa->spa_txg_log_time,
+ date_end, DBRRD_CEILING);
+ }
+ mutex_exit(&spa->spa_txg_log_time_lock);
+ }
+
+ error = spa_scan_range(spa, scan_type, txg_start, txg_end);
}
spa_close(spa, FTAG);
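With the optional scan_date_start/scan_date_end keys, userland can request a scrub bounded by wall-clock dates; the handler maps each date to a txg through dbrrd_query() and passes the resulting range to spa_scan_range(). A hedged userland sketch of how such a request could be assembled, assuming the existing libzfs_core lzc_scrub() entry point and the POOL_SCAN_SCRUB / POOL_SCRUB_NORMAL constants; scrub_between() is a hypothetical helper, and the exact timestamp encoding should be checked against the matching userland change, which is not shown in this file:

#include <libzfs_core.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>

/*
 * Request a scrub of "pool" limited to data written between two
 * timestamps.  Unset (zero) bounds fall back to a full-pool scrub,
 * as in zfs_ioc_pool_scrub() above.  The caller is expected to have
 * called libzfs_core_init() already.
 */
static int
scrub_between(const char *pool, uint64_t date_start, uint64_t date_end)
{
	nvlist_t *innvl = fnvlist_alloc();
	int err;

	fnvlist_add_uint64(innvl, "scan_type", POOL_SCAN_SCRUB);
	fnvlist_add_uint64(innvl, "scan_command", POOL_SCRUB_NORMAL);
	if (date_start != 0)
		fnvlist_add_uint64(innvl, "scan_date_start", date_start);
	if (date_end != 0)
		fnvlist_add_uint64(innvl, "scan_date_end", date_end);

	err = lzc_scrub(ZFS_IOC_POOL_SCRUB, pool, innvl, NULL);
	fnvlist_free(innvl);
	return (err);
}
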
@@ -2220,7 +2245,7 @@ nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
*/
if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
+ VERIFY0(nvlist_add_uint64(props, zfs_prop_to_name(prop), value));
return (0);
}
@@ -2255,7 +2280,7 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
dmu_objset_type(os) == DMU_OST_ZFS) {
nvlist_t *nv;
- VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP));
if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
(err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
(err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
@@ -2458,7 +2483,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
+ VERIFY0(nvpair_value_nvlist(pair, &attrs));
if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
&pair) != 0)
return (SET_ERROR(EINVAL));
@@ -2513,9 +2538,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &pair) == 0);
+ VERIFY0(nvpair_value_nvlist(pair, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
}
/* all special properties are numeric except for keylocation */
@@ -2907,14 +2931,14 @@ props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
{
nvpair_t *pair;
- VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP));
pair = NULL;
while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
if (nvlist_exists(skipped, nvpair_name(pair)))
continue;
- VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
+ VERIFY0(nvlist_add_nvpair(*newprops, pair));
}
}
@@ -3039,11 +3063,11 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
switch (type) {
case PROP_TYPE_STRING:
- VERIFY(0 == nvlist_add_string(dummy, propname, ""));
+ VERIFY0(nvlist_add_string(dummy, propname, ""));
break;
case PROP_TYPE_NUMBER:
case PROP_TYPE_INDEX:
- VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
+ VERIFY0(nvlist_add_uint64(dummy, propname, 0));
break;
default:
err = SET_ERROR(EINVAL);
@@ -3429,14 +3453,14 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
/*
* Put the version in the zplprops
*/
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_VERSION), zplver));
if (norm == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm));
/*
* If we're normalizing, names must always be valid UTF-8 strings.
@@ -3446,55 +3470,55 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
if (u8 == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8));
if (sense == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_CASE), sense));
if (duq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSERQUOTA, &duq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq));
if (dgq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPQUOTA,
&dgq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq));
if (dpq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTQUOTA,
&dpq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq));
if (duoq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSEROBJQUOTA,
&duoq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq));
if (dgoq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPOBJQUOTA,
&dgoq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq));
if (dpoq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTOBJQUOTA,
&dpoq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq));
if (is_ci)
*is_ci = (sense == ZFS_CASE_INSENSITIVE);
@@ -3643,8 +3667,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
* file system creation, so go figure them out
* now.
*/
- VERIFY(nvlist_alloc(&zct.zct_zplprops,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&zct.zct_zplprops,
+ NV_UNIQUE_NAME, KM_SLEEP));
error = zfs_fill_zplprops(fsname, nvprops,
zct.zct_zplprops, &is_insensitive);
if (error != 0) {
@@ -4891,9 +4915,8 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
* format.
*/
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &pair) == 0);
+ VERIFY0(nvpair_value_nvlist(pair, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
}
/*
@@ -5000,15 +5023,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
}
break;
- case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
- /*
- * This property could require the allocation classes
- * feature to be active for setting, however we allow
- * it so that tests of settable properties succeed.
- * The CLI will issue a warning in this case.
- */
- break;
-
case ZFS_PROP_SHARESMB:
if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
return (SET_ERROR(ENOTSUP));
@@ -5087,7 +5101,7 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
if (props == NULL)
return (0);
- VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP));
zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
@@ -5099,9 +5113,8 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
sizeof (zc->zc_value));
if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
(err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
- VERIFY(nvlist_remove_nvpair(props, pair) == 0);
- VERIFY(nvlist_add_int32(errors,
- zc->zc_value, err) == 0);
+ VERIFY0(nvlist_remove_nvpair(props, pair));
+ VERIFY0(nvlist_add_int32(errors, zc->zc_value, err));
}
pair = next_pair;
}
@@ -5111,7 +5124,7 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
nvlist_free(errors);
errors = NULL;
} else {
- VERIFY(nvpair_value_int32(pair, &rv) == 0);
+ VERIFY0(nvpair_value_int32(pair, &rv));
}
if (errlist == NULL)
@@ -5128,16 +5141,14 @@ propval_equals(nvpair_t *p1, nvpair_t *p2)
if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
/* dsl_prop_get_all_impl() format */
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &p1) == 0);
+ VERIFY0(nvpair_value_nvlist(p1, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1));
}
if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &p2) == 0);
+ VERIFY0(nvpair_value_nvlist(p2, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2));
}
if (nvpair_type(p1) != nvpair_type(p2))
@@ -5146,14 +5157,14 @@ propval_equals(nvpair_t *p1, nvpair_t *p2)
if (nvpair_type(p1) == DATA_TYPE_STRING) {
const char *valstr1, *valstr2;
- VERIFY(nvpair_value_string(p1, &valstr1) == 0);
- VERIFY(nvpair_value_string(p2, &valstr2) == 0);
+ VERIFY0(nvpair_value_string(p1, &valstr1));
+ VERIFY0(nvpair_value_string(p2, &valstr2));
return (strcmp(valstr1, valstr2) == 0);
} else {
uint64_t intval1, intval2;
- VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
- VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
+ VERIFY0(nvpair_value_uint64(p1, &intval1));
+ VERIFY0(nvpair_value_uint64(p2, &intval2));
return (intval1 == intval2);
}
}
@@ -5221,7 +5232,7 @@ extract_delay_props(nvlist_t *props)
};
int i;
- VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP));
for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
nvp = nvlist_next_nvpair(props, nvp)) {
@@ -5237,8 +5248,8 @@ extract_delay_props(nvlist_t *props)
}
if (delayable[i] != 0) {
tmp = nvlist_prev_nvpair(props, nvp);
- VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
- VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
+ VERIFY0(nvlist_add_nvpair(delayprops, nvp));
+ VERIFY0(nvlist_remove_nvpair(props, nvp));
nvp = tmp;
}
}
@@ -5469,15 +5480,15 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
* using ASSERT() will be just like a VERIFY.
*/
if (recv_delayprops != NULL) {
- ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
+ ASSERT0(nvlist_merge(recvprops, recv_delayprops, 0));
nvlist_free(recv_delayprops);
}
if (local_delayprops != NULL) {
- ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
+ ASSERT0(nvlist_merge(localprops, local_delayprops, 0));
nvlist_free(local_delayprops);
}
if (inherited_delayprops != NULL) {
- ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
+ ASSERT0(nvlist_merge(localprops, inherited_delayprops, 0));
nvlist_free(inherited_delayprops);
}
*read_bytes = off - noff;
@@ -7326,8 +7337,8 @@ zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
ASSERT3U(ioc, <, ZFS_IOC_LAST);
- ASSERT3P(vec->zvec_legacy_func, ==, NULL);
- ASSERT3P(vec->zvec_func, ==, NULL);
+ ASSERT0P(vec->zvec_legacy_func);
+ ASSERT0P(vec->zvec_func);
vec->zvec_legacy_func = func;
vec->zvec_secpolicy = secpolicy;
@@ -7350,8 +7361,8 @@ zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
ASSERT3U(ioc, <, ZFS_IOC_LAST);
- ASSERT3P(vec->zvec_legacy_func, ==, NULL);
- ASSERT3P(vec->zvec_func, ==, NULL);
+ ASSERT0P(vec->zvec_legacy_func);
+ ASSERT0P(vec->zvec_func);
/* if we are logging, the name must be valid */
ASSERT(!allow_log || namecheck != NO_NAME);
@@ -8132,7 +8143,7 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
spa_t *spa;
nvlist_t *lognv = NULL;
- ASSERT(vec->zvec_legacy_func == NULL);
+ ASSERT0P(vec->zvec_legacy_func);
/*
* Add the innvl to the lognv before calling the func,
diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c
index 2ce25b72b288..ea17e049279f 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_log.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_log.c
@@ -607,8 +607,6 @@ zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
* called as soon as the write is on stable storage (be it via a DMU sync or a
* ZIL commit).
*/
-static uint_t zfs_immediate_write_sz = 32768;
-
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, boolean_t commit,
@@ -622,19 +620,12 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
if (callback != NULL)
- callback(callback_data);
+ callback(callback_data, 0);
return;
}
- if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT || o_direct)
- write_state = WR_INDIRECT;
- else if (!spa_has_slogs(zilog->zl_spa) &&
- resid >= zfs_immediate_write_sz)
- write_state = WR_INDIRECT;
- else if (commit)
- write_state = WR_COPIED;
- else
- write_state = WR_NEED_COPY;
+ write_state = zil_write_state(zilog, resid, blocksize, o_direct,
+ commit);
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
sizeof (gen));
@@ -672,7 +663,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
DMU_KEEP_CACHING);
DB_DNODE_EXIT(db);
if (err != 0) {
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
wr_state = WR_NEED_COPY;
@@ -938,6 +929,3 @@ zfs_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp,
len -= partlen;
}
}
-
-ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, UINT, ZMOD_RW,
- "Largest data block to write to zil");
diff --git a/sys/contrib/openzfs/module/zfs/zfs_quota.c b/sys/contrib/openzfs/module/zfs/zfs_quota.c
index b8fe512d4f09..2e91ccc27d6d 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_quota.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_quota.c
@@ -374,7 +374,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
if (*objp == 0) {
*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
DMU_OT_NONE, 0, tx);
- VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
+ VERIFY0(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
}
mutex_exit(&zfsvfs->z_lock);
@@ -386,7 +386,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
} else {
err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
}
- ASSERT(err == 0);
+ ASSERT0(err);
if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);
dmu_tx_commit(tx);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_rlock.c b/sys/contrib/openzfs/module/zfs/zfs_rlock.c
index 53eb3ef1b66e..4035baff77d6 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_rlock.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_rlock.c
@@ -666,7 +666,7 @@ zfs_rangelock_reduce(zfs_locked_range_t *lr, uint64_t off, uint64_t len)
/* Ensure there are no other locks */
ASSERT3U(avl_numnodes(&rl->rl_tree), ==, 1);
- ASSERT3U(lr->lr_offset, ==, 0);
+ ASSERT0(lr->lr_offset);
ASSERT3U(lr->lr_type, ==, RL_WRITER);
ASSERT(!lr->lr_proxy);
ASSERT3U(lr->lr_length, ==, UINT64_MAX);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_sa.c b/sys/contrib/openzfs/module/zfs/zfs_sa.c
index 59b6ae4e4203..8b4fc6fd7fbd 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_sa.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_sa.c
@@ -169,7 +169,7 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
ASSERT(MUTEX_HELD(&zp->z_lock));
VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
if (zp->z_is_sa)
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
+ VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
&xoap->xoa_av_scanstamp,
sizeof (xoap->xoa_av_scanstamp), tx));
else {
@@ -181,12 +181,12 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
len = sizeof (xoap->xoa_av_scanstamp) +
ZFS_OLD_ZNODE_PHYS_SIZE;
if (len > doi.doi_bonus_size)
- VERIFY(dmu_set_bonus(db, len, tx) == 0);
+ VERIFY0(dmu_set_bonus(db, len, tx));
(void) memcpy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp));
zp->z_pflags |= ZFS_BONUS_SCANSTAMP;
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+ VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
&zp->z_pflags, sizeof (uint64_t), tx));
}
}
@@ -286,7 +286,7 @@ zfs_sa_set_xattr(znode_t *zp, const char *name, const void *value, size_t vsize)
dmu_tx_commit(tx);
if (logsaxattr && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
- zil_commit(zilog, 0);
+ error = zil_commit(zilog, 0);
}
out_free:
vmem_free(obj, size);
@@ -427,11 +427,10 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
}
- VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
- VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
- count, tx) == 0);
+ VERIFY0(dmu_set_bonustype(db, DMU_OT_SA, tx));
+ VERIFY0(sa_replace_all_by_template_locked(hdl, sa_attrs, count, tx));
if (znode_acl.z_acl_extern_obj)
- VERIFY(0 == dmu_object_free(zfsvfs->z_os,
+ VERIFY0(dmu_object_free(zfsvfs->z_os,
znode_acl.z_acl_extern_obj, tx));
zp->z_is_sa = B_TRUE;
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 656ca4dc22ff..7bb9ba57c69e 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -27,6 +27,7 @@
* Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek
* Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
+ * Copyright (c) 2025, Klara, Inc.
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -49,6 +50,7 @@
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_crypt.h>
+#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
@@ -67,13 +69,14 @@
int zfs_bclone_enabled = 1;
/*
- * When set zfs_clone_range() waits for dirty data to be written to disk.
- * This allows the clone operation to reliably succeed when a file is modified
- * and then immediately cloned. For small files this may be slower than making
- * a copy of the file and is therefore not the default. However, in certain
- * scenarios this behavior may be desirable so a tunable is provided.
+ * When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty
+ * data to be written to disk before proceeding. This ensures that the clone
+ * operation reliably succeeds, even if a file is modified and then immediately
+ * cloned. Note that for small files this may be slower than simply copying
+ * the file. When set to 0 the clone operation will immediately fail if it
+ * encounters any dirty blocks. By default waiting is enabled.
*/
-int zfs_bclone_wait_dirty = 0;
+int zfs_bclone_wait_dirty = 1;
/*
* Enable Direct I/O. If this setting is 0, then all I/O requests will be
@@ -114,9 +117,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
- atomic_inc_32(&zp->z_sync_writes_cnt);
- zil_commit(zfsvfs->z_log, zp->z_id);
- atomic_dec_32(&zp->z_sync_writes_cnt);
+ error = zil_commit(zfsvfs->z_log, zp->z_id);
zfs_exit(zfsvfs, FTAG);
}
return (error);
@@ -375,8 +376,13 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
frsync = !!(ioflag & FRSYNC);
#endif
if (zfsvfs->z_log &&
- (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
- zil_commit(zfsvfs->z_log, zp->z_id);
+ (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) {
+ error = zil_commit(zfsvfs->z_log, zp->z_id);
+ if (error != 0) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
+ }
/*
* Lock the range against changes.
@@ -1074,8 +1080,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
return (error);
}
- if (commit)
- zil_commit(zilog, zp->z_id);
+ if (commit) {
+ error = zil_commit(zilog, zp->z_id);
+ if (error != 0) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
+ }
int64_t nwritten = start_resid - zfs_uio_resid(uio);
dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
@@ -1102,13 +1113,21 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags,
{
int error;
- if (flags != 0 || arg != 0)
+ if ((flags & ~ZFS_REWRITE_PHYSICAL) != 0 || arg != 0)
return (SET_ERROR(EINVAL));
zfsvfs_t *zfsvfs = ZTOZSB(zp);
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
+ /* Check if physical rewrite is allowed */
+ spa_t *spa = zfsvfs->z_os->os_spa;
+ if ((flags & ZFS_REWRITE_PHYSICAL) &&
+ !spa_feature_is_enabled(spa, SPA_FEATURE_PHYSICAL_REWRITE)) {
+ zfs_exit(zfsvfs, FTAG);
+ return (SET_ERROR(ENOTSUP));
+ }
+
if (zfs_is_readonly(zfsvfs)) {
zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EROFS));
@@ -1196,7 +1215,10 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags,
if (dmu_buf_is_dirty(dbp[i], tx))
continue;
nw += dbp[i]->db_size;
- dmu_buf_will_dirty(dbp[i], tx);
+ if (flags & ZFS_REWRITE_PHYSICAL)
+ dmu_buf_will_rewrite(dbp[i], tx);
+ else
+ dmu_buf_will_dirty(dbp[i], tx);
}
dmu_buf_rele_array(dbp, numbufs, FTAG);
@@ -1249,8 +1271,8 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
zilog = zfsvfs->z_log;
error = zfs_setacl(zp, vsecp, skipaclchk, cr);
- if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
- zil_commit(zilog, 0);
+ if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ error = zil_commit(zilog, 0);
zfs_exit(zfsvfs, FTAG);
return (error);
@@ -1935,7 +1957,7 @@ unlock:
ZFS_ACCESSTIME_STAMP(inzfsvfs, inzp);
if (outos->os_sync == ZFS_SYNC_ALWAYS) {
- zil_commit(zilog, outzp->z_id);
+ error = zil_commit(zilog, outzp->z_id);
}
*inoffp += done;
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index 00059b2c6de0..31b59c55f17b 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -24,6 +24,7 @@
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright (c) 2018 Datto Inc.
+ * Copyright (c) 2025, Klara, Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -103,6 +104,7 @@ static zil_kstat_values_t zil_stats = {
{ "zil_commit_error_count", KSTAT_DATA_UINT64 },
{ "zil_commit_stall_count", KSTAT_DATA_UINT64 },
{ "zil_commit_suspend_count", KSTAT_DATA_UINT64 },
+ { "zil_commit_crash_count", KSTAT_DATA_UINT64 },
{ "zil_itx_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 },
@@ -145,7 +147,7 @@ static uint64_t zil_slog_bulk = 64 * 1024 * 1024;
static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
-static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
+static int zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
static itx_t *zil_itx_clone(itx_t *oitx);
static uint64_t zil_max_waste_space(zilog_t *zilog);
@@ -367,6 +369,7 @@ zil_sums_init(zil_sums_t *zs)
wmsum_init(&zs->zil_commit_error_count, 0);
wmsum_init(&zs->zil_commit_stall_count, 0);
wmsum_init(&zs->zil_commit_suspend_count, 0);
+ wmsum_init(&zs->zil_commit_crash_count, 0);
wmsum_init(&zs->zil_itx_count, 0);
wmsum_init(&zs->zil_itx_indirect_count, 0);
wmsum_init(&zs->zil_itx_indirect_bytes, 0);
@@ -392,6 +395,7 @@ zil_sums_fini(zil_sums_t *zs)
wmsum_fini(&zs->zil_commit_error_count);
wmsum_fini(&zs->zil_commit_stall_count);
wmsum_fini(&zs->zil_commit_suspend_count);
+ wmsum_fini(&zs->zil_commit_crash_count);
wmsum_fini(&zs->zil_itx_count);
wmsum_fini(&zs->zil_itx_indirect_count);
wmsum_fini(&zs->zil_itx_indirect_bytes);
@@ -422,6 +426,8 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums)
wmsum_value(&zil_sums->zil_commit_stall_count);
zs->zil_commit_suspend_count.value.ui64 =
wmsum_value(&zil_sums->zil_commit_suspend_count);
+ zs->zil_commit_crash_count.value.ui64 =
+ wmsum_value(&zil_sums->zil_commit_crash_count);
zs->zil_itx_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_count);
zs->zil_itx_indirect_count.value.ui64 =
@@ -589,7 +595,7 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
* that we rewind to is invalid. Thus, we return -1 so
* zil_parse() doesn't attempt to read it.
*/
- if (BP_GET_LOGICAL_BIRTH(bp) >= first_txg)
+ if (BP_GET_BIRTH(bp) >= first_txg)
return (-1);
if (zil_bp_tree_add(zilog, bp) != 0)
@@ -615,7 +621,7 @@ zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
* Claim log block if not already committed and not already claimed.
* If tx == NULL, just verify that the block is claimable.
*/
- if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) < first_txg ||
+ if (BP_IS_HOLE(bp) || BP_GET_BIRTH(bp) < first_txg ||
zil_bp_tree_add(zilog, bp) != 0)
return (0);
@@ -640,7 +646,7 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg)
* waited for all writes to be stable first), so it is semantically
* correct to declare this the end of the log.
*/
- if (BP_GET_LOGICAL_BIRTH(&lr->lr_blkptr) >= first_txg) {
+ if (BP_GET_BIRTH(&lr->lr_blkptr) >= first_txg) {
error = zil_read_log_data(zilog, lr, NULL);
if (error != 0)
return (error);
@@ -687,7 +693,7 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx,
* just in case lets be safe and just stop here now instead of
* corrupting the pool.
*/
- if (BP_GET_BIRTH(bp) >= first_txg)
+ if (BP_GET_PHYSICAL_BIRTH(bp) >= first_txg)
return (SET_ERROR(ENOENT));
/*
@@ -742,7 +748,7 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg)
/*
* If we previously claimed it, we need to free it.
*/
- if (BP_GET_LOGICAL_BIRTH(bp) >= claim_txg &&
+ if (BP_GET_BIRTH(bp) >= claim_txg &&
zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) {
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
}
@@ -864,9 +870,9 @@ zil_free_lwb(zilog_t *zilog, lwb_t *lwb)
ASSERT(MUTEX_HELD(&zilog->zl_lock));
ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
lwb->lwb_state == LWB_STATE_FLUSH_DONE);
- ASSERT3P(lwb->lwb_child_zio, ==, NULL);
- ASSERT3P(lwb->lwb_write_zio, ==, NULL);
- ASSERT3P(lwb->lwb_root_zio, ==, NULL);
+ ASSERT0P(lwb->lwb_child_zio);
+ ASSERT0P(lwb->lwb_write_zio);
+ ASSERT0P(lwb->lwb_root_zio);
ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa));
ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa));
VERIFY(list_is_empty(&lwb->lwb_itxs));
@@ -991,8 +997,8 @@ zil_create(zilog_t *zilog)
*/
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
- ASSERT(zh->zh_claim_txg == 0);
- ASSERT(zh->zh_replay_seq == 0);
+ ASSERT0(zh->zh_claim_txg);
+ ASSERT0(zh->zh_replay_seq);
blk = zh->zh_log;
@@ -1104,7 +1110,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
zilog->zl_keep_first = keep_first;
if (!list_is_empty(&zilog->zl_lwb_list)) {
- ASSERT(zh->zh_claim_txg == 0);
+ ASSERT0(zh->zh_claim_txg);
VERIFY(!keep_first);
while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
if (lwb->lwb_buf != NULL)
@@ -1250,7 +1256,7 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
blkptr_t *bp;
int error;
- ASSERT(tx == NULL);
+ ASSERT0P(tx);
error = dmu_objset_from_ds(ds, &os);
if (error != 0) {
@@ -1351,7 +1357,7 @@ zil_commit_waiter_link_lwb(zil_commit_waiter_t *zcw, lwb_t *lwb)
ASSERT(!list_link_active(&zcw->zcw_node));
list_insert_tail(&lwb->lwb_waiters, zcw);
- ASSERT3P(zcw->zcw_lwb, ==, NULL);
+ ASSERT0P(zcw->zcw_lwb);
zcw->zcw_lwb = lwb;
}
@@ -1365,7 +1371,7 @@ zil_commit_waiter_link_nolwb(zil_commit_waiter_t *zcw, list_t *nolwb)
{
ASSERT(!list_link_active(&zcw->zcw_node));
list_insert_tail(nolwb, zcw);
- ASSERT3P(zcw->zcw_lwb, ==, NULL);
+ ASSERT0P(zcw->zcw_lwb);
}
void
@@ -1482,7 +1488,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
}
while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
mutex_enter(&zcw->zcw_lock);
@@ -1895,7 +1901,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state)
/*
* Finalize previously closed block and issue the write zio.
*/
-static void
+static int
zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
{
spa_t *spa = zilog->zl_spa;
@@ -1909,8 +1915,13 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
/* Actually fill the lwb with the data. */
for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
- itx = list_next(&lwb->lwb_itxs, itx))
- zil_lwb_commit(zilog, lwb, itx);
+ itx = list_next(&lwb->lwb_itxs, itx)) {
+ error = zil_lwb_commit(zilog, lwb, itx);
+ if (error != 0) {
+ ASSERT3U(error, ==, ESHUTDOWN);
+ return (error);
+ }
+ }
lwb->lwb_nused = lwb->lwb_nfilled;
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax);
@@ -1928,7 +1939,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
lwb->lwb_state = LWB_STATE_READY;
if (BP_IS_HOLE(&lwb->lwb_blk) && lwb->lwb_error == 0) {
mutex_exit(&zilog->zl_lock);
- return;
+ return (0);
}
mutex_exit(&zilog->zl_lock);
@@ -1997,7 +2008,7 @@ next_lwb:
&slog);
}
if (error == 0) {
- ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), ==, txg);
+ ASSERT3U(BP_GET_BIRTH(bp), ==, txg);
BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 :
ZIO_CHECKSUM_ZILOG);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@@ -2065,6 +2076,8 @@ next_lwb:
lwb = nlwb;
if (lwb)
goto next_lwb;
+
+ return (0);
}
/*
@@ -2095,6 +2108,19 @@ zil_max_waste_space(zilog_t *zilog)
*/
static uint_t zil_maxcopied = 7680;
+/*
+ * Largest write size to store the data directly into the ZIL.
+ */
+uint_t zfs_immediate_write_sz = 32768;
+
+/*
+ * When enabled and data blocks go to the normal vdev, treat special
+ * vdevs as SLOG, writing the data to the ZIL (WR_COPIED/WR_NEED_COPY).
+ * Disabling this forces indirect writes (WR_INDIRECT) to preserve
+ * special vdev throughput and endurance, likely at the cost of normal
+ * vdev latency.
+ */
+int zil_special_is_slog = 1;
+
uint64_t
zil_max_copied_data(zilog_t *zilog)
{
@@ -2102,6 +2128,46 @@ zil_max_copied_data(zilog_t *zilog)
return (MIN(max_data, zil_maxcopied));
}
+/*
+ * Determine the appropriate write state for ZIL transactions based on
+ * pool configuration, data placement, write size, and logbias settings.
+ */
+itx_wr_state_t
+zil_write_state(zilog_t *zilog, uint64_t size, uint32_t blocksize,
+ boolean_t o_direct, boolean_t commit)
+{
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT || o_direct)
+ return (WR_INDIRECT);
+
+ /*
+ * Don't use indirect for small writes, to reduce overhead.
+ * Also don't use indirect if less than half of the block is
+ * written and we are going to commit it immediately, since the
+ * next write might rewrite the same block again, causing
+ * inflation. If no commit is planned, then later writes might
+ * coalesce, and so indirect may be a good fit.
+ */
+ boolean_t indirect = (size >= zfs_immediate_write_sz &&
+ (size >= blocksize / 2 || !commit));
+
+ if (spa_has_slogs(zilog->zl_spa)) {
+ /* Dedicated slogs: never use indirect */
+ indirect = B_FALSE;
+ } else if (spa_has_special(zilog->zl_spa)) {
+ /* Special vdevs: only when beneficial */
+ boolean_t on_special = (blocksize <=
+ zilog->zl_os->os_zpl_special_smallblock);
+ indirect &= (on_special || !zil_special_is_slog);
+ }
+
+ if (indirect)
+ return (WR_INDIRECT);
+ else if (commit)
+ return (WR_COPIED);
+ else
+ return (WR_NEED_COPY);
+}
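
The decision above reads naturally as a small pure function. Below is a minimal standalone sketch of just that logic (not part of this change; the _x names and boolean inputs are stand-ins for the real zilog/spa state, and the logbias=throughput / O_DIRECT early return is folded into a single flag):

#include <stdint.h>
#include <stdio.h>

enum wr_state_x { WR_INDIRECT_X, WR_COPIED_X, WR_NEED_COPY_X };

static enum wr_state_x
pick_write_state_x(uint64_t size, uint32_t blocksize, int commit,
    int force_indirect, int has_slog, int has_special, int on_special,
    int special_is_slog, uint64_t immediate_write_sz)
{
	if (force_indirect)		/* logbias=throughput or O_DIRECT */
		return (WR_INDIRECT_X);

	int indirect = (size >= immediate_write_sz &&
	    (size >= blocksize / 2 || !commit));

	if (has_slog)			/* dedicated slog: never indirect */
		indirect = 0;
	else if (has_special)		/* special vdev: only when beneficial */
		indirect = indirect && (on_special || !special_is_slog);

	if (indirect)
		return (WR_INDIRECT_X);
	return (commit ? WR_COPIED_X : WR_NEED_COPY_X);
}

int
main(void)
{
	/* 64 KiB O_SYNC write into a 128 KiB record, pool with a slog. */
	printf("%d\n", pick_write_state_x(65536, 131072, 1, 0, 1, 0, 0, 1,
	    32768));	/* prints 1 (WR_COPIED_X) */
	return (0);
}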
+
static uint64_t
zil_itx_record_size(itx_t *itx)
{
@@ -2255,11 +2321,13 @@ cont:
return (lwb);
}
+static void zil_crash(zilog_t *zilog);
+
/*
* Fill the actual transaction data into the lwb, following zil_lwb_assign().
* Does not require locking.
*/
-static void
+static int
zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
{
lr_t *lr, *lrb;
@@ -2271,7 +2339,7 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
lrw = (lr_write_t *)lr;
if (lr->lrc_txtype == TX_COMMIT)
- return;
+ return (0);
reclen = lr->lrc_reclen;
dlen = zil_itx_data_size(itx);
@@ -2357,16 +2425,35 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
". Falling back to txg_wait_synced().",
error);
zfs_fallthrough;
- case EIO:
- txg_wait_synced(zilog->zl_dmu_pool,
- lr->lrc_txg);
+ case EIO: {
+ int error = txg_wait_synced_flags(
+ zilog->zl_dmu_pool,
+ lr->lrc_txg, TXG_WAIT_SUSPEND);
+ if (error != 0) {
+ ASSERT3U(error, ==, ESHUTDOWN);
+ /*
+ * zil_lwb_commit() is called from a
+ * loop over a list of itxs at the
+ * top of zil_lwb_write_issue(), which
+ * itself is called from a loop over a
+ * list of lwbs in various places.
+ * zil_crash() will free those itxs
+ * and sometimes the lwbs, so they
+ * are invalid when zil_crash() returns.
+ * Callers must pretty much abort
+ * immediately.
+ */
+ zil_crash(zilog);
+ return (error);
+ }
zfs_fallthrough;
+ }
case ENOENT:
zfs_fallthrough;
case EEXIST:
zfs_fallthrough;
case EALREADY:
- return;
+ return (0);
}
}
}
@@ -2374,6 +2461,8 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
lwb->lwb_nfilled += reclen + dlen;
ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused);
ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t)));
+
+ return (0);
}
itx_t *
@@ -2415,7 +2504,7 @@ zil_itx_clone(itx_t *oitx)
}
void
-zil_itx_destroy(itx_t *itx)
+zil_itx_destroy(itx_t *itx, int err)
{
ASSERT3U(itx->itx_size, >=, sizeof (itx_t));
ASSERT3U(itx->itx_lr.lrc_reclen, ==,
@@ -2424,7 +2513,7 @@ zil_itx_destroy(itx_t *itx)
IMPLY(itx->itx_callback != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT);
if (itx->itx_callback != NULL)
- itx->itx_callback(itx->itx_callback_data);
+ itx->itx_callback(itx->itx_callback_data, err);
zio_data_buf_free(itx, itx->itx_size);
}
@@ -2467,7 +2556,7 @@ zil_itxg_clean(void *arg)
if (itx->itx_lr.lrc_txtype == TX_COMMIT)
zil_commit_waiter_skip(itx->itx_private);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
cookie = NULL;
@@ -2477,7 +2566,7 @@ zil_itxg_clean(void *arg)
while ((itx = list_remove_head(list)) != NULL) {
/* commit itxs should never be on the async lists. */
ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
list_destroy(list);
kmem_free(ian, sizeof (itx_async_node_t));
@@ -2539,7 +2628,7 @@ zil_remove_async(zilog_t *zilog, uint64_t oid)
while ((itx = list_remove_head(&clean_list)) != NULL) {
/* commit itxs should never be on the async lists. */
ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
list_destroy(&clean_list);
}
@@ -2624,6 +2713,67 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
}
/*
+ * Post-crash cleanup. This is called from zil_clean() because cleanup
+ * needs to happen after every txg until the ZIL is restarted, and
+ * zilog_dirty() can arrange that easily, unlike zil_sync(), which is
+ * harder to get called without actual dirty data.
+ */
+static void
+zil_crash_clean(zilog_t *zilog, uint64_t synced_txg)
+{
+ ASSERT(MUTEX_HELD(&zilog->zl_lock));
+ ASSERT3U(zilog->zl_restart_txg, >, 0);
+
+ /* Clean up anything on the crash list from earlier txgs */
+ lwb_t *lwb;
+ while ((lwb = list_head(&zilog->zl_lwb_crash_list)) != NULL) {
+ if (lwb->lwb_alloc_txg >= synced_txg ||
+ lwb->lwb_max_txg >= synced_txg) {
+ /*
+ * This lwb was allocated or updated on this txg, or
+ * in the future. We stop processing here, to avoid
+ * the strange situation of freeing a ZIL block in
+ * the same or an earlier txg than the one it was
+ * allocated for.
+ *
+ * We'll take care of it on the next txg.
+ */
+ break;
+ }
+
+ /* This LWB is from the past, so we can clean it up now. */
+ list_remove(&zilog->zl_lwb_crash_list, lwb);
+ if (lwb->lwb_buf != NULL)
+ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
+ if (!BP_IS_HOLE(&lwb->lwb_blk))
+ /*
+ * Free on the next txg, since zil_clean() is called
+ * after synced_txg has already completed.
+ */
+ zio_free(zilog->zl_spa, synced_txg+1, &lwb->lwb_blk);
+ zil_free_lwb(zilog, lwb);
+ }
+
+ if (zilog->zl_restart_txg > synced_txg) {
+ /*
+ * We haven't reached the restart txg yet, so mark the ZIL dirty
+ * for the next txg and we'll consider it all again then.
+ */
+ zilog_dirty(zilog, synced_txg+1);
+ return;
+ }
+
+ /*
+ * We've reached the restart txg, so we can allow new calls to
+ * zil_commit(). All ZIL txgs have long since passed, so there should
+ * be no IO waiting.
+ */
+ ASSERT(list_is_empty(&zilog->zl_lwb_list));
+ ASSERT(list_is_empty(&zilog->zl_lwb_crash_list));
+
+ zilog->zl_restart_txg = 0;
+}
+
+/*
* If there are any in-memory intent log transactions which have now been
* synced then start up a taskq to free them. We should only do this after we
* have written out the uberblocks (i.e. txg has been committed) so that
@@ -2638,6 +2788,15 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
ASSERT3U(synced_txg, <, ZILTEST_TXG);
+ /* Do cleanup and restart after crash. */
+ if (zilog->zl_restart_txg > 0) {
+ mutex_enter(&zilog->zl_lock);
+ /* Make sure we didn't lose a race. */
+ if (zilog->zl_restart_txg > 0)
+ zil_crash_clean(zilog, synced_txg);
+ mutex_exit(&zilog->zl_lock);
+ }
+
mutex_enter(&itxg->itxg_lock);
if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) {
mutex_exit(&itxg->itxg_lock);
@@ -2830,13 +2989,13 @@ zil_prune_commit_list(zilog_t *zilog)
mutex_exit(&zilog->zl_lock);
list_remove(&zilog->zl_itx_commit_list, itx);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
IMPLY(itx != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT);
}
-static void
+static int
zil_commit_writer_stall(zilog_t *zilog)
{
/*
@@ -2861,8 +3020,22 @@ zil_commit_writer_stall(zilog_t *zilog)
*/
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
ZIL_STAT_BUMP(zilog, zil_commit_stall_count);
- txg_wait_synced(zilog->zl_dmu_pool, 0);
+
+ int err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ if (err != 0) {
+ ASSERT3U(err, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ }
+
+ /*
+ * Either zil_sync() has been called to wait for and clean up any
+ * in-flight LWBs, or zil_crash() has emptied out the list and arranged
+ * for them to be cleaned up later.
+ */
ASSERT(list_is_empty(&zilog->zl_lwb_list));
+
+ return (err);
}
static void
@@ -2902,19 +3075,14 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
- /*
- * Return if there's nothing to commit before we dirty the fs by
- * calling zil_create().
- */
- if (list_is_empty(&zilog->zl_itx_commit_list))
- return;
-
- list_create(&nolwb_itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
- list_create(&nolwb_waiters, sizeof (zil_commit_waiter_t),
- offsetof(zil_commit_waiter_t, zcw_node));
-
lwb = list_tail(&zilog->zl_lwb_list);
if (lwb == NULL) {
+ /*
+ * Return if there's nothing to commit before we dirty the fs.
+ */
+ if (list_is_empty(&zilog->zl_itx_commit_list))
+ return;
+
lwb = zil_create(zilog);
} else {
/*
@@ -2942,6 +3110,10 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
}
}
+ list_create(&nolwb_itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
+ list_create(&nolwb_waiters, sizeof (zil_commit_waiter_t),
+ offsetof(zil_commit_waiter_t, zcw_node));
+
while ((itx = list_remove_head(&zilog->zl_itx_commit_list)) != NULL) {
lr_t *lrc = &itx->itx_lr;
uint64_t txg = lrc->lrc_txg;
@@ -3030,7 +3202,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
} else {
ASSERT3S(lrc->lrc_txtype, !=, TX_COMMIT);
zilog->zl_cur_left -= zil_itx_full_size(itx);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
}
@@ -3041,9 +3213,14 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* the ZIL write pipeline; see the comment within
* zil_commit_writer_stall() for more details.
*/
- while ((lwb = list_remove_head(ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
- zil_commit_writer_stall(zilog);
+ int err = 0;
+ while ((lwb = list_remove_head(ilwbs)) != NULL) {
+ err = zil_lwb_write_issue(zilog, lwb);
+ if (err != 0)
+ break;
+ }
+ if (err == 0)
+ err = zil_commit_writer_stall(zilog);
/*
* Additionally, we have to signal and mark the "nolwb"
@@ -3061,7 +3238,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* the itx's callback if one exists for the itx.
*/
while ((itx = list_remove_head(&nolwb_itxs)) != NULL)
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
} else {
ASSERT(list_is_empty(&nolwb_waiters));
ASSERT3P(lwb, !=, NULL);
@@ -3111,14 +3288,21 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* possible, without significantly impacting the latency
* of each individual itx.
*/
- if (lwb->lwb_state == LWB_STATE_OPENED && !zilog->zl_parallel) {
+ if (lwb->lwb_state == LWB_STATE_OPENED &&
+ (!zilog->zl_parallel || zilog->zl_suspend > 0)) {
zil_burst_done(zilog);
list_insert_tail(ilwbs, lwb);
lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW);
if (lwb == NULL) {
- while ((lwb = list_remove_head(ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
- zil_commit_writer_stall(zilog);
+ int err = 0;
+ while ((lwb =
+ list_remove_head(ilwbs)) != NULL) {
+ err = zil_lwb_write_issue(zilog, lwb);
+ if (err != 0)
+ break;
+ }
+ if (err == 0)
+ zil_commit_writer_stall(zilog);
}
}
}
@@ -3177,10 +3361,23 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
zil_prune_commit_list(zilog);
zil_process_commit_list(zilog, zcw, &ilwbs);
+ /*
+ * If the ZIL failed somewhere inside zil_process_commit_list(), it
+ * will be because a fallback to txg_wait_synced_flags() happened at
+ * some point (e.g. zil_commit_writer_stall()). All such cases issue
+ * and empty ilwbs, so there will be nothing to do in the issue loop
+ * below. That's why we don't have to plumb the error value back from
+ * zil_process_commit_list(), and don't have to skip it.
+ */
+ IMPLY(zilog->zl_restart_txg > 0, list_is_empty(&ilwbs));
+
out:
mutex_exit(&zilog->zl_issuer_lock);
- while ((lwb = list_remove_head(&ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
+ int err = 0;
+ while ((lwb = list_remove_head(&ilwbs)) != NULL) {
+ if (err == 0)
+ err = zil_lwb_write_issue(zilog, lwb);
+ }
list_destroy(&ilwbs);
return (wtxg);
}
@@ -3436,7 +3633,7 @@ static void
zil_free_commit_waiter(zil_commit_waiter_t *zcw)
{
ASSERT(!list_link_active(&zcw->zcw_node));
- ASSERT3P(zcw->zcw_lwb, ==, NULL);
+ ASSERT0P(zcw->zcw_lwb);
ASSERT3B(zcw->zcw_done, ==, B_TRUE);
mutex_destroy(&zcw->zcw_lock);
cv_destroy(&zcw->zcw_cv);
@@ -3473,6 +3670,96 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
}
/*
+ * Crash the ZIL. This is something like suspending, but abandons the ZIL
+ * without further IO until the wanted txg completes. No effort is made to
+ * close the on-disk chain or do any other on-disk work, as the pool may
+ * have suspended. zil_sync() will handle cleanup as normal and restart the
+ * ZIL once enough txgs have passed.
+ */
+static void
+zil_crash(zilog_t *zilog)
+{
+ mutex_enter(&zilog->zl_lock);
+
+ uint64_t txg = spa_syncing_txg(zilog->zl_spa);
+ uint64_t restart_txg =
+ spa_syncing_txg(zilog->zl_spa) + TXG_CONCURRENT_STATES;
+
+ if (zilog->zl_restart_txg > 0) {
+ /*
+ * If the ZIL is already crashed, it's almost certainly because
+ * we lost a race involving multiple callers from
+ * zil_commit_impl().
+ */
+
+ /*
+ * This sanity check is to support my understanding that in the
+ * event of multiple callers to zil_crash(), only one of them
+ * can possibly be in the codepath to issue lwbs; the rest
+ * should be calling from zil_commit_impl() after their waiters
+ * have completed. As I understand it, a second thread trying
+ * to issue will eventually wait on zl_issuer_lock, and then
+ * have no work to do and leave.
+ *
+ * If more lwbs had been created and issued between zil_crash()
+ * calls, then we would probably just need to take those too, add
+ * them to the crash list and clean them up, but it complicates
+ * this function and I don't think it can happen.
+ */
+ ASSERT(list_is_empty(&zilog->zl_lwb_list));
+
+ mutex_exit(&zilog->zl_lock);
+ return;
+ }
+
+ zilog->zl_restart_txg = restart_txg;
+
+ /*
+ * Capture any live LWBs. Depending on the state of the pool they may
+ * represent in-flight IO that won't return for some time, and we want
+ * to make sure they don't get in the way of normal ZIL operation.
+ */
+ ASSERT(list_is_empty(&zilog->zl_lwb_crash_list));
+ list_move_tail(&zilog->zl_lwb_crash_list, &zilog->zl_lwb_list);
+
+ /*
+ * Run through the LWB list, destroying all itxs with an error and
+ * signalling that error to all waiters.
+ */
+ for (lwb_t *lwb = list_head(&zilog->zl_lwb_crash_list); lwb != NULL;
+ lwb = list_next(&zilog->zl_lwb_crash_list, lwb)) {
+ itx_t *itx;
+ while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
+ zil_itx_destroy(itx, EIO);
+
+ zil_commit_waiter_t *zcw;
+ while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
+ mutex_enter(&zcw->zcw_lock);
+ zcw->zcw_lwb = NULL;
+ zcw->zcw_zio_error = EIO;
+ zcw->zcw_done = B_TRUE;
+ cv_broadcast(&zcw->zcw_cv);
+ mutex_exit(&zcw->zcw_lock);
+ }
+ }
+
+ /*
+ * Zero the ZIL header bp after the ZIL restarts. We'll free it in
+ * zil_clean() when we clean up the lwbs.
+ */
+ zil_header_t *zh = zil_header_in_syncing_context(zilog);
+ BP_ZERO(&zh->zh_log);
+
+ /*
+ * Mark this ZIL dirty on the next txg, so that zil_clean() will be
+ * called for cleanup.
+ */
+ zilog_dirty(zilog, txg+1);
+
+ mutex_exit(&zilog->zl_lock);
+}
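
For intuition about the restart point computed above, here is a tiny standalone timeline sketch (illustrative only; it assumes TXG_CONCURRENT_STATES is 3, as in current OpenZFS, so a crash observed while txg 100 is syncing restarts the ZIL once txg 103 has synced):

#include <stdint.h>
#include <stdio.h>

#define	TXG_CONCURRENT_STATES_X	3	/* stand-in for the real constant */

int
main(void)
{
	uint64_t syncing_txg = 100;	/* txg that was syncing at crash time */
	uint64_t restart_txg = syncing_txg + TXG_CONCURRENT_STATES_X;

	for (uint64_t synced = syncing_txg; synced <= restart_txg; synced++) {
		if (restart_txg > synced)
			printf("txg %llu synced: still crashed, stay dirty\n",
			    (unsigned long long)synced);
		else
			printf("txg %llu synced: restart the ZIL\n",
			    (unsigned long long)synced);
	}
	return (0);
}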
+
+/*
* Commit ZFS Intent Log transactions (itxs) to stable storage.
*
* When writing ZIL transactions to the on-disk representation of the
@@ -3587,9 +3874,17 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
* but the order in which they complete will be the same order in
* which they were created.
*/
-void
+static int zil_commit_impl(zilog_t *zilog, uint64_t foid);
+
+int
zil_commit(zilog_t *zilog, uint64_t foid)
{
+ return (zil_commit_flags(zilog, foid, ZIL_COMMIT_FAILMODE));
+}
+
+int
+zil_commit_flags(zilog_t *zilog, uint64_t foid, zil_commit_flag_t flags)
+{
/*
* We should never attempt to call zil_commit on a snapshot for
* a couple of reasons:
@@ -3606,7 +3901,7 @@ zil_commit(zilog_t *zilog, uint64_t foid)
ASSERT3B(dmu_objset_is_snapshot(zilog->zl_os), ==, B_FALSE);
if (zilog->zl_sync == ZFS_SYNC_DISABLED)
- return;
+ return (0);
if (!spa_writeable(zilog->zl_spa)) {
/*
@@ -3617,10 +3912,23 @@ zil_commit(zilog_t *zilog, uint64_t foid)
* verifying that truth before we return to the caller.
*/
ASSERT(list_is_empty(&zilog->zl_lwb_list));
- ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL);
+ ASSERT0P(zilog->zl_last_lwb_opened);
for (int i = 0; i < TXG_SIZE; i++)
- ASSERT3P(zilog->zl_itxg[i].itxg_itxs, ==, NULL);
- return;
+ ASSERT0P(zilog->zl_itxg[i].itxg_itxs);
+ return (0);
+ }
+
+ int err = 0;
+
+ /*
+ * If the ZIL crashed, bypass it entirely, and rely on
+ * txg_wait_synced_flags() to get the data out to disk.
+ */
+ if (zilog->zl_restart_txg > 0) {
+ ZIL_STAT_BUMP(zilog, zil_commit_crash_count);
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ goto out;
}
/*
@@ -3632,14 +3940,43 @@ zil_commit(zilog_t *zilog, uint64_t foid)
*/
if (zilog->zl_suspend > 0) {
ZIL_STAT_BUMP(zilog, zil_commit_suspend_count);
- txg_wait_synced(zilog->zl_dmu_pool, 0);
- return;
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ if (err != 0) {
+ ASSERT3U(err, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ }
+ goto out;
}
- zil_commit_impl(zilog, foid);
+ err = zil_commit_impl(zilog, foid);
+
+out:
+ if (err == 0)
+ return (0);
+
+ /*
+ * The ZIL write failed and the pool is suspended. There's nothing else
+ * we can do except return or block.
+ */
+ ASSERT3U(err, ==, ESHUTDOWN);
+
+ /*
+ * Return the error if failmode=continue or the caller will handle
+ * it directly.
+ */
+ if (!(flags & ZIL_COMMIT_FAILMODE) ||
+ spa_get_failmode(zilog->zl_spa) == ZIO_FAILURE_MODE_CONTINUE)
+ return (SET_ERROR(EIO));
+
+ /*
+ * Block until the pool returns to service. We assume that the data
+ * will make it out to disk in the end, and so return success.
+ */
+ txg_wait_synced(zilog->zl_dmu_pool, 0);
+ return (0);
}
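
The post-crash policy above reduces to one check. A standalone sketch of just that decision (illustrative only; the flag value and failmode enum are local stand-ins for ZIL_COMMIT_FAILMODE and the pool failmode property):

#include <stdio.h>

#define	ZIL_COMMIT_FAILMODE_X	0x1	/* stand-in for the real flag */
enum failmode_x { FAILMODE_WAIT_X, FAILMODE_CONTINUE_X };

static const char *
commit_outcome_x(int flags, enum failmode_x failmode)
{
	if (!(flags & ZIL_COMMIT_FAILMODE_X) ||
	    failmode == FAILMODE_CONTINUE_X)
		return ("return EIO to the caller");
	return ("block in txg_wait_synced(), then return 0");
}

int
main(void)
{
	printf("zil_commit(), failmode=wait:     %s\n",
	    commit_outcome_x(ZIL_COMMIT_FAILMODE_X, FAILMODE_WAIT_X));
	printf("zil_commit(), failmode=continue: %s\n",
	    commit_outcome_x(ZIL_COMMIT_FAILMODE_X, FAILMODE_CONTINUE_X));
	printf("caller handling errors itself:   %s\n",
	    commit_outcome_x(0, FAILMODE_WAIT_X));
	return (0);
}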
-void
+static int
zil_commit_impl(zilog_t *zilog, uint64_t foid)
{
ZIL_STAT_BUMP(zilog, zil_commit_count);
@@ -3676,6 +4013,7 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid)
uint64_t wtxg = zil_commit_writer(zilog, zcw);
zil_commit_waiter(zilog, zcw);
+ int err = 0;
if (zcw->zcw_zio_error != 0) {
/*
* If there was an error writing out the ZIL blocks that
@@ -3688,13 +4026,29 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid)
ZIL_STAT_BUMP(zilog, zil_commit_error_count);
DTRACE_PROBE2(zil__commit__io__error,
zilog_t *, zilog, zil_commit_waiter_t *, zcw);
- txg_wait_synced(zilog->zl_dmu_pool, 0);
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
} else if (wtxg != 0) {
ZIL_STAT_BUMP(zilog, zil_commit_suspend_count);
- txg_wait_synced(zilog->zl_dmu_pool, wtxg);
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, wtxg,
+ TXG_WAIT_SUSPEND);
}
zil_free_commit_waiter(zcw);
+
+ if (err == 0)
+ return (0);
+
+ /*
+ * The ZIL write failed, and the pool also failed in the fallback to
+ * txg_wait_synced_flags(). Right now we have no idea whether the data
+ * is on disk; the pool is probably suspended, so we have no idea when
+ * it's coming back. All we can do is shut down and return the error
+ * to the caller.
+ */
+ ASSERT3U(err, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ return (err);
}
/*
@@ -3720,7 +4074,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
mutex_enter(&zilog->zl_lock);
- ASSERT(zilog->zl_stop_sync == 0);
+ ASSERT0(zilog->zl_stop_sync);
if (*replayed_seq != 0) {
ASSERT(zh->zh_replay_seq < *replayed_seq);
@@ -3890,6 +4244,8 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
list_create(&zilog->zl_lwb_list, sizeof (lwb_t),
offsetof(lwb_t, lwb_node));
+ list_create(&zilog->zl_lwb_crash_list, sizeof (lwb_t),
+ offsetof(lwb_t, lwb_node));
list_create(&zilog->zl_itx_commit_list, sizeof (itx_t),
offsetof(itx_t, itx_node));
@@ -3914,9 +4270,12 @@ zil_free(zilog_t *zilog)
ASSERT0(zilog->zl_suspend);
ASSERT0(zilog->zl_suspending);
+ ASSERT0(zilog->zl_restart_txg);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
list_destroy(&zilog->zl_lwb_list);
+ ASSERT(list_is_empty(&zilog->zl_lwb_crash_list));
+ list_destroy(&zilog->zl_lwb_crash_list);
ASSERT(list_is_empty(&zilog->zl_itx_commit_list));
list_destroy(&zilog->zl_itx_commit_list);
@@ -3952,8 +4311,8 @@ zil_open(objset_t *os, zil_get_data_t *get_data, zil_sums_t *zil_sums)
{
zilog_t *zilog = dmu_objset_zil(os);
- ASSERT3P(zilog->zl_get_data, ==, NULL);
- ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL);
+ ASSERT0P(zilog->zl_get_data);
+ ASSERT0P(zilog->zl_last_lwb_opened);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
zilog->zl_get_data = get_data;
@@ -3972,7 +4331,8 @@ zil_close(zilog_t *zilog)
uint64_t txg;
if (!dmu_objset_is_snapshot(zilog->zl_os)) {
- zil_commit(zilog, 0);
+ if (zil_commit_flags(zilog, 0, ZIL_COMMIT_NOW) != 0)
+ txg_wait_synced(zilog->zl_dmu_pool, 0);
} else {
ASSERT(list_is_empty(&zilog->zl_lwb_list));
ASSERT0(zilog->zl_dirty_max_txg);
@@ -4073,6 +4433,17 @@ zil_suspend(const char *osname, void **cookiep)
return (SET_ERROR(EBUSY));
}
+ if (zilog->zl_restart_txg > 0) {
+ /*
+ * ZIL crashed. It effectively _is_ suspended, but callers
+ * are usually trying to make sure it's empty on-disk, which
+ * we can't guarantee right now.
+ */
+ mutex_exit(&zilog->zl_lock);
+ dmu_objset_rele(os, suspend_tag);
+ return (SET_ERROR(EBUSY));
+ }
+
/*
* Don't put a long hold in the cases where we can avoid it. This
* is when there is no cookie so we are doing a suspend & resume
@@ -4105,6 +4476,11 @@ zil_suspend(const char *osname, void **cookiep)
zil_resume(os);
else
*cookiep = os;
+
+ if (zilog->zl_restart_txg > 0)
+ /* ZIL crashed while we were waiting. */
+ return (SET_ERROR(EBUSY));
+
return (0);
}
@@ -4146,17 +4522,34 @@ zil_suspend(const char *osname, void **cookiep)
* would just call txg_wait_synced(), because zl_suspend is set.
* txg_wait_synced() doesn't wait for these lwb's to be
* LWB_STATE_FLUSH_DONE before returning.
+ *
+ * However, zil_commit_impl() itself can return an error if any of the
+ * lwbs fail, or if the pool suspends in the fallback to
+ * txg_wait_synced_flags(), which affects what we do next, so we
+ * capture that error.
*/
- zil_commit_impl(zilog, 0);
+ error = zil_commit_impl(zilog, 0);
+ if (error == ESHUTDOWN)
+ /* zil_commit_impl() has called zil_crash() already */
+ error = SET_ERROR(EBUSY);
/*
* Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE, we
* use txg_wait_synced() to ensure the data from the zilog has
* migrated to the main pool before calling zil_destroy().
*/
- txg_wait_synced(zilog->zl_dmu_pool, 0);
+ if (error == 0) {
+ error = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ if (error != 0) {
+ ASSERT3U(error, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ error = SET_ERROR(EBUSY);
+ }
+ }
- zil_destroy(zilog, B_FALSE);
+ if (error == 0)
+ zil_destroy(zilog, B_FALSE);
mutex_enter(&zilog->zl_lock);
zilog->zl_suspending = B_FALSE;
@@ -4170,7 +4563,8 @@ zil_suspend(const char *osname, void **cookiep)
zil_resume(os);
else
*cookiep = os;
- return (0);
+
+ return (error);
}
void
@@ -4333,7 +4727,7 @@ zil_replay(objset_t *os, void *arg,
zilog->zl_replay = B_TRUE;
zilog->zl_replay_time = ddi_get_lbolt();
- ASSERT(zilog->zl_replay_blks == 0);
+ ASSERT0(zilog->zl_replay_blks);
(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
zh->zh_claim_txg, B_TRUE);
vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE);
@@ -4418,3 +4812,9 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_zil, zil_, maxcopied, UINT, ZMOD_RW,
"Limit in bytes WR_COPIED size");
+
+ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, UINT, ZMOD_RW,
+ "Largest write size to store data into ZIL");
+
+ZFS_MODULE_PARAM(zfs_zil, zil_, special_is_slog, INT, ZMOD_RW,
+ "Treat special vdevs as SLOG");
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 6d7bce8b0e10..3f0ddb63249d 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -339,8 +339,8 @@ zio_fini(void)
}
for (size_t i = 0; i < n; i++) {
- VERIFY3P(zio_buf_cache[i], ==, NULL);
- VERIFY3P(zio_data_buf_cache[i], ==, NULL);
+ VERIFY0P(zio_buf_cache[i]);
+ VERIFY0P(zio_data_buf_cache[i]);
}
if (zio_ksp != NULL) {
@@ -692,7 +692,7 @@ error:
zio->io_error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, &zio->io_bookmark,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, &zio->io_bookmark, zio, 0);
}
@@ -771,7 +771,7 @@ zio_add_child_impl(zio_t *pio, zio_t *cio, boolean_t first)
else
mutex_enter(&cio->io_lock);
- ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
+ ASSERT0(pio->io_state[ZIO_WAIT_DONE]);
uint64_t *countp = pio->io_children[cio->io_child_type];
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
@@ -821,7 +821,7 @@ zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait)
boolean_t waiting = B_FALSE;
mutex_enter(&zio->io_lock);
- ASSERT(zio->io_stall == NULL);
+ ASSERT0P(zio->io_stall);
for (int c = 0; c < ZIO_CHILD_TYPES; c++) {
if (!(ZIO_CHILD_BIT_IS_SET(childbits, c)))
continue;
@@ -850,15 +850,9 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait,
mutex_enter(&pio->io_lock);
if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
*errorp = zio_worst_error(*errorp, zio->io_error);
- pio->io_reexecute |= zio->io_reexecute;
+ pio->io_post |= zio->io_post;
ASSERT3U(*countp, >, 0);
- /*
- * Propogate the Direct I/O checksum verify failure to the parent.
- */
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
- pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
-
(*countp)--;
if (*countp == 0 && pio->io_stall == countp) {
@@ -961,8 +955,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_t *zio;
IMPLY(type != ZIO_TYPE_TRIM, psize <= SPA_MAXBLOCKSIZE);
- ASSERT(P2PHASE(psize, SPA_MINBLOCKSIZE) == 0);
- ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
+ ASSERT0(P2PHASE(psize, SPA_MINBLOCKSIZE));
+ ASSERT0(P2PHASE(offset, SPA_MINBLOCKSIZE));
ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
@@ -1110,7 +1104,8 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
"DVA[1]=%#llx/%#llx "
"DVA[2]=%#llx/%#llx "
"prop=%#llx "
- "pad=%#llx,%#llx "
+ "prop2=%#llx "
+ "pad=%#llx "
"phys_birth=%#llx "
"birth=%#llx "
"fill=%#llx "
@@ -1123,9 +1118,9 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
(long long)bp->blk_dva[2].dva_word[0],
(long long)bp->blk_dva[2].dva_word[1],
(long long)bp->blk_prop,
- (long long)bp->blk_pad[0],
- (long long)bp->blk_pad[1],
- (long long)BP_GET_PHYSICAL_BIRTH(bp),
+ (long long)bp->blk_prop2,
+ (long long)bp->blk_pad,
+ (long long)BP_GET_RAW_PHYSICAL_BIRTH(bp),
(long long)BP_GET_LOGICAL_BIRTH(bp),
(long long)bp->blk_fill,
(long long)bp->blk_cksum.zc_word[0],
@@ -1340,7 +1335,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
{
zio_t *zio;
- zio = zio_create(pio, spa, BP_GET_BIRTH(bp), bp,
+ zio = zio_create(pio, spa, BP_GET_PHYSICAL_BIRTH(bp), bp,
data, size, size, done, private,
ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
@@ -1456,7 +1451,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
metaslab_check_free(spa, bp);
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
} else {
- VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL);
+ VERIFY0P(zio_free_sync(NULL, spa, txg, bp, 0));
}
}
@@ -1564,7 +1559,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
{
zio_t *zio;
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
@@ -1585,7 +1580,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
{
zio_t *zio;
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
@@ -1649,7 +1644,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
* through the mirror during self healing. See comment in
* vdev_mirror_io_done() for more details.
*/
- ASSERT0(pio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+ ASSERT0(pio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
} else if (type == ZIO_TYPE_WRITE &&
pio->io_prop.zp_direct_write == B_TRUE) {
/*
@@ -1685,7 +1680,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
* If this is a retried I/O then we ignore it since we will
* have already processed the original allocating I/O.
*/
- if (flags & ZIO_FLAG_IO_ALLOCATING &&
+ if (flags & ZIO_FLAG_ALLOC_THROTTLED &&
(vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) {
ASSERT(pio->io_metaslab_class != NULL);
ASSERT(pio->io_metaslab_class->mc_alloc_throttle_enabled);
@@ -1695,7 +1690,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) ||
pio->io_child_type == ZIO_CHILD_GANG);
- flags &= ~ZIO_FLAG_IO_ALLOCATING;
+ flags &= ~ZIO_FLAG_ALLOC_THROTTLED;
}
zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size,
@@ -1752,7 +1747,7 @@ zio_flush(zio_t *pio, vdev_t *vd)
void
zio_shrink(zio_t *zio, uint64_t size)
{
- ASSERT3P(zio->io_executor, ==, NULL);
+ ASSERT0P(zio->io_executor);
ASSERT3U(zio->io_orig_size, ==, zio->io_size);
ASSERT3U(size, <=, zio->io_size);
@@ -1860,7 +1855,7 @@ zio_write_bp_init(zio_t *zio)
blkptr_t *bp = zio->io_bp;
zio_prop_t *zp = &zio->io_prop;
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) != zio->io_txg);
+ ASSERT(BP_GET_BIRTH(bp) != zio->io_txg);
*bp = *zio->io_bp_override;
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@@ -1946,9 +1941,9 @@ zio_write_compress(zio_t *zio)
}
ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
- ASSERT(zio->io_bp_override == NULL);
+ ASSERT0P(zio->io_bp_override);
- if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg) {
+ if (!BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) == zio->io_txg) {
/*
* We're rewriting an existing block, which means we're
* working on behalf of spa_sync(). For spa_sync() to
@@ -2085,7 +2080,7 @@ zio_write_compress(zio_t *zio)
* spa_sync() to allocate new blocks, but force rewrites after that.
* There should only be a handful of blocks after pass 1 in any case.
*/
- if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg &&
+ if (!BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) == zio->io_txg &&
BP_GET_PSIZE(bp) == psize &&
pass >= zfs_sync_pass_rewrite) {
VERIFY3U(psize, !=, 0);
@@ -2441,7 +2436,7 @@ __zio_execute(zio_t *zio)
ASSERT(!MUTEX_HELD(&zio->io_lock));
ASSERT(ISP2(stage));
- ASSERT(zio->io_stall == NULL);
+ ASSERT0P(zio->io_stall);
do {
stage <<= 1;
@@ -2514,7 +2509,7 @@ zio_wait(zio_t *zio)
int error;
ASSERT3S(zio->io_stage, ==, ZIO_STAGE_OPEN);
- ASSERT3P(zio->io_executor, ==, NULL);
+ ASSERT0P(zio->io_executor);
zio->io_waiter = curthread;
ASSERT0(zio->io_queued_timestamp);
@@ -2556,7 +2551,7 @@ zio_nowait(zio_t *zio)
if (zio == NULL)
return;
- ASSERT3P(zio->io_executor, ==, NULL);
+ ASSERT0P(zio->io_executor);
if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
list_is_empty(&zio->io_parent_list)) {
@@ -2595,14 +2590,14 @@ zio_reexecute(void *arg)
ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL);
ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN);
- ASSERT(pio->io_gang_leader == NULL);
- ASSERT(pio->io_gang_tree == NULL);
+ ASSERT0P(pio->io_gang_leader);
+ ASSERT0P(pio->io_gang_tree);
mutex_enter(&pio->io_lock);
pio->io_flags = pio->io_orig_flags;
pio->io_stage = pio->io_orig_stage;
pio->io_pipeline = pio->io_orig_pipeline;
- pio->io_reexecute = 0;
+ pio->io_post = 0;
pio->io_flags |= ZIO_FLAG_REEXECUTED;
pio->io_pipeline_trace = 0;
pio->io_error = 0;
@@ -2694,7 +2689,7 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
ASSERT(zio != spa->spa_suspend_zio_root);
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
- ASSERT(zio_unique_parent(zio) == NULL);
+ ASSERT0P(zio_unique_parent(zio));
ASSERT(zio->io_stage == ZIO_STAGE_DONE);
zio_add_child(spa->spa_suspend_zio_root, zio);
}
@@ -2749,11 +2744,14 @@ zio_resume_wait(spa_t *spa)
* being nearly full, it calls zio_write_gang_block() to construct the
* block from smaller fragments.
*
- * A gang block consists of a gang header (zio_gbh_phys_t) and up to
- * three (SPA_GBH_NBLKPTRS) gang members. The gang header is just like
- * an indirect block: it's an array of block pointers. It consumes
- * only one sector and hence is allocatable regardless of fragmentation.
- * The gang header's bps point to its gang members, which hold the data.
+ * A gang block consists of a gang header and up to gbh_nblkptrs(size)
+ * gang members. The gang header is like an indirect block: it's an array
+ * of block pointers, though the header has a small tail (a zio_eck_t)
+ * that stores an embedded checksum. It is allocated using only a single
+ * sector as the requested size, and hence is allocatable regardless of
+ * fragmentation. Its size is determined by the smallest allocatable
+ * asize of the vdevs it was allocated on. The gang header's bps point
+ * to its gang members, which hold the data.
*
* Gang blocks are self-checksumming, using the bp's <vdev, offset, txg>
* as the verifier to ensure uniqueness of the SHA256 checksum.
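
As a back-of-envelope check of the capacity described above (illustrative only; it assumes gbh_nblkptrs() is roughly (size - sizeof (zio_eck_t)) / sizeof (blkptr_t), with a 128-byte blkptr_t and a 40-byte zio_eck_t):

#include <stdio.h>

int
main(void)
{
	const unsigned blkptr_sz = 128;		/* assumed sizeof (blkptr_t) */
	const unsigned eck_sz = 40;		/* assumed sizeof (zio_eck_t) */
	const unsigned old_gbh = 512;		/* SPA_OLD_GANGBLOCKSIZE */
	const unsigned big_gbh = 4096;		/* e.g. a 4K-sector header */

	printf("512-byte header:  %u block pointers\n",
	    (old_gbh - eck_sz) / blkptr_sz);	/* 3, the old limit */
	printf("4096-byte header: %u block pointers\n",
	    (big_gbh - eck_sz) / blkptr_sz);	/* 31 */
	return (0);
}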
@@ -2832,10 +2830,10 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
if (gn != NULL) {
abd_t *gbh_abd =
- abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE);
+ abd_get_from_buf(gn->gn_gbh, gn->gn_gangblocksize);
zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp,
- gbh_abd, SPA_GANGBLOCKSIZE, zio_gang_issue_func_done, NULL,
- pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
+ gbh_abd, gn->gn_gangblocksize, zio_gang_issue_func_done,
+ NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
&pio->io_bookmark);
/*
* As we rewrite each gang header, the pipeline will compute
@@ -2906,14 +2904,16 @@ static zio_gang_issue_func_t *zio_gang_issue_func[ZIO_TYPES] = {
static void zio_gang_tree_assemble_done(zio_t *zio);
static zio_gang_node_t *
-zio_gang_node_alloc(zio_gang_node_t **gnpp)
+zio_gang_node_alloc(zio_gang_node_t **gnpp, uint64_t gangblocksize)
{
zio_gang_node_t *gn;
- ASSERT(*gnpp == NULL);
+ ASSERT0P(*gnpp);
- gn = kmem_zalloc(sizeof (*gn), KM_SLEEP);
- gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE);
+ gn = kmem_zalloc(sizeof (*gn) +
+ (gbh_nblkptrs(gangblocksize) * sizeof (gn)), KM_SLEEP);
+ gn->gn_gangblocksize = gn->gn_allocsize = gangblocksize;
+ gn->gn_gbh = zio_buf_alloc(gangblocksize);
*gnpp = gn;
return (gn);
@@ -2924,11 +2924,12 @@ zio_gang_node_free(zio_gang_node_t **gnpp)
{
zio_gang_node_t *gn = *gnpp;
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++)
- ASSERT(gn->gn_child[g] == NULL);
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_allocsize); g++)
+ ASSERT0P(gn->gn_child[g]);
- zio_buf_free(gn->gn_gbh, SPA_GANGBLOCKSIZE);
- kmem_free(gn, sizeof (*gn));
+ zio_buf_free(gn->gn_gbh, gn->gn_allocsize);
+ kmem_free(gn, sizeof (*gn) +
+ (gbh_nblkptrs(gn->gn_allocsize) * sizeof (gn)));
*gnpp = NULL;
}
@@ -2940,7 +2941,7 @@ zio_gang_tree_free(zio_gang_node_t **gnpp)
if (gn == NULL)
return;
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++)
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_allocsize); g++)
zio_gang_tree_free(&gn->gn_child[g]);
zio_gang_node_free(gnpp);
@@ -2949,13 +2950,28 @@ zio_gang_tree_free(zio_gang_node_t **gnpp)
static void
zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp)
{
- zio_gang_node_t *gn = zio_gang_node_alloc(gnpp);
- abd_t *gbh_abd = abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE);
+ uint64_t gangblocksize = UINT64_MAX;
+ if (spa_feature_is_active(gio->io_spa,
+ SPA_FEATURE_DYNAMIC_GANG_HEADER)) {
+ spa_config_enter(gio->io_spa, SCL_VDEV, FTAG, RW_READER);
+ for (int dva = 0; dva < BP_GET_NDVAS(bp); dva++) {
+ vdev_t *vd = vdev_lookup_top(gio->io_spa,
+ DVA_GET_VDEV(&bp->blk_dva[dva]));
+ uint64_t psize = vdev_gang_header_psize(vd);
+ gangblocksize = MIN(gangblocksize, psize);
+ }
+ spa_config_exit(gio->io_spa, SCL_VDEV, FTAG);
+ } else {
+ gangblocksize = SPA_OLD_GANGBLOCKSIZE;
+ }
+ ASSERT3U(gangblocksize, !=, UINT64_MAX);
+ zio_gang_node_t *gn = zio_gang_node_alloc(gnpp, gangblocksize);
+ abd_t *gbh_abd = abd_get_from_buf(gn->gn_gbh, gangblocksize);
ASSERT(gio->io_gang_leader == gio);
ASSERT(BP_IS_GANG(bp));
- zio_nowait(zio_read(gio, gio->io_spa, bp, gbh_abd, SPA_GANGBLOCKSIZE,
+ zio_nowait(zio_read(gio, gio->io_spa, bp, gbh_abd, gangblocksize,
zio_gang_tree_assemble_done, gn, gio->io_priority,
ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark));
}
@@ -2978,13 +2994,17 @@ zio_gang_tree_assemble_done(zio_t *zio)
byteswap_uint64_array(abd_to_buf(zio->io_abd), zio->io_size);
ASSERT3P(abd_to_buf(zio->io_abd), ==, gn->gn_gbh);
- ASSERT(zio->io_size == SPA_GANGBLOCKSIZE);
- ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
+ /*
+ * If this was an old-style gangblock, the gangblocksize should have
+ * been updated in zio_checksum_error to reflect that.
+ */
+ ASSERT3U(gbh_eck(gn->gn_gbh, gn->gn_gangblocksize)->zec_magic,
+ ==, ZEC_MAGIC);
abd_free(zio->io_abd);
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
- blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_gangblocksize); g++) {
+ blkptr_t *gbp = gbh_bp(gn->gn_gbh, g);
if (!BP_IS_GANG(gbp))
continue;
zio_gang_tree_assemble(gio, gbp, &gn->gn_child[g]);
@@ -3009,10 +3029,11 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, abd_t *data,
zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data, offset);
if (gn != NULL) {
- ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
+ ASSERT3U(gbh_eck(gn->gn_gbh,
+ gn->gn_gangblocksize)->zec_magic, ==, ZEC_MAGIC);
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
- blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_gangblocksize); g++) {
+ blkptr_t *gbp = gbh_bp(gn->gn_gbh, g);
if (BP_IS_HOLE(gbp))
continue;
zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data,
@@ -3119,6 +3140,13 @@ zio_write_gang_done(zio_t *zio)
abd_free(zio->io_abd);
}
+static void
+zio_update_feature(void *arg, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ spa_feature_incr(spa, (spa_feature_t)(uintptr_t)arg, tx);
+}
+
static zio_t *
zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
{
@@ -3157,20 +3185,24 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
ASSERT(ZIO_HAS_ALLOCATOR(pio));
int flags = METASLAB_GANG_HEADER;
- if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (pio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(has_data);
flags |= METASLAB_ASYNC_ALLOC;
}
- error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE,
+ uint64_t gangblocksize = SPA_OLD_GANGBLOCKSIZE;
+ uint64_t candidate = gangblocksize;
+ error = metaslab_alloc_range(spa, mc, gangblocksize, gangblocksize,
bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags,
- &pio->io_alloc_list, pio->io_allocator, pio);
+ &pio->io_alloc_list, pio->io_allocator, pio, &candidate);
if (error) {
pio->io_error = error;
return (pio);
}
+ if (spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER))
+ gangblocksize = candidate;
if (pio == gio) {
gnpp = &gio->io_gang_tree;
@@ -3179,23 +3211,24 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
ASSERT(pio->io_ready == zio_write_gang_member_ready);
}
- gn = zio_gang_node_alloc(gnpp);
+ gn = zio_gang_node_alloc(gnpp, gangblocksize);
gbh = gn->gn_gbh;
- memset(gbh, 0, SPA_GANGBLOCKSIZE);
- gbh_abd = abd_get_from_buf(gbh, SPA_GANGBLOCKSIZE);
+ memset(gbh, 0, gangblocksize);
+ gbh_abd = abd_get_from_buf(gbh, gangblocksize);
/*
* Create the gang header.
*/
- zio = zio_rewrite(pio, spa, txg, bp, gbh_abd, SPA_GANGBLOCKSIZE,
+ zio = zio_rewrite(pio, spa, txg, bp, gbh_abd, gangblocksize,
zio_write_gang_done, NULL, pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
zio_gang_inherit_allocator(pio, zio);
- if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (pio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
boolean_t more;
- VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies,
- zio, B_TRUE, &more));
+ VERIFY(metaslab_class_throttle_reserve(mc, zio->io_allocator,
+ gbh_copies, zio->io_size, B_TRUE, &more));
+ zio->io_flags |= ZIO_FLAG_ALLOC_THROTTLED;
}
/*
@@ -3203,7 +3236,9 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
* opportunistic allocations. If that fails to generate enough
* space, we fall back to normal zio_write calls for nested gang.
*/
- for (int g = 0; resid != 0; g++) {
+ int g;
+ boolean_t any_failed = B_FALSE;
+ for (g = 0; resid != 0; g++) {
flags &= METASLAB_ASYNC_ALLOC;
flags |= METASLAB_GANG_CHILD;
zp.zp_checksum = gio->io_prop.zp_checksum;
@@ -3224,9 +3259,9 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
memset(zp.zp_mac, 0, ZIO_DATA_MAC_LEN);
uint64_t min_size = zio_roundup_alloc_size(spa,
- resid / (SPA_GBH_NBLKPTRS - g));
+ resid / (gbh_nblkptrs(gangblocksize) - g));
min_size = MIN(min_size, resid);
- bp = &gbh->zg_blkptr[g];
+ bp = &((blkptr_t *)gbh)[g];
zio_alloc_list_t cio_list;
metaslab_trace_init(&cio_list);
@@ -3236,6 +3271,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
flags, &cio_list, zio->io_allocator, NULL, &allocated_size);
boolean_t allocated = error == 0;
+ any_failed |= !allocated;
uint64_t psize = allocated ? MIN(resid, allocated_size) :
min_size;
@@ -3268,6 +3304,29 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
}
/*
+ * If we used more gang children than the old limit, we must already be
+ * using the new headers. No need to update anything, just move on.
+ *
+ * Otherwise, we might be in a case where we need to turn on the new
+ * feature, so we check that. We enable the new feature if we didn't
+ * manage to fit everything into 3 gang children and we could have
+ * written more than that.
+ */
+ if (g > gbh_nblkptrs(SPA_OLD_GANGBLOCKSIZE)) {
+ ASSERT(spa_feature_is_active(spa,
+ SPA_FEATURE_DYNAMIC_GANG_HEADER));
+ } else if (any_failed && candidate > SPA_OLD_GANGBLOCKSIZE &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER) &&
+ !spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER)) {
+ dmu_tx_t *tx =
+ dmu_tx_create_assigned(spa->spa_dsl_pool, txg + 1);
+ dsl_sync_task_nowait(spa->spa_dsl_pool,
+ zio_update_feature,
+ (void *)SPA_FEATURE_DYNAMIC_GANG_HEADER, tx);
+ dmu_tx_commit(tx);
+ }
+
+ /*
* Set pio's pipeline to just wait for zio to finish.
*/
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@@ -3303,11 +3362,11 @@ zio_nop_write(zio_t *zio)
zio_prop_t *zp = &zio->io_prop;
ASSERT(BP_IS_HOLE(bp));
- ASSERT(BP_GET_LEVEL(bp) == 0);
+ ASSERT0(BP_GET_LEVEL(bp));
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
ASSERT(zp->zp_nopwrite);
ASSERT(!zp->zp_dedup);
- ASSERT(zio->io_bp_override == NULL);
+ ASSERT0P(zio->io_bp_override);
ASSERT(IO_IS_ALLOCATING(zio));
/*
@@ -3436,7 +3495,7 @@ zio_ddt_read_start(zio_t *zio)
ddt_univ_phys_t *ddp = dde->dde_phys;
blkptr_t blk;
- ASSERT(zio->io_vsd == NULL);
+ ASSERT0P(zio->io_vsd);
zio->io_vsd = dde;
if (v_self == DDT_PHYS_NONE)
@@ -3501,7 +3560,7 @@ zio_ddt_read_done(zio_t *zio)
zio->io_vsd = NULL;
}
- ASSERT(zio->io_vsd == NULL);
+ ASSERT0P(zio->io_vsd);
return (zio);
}
@@ -3836,7 +3895,7 @@ zio_ddt_write(zio_t *zio)
* block and leave.
*/
if (have_dvas == 0) {
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) == txg);
+ ASSERT(BP_GET_BIRTH(bp) == txg);
ASSERT(BP_EQUAL(bp, zio->io_bp_override));
ddt_phys_extend(ddp, v, bp);
ddt_phys_addref(ddp, v);
@@ -3864,6 +3923,23 @@ zio_ddt_write(zio_t *zio)
* then we can just use them as-is.
*/
if (have_dvas >= need_dvas) {
+ /*
+ * For rewrite operations, try preserving the original
+ * logical birth time. If the result matches the
+ * original BP, this becomes a NOP.
+ */
+ if (zp->zp_rewrite) {
+ uint64_t orig_logical_birth =
+ BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig);
+ ddt_bp_fill(ddp, v, bp, orig_logical_birth);
+ if (BP_EQUAL(bp, &zio->io_bp_orig)) {
+ /* We can skip accounting. */
+ zio->io_flags |= ZIO_FLAG_NOPWRITE;
+ ddt_exit(ddt);
+ return (zio);
+ }
+ }
+
ddt_bp_fill(ddp, v, bp, txg);
ddt_phys_addref(ddp, v);
ddt_exit(ddt);
@@ -4078,9 +4154,11 @@ zio_io_to_allocate(metaslab_class_allocator_t *mca, boolean_t *more)
* reserve then we throttle.
*/
if (!metaslab_class_throttle_reserve(zio->io_metaslab_class,
- zio->io_prop.zp_copies, zio, B_FALSE, more)) {
+ zio->io_allocator, zio->io_prop.zp_copies, zio->io_size,
+ B_FALSE, more)) {
return (NULL);
}
+ zio->io_flags |= ZIO_FLAG_ALLOC_THROTTLED;
avl_remove(&mca->mca_tree, zio);
ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
@@ -4164,8 +4242,10 @@ zio_dva_allocate(zio_t *zio)
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_GANG);
memcpy(zio->io_bp->blk_dva, zio->io_bp_orig.blk_dva,
3 * sizeof (dva_t));
- BP_SET_BIRTH(zio->io_bp, BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig),
- BP_GET_PHYSICAL_BIRTH(&zio->io_bp_orig));
+ BP_SET_LOGICAL_BIRTH(zio->io_bp,
+ BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig));
+ BP_SET_PHYSICAL_BIRTH(zio->io_bp,
+ BP_GET_RAW_PHYSICAL_BIRTH(&zio->io_bp_orig));
return (zio);
}
@@ -4236,13 +4316,14 @@ again:
* If we are holding old class reservation, drop it.
* Dispatch the next ZIO(s) there if some are waiting.
*/
- if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
if (metaslab_class_throttle_unreserve(mc,
- zio->io_prop.zp_copies, zio)) {
+ zio->io_allocator, zio->io_prop.zp_copies,
+ zio->io_size)) {
zio_allocate_dispatch(zio->io_metaslab_class,
zio->io_allocator);
}
- zio->io_flags &= ~ZIO_FLAG_IO_ALLOCATING;
+ zio->io_flags &= ~ZIO_FLAG_ALLOC_THROTTLED;
}
if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
@@ -4291,6 +4372,15 @@ again:
error);
}
zio->io_error = error;
+ } else if (zio->io_prop.zp_rewrite) {
+ /*
+ * For rewrite operations, preserve the logical birth time
+ * but set the physical birth time to the current txg.
+ */
+ uint64_t logical_birth = BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig);
+ ASSERT3U(logical_birth, <=, zio->io_txg);
+ BP_SET_BIRTH(zio->io_bp, logical_birth, zio->io_txg);
+ BP_SET_REWRITE(zio->io_bp, 1);
}
return (zio);
@@ -4324,18 +4414,17 @@ zio_dva_claim(zio_t *zio)
static void
zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
{
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp));
- ASSERT(zio->io_bp_override == NULL);
+ ASSERT(BP_GET_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp));
+ ASSERT0P(zio->io_bp_override);
if (!BP_IS_HOLE(bp)) {
- metaslab_free(zio->io_spa, bp, BP_GET_LOGICAL_BIRTH(bp),
- B_TRUE);
+ metaslab_free(zio->io_spa, bp, BP_GET_BIRTH(bp), B_TRUE);
}
if (gn != NULL) {
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_gangblocksize); g++) {
zio_dva_unallocate(zio, gn->gn_child[g],
- &gn->gn_gbh->zg_blkptr[g]);
+ gbh_bp(gn->gn_gbh, g));
}
}
}
@@ -4347,7 +4436,7 @@ int
zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
uint64_t size, boolean_t *slog)
{
- int error = 1;
+ int error;
zio_alloc_list_t io_alloc_list;
ASSERT(txg > spa_syncing_txg(spa));
@@ -4372,14 +4461,34 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object)
% spa->spa_alloc_count;
ZIOSTAT_BUMP(ziostat_total_allocations);
+
+ /* Try log class (dedicated slog devices) first */
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
txg, NULL, flags, &io_alloc_list, allocator, NULL);
*slog = (error == 0);
+
+ /* Try special_embedded_log class (reserved on special vdevs) */
+ if (error != 0) {
+ error = metaslab_alloc(spa, spa_special_embedded_log_class(spa),
+ size, new_bp, 1, txg, NULL, flags, &io_alloc_list,
+ allocator, NULL);
+ }
+
+ /* Try special class (general special vdev allocation) */
+ if (error != 0) {
+ error = metaslab_alloc(spa, spa_special_class(spa), size,
+ new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator,
+ NULL);
+ }
+
+ /* Try embedded_log class (reserved on normal vdevs) */
if (error != 0) {
error = metaslab_alloc(spa, spa_embedded_log_class(spa), size,
new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator,
NULL);
}
+
+ /* Finally fall back to normal class */
if (error != 0) {
ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks);
error = metaslab_alloc(spa, spa_normal_class(spa), size,
@@ -4450,8 +4559,8 @@ zio_vdev_io_start(zio_t *zio)
zio->io_delay = 0;
- ASSERT(zio->io_error == 0);
- ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0);
+ ASSERT0(zio->io_error);
+ ASSERT0(zio->io_child_error[ZIO_CHILD_VDEV]);
if (vd == NULL) {
if (!(zio->io_flags & ZIO_FLAG_CONFIG_WRITER))
@@ -4642,7 +4751,7 @@ zio_vdev_io_done(zio_t *zio)
ops->vdev_op_io_done(zio);
if (unexpected_error && vd->vdev_remove_wanted == B_FALSE)
- VERIFY(vdev_probe(vd, zio) == NULL);
+ VERIFY0P(vdev_probe(vd, zio));
return (zio);
}
@@ -4722,7 +4831,7 @@ zio_vdev_io_assess(zio_t *zio)
* If a Direct I/O operation has a checksum verify error then this I/O
* should not attempt to be issued again.
*/
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) {
if (zio->io_type == ZIO_TYPE_WRITE) {
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
ASSERT3U(zio->io_error, ==, EIO);
@@ -4794,7 +4903,7 @@ void
zio_vdev_io_reissue(zio_t *zio)
{
ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START);
- ASSERT(zio->io_error == 0);
+ ASSERT0(zio->io_error);
zio->io_stage >>= 1;
}
@@ -4811,7 +4920,7 @@ void
zio_vdev_io_bypass(zio_t *zio)
{
ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START);
- ASSERT(zio->io_error == 0);
+ ASSERT0(zio->io_error);
zio->io_flags |= ZIO_FLAG_IO_BYPASS;
zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1;
@@ -5031,7 +5140,7 @@ zio_checksum_verify(zio_t *zio)
ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL);
}
- ASSERT0(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+ ASSERT0(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
IMPLY(zio->io_flags & ZIO_FLAG_DIO_READ,
!(zio->io_flags & ZIO_FLAG_SPECULATIVE));
@@ -5040,7 +5149,7 @@ zio_checksum_verify(zio_t *zio)
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
if (zio->io_flags & ZIO_FLAG_DIO_READ) {
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_t *pio = zio_unique_parent(zio);
/*
* Any Direct I/O read that has a checksum
@@ -5090,7 +5199,7 @@ zio_dio_checksum_verify(zio_t *zio)
if ((error = zio_checksum_error(zio, NULL)) != 0) {
zio->io_error = error;
if (error == ECKSUM) {
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
}
}
@@ -5115,7 +5224,7 @@ zio_checksum_verified(zio_t *zio)
void
zio_dio_chksum_verify_error_report(zio_t *zio)
{
- ASSERT(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+ ASSERT(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
if (zio->io_child_type == ZIO_CHILD_LOGICAL)
return;
@@ -5187,9 +5296,9 @@ zio_ready(zio_t *zio)
if (zio->io_ready) {
ASSERT(IO_IS_ALLOCATING(zio));
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg ||
+ ASSERT(BP_GET_BIRTH(bp) == zio->io_txg ||
BP_IS_HOLE(bp) || (zio->io_flags & ZIO_FLAG_NOPWRITE));
- ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0);
+ ASSERT0(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY]);
zio->io_ready(zio);
}
@@ -5202,7 +5311,7 @@ zio_ready(zio_t *zio)
if (zio->io_error != 0) {
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
- if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
ASSERT(IO_IS_ALLOCATING(zio));
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(zio->io_metaslab_class != NULL);
@@ -5213,8 +5322,8 @@ zio_ready(zio_t *zio)
* issue the next I/O to allocate.
*/
if (metaslab_class_throttle_unreserve(
- zio->io_metaslab_class, zio->io_prop.zp_copies,
- zio)) {
+ zio->io_metaslab_class, zio->io_allocator,
+ zio->io_prop.zp_copies, zio->io_size)) {
zio_allocate_dispatch(zio->io_metaslab_class,
zio->io_allocator);
}
@@ -5264,6 +5373,7 @@ zio_dva_throttle_done(zio_t *zio)
vdev_t *vd = zio->io_vd;
int flags = METASLAB_ASYNC_ALLOC;
const void *tag = pio;
+ uint64_t size = pio->io_size;
ASSERT3P(zio->io_bp, !=, NULL);
ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
@@ -5273,16 +5383,19 @@ zio_dva_throttle_done(zio_t *zio)
ASSERT3P(vd, ==, vd->vdev_top);
ASSERT(zio_injection_enabled || !(zio->io_flags & ZIO_FLAG_IO_RETRY));
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
- ASSERT(zio->io_flags & ZIO_FLAG_IO_ALLOCATING);
+ ASSERT(zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED);
/*
* Parents of gang children can have two flavors -- ones that allocated
* the gang header (will have ZIO_FLAG_IO_REWRITE set) and ones that
* allocated the constituent blocks. The first use their parent as tag.
+ * We set the size to match the original allocation call for that case.
*/
if (pio->io_child_type == ZIO_CHILD_GANG &&
- (pio->io_flags & ZIO_FLAG_IO_REWRITE))
+ (pio->io_flags & ZIO_FLAG_IO_REWRITE)) {
tag = zio_unique_parent(pio);
+ size = SPA_OLD_GANGBLOCKSIZE;
+ }
ASSERT(IO_IS_ALLOCATING(pio) || (pio->io_child_type == ZIO_CHILD_GANG &&
(pio->io_flags & ZIO_FLAG_IO_REWRITE)));
@@ -5295,9 +5408,10 @@ zio_dva_throttle_done(zio_t *zio)
ASSERT(zio->io_metaslab_class->mc_alloc_throttle_enabled);
metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id,
- pio->io_allocator, flags, pio->io_size, tag);
+ pio->io_allocator, flags, size, tag);
- if (metaslab_class_throttle_unreserve(zio->io_metaslab_class, 1, pio)) {
+ if (metaslab_class_throttle_unreserve(pio->io_metaslab_class,
+ pio->io_allocator, 1, pio->io_size)) {
zio_allocate_dispatch(zio->io_metaslab_class,
pio->io_allocator);
}
@@ -5328,17 +5442,15 @@ zio_done(zio_t *zio)
* write. We must do this since the allocation is performed
* by the logical I/O but the actual write is done by child I/Os.
*/
- if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING &&
+ if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED &&
zio->io_child_type == ZIO_CHILD_VDEV)
zio_dva_throttle_done(zio);
for (int c = 0; c < ZIO_CHILD_TYPES; c++)
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
- ASSERT(zio->io_children[c][w] == 0);
+ ASSERT0(zio->io_children[c][w]);
if (zio->io_bp != NULL && !BP_IS_EMBEDDED(zio->io_bp)) {
- ASSERT(zio->io_bp->blk_pad[0] == 0);
- ASSERT(zio->io_bp->blk_pad[1] == 0);
ASSERT(memcmp(zio->io_bp, &zio->io_bp_copy,
sizeof (blkptr_t)) == 0 ||
(zio->io_bp == zio_unique_parent(zio)->io_bp));
@@ -5431,7 +5543,7 @@ zio_done(zio_t *zio)
*/
if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
!vdev_is_dead(zio->io_vd) &&
- !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
+ !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) {
int ret = zfs_ereport_post(FM_EREPORT_ZFS_IO,
zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
if (ret != EALREADY) {
@@ -5446,14 +5558,14 @@ zio_done(zio_t *zio)
if ((zio->io_error == EIO || !(zio->io_flags &
(ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&
- !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) &&
+ !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) &&
zio == zio->io_logical) {
/*
* For logical I/O requests, tell the SPA to log the
* error and generate a logical data ereport.
*/
spa_log_error(zio->io_spa, &zio->io_bookmark,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(FM_EREPORT_ZFS_DATA,
zio->io_spa, NULL, &zio->io_bookmark, zio, 0);
}
@@ -5467,7 +5579,7 @@ zio_done(zio_t *zio)
*/
if (zio->io_error == EAGAIN && IO_IS_ALLOCATING(zio) &&
zio->io_prop.zp_dedup) {
- zio->io_reexecute |= ZIO_REEXECUTE_NOW;
+ zio->io_post |= ZIO_POST_REEXECUTE;
zio->io_prop.zp_dedup = B_FALSE;
}
/*
@@ -5479,11 +5591,11 @@ zio_done(zio_t *zio)
if (IO_IS_ALLOCATING(zio) &&
!(zio->io_flags & ZIO_FLAG_CANFAIL) &&
- !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
+ !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) {
if (zio->io_error != ENOSPC)
- zio->io_reexecute |= ZIO_REEXECUTE_NOW;
+ zio->io_post |= ZIO_POST_REEXECUTE;
else
- zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;
+ zio->io_post |= ZIO_POST_SUSPEND;
}
if ((zio->io_type == ZIO_TYPE_READ ||
@@ -5492,10 +5604,11 @@ zio_done(zio_t *zio)
zio->io_error == ENXIO &&
spa_load_state(zio->io_spa) == SPA_LOAD_NONE &&
spa_get_failmode(zio->io_spa) != ZIO_FAILURE_MODE_CONTINUE)
- zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;
+ zio->io_post |= ZIO_POST_SUSPEND;
- if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute)
- zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;
+ if (!(zio->io_flags & ZIO_FLAG_CANFAIL) &&
+ !(zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND)))
+ zio->io_post |= ZIO_POST_SUSPEND;
/*
* Here is a possibly good place to attempt to do
@@ -5514,7 +5627,8 @@ zio_done(zio_t *zio)
*/
zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL);
- if ((zio->io_error || zio->io_reexecute) &&
+ if ((zio->io_error ||
+ (zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND))) &&
IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio &&
!(zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)))
zio_dva_unallocate(zio, zio->io_gang_tree, zio->io_bp);
@@ -5525,16 +5639,16 @@ zio_done(zio_t *zio)
* Godfather I/Os should never suspend.
*/
if ((zio->io_flags & ZIO_FLAG_GODFATHER) &&
- (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND))
- zio->io_reexecute &= ~ZIO_REEXECUTE_SUSPEND;
+ (zio->io_post & ZIO_POST_SUSPEND))
+ zio->io_post &= ~ZIO_POST_SUSPEND;
- if (zio->io_reexecute) {
+ if (zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND)) {
/*
* A Direct I/O operation that has a checksum verify error
* should not attempt to reexecute. Instead, the error should
* just be propagated back.
*/
- ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR));
+ ASSERT0(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
/*
* This is a logical I/O that wants to reexecute.
@@ -5571,7 +5685,7 @@ zio_done(zio_t *zio)
pio_next = zio_walk_parents(zio, &zl);
if ((pio->io_flags & ZIO_FLAG_GODFATHER) &&
- (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) {
+ (zio->io_post & ZIO_POST_SUSPEND)) {
zio_remove_child(pio, zio, remove_zl);
/*
* This is a rare code path, so we don't
@@ -5595,13 +5709,14 @@ zio_done(zio_t *zio)
* "next_to_execute".
*/
zio_notify_parent(pio, zio, ZIO_WAIT_DONE, NULL);
- } else if (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND) {
+ } else if (zio->io_post & ZIO_POST_SUSPEND) {
/*
* We'd fail again if we reexecuted now, so suspend
* until conditions improve (e.g. device comes online).
*/
zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
} else {
+ ASSERT(zio->io_post & ZIO_POST_REEXECUTE);
/*
* Reexecution is potentially a huge amount of work.
* Hand it off to the otherwise-unused claim taskq.
@@ -5614,7 +5729,8 @@ zio_done(zio_t *zio)
}
ASSERT(list_is_empty(&zio->io_child_list));
- ASSERT(zio->io_reexecute == 0);
+ ASSERT0(zio->io_post & ZIO_POST_REEXECUTE);
+ ASSERT0(zio->io_post & ZIO_POST_SUSPEND);
ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL));
/*
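
Throughout zio_done() the old io_reexecute field and the ZIO_FLAG_DIO_CHKSUM_ERR flag are folded into a single io_post bitmask (ZIO_POST_REEXECUTE, ZIO_POST_SUSPEND, ZIO_POST_DIO_CHKSUM_ERR). A minimal sketch of that post-processing pattern follows; the flag values are illustrative, not the definitions from the zio headers:

/* Illustrative flag values only. */
#include <stdint.h>

#define	POST_REEXECUTE		(1u << 0)
#define	POST_SUSPEND		(1u << 1)
#define	POST_DIO_CHKSUM_ERR	(1u << 2)

static void
post_process(uint32_t *post, int godfather)
{
	/* Godfather I/Os are never allowed to suspend. */
	if (godfather && (*post & POST_SUSPEND))
		*post &= ~POST_SUSPEND;

	if (*post & (POST_REEXECUTE | POST_SUSPEND)) {
		if (*post & POST_SUSPEND) {
			/* park the I/O until the pool is resumed */
		} else {
			/* reissue the I/O through the pipeline */
		}
	}
}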
diff --git a/sys/contrib/openzfs/module/zfs/zio_checksum.c b/sys/contrib/openzfs/module/zfs/zio_checksum.c
index a91775b04af2..1d0646a61185 100644
--- a/sys/contrib/openzfs/module/zfs/zio_checksum.c
+++ b/sys/contrib/openzfs/module/zfs/zio_checksum.c
@@ -215,7 +215,7 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
spa_feature_t
zio_checksum_to_feature(enum zio_checksum cksum)
{
- VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0);
+ VERIFY0((cksum & ~ZIO_CHECKSUM_MASK));
switch (cksum) {
case ZIO_CHECKSUM_BLAKE3:
@@ -279,7 +279,7 @@ static void
zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
{
const dva_t *dva = BP_IDENTITY(bp);
- uint64_t txg = BP_GET_BIRTH(bp);
+ uint64_t txg = BP_GET_PHYSICAL_BIRTH(bp);
ASSERT(BP_IS_GANG(bp));
@@ -545,14 +545,39 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
(BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
int error;
- uint64_t size = (bp == NULL ? zio->io_size :
- (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
+ uint64_t size = bp ? BP_GET_PSIZE(bp) : zio->io_size;
uint64_t offset = zio->io_offset;
abd_t *data = zio->io_abd;
spa_t *spa = zio->io_spa;
+ if (bp && BP_IS_GANG(bp)) {
+ if (spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER))
+ size = zio->io_size;
+ else
+ size = SPA_OLD_GANGBLOCKSIZE;
+ }
+
error = zio_checksum_error_impl(spa, bp, checksum, data, size,
offset, info);
+ if (error && bp && BP_IS_GANG(bp) && size > SPA_OLD_GANGBLOCKSIZE) {
+ /*
+ * It's possible that this is an old gang block. Rerun
+ * the checksum with the old size; if that passes, then
+ * update the gangblocksize appropriately.
+ */
+ error = zio_checksum_error_impl(spa, bp, checksum, data,
+ SPA_OLD_GANGBLOCKSIZE, offset, info);
+ if (error == 0) {
+ ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
+ zio_t *pio;
+ for (pio = zio_unique_parent(zio);
+ pio->io_child_type != ZIO_CHILD_GANG;
+ pio = zio_unique_parent(pio))
+ ;
+ zio_gang_node_t *gn = pio->io_private;
+ gn->gn_gangblocksize = SPA_OLD_GANGBLOCKSIZE;
+ }
+ }
if (zio_injection_enabled && error == 0 && zio->io_error == 0) {
error = zio_handle_fault_injection(zio, ECKSUM);
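
The zio_checksum_error() change above sizes gang headers from the dynamic_gang_header feature state and, if verification fails at the larger size, retries at the legacy header size before giving up. A rough sketch of that fallback, with a hypothetical verify callback standing in for zio_checksum_error_impl() and a stand-in constant for SPA_OLD_GANGBLOCKSIZE:

#include <stdint.h>

#define	OLD_GANG_SIZE	512	/* stand-in for SPA_OLD_GANGBLOCKSIZE */

typedef int (*verify_fn_t)(const void *buf, uint64_t size);

static int
verify_gang_header(const void *buf, uint64_t size, verify_fn_t verify,
    uint64_t *actual_size)
{
	int error = verify(buf, size);

	if (error != 0 && size > OLD_GANG_SIZE) {
		/* Possibly an old, smaller gang header; retry. */
		error = verify(buf, OLD_GANG_SIZE);
		if (error == 0)
			*actual_size = OLD_GANG_SIZE;
	}
	return (error);
}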
diff --git a/sys/contrib/openzfs/module/zfs/zio_compress.c b/sys/contrib/openzfs/module/zfs/zio_compress.c
index 9f0ac1b63146..89ceeb58ad91 100644
--- a/sys/contrib/openzfs/module/zfs/zio_compress.c
+++ b/sys/contrib/openzfs/module/zfs/zio_compress.c
@@ -38,12 +38,6 @@
#include <sys/zstd/zstd.h>
/*
- * If nonzero, every 1/X decompression attempts will fail, simulating
- * an undetected memory error.
- */
-static unsigned long zio_decompress_fail_fraction = 0;
-
-/*
* Compression vectors.
*/
zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
@@ -171,15 +165,6 @@ zio_decompress_data(enum zio_compress c, abd_t *src, abd_t *dst,
else
err = ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level);
- /*
- * Decompression shouldn't fail, because we've already verified
- * the checksum. However, for extra protection (e.g. against bitflips
- * in non-ECC RAM), we handle this error (and test it).
- */
- if (zio_decompress_fail_fraction != 0 &&
- random_in_range(zio_decompress_fail_fraction) == 0)
- err = SET_ERROR(EINVAL);
-
return (err);
}
diff --git a/sys/contrib/openzfs/module/zfs/zio_inject.c b/sys/contrib/openzfs/module/zfs/zio_inject.c
index df7b01ba879e..981a1be4847c 100644
--- a/sys/contrib/openzfs/module/zfs/zio_inject.c
+++ b/sys/contrib/openzfs/module/zfs/zio_inject.c
@@ -1119,7 +1119,7 @@ zio_clear_fault(int id)
kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) *
handler->zi_record.zi_nlanes);
} else {
- ASSERT3P(handler->zi_lanes, ==, NULL);
+ ASSERT0P(handler->zi_lanes);
}
if (handler->zi_spa_name != NULL)
diff --git a/sys/contrib/openzfs/module/zfs/zrlock.c b/sys/contrib/openzfs/module/zfs/zrlock.c
index 3c0f1b7bbbc1..09c110945c97 100644
--- a/sys/contrib/openzfs/module/zfs/zrlock.c
+++ b/sys/contrib/openzfs/module/zfs/zrlock.c
@@ -129,7 +129,7 @@ zrl_tryenter(zrlock_t *zrl)
(uint32_t *)&zrl->zr_refcount, 0, ZRL_LOCKED);
if (cas == 0) {
#ifdef ZFS_DEBUG
- ASSERT3P(zrl->zr_owner, ==, NULL);
+ ASSERT0P(zrl->zr_owner);
zrl->zr_owner = curthread;
#endif
return (1);
diff --git a/sys/contrib/openzfs/module/zfs/zthr.c b/sys/contrib/openzfs/module/zfs/zthr.c
index 597a510528ea..d245ce4946e0 100644
--- a/sys/contrib/openzfs/module/zfs/zthr.c
+++ b/sys/contrib/openzfs/module/zfs/zthr.c
@@ -316,7 +316,7 @@ zthr_destroy(zthr_t *t)
{
ASSERT(!MUTEX_HELD(&t->zthr_state_lock));
ASSERT(!MUTEX_HELD(&t->zthr_request_lock));
- VERIFY3P(t->zthr_thread, ==, NULL);
+ VERIFY0P(t->zthr_thread);
mutex_destroy(&t->zthr_request_lock);
mutex_destroy(&t->zthr_state_lock);
cv_destroy(&t->zthr_cv);
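
The zio_inject.c, zrlock.c, and zthr.c hunks are mechanical conversions to the ASSERT0P()/VERIFY0P() forms for NULL-pointer checks. As a rough illustration only (the real macros live in the SPL/ZFS debug headers and also print the failing expression), they behave like:

#include <assert.h>
#include <stddef.h>

/* Simplified stand-ins; not the actual OpenZFS definitions. */
#define	ASSERT0P(p)	assert((p) == NULL)
#define	VERIFY0P(p)	assert((p) == NULL)

struct handler { void *zi_lanes; };

static void
check(struct handler *h)
{
	ASSERT0P(h->zi_lanes);	/* reads as: assert the pointer is NULL */
}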
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 3568d4f43fcb..29f51e230a37 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -102,6 +102,7 @@ extern int zfs_bclone_wait_dirty;
zv_taskq_t zvol_taskqs;
typedef enum {
+ ZVOL_ASYNC_CREATE_MINORS,
ZVOL_ASYNC_REMOVE_MINORS,
ZVOL_ASYNC_RENAME_MINORS,
ZVOL_ASYNC_SET_SNAPDEV,
@@ -110,10 +111,14 @@ typedef enum {
} zvol_async_op_t;
typedef struct {
- zvol_async_op_t op;
- char name1[MAXNAMELEN];
- char name2[MAXNAMELEN];
- uint64_t value;
+ zvol_async_op_t zt_op;
+ char zt_name1[MAXNAMELEN];
+ char zt_name2[MAXNAMELEN];
+ uint64_t zt_value;
+ uint32_t zt_total;
+ uint32_t zt_done;
+ int32_t zt_status;
+ int zt_error;
} zvol_task_t;
zv_request_task_t *
@@ -210,8 +215,8 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
int error;
uint64_t volblocksize, volsize;
- VERIFY(nvlist_lookup_uint64(nvprops,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
+ VERIFY0(nvlist_lookup_uint64(nvprops,
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize));
if (nvlist_lookup_uint64(nvprops,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
@@ -220,21 +225,20 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
* These properties must be removed from the list so the generic
* property setting step won't apply to them.
*/
- VERIFY(nvlist_remove_all(nvprops,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
+ VERIFY0(nvlist_remove_all(nvprops, zfs_prop_to_name(ZFS_PROP_VOLSIZE)));
(void) nvlist_remove_all(nvprops,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
}
/*
@@ -249,7 +253,7 @@ zvol_get_stats(objset_t *os, nvlist_t *nv)
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
if (error)
- return (SET_ERROR(error));
+ return (error);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
@@ -262,7 +266,7 @@ zvol_get_stats(objset_t *os, nvlist_t *nv)
kmem_free(doi, sizeof (dmu_object_info_t));
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -300,7 +304,7 @@ zvol_update_volsize(uint64_t volsize, objset_t *os)
error = dmu_tx_assign(tx, DMU_TX_WAIT);
if (error) {
dmu_tx_abort(tx);
- return (SET_ERROR(error));
+ return (error);
}
txg = dmu_tx_get_txg(tx);
@@ -332,7 +336,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
error = dsl_prop_get_integer(name,
zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
if (error != 0)
- return (SET_ERROR(error));
+ return (error);
if (readonly)
return (SET_ERROR(EROFS));
@@ -348,7 +352,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
FTAG, &os)) != 0) {
if (zv != NULL)
mutex_exit(&zv->zv_state_lock);
- return (SET_ERROR(error));
+ return (error);
}
owned = B_TRUE;
if (zv != NULL)
@@ -385,7 +389,7 @@ out:
if (error == 0 && zv != NULL)
zvol_os_update_volsize(zv, volsize);
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -396,7 +400,7 @@ zvol_set_volthreading(const char *name, boolean_t value)
{
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL)
- return (ENOENT);
+ return (SET_ERROR(ENOENT));
zv->zv_threading = value;
mutex_exit(&zv->zv_state_lock);
return (0);
@@ -445,8 +449,10 @@ zvol_check_volblocksize(const char *name, uint64_t volblocksize)
* We don't allow setting the property above 1MB,
* unless the tunable has been changed.
*/
- if (volblocksize > zfs_max_recordsize)
+ if (volblocksize > zfs_max_recordsize) {
+ spa_close(spa, FTAG);
return (SET_ERROR(EDOM));
+ }
spa_close(spa, FTAG);
}
@@ -613,7 +619,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
dmu_tx_t *tx;
blkptr_t *bps;
size_t maxblocks;
- int error = EINVAL;
+ int error = 0;
rw_enter(&zv_dst->zv_suspend_lock, RW_READER);
if (zv_dst->zv_zilog == NULL) {
@@ -639,23 +645,22 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
*/
if (!spa_feature_is_enabled(dmu_objset_spa(outos),
SPA_FEATURE_BLOCK_CLONING)) {
- error = EOPNOTSUPP;
+ error = SET_ERROR(EOPNOTSUPP);
goto out;
}
if (dmu_objset_spa(inos) != dmu_objset_spa(outos)) {
- error = EXDEV;
+ error = SET_ERROR(EXDEV);
goto out;
}
if (inos->os_encrypted != outos->os_encrypted) {
- error = EXDEV;
+ error = SET_ERROR(EXDEV);
goto out;
}
if (zv_src->zv_volblocksize != zv_dst->zv_volblocksize) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
if (inoff >= zv_src->zv_volsize || outoff >= zv_dst->zv_volsize) {
- error = 0;
goto out;
}
@@ -666,17 +671,15 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
len = zv_src->zv_volsize - inoff;
if (len > zv_dst->zv_volsize - outoff)
len = zv_dst->zv_volsize - outoff;
- if (len == 0) {
- error = 0;
+ if (len == 0)
goto out;
- }
/*
* No overlapping if we are cloning within the same file
*/
if (zv_src == zv_dst) {
if (inoff < outoff + len && outoff < inoff + len) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
}
@@ -686,7 +689,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
*/
if ((inoff % zv_src->zv_volblocksize) != 0 ||
(outoff % zv_dst->zv_volblocksize) != 0) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
@@ -694,7 +697,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
* Length must be multiple of block size
*/
if ((len % zv_src->zv_volblocksize) != 0) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
@@ -766,13 +769,13 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
zfs_rangelock_exit(outlr);
zfs_rangelock_exit(inlr);
if (error == 0 && zv_dst->zv_objset->os_sync == ZFS_SYNC_ALWAYS) {
- zil_commit(zilog_dst, ZVOL_OBJ);
+ error = zil_commit(zilog_dst, ZVOL_OBJ);
}
out:
if (zv_src != zv_dst)
rw_exit(&zv_src->zv_suspend_lock);
rw_exit(&zv_dst->zv_suspend_lock);
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -859,13 +862,8 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
};
/*
- * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
- *
- * We store data in the log buffers if it's small enough.
- * Otherwise we will later flush the data out via dmu_sync().
+ * zvol_log_write() handles TX_WRITE transactions.
*/
-static const ssize_t zvol_immediate_write_sz = 32768;
-
void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, boolean_t commit)
@@ -878,15 +876,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
if (zil_replaying(zilog, tx))
return;
- if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- write_state = WR_INDIRECT;
- else if (!spa_has_slogs(zilog->zl_spa) &&
- size >= blocksize && blocksize > zvol_immediate_write_sz)
- write_state = WR_INDIRECT;
- else if (commit)
- write_state = WR_COPIED;
- else
- write_state = WR_NEED_COPY;
+ write_state = zil_write_state(zilog, size, blocksize, B_FALSE, commit);
while (size) {
itx_t *itx;
@@ -905,7 +895,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
if (wr_state == WR_COPIED &&
dmu_read_by_dnode(zv->zv_dn, offset, len, lr + 1,
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING) != 0) {
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
wr_state = WR_NEED_COPY;
@@ -924,7 +914,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
itx->itx_private = zv;
- (void) zil_itx_assign(zilog, itx, tx);
+ zil_itx_assign(zilog, itx, tx);
offset += len;
size -= len;
@@ -1034,7 +1024,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
zvol_get_done(zgd, error);
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -1079,15 +1069,15 @@ zvol_setup_zv(zvol_state_t *zv)
error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
if (error)
- return (SET_ERROR(error));
+ return (error);
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error)
- return (SET_ERROR(error));
+ return (error);
error = dnode_hold(os, ZVOL_OBJ, zv, &zv->zv_dn);
if (error)
- return (SET_ERROR(error));
+ return (error);
zvol_os_set_capacity(zv, volsize >> 9);
zv->zv_volsize = volsize;
@@ -1129,7 +1119,7 @@ zvol_shutdown_zv(zvol_state_t *zv)
*/
if (zv->zv_flags & ZVOL_WRITTEN_TO)
txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
- (void) dmu_objset_evict_dbufs(zv->zv_objset);
+ dmu_objset_evict_dbufs(zv->zv_objset);
}
/*
@@ -1206,7 +1196,7 @@ zvol_resume(zvol_state_t *zv)
if (zv->zv_flags & ZVOL_REMOVING)
cv_broadcast(&zv->zv_removing_cv);
- return (SET_ERROR(error));
+ return (error);
}
int
@@ -1222,7 +1212,7 @@ zvol_first_open(zvol_state_t *zv, boolean_t readonly)
boolean_t ro = (readonly || (strchr(zv->zv_name, '@') != NULL));
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os);
if (error)
- return (SET_ERROR(error));
+ return (error);
zv->zv_objset = os;
@@ -1434,6 +1424,48 @@ zvol_create_minors_cb(const char *dsname, void *arg)
return (0);
}
+static void
+zvol_task_update_status(zvol_task_t *task, uint64_t total, uint64_t done,
+ int error)
+{
+
+ task->zt_total += total;
+ task->zt_done += done;
+ if (task->zt_total != task->zt_done) {
+ task->zt_status = -1;
+ if (error)
+ task->zt_error = error;
+ }
+}
+
+static void
+zvol_task_report_status(zvol_task_t *task)
+{
+#ifdef ZFS_DEBUG
+ static const char *const msg[] = {
+ "create",
+ "remove",
+ "rename",
+ "set snapdev",
+ "set volmode",
+ "unknown",
+ };
+
+ if (task->zt_status == 0)
+ return;
+
+ zvol_async_op_t op = MIN(task->zt_op, ZVOL_ASYNC_MAX);
+ if (task->zt_error) {
+ dprintf("The %s minors zvol task was not ok, last error %d\n",
+ msg[op], task->zt_error);
+ } else {
+ dprintf("The %s minors zvol task was not ok\n", msg[op]);
+ }
+#else
+ (void) task;
+#endif
+}
+
/*
* Create minors for the specified dataset, including children and snapshots.
* Pay attention to the 'snapdev' property and iterate over the snapshots
@@ -1451,14 +1483,27 @@ zvol_create_minors_cb(const char *dsname, void *arg)
* 'visible' (which also verifies that the parent is a zvol), and if so,
* a minor node for that snapshot is created.
*/
-void
-zvol_create_minors_recursive(const char *name)
+static void
+zvol_create_minors_impl(zvol_task_t *task)
{
+ const char *name = task->zt_name1;
list_t minors_list;
minors_job_t *job;
+ uint64_t snapdev;
+ int total = 0, done = 0, last_error = 0, error;
- if (zvol_inhibit_dev)
+ /*
+ * Note: the dsl_pool_config_lock must not be held.
+ * Minor node creation needs to obtain the zvol_state_lock.
+ * zvol_open() obtains the zvol_state_lock and then the dsl pool
+ * config lock. Therefore, we can't have the config lock now if
+ * we are going to wait for the zvol_state_lock, because it
+ * would be a lock order inversion which could lead to deadlock.
+ */
+
+ if (zvol_inhibit_dev) {
return;
+ }
/*
* This is the list for prefetch jobs. Whenever we found a match
@@ -1474,13 +1519,16 @@ zvol_create_minors_recursive(const char *name)
if (strchr(name, '@') != NULL) {
- uint64_t snapdev;
-
- int error = dsl_prop_get_integer(name, "snapdev",
- &snapdev, NULL);
-
- if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
- (void) zvol_os_create_minor(name);
+ error = dsl_prop_get_integer(name, "snapdev", &snapdev, NULL);
+ if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE) {
+ error = zvol_os_create_minor(name);
+ if (error == 0) {
+ done++;
+ } else {
+ last_error = error;
+ }
+ total++;
+ }
} else {
fstrans_cookie_t cookie = spl_fstrans_mark();
(void) dmu_objset_find(name, zvol_create_minors_cb,
@@ -1495,41 +1543,30 @@ zvol_create_minors_recursive(const char *name)
* sequentially.
*/
while ((job = list_remove_head(&minors_list)) != NULL) {
- if (!job->error)
- (void) zvol_os_create_minor(job->name);
+ if (!job->error) {
+ error = zvol_os_create_minor(job->name);
+ if (error == 0) {
+ done++;
+ } else {
+ last_error = error;
+ }
+ } else if (job->error == EINVAL) {
+ /*
+ * The objset with the name requested by the current
+ * job exists, but its type is not zvol. Ignore this
+ * sort of error.
+ */
+ done++;
+ } else {
+ last_error = job->error;
+ }
+ total++;
kmem_strfree(job->name);
kmem_free(job, sizeof (minors_job_t));
}
list_destroy(&minors_list);
-}
-
-void
-zvol_create_minor(const char *name)
-{
- /*
- * Note: the dsl_pool_config_lock must not be held.
- * Minor node creation needs to obtain the zvol_state_lock.
- * zvol_open() obtains the zvol_state_lock and then the dsl pool
- * config lock. Therefore, we can't have the config lock now if
- * we are going to wait for the zvol_state_lock, because it
- * would be a lock order inversion which could lead to deadlock.
- */
-
- if (zvol_inhibit_dev)
- return;
-
- if (strchr(name, '@') != NULL) {
- uint64_t snapdev;
-
- int error = dsl_prop_get_integer(name,
- "snapdev", &snapdev, NULL);
-
- if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
- (void) zvol_os_create_minor(name);
- } else {
- (void) zvol_os_create_minor(name);
- }
+ zvol_task_update_status(task, total, done, last_error);
}
/*
@@ -1577,10 +1614,11 @@ zvol_free_task(void *arg)
zvol_os_free(arg);
}
-void
-zvol_remove_minors_impl(const char *name)
+static void
+zvol_remove_minors_impl(zvol_task_t *task)
{
zvol_state_t *zv, *zv_next;
+ const char *name = task ? task->zt_name1 : NULL;
int namelen = ((name) ? strlen(name) : 0);
taskqid_t t;
list_t delay_list, free_list;
@@ -1662,13 +1700,13 @@ zvol_remove_minors_impl(const char *name)
}
/* Remove minor for this specific volume only */
-static void
+static int
zvol_remove_minor_impl(const char *name)
{
zvol_state_t *zv = NULL, *zv_next;
if (zvol_inhibit_dev)
- return;
+ return (0);
rw_enter(&zvol_state_lock, RW_WRITER);
@@ -1684,7 +1722,7 @@ zvol_remove_minor_impl(const char *name)
if (zv == NULL) {
rw_exit(&zvol_state_lock);
- return;
+ return (SET_ERROR(ENOENT));
}
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
@@ -1698,7 +1736,7 @@ zvol_remove_minor_impl(const char *name)
mutex_exit(&zv->zv_state_lock);
rw_exit(&zvol_state_lock);
zvol_remove_minor_task(zv);
- return;
+ return (0);
}
zvol_remove(zv);
@@ -1708,16 +1746,20 @@ zvol_remove_minor_impl(const char *name)
rw_exit(&zvol_state_lock);
zvol_os_free(zv);
+
+ return (0);
}
/*
* Rename minors for specified dataset including children and snapshots.
*/
static void
-zvol_rename_minors_impl(const char *oldname, const char *newname)
+zvol_rename_minors_impl(zvol_task_t *task)
{
zvol_state_t *zv, *zv_next;
- int oldnamelen;
+ const char *oldname = task->zt_name1;
+ const char *newname = task->zt_name2;
+ int total = 0, done = 0, last_error = 0, error = 0, oldnamelen;
if (zvol_inhibit_dev)
return;
@@ -1732,24 +1774,31 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
mutex_enter(&zv->zv_state_lock);
if (strcmp(zv->zv_name, oldname) == 0) {
- zvol_os_rename_minor(zv, newname);
+ error = zvol_os_rename_minor(zv, newname);
} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
(zv->zv_name[oldnamelen] == '/' ||
zv->zv_name[oldnamelen] == '@')) {
char *name = kmem_asprintf("%s%c%s", newname,
zv->zv_name[oldnamelen],
zv->zv_name + oldnamelen + 1);
- zvol_os_rename_minor(zv, name);
+ error = zvol_os_rename_minor(zv, name);
kmem_strfree(name);
}
-
+ if (error) {
+ last_error = error;
+ } else {
+ done++;
+ }
+ total++;
mutex_exit(&zv->zv_state_lock);
}
rw_exit(&zvol_state_lock);
+ zvol_task_update_status(task, total, done, last_error);
}
typedef struct zvol_snapdev_cb_arg {
+ zvol_task_t *task;
uint64_t snapdev;
} zvol_snapdev_cb_arg_t;
@@ -1757,26 +1806,31 @@ static int
zvol_set_snapdev_cb(const char *dsname, void *param)
{
zvol_snapdev_cb_arg_t *arg = param;
+ int error = 0;
if (strchr(dsname, '@') == NULL)
return (0);
switch (arg->snapdev) {
case ZFS_SNAPDEV_VISIBLE:
- (void) zvol_os_create_minor(dsname);
+ error = zvol_os_create_minor(dsname);
break;
case ZFS_SNAPDEV_HIDDEN:
- (void) zvol_remove_minor_impl(dsname);
+ error = zvol_remove_minor_impl(dsname);
break;
}
+ zvol_task_update_status(arg->task, 1, error == 0, error);
return (0);
}
static void
-zvol_set_snapdev_impl(char *name, uint64_t snapdev)
+zvol_set_snapdev_impl(zvol_task_t *task)
{
- zvol_snapdev_cb_arg_t arg = {snapdev};
+ const char *name = task->zt_name1;
+ uint64_t snapdev = task->zt_value;
+
+ zvol_snapdev_cb_arg_t arg = {task, snapdev};
fstrans_cookie_t cookie = spl_fstrans_mark();
/*
* The zvol_set_snapdev_sync() sets snapdev appropriately
@@ -1787,11 +1841,14 @@ zvol_set_snapdev_impl(char *name, uint64_t snapdev)
}
static void
-zvol_set_volmode_impl(char *name, uint64_t volmode)
+zvol_set_volmode_impl(zvol_task_t *task)
{
+ const char *name = task->zt_name1;
+ uint64_t volmode = task->zt_value;
fstrans_cookie_t cookie;
uint64_t old_volmode;
zvol_state_t *zv;
+ int error = 0;
if (strchr(name, '@') != NULL)
return;
@@ -1804,7 +1861,7 @@ zvol_set_volmode_impl(char *name, uint64_t volmode)
*/
zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL && volmode == ZFS_VOLMODE_NONE)
- return;
+ return;
if (zv != NULL) {
old_volmode = zv->zv_volmode;
mutex_exit(&zv->zv_state_lock);
@@ -1815,51 +1872,34 @@ zvol_set_volmode_impl(char *name, uint64_t volmode)
cookie = spl_fstrans_mark();
switch (volmode) {
case ZFS_VOLMODE_NONE:
- (void) zvol_remove_minor_impl(name);
+ error = zvol_remove_minor_impl(name);
break;
case ZFS_VOLMODE_GEOM:
case ZFS_VOLMODE_DEV:
- (void) zvol_remove_minor_impl(name);
- (void) zvol_os_create_minor(name);
+ error = zvol_remove_minor_impl(name);
+ /*
+ * The remove minor call above may not be needed if
+ * volmode was previously set to 'none'. Ignore the
+ * error in that case.
+ */
+ if (error == ENOENT)
+ error = 0;
+ else if (error)
+ break;
+ error = zvol_os_create_minor(name);
break;
case ZFS_VOLMODE_DEFAULT:
- (void) zvol_remove_minor_impl(name);
+ error = zvol_remove_minor_impl(name);
if (zvol_volmode == ZFS_VOLMODE_NONE)
break;
else /* if zvol_volmode is invalid defaults to "geom" */
- (void) zvol_os_create_minor(name);
+ error = zvol_os_create_minor(name);
break;
}
+ zvol_task_update_status(task, 1, error == 0, error);
spl_fstrans_unmark(cookie);
}
-static zvol_task_t *
-zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
- uint64_t value)
-{
- zvol_task_t *task;
-
- /* Never allow tasks on hidden names. */
- if (name1[0] == '$')
- return (NULL);
-
- task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
- task->op = op;
- task->value = value;
-
- strlcpy(task->name1, name1, sizeof (task->name1));
- if (name2 != NULL)
- strlcpy(task->name2, name2, sizeof (task->name2));
-
- return (task);
-}
-
-static void
-zvol_task_free(zvol_task_t *task)
-{
- kmem_free(task, sizeof (zvol_task_t));
-}
-
/*
* The worker thread function performed asynchronously.
*/
@@ -1868,25 +1908,29 @@ zvol_task_cb(void *arg)
{
zvol_task_t *task = arg;
- switch (task->op) {
+ switch (task->zt_op) {
+ case ZVOL_ASYNC_CREATE_MINORS:
+ zvol_create_minors_impl(task);
+ break;
case ZVOL_ASYNC_REMOVE_MINORS:
- zvol_remove_minors_impl(task->name1);
+ zvol_remove_minors_impl(task);
break;
case ZVOL_ASYNC_RENAME_MINORS:
- zvol_rename_minors_impl(task->name1, task->name2);
+ zvol_rename_minors_impl(task);
break;
case ZVOL_ASYNC_SET_SNAPDEV:
- zvol_set_snapdev_impl(task->name1, task->value);
+ zvol_set_snapdev_impl(task);
break;
case ZVOL_ASYNC_SET_VOLMODE:
- zvol_set_volmode_impl(task->name1, task->value);
+ zvol_set_volmode_impl(task);
break;
default:
VERIFY(0);
break;
}
- zvol_task_free(task);
+ zvol_task_report_status(task);
+ kmem_free(task, sizeof (zvol_task_t));
}
typedef struct zvol_set_prop_int_arg {
@@ -1931,23 +1975,17 @@ zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
if (dsl_prop_get_int_ds(ds, prop_name, &prop) != 0)
return (0);
- switch (zsda->zsda_prop) {
- case ZFS_PROP_VOLMODE:
- task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname,
- NULL, prop);
- break;
- case ZFS_PROP_SNAPDEV:
- task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
- NULL, prop);
- break;
- default:
- task = NULL;
- break;
- }
-
- if (task == NULL)
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ if (zsda->zsda_prop == ZFS_PROP_VOLMODE) {
+ task->zt_op = ZVOL_ASYNC_SET_VOLMODE;
+ } else if (zsda->zsda_prop == ZFS_PROP_SNAPDEV) {
+ task->zt_op = ZVOL_ASYNC_SET_SNAPDEV;
+ } else {
+ kmem_free(task, sizeof (zvol_task_t));
return (0);
-
+ }
+ task->zt_value = prop;
+ strlcpy(task->zt_name1, dsname, sizeof (task->zt_name1));
(void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
task, TQ_SLEEP);
return (0);
@@ -2001,15 +2039,34 @@ zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source,
}
void
-zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
+zvol_create_minors(const char *name)
{
+ spa_t *spa;
zvol_task_t *task;
taskqid_t id;
- task = zvol_task_alloc(ZVOL_ASYNC_REMOVE_MINORS, name, NULL, ~0ULL);
- if (task == NULL)
+ if (spa_open(name, &spa, FTAG) != 0)
return;
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ task->zt_op = ZVOL_ASYNC_CREATE_MINORS;
+ strlcpy(task->zt_name1, name, sizeof (task->zt_name1));
+ id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
+ if (id != TASKQID_INVALID)
+ taskq_wait_id(spa->spa_zvol_taskq, id);
+
+ spa_close(spa, FTAG);
+}
+
+void
+zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
+{
+ zvol_task_t *task;
+ taskqid_t id;
+
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ task->zt_op = ZVOL_ASYNC_REMOVE_MINORS;
+ strlcpy(task->zt_name1, name, sizeof (task->zt_name1));
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
if ((async == B_FALSE) && (id != TASKQID_INVALID))
taskq_wait_id(spa->spa_zvol_taskq, id);
@@ -2022,10 +2079,10 @@ zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
zvol_task_t *task;
taskqid_t id;
- task = zvol_task_alloc(ZVOL_ASYNC_RENAME_MINORS, name1, name2, ~0ULL);
- if (task == NULL)
- return;
-
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ task->zt_op = ZVOL_ASYNC_RENAME_MINORS;
+ strlcpy(task->zt_name1, name1, sizeof (task->zt_name1));
+ strlcpy(task->zt_name2, name2, sizeof (task->zt_name2));
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
if ((async == B_FALSE) && (id != TASKQID_INVALID))
taskq_wait_id(spa->spa_zvol_taskq, id);
@@ -2144,7 +2201,7 @@ zvol_fini_impl(void)
rw_destroy(&zvol_state_lock);
if (ztqs->tqs_taskq == NULL) {
- ASSERT3U(ztqs->tqs_cnt, ==, 0);
+ ASSERT0(ztqs->tqs_cnt);
} else {
for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
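
The zvol changes above convert minor creation, removal, rename, snapdev and volmode handling into taskq work items that accumulate a total/done count plus the last error seen, and report once when the task finishes. A rough, self-contained sketch of that accumulate-then-report shape (simplified types, not the zvol_task_t layout):

#include <stdint.h>
#include <stdio.h>

struct task_status {
	uint32_t total;		/* operations attempted */
	uint32_t done;		/* operations that succeeded */
	int last_error;		/* last nonzero errno observed */
};

static void
status_update(struct task_status *ts, uint32_t total, uint32_t done, int err)
{
	ts->total += total;
	ts->done += done;
	if (err != 0)
		ts->last_error = err;
}

static void
status_report(const struct task_status *ts, const char *op)
{
	if (ts->total != ts->done)
		fprintf(stderr, "%s: %u of %u failed, last error %d\n",
		    op, ts->total - ts->done, ts->total, ts->last_error);
}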