Diffstat (limited to 'sys/contrib/openzfs/module/zfs')
-rw-r--r--  sys/contrib/openzfs/module/zfs/abd.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c | 100
-rw-r--r--  sys/contrib/openzfs/module/zfs/bpobj.c | 10
-rw-r--r--  sys/contrib/openzfs/module/zfs/brt.c | 70
-rw-r--r--  sys/contrib/openzfs/module/zfs/btree.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dataset_kstats.c | 1
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c | 156
-rw-r--r--  sys/contrib/openzfs/module/zfs/ddt.c | 23
-rw-r--r--  sys/contrib/openzfs/module/zfs/ddt_log.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu.c | 23
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_diff.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_direct.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_object.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_objset.c | 12
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_recv.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_redact.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_send.c | 10
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_traverse.c | 17
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_tx.c | 18
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode.c | 41
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode_sync.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_bookmark.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_crypt.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dataset.c | 30
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_deadlist.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_deleg.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_destroy.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dir.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_pool.c | 12
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_prop.c | 31
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_scan.c | 33
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_userhold.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/fm.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c | 216
-rw-r--r--  sys/contrib/openzfs/module/zfs/mmp.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/range_tree.c | 100
-rw-r--r--  sys/contrib/openzfs/module/zfs/rrwlock.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/sa.c | 24
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c | 260
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_errlog.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c | 30
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_stats.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/space_map.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/space_reftree.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev.c | 103
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_draid.c | 16
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_indirect.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_initialize.c | 11
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_label.c | 14
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_mirror.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_queue.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_raidz.c | 36
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_rebuild.c | 9
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_removal.c | 52
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_trim.c | 32
-rw-r--r--  sys/contrib/openzfs/module/zfs/zap.c | 10
-rw-r--r--  sys/contrib/openzfs/module/zfs/zap_micro.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/zcp.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfeature.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_chksum.c | 69
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_crrd.c | 227
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_fuid.c | 44
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 155
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_log.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_quota.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_rlock.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_sa.c | 15
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_vnops.c | 58
-rw-r--r--  sys/contrib/openzfs/module/zfs/zil.c | 556
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c | 388
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_checksum.c | 33
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_compress.c | 15
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_inject.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zrlock.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zthr.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zvol.c | 409
76 files changed, 2470 insertions(+), 1211 deletions(-)
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 826928e67350..bf9b13c30509 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -563,7 +563,7 @@ abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size)
left -= csize;
off = 0;
}
- ASSERT3U(left, ==, 0);
+ ASSERT0(left);
} else {
abd = abd_get_offset_scatter(abd, sabd, off, size);
}
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 04ca32356a6d..df41e3b49204 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -1052,7 +1052,7 @@ static arc_buf_hdr_t *
buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp)
{
const dva_t *dva = BP_IDENTITY(bp);
- uint64_t birth = BP_GET_BIRTH(bp);
+ uint64_t birth = BP_GET_PHYSICAL_BIRTH(bp);
uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
arc_buf_hdr_t *hdr;
@@ -2239,8 +2239,8 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) {
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
(void) zfs_refcount_add_many(&state->arcs_esize[type],
HDR_GET_LSIZE(hdr), hdr);
@@ -2278,8 +2278,8 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(state)) {
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
(void) zfs_refcount_remove_many(&state->arcs_esize[type],
HDR_GET_LSIZE(hdr), hdr);
@@ -2319,7 +2319,7 @@ add_reference(arc_buf_hdr_t *hdr, const void *tag)
if (!HDR_EMPTY(hdr) && !MUTEX_HELD(HDR_LOCK(hdr))) {
ASSERT(state == arc_anon);
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
}
if ((zfs_refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) &&
@@ -2503,7 +2503,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
(void) zfs_refcount_add_many(
&new_state->arcs_size[type],
HDR_GET_LSIZE(hdr), hdr);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
} else {
@@ -2547,7 +2547,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (update_old && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(old_state)) {
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
/*
@@ -2631,7 +2631,7 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
ARCSTAT_INCR(arcstat_bonus_size, space);
break;
case ARC_SPACE_DNODE:
- ARCSTAT_INCR(arcstat_dnode_size, space);
+ aggsum_add(&arc_sums.arcstat_dnode_size, space);
break;
case ARC_SPACE_DBUF:
ARCSTAT_INCR(arcstat_dbuf_size, space);
@@ -2677,7 +2677,7 @@ arc_space_return(uint64_t space, arc_space_type_t type)
ARCSTAT_INCR(arcstat_bonus_size, -space);
break;
case ARC_SPACE_DNODE:
- ARCSTAT_INCR(arcstat_dnode_size, -space);
+ aggsum_add(&arc_sums.arcstat_dnode_size, -space);
break;
case ARC_SPACE_DBUF:
ARCSTAT_INCR(arcstat_dbuf_size, -space);
@@ -2758,7 +2758,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
VERIFY(hdr->b_type == ARC_BUFC_DATA ||
hdr->b_type == ARC_BUFC_METADATA);
ASSERT3P(ret, !=, NULL);
- ASSERT3P(*ret, ==, NULL);
+ ASSERT0P(*ret);
IMPLY(encrypted, compressed);
buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
@@ -2982,7 +2982,7 @@ static void
arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
{
ASSERT(arc_can_share(hdr, buf));
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!ARC_BUF_ENCRYPTED(buf));
ASSERT(HDR_EMPTY_OR_LOCKED(hdr));
@@ -3201,14 +3201,14 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
if (alloc_rdata) {
size = HDR_GET_PSIZE(hdr);
- ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
+ ASSERT0P(hdr->b_crypt_hdr.b_rabd);
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
alloc_flags);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
ARCSTAT_INCR(arcstat_raw_size, size);
} else {
size = arc_hdr_size(hdr);
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
alloc_flags);
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
@@ -3290,7 +3290,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
ASSERT(HDR_EMPTY(hdr));
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
HDR_SET_PSIZE(hdr, psize);
HDR_SET_LSIZE(hdr, lsize);
@@ -3351,12 +3351,12 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
nhdr->b_l1hdr.b_state = arc_l2c_only;
/* Verify previous threads set to NULL before freeing */
- ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(nhdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
} else {
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
/*
@@ -3375,7 +3375,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
* might try to be accessed, even though it was removed.
*/
VERIFY(!HDR_L2_WRITING(hdr));
- VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ VERIFY0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
arc_hdr_clear_flags(nhdr, ARC_FLAG_HAS_L1HDR);
@@ -3698,12 +3698,12 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
arc_hdr_free_abd(hdr, B_TRUE);
}
- ASSERT3P(hdr->b_hash_next, ==, NULL);
+ ASSERT0P(hdr->b_hash_next);
if (HDR_HAS_L1HDR(hdr)) {
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
kmem_cache_free(hdr_full_cache, hdr);
} else {
@@ -3771,7 +3771,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
*real_evicted = 0;
@@ -3796,7 +3796,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr);
if (HDR_HAS_L2HDR(hdr)) {
- ASSERT(hdr->b_l1hdr.b_pabd == NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
/*
* This buffer is cached on the 2nd Level ARC;
@@ -4490,7 +4490,7 @@ arc_evict(void)
* target is not evictable or if they go over arc_dnode_limit.
*/
int64_t prune = 0;
- int64_t dn = wmsum_value(&arc_sums.arcstat_dnode_size);
+ int64_t dn = aggsum_value(&arc_sums.arcstat_dnode_size);
int64_t nem = zfs_refcount_count(&arc_mru->arcs_size[ARC_BUFC_METADATA])
+ zfs_refcount_count(&arc_mfu->arcs_size[ARC_BUFC_METADATA])
- zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA])
@@ -5082,11 +5082,13 @@ arc_is_overflowing(boolean_t lax, boolean_t use_reserve)
* in the ARC. In practice, that's in the tens of MB, which is low
* enough to be safe.
*/
- int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) - arc_c -
+ int64_t arc_over = aggsum_lower_bound(&arc_sums.arcstat_size) - arc_c -
zfs_max_recordsize;
+ int64_t dn_over = aggsum_lower_bound(&arc_sums.arcstat_dnode_size) -
+ arc_dnode_limit;
/* Always allow at least one block of overflow. */
- if (over < 0)
+ if (arc_over < 0 && dn_over <= 0)
return (ARC_OVF_NONE);
/* If we are under memory pressure, report severe overflow. */
@@ -5097,7 +5099,7 @@ arc_is_overflowing(boolean_t lax, boolean_t use_reserve)
int64_t overflow = (arc_c >> zfs_arc_overflow_shift) / 2;
if (use_reserve)
overflow *= 3;
- return (over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
+ return (arc_over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
}
static abd_t *
@@ -5552,7 +5554,7 @@ static void
arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp)
{
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) {
- ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0);
+ ASSERT0(HDR_GET_PSIZE(hdr));
ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF);
} else {
if (HDR_COMPRESSION_ENABLED(hdr)) {
@@ -5585,7 +5587,7 @@ arc_read_done(zio_t *zio)
if (HDR_IN_HASH_TABLE(hdr)) {
arc_buf_hdr_t *found;
- ASSERT3U(hdr->b_birth, ==, BP_GET_BIRTH(zio->io_bp));
+ ASSERT3U(hdr->b_birth, ==, BP_GET_PHYSICAL_BIRTH(zio->io_bp));
ASSERT3U(hdr->b_dva.dva_word[0], ==,
BP_IDENTITY(zio->io_bp)->dva_word[0]);
ASSERT3U(hdr->b_dva.dva_word[1], ==,
@@ -5688,7 +5690,7 @@ arc_read_done(zio_t *zio)
error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(zio->io_spa, &acb->acb_zb,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION,
zio->io_spa, NULL, &acb->acb_zb, zio, 0);
@@ -6107,7 +6109,7 @@ top:
if (!embedded_bp) {
hdr->b_dva = *BP_IDENTITY(bp);
- hdr->b_birth = BP_GET_BIRTH(bp);
+ hdr->b_birth = BP_GET_PHYSICAL_BIRTH(bp);
exists = buf_hash_insert(hdr, &hash_lock);
}
if (exists != NULL) {
@@ -6130,14 +6132,14 @@ top:
}
if (GHOST_STATE(hdr->b_l1hdr.b_state)) {
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT0(zfs_refcount_count(
&hdr->b_l1hdr.b_refcnt));
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_buf);
#ifdef ZFS_DEBUG
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_freeze_cksum);
#endif
} else if (HDR_IO_IN_PROGRESS(hdr)) {
/*
@@ -6231,7 +6233,7 @@ top:
acb->acb_nobuf = no_buf;
acb->acb_zb = *zb;
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
hdr->b_l1hdr.b_acb = acb;
if (HDR_HAS_L2HDR(hdr) &&
@@ -6715,7 +6717,7 @@ arc_release(arc_buf_t *buf, const void *tag)
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
compress, hdr->b_complevel, type);
- ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0P(nhdr->b_l1hdr.b_buf);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
VERIFY3U(nhdr->b_type, ==, type);
ASSERT(!HDR_SHARED_DATA(nhdr));
@@ -6802,7 +6804,7 @@ arc_write_ready(zio_t *zio)
if (HDR_HAS_RABD(hdr))
arc_hdr_free_abd(hdr, B_TRUE);
}
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
ASSERT(!HDR_HAS_RABD(hdr));
ASSERT(!HDR_SHARED_DATA(hdr));
ASSERT(!arc_buf_is_shared(buf));
@@ -6946,7 +6948,7 @@ arc_write_done(zio_t *zio)
arc_buf_t *buf = callback->awcb_buf;
arc_buf_hdr_t *hdr = buf->b_hdr;
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
if (zio->io_error == 0) {
arc_hdr_verify(hdr, zio->io_bp);
@@ -6955,7 +6957,7 @@ arc_write_done(zio_t *zio)
buf_discard_identity(hdr);
} else {
hdr->b_dva = *BP_IDENTITY(zio->io_bp);
- hdr->b_birth = BP_GET_BIRTH(zio->io_bp);
+ hdr->b_birth = BP_GET_PHYSICAL_BIRTH(zio->io_bp);
}
} else {
ASSERT(HDR_EMPTY(hdr));
@@ -6971,7 +6973,7 @@ arc_write_done(zio_t *zio)
arc_buf_hdr_t *exists;
kmutex_t *hash_lock;
- ASSERT3U(zio->io_error, ==, 0);
+ ASSERT0(zio->io_error);
arc_cksum_verify(buf);
@@ -6992,7 +6994,7 @@ arc_write_done(zio_t *zio)
arc_hdr_destroy(exists);
mutex_exit(hash_lock);
exists = buf_hash_insert(hdr, &hash_lock);
- ASSERT3P(exists, ==, NULL);
+ ASSERT0P(exists);
} else if (zio->io_flags & ZIO_FLAG_NOPWRITE) {
/* nopwrite */
ASSERT(zio->io_prop.zp_nopwrite);
@@ -7005,7 +7007,7 @@ arc_write_done(zio_t *zio)
ASSERT(ARC_BUF_LAST(hdr->b_l1hdr.b_buf));
ASSERT(hdr->b_l1hdr.b_state == arc_anon);
ASSERT(BP_GET_DEDUP(zio->io_bp));
- ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
+ ASSERT0(BP_GET_LEVEL(zio->io_bp));
}
}
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
@@ -7042,7 +7044,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT3P(done, !=, NULL);
ASSERT(!HDR_IO_ERROR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_acb);
ASSERT3P(hdr->b_l1hdr.b_buf, !=, NULL);
if (uncached)
arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED);
@@ -7111,7 +7113,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF);
ASSERT(!arc_buf_is_shared(buf));
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT0P(hdr->b_l1hdr.b_pabd);
zio = zio_write(pio, spa, txg, bp,
abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)),
@@ -7326,7 +7328,7 @@ arc_kstat_update(kstat_t *ksp, int rw)
#if defined(COMPAT_FREEBSD11)
as->arcstat_other_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_bonus_size) +
- wmsum_value(&arc_sums.arcstat_dnode_size) +
+ aggsum_value(&arc_sums.arcstat_dnode_size) +
wmsum_value(&arc_sums.arcstat_dbuf_size);
#endif
@@ -7368,7 +7370,7 @@ arc_kstat_update(kstat_t *ksp, int rw)
&as->arcstat_uncached_evictable_metadata);
as->arcstat_dnode_size.value.ui64 =
- wmsum_value(&arc_sums.arcstat_dnode_size);
+ aggsum_value(&arc_sums.arcstat_dnode_size);
as->arcstat_bonus_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_bonus_size);
as->arcstat_l2_hits.value.ui64 =
@@ -7738,7 +7740,7 @@ arc_state_init(void)
wmsum_init(&arc_sums.arcstat_data_size, 0);
wmsum_init(&arc_sums.arcstat_metadata_size, 0);
wmsum_init(&arc_sums.arcstat_dbuf_size, 0);
- wmsum_init(&arc_sums.arcstat_dnode_size, 0);
+ aggsum_init(&arc_sums.arcstat_dnode_size, 0);
wmsum_init(&arc_sums.arcstat_bonus_size, 0);
wmsum_init(&arc_sums.arcstat_l2_hits, 0);
wmsum_init(&arc_sums.arcstat_l2_misses, 0);
@@ -7897,7 +7899,7 @@ arc_state_fini(void)
wmsum_fini(&arc_sums.arcstat_data_size);
wmsum_fini(&arc_sums.arcstat_metadata_size);
wmsum_fini(&arc_sums.arcstat_dbuf_size);
- wmsum_fini(&arc_sums.arcstat_dnode_size);
+ aggsum_fini(&arc_sums.arcstat_dnode_size);
wmsum_fini(&arc_sums.arcstat_bonus_size);
wmsum_fini(&arc_sums.arcstat_l2_hits);
wmsum_fini(&arc_sums.arcstat_l2_misses);
diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c
index 8c19de93f12f..ea9fbd036c6e 100644
--- a/sys/contrib/openzfs/module/zfs/bpobj.c
+++ b/sys/contrib/openzfs/module/zfs/bpobj.c
@@ -160,8 +160,8 @@ bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
memset(bpo, 0, sizeof (*bpo));
mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
- ASSERT(bpo->bpo_dbuf == NULL);
- ASSERT(bpo->bpo_phys == NULL);
+ ASSERT0P(bpo->bpo_dbuf);
+ ASSERT0P(bpo->bpo_phys);
ASSERT(object != 0);
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
@@ -478,7 +478,7 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
* We have unprocessed subobjs. Process the next one.
*/
ASSERT(bpo->bpo_havecomp);
- ASSERT3P(bpobj_size, ==, NULL);
+ ASSERT0P(bpobj_size);
/* Add the last subobj to stack. */
int64_t i = bpi->bpi_unprocessed_subobjs - 1;
@@ -954,8 +954,8 @@ space_range_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
(void) bp_freed, (void) tx;
struct space_range_arg *sra = arg;
- if (BP_GET_LOGICAL_BIRTH(bp) > sra->mintxg &&
- BP_GET_LOGICAL_BIRTH(bp) <= sra->maxtxg) {
+ if (BP_GET_BIRTH(bp) > sra->mintxg &&
+ BP_GET_BIRTH(bp) <= sra->maxtxg) {
if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
sra->used += bp_get_dsize_sync(sra->spa, bp);
else
diff --git a/sys/contrib/openzfs/module/zfs/brt.c b/sys/contrib/openzfs/module/zfs/brt.c
index 27d9ed7ea2b0..40664354aa73 100644
--- a/sys/contrib/openzfs/module/zfs/brt.c
+++ b/sys/contrib/openzfs/module/zfs/brt.c
@@ -478,6 +478,18 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
sizeof (uint64_t), 1, &brtvd->bv_mos_brtvdev, tx));
BRT_DEBUG("Pool directory object created, object=%s", name);
+ /*
+ * Activate the endian-fixed feature if this is the first BRT ZAP
+ * (i.e., BLOCK_CLONING is not yet active) and the feature is enabled.
+ */
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN) &&
+ !spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
+ spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
+ } else if (spa_feature_is_active(spa,
+ SPA_FEATURE_BLOCK_CLONING_ENDIAN)) {
+ spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
+ }
+
spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
}
@@ -658,6 +670,8 @@ brt_vdev_destroy(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
rw_exit(&brtvd->bv_lock);
spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
+ if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN))
+ spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
}
static void
@@ -855,16 +869,29 @@ brt_entry_fill(const blkptr_t *bp, brt_entry_t *bre, uint64_t *vdevidp)
*vdevidp = DVA_GET_VDEV(&bp->blk_dva[0]);
}
+static boolean_t
+brt_has_endian_fixed(spa_t *spa)
+{
+ return (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN));
+}
+
static int
-brt_entry_lookup(brt_vdev_t *brtvd, brt_entry_t *bre)
+brt_entry_lookup(spa_t *spa, brt_vdev_t *brtvd, brt_entry_t *bre)
{
uint64_t off = BRE_OFFSET(bre);
if (brtvd->bv_mos_entries == 0)
return (SET_ERROR(ENOENT));
- return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
- &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count), &bre->bre_count));
+ if (brt_has_endian_fixed(spa)) {
+ return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
+ &off, BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+ &bre->bre_count));
+ } else {
+ return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
+ &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+ &bre->bre_count));
+ }
}
/*
@@ -1056,7 +1083,7 @@ brt_entry_decref(spa_t *spa, const blkptr_t *bp)
}
rw_exit(&brtvd->bv_lock);
- error = brt_entry_lookup(brtvd, &bre_search);
+ error = brt_entry_lookup(spa, brtvd, &bre_search);
/* bre_search now contains correct bre_count */
if (error == ENOENT) {
BRTSTAT_BUMP(brt_decref_no_entry);
@@ -1118,7 +1145,7 @@ brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
if (bre == NULL) {
rw_exit(&brtvd->bv_lock);
- error = brt_entry_lookup(brtvd, &bre_search);
+ error = brt_entry_lookup(spa, brtvd, &bre_search);
if (error == ENOENT) {
refcnt = 0;
} else {
@@ -1270,10 +1297,18 @@ brt_pending_apply_vdev(spa_t *spa, brt_vdev_t *brtvd, uint64_t txg)
uint64_t off = BRE_OFFSET(bre);
if (brtvd->bv_mos_entries != 0 &&
brt_vdev_lookup(spa, brtvd, off)) {
- int error = zap_lookup_uint64_by_dnode(
- brtvd->bv_mos_entries_dnode, &off,
- BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
- &bre->bre_count);
+ int error;
+ if (brt_has_endian_fixed(spa)) {
+ error = zap_lookup_uint64_by_dnode(
+ brtvd->bv_mos_entries_dnode, &off,
+ BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+ &bre->bre_count);
+ } else {
+ error = zap_lookup_uint64_by_dnode(
+ brtvd->bv_mos_entries_dnode, &off,
+ BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+ &bre->bre_count);
+ }
if (error == 0) {
BRTSTAT_BUMP(brt_addref_entry_on_disk);
} else {
@@ -1326,7 +1361,7 @@ brt_pending_apply(spa_t *spa, uint64_t txg)
}
static void
-brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
+brt_sync_entry(spa_t *spa, dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
{
uint64_t off = BRE_OFFSET(bre);
@@ -1337,9 +1372,15 @@ brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
BRT_KEY_WORDS, tx);
VERIFY(error == 0 || error == ENOENT);
} else {
- VERIFY0(zap_update_uint64_by_dnode(dn, &off,
- BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
- &bre->bre_count, tx));
+ if (brt_has_endian_fixed(spa)) {
+ VERIFY0(zap_update_uint64_by_dnode(dn, &off,
+ BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
+ &bre->bre_count, tx));
+ } else {
+ VERIFY0(zap_update_uint64_by_dnode(dn, &off,
+ BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
+ &bre->bre_count, tx));
+ }
}
}
@@ -1368,7 +1409,8 @@ brt_sync_table(spa_t *spa, dmu_tx_t *tx)
void *c = NULL;
while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) {
- brt_sync_entry(brtvd->bv_mos_entries_dnode, bre, tx);
+ brt_sync_entry(spa, brtvd->bv_mos_entries_dnode, bre,
+ tx);
kmem_cache_free(brt_entry_cache, bre);
}
diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c
index aa282f711bc3..725b96a3b2c7 100644
--- a/sys/contrib/openzfs/module/zfs/btree.c
+++ b/sys/contrib/openzfs/module/zfs/btree.c
@@ -1110,7 +1110,7 @@ zfs_btree_add_idx(zfs_btree_t *tree, const void *value,
if (where->bti_node == NULL) {
ASSERT3U(tree->bt_num_elems, ==, 1);
ASSERT3S(tree->bt_height, ==, -1);
- ASSERT3P(tree->bt_root, ==, NULL);
+ ASSERT0P(tree->bt_root);
ASSERT0(where->bti_offset);
tree->bt_num_nodes++;
@@ -1947,7 +1947,7 @@ void
zfs_btree_destroy(zfs_btree_t *tree)
{
ASSERT0(tree->bt_num_elems);
- ASSERT3P(tree->bt_root, ==, NULL);
+ ASSERT0P(tree->bt_root);
}
/* Verify that every child of this node has the correct parent pointer. */
@@ -1969,10 +1969,10 @@ static void
zfs_btree_verify_pointers(zfs_btree_t *tree)
{
if (tree->bt_height == -1) {
- VERIFY3P(tree->bt_root, ==, NULL);
+ VERIFY0P(tree->bt_root);
return;
}
- VERIFY3P(tree->bt_root->bth_parent, ==, NULL);
+ VERIFY0P(tree->bt_root->bth_parent);
zfs_btree_verify_pointers_helper(tree, tree->bt_root);
}
diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
index d3baabd6169f..e5abcd2044cf 100644
--- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c
+++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
@@ -44,6 +44,7 @@ static dataset_kstat_values_t empty_dataset_kstats = {
{ "zil_commit_error_count", KSTAT_DATA_UINT64 },
{ "zil_commit_stall_count", KSTAT_DATA_UINT64 },
{ "zil_commit_suspend_count", KSTAT_DATA_UINT64 },
+ { "zil_commit_crash_count", KSTAT_DATA_UINT64 },
{ "zil_itx_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 },
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index f1b5a17f337e..3d0f88b36336 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -523,7 +523,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
return;
/* Only data blocks support the attachment of user data. */
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
/* Clients must resolve a dbuf before attaching user data. */
ASSERT(db->db.db_data != NULL);
@@ -866,8 +866,16 @@ dbuf_evict_notify(uint64_t size)
* and grabbing the lock results in massive lock contention.
*/
if (size > dbuf_cache_target_bytes()) {
- if (size > dbuf_cache_hiwater_bytes())
+ /*
+ * Avoid calling dbuf_evict_one() from memory reclaim context
+ * (e.g. Linux kswapd, FreeBSD pagedaemon) to prevent deadlocks.
+ * Memory reclaim threads can get stuck waiting for the dbuf
+ * hash lock.
+ */
+ if (size > dbuf_cache_hiwater_bytes() &&
+ !current_is_reclaim_thread()) {
dbuf_evict_one();
+ }
cv_signal(&dbuf_evict_cv);
}
}
@@ -1120,8 +1128,8 @@ dbuf_verify(dmu_buf_impl_t *db)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
if (dn == NULL) {
- ASSERT(db->db_parent == NULL);
- ASSERT(db->db_blkptr == NULL);
+ ASSERT0P(db->db_parent);
+ ASSERT0P(db->db_blkptr);
} else {
ASSERT3U(db->db.db_object, ==, dn->dn_object);
ASSERT3P(db->db_objset, ==, dn->dn_objset);
@@ -1172,7 +1180,7 @@ dbuf_verify(dmu_buf_impl_t *db)
/* db is pointed to by the dnode */
/* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
if (DMU_OBJECT_IS_SPECIAL(db->db.db_object))
- ASSERT(db->db_parent == NULL);
+ ASSERT0P(db->db_parent);
else
ASSERT(db->db_parent != NULL);
if (db->db_blkid != DMU_SPILL_BLKID)
@@ -1211,7 +1219,7 @@ dbuf_verify(dmu_buf_impl_t *db)
int i;
for (i = 0; i < db->db.db_size >> 3; i++) {
- ASSERT(buf[i] == 0);
+ ASSERT0(buf[i]);
}
} else {
blkptr_t *bps = db->db.db_data;
@@ -1235,11 +1243,9 @@ dbuf_verify(dmu_buf_impl_t *db)
DVA_IS_EMPTY(&bp->blk_dva[1]) &&
DVA_IS_EMPTY(&bp->blk_dva[2]));
ASSERT0(bp->blk_fill);
- ASSERT0(bp->blk_pad[0]);
- ASSERT0(bp->blk_pad[1]);
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(BP_IS_HOLE(bp));
- ASSERT0(BP_GET_PHYSICAL_BIRTH(bp));
+ ASSERT0(BP_GET_RAW_PHYSICAL_BIRTH(bp));
}
}
}
@@ -1253,7 +1259,7 @@ dbuf_clear_data(dmu_buf_impl_t *db)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
dbuf_evict_user(db);
- ASSERT3P(db->db_buf, ==, NULL);
+ ASSERT0P(db->db_buf);
db->db.db_data = NULL;
if (db->db_state != DB_NOFILL) {
db->db_state = DB_UNCACHED;
@@ -1378,13 +1384,13 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
* All reads are synchronous, so we must have a hold on the dbuf
*/
ASSERT(zfs_refcount_count(&db->db_holds) > 0);
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
if (buf == NULL) {
/* i/o error */
ASSERT(zio == NULL || zio->io_error != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- ASSERT3P(db->db_buf, ==, NULL);
+ ASSERT0P(db->db_buf);
db->db_state = DB_UNCACHED;
DTRACE_SET_STATE(db, "i/o error");
} else if (db->db_level == 0 && db->db_freed_in_flight) {
@@ -1578,7 +1584,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags,
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
- ASSERT(db->db_buf == NULL);
+ ASSERT0P(db->db_buf);
ASSERT(db->db_parent == NULL ||
RW_LOCK_HELD(&db->db_parent->db_rwlock));
@@ -1615,7 +1621,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags,
*/
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bp)) {
spa_log_error(db->db_objset->os_spa, &zb,
- BP_GET_LOGICAL_BIRTH(bp));
+ BP_GET_PHYSICAL_BIRTH(bp));
err = SET_ERROR(EIO);
goto early_unlock;
}
@@ -1676,7 +1682,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db.db_data != NULL);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
if (dr == NULL ||
@@ -1895,8 +1901,8 @@ dbuf_noread(dmu_buf_impl_t *db, dmu_flags_t flags)
while (db->db_state == DB_READ || db->db_state == DB_FILL)
cv_wait(&db->db_changed, &db->db_mtx);
if (db->db_state == DB_UNCACHED) {
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
dbuf_set_data(db, dbuf_alloc_arcbuf(db));
db->db_state = DB_FILL;
DTRACE_SET_STATE(db, "assigning filled buffer");
@@ -1923,7 +1929,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
* comes from dbuf_dirty() callers who must also hold a range lock.
*/
ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
if (db->db_blkid == DMU_BONUS_BLKID ||
dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN)
@@ -1988,7 +1994,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
mutex_enter(&dn->dn_dbufs_mtx);
db = avl_find(&dn->dn_dbufs, db_search, &where);
- ASSERT3P(db, ==, NULL);
+ ASSERT0P(db);
db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
@@ -2011,7 +2017,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
if (db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL ||
db->db_state == DB_EVICTING) {
- ASSERT(db->db.db_data == NULL);
+ ASSERT0P(db->db.db_data);
mutex_exit(&db->db_mtx);
continue;
}
@@ -2154,6 +2160,12 @@ dbuf_redirty(dbuf_dirty_record_t *dr)
ASSERT(arc_released(db->db_buf));
arc_buf_thaw(db->db_buf);
}
+
+ /*
+ * Clear the rewrite flag since this is now a logical
+ * modification.
+ */
+ dr->dt.dl.dr_rewrite = B_FALSE;
}
}
@@ -2701,6 +2713,38 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
}
+void
+dmu_buf_will_rewrite(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+
+ ASSERT(tx->tx_txg != 0);
+ ASSERT(!zfs_refcount_is_zero(&db->db_holds));
+
+ /*
+ * If the dbuf is already dirty in this txg, it will be written
+ * anyway, so there's nothing to do.
+ */
+ mutex_enter(&db->db_mtx);
+ if (dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
+ mutex_exit(&db->db_mtx);
+ return;
+ }
+ mutex_exit(&db->db_mtx);
+
+ /*
+ * The dbuf is not dirty, so we need to make it dirty and
+ * mark it for rewrite (preserve logical birth time).
+ */
+ dmu_buf_will_dirty_flags(db_fake, tx, DMU_READ_NO_PREFETCH);
+
+ mutex_enter(&db->db_mtx);
+ dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
+ if (dr != NULL && db->db_level == 0)
+ dr->dt.dl.dr_rewrite = B_TRUE;
+ mutex_exit(&db->db_mtx);
+}
+
boolean_t
dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
@@ -2852,8 +2896,8 @@ dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx)
dbuf_clear_data(db);
}
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
db->db_state = DB_NOFILL;
DTRACE_SET_STATE(db,
@@ -2888,7 +2932,7 @@ dmu_buf_will_fill_flags(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail,
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT(tx->tx_txg != 0);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||
@@ -3100,7 +3144,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
{
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- ASSERT(db->db_level == 0);
+ ASSERT0(db->db_level);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
@@ -3165,7 +3209,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
VERIFY(!dbuf_undirty(db, tx));
db->db_state = DB_UNCACHED;
}
- ASSERT(db->db_buf == NULL);
+ ASSERT0P(db->db_buf);
dbuf_set_data(db, buf);
db->db_state = DB_FILL;
DTRACE_SET_STATE(db, "filling assigned arcbuf");
@@ -3225,7 +3269,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
}
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
- ASSERT(db->db_data_pending == NULL);
+ ASSERT0P(db->db_data_pending);
ASSERT(list_is_empty(&db->db_dirty_records));
db->db_state = DB_EVICTING;
@@ -3277,11 +3321,11 @@ dbuf_destroy(dmu_buf_impl_t *db)
db->db_parent = NULL;
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
- ASSERT(db->db_hash_next == NULL);
- ASSERT(db->db_blkptr == NULL);
- ASSERT(db->db_data_pending == NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
+ ASSERT0P(db->db_hash_next);
+ ASSERT0P(db->db_blkptr);
+ ASSERT0P(db->db_data_pending);
ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
ASSERT(!multilist_link_active(&db->db_cache_link));
@@ -3916,7 +3960,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
if (fail_uncached)
return (SET_ERROR(ENOENT));
- ASSERT3P(parent, ==, NULL);
+ ASSERT0P(parent);
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
if (fail_sparse) {
if (err == 0 && bp && BP_IS_HOLE(bp))
@@ -4020,7 +4064,7 @@ dbuf_create_bonus(dnode_t *dn)
{
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
- ASSERT(dn->dn_bonus == NULL);
+ ASSERT0P(dn->dn_bonus);
dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL,
dbuf_hash(dn->dn_objset, dn->dn_object, 0, DMU_BONUS_BLKID));
dn->dn_bonus->db_pending_evict = FALSE;
@@ -4372,7 +4416,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
* inappropriate to hook it in (i.e., nlevels mismatch).
*/
ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr);
- ASSERT(db->db_parent == NULL);
+ ASSERT0P(db->db_parent);
db->db_parent = dn->dn_dbuf;
db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid];
DBUF_VERIFY(db);
@@ -4433,7 +4477,7 @@ dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr)
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
- ASSERT3U(db->db_level, ==, 0);
+ ASSERT0(db->db_level);
if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) {
zbookmark_phys_t zb;
@@ -4544,7 +4588,7 @@ dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr)
/* ensure that everything is zero after our data */
for (; datap_end < datap_max; datap_end++)
- ASSERT(*datap_end == 0);
+ ASSERT0(*datap_end);
#endif
}
@@ -4552,7 +4596,7 @@ static blkptr_t *
dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
{
/* This must be a lightweight dirty record. */
- ASSERT3P(dr->dr_dbuf, ==, NULL);
+ ASSERT0P(dr->dr_dbuf);
dnode_t *dn = dr->dr_dnode;
if (dn->dn_phys->dn_nlevels == 1) {
@@ -4695,7 +4739,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
*/
if (db->db_state == DB_UNCACHED) {
/* This buffer has been freed since it was dirtied */
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db.db_data);
} else if (db->db_state == DB_FILL) {
/* This buffer was freed and is now being re-filled */
ASSERT(db->db.db_data != dr->dt.dl.dr_data);
@@ -4712,9 +4756,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
*/
dbuf_dirty_record_t *dr_head =
list_head(&db->db_dirty_records);
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
- ASSERT3P(dr_head->dt.dl.dr_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(db->db.db_data);
+ ASSERT0P(dr_head->dt.dl.dr_data);
ASSERT3U(dr_head->dt.dl.dr_override_state, ==, DR_OVERRIDDEN);
} else {
ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL);
@@ -4899,7 +4943,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
zio->io_prev_space_delta = delta;
- if (BP_GET_LOGICAL_BIRTH(bp) != 0) {
+ if (BP_GET_BIRTH(bp) != 0) {
ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
BP_GET_TYPE(bp) == dn->dn_type) ||
(db->db_blkid == DMU_SPILL_BLKID &&
@@ -5186,7 +5230,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)));
drica.drica_os = dn->dn_objset;
- drica.drica_blk_birth = BP_GET_LOGICAL_BIRTH(bp);
+ drica.drica_blk_birth = BP_GET_BIRTH(bp);
drica.drica_tx = tx;
if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback,
&drica)) {
@@ -5201,8 +5245,7 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
if (dn->dn_objset != spa_meta_objset(spa)) {
dsl_dataset_t *ds = dmu_objset_ds(dn->dn_objset);
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
- BP_GET_LOGICAL_BIRTH(bp) >
- ds->ds_dir->dd_origin_txg) {
+ BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa,
@@ -5320,7 +5363,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
}
ASSERT(db->db_level == 0 || data == db->db_buf);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(db->db_blkptr), <=, txg);
+ ASSERT3U(BP_GET_BIRTH(db->db_blkptr), <=, txg);
ASSERT(pio);
SET_BOOKMARK(&zb, os->os_dsl_dataset ?
@@ -5334,6 +5377,24 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
/*
+ * Set rewrite properties for zfs_rewrite() operations.
+ */
+ if (db->db_level == 0 && dr->dt.dl.dr_rewrite) {
+ zp.zp_rewrite = B_TRUE;
+
+ /*
+ * Mark physical rewrite feature for activation.
+ * This will be activated automatically during dataset sync.
+ */
+ dsl_dataset_t *ds = os->os_dsl_dataset;
+ if (!dsl_dataset_feature_is_active(ds,
+ SPA_FEATURE_PHYSICAL_REWRITE)) {
+ ds->ds_feature_activation[
+ SPA_FEATURE_PHYSICAL_REWRITE] = (void *)B_TRUE;
+ }
+ }
+
+ /*
* We copy the blkptr now (rather than when we instantiate the dirty
* record), because its value can change between open context and
* syncing context. We do not need to hold dn_struct_rwlock to read
@@ -5403,6 +5464,7 @@ EXPORT_SYMBOL(dbuf_release_bp);
EXPORT_SYMBOL(dbuf_dirty);
EXPORT_SYMBOL(dmu_buf_set_crypt_params);
EXPORT_SYMBOL(dmu_buf_will_dirty);
+EXPORT_SYMBOL(dmu_buf_will_rewrite);
EXPORT_SYMBOL(dmu_buf_is_dirty);
EXPORT_SYMBOL(dmu_buf_will_clone_or_dio);
EXPORT_SYMBOL(dmu_buf_will_not_fill);
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
index 60cbb7755a7e..d6658375f810 100644
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -397,7 +397,7 @@ ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_object_name(ddt, type, class, name);
- ASSERT3U(*objectp, ==, 0);
+ ASSERT0(*objectp);
VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash));
ASSERT3U(*objectp, !=, 0);
@@ -724,10 +724,13 @@ ddt_phys_extend(ddt_univ_phys_t *ddp, ddt_phys_variant_t v, const blkptr_t *bp)
dvas[2] = bp->blk_dva[2];
if (ddt_phys_birth(ddp, v) == 0) {
- if (v == DDT_PHYS_FLAT)
- ddp->ddp_flat.ddp_phys_birth = BP_GET_BIRTH(bp);
- else
- ddp->ddp_trad[v].ddp_phys_birth = BP_GET_BIRTH(bp);
+ if (v == DDT_PHYS_FLAT) {
+ ddp->ddp_flat.ddp_phys_birth =
+ BP_GET_PHYSICAL_BIRTH(bp);
+ } else {
+ ddp->ddp_trad[v].ddp_phys_birth =
+ BP_GET_PHYSICAL_BIRTH(bp);
+ }
}
}
@@ -891,14 +894,14 @@ ddt_phys_select(const ddt_t *ddt, const ddt_entry_t *dde, const blkptr_t *bp)
if (ddt->ddt_flags & DDT_FLAG_FLAT) {
if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_flat.ddp_dva[0]) &&
- BP_GET_BIRTH(bp) == ddp->ddp_flat.ddp_phys_birth) {
+ BP_GET_PHYSICAL_BIRTH(bp) == ddp->ddp_flat.ddp_phys_birth) {
return (DDT_PHYS_FLAT);
}
} else /* traditional phys */ {
for (int p = 0; p < DDT_PHYS_MAX; p++) {
if (DVA_EQUAL(BP_IDENTITY(bp),
&ddp->ddp_trad[p].ddp_dva[0]) &&
- BP_GET_BIRTH(bp) ==
+ BP_GET_PHYSICAL_BIRTH(bp) ==
ddp->ddp_trad[p].ddp_phys_birth) {
return (p);
}
@@ -1008,7 +1011,7 @@ ddt_free(const ddt_t *ddt, ddt_entry_t *dde)
{
if (dde->dde_io != NULL) {
for (int p = 0; p < DDT_NPHYS(ddt); p++)
- ASSERT3P(dde->dde_io->dde_lead_zio[p], ==, NULL);
+ ASSERT0P(dde->dde_io->dde_lead_zio[p]);
if (dde->dde_io->dde_repair_abd != NULL)
abd_free(dde->dde_io->dde_repair_abd);
@@ -1418,7 +1421,7 @@ ddt_key_compare(const void *x1, const void *x2)
static void
ddt_create_dir(ddt_t *ddt, dmu_tx_t *tx)
{
- ASSERT3U(ddt->ddt_dir_object, ==, 0);
+ ASSERT0(ddt->ddt_dir_object);
ASSERT3U(ddt->ddt_version, ==, DDT_VERSION_FDT);
char name[DDT_NAMELEN];
@@ -2392,7 +2395,7 @@ ddt_sync(spa_t *spa, uint64_t txg)
* scan's root zio here so that we can wait for any scan IOs in
* addition to the regular ddt IOs.
*/
- ASSERT3P(scn->scn_zio_root, ==, NULL);
+ ASSERT0P(scn->scn_zio_root);
scn->scn_zio_root = rio;
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c
index dbd381aa9609..3d30e244c1f7 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_log.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_log.c
@@ -116,7 +116,7 @@ static void
ddt_log_create_one(ddt_t *ddt, ddt_log_t *ddl, uint_t n, dmu_tx_t *tx)
{
ASSERT3U(ddt->ddt_dir_object, >, 0);
- ASSERT3U(ddl->ddl_object, ==, 0);
+ ASSERT0(ddl->ddl_object);
char name[DDT_NAMELEN];
ddt_log_name(ddt, name, n);
@@ -194,7 +194,7 @@ void
ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu)
{
ASSERT3U(nentries, >, 0);
- ASSERT3P(dlu->dlu_dbp, ==, NULL);
+ ASSERT0P(dlu->dlu_dbp);
if (ddt->ddt_log_active->ddl_object == 0)
ddt_log_create(ddt, tx);
@@ -748,8 +748,8 @@ ddt_log_load(ddt_t *ddt)
void
ddt_log_alloc(ddt_t *ddt)
{
- ASSERT3P(ddt->ddt_log_active, ==, NULL);
- ASSERT3P(ddt->ddt_log_flushing, ==, NULL);
+ ASSERT0P(ddt->ddt_log_active);
+ ASSERT0P(ddt->ddt_log_flushing);
avl_create(&ddt->ddt_log[0].ddl_tree, ddt_key_compare,
sizeof (ddt_log_entry_t), offsetof(ddt_log_entry_t, ddle_node));
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 21c465328134..f7f808d5b8f7 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -1343,7 +1343,7 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
if (size == 0)
return;
- VERIFY(0 == dmu_buf_hold_array(os, object, offset, size,
+ VERIFY0(dmu_buf_hold_array(os, object, offset, size,
FALSE, FTAG, &numbufs, &dbp));
for (i = 0; i < numbufs; i++) {
@@ -1872,7 +1872,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
*/
BP_SET_LSIZE(bp, db->db_size);
} else if (!BP_IS_EMBEDDED(bp)) {
- ASSERT(BP_GET_LEVEL(bp) == 0);
+ ASSERT0(BP_GET_LEVEL(bp));
BP_SET_FILL(bp, 1);
}
}
@@ -1966,7 +1966,7 @@ dmu_sync_late_arrival_done(zio_t *zio)
blkptr_t *bp_orig __maybe_unused = &zio->io_bp_orig;
ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
- ASSERT(BP_GET_LOGICAL_BIRTH(zio->io_bp) == zio->io_txg);
+ ASSERT(BP_GET_BIRTH(zio->io_bp) == zio->io_txg);
ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
}
@@ -2405,7 +2405,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
}
}
} else if (wp & WP_NOFILL) {
- ASSERT(level == 0);
+ ASSERT0(level);
/*
* If we're writing preallocated blocks, we aren't actually
@@ -2508,6 +2508,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
zp->zp_encrypt = encrypt;
zp->zp_byteorder = ZFS_HOST_BYTEORDER;
zp->zp_direct_write = (wp & WP_DIRECT_WR) ? B_TRUE : B_FALSE;
+ zp->zp_rewrite = B_FALSE;
memset(zp->zp_salt, 0, ZIO_DATA_SALT_LEN);
memset(zp->zp_iv, 0, ZIO_DATA_IV_LEN);
memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN);
@@ -2655,11 +2656,12 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
* operation into ZIL, or it may be impossible to replay, since
* the block may appear not yet allocated at that point.
*/
- if (BP_GET_BIRTH(bp) > spa_freeze_txg(os->os_spa)) {
+ if (BP_GET_PHYSICAL_BIRTH(bp) > spa_freeze_txg(os->os_spa)) {
error = SET_ERROR(EINVAL);
goto out;
}
- if (BP_GET_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) {
+ if (BP_GET_PHYSICAL_BIRTH(bp) >
+ spa_last_synced_txg(os->os_spa)) {
error = SET_ERROR(EAGAIN);
goto out;
}
@@ -2731,7 +2733,8 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) {
if (!BP_IS_EMBEDDED(bp)) {
BP_SET_BIRTH(&dl->dr_overridden_by, dr->dr_txg,
- BP_GET_BIRTH(bp));
+ BP_GET_PHYSICAL_BIRTH(bp));
+ BP_SET_REWRITE(&dl->dr_overridden_by, 0);
} else {
BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by,
dr->dr_txg);
@@ -2862,7 +2865,7 @@ byteswap_uint64_array(void *vbuf, size_t size)
size_t count = size >> 3;
int i;
- ASSERT((size & 7) == 0);
+ ASSERT0((size & 7));
for (i = 0; i < count; i++)
buf[i] = BSWAP_64(buf[i]);
@@ -2875,7 +2878,7 @@ byteswap_uint32_array(void *vbuf, size_t size)
size_t count = size >> 2;
int i;
- ASSERT((size & 3) == 0);
+ ASSERT0((size & 3));
for (i = 0; i < count; i++)
buf[i] = BSWAP_32(buf[i]);
@@ -2888,7 +2891,7 @@ byteswap_uint16_array(void *vbuf, size_t size)
size_t count = size >> 1;
int i;
- ASSERT((size & 1) == 0);
+ ASSERT0((size & 1));
for (i = 0; i < count; i++)
buf[i] = BSWAP_16(buf[i]);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_diff.c b/sys/contrib/openzfs/module/zfs/dmu_diff.c
index 86f751e886c9..fb13b2f87f57 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_diff.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_diff.c
@@ -224,8 +224,8 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name,
* call the ZFS_IOC_OBJ_TO_STATS ioctl.
*/
error = traverse_dataset(tosnap, fromtxg,
- TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT,
- diff_cb, &da);
+ TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT |
+ TRAVERSE_LOGICAL, diff_cb, &da);
if (error != 0) {
da.da_err = error;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_direct.c b/sys/contrib/openzfs/module/zfs/dmu_direct.c
index 12b0ffa2c99b..d44c686088fc 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_direct.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_direct.c
@@ -95,16 +95,16 @@ dmu_write_direct_done(zio_t *zio)
abd_free(zio->io_abd);
mutex_enter(&db->db_mtx);
- ASSERT3P(db->db_buf, ==, NULL);
- ASSERT3P(dr->dt.dl.dr_data, ==, NULL);
- ASSERT3P(db->db.db_data, ==, NULL);
+ ASSERT0P(db->db_buf);
+ ASSERT0P(dr->dt.dl.dr_data);
+ ASSERT0P(db->db.db_data);
db->db_state = DB_UNCACHED;
mutex_exit(&db->db_mtx);
dmu_sync_done(zio, NULL, zio->io_private);
if (zio->io_error != 0) {
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)
ASSERT3U(zio->io_error, ==, EIO);
/*
diff --git a/sys/contrib/openzfs/module/zfs/dmu_object.c b/sys/contrib/openzfs/module/zfs/dmu_object.c
index b4ff7d224cc9..207cc6d0e713 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_object.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_object.c
@@ -90,7 +90,7 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
if (allocated_dnode != NULL) {
ASSERT3P(tag, !=, NULL);
} else {
- ASSERT3P(tag, ==, NULL);
+ ASSERT0P(tag);
tag = FTAG;
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index b3f792e4ae6b..a77f338bdfd3 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -345,12 +345,6 @@ smallblk_changed_cb(void *arg, uint64_t newval)
{
objset_t *os = arg;
- /*
- * Inheritance and range checking should have been done by now.
- */
- ASSERT(newval <= SPA_MAXBLOCKSIZE);
- ASSERT(ISP2(newval));
-
os->os_zpl_special_smallblock = newval;
}
@@ -730,7 +724,7 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
if (err == 0) {
mutex_enter(&ds->ds_lock);
- ASSERT(ds->ds_objset == NULL);
+ ASSERT0P(ds->ds_objset);
ds->ds_objset = os;
mutex_exit(&ds->ds_lock);
}
@@ -1376,7 +1370,7 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
6, ZFS_SPACE_CHECK_NORMAL);
if (rv == 0)
- zvol_create_minor(name);
+ zvol_create_minors(name);
crfree(cr);
@@ -2232,7 +2226,7 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
rf |= DB_RF_HAVESTRUCT;
error = dmu_spill_hold_by_dnode(dn, rf,
FTAG, (dmu_buf_t **)&db);
- ASSERT(error == 0);
+ ASSERT0(error);
mutex_enter(&db->db_mtx);
data = (before) ? db->db.db_data :
dmu_objset_userquota_find_data(db, tx);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
index 3a4bd7a1cea9..45c7af2bdcd2 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -866,7 +866,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
*/
if (dcp == NULL && drrb->drr_fromguid == 0 &&
drba->drba_origin == NULL) {
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
dcp = &dummy_dcp;
if (featureflags & DMU_BACKUP_FEATURE_RAW)
@@ -881,7 +881,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
if (drba->drba_cookie->drc_fromsnapobj != 0) {
VERIFY0(dsl_dataset_hold_obj(dp,
drba->drba_cookie->drc_fromsnapobj, FTAG, &snap));
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
}
if (drc->drc_heal) {
/* When healing we want to use the provided snapshot */
@@ -905,7 +905,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
if (drba->drba_origin != NULL) {
VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
FTAG, &origin));
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
}
/* Create new dataset. */
@@ -1403,7 +1403,7 @@ corrective_read_done(zio_t *zio)
/* Corruption corrected; update error log if needed */
if (zio->io_error == 0) {
spa_remove_error(data->spa, &data->zb,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
}
kmem_free(data, sizeof (cr_cb_data_t));
abd_free(zio->io_abd);
@@ -1530,7 +1530,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
}
rrd->abd = abd;
- io = zio_rewrite(NULL, rwa->os->os_spa, BP_GET_LOGICAL_BIRTH(bp), bp,
+ io = zio_rewrite(NULL, rwa->os->os_spa, BP_GET_BIRTH(bp), bp,
abd, BP_GET_PSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, flags,
&zb);
@@ -2792,7 +2792,7 @@ receive_read_payload_and_next_header(dmu_recv_cookie_t *drc, int len, void *buf)
drc->drc_rrd->bytes_read = drc->drc_bytes_read;
}
} else {
- ASSERT3P(buf, ==, NULL);
+ ASSERT0P(buf);
}
drc->drc_prev_cksum = drc->drc_cksum;
@@ -3450,7 +3450,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
break;
}
- ASSERT3P(drc->drc_rrd, ==, NULL);
+ ASSERT0P(drc->drc_rrd);
drc->drc_rrd = drc->drc_next_rrd;
drc->drc_next_rrd = NULL;
/* Allocates and loads header into drc->drc_next_rrd */
@@ -3468,7 +3468,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, offset_t *voffp)
drc->drc_rrd = NULL;
}
- ASSERT3P(drc->drc_rrd, ==, NULL);
+ ASSERT0P(drc->drc_rrd);
drc->drc_rrd = kmem_zalloc(sizeof (*drc->drc_rrd), KM_SLEEP);
drc->drc_rrd->eos_marker = B_TRUE;
bqueue_enqueue_flush(&rwa->q, drc->drc_rrd, 1);
@@ -3831,11 +3831,11 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
nvlist_free(drc->drc_keynvl);
} else if (!drc->drc_heal) {
if (drc->drc_newfs) {
- zvol_create_minor(drc->drc_tofs);
+ zvol_create_minors(drc->drc_tofs);
}
char *snapname = kmem_asprintf("%s@%s",
drc->drc_tofs, drc->drc_tosnap);
- zvol_create_minor(snapname);
+ zvol_create_minors(snapname);
kmem_strfree(snapname);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c
index 65443d112f27..5a22ed71a5fe 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_redact.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c
@@ -370,8 +370,8 @@ redact_traverse_thread(void *arg)
#endif
err = traverse_dataset_resume(rt_arg->ds, rt_arg->txg,
- &rt_arg->resume, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
- redact_cb, rt_arg);
+ &rt_arg->resume, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
+ TRAVERSE_LOGICAL, redact_cb, rt_arg);
if (err != EINTR)
rt_arg->error_code = err;
@@ -1067,7 +1067,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
}
if (err != 0)
goto out;
- VERIFY3P(nvlist_next_nvpair(redactnvl, pair), ==, NULL);
+ VERIFY0P(nvlist_next_nvpair(redactnvl, pair));
boolean_t resuming = B_FALSE;
zfs_bookmark_phys_t bookmark;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
index 4f27f3df0e55..8ecb99d5f57c 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -962,7 +962,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
char *data = NULL;
if (srdp->abd != NULL) {
data = abd_to_buf(srdp->abd);
- ASSERT3P(srdp->abuf, ==, NULL);
+ ASSERT0P(srdp->abuf);
} else if (srdp->abuf != NULL) {
data = srdp->abuf->b_data;
}
@@ -1084,7 +1084,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
if (sta->os->os_encrypted &&
!BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
- spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
+ spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp));
return (SET_ERROR(EIO));
}
@@ -1210,7 +1210,7 @@ send_traverse_thread(void *arg)
err = traverse_dataset_resume(st_arg->os->os_dsl_dataset,
st_arg->fromtxg, &st_arg->resume,
- st_arg->flags, send_cb, st_arg);
+ st_arg->flags | TRAVERSE_LOGICAL, send_cb, st_arg);
if (err != EINTR)
st_arg->error_code = err;
@@ -2514,7 +2514,7 @@ dmu_send_impl(struct dmu_send_params *dspp)
* list in the stream.
*/
if (dspp->numfromredactsnaps != NUM_SNAPS_NOT_REDACTED) {
- ASSERT3P(from_rl, ==, NULL);
+ ASSERT0P(from_rl);
fnvlist_add_uint64_array(nvl, BEGINNV_REDACT_FROM_SNAPS,
dspp->fromredactsnaps, (uint_t)dspp->numfromredactsnaps);
if (dspp->numfromredactsnaps > 0) {
@@ -2891,7 +2891,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
&fromds);
if (err != 0) {
- ASSERT3P(fromds, ==, NULL);
+ ASSERT0P(fromds);
} else {
/*
* We need to make a deep copy of the redact
diff --git a/sys/contrib/openzfs/module/zfs/dmu_traverse.c b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
index f534a7dd64e3..dd1df1705040 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_traverse.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_traverse.c
@@ -74,6 +74,15 @@ static int traverse_dnode(traverse_data_t *td, const blkptr_t *bp,
static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
uint64_t objset, uint64_t object);
+static inline uint64_t
+get_birth_time(traverse_data_t *td, const blkptr_t *bp)
+{
+ if (td->td_flags & TRAVERSE_LOGICAL)
+ return (BP_GET_LOGICAL_BIRTH(bp));
+ else
+ return (BP_GET_BIRTH(bp));
+}
+
static int
traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
uint64_t claim_txg)
@@ -85,7 +94,7 @@ traverse_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
return (0);
if (claim_txg == 0 &&
- BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(td->td_spa))
+ get_birth_time(td, bp) >= spa_min_claim_txg(td->td_spa))
return (-1);
SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
@@ -110,7 +119,7 @@ traverse_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
if (BP_IS_HOLE(bp))
return (0);
- if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg)
+ if (claim_txg == 0 || get_birth_time(td, bp) < claim_txg)
return (0);
ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
@@ -194,7 +203,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
*/
if (resume_skip_check(td, dnp, zb) != RESUME_SKIP_NONE)
return (B_FALSE);
- if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg)
+ if (BP_IS_HOLE(bp) || get_birth_time(td, bp) <= td->td_min_txg)
return (B_FALSE);
if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
return (B_FALSE);
@@ -265,7 +274,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_object == DMU_META_DNODE_OBJECT) &&
td->td_hole_birth_enabled_txg <= td->td_min_txg)
return (0);
- } else if (BP_GET_LOGICAL_BIRTH(bp) <= td->td_min_txg) {
+ } else if (get_birth_time(td, bp) <= td->td_min_txg) {
return (0);
}
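The new get_birth_time() helper lets a traversal prune on either birth time: with TRAVERSE_LOGICAL (now passed by the redact and send traversal threads earlier in this diff) pruning uses the logical birth, otherwise it uses the block's on-disk birth. A standalone sketch of the effect, using simplified stand-in types and an illustrative flag value rather than the real blkptr/traverse definitions:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins; the real definitions live in the OpenZFS headers. */
#define	TRAVERSE_LOGICAL	0x40	/* illustrative flag value only */

typedef struct {
	uint64_t logical_birth;		/* txg the contents last changed */
	uint64_t physical_birth;	/* txg the block was last written out */
} fake_bp_t;

typedef struct {
	int td_flags;
	uint64_t td_min_txg;
} fake_td_t;

/* Same shape as the get_birth_time() helper added to dmu_traverse.c. */
static uint64_t
get_birth_time(const fake_td_t *td, const fake_bp_t *bp)
{
	if (td->td_flags & TRAVERSE_LOGICAL)
		return (bp->logical_birth);
	return (bp->physical_birth);
}

int
main(void)
{
	/* A block rewritten in txg 200 whose contents last changed in txg 50. */
	fake_bp_t bp = { .logical_birth = 50, .physical_birth = 200 };
	fake_td_t send = { .td_flags = TRAVERSE_LOGICAL, .td_min_txg = 100 };
	fake_td_t scrub = { .td_flags = 0, .td_min_txg = 100 };

	/* A logical walk (incremental send) from txg 100 can skip it... */
	(void) printf("logical walk visits: %d\n",
	    get_birth_time(&send, &bp) > send.td_min_txg);
	/* ...while a physical walk still sees the rewrite. */
	(void) printf("physical walk visits: %d\n",
	    get_birth_time(&scrub, &bp) > scrub.td_min_txg);
	return (0);
}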
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
index d85d8b89423e..40c0b3402a05 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -126,7 +126,7 @@ dmu_tx_hold_dnode_impl(dmu_tx_t *tx, dnode_t *dn, enum dmu_tx_hold_type type,
* problem, but there's no way for it to happen (for
* now, at least).
*/
- ASSERT(dn->dn_assigned_txg == 0);
+ ASSERT0(dn->dn_assigned_txg);
dn->dn_assigned_txg = tx->tx_txg;
(void) zfs_refcount_add(&dn->dn_tx_holds, tx);
mutex_exit(&dn->dn_mtx);
@@ -443,7 +443,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
dnode_t *dn = txh->txh_dnode;
int err;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
if (off >= (dn->dn_maxblkid + 1) * dn->dn_datablksz)
return;
@@ -607,7 +607,7 @@ dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name)
dnode_t *dn = txh->txh_dnode;
int err;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
dmu_tx_count_dnode(txh);
@@ -681,7 +681,7 @@ dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
dmu_tx_hold_t *txh;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
object, THT_BONUS, 0, 0);
@@ -706,7 +706,7 @@ dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
dmu_tx_hold_t *txh;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
DMU_NEW_OBJECT, THT_SPACE, space, 0);
@@ -1232,7 +1232,7 @@ dmu_tx_assign(dmu_tx_t *tx, dmu_tx_flag_t flags)
{
int err;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
ASSERT0(flags & ~(DMU_TX_WAIT | DMU_TX_NOTHROTTLE | DMU_TX_SUSPEND));
IMPLY(flags & DMU_TX_SUSPEND, flags & DMU_TX_WAIT);
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
@@ -1328,7 +1328,7 @@ dmu_tx_wait(dmu_tx_t *tx)
dsl_pool_t *dp = tx->tx_pool;
hrtime_t before;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
ASSERT(!dsl_pool_config_held(tx->tx_pool));
/*
@@ -1644,12 +1644,12 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
if (sa->sa_force_spill || may_grow || hdl->sa_spill) {
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
dmu_tx_hold_spill(tx, object);
} else {
DB_DNODE_ENTER(db);
if (DB_DNODE(db)->dn_have_spill) {
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
dmu_tx_hold_spill(tx, object);
}
DB_DNODE_EXIT(db);
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c
index 904a039edf95..963ff41232a3 100644
--- a/sys/contrib/openzfs/module/zfs/dnode.c
+++ b/sys/contrib/openzfs/module/zfs/dnode.c
@@ -86,6 +86,19 @@ int zfs_default_ibs = DN_MAX_INDBLKSHIFT;
static kmem_cbrc_t dnode_move(void *, void *, size_t, void *);
#endif /* _KERNEL */
+static char *
+rt_name(dnode_t *dn, const char *name)
+{
+ struct objset *os = dn->dn_objset;
+
+ return (kmem_asprintf("{spa=%s objset=%llu obj=%llu %s}",
+ spa_name(os->os_spa),
+ (u_longlong_t)(os->os_dsl_dataset ?
+ os->os_dsl_dataset->ds_object : DMU_META_OBJSET),
+ (u_longlong_t)dn->dn_object,
+ name));
+}
+
static int
dbuf_compare(const void *x1, const void *x2)
{
@@ -201,7 +214,7 @@ dnode_dest(void *arg, void *unused)
for (int i = 0; i < TXG_SIZE; i++) {
ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
- ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
+ ASSERT0P(dn->dn_free_ranges[i]);
list_destroy(&dn->dn_dirty_records[i]);
ASSERT0(dn->dn_next_nblkptr[i]);
ASSERT0(dn->dn_next_nlevels[i]);
@@ -218,10 +231,10 @@ dnode_dest(void *arg, void *unused)
ASSERT0(dn->dn_assigned_txg);
ASSERT0(dn->dn_dirty_txg);
ASSERT0(dn->dn_dirtyctx);
- ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
- ASSERT3P(dn->dn_bonus, ==, NULL);
+ ASSERT0P(dn->dn_dirtyctx_firstset);
+ ASSERT0P(dn->dn_bonus);
ASSERT(!dn->dn_have_spill);
- ASSERT3P(dn->dn_zio, ==, NULL);
+ ASSERT0P(dn->dn_zio);
ASSERT0(dn->dn_oldused);
ASSERT0(dn->dn_oldflags);
ASSERT0(dn->dn_olduid);
@@ -305,7 +318,7 @@ dnode_kstats_update(kstat_t *ksp, int rw)
void
dnode_init(void)
{
- ASSERT(dnode_cache == NULL);
+ ASSERT0P(dnode_cache);
dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t),
0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_RECLAIMABLE);
kmem_cache_set_move(dnode_cache, dnode_move);
@@ -496,7 +509,7 @@ dnode_buf_byteswap(void *vbuf, size_t size)
int i = 0;
ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
- ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0);
+ ASSERT0((size & (sizeof (dnode_phys_t)-1)));
while (i < size) {
dnode_phys_t *dnp = (void *)(((char *)vbuf) + i);
@@ -660,7 +673,7 @@ dnode_destroy(dnode_t *dn)
objset_t *os = dn->dn_objset;
boolean_t complete_os_eviction = B_FALSE;
- ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
+ ASSERT0((dn->dn_id_flags & DN_ID_NEW_EXIST));
mutex_enter(&os->os_lock);
POINTER_INVALIDATE(&dn->dn_objset);
@@ -767,7 +780,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT0(dn->dn_next_maxblkid[i]);
ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
- ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
+ ASSERT0P(dn->dn_free_ranges[i]);
}
dn->dn_type = ot;
@@ -945,7 +958,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ndn->dn_dirty_txg = odn->dn_dirty_txg;
ndn->dn_dirtyctx = odn->dn_dirtyctx;
ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
- ASSERT(zfs_refcount_count(&odn->dn_tx_holds) == 0);
+ ASSERT0(zfs_refcount_count(&odn->dn_tx_holds));
zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds);
ASSERT(avl_is_empty(&ndn->dn_dbufs));
avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs);
@@ -2291,7 +2304,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
if ((off >> blkshift) > dn->dn_maxblkid)
return;
} else {
- ASSERT(dn->dn_maxblkid == 0);
+ ASSERT0(dn->dn_maxblkid);
if (off == 0 && len >= blksz) {
/*
* Freeing the whole block; fast-track this request.
@@ -2436,8 +2449,10 @@ done:
{
int txgoff = tx->tx_txg & TXG_MASK;
if (dn->dn_free_ranges[txgoff] == NULL) {
- dn->dn_free_ranges[txgoff] = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ dn->dn_free_ranges[txgoff] =
+ zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, rt_name(dn, "dn_free_ranges"));
}
zfs_range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
zfs_range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
@@ -2509,7 +2524,7 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
}
space += delta;
if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) {
- ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
+ ASSERT0((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES));
ASSERT0(P2PHASE(space, 1<<DEV_BSHIFT));
dn->dn_phys->dn_used = space >> DEV_BSHIFT;
} else {
diff --git a/sys/contrib/openzfs/module/zfs/dnode_sync.c b/sys/contrib/openzfs/module/zfs/dnode_sync.c
index 4067f221f1bf..046ceddb3609 100644
--- a/sys/contrib/openzfs/module/zfs/dnode_sync.c
+++ b/sys/contrib/openzfs/module/zfs/dnode_sync.c
@@ -209,8 +209,8 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
rw_exit(&dn->dn_struct_rwlock);
if (err == ENOENT)
continue;
- ASSERT(err == 0);
- ASSERT(child->db_level == 0);
+ ASSERT0(err);
+ ASSERT0(child->db_level);
dr = dbuf_find_dirty_eq(child, txg);
/* data_old better be zeroed */
@@ -868,7 +868,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dbuf_sync_list(list, dn->dn_phys->dn_nlevels - 1, tx);
if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
- ASSERT3P(list_head(list), ==, NULL);
+ ASSERT0P(list_head(list));
dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index e301fe19f645..ee574c499f9f 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -243,7 +243,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp,
/* error is retval of the following if-cascade */
if (strchr(source, '@') != NULL) {
dsl_dataset_t *source_snap_ds;
- ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0);
+ ASSERT0(snapshot_namecheck(source, NULL, NULL));
error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds);
if (error == 0) {
VERIFY(source_snap_ds->ds_is_snapshot);
@@ -258,7 +258,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp,
}
} else if (strchr(source, '#') != NULL) {
zfs_bookmark_phys_t source_phys;
- ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0);
+ ASSERT0(bookmark_namecheck(source, NULL, NULL));
/*
* Source must exist and be an earlier point in newbm_ds's
* timeline (newbm_ds's origin may be a snap of source's ds)
@@ -501,7 +501,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
if (bookmark_redacted) {
- ASSERT3P(redaction_list, ==, NULL);
+ ASSERT0P(redaction_list);
local_rl->rl_phys->rlp_last_blkid = UINT64_MAX;
local_rl->rl_phys->rlp_last_object = UINT64_MAX;
dsl_redaction_list_long_rele(local_rl, tag);
@@ -1523,7 +1523,7 @@ dsl_bookmark_block_killed(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
* If the block was live (referenced) at the time of this
* bookmark, add its space to the bookmark's FBN.
*/
- if (BP_GET_LOGICAL_BIRTH(bp) <=
+ if (BP_GET_BIRTH(bp) <=
dbn->dbn_phys.zbm_creation_txg &&
(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) {
mutex_enter(&dbn->dbn_lock);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
index db568f42d24e..f519b937edc0 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
@@ -534,7 +534,7 @@ out:
static void
dsl_crypto_key_free(dsl_crypto_key_t *dck)
{
- ASSERT(zfs_refcount_count(&dck->dck_holds) == 0);
+ ASSERT0(zfs_refcount_count(&dck->dck_holds));
/* destroy the zio_crypt_key_t */
zio_crypt_key_destroy(&dck->dck_key);
@@ -866,7 +866,7 @@ spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
dsl_pool_rele(dp, FTAG);
/* create any zvols under this ds */
- zvol_create_minors_recursive(dsname);
+ zvol_create_minors(dsname);
return (0);
@@ -1912,7 +1912,7 @@ dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
/* clones always use their origin's wrapping key */
if (dsl_dir_is_clone(dd)) {
- ASSERT3P(dcp, ==, NULL);
+ ASSERT0P(dcp);
/*
* If this is an encrypted clone we just need to clone the
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index c0a7872c40ad..420687480a76 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -159,7 +159,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
return;
}
- ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,
+ ASSERT3U(BP_GET_BIRTH(bp), >,
dsl_dataset_phys(ds)->ds_prev_snap_txg);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
mutex_enter(&ds->ds_lock);
@@ -194,7 +194,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
* they do not need to be freed.
*/
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
- BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
+ BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
!(BP_IS_EMBEDDED(bp))) {
ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa,
@@ -263,7 +263,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
return (0);
ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) <= tx->tx_txg);
+ ASSERT(BP_GET_BIRTH(bp) <= tx->tx_txg);
if (ds == NULL) {
dsl_free(tx->tx_pool, tx->tx_txg, bp);
@@ -281,7 +281,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
* they do not need to be freed.
*/
if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
- BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
+ BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg &&
!(BP_IS_EMBEDDED(bp))) {
ASSERT(dsl_dir_is_clone(ds->ds_dir));
ASSERT(spa_feature_is_enabled(spa,
@@ -289,7 +289,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
bplist_append(&ds->ds_dir->dd_pending_frees, bp);
}
- if (BP_GET_LOGICAL_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
+ if (BP_GET_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
int64_t delta;
/*
@@ -346,14 +346,14 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0);
/* if (logical birth > prev prev snap txg) prev unique += bs */
if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
- ds->ds_object && BP_GET_LOGICAL_BIRTH(bp) >
+ ds->ds_object && BP_GET_BIRTH(bp) >
dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) {
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
mutex_enter(&ds->ds_prev->ds_lock);
dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used;
mutex_exit(&ds->ds_prev->ds_lock);
}
- if (BP_GET_LOGICAL_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
+ if (BP_GET_BIRTH(bp) > ds->ds_dir->dd_origin_txg) {
dsl_dir_transfer_space(ds->ds_dir, used,
DD_USED_HEAD, DD_USED_SNAP, tx);
}
@@ -450,7 +450,7 @@ dsl_dataset_evict_sync(void *dbu)
{
dsl_dataset_t *ds = dbu;
- ASSERT(ds->ds_owner == NULL);
+ ASSERT0P(ds->ds_owner);
unique_remove(ds->ds_fsid_guid);
}
@@ -460,7 +460,7 @@ dsl_dataset_evict_async(void *dbu)
{
dsl_dataset_t *ds = dbu;
- ASSERT(ds->ds_owner == NULL);
+ ASSERT0P(ds->ds_owner);
ds->ds_dbuf = NULL;
@@ -1187,7 +1187,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0);
ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
+ ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);
dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
@@ -2005,7 +2005,7 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
if (error == 0) {
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
- zvol_create_minor(nvpair_name(pair));
+ zvol_create_minors(nvpair_name(pair));
}
}
@@ -2112,7 +2112,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *rio, dmu_tx_t *tx)
{
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(ds->ds_objset != NULL);
- ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0);
+ ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);
/*
* in case we had to change ds_fsid_guid when we opened it,
@@ -2944,7 +2944,7 @@ dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
if (snap == NULL)
return (B_FALSE);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- birth = BP_GET_LOGICAL_BIRTH(dsl_dataset_get_blkptr(ds));
+ birth = BP_GET_BIRTH(dsl_dataset_get_blkptr(ds));
rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {
objset_t *os, *os_snap;
@@ -3413,7 +3413,7 @@ dsl_dataset_clone(const char *clone, const char *origin)
6, ZFS_SPACE_CHECK_NORMAL);
if (rv == 0)
- zvol_create_minor(clone);
+ zvol_create_minors(clone);
crfree(cr);
@@ -4180,7 +4180,7 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_pool_t *dp = dmu_tx_pool(tx);
int64_t unused_refres_delta;
- ASSERT(clone->ds_reserved == 0);
+ ASSERT0(clone->ds_reserved);
/*
* NOTE: On DEBUG kernels there could be a race between this and
* the check function if spa_asize_inflation is adjusted...
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
index 3113d932fb68..475db3c89508 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c
@@ -484,7 +484,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
dl->dl_phys->dl_comp += sign * BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += sign * BP_GET_UCSIZE(bp);
- dle_tofind.dle_mintxg = BP_GET_LOGICAL_BIRTH(bp);
+ dle_tofind.dle_mintxg = BP_GET_BIRTH(bp);
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
@@ -493,7 +493,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
if (dle == NULL) {
zfs_panic_recover("blkptr at %p has invalid BLK_BIRTH %llu",
- bp, (longlong_t)BP_GET_LOGICAL_BIRTH(bp));
+ bp, (longlong_t)BP_GET_BIRTH(bp));
dle = avl_first(&dl->dl_tree);
}
@@ -1037,7 +1037,7 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
avl_tree_t *avl = lia->avl;
bplist_t *to_free = lia->to_free;
zthr_t *t = lia->t;
- ASSERT(tx == NULL);
+ ASSERT0P(tx);
if ((t != NULL) && (zthr_has_waiters(t) || zthr_iscancelled(t)))
return (SET_ERROR(EINTR));
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deleg.c b/sys/contrib/openzfs/module/zfs/dsl_deleg.c
index c01a06e98340..fdd37b36e280 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deleg.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deleg.c
@@ -102,7 +102,7 @@ dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr)
nvlist_t *perms;
nvpair_t *permpair = NULL;
- VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
+ VERIFY0(nvpair_value_nvlist(whopair, &perms));
while ((permpair = nvlist_next_nvpair(perms, permpair))) {
const char *perm = nvpair_name(permpair);
@@ -189,8 +189,7 @@ dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
const char *perm = nvpair_name(permpair);
uint64_t n = 0;
- VERIFY(zap_update(mos, jumpobj,
- perm, 8, 1, &n, tx) == 0);
+ VERIFY0(zap_update(mos, jumpobj, perm, 8, 1, &n, tx));
spa_history_log_internal_dd(dd, "permission update", tx,
"%s %s", whokey, perm);
}
@@ -225,7 +224,7 @@ dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
if (zap_lookup(mos, zapobj, whokey, 8,
1, &jumpobj) == 0) {
(void) zap_remove(mos, zapobj, whokey, tx);
- VERIFY(0 == zap_destroy(mos, jumpobj, tx));
+ VERIFY0(zap_destroy(mos, jumpobj, tx));
}
spa_history_log_internal_dd(dd, "permission who remove",
tx, "%s", whokey);
@@ -243,7 +242,7 @@ dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
if (zap_count(mos, jumpobj, &n) == 0 && n == 0) {
(void) zap_remove(mos, zapobj,
whokey, tx);
- VERIFY(0 == zap_destroy(mos,
+ VERIFY0(zap_destroy(mos,
jumpobj, tx));
}
spa_history_log_internal_dd(dd, "permission remove", tx,
@@ -332,7 +331,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
baseza = zap_attribute_alloc();
source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
nvlist_t *sp_nvp;
@@ -706,7 +705,7 @@ copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj,
ZFS_DELEG_LOCAL, &uid);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) {
jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
- VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0);
+ VERIFY0(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx));
}
za = zap_attribute_alloc();
@@ -716,8 +715,7 @@ copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj,
uint64_t zero = 0;
ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1);
- VERIFY(zap_update(mos, jumpobj, za->za_name,
- 8, 1, &zero, tx) == 0);
+ VERIFY0(zap_update(mos, jumpobj, za->za_name, 8, 1, &zero, tx));
}
zap_cursor_fini(&zc);
zap_attribute_free(za);
@@ -761,10 +759,10 @@ dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx)
zap_cursor_retrieve(&zc, za) == 0;
zap_cursor_advance(&zc)) {
ASSERT(za->za_integer_length == 8 && za->za_num_integers == 1);
- VERIFY(0 == zap_destroy(mos, za->za_first_integer, tx));
+ VERIFY0(zap_destroy(mos, za->za_first_integer, tx));
}
zap_cursor_fini(&zc);
- VERIFY(0 == zap_destroy(mos, zapobj, tx));
+ VERIFY0(zap_destroy(mos, zapobj, tx));
zap_attribute_free(za);
return (0);
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
index f5ec93b2dc5c..ea01ee586f8b 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
@@ -133,11 +133,11 @@ process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
ASSERT(!BP_IS_HOLE(bp));
- if (BP_GET_LOGICAL_BIRTH(bp) <=
+ if (BP_GET_BIRTH(bp) <=
dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx);
if (poa->ds_prev && !poa->after_branch_point &&
- BP_GET_LOGICAL_BIRTH(bp) >
+ BP_GET_BIRTH(bp) >
dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
bp_get_dsize_sync(dp->dp_spa, bp);
@@ -315,8 +315,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
- tx->tx_txg);
+ ASSERT3U(BP_GET_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(zfs_refcount_is_zero(&ds->ds_longholds));
@@ -351,7 +350,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
dsl_dataset_deactivate_feature(ds, f, tx);
}
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
- ASSERT3P(ds->ds_prev, ==, NULL);
+ ASSERT0P(ds->ds_prev);
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
after_branch_point =
@@ -466,7 +465,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
&used, &comp, &uncomp);
dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
dsl_dataset_rele(ds_nextnext, FTAG);
- ASSERT3P(ds_next->ds_prev, ==, NULL);
+ ASSERT0P(ds_next->ds_prev);
/* Collapse range in this head. */
dsl_dataset_t *hds;
@@ -526,7 +525,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
/* remove from snapshot namespace */
dsl_dataset_t *ds_head;
- ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
+ ASSERT0(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
VERIFY0(dsl_dataset_get_snapname(ds));
@@ -729,8 +728,8 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
} else {
- ASSERT(zilog == NULL);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,
+ ASSERT0P(zilog);
+ ASSERT3U(BP_GET_BIRTH(bp), >,
dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
}
@@ -1020,8 +1019,7 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT(ds->ds_prev == NULL ||
dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
- tx->tx_txg);
+ ASSERT3U(BP_GET_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=, tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
index f24cd2049533..6ce1890cfea1 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -151,8 +151,8 @@ dsl_dir_evict_async(void *dbu)
for (t = 0; t < TXG_SIZE; t++) {
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
- ASSERT(dd->dd_tempreserved[t] == 0);
- ASSERT(dd->dd_space_towrite[t] == 0);
+ ASSERT0(dd->dd_tempreserved[t]);
+ ASSERT0(dd->dd_space_towrite[t]);
}
if (dd->dd_parent)
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index f1088d87208b..f47822df8b53 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -522,8 +522,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops __attribute__((unused)),
/* create and open the free_bplist */
obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx);
- VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
+ VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@@ -1056,7 +1056,7 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
* will be wrong.
*/
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
- ASSERT0(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(prev)->ds_bp));
+ ASSERT0(BP_GET_BIRTH(&dsl_dataset_phys(prev)->ds_bp));
rrw_exit(&ds->ds_bp_rwlock, FTAG);
/* The origin doesn't get attached to itself */
@@ -1077,7 +1077,7 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
dsl_dataset_phys(prev)->ds_num_children++;
if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) {
- ASSERT(ds->ds_prev == NULL);
+ ASSERT0P(ds->ds_prev);
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dataset_phys(ds)->ds_prev_snap_obj,
ds, &ds->ds_prev));
@@ -1173,7 +1173,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
dsl_dataset_t *ds;
ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(dp->dp_origin_snap == NULL);
+ ASSERT0P(dp->dp_origin_snap);
ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
/* create the origin dir, ds, & snap-ds */
@@ -1250,7 +1250,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
{
objset_t *mos = dp->dp_meta_objset;
- ASSERT(dp->dp_tmp_userrefs_obj == 0);
+ ASSERT0(dp->dp_tmp_userrefs_obj);
ASSERT(dmu_tx_is_syncing(tx));
dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
diff --git a/sys/contrib/openzfs/module/zfs/dsl_prop.c b/sys/contrib/openzfs/module/zfs/dsl_prop.c
index b76f22df61e2..51f624da5689 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_prop.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_prop.c
@@ -815,7 +815,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
*/
err = zap_update(mos, zapobj, recvdstr,
intsz, numints, value, tx);
- ASSERT(err == 0);
+ ASSERT0(err);
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED):
/*
@@ -1166,7 +1166,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
if (nvlist_exists(nv, propname))
continue;
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
if (za->za_integer_length == 1) {
/*
* String property
@@ -1179,8 +1179,7 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
kmem_free(tmp, za->za_num_integers);
break;
}
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE,
- tmp) == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, tmp));
kmem_free(tmp, za->za_num_integers);
} else {
/*
@@ -1191,8 +1190,8 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
za->za_first_integer);
}
- VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, source) == 0);
- VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_SOURCE, source));
+ VERIFY0(nvlist_add_nvlist(nv, propname, propval));
nvlist_free(propval);
}
zap_cursor_fini(&zc);
@@ -1215,7 +1214,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
int err = 0;
char setpoint[ZFS_MAX_DATASET_NAME_LEN];
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
if (ds->ds_is_snapshot)
flags |= DSL_PROP_GET_SNAPSHOT;
@@ -1333,18 +1332,18 @@ dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value)
uint64_t default_value;
if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) {
- VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0);
+ VERIFY0(nvlist_add_uint64(propval, ZPROP_VALUE, value));
return;
}
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_uint64(propval, ZPROP_VALUE, value));
/* Indicate the default source if we can. */
if (dodefault(prop, 8, 1, &default_value) == 0 &&
value == default_value) {
- VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, "") == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_SOURCE, ""));
}
- VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0);
+ VERIFY0(nvlist_add_nvlist(nv, propname, propval));
nvlist_free(propval);
}
@@ -1355,13 +1354,13 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value)
const char *propname = zfs_prop_to_name(prop);
if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) {
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0);
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, value));
return;
}
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0);
- VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, value));
+ VERIFY0(nvlist_add_nvlist(nv, propname, propval));
nvlist_free(propval);
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index 1b2cd3e361d1..fcd50c459d07 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -454,7 +454,7 @@ static inline void
bp2sio(const blkptr_t *bp, scan_io_t *sio, int dva_i)
{
sio->sio_blk_prop = bp->blk_prop;
- sio->sio_phys_birth = BP_GET_PHYSICAL_BIRTH(bp);
+ sio->sio_phys_birth = BP_GET_RAW_PHYSICAL_BIRTH(bp);
sio->sio_birth = BP_GET_LOGICAL_BIRTH(bp);
sio->sio_cksum = bp->blk_cksum;
sio->sio_nr_dvas = BP_GET_NDVAS(bp);
@@ -1768,7 +1768,7 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) ||
- BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
+ BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
return (0);
/*
@@ -1778,13 +1778,13 @@ dsl_scan_zil_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
* scrub there's nothing to do to it).
*/
if (claim_txg == 0 &&
- BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(dp->dp_spa))
+ BP_GET_BIRTH(bp) >= spa_min_claim_txg(dp->dp_spa))
return (0);
SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
- VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+ VERIFY0(scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
return (0);
}
@@ -1804,7 +1804,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) ||
- BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
+ BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg)
return (0);
/*
@@ -1812,7 +1812,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
* already txg sync'ed (but this log block contains
* other records that are not synced)
*/
- if (claim_txg == 0 || BP_GET_LOGICAL_BIRTH(bp) < claim_txg)
+ if (claim_txg == 0 || BP_GET_BIRTH(bp) < claim_txg)
return (0);
ASSERT3U(BP_GET_LSIZE(bp), !=, 0);
@@ -1820,7 +1820,7 @@ dsl_scan_zil_record(zilog_t *zilog, const lr_t *lrc, void *arg,
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));
- VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+ VERIFY0(scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
}
return (0);
}
@@ -1952,7 +1952,7 @@ dsl_scan_prefetch(scan_prefetch_ctx_t *spc, blkptr_t *bp, zbookmark_phys_t *zb)
return;
if (BP_IS_HOLE(bp) ||
- BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg ||
+ BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE &&
BP_GET_TYPE(bp) != DMU_OT_OBJSET))
return;
@@ -2223,7 +2223,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
if (dnp != NULL &&
dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) {
scn->scn_phys.scn_errors++;
- spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
+ spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp));
return (SET_ERROR(EINVAL));
}
@@ -2319,7 +2319,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
* by arc_read() for the cases above.
*/
scn->scn_phys.scn_errors++;
- spa_log_error(spa, zb, BP_GET_LOGICAL_BIRTH(bp));
+ spa_log_error(spa, zb, BP_GET_PHYSICAL_BIRTH(bp));
return (SET_ERROR(EINVAL));
}
@@ -2396,7 +2396,12 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
if (f != SPA_FEATURE_NONE)
ASSERT(dsl_dataset_feature_is_active(ds, f));
- if (BP_GET_LOGICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) {
+ /*
+ * Recurse any blocks that were written either logically or physically
+ * at or after cur_min_txg. The logical birth matters for traversal
+ * (finding any changes), while the physical birth matters for the
+ * actual scan.
+ */
+ if (BP_GET_BIRTH(bp) <= scn->scn_phys.scn_cur_min_txg) {
scn->scn_lt_min_this_txg++;
return;
}
@@ -2422,7 +2427,7 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb,
* Don't scan it now unless we need to because something
* under it was modified.
*/
- if (BP_GET_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) {
+ if (BP_GET_PHYSICAL_BIRTH(bp) > scn->scn_phys.scn_cur_max_txg) {
scn->scn_gt_max_this_txg++;
return;
}
@@ -4806,7 +4811,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
{
dsl_scan_t *scn = dp->dp_scan;
spa_t *spa = dp->dp_spa;
- uint64_t phys_birth = BP_GET_BIRTH(bp);
+ uint64_t phys_birth = BP_GET_PHYSICAL_BIRTH(bp);
size_t psize = BP_GET_PSIZE(bp);
boolean_t needs_io = B_FALSE;
int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
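The scan changes separate the two birth times: whether to recurse into a block is decided from its combined birth (per the comment above, anything written either logically or physically after cur_min_txg), while whether the on-disk data needs scrub/resilver I/O is decided from the physical birth alone. A standalone sketch of that split, under the stated assumption that the combined birth is simply the later of the two times:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a block pointer's two birth times. */
typedef struct {
	uint64_t logical_birth;		/* txg the contents last changed */
	uint64_t physical_birth;	/* txg the block was last written out */
} fake_bp_t;

/*
 * Recurse if the block changed logically or physically after cur_min_txg.
 * Assumption: the combined birth is the later of the two times.
 */
static bool
scan_should_recurse(const fake_bp_t *bp, uint64_t cur_min_txg)
{
	uint64_t birth = bp->physical_birth > bp->logical_birth ?
	    bp->physical_birth : bp->logical_birth;
	return (birth > cur_min_txg);
}

/* Only the physical birth decides whether the on-disk data needs I/O. */
static bool
scan_needs_io(const fake_bp_t *bp, uint64_t cur_min_txg, uint64_t cur_max_txg)
{
	return (bp->physical_birth > cur_min_txg &&
	    bp->physical_birth <= cur_max_txg);
}

int
main(void)
{
	/* Rewritten in txg 800; contents unchanged since txg 300. */
	fake_bp_t bp = { .logical_birth = 300, .physical_birth = 800 };

	(void) printf("recurse (min_txg 500): %d\n",
	    scan_should_recurse(&bp, 500));
	(void) printf("needs io in (500, 1000]: %d\n",
	    scan_needs_io(&bp, 500, 1000));
	return (0);
}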
@@ -5136,7 +5141,7 @@ dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd)
mutex_enter(&svd->vdev_scan_io_queue_lock);
mutex_enter(&tvd->vdev_scan_io_queue_lock);
- VERIFY3P(tvd->vdev_scan_io_queue, ==, NULL);
+ VERIFY0P(tvd->vdev_scan_io_queue);
tvd->vdev_scan_io_queue = svd->vdev_scan_io_queue;
svd->vdev_scan_io_queue = NULL;
if (tvd->vdev_scan_io_queue != NULL)
diff --git a/sys/contrib/openzfs/module/zfs/dsl_userhold.c b/sys/contrib/openzfs/module/zfs/dsl_userhold.c
index 57c70e4ce3d2..f91b7a1eb69a 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_userhold.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_userhold.c
@@ -335,7 +335,7 @@ dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
dduha.dduha_holds = holds;
/* chkholds can have non-unique name */
- VERIFY(0 == nvlist_alloc(&dduha.dduha_chkholds, 0, KM_SLEEP));
+ VERIFY0(nvlist_alloc(&dduha.dduha_chkholds, 0, KM_SLEEP));
dduha.dduha_errlist = errlist;
dduha.dduha_minor = cleanup_minor;
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
index a092817efedd..ae788b2310d8 100644
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -337,7 +337,7 @@ zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
}
}
- VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
+ VERIFY0(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE));
if (size > *event_size) {
*event_size = size;
error = ENOMEM;
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 43b94eba2d58..9f4399af56bd 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -375,13 +375,23 @@ static metaslab_stats_t metaslab_stats = {
#define METASLABSTAT_BUMP(stat) \
atomic_inc_64(&metaslab_stats.stat.value.ui64);
+char *
+metaslab_rt_name(metaslab_group_t *mg, metaslab_t *ms, const char *name)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu ms_id=%llu %s}",
+ spa_name(mg->mg_vd->vdev_spa),
+ (u_longlong_t)mg->mg_vd->vdev_guid,
+ (u_longlong_t)ms->ms_id,
+ name));
+}
+
static kstat_t *metaslab_ksp;
void
metaslab_stat_init(void)
{
- ASSERT(metaslab_alloc_trace_cache == NULL);
+ ASSERT0P(metaslab_alloc_trace_cache);
metaslab_alloc_trace_cache = kmem_cache_create(
"metaslab_alloc_trace_cache", sizeof (metaslab_alloc_trace_t),
0, NULL, NULL, NULL, NULL, NULL, 0);
@@ -446,16 +456,16 @@ metaslab_class_destroy(metaslab_class_t *mc)
{
spa_t *spa = mc->mc_spa;
- ASSERT(mc->mc_alloc == 0);
- ASSERT(mc->mc_deferred == 0);
- ASSERT(mc->mc_space == 0);
- ASSERT(mc->mc_dspace == 0);
+ ASSERT0(mc->mc_alloc);
+ ASSERT0(mc->mc_deferred);
+ ASSERT0(mc->mc_space);
+ ASSERT0(mc->mc_dspace);
for (int i = 0; i < spa->spa_alloc_count; i++) {
metaslab_class_allocator_t *mca = &mc->mc_allocator[i];
avl_destroy(&mca->mca_tree);
mutex_destroy(&mca->mca_lock);
- ASSERT(mca->mca_rotor == NULL);
+ ASSERT0P(mca->mca_rotor);
ASSERT0(mca->mca_reserved);
}
mutex_destroy(&mc->mc_lock);
@@ -750,7 +760,8 @@ metaslab_class_histogram_verify(metaslab_class_t *mc)
}
IMPLY(mg == mg->mg_vd->vdev_log_mg,
- mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(mg->mg_vd->vdev_spa));
for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++)
mc_hist[i] += mg->mg_histogram[i];
@@ -1076,8 +1087,8 @@ metaslab_group_destroy(metaslab_group_t *mg)
{
spa_t *spa = mg->mg_class->mc_spa;
- ASSERT(mg->mg_prev == NULL);
- ASSERT(mg->mg_next == NULL);
+ ASSERT0P(mg->mg_prev);
+ ASSERT0P(mg->mg_next);
/*
* We may have gone below zero with the activation count
* either because we never activated in the first place or
@@ -1107,8 +1118,8 @@ metaslab_group_activate(metaslab_group_t *mg)
ASSERT3U(spa_config_held(spa, SCL_ALLOC, RW_WRITER), !=, 0);
- ASSERT(mg->mg_prev == NULL);
- ASSERT(mg->mg_next == NULL);
+ ASSERT0P(mg->mg_prev);
+ ASSERT0P(mg->mg_next);
ASSERT(mg->mg_activation_count <= 0);
if (++mg->mg_activation_count <= 0)
@@ -1153,8 +1164,8 @@ metaslab_group_passivate(metaslab_group_t *mg)
if (--mg->mg_activation_count != 0) {
for (int i = 0; i < spa->spa_alloc_count; i++)
ASSERT(mc->mc_allocator[i].mca_rotor != mg);
- ASSERT(mg->mg_prev == NULL);
- ASSERT(mg->mg_next == NULL);
+ ASSERT0P(mg->mg_prev);
+ ASSERT0P(mg->mg_next);
ASSERT(mg->mg_activation_count < 0);
return;
}
@@ -1183,14 +1194,16 @@ metaslab_group_passivate(metaslab_group_t *mg)
if (msp != NULL) {
mutex_enter(&msp->ms_lock);
metaslab_passivate(msp,
- metaslab_weight_from_range_tree(msp));
+ metaslab_weight(msp, B_TRUE) &
+ ~METASLAB_ACTIVE_MASK);
mutex_exit(&msp->ms_lock);
}
msp = mga->mga_secondary;
if (msp != NULL) {
mutex_enter(&msp->ms_lock);
metaslab_passivate(msp,
- metaslab_weight_from_range_tree(msp));
+ metaslab_weight(msp, B_TRUE) &
+ ~METASLAB_ACTIVE_MASK);
mutex_exit(&msp->ms_lock);
}
}
@@ -1288,7 +1301,8 @@ metaslab_group_histogram_add(metaslab_group_t *mg, metaslab_t *msp)
mutex_enter(&mc->mc_lock);
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
IMPLY(mg == mg->mg_vd->vdev_log_mg,
- mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(mg->mg_vd->vdev_spa));
mg->mg_histogram[i + ashift] +=
msp->ms_sm->sm_phys->smp_histogram[i];
mc->mc_histogram[i + ashift] +=
@@ -1316,7 +1330,8 @@ metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp)
ASSERT3U(mc->mc_histogram[i + ashift], >=,
msp->ms_sm->sm_phys->smp_histogram[i]);
IMPLY(mg == mg->mg_vd->vdev_log_mg,
- mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(mg->mg_vd->vdev_spa));
mg->mg_histogram[i + ashift] -=
msp->ms_sm->sm_phys->smp_histogram[i];
@@ -1330,7 +1345,7 @@ metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp)
static void
metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp)
{
- ASSERT(msp->ms_group == NULL);
+ ASSERT0P(msp->ms_group);
mutex_enter(&mg->mg_lock);
msp->ms_group = mg;
msp->ms_weight = 0;
@@ -2895,30 +2910,43 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object,
zfs_range_seg_type_t type =
metaslab_calculate_range_tree_type(vd, ms, &start, &shift);
- ms->ms_allocatable = zfs_range_tree_create(NULL, type, NULL, start,
- shift);
+ ms->ms_allocatable = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_allocatable"));
for (int t = 0; t < TXG_SIZE; t++) {
- ms->ms_allocating[t] = zfs_range_tree_create(NULL, type,
- NULL, start, shift);
- }
- ms->ms_freeing = zfs_range_tree_create(NULL, type, NULL, start, shift);
- ms->ms_freed = zfs_range_tree_create(NULL, type, NULL, start, shift);
+ ms->ms_allocating[t] = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(mg, ms, "ms_allocating"));
+ }
+ ms->ms_freeing = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_freeing"));
+ ms->ms_freed = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_freed"));
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
- ms->ms_defer[t] = zfs_range_tree_create(NULL, type, NULL,
- start, shift);
+ ms->ms_defer[t] = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_defer"));
}
- ms->ms_checkpointing =
- zfs_range_tree_create(NULL, type, NULL, start, shift);
- ms->ms_unflushed_allocs =
- zfs_range_tree_create(NULL, type, NULL, start, shift);
+ ms->ms_checkpointing = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_checkpointing"));
+ ms->ms_unflushed_allocs = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_unflushed_allocs"));
metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
mrap->mra_bt = &ms->ms_unflushed_frees_by_size;
mrap->mra_floor_shift = metaslab_by_size_min_shift;
- ms->ms_unflushed_frees = zfs_range_tree_create(&metaslab_rt_ops,
- type, mrap, start, shift);
+ ms->ms_unflushed_frees = zfs_range_tree_create_flags(
+ &metaslab_rt_ops, type, mrap, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_unflushed_frees"));
- ms->ms_trim = zfs_range_tree_create(NULL, type, NULL, start, shift);
+ ms->ms_trim = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME, metaslab_rt_name(mg, ms, "ms_trim"));
metaslab_group_add(mg, ms);
metaslab_set_fragmentation(ms, B_FALSE);
@@ -2989,7 +3017,7 @@ metaslab_fini(metaslab_t *msp)
metaslab_group_remove(mg, msp);
mutex_enter(&msp->ms_lock);
- VERIFY(msp->ms_group == NULL);
+ VERIFY0P(msp->ms_group);
/*
* If this metaslab hasn't been through metaslab_sync_done() yet its
@@ -3892,7 +3920,10 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
type = metaslab_calculate_range_tree_type(msp->ms_group->mg_vd, msp,
&start, &shift);
- condense_tree = zfs_range_tree_create(NULL, type, NULL, start, shift);
+ condense_tree = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(msp->ms_group, msp, "condense_tree"));
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
zfs_range_tree_walk(msp->ms_defer[t],
@@ -3949,8 +3980,10 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
* followed by FREES (due to space_map_write() in metaslab_sync()) for
* sync pass 1.
*/
- zfs_range_tree_t *tmp_tree = zfs_range_tree_create(NULL, type, NULL,
- start, shift);
+ zfs_range_tree_t *tmp_tree = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(msp->ms_group, msp, "tmp_tree"));
zfs_range_tree_add(tmp_tree, msp->ms_start, msp->ms_size);
space_map_write(sm, tmp_tree, SM_ALLOC, SM_NO_VDEVID, tx);
space_map_write(sm, msp->ms_allocatable, SM_FREE, SM_NO_VDEVID, tx);
@@ -5199,29 +5232,16 @@ next:
/*
* We were unable to allocate from this metaslab so determine
- * a new weight for this metaslab. Now that we have loaded
- * the metaslab we can provide a better hint to the metaslab
- * selector.
- *
- * For space-based metaslabs, we use the maximum block size.
- * This information is only available when the metaslab
- * is loaded and is more accurate than the generic free
- * space weight that was calculated by metaslab_weight().
- * This information allows us to quickly compare the maximum
- * available allocation in the metaslab to the allocation
- * size being requested.
- *
- * For segment-based metaslabs, determine the new weight
- * based on the highest bucket in the range tree. We
- * explicitly use the loaded segment weight (i.e. the range
- * tree histogram) since it contains the space that is
- * currently available for allocation and is accurate
- * even within a sync pass.
+ * a new weight for this metaslab. The weight was last
+ * recalculated either when we loaded it (if this is the first
+ * TXG it's been loaded in), or the last time a txg was synced
+ * out.
*/
uint64_t weight;
if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
- weight = metaslab_largest_allocatable(msp);
- WEIGHT_SET_SPACEBASED(weight);
+ metaslab_set_fragmentation(msp, B_TRUE);
+ weight = metaslab_space_weight(msp) &
+ ~METASLAB_ACTIVE_MASK;
} else {
weight = metaslab_weight_from_range_tree(msp);
}
@@ -5233,13 +5253,6 @@ next:
* For the case where we use the metaslab that is
* active for another allocator we want to make
* sure that we retain the activation mask.
- *
- * Note that we could attempt to use something like
- * metaslab_recalculate_weight_and_sort() that
- * retains the activation mask here. That function
- * uses metaslab_weight() to set the weight though
- * which is not as accurate as the calculations
- * above.
*/
weight |= msp->ms_weight & METASLAB_ACTIVE_MASK;
metaslab_group_sort(mg, msp, weight);
@@ -5590,7 +5603,21 @@ remap_blkptr_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
vdev_indirect_births_t *vib = oldvd->vdev_indirect_births;
uint64_t physical_birth = vdev_indirect_births_physbirth(vib,
DVA_GET_OFFSET(&bp->blk_dva[0]), DVA_GET_ASIZE(&bp->blk_dva[0]));
- BP_SET_PHYSICAL_BIRTH(bp, physical_birth);
+
+ /*
+ * For rewritten blocks, use the old physical birth as the new logical
+ * birth (representing when the space was allocated) and the removal
+ * time as the new physical birth (representing when it was actually
+ * written).
+ */
+ if (BP_GET_REWRITE(bp)) {
+ uint64_t old_physical_birth = BP_GET_PHYSICAL_BIRTH(bp);
+ ASSERT3U(old_physical_birth, <, physical_birth);
+ BP_SET_BIRTH(bp, old_physical_birth, physical_birth);
+ BP_SET_REWRITE(bp, 0);
+ } else {
+ BP_SET_PHYSICAL_BIRTH(bp, physical_birth);
+ }
DVA_SET_VDEV(&bp->blk_dva[0], vd->vdev_id);
DVA_SET_OFFSET(&bp->blk_dva[0], offset);
@@ -5712,7 +5739,7 @@ metaslab_unalloc_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
ASSERT(!vd->vdev_removing);
ASSERT(vdev_is_concrete(vd));
ASSERT0(vd->vdev_indirect_config.vic_mapping_object);
- ASSERT3P(vd->vdev_indirect_mapping, ==, NULL);
+ ASSERT0P(vd->vdev_indirect_mapping);
if (DVA_GET_GANG(dva))
size = vdev_gang_header_asize(vd);
@@ -5757,21 +5784,21 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, boolean_t checkpoint)
}
/*
- * Reserve some allocation slots. The reservation system must be called
- * before we call into the allocator. If there aren't any available slots
- * then the I/O will be throttled until an I/O completes and its slots are
- * freed up. The function returns true if it was successful in placing
- * the reservation.
+ * Reserve some space for a future allocation. The reservation system must be
+ * called before we call into the allocator. If there isn't enough space
+ * available, the calling I/O will be throttled until another I/O completes and
+ * its reservation is released. The function returns true if it was successful
+ * in placing the reservation.
*/
boolean_t
-metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
- boolean_t must, boolean_t *more)
+metaslab_class_throttle_reserve(metaslab_class_t *mc, int allocator,
+ int copies, uint64_t io_size, boolean_t must, boolean_t *more)
{
- metaslab_class_allocator_t *mca = &mc->mc_allocator[zio->io_allocator];
+ metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
ASSERT(mc->mc_alloc_throttle_enabled);
- if (mc->mc_alloc_io_size < zio->io_size) {
- mc->mc_alloc_io_size = zio->io_size;
+ if (mc->mc_alloc_io_size < io_size) {
+ mc->mc_alloc_io_size = io_size;
metaslab_class_balance(mc, B_FALSE);
}
if (must || mca->mca_reserved <= mc->mc_alloc_max) {
@@ -5782,10 +5809,9 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
* worst that can happen is few more I/Os get to allocation
* earlier, that is not a problem.
*/
- int64_t delta = slots * zio->io_size;
+ int64_t delta = copies * io_size;
*more = (atomic_add_64_nv(&mca->mca_reserved, delta) <=
mc->mc_alloc_max);
- zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
return (B_TRUE);
}
*more = B_FALSE;
@@ -5793,13 +5819,13 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
}
boolean_t
-metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots,
- zio_t *zio)
+metaslab_class_throttle_unreserve(metaslab_class_t *mc, int allocator,
+ int copies, uint64_t io_size)
{
- metaslab_class_allocator_t *mca = &mc->mc_allocator[zio->io_allocator];
+ metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
ASSERT(mc->mc_alloc_throttle_enabled);
- int64_t delta = slots * zio->io_size;
+ int64_t delta = copies * io_size;
return (atomic_add_64_nv(&mca->mca_reserved, -delta) <=
mc->mc_alloc_max);
}
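metaslab_class_throttle_reserve() and _unreserve() now take the allocator index, copy count, and I/O size directly instead of a zio_t. The accounting itself keeps the same shape: the reservation is copies * io_size bytes, a "must" reservation always succeeds, and *more reports whether the allocator still has headroom. A single-threaded userland sketch of that accounting (the real code updates mca_reserved with atomic_add_64_nv):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for one allocator's reservation bucket (mca_reserved / ceiling). */
typedef struct {
	uint64_t reserved;	/* bytes currently reserved */
	uint64_t max;		/* throttle ceiling */
} fake_allocator_t;

static bool
throttle_reserve(fake_allocator_t *mca, int copies, uint64_t io_size,
    bool must, bool *more)
{
	if (must || mca->reserved <= mca->max) {
		/* Reserve space for every DVA copy of this I/O. */
		mca->reserved += (uint64_t)copies * io_size;
		*more = (mca->reserved <= mca->max);
		return (true);
	}
	*more = false;
	return (false);
}

static bool
throttle_unreserve(fake_allocator_t *mca, int copies, uint64_t io_size)
{
	/* Release the reservation once the allocation has completed. */
	mca->reserved -= (uint64_t)copies * io_size;
	return (mca->reserved <= mca->max);
}

int
main(void)
{
	fake_allocator_t mca = { .reserved = 0, .max = 1 << 20 };
	bool more;

	if (throttle_reserve(&mca, 3, 128 * 1024, false, &more))
		(void) printf("reserved 384K, more allowed: %d\n", more);
	(void) printf("back under the ceiling after unreserve: %d\n",
	    throttle_unreserve(&mca, 3, 128 * 1024));
	return (0);
}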
@@ -5960,7 +5986,7 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
int error = 0;
ASSERT0(BP_GET_LOGICAL_BIRTH(bp));
- ASSERT0(BP_GET_PHYSICAL_BIRTH(bp));
+ ASSERT0(BP_GET_RAW_PHYSICAL_BIRTH(bp));
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
@@ -5971,16 +5997,16 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
}
ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
- ASSERT(BP_GET_NDVAS(bp) == 0);
+ ASSERT0(BP_GET_NDVAS(bp));
ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
ASSERT3P(zal, !=, NULL);
- uint64_t cur_psize = 0;
-
+ uint64_t smallest_psize = UINT64_MAX;
for (int d = 0; d < ndvas; d++) {
- error = metaslab_alloc_dva_range(spa, mc, psize, max_psize,
- dva, d, hintdva, txg, flags, zal, allocator,
- actual_psize ? &cur_psize : NULL);
+ uint64_t cur_psize = 0;
+ error = metaslab_alloc_dva_range(spa, mc, psize,
+ MIN(smallest_psize, max_psize), dva, d, hintdva, txg,
+ flags, zal, allocator, actual_psize ? &cur_psize : NULL);
if (error != 0) {
for (d--; d >= 0; d--) {
metaslab_unalloc_dva(spa, &dva[d], txg);
@@ -6000,13 +6026,13 @@ metaslab_alloc_range(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
DVA_GET_VDEV(&dva[d]), allocator, flags, psize,
tag);
if (actual_psize)
- max_psize = MIN(cur_psize, max_psize);
+ smallest_psize = MIN(cur_psize, smallest_psize);
}
}
- ASSERT(error == 0);
+ ASSERT0(error);
ASSERT(BP_GET_NDVAS(bp) == ndvas);
if (actual_psize)
- *actual_psize = max_psize;
+ *actual_psize = smallest_psize;
spa_config_exit(spa, SCL_ALLOC, FTAG);
@@ -6022,7 +6048,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
int ndvas = BP_GET_NDVAS(bp);
ASSERT(!BP_IS_HOLE(bp));
- ASSERT(!now || BP_GET_LOGICAL_BIRTH(bp) >= spa_syncing_txg(spa));
+ ASSERT(!now || BP_GET_BIRTH(bp) >= spa_syncing_txg(spa));
/*
* If we have a checkpoint for the pool we need to make sure that
@@ -6040,7 +6066,7 @@ metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
* normally as they will be referenced by the checkpointed uberblock.
*/
boolean_t checkpoint = B_FALSE;
- if (BP_GET_LOGICAL_BIRTH(bp) <= spa->spa_checkpoint_txg &&
+ if (BP_GET_BIRTH(bp) <= spa->spa_checkpoint_txg &&
spa_syncing_txg(spa) > spa->spa_checkpoint_txg) {
/*
* At this point, if the block is part of the checkpoint
diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c
index f3665d29b8b4..7db72b9b04b0 100644
--- a/sys/contrib/openzfs/module/zfs/mmp.c
+++ b/sys/contrib/openzfs/module/zfs/mmp.c
@@ -260,7 +260,7 @@ mmp_thread_stop(spa_t *spa)
zfs_dbgmsg("MMP thread stopped pool '%s' gethrtime %llu",
spa_name(spa), gethrtime());
- ASSERT(mmp->mmp_thread == NULL);
+ ASSERT0P(mmp->mmp_thread);
mmp->mmp_thread_exiting = 0;
}
diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c
index 373636c69254..ea2d2c7227c8 100644
--- a/sys/contrib/openzfs/module/zfs/range_tree.c
+++ b/sys/contrib/openzfs/module/zfs/range_tree.c
@@ -201,10 +201,10 @@ ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg64_find_in_buf, zfs_range_seg64_t,
ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg_gap_find_in_buf,
zfs_range_seg_gap_t, zfs_range_tree_seg_gap_compare)
-zfs_range_tree_t *
-zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
+static zfs_range_tree_t *
+zfs_range_tree_create_impl(const zfs_range_tree_ops_t *ops,
zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
- uint64_t gap)
+ uint64_t gap, uint64_t flags, const char *name)
{
zfs_range_tree_t *rt = kmem_zalloc(sizeof (zfs_range_tree_t), KM_SLEEP);
@@ -236,6 +236,8 @@ zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
rt->rt_ops = ops;
rt->rt_gap = gap;
+ rt->rt_flags = flags;
+ rt->rt_name = name;
rt->rt_arg = arg;
rt->rt_type = type;
rt->rt_start = start;
@@ -248,10 +250,29 @@ zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
}
zfs_range_tree_t *
+zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
+ zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+ uint64_t gap)
+{
+ return (zfs_range_tree_create_impl(ops, type, arg, start, shift, gap,
+ 0, NULL));
+}
+
+zfs_range_tree_t *
zfs_range_tree_create(const zfs_range_tree_ops_t *ops,
zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift)
{
- return (zfs_range_tree_create_gap(ops, type, arg, start, shift, 0));
+ return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0,
+ 0, NULL));
+}
+
+zfs_range_tree_t *
+zfs_range_tree_create_flags(const zfs_range_tree_ops_t *ops,
+ zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+ uint64_t flags, const char *name)
+{
+ return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0,
+ flags, name));
}
void
@@ -262,6 +283,9 @@ zfs_range_tree_destroy(zfs_range_tree_t *rt)
if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
+ if (rt->rt_name != NULL && (rt->rt_flags & ZFS_RT_F_DYN_NAME))
+ kmem_strfree((char *)(uintptr_t)rt->rt_name);
+
zfs_btree_destroy(&rt->rt_root);
kmem_free(rt, sizeof (*rt));
}
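zfs_range_tree_create_flags() threads an optional name into every range tree; with ZFS_RT_F_DYN_NAME the tree owns the string and frees it on destroy, and ZFS_RT_NAME() drops the name into the panic/recover messages further down. A userland sketch of the ownership rule and of roughly what a named message looks like (the flag value, pool name, and object numbers are made up for illustration):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-ins mirroring the naming plumbing added to range_tree.c. */
#define	ZFS_RT_F_DYN_NAME	(1ULL << 0)	/* illustrative flag value */
#define	ZFS_RT_NAME(rt)		((rt)->rt_name != NULL ? (rt)->rt_name : "")

typedef struct {
	uint64_t rt_flags;
	const char *rt_name;
} fake_rt_t;

static fake_rt_t *
rt_create_flags(uint64_t flags, const char *name)
{
	fake_rt_t *rt = calloc(1, sizeof (*rt));

	rt->rt_flags = flags;
	rt->rt_name = name;
	return (rt);
}

static void
rt_destroy(fake_rt_t *rt)
{
	/* With ZFS_RT_F_DYN_NAME the tree owns the string and frees it. */
	if (rt->rt_name != NULL && (rt->rt_flags & ZFS_RT_F_DYN_NAME))
		free((char *)(uintptr_t)rt->rt_name);
	free(rt);
}

int
main(void)
{
	char label[64];

	/* Roughly what rt_name()/metaslab_rt_name() build via kmem_asprintf. */
	(void) snprintf(label, sizeof (label),
	    "{spa=%s objset=%llu obj=%llu %s}", "tank",
	    (unsigned long long)54, (unsigned long long)1234, "dn_free_ranges");
	fake_rt_t *rt = rt_create_flags(ZFS_RT_F_DYN_NAME, strdup(label));

	/* The payoff: recovery messages now say which tree misbehaved. */
	(void) printf("zfs: rt=%s: adding segment (offset=%llx size=%llx) "
	    "overlapping with existing one\n", ZFS_RT_NAME(rt),
	    (unsigned long long)0x1000, (unsigned long long)0x2000);

	rt_destroy(rt);
	return (0);
}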
@@ -271,15 +295,17 @@ zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs,
int64_t delta)
{
if (delta < 0 && delta * -1 >= zfs_rs_get_fill(rs, rt)) {
- zfs_panic_recover("zfs: attempting to decrease fill to or "
- "below 0; probable double remove in segment [%llx:%llx]",
+ zfs_panic_recover("zfs: rt=%s: attempting to decrease fill to "
+ "or below 0; probable double remove in segment [%llx:%llx]",
+ ZFS_RT_NAME(rt),
(longlong_t)zfs_rs_get_start(rs, rt),
(longlong_t)zfs_rs_get_end(rs, rt));
}
if (zfs_rs_get_fill(rs, rt) + delta > zfs_rs_get_end(rs, rt) -
zfs_rs_get_start(rs, rt)) {
- zfs_panic_recover("zfs: attempting to increase fill beyond "
- "max; probable double add in segment [%llx:%llx]",
+ zfs_panic_recover("zfs: rt=%s: attempting to increase fill "
+ "beyond max; probable double add in segment [%llx:%llx]",
+ ZFS_RT_NAME(rt),
(longlong_t)zfs_rs_get_start(rs, rt),
(longlong_t)zfs_rs_get_end(rs, rt));
}
@@ -319,14 +345,17 @@ zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
* the normal code paths.
*/
if (rs != NULL) {
+ uint64_t rstart = zfs_rs_get_start(rs, rt);
+ uint64_t rend = zfs_rs_get_end(rs, rt);
if (gap == 0) {
- zfs_panic_recover("zfs: adding existent segment to "
- "range tree (offset=%llx size=%llx)",
- (longlong_t)start, (longlong_t)size);
+ zfs_panic_recover("zfs: rt=%s: adding segment "
+ "(offset=%llx size=%llx) overlapping with existing "
+ "one (offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt),
+ (longlong_t)start, (longlong_t)size,
+ (longlong_t)rstart, (longlong_t)(rend - rstart));
return;
}
- uint64_t rstart = zfs_rs_get_start(rs, rt);
- uint64_t rend = zfs_rs_get_end(rs, rt);
if (rstart <= start && rend >= end) {
zfs_range_tree_adjust_fill(rt, rs, fill);
return;
@@ -348,7 +377,7 @@ zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
return;
}
- ASSERT3P(rs, ==, NULL);
+ ASSERT0P(rs);
/*
* Determine whether or not we will have to merge with our neighbors.
@@ -451,6 +480,7 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
zfs_range_seg_t *rs;
zfs_range_seg_max_t rsearch, rs_tmp;
uint64_t end = start + size;
+ uint64_t rstart, rend;
boolean_t left_over, right_over;
VERIFY3U(size, !=, 0);
@@ -464,12 +494,15 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
/* Make sure we completely overlap with someone */
if (rs == NULL) {
- zfs_panic_recover("zfs: removing nonexistent segment from "
- "range tree (offset=%llx size=%llx)",
- (longlong_t)start, (longlong_t)size);
+ zfs_panic_recover("zfs: rt=%s: removing nonexistent segment "
+ "from range tree (offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt), (longlong_t)start, (longlong_t)size);
return;
}
+ rstart = zfs_rs_get_start(rs, rt);
+ rend = zfs_rs_get_end(rs, rt);
+
/*
* Range trees with gap support must only remove complete segments
* from the tree. This allows us to maintain accurate fill accounting
@@ -479,31 +512,36 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
if (rt->rt_gap != 0) {
if (do_fill) {
if (zfs_rs_get_fill(rs, rt) == size) {
- start = zfs_rs_get_start(rs, rt);
- end = zfs_rs_get_end(rs, rt);
+ start = rstart;
+ end = rend;
size = end - start;
} else {
zfs_range_tree_adjust_fill(rt, rs, -size);
return;
}
- } else if (zfs_rs_get_start(rs, rt) != start ||
- zfs_rs_get_end(rs, rt) != end) {
- zfs_panic_recover("zfs: freeing partial segment of "
- "gap tree (offset=%llx size=%llx) of "
+ } else if (rstart != start || rend != end) {
+ zfs_panic_recover("zfs: rt=%s: freeing partial segment "
+ "of gap tree (offset=%llx size=%llx) of "
"(offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt),
(longlong_t)start, (longlong_t)size,
- (longlong_t)zfs_rs_get_start(rs, rt),
- (longlong_t)zfs_rs_get_end(rs, rt) -
- zfs_rs_get_start(rs, rt));
+ (longlong_t)rstart, (longlong_t)(rend - rstart));
return;
}
}
- VERIFY3U(zfs_rs_get_start(rs, rt), <=, start);
- VERIFY3U(zfs_rs_get_end(rs, rt), >=, end);
+ if (!(rstart <= start && rend >= end)) {
+ panic("zfs: rt=%s: removing segment "
+ "(offset=%llx size=%llx) not completely overlapped by "
+ "existing one (offset=%llx size=%llx)",
+ ZFS_RT_NAME(rt),
+ (longlong_t)start, (longlong_t)size,
+ (longlong_t)rstart, (longlong_t)(rend - rstart));
+ return;
+ }
- left_over = (zfs_rs_get_start(rs, rt) != start);
- right_over = (zfs_rs_get_end(rs, rt) != end);
+ left_over = (rstart != start);
+ right_over = (rend != end);
zfs_range_tree_stat_decr(rt, rs);
@@ -829,7 +867,7 @@ zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
next = zfs_btree_next(&removefrom->rt_root, &where, &where);
}
- VERIFY3P(curr, ==, NULL);
+ VERIFY0P(curr);
if (start != end) {
VERIFY3U(start, <, end);
diff --git a/sys/contrib/openzfs/module/zfs/rrwlock.c b/sys/contrib/openzfs/module/zfs/rrwlock.c
index 8ee784619839..d0df39b93560 100644
--- a/sys/contrib/openzfs/module/zfs/rrwlock.c
+++ b/sys/contrib/openzfs/module/zfs/rrwlock.c
@@ -108,7 +108,7 @@ rrn_add(rrwlock_t *rrl, const void *tag)
rn->rn_rrl = rrl;
rn->rn_next = tsd_get(rrw_tsd_key);
rn->rn_tag = tag;
- VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
+ VERIFY0(tsd_set(rrw_tsd_key, rn));
}
/*
@@ -129,7 +129,7 @@ rrn_find_and_remove(rrwlock_t *rrl, const void *tag)
if (prev)
prev->rn_next = rn->rn_next;
else
- VERIFY(tsd_set(rrw_tsd_key, rn->rn_next) == 0);
+ VERIFY0(tsd_set(rrw_tsd_key, rn->rn_next));
kmem_free(rn, sizeof (*rn));
return (B_TRUE);
}
@@ -155,7 +155,7 @@ rrw_destroy(rrwlock_t *rrl)
{
mutex_destroy(&rrl->rr_lock);
cv_destroy(&rrl->rr_cv);
- ASSERT(rrl->rr_writer == NULL);
+ ASSERT0P(rrl->rr_writer);
zfs_refcount_destroy(&rrl->rr_anon_rcount);
zfs_refcount_destroy(&rrl->rr_linked_rcount);
}
@@ -188,7 +188,7 @@ rrw_enter_read_impl(rrwlock_t *rrl, boolean_t prio, const void *tag)
} else {
(void) zfs_refcount_add(&rrl->rr_anon_rcount, tag);
}
- ASSERT(rrl->rr_writer == NULL);
+ ASSERT0P(rrl->rr_writer);
mutex_exit(&rrl->rr_lock);
}
diff --git a/sys/contrib/openzfs/module/zfs/sa.c b/sys/contrib/openzfs/module/zfs/sa.c
index 5db470ce6242..7ad25d4d85ba 100644
--- a/sys/contrib/openzfs/module/zfs/sa.c
+++ b/sys/contrib/openzfs/module/zfs/sa.c
@@ -304,7 +304,7 @@ sa_get_spill(sa_handle_t *hdl)
if (hdl->sa_spill == NULL) {
if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
&hdl->sa_spill)) == 0)
- VERIFY(0 == sa_build_index(hdl, SA_SPILL));
+ VERIFY0(sa_build_index(hdl, SA_SPILL));
} else {
rc = 0;
}
@@ -432,7 +432,7 @@ sa_add_layout_entry(objset_t *os, const sa_attr_type_t *attrs, int attr_count,
(void) snprintf(attr_name, sizeof (attr_name),
"%d", (int)lot_num);
- VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
+ VERIFY0(zap_update(os, os->os_sa->sa_layout_attr_obj,
attr_name, 2, attr_count, attrs, tx));
}
@@ -505,7 +505,7 @@ sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
}
error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
return (error);
}
@@ -717,7 +717,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
hdl->sa_spill->db_size)
- VERIFY(0 == sa_resize_spill(hdl,
+ VERIFY0(sa_resize_spill(hdl,
BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
}
@@ -791,7 +791,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
hdl->sa_bonus_tab = NULL;
}
if (!sa->sa_force_spill)
- VERIFY(0 == sa_build_index(hdl, SA_BONUS));
+ VERIFY0(sa_build_index(hdl, SA_BONUS));
if (hdl->sa_spill) {
sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
if (!spilling) {
@@ -801,10 +801,10 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
dmu_buf_rele(hdl->sa_spill, NULL);
hdl->sa_spill = NULL;
hdl->sa_spill_tab = NULL;
- VERIFY(0 == dmu_rm_spill(hdl->sa_os,
+ VERIFY0(dmu_rm_spill(hdl->sa_os,
sa_handle_object(hdl), tx));
} else {
- VERIFY(0 == sa_build_index(hdl, SA_SPILL));
+ VERIFY0(sa_build_index(hdl, SA_SPILL));
}
}
@@ -1733,10 +1733,10 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)
NULL, dxattr_obj, dxattr_size);
}
- VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
- VERIFY(sa_replace_all_by_template_locked(hdl, attrs, count, tx) == 0);
+ VERIFY0(dmu_set_bonustype(db, DMU_OT_SA, tx));
+ VERIFY0(sa_replace_all_by_template_locked(hdl, attrs, count, tx));
if (znode_acl.z_acl_extern_obj) {
- VERIFY(0 == dmu_object_free(zfsvfs->z_os,
+ VERIFY0(dmu_object_free(zfsvfs->z_os,
znode_acl.z_acl_extern_obj, tx));
}
@@ -1858,7 +1858,7 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
continue;
ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
tb[i].sa_byteswap);
- VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
+ VERIFY0(zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
tb[i].sa_name, 8, 1, &attr_value, tx));
tb[i].sa_registered = B_TRUE;
}
@@ -2013,7 +2013,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
* Only a variable-sized attribute can be
* replaced here, and its size must be changing.
*/
- ASSERT3U(reg_length, ==, 0);
+ ASSERT0(reg_length);
ASSERT3U(length, !=, buflen);
SA_ADD_BULK_ATTR(attr_desc, j, attr,
locator, datastart, buflen);
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 6b52c6cb1f9e..b3bb46da263b 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -100,6 +100,7 @@
#include <sys/vmsystm.h>
#endif /* _KERNEL */
+#include "zfs_crrd.h"
#include "zfs_prop.h"
#include "zfs_comutil.h"
#include <cityhash.h>
@@ -311,6 +312,41 @@ static int zfs_livelist_condense_zthr_cancel = 0;
static int zfs_livelist_condense_new_alloc = 0;
/*
+ * Interval (in seconds) that controls how often the current txg is recorded
+ * in the database.
+ * The finest available resolution is minutes, which means an update occurs
+ * each time `spa_note_txg_time` seconds have elapsed and the txg has changed.
+ * We provide a 256-slot ring buffer for minute-level resolution. The slot count
+ * is limited by the size of the structure we use and the maximum number of
+ * bytes we can write into ZAP. Setting `spa_note_txg_time` to 10 minutes results
+ * in approximately 144 records per day; given the 256 slots, this provides
+ * roughly 1.5 days of high-resolution data.
+ *
+ * The user can decrease `spa_note_txg_time` to increase resolution within
+ * a day, at the cost of retaining fewer days of data. Alternatively, increasing
+ * the interval allows storing data over a longer period, but with lower
+ * frequency.
+ *
+ * This parameter does not affect the daily or monthly databases, as those only
+ * store one record per day and per month, respectively.
+ */
+static uint_t spa_note_txg_time = 10 * 60;
+
+/*
+ * How often to flush the txg database to disk (in seconds).
+ * By default we flush the database every time we update it, making it the most
+ * reliable option.
+ * Since this happens every 10 minutes, it shouldn't introduce any noticeable
+ * overhead for the system. In case of failure, we will always have an
+ * up-to-date version of the database.
+ *
+ * The user can adjust the flush interval to a lower value, but it probably
+ * doesn't make sense to flush more often than the database is updated.
+ * The user can also increase the interval if they're concerned about the
+ * performance of writing the entire database to disk.
+ */
+static uint_t spa_flush_txg_time = 10 * 60;
+
+/*
* ==========================================================================
* SPA properties routines
* ==========================================================================
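
[Editor's note] To make the retention arithmetic in the comment above concrete: the minute-resolution ring covers (slot count x note interval) seconds of history. The snippet below is an illustrative userland calculation only, not part of this patch; the constants simply mirror the values the comment quotes (256 slots, 10-minute interval).

#include <stdio.h>

int
main(void)
{
	const unsigned slots = 256;		/* minute-resolution ring slots */
	const unsigned note_interval = 10 * 60;	/* default spa_note_txg_time, seconds */

	/* 86400 / 600 = 144 records per day, as the comment states. */
	printf("records per day: %u\n", 86400 / note_interval);
	/* 256 * 600 s = 153600 s, about 1.8 days of minute-level history. */
	printf("coverage: about %.1f days\n",
	    (double)slots * note_interval / 86400.0);
	return (0);
}

At the default interval this works out to roughly 1.8 days of usable slots, the same order as the "roughly 1.5 days" figure given above; shrinking the interval trades coverage for resolution, exactly as described for spa_note_txg_time.
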
@@ -390,10 +426,10 @@ spa_prop_add_user(nvlist_t *nvl, const char *propname, char *strval,
{
nvlist_t *propval;
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
- VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
- VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
+ VERIFY0(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_uint64(propval, ZPROP_SOURCE, src));
+ VERIFY0(nvlist_add_string(propval, ZPROP_VALUE, strval));
+ VERIFY0(nvlist_add_nvlist(nvl, propname, propval));
nvlist_free(propval);
}
@@ -417,11 +453,15 @@ spa_prop_get_config(spa_t *spa, nvlist_t *nv)
alloc += metaslab_class_get_alloc(spa_special_class(spa));
alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
alloc += metaslab_class_get_alloc(spa_embedded_log_class(spa));
+ alloc += metaslab_class_get_alloc(
+ spa_special_embedded_log_class(spa));
size = metaslab_class_get_space(mc);
size += metaslab_class_get_space(spa_special_class(spa));
size += metaslab_class_get_space(spa_dedup_class(spa));
size += metaslab_class_get_space(spa_embedded_log_class(spa));
+ size += metaslab_class_get_space(
+ spa_special_embedded_log_class(spa));
spa_prop_add_list(nv, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
spa_prop_add_list(nv, ZPOOL_PROP_SIZE, NULL, size, src);
@@ -925,7 +965,7 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
uint64_t ver = 0;
if (prop == ZPOOL_PROP_VERSION) {
- VERIFY(nvpair_value_uint64(elem, &ver) == 0);
+ VERIFY0(nvpair_value_uint64(elem, &ver));
} else {
ASSERT(zpool_prop_feature(nvpair_name(elem)));
ver = SPA_VERSION_FEATURES;
@@ -1255,7 +1295,7 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
if (tqs->stqs_taskq == NULL) {
- ASSERT3U(tqs->stqs_count, ==, 0);
+ ASSERT0(tqs->stqs_count);
return;
}
@@ -1679,6 +1719,8 @@ spa_activate(spa_t *spa, spa_mode_t mode)
"embedded_log", msp, B_TRUE);
spa->spa_special_class = metaslab_class_create(spa, "special",
msp, B_FALSE);
+ spa->spa_special_embedded_log_class = metaslab_class_create(spa,
+ "special_embedded_log", msp, B_TRUE);
spa->spa_dedup_class = metaslab_class_create(spa, "dedup",
msp, B_FALSE);
@@ -1794,9 +1836,9 @@ static void
spa_deactivate(spa_t *spa)
{
ASSERT(spa->spa_sync_on == B_FALSE);
- ASSERT(spa->spa_dsl_pool == NULL);
- ASSERT(spa->spa_root_vdev == NULL);
- ASSERT(spa->spa_async_zio_root == NULL);
+ ASSERT0P(spa->spa_dsl_pool);
+ ASSERT0P(spa->spa_root_vdev);
+ ASSERT0P(spa->spa_async_zio_root);
ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
spa_evicting_os_wait(spa);
@@ -1853,6 +1895,9 @@ spa_deactivate(spa_t *spa)
metaslab_class_destroy(spa->spa_special_class);
spa->spa_special_class = NULL;
+ metaslab_class_destroy(spa->spa_special_embedded_log_class);
+ spa->spa_special_embedded_log_class = NULL;
+
metaslab_class_destroy(spa->spa_dedup_class);
spa->spa_dedup_class = NULL;
@@ -1976,7 +2021,7 @@ spa_unload_log_sm_flush_all(spa_t *spa)
dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND));
- ASSERT3U(spa->spa_log_flushall_txg, ==, 0);
+ ASSERT0(spa->spa_log_flushall_txg);
spa->spa_log_flushall_txg = dmu_tx_get_txg(tx);
dmu_tx_commit(tx);
@@ -2031,6 +2076,111 @@ spa_destroy_aux_threads(spa_t *spa)
}
}
+static void
+spa_sync_time_logger(spa_t *spa, uint64_t txg)
+{
+ uint64_t curtime;
+ dmu_tx_t *tx;
+
+ if (!spa_writeable(spa)) {
+ return;
+ }
+ curtime = gethrestime_sec();
+ if (curtime < spa->spa_last_noted_txg_time + spa_note_txg_time) {
+ return;
+ }
+
+ if (txg > spa->spa_last_noted_txg) {
+ spa->spa_last_noted_txg_time = curtime;
+ spa->spa_last_noted_txg = txg;
+
+ mutex_enter(&spa->spa_txg_log_time_lock);
+ dbrrd_add(&spa->spa_txg_log_time, curtime, txg);
+ mutex_exit(&spa->spa_txg_log_time_lock);
+ }
+
+ if (curtime < spa->spa_last_flush_txg_time + spa_flush_txg_time) {
+ return;
+ }
+ spa->spa_last_flush_txg_time = curtime;
+
+ tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
+
+ VERIFY0(zap_update(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MINUTES, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_minutes, tx));
+ VERIFY0(zap_update(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_DAYS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_days, tx));
+ VERIFY0(zap_update(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MONTHS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_months, tx));
+ dmu_tx_commit(tx);
+}
+
+static void
+spa_unload_sync_time_logger(spa_t *spa)
+{
+ uint64_t txg;
+ dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+ VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT));
+
+ txg = dmu_tx_get_txg(tx);
+ spa->spa_last_noted_txg_time = 0;
+ spa->spa_last_flush_txg_time = 0;
+ spa_sync_time_logger(spa, txg);
+
+ dmu_tx_commit(tx);
+}
+
+static void
+spa_load_txg_log_time(spa_t *spa)
+{
+ int error;
+
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MINUTES, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_minutes);
+ if (error != 0 && error != ENOENT) {
+ spa_load_note(spa, "unable to load a txg time database with "
+ "minute resolution [error=%d]", error);
+ }
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_DAYS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_days);
+ if (error != 0 && error != ENOENT) {
+ spa_load_note(spa, "unable to load a txg time database with "
+ "day resolution [error=%d]", error);
+ }
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_TXG_LOG_TIME_MONTHS, RRD_ENTRY_SIZE, RRD_STRUCT_ELEM,
+ &spa->spa_txg_log_time.dbr_months);
+ if (error != 0 && error != ENOENT) {
+ spa_load_note(spa, "unable to load a txg time database with "
+ "month resolution [error=%d]", error);
+ }
+}
+
+static boolean_t
+spa_should_sync_time_logger_on_unload(spa_t *spa)
+{
+ if (!spa_writeable(spa))
+ return (B_FALSE);
+
+ if (!spa->spa_sync_on)
+ return (B_FALSE);
+
+ if (spa_state(spa) != POOL_STATE_EXPORTED)
+ return (B_FALSE);
+
+ if (spa->spa_last_noted_txg == 0)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
/*
* Opposite of spa_load().
*/
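
[Editor's note] The helpers above feed dbrrd_add() from the new zfs_crrd.c, which the diffstat lists but this excerpt does not show. As a rough mental model only (the names below are hypothetical and not from the patch), a round-robin time database can be pictured as a fixed-size ring where the timestamp selects the slot, so stale intervals are overwritten rather than freed.

/*
 * Hypothetical sketch of a time-indexed ring buffer; this is NOT the layout
 * used by zfs_crrd.c, only a mental model of how a round-robin database
 * ages out old samples.
 */
#include <stdint.h>

#define	RING_SLOTS	256

typedef struct txg_time_ring {
	uint64_t r_time[RING_SLOTS];	/* wall-clock second of each sample */
	uint64_t r_txg[RING_SLOTS];	/* txg observed at that time */
} txg_time_ring_t;

/*
 * Record (time, txg).  Each interval owns one slot, so a newer sample for
 * the same interval overwrites the older one and the ring never grows.
 */
static void
ring_add(txg_time_ring_t *r, uint64_t time_sec, uint64_t txg, uint64_t interval)
{
	uint64_t slot = (time_sec / interval) % RING_SLOTS;

	r->r_time[slot] = time_sec;
	r->r_txg[slot] = txg;
}

On import, spa_load_txg_log_time() (added further down in this hunk series) simply reads the three persisted rings (minutes, days, months) back out of the MOS directory ZAP.
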
@@ -2052,6 +2202,9 @@ spa_unload(spa_t *spa)
* we delay the final TXGs beyond what spa_final_txg is set at.
*/
if (spa->spa_final_txg == UINT64_MAX) {
+ if (spa_should_sync_time_logger_on_unload(spa))
+ spa_unload_sync_time_logger(spa);
+
/*
* If the log space map feature is enabled and the pool is
* getting exported (but not destroyed), we want to spend some
@@ -2127,7 +2280,7 @@ spa_unload(spa_t *spa)
*/
if (spa->spa_root_vdev)
vdev_free(spa->spa_root_vdev);
- ASSERT(spa->spa_root_vdev == NULL);
+ ASSERT0P(spa->spa_root_vdev);
/*
* Close the dsl pool.
@@ -2265,8 +2418,8 @@ spa_load_spares(spa_t *spa)
spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *),
KM_SLEEP);
for (i = 0; i < spa->spa_spares.sav_count; i++) {
- VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
- VDEV_ALLOC_SPARE) == 0);
+ VERIFY0(spa_config_parse(spa, &vd, spares[i], NULL, 0,
+ VDEV_ALLOC_SPARE));
ASSERT(vd != NULL);
spa->spa_spares.sav_vdevs[i] = vd;
@@ -2393,8 +2546,8 @@ spa_load_l2cache(spa_t *spa)
/*
* Create new vdev
*/
- VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
- VDEV_ALLOC_L2CACHE) == 0);
+ VERIFY0(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
+ VDEV_ALLOC_L2CACHE));
ASSERT(vd != NULL);
newvdevs[i] = vd;
@@ -2646,7 +2799,7 @@ spa_passivate_log(spa_t *spa)
vdev_t *tvd = rvd->vdev_child[c];
if (tvd->vdev_islog) {
- ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ ASSERT0P(tvd->vdev_log_mg);
metaslab_group_passivate(tvd->vdev_mg);
slog_found = B_TRUE;
}
@@ -2669,7 +2822,7 @@ spa_activate_log(spa_t *spa)
vdev_t *tvd = rvd->vdev_child[c];
if (tvd->vdev_islog) {
- ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ ASSERT0P(tvd->vdev_log_mg);
metaslab_group_activate(tvd->vdev_mg);
}
}
@@ -2709,8 +2862,8 @@ spa_claim_notify(zio_t *zio)
return;
mutex_enter(&spa->spa_props_lock); /* any mutex will do */
- if (spa->spa_claim_max_txg < BP_GET_LOGICAL_BIRTH(zio->io_bp))
- spa->spa_claim_max_txg = BP_GET_LOGICAL_BIRTH(zio->io_bp);
+ if (spa->spa_claim_max_txg < BP_GET_BIRTH(zio->io_bp))
+ spa->spa_claim_max_txg = BP_GET_BIRTH(zio->io_bp);
mutex_exit(&spa->spa_props_lock);
}
@@ -3106,7 +3259,7 @@ spa_livelist_delete_cb(void *arg, zthr_t *z)
static void
spa_start_livelist_destroy_thread(spa_t *spa)
{
- ASSERT3P(spa->spa_livelist_delete_zthr, ==, NULL);
+ ASSERT0P(spa->spa_livelist_delete_zthr);
spa->spa_livelist_delete_zthr =
zthr_create("z_livelist_destroy",
spa_livelist_delete_cb_check, spa_livelist_delete_cb, spa,
@@ -3122,7 +3275,7 @@ static int
livelist_track_new_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
dmu_tx_t *tx)
{
- ASSERT(tx == NULL);
+ ASSERT0P(tx);
livelist_new_arg_t *lna = arg;
if (bp_freed) {
bplist_append(lna->frees, bp);
@@ -3316,7 +3469,7 @@ spa_start_livelist_condensing_thread(spa_t *spa)
spa->spa_to_condense.syncing = B_FALSE;
spa->spa_to_condense.cancelled = B_FALSE;
- ASSERT3P(spa->spa_livelist_condense_zthr, ==, NULL);
+ ASSERT0P(spa->spa_livelist_condense_zthr);
spa->spa_livelist_condense_zthr =
zthr_create("z_livelist_condense",
spa_livelist_condense_cb_check,
@@ -3333,7 +3486,7 @@ spa_spawn_aux_threads(spa_t *spa)
spa_start_livelist_destroy_thread(spa);
spa_start_livelist_condensing_thread(spa);
- ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL);
+ ASSERT0P(spa->spa_checkpoint_discard_zthr);
spa->spa_checkpoint_discard_zthr =
zthr_create("z_checkpoint_discard",
spa_checkpoint_discard_thread_check,
@@ -3768,20 +3921,17 @@ out:
* ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool
*/
if (error == EREMOTEIO) {
- const char *hostname = "<unknown>";
- uint64_t hostid = 0;
-
if (mmp_label) {
if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) {
- hostname = fnvlist_lookup_string(mmp_label,
- ZPOOL_CONFIG_HOSTNAME);
+ const char *hostname = fnvlist_lookup_string(
+ mmp_label, ZPOOL_CONFIG_HOSTNAME);
fnvlist_add_string(spa->spa_load_info,
ZPOOL_CONFIG_MMP_HOSTNAME, hostname);
}
if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) {
- hostid = fnvlist_lookup_uint64(mmp_label,
- ZPOOL_CONFIG_HOSTID);
+ uint64_t hostid = fnvlist_lookup_uint64(
+ mmp_label, ZPOOL_CONFIG_HOSTID);
fnvlist_add_uint64(spa->spa_load_info,
ZPOOL_CONFIG_MMP_HOSTID, hostid);
}
@@ -3941,11 +4091,11 @@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type)
nvlist_free(spa->spa_load_info);
spa->spa_load_info = fnvlist_alloc();
- ASSERT(spa->spa_comment == NULL);
+ ASSERT0P(spa->spa_comment);
if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
spa->spa_comment = spa_strdup(comment);
- ASSERT(spa->spa_compatibility == NULL);
+ ASSERT0P(spa->spa_compatibility);
if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMPATIBILITY,
&compatibility) == 0)
spa->spa_compatibility = spa_strdup(compatibility);
@@ -4711,6 +4861,9 @@ spa_ld_get_props(spa_t *spa)
if (error != 0 && error != ENOENT)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ /* Load time log */
+ spa_load_txg_log_time(spa);
+
/*
* Load the persistent error log. If we have an older pool, this will
* not be present.
@@ -5760,7 +5913,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
nvlist_free(config);
if (state == SPA_LOAD_RECOVER) {
- ASSERT3P(loadinfo, ==, NULL);
+ ASSERT0P(loadinfo);
spa_import_progress_remove(spa_guid(spa));
return (rewind_error);
} else {
@@ -5899,7 +6052,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
}
if (firstopen)
- zvol_create_minors_recursive(spa_name(spa));
+ zvol_create_minors(spa_name(spa));
*spapp = spa;
@@ -6877,7 +7030,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
mutex_exit(&spa_namespace_lock);
- zvol_create_minors_recursive(pool);
+ zvol_create_minors(pool);
spa_import_os(spa);
@@ -7134,6 +7287,9 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
spa_config_exit(spa, SCL_ALL, FTAG);
}
+ if (spa_should_sync_time_logger_on_unload(spa))
+ spa_unload_sync_time_logger(spa);
+
/*
* If the log space map feature is enabled and the pool is
* getting exported (but not destroyed), we want to spend some
@@ -8935,7 +9091,7 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
int
spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
{
- ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+ ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER));
if (dsl_scan_resilvering(spa->spa_dsl_pool))
return (SET_ERROR(EBUSY));
@@ -8946,7 +9102,7 @@ spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
int
spa_scan_stop(spa_t *spa)
{
- ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+ ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER));
if (dsl_scan_resilvering(spa->spa_dsl_pool))
return (SET_ERROR(EBUSY));
@@ -8963,7 +9119,7 @@ int
spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart,
uint64_t txgend)
{
- ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+ ASSERT0(spa_config_held(spa, SCL_ALL, RW_WRITER));
if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE)
return (SET_ERROR(ENOTSUP));
@@ -9092,6 +9248,8 @@ spa_async_thread(void *arg)
old_space += metaslab_class_get_space(spa_dedup_class(spa));
old_space += metaslab_class_get_space(
spa_embedded_log_class(spa));
+ old_space += metaslab_class_get_space(
+ spa_special_embedded_log_class(spa));
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
@@ -9100,6 +9258,8 @@ spa_async_thread(void *arg)
new_space += metaslab_class_get_space(spa_dedup_class(spa));
new_space += metaslab_class_get_space(
spa_embedded_log_class(spa));
+ new_space += metaslab_class_get_space(
+ spa_special_embedded_log_class(spa));
mutex_exit(&spa_namespace_lock);
/*
@@ -9388,7 +9548,7 @@ spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx)
{
zio_t *zio = zio_root(spa, NULL, NULL, 0);
bplist_iterate(bpl, spa_free_sync_cb, zio, tx);
- VERIFY(zio_wait(zio) == 0);
+ VERIFY0(zio_wait(zio));
}
/*
@@ -9427,7 +9587,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
size_t nvsize = 0;
dmu_buf_t *db;
- VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
+ VERIFY0(nvlist_size(nv, &nvsize, NV_ENCODE_XDR));
/*
* Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
@@ -9437,15 +9597,15 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
packed = vmem_alloc(bufsize, KM_SLEEP);
- VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
- KM_SLEEP) == 0);
+ VERIFY0(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
+ KM_SLEEP));
memset(packed + nvsize, 0, bufsize - nvsize);
dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
vmem_free(packed, bufsize);
- VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
+ VERIFY0(dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
dmu_buf_will_dirty(db, tx);
*(uint64_t *)db->db_data = nvsize;
dmu_buf_rele(db, FTAG);
@@ -10180,6 +10340,8 @@ spa_sync(spa_t *spa, uint64_t txg)
*/
brt_pending_apply(spa, txg);
+ spa_sync_time_logger(spa, txg);
+
/*
* Lock out configuration changes.
*/
@@ -10222,6 +10384,7 @@ spa_sync(spa_t *spa, uint64_t txg)
dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
spa->spa_sync_starttime = gethrtime();
+
taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
@@ -10309,7 +10472,7 @@ spa_sync(spa_t *spa, uint64_t txg)
metaslab_class_evict_old(spa->spa_normal_class, txg);
metaslab_class_evict_old(spa->spa_log_class, txg);
- /* spa_embedded_log_class has only one metaslab per vdev. */
+ /* Embedded log classes have only one metaslab per vdev. */
metaslab_class_evict_old(spa->spa_special_class, txg);
metaslab_class_evict_old(spa->spa_dedup_class, txg);
@@ -10378,7 +10541,7 @@ spa_sync_tq_create(spa_t *spa, const char *name)
{
kthread_t **kthreads;
- ASSERT(spa->spa_sync_tq == NULL);
+ ASSERT0P(spa->spa_sync_tq);
ASSERT3S(spa->spa_alloc_count, <=, boot_ncpus);
/*
@@ -11095,6 +11258,13 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT,
"Whether extra ALLOC blkptrs were added to a livelist entry while it "
"was being condensed");
+ZFS_MODULE_PARAM(zfs_spa, spa_, note_txg_time, UINT, ZMOD_RW,
+ "How frequently TXG timestamps are stored internally (in seconds)");
+
+ZFS_MODULE_PARAM(zfs_spa, spa_, flush_txg_time, UINT, ZMOD_RW,
+ "How frequently the TXG timestamps database should be flushed "
+ "to disk (in seconds)");
+
#ifdef _KERNEL
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RW,
diff --git a/sys/contrib/openzfs/module/zfs/spa_errlog.c b/sys/contrib/openzfs/module/zfs/spa_errlog.c
index 3e08f261fda1..7252fd534bdf 100644
--- a/sys/contrib/openzfs/module/zfs/spa_errlog.c
+++ b/sys/contrib/openzfs/module/zfs/spa_errlog.c
@@ -253,7 +253,7 @@ find_birth_txg(dsl_dataset_t *ds, zbookmark_err_phys_t *zep,
if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT);
- *birth_txg = BP_GET_LOGICAL_BIRTH(&bp);
+ *birth_txg = BP_GET_PHYSICAL_BIRTH(&bp);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (error);
@@ -885,7 +885,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
if (error == EACCES)
error = 0;
else if (!error)
- zep.zb_birth = BP_GET_LOGICAL_BIRTH(&bp);
+ zep.zb_birth = BP_GET_PHYSICAL_BIRTH(&bp);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index f054e4290bbf..cce772eae598 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -471,9 +471,9 @@ spa_config_lock_destroy(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_destroy(&scl->scl_lock);
cv_destroy(&scl->scl_cv);
- ASSERT(scl->scl_writer == NULL);
- ASSERT(scl->scl_write_wanted == 0);
- ASSERT(scl->scl_count == 0);
+ ASSERT0P(scl->scl_writer);
+ ASSERT0(scl->scl_write_wanted);
+ ASSERT0(scl->scl_count);
}
}
@@ -715,6 +715,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&spa->spa_txg_log_time_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
@@ -783,24 +784,23 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path);
list_insert_head(&spa->spa_config_list, dp);
- VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, KM_SLEEP));
if (config != NULL) {
nvlist_t *features;
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
&features) == 0) {
- VERIFY(nvlist_dup(features, &spa->spa_label_features,
- 0) == 0);
+ VERIFY0(nvlist_dup(features,
+ &spa->spa_label_features, 0));
}
- VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
+ VERIFY0(nvlist_dup(config, &spa->spa_config, 0));
}
if (spa->spa_label_features == NULL) {
- VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
+ KM_SLEEP));
}
spa->spa_min_ashift = INT_MAX;
@@ -903,6 +903,7 @@ spa_remove(spa_t *spa)
mutex_destroy(&spa->spa_vdev_top_lock);
mutex_destroy(&spa->spa_feat_stats_lock);
mutex_destroy(&spa->spa_activities_lock);
+ mutex_destroy(&spa->spa_txg_log_time_lock);
kmem_free(spa, sizeof (spa_t));
}
@@ -1308,6 +1309,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
metaslab_class_validate(spa_log_class(spa));
metaslab_class_validate(spa_embedded_log_class(spa));
metaslab_class_validate(spa_special_class(spa));
+ metaslab_class_validate(spa_special_embedded_log_class(spa));
metaslab_class_validate(spa_dedup_class(spa));
spa_config_exit(spa, SCL_ALL, spa);
@@ -1896,6 +1898,8 @@ spa_get_slop_space(spa_t *spa)
*/
uint64_t embedded_log =
metaslab_class_get_dspace(spa_embedded_log_class(spa));
+ embedded_log += metaslab_class_get_dspace(
+ spa_special_embedded_log_class(spa));
slop -= MIN(embedded_log, slop >> 1);
/*
@@ -2001,6 +2005,12 @@ spa_special_class(spa_t *spa)
}
metaslab_class_t *
+spa_special_embedded_log_class(spa_t *spa)
+{
+ return (spa->spa_special_embedded_log_class);
+}
+
+metaslab_class_t *
spa_dedup_class(spa_t *spa)
{
return (spa->spa_dedup_class);
diff --git a/sys/contrib/openzfs/module/zfs/spa_stats.c b/sys/contrib/openzfs/module/zfs/spa_stats.c
index 6d7cabcf766d..2c87122a0aa9 100644
--- a/sys/contrib/openzfs/module/zfs/spa_stats.c
+++ b/sys/contrib/openzfs/module/zfs/spa_stats.c
@@ -718,7 +718,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
smh = list_prev(&shl->procfs_list.pl_list, smh)) {
if (smh->mmp_node_id == mmp_node_id) {
- ASSERT(smh->io_error == 0);
+ ASSERT0(smh->io_error);
smh->io_error = io_error;
smh->duration = duration;
error = 0;
diff --git a/sys/contrib/openzfs/module/zfs/space_map.c b/sys/contrib/openzfs/module/zfs/space_map.c
index c429e0edd168..5f24963f2291 100644
--- a/sys/contrib/openzfs/module/zfs/space_map.c
+++ b/sys/contrib/openzfs/module/zfs/space_map.c
@@ -817,7 +817,7 @@ space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
space_map_t *sm;
int error;
- ASSERT(*smp == NULL);
+ ASSERT0P(*smp);
ASSERT(os != NULL);
ASSERT(object != 0);
diff --git a/sys/contrib/openzfs/module/zfs/space_reftree.c b/sys/contrib/openzfs/module/zfs/space_reftree.c
index 9b2d5ed31dc9..889980e08c06 100644
--- a/sys/contrib/openzfs/module/zfs/space_reftree.c
+++ b/sys/contrib/openzfs/module/zfs/space_reftree.c
@@ -149,6 +149,6 @@ space_reftree_generate_map(avl_tree_t *t, zfs_range_tree_t *rt, int64_t minref)
}
}
}
- ASSERT(refcnt == 0);
+ ASSERT0(refcnt);
ASSERT(start == -1ULL);
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 01758b0c54c0..9cf35e379000 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -243,6 +243,25 @@ vdev_dbgmsg_print_tree(vdev_t *vd, int indent)
vdev_dbgmsg_print_tree(vd->vdev_child[i], indent + 2);
}
+char *
+vdev_rt_name(vdev_t *vd, const char *name)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu %s}",
+ spa_name(vd->vdev_spa),
+ (u_longlong_t)vd->vdev_guid,
+ name));
+}
+
+static char *
+vdev_rt_name_dtl(vdev_t *vd, const char *name, vdev_dtl_type_t dtl_type)
+{
+ return (kmem_asprintf("{spa=%s vdev_guid=%llu %s[%d]}",
+ spa_name(vd->vdev_spa),
+ (u_longlong_t)vd->vdev_guid,
+ name,
+ dtl_type));
+}
+
/*
* Virtual device management.
*/
@@ -282,12 +301,15 @@ vdev_getops(const char *type)
* Given a vdev and a metaslab class, find which metaslab group we're
* interested in. All vdevs may belong to two different metaslab classes.
* Dedicated slog devices use only the primary metaslab group, rather than a
- * separate log group. For embedded slogs, the vdev_log_mg will be non-NULL.
+ * separate log group. For embedded slogs, vdev_log_mg will be non-NULL and
+ * will point to a metaslab group of either embedded_log_class (for normal
+ * vdevs) or special_embedded_log_class (for special vdevs).
*/
metaslab_group_t *
vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
{
- if (mc == spa_embedded_log_class(vd->vdev_spa) &&
+ if ((mc == spa_embedded_log_class(vd->vdev_spa) ||
+ mc == spa_special_embedded_log_class(vd->vdev_spa)) &&
vd->vdev_log_mg != NULL)
return (vd->vdev_log_mg);
else
@@ -532,7 +554,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd)
vdev_t **newchild;
ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
- ASSERT(cvd->vdev_parent == NULL);
+ ASSERT0P(cvd->vdev_parent);
cvd->vdev_parent = pvd;
@@ -556,7 +578,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd)
pvd->vdev_nonrot &= cvd->vdev_nonrot;
cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd);
- ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL);
+ ASSERT0P(cvd->vdev_top->vdev_parent->vdev_parent);
/*
* Walk up all ancestors to update guid sum.
@@ -692,8 +714,9 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
rw_init(&vd->vdev_indirect_rwlock, NULL, RW_DEFAULT, NULL);
mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL);
- vd->vdev_obsolete_segments = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ vd->vdev_obsolete_segments = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vdev_obsolete_segments"));
/*
* Initialize rate limit structs for events. We rate limit ZIO delay
@@ -747,8 +770,9 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL);
for (int t = 0; t < DTL_TYPES; t++) {
- vd->vdev_dtl[t] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ vd->vdev_dtl[t] = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name_dtl(vd, "vdev_dtl", t));
}
txg_list_create(&vd->vdev_ms_list, spa,
@@ -1077,10 +1101,10 @@ vdev_free(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
- ASSERT3P(vd->vdev_autotrim_thread, ==, NULL);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
+ ASSERT0P(vd->vdev_trim_thread);
+ ASSERT0P(vd->vdev_autotrim_thread);
+ ASSERT0P(vd->vdev_rebuild_thread);
/*
* Scan queues are normally destroyed at the end of a scan. If the
@@ -1109,7 +1133,7 @@ vdev_free(vdev_t *vd)
for (int c = 0; c < vd->vdev_children; c++)
vdev_free(vd->vdev_child[c]);
- ASSERT(vd->vdev_child == NULL);
+ ASSERT0P(vd->vdev_child);
ASSERT(vd->vdev_guid_sum == vd->vdev_guid);
if (vd->vdev_ops->vdev_op_fini != NULL)
@@ -1138,7 +1162,7 @@ vdev_free(vdev_t *vd)
*/
vdev_remove_child(vd->vdev_parent, vd);
- ASSERT(vd->vdev_parent == NULL);
+ ASSERT0P(vd->vdev_parent);
ASSERT(!list_link_active(&vd->vdev_leaf_node));
/*
@@ -1285,9 +1309,9 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
ASSERT0(tvd->vdev_indirect_config.vic_births_object);
ASSERT0(tvd->vdev_indirect_config.vic_mapping_object);
ASSERT3U(tvd->vdev_indirect_config.vic_prev_indirect_vdev, ==, -1ULL);
- ASSERT3P(tvd->vdev_indirect_mapping, ==, NULL);
- ASSERT3P(tvd->vdev_indirect_births, ==, NULL);
- ASSERT3P(tvd->vdev_obsolete_sm, ==, NULL);
+ ASSERT0P(tvd->vdev_indirect_mapping);
+ ASSERT0P(tvd->vdev_indirect_births);
+ ASSERT0P(tvd->vdev_obsolete_sm);
ASSERT0(tvd->vdev_noalloc);
ASSERT0(tvd->vdev_removing);
ASSERT0(tvd->vdev_rebuilding);
@@ -1440,7 +1464,7 @@ vdev_remove_parent(vdev_t *cvd)
if (cvd == cvd->vdev_top)
vdev_top_transfer(mvd, cvd);
- ASSERT(mvd->vdev_children == 0);
+ ASSERT0(mvd->vdev_children);
vdev_free(mvd);
}
@@ -1508,8 +1532,13 @@ vdev_metaslab_group_create(vdev_t *vd)
vd->vdev_mg = metaslab_group_create(mc, vd);
if (!vd->vdev_islog) {
- vd->vdev_log_mg = metaslab_group_create(
- spa_embedded_log_class(spa), vd);
+ if (mc == spa_special_class(spa)) {
+ vd->vdev_log_mg = metaslab_group_create(
+ spa_special_embedded_log_class(spa), vd);
+ } else {
+ vd->vdev_log_mg = metaslab_group_create(
+ spa_embedded_log_class(spa), vd);
+ }
}
/*
@@ -1624,9 +1653,10 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
/*
* Find the emptiest metaslab on the vdev and mark it for use for
* embedded slog by moving it from the regular to the log metaslab
- * group.
+ * group. This works for normal and special vdevs.
*/
- if (vd->vdev_mg->mg_class == spa_normal_class(spa) &&
+ if ((vd->vdev_mg->mg_class == spa_normal_class(spa) ||
+ vd->vdev_mg->mg_class == spa_special_class(spa)) &&
vd->vdev_ms_count > zfs_embedded_slog_min_ms &&
avl_is_empty(&vd->vdev_log_mg->mg_metaslab_tree)) {
uint64_t slog_msid = 0;
@@ -2104,14 +2134,14 @@ vdev_open(vdev_t *vd)
* faulted, bail out of the open.
*/
if (!vd->vdev_removed && vd->vdev_faulted) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED ||
vd->vdev_label_aux == VDEV_AUX_EXTERNAL);
vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
vd->vdev_label_aux);
return (SET_ERROR(ENXIO));
} else if (vd->vdev_offline) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE);
return (SET_ERROR(ENXIO));
}
@@ -2167,7 +2197,7 @@ vdev_open(vdev_t *vd)
* the vdev is accessible. If we're faulted, bail.
*/
if (vd->vdev_faulted) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED ||
vd->vdev_label_aux == VDEV_AUX_EXTERNAL);
vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
@@ -2176,7 +2206,7 @@ vdev_open(vdev_t *vd)
}
if (vd->vdev_degraded) {
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
VDEV_AUX_ERR_EXCEEDED);
} else {
@@ -3449,7 +3479,9 @@ vdev_dtl_load(vdev_t *vd)
return (error);
ASSERT(vd->vdev_dtl_sm != NULL);
- rt = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ rt = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vdev_dtl_load:rt"));
error = space_map_load(vd->vdev_dtl_sm, rt, SM_ALLOC);
if (error == 0) {
mutex_enter(&vd->vdev_dtl_lock);
@@ -3597,7 +3629,8 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
ASSERT(vd->vdev_dtl_sm != NULL);
}
- rtsync = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ rtsync = zfs_range_tree_create_flags(NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "rtsync"));
mutex_enter(&vd->vdev_dtl_lock);
zfs_range_tree_walk(rt, zfs_range_tree_add, rtsync);
@@ -3912,7 +3945,7 @@ vdev_load(vdev_t *vd)
if (error == 0 && checkpoint_sm_obj != 0) {
objset_t *mos = spa_meta_objset(vd->vdev_spa);
ASSERT(vd->vdev_asize != 0);
- ASSERT3P(vd->vdev_checkpoint_sm, ==, NULL);
+ ASSERT0P(vd->vdev_checkpoint_sm);
error = space_map_open(&vd->vdev_checkpoint_sm,
mos, checkpoint_sm_obj, 0, vd->vdev_asize,
@@ -3960,7 +3993,7 @@ vdev_load(vdev_t *vd)
if (error == 0 && obsolete_sm_object != 0) {
objset_t *mos = vd->vdev_spa->spa_meta_objset;
ASSERT(vd->vdev_asize != 0);
- ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
+ ASSERT0P(vd->vdev_obsolete_sm);
if ((error = space_map_open(&vd->vdev_obsolete_sm, mos,
obsolete_sm_object, 0, vd->vdev_asize, 0))) {
@@ -4488,7 +4521,7 @@ top:
/*
* Prevent any future allocations.
*/
- ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ ASSERT0P(tvd->vdev_log_mg);
metaslab_group_passivate(mg);
(void) spa_vdev_state_exit(spa, vd, 0);
@@ -5161,7 +5194,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
int64_t
vdev_deflated_space(vdev_t *vd, int64_t space)
{
- ASSERT((space & (SPA_MINBLOCKSIZE-1)) == 0);
+ ASSERT0((space & (SPA_MINBLOCKSIZE-1)));
ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
return ((space >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio);
@@ -5253,8 +5286,8 @@ vdev_config_dirty(vdev_t *vd)
if (nvlist_lookup_nvlist_array(sav->sav_config,
ZPOOL_CONFIG_L2CACHE, &aux, &naux) != 0) {
- VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
- ZPOOL_CONFIG_SPARES, &aux, &naux) == 0);
+ VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config,
+ ZPOOL_CONFIG_SPARES, &aux, &naux));
}
ASSERT(c < naux);
@@ -5642,7 +5675,7 @@ vdev_expand(vdev_t *vd, uint64_t txg)
(vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count &&
vdev_is_concrete(vd)) {
vdev_metaslab_group_create(vd);
- VERIFY(vdev_metaslab_init(vd, txg) == 0);
+ VERIFY0(vdev_metaslab_init(vd, txg));
vdev_config_dirty(vd);
}
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c
index feec5fd3ce17..a05289102af2 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@@ -477,7 +477,7 @@ vdev_draid_generate_perms(const draid_map_t *map, uint8_t **permsp)
VERIFY3U(map->dm_children, <=, VDEV_DRAID_MAX_CHILDREN);
VERIFY3U(map->dm_seed, !=, 0);
VERIFY3U(map->dm_nperms, !=, 0);
- VERIFY3P(map->dm_perms, ==, NULL);
+ VERIFY0P(map->dm_perms);
#ifdef _KERNEL
/*
@@ -590,7 +590,7 @@ vdev_draid_psize_to_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
uint64_t asize = (rows * vdc->vdc_groupwidth) << ashift;
ASSERT3U(asize, !=, 0);
- ASSERT3U(asize % (vdc->vdc_groupwidth), ==, 0);
+ ASSERT0(asize % (vdc->vdc_groupwidth));
return (asize);
}
@@ -704,7 +704,7 @@ vdev_draid_map_alloc_scrub(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
uint64_t skip_off = 0;
ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
- ASSERT3P(rr->rr_abd_empty, ==, NULL);
+ ASSERT0P(rr->rr_abd_empty);
if (rr->rr_nempty > 0) {
rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size,
@@ -793,7 +793,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
uint64_t skip_off = 0;
ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
- ASSERT3P(rr->rr_abd_empty, ==, NULL);
+ ASSERT0P(rr->rr_abd_empty);
if (rr->rr_nempty > 0) {
rr->rr_abd_empty = abd_alloc_linear(rr->rr_nempty * skip_size,
@@ -807,7 +807,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
/* empty data column (small read), add a skip sector */
ASSERT3U(skip_size, ==, parity_size);
ASSERT3U(rr->rr_nempty, !=, 0);
- ASSERT3P(rc->rc_abd, ==, NULL);
+ ASSERT0P(rc->rc_abd);
rc->rc_abd = abd_get_offset_size(rr->rr_abd_empty,
skip_off, skip_size);
skip_off += skip_size;
@@ -1623,7 +1623,7 @@ vdev_draid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize,
SPA_MAXBLOCKSIZE);
ASSERT3U(vdev_draid_get_astart(vd, start), ==, start);
- ASSERT3U(asize % (vdc->vdc_groupwidth << ashift), ==, 0);
+ ASSERT0(asize % (vdc->vdc_groupwidth << ashift));
/* Chunks must evenly span all data columns in the group. */
psize = (((psize >> ashift) / ndata) * ndata) << ashift;
@@ -1634,7 +1634,7 @@ vdev_draid_rebuild_asize(vdev_t *vd, uint64_t start, uint64_t asize,
uint64_t left = vdev_draid_group_to_offset(vd, group + 1) - start;
chunk_size = MIN(chunk_size, left);
- ASSERT3U(chunk_size % (vdc->vdc_groupwidth << ashift), ==, 0);
+ ASSERT0(chunk_size % (vdc->vdc_groupwidth << ashift));
ASSERT3U(vdev_draid_offset_to_group(vd, start), ==,
vdev_draid_offset_to_group(vd, start + chunk_size - 1));
@@ -2272,7 +2272,7 @@ vdev_draid_init(spa_t *spa, nvlist_t *nv, void **tsd)
ASSERT3U(vdc->vdc_groupwidth, <=, vdc->vdc_ndisks);
ASSERT3U(vdc->vdc_groupsz, >=, 2 * VDEV_DRAID_ROWHEIGHT);
ASSERT3U(vdc->vdc_devslicesz, >=, VDEV_DRAID_ROWHEIGHT);
- ASSERT3U(vdc->vdc_devslicesz % VDEV_DRAID_ROWHEIGHT, ==, 0);
+ ASSERT0(vdc->vdc_devslicesz % VDEV_DRAID_ROWHEIGHT);
ASSERT3U((vdc->vdc_groupwidth * vdc->vdc_ngroups) %
vdc->vdc_ndisks, ==, 0);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
index fac2c3a5f154..7538f471e63c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -792,7 +792,7 @@ spa_condense_indirect_start_sync(vdev_t *vd, dmu_tx_t *tx)
DMU_POOL_CONDENSING_INDIRECT, sizeof (uint64_t),
sizeof (*scip) / sizeof (uint64_t), scip, tx));
- ASSERT3P(spa->spa_condensing_indirect, ==, NULL);
+ ASSERT0P(spa->spa_condensing_indirect);
spa->spa_condensing_indirect = spa_condensing_indirect_create(spa);
zfs_dbgmsg("starting condense of vdev %llu in txg %llu: "
@@ -882,7 +882,7 @@ spa_condense_fini(spa_t *spa)
void
spa_start_indirect_condensing_thread(spa_t *spa)
{
- ASSERT3P(spa->spa_condense_zthr, ==, NULL);
+ ASSERT0P(spa->spa_condense_zthr);
spa->spa_condense_zthr = zthr_create("z_indirect_condense",
spa_condense_indirect_thread_check,
spa_condense_indirect_thread, spa, minclsyspri);
@@ -1504,7 +1504,7 @@ vdev_indirect_splits_checksum_validate(indirect_vsd_t *iv, zio_t *zio)
is != NULL; is = list_next(&iv->iv_splits, is)) {
ASSERT3P(is->is_good_child->ic_data, !=, NULL);
- ASSERT3P(is->is_good_child->ic_duplicate, ==, NULL);
+ ASSERT0P(is->is_good_child->ic_duplicate);
abd_copy_off(zio->io_abd, is->is_good_child->ic_data,
is->is_split_offset, 0, is->is_size);
@@ -1842,7 +1842,7 @@ vdev_indirect_io_done(zio_t *zio)
*/
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
zio->io_error = ret;
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
ret = 0;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
index 4274728578ad..27188c46e561 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
@@ -541,8 +541,9 @@ vdev_initialize_thread(void *arg)
abd_t *deadbeef = vdev_initialize_block_alloc();
- vd->vdev_initialize_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ vd->vdev_initialize_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vdev_initialize_tree"));
for (uint64_t i = 0; !vd->vdev_detached &&
i < vd->vdev_top->vdev_ms_count; i++) {
@@ -631,7 +632,7 @@ vdev_initialize(vdev_t *vd)
ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_initialize_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -652,7 +653,7 @@ vdev_uninitialize(vdev_t *vd)
ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_initialize_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -671,7 +672,7 @@ vdev_initialize_stop_wait_impl(vdev_t *vd)
while (vd->vdev_initialize_thread != NULL)
cv_wait(&vd->vdev_initialize_cv, &vd->vdev_initialize_lock);
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
vd->vdev_initialize_exit_wanted = B_FALSE;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
index 6baa6236aac2..c44f654b0261 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -163,7 +163,7 @@ uint64_t
vdev_label_offset(uint64_t psize, int l, uint64_t offset)
{
ASSERT(offset < sizeof (vdev_label_t));
- ASSERT(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t) == 0);
+ ASSERT0(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t));
return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
@@ -768,12 +768,12 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config)
}
if (idx) {
- VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
- array, idx) == 0);
+ VERIFY0(nvlist_add_uint64_array(config,
+ ZPOOL_CONFIG_HOLE_ARRAY, array, idx));
}
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
- rvd->vdev_children) == 0);
+ VERIFY0(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
+ rvd->vdev_children));
kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
}
@@ -1189,8 +1189,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
* vdev uses as described above, and automatically expires if we
* fail.
*/
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
- crtxg) == 0);
+ VERIFY0(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
+ crtxg));
}
buf = vp->vp_nvlist;
diff --git a/sys/contrib/openzfs/module/zfs/vdev_mirror.c b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
index a6aee9437066..18efdaac006f 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
@@ -532,7 +532,7 @@ vdev_mirror_child_select(zio_t *zio)
uint64_t txg = zio->io_txg;
int c, lowest_load;
- ASSERT(zio->io_bp == NULL || BP_GET_BIRTH(zio->io_bp) == txg);
+ ASSERT(zio->io_bp == NULL || BP_GET_PHYSICAL_BIRTH(zio->io_bp) == txg);
lowest_load = INT_MAX;
mm->mm_preferred_cnt = 0;
@@ -779,7 +779,7 @@ vdev_mirror_io_done(zio_t *zio)
* being written out during self healing.
*/
if ((zio->io_flags & ZIO_FLAG_DIO_READ) &&
- (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
+ (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) {
zio_dio_chksum_verify_error_report(zio);
zio->io_error = vdev_mirror_worst_error(mm);
ASSERT3U(zio->io_error, ==, ECKSUM);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index aa41f7066036..c12713b107bf 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -780,7 +780,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
if (dio->io_flags & ZIO_FLAG_NODATA) {
/* allocate a buffer for a write gap */
ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE);
- ASSERT3P(dio->io_abd, ==, NULL);
+ ASSERT0P(dio->io_abd);
abd_gang_add(aio->io_abd,
abd_get_zeros(dio->io_size), B_TRUE);
} else {
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index 71c4bfbdaf00..b597d6daefde 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -412,7 +412,7 @@ vdev_raidz_map_free(raidz_map_t *rm)
rm->rm_nphys_cols);
}
- ASSERT3P(rm->rm_lr, ==, NULL);
+ ASSERT0P(rm->rm_lr);
kmem_free(rm, offsetof(raidz_map_t, rm_row[rm->rm_nrows]));
}
@@ -2206,11 +2206,7 @@ vdev_raidz_close(vdev_t *vd)
/*
* Return the logical width to use, given the txg in which the allocation
- * happened. Note that BP_GET_BIRTH() is usually the txg in which the
- * BP was allocated. Remapped BP's (that were relocated due to device
- * removal, see remap_blkptr_cb()), will have a more recent physical birth
- * which reflects when the BP was relocated, but we can ignore these because
- * they can't be on RAIDZ (device removal doesn't support RAIDZ).
+ * happened.
*/
static uint64_t
vdev_raidz_get_logical_width(vdev_raidz_t *vdrz, uint64_t txg)
@@ -2249,10 +2245,9 @@ vdev_raidz_asize_to_psize(vdev_t *vd, uint64_t asize, uint64_t txg)
vdev_raidz_t *vdrz = vd->vdev_tsd;
uint64_t psize;
uint64_t ashift = vd->vdev_top->vdev_ashift;
- uint64_t cols = vdrz->vd_original_width;
uint64_t nparity = vdrz->vd_nparity;
- cols = vdev_raidz_get_logical_width(vdrz, txg);
+ uint64_t cols = vdev_raidz_get_logical_width(vdrz, txg);
ASSERT0(asize % (1 << ashift));
@@ -2285,10 +2280,9 @@ vdev_raidz_psize_to_asize(vdev_t *vd, uint64_t psize, uint64_t txg)
vdev_raidz_t *vdrz = vd->vdev_tsd;
uint64_t asize;
uint64_t ashift = vd->vdev_top->vdev_ashift;
- uint64_t cols = vdrz->vd_original_width;
uint64_t nparity = vdrz->vd_nparity;
- cols = vdev_raidz_get_logical_width(vdrz, txg);
+ uint64_t cols = vdev_raidz_get_logical_width(vdrz, txg);
asize = ((psize - 1) >> ashift) + 1;
asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity));
@@ -2345,7 +2339,7 @@ vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, raidz_row_t *rr, int col)
logical_rs.rs_start = rr->rr_offset;
logical_rs.rs_end = logical_rs.rs_start +
vdev_raidz_psize_to_asize(zio->io_vd, rr->rr_size,
- BP_GET_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
raidz_col_t *rc = &rr->rr_col[col];
vdev_t *cvd = zio->io_vd->vdev_child[rc->rc_devidx];
@@ -2437,7 +2431,7 @@ raidz_start_skip_writes(zio_t *zio)
vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
if (rc->rc_size != 0)
continue;
- ASSERT3P(rc->rc_abd, ==, NULL);
+ ASSERT0P(rc->rc_abd);
ASSERT3U(rc->rc_offset, <,
cvd->vdev_psize - VDEV_LABEL_END_SIZE);
@@ -2568,7 +2562,7 @@ vdev_raidz_io_start(zio_t *zio)
raidz_map_t *rm;
uint64_t logical_width = vdev_raidz_get_logical_width(vdrz,
- BP_GET_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
if (logical_width != vdrz->vd_physical_width) {
zfs_locked_range_t *lr = NULL;
uint64_t synced_offset = UINT64_MAX;
@@ -2691,7 +2685,7 @@ raidz_checksum_verify(zio_t *zio)
*/
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
zio->io_error = ret;
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
zio_checksum_verified(zio);
return (0);
@@ -3048,7 +3042,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
/* Check for success */
if (raidz_checksum_verify(zio) == 0) {
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)
return (0);
/* Reconstruction succeeded - report errors */
@@ -3369,7 +3363,7 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
* also have been fewer parity errors than parity
* columns or, again, we wouldn't be in this code path.
*/
- ASSERT(parity_untried == 0);
+ ASSERT0(parity_untried);
ASSERT(parity_errors < rr->rr_firstdatacol);
/*
@@ -3514,7 +3508,7 @@ vdev_raidz_io_done(zio_t *zio)
}
if (raidz_checksum_verify(zio) == 0) {
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)
goto done;
for (int i = 0; i < rm->rm_nrows; i++) {
@@ -4591,8 +4585,10 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr)
uint64_t shift, start;
zfs_range_seg_type_t type = metaslab_calculate_range_tree_type(
raidvd, msp, &start, &shift);
- zfs_range_tree_t *rt = zfs_range_tree_create(NULL, type, NULL,
- start, shift);
+ zfs_range_tree_t *rt = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift, ZFS_RT_F_DYN_NAME,
+ metaslab_rt_name(msp->ms_group, msp,
+ "spa_raidz_expand_thread:rt"));
zfs_range_tree_add(rt, msp->ms_start, msp->ms_size);
zfs_range_tree_walk(msp->ms_allocatable, zfs_range_tree_remove,
rt);
@@ -4747,7 +4743,7 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr)
void
spa_start_raidz_expansion_thread(spa_t *spa)
{
- ASSERT3P(spa->spa_raidz_expand_zthr, ==, NULL);
+ ASSERT0P(spa->spa_raidz_expand_zthr);
spa->spa_raidz_expand_zthr = zthr_create("raidz_expand",
spa_raidz_expand_thread_check, spa_raidz_expand_thread,
spa, defclsyspri);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
index 0e296606d037..47b3b9921abe 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -256,7 +256,7 @@ vdev_rebuild_initiate_sync(void *arg, dmu_tx_t *tx)
"vdev_id=%llu vdev_guid=%llu started",
(u_longlong_t)vd->vdev_id, (u_longlong_t)vd->vdev_guid);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_rebuild_thread);
vd->vdev_rebuild_thread = thread_create(NULL, 0,
vdev_rebuild_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
@@ -413,7 +413,7 @@ vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx)
mutex_enter(&vd->vdev_rebuild_lock);
ASSERT(vrp->vrp_rebuild_state == VDEV_REBUILD_ACTIVE);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_rebuild_thread);
vrp->vrp_last_offset = 0;
vrp->vrp_min_txg = 0;
@@ -787,8 +787,9 @@ vdev_rebuild_thread(void *arg)
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
vr->vr_top_vdev = vd;
vr->vr_scan_msp = NULL;
- vr->vr_scan_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL,
- 0, 0);
+ vr->vr_scan_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "vr_scan_tree"));
mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index db79ded6dce4..2f7a739da241 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -344,10 +344,10 @@ spa_vdev_remove_aux(nvlist_t *config, const char *name, nvlist_t **dev,
for (int i = 0, j = 0; i < count; i++) {
if (dev[i] == dev_to_remove)
continue;
- VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
+ VERIFY0(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP));
}
- VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
+ VERIFY0(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY));
fnvlist_add_nvlist_array(config, name, (const nvlist_t * const *)newdev,
count - 1);
@@ -364,13 +364,15 @@ spa_vdev_removal_create(vdev_t *vd)
spa_vdev_removal_t *svr = kmem_zalloc(sizeof (*svr), KM_SLEEP);
mutex_init(&svr->svr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&svr->svr_cv, NULL, CV_DEFAULT, NULL);
- svr->svr_allocd_segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ svr->svr_allocd_segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "svr_allocd_segs"));
svr->svr_vdev_id = vd->vdev_id;
for (int i = 0; i < TXG_SIZE; i++) {
- svr->svr_frees[i] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ svr->svr_frees[i] = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "svr_frees"));
list_create(&svr->svr_new_segments[i],
sizeof (vdev_indirect_mapping_entry_t),
offsetof(vdev_indirect_mapping_entry_t, vime_node));
@@ -421,7 +423,7 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx)
svr = spa_vdev_removal_create(vd);
ASSERT(vd->vdev_removing);
- ASSERT3P(vd->vdev_indirect_mapping, ==, NULL);
+ ASSERT0P(vd->vdev_indirect_mapping);
spa_feature_incr(spa, SPA_FEATURE_DEVICE_REMOVAL, tx);
if (spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS)) {
@@ -527,7 +529,7 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx)
* but in any case only when there are outstanding free i/os, which
* there are not).
*/
- ASSERT3P(spa->spa_vdev_removal, ==, NULL);
+ ASSERT0P(spa->spa_vdev_removal);
spa->spa_vdev_removal = svr;
svr->svr_thread = thread_create(NULL, 0,
spa_vdev_remove_thread, spa, 0, &p0, TS_RUN, minclsyspri);
@@ -1179,8 +1181,9 @@ spa_vdev_copy_segment(vdev_t *vd, zfs_range_tree_t *segs,
* relative to the start of the range to be copied (i.e. relative to the
* local variable "start").
*/
- zfs_range_tree_t *obsolete_segs = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ zfs_range_tree_t *obsolete_segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "obsolete_segs"));
zfs_btree_index_t where;
zfs_range_seg_t *rs = zfs_btree_first(&segs->rt_root, &where);
@@ -1359,11 +1362,11 @@ vdev_remove_complete(spa_t *spa)
txg_wait_synced(spa->spa_dsl_pool, 0);
txg = spa_vdev_enter(spa);
vdev_t *vd = vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id);
- ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
- ASSERT3P(vd->vdev_autotrim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_initialize_thread);
+ ASSERT0P(vd->vdev_trim_thread);
+ ASSERT0P(vd->vdev_autotrim_thread);
vdev_rebuild_stop_wait(vd);
- ASSERT3P(vd->vdev_rebuild_thread, ==, NULL);
+ ASSERT0P(vd->vdev_rebuild_thread);
sysevent_t *ev = spa_event_create(spa, vd, NULL,
ESC_ZFS_VDEV_REMOVE_DEV);
@@ -1448,8 +1451,9 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca,
* allocated segments that we are copying. We may also be copying
* free segments (of up to vdev_removal_max_span bytes).
*/
- zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ zfs_range_tree_t *segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "spa_vdev_copy_impl:segs"));
for (;;) {
zfs_range_tree_t *rt = svr->svr_allocd_segs;
zfs_range_seg_t *rs = zfs_range_tree_first(rt);
@@ -1610,8 +1614,9 @@ spa_vdev_remove_thread(void *arg)
vca.vca_read_error_bytes = 0;
vca.vca_write_error_bytes = 0;
- zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ zfs_range_tree_t *segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "spa_vdev_remove_thread:segs"));
mutex_enter(&svr->svr_lock);
@@ -1863,7 +1868,7 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
objset_t *mos = spa->spa_meta_objset;
- ASSERT3P(svr->svr_thread, ==, NULL);
+ ASSERT0P(svr->svr_thread);
spa_feature_decr(spa, SPA_FEATURE_DEVICE_REMOVAL, tx);
@@ -1895,8 +1900,9 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
vdev_indirect_mapping_max_offset(vim));
}
- zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ zfs_range_tree_t *segs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0, ZFS_RT_F_DYN_NAME,
+ vdev_rt_name(vd, "spa_vdev_remove_cancel_sync:segs"));
for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
metaslab_t *msp = vd->vdev_ms[msi];
@@ -2070,7 +2076,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
ASSERT(vd->vdev_islog);
ASSERT(vd == vd->vdev_top);
- ASSERT3P(vd->vdev_log_mg, ==, NULL);
+ ASSERT0P(vd->vdev_log_mg);
ASSERT(MUTEX_HELD(&spa_namespace_lock));
/*
@@ -2106,7 +2112,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
if (error != 0) {
metaslab_group_activate(mg);
- ASSERT3P(vd->vdev_log_mg, ==, NULL);
+ ASSERT0P(vd->vdev_log_mg);
return (error);
}
ASSERT0(vd->vdev_stat.vs_alloc);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c
index 842bb3e690d4..eee18b367909 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_trim.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c
@@ -902,7 +902,9 @@ vdev_trim_thread(void *arg)
ta.trim_vdev = vd;
ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
ta.trim_extent_bytes_min = zfs_trim_extent_bytes_min;
- ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta.trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "trim_tree"));
ta.trim_type = TRIM_TYPE_MANUAL;
ta.trim_flags = 0;
@@ -1008,7 +1010,7 @@ vdev_trim(vdev_t *vd, uint64_t rate, boolean_t partial, boolean_t secure)
ASSERT(MUTEX_HELD(&vd->vdev_trim_lock));
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_trim_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_trim_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -1030,7 +1032,7 @@ vdev_trim_stop_wait_impl(vdev_t *vd)
while (vd->vdev_trim_thread != NULL)
cv_wait(&vd->vdev_trim_cv, &vd->vdev_trim_lock);
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_trim_thread);
vd->vdev_trim_exit_wanted = B_FALSE;
}
@@ -1305,8 +1307,10 @@ vdev_autotrim_thread(void *arg)
* Allocate an empty range tree which is swapped in
* for the existing ms_trim tree while it is processed.
*/
- trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME,
+ vdev_rt_name(vd, "autotrim_tree"));
zfs_range_tree_swap(&msp->ms_trim, &trim_tree);
ASSERT(zfs_range_tree_is_empty(msp->ms_trim));
@@ -1360,8 +1364,10 @@ vdev_autotrim_thread(void *arg)
if (!cvd->vdev_ops->vdev_op_leaf)
continue;
- ta->trim_tree = zfs_range_tree_create(NULL,
- ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta->trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME,
+ vdev_rt_name(vd, "autotrim_tree"));
zfs_range_tree_walk(trim_tree,
vdev_trim_range_add, ta);
}
@@ -1533,7 +1539,7 @@ vdev_autotrim_stop_wait(vdev_t *tvd)
cv_wait(&tvd->vdev_autotrim_cv,
&tvd->vdev_autotrim_lock);
- ASSERT3P(tvd->vdev_autotrim_thread, ==, NULL);
+ ASSERT0P(tvd->vdev_autotrim_thread);
tvd->vdev_autotrim_exit_wanted = B_FALSE;
}
mutex_exit(&tvd->vdev_autotrim_lock);
@@ -1600,7 +1606,9 @@ vdev_trim_l2arc_thread(void *arg)
vd->vdev_trim_secure = 0;
ta.trim_vdev = vd;
- ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta.trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "trim_tree"));
ta.trim_type = TRIM_TYPE_MANUAL;
ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
@@ -1704,7 +1712,7 @@ vdev_trim_l2arc(spa_t *spa)
mutex_enter(&vd->vdev_trim_lock);
ASSERT(vd->vdev_ops->vdev_op_leaf);
ASSERT(vdev_is_concrete(vd));
- ASSERT3P(vd->vdev_trim_thread, ==, NULL);
+ ASSERT0P(vd->vdev_trim_thread);
ASSERT(!vd->vdev_detached);
ASSERT(!vd->vdev_trim_exit_wanted);
ASSERT(!vd->vdev_top->vdev_removing);
@@ -1735,7 +1743,9 @@ vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
ASSERT(!vd->vdev_top->vdev_rz_expanding);
ta.trim_vdev = vd;
- ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+ ta.trim_tree = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ ZFS_RT_F_DYN_NAME, vdev_rt_name(vd, "trim_tree"));
ta.trim_type = TRIM_TYPE_SIMPLE;
ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
diff --git a/sys/contrib/openzfs/module/zfs/zap.c b/sys/contrib/openzfs/module/zfs/zap.c
index 9711c91d7e4e..3e4e997798a3 100644
--- a/sys/contrib/openzfs/module/zfs/zap.c
+++ b/sys/contrib/openzfs/module/zfs/zap.c
@@ -921,7 +921,7 @@ fzap_add_cd(zap_name_t *zn,
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
ASSERT(!zap->zap_ismicro);
- ASSERT(fzap_check(zn, integer_size, num_integers) == 0);
+ ASSERT0(fzap_check(zn, integer_size, num_integers));
err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
if (err != 0)
@@ -1304,7 +1304,7 @@ zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
int
fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
{
- int err = ENOENT;
+ int err;
zap_entry_handle_t zeh;
zap_leaf_t *l;
@@ -1386,7 +1386,7 @@ again:
}
err = zap_entry_read_name(zap, &zeh,
za->za_name_len, za->za_name);
- ASSERT(err == 0);
+ ASSERT0(err);
za->za_normalization_conflict =
zap_entry_normalization_conflict(&zeh,
@@ -1546,7 +1546,7 @@ zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
boolean_t trunc = B_FALSE;
int err = 0;
- ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0);
+ ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
ASSERT3U(prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
ASSERT3U(ZAP_HASH_IDX(hash, prefix_len), ==, prefix);
@@ -1564,7 +1564,7 @@ zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
uint64_t sl_hash = ZAP_PREFIX_HASH(sl_prefix, prefix_len);
int slbit = prefix & 1;
- ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0);
+ ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
/*
* Check if there is a sibling by reading ptrtbl ptrs.
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
index 411b1a9db5ab..ea4e3117a8b9 100644
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -346,7 +346,7 @@ zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
{
zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
- ASSERT(zap->zap_normflags == 0);
+ ASSERT0(zap->zap_normflags);
zn->zn_zap = zap;
zn->zn_key_intlen = sizeof (*key);
zn->zn_key_orig = zn->zn_key_norm = key;
@@ -1876,7 +1876,7 @@ zap_cursor_serialize(zap_cursor_t *zc)
return (-1ULL);
if (zc->zc_zap == NULL)
return (zc->zc_serialized);
- ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
+ ASSERT0((zc->zc_hash & zap_maxcd(zc->zc_zap)));
ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
/*
@@ -1911,7 +1911,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
* we must add to the existing zc_cd, which may already
* be 1 due to the zap_cursor_advance.
*/
- ASSERT(zc->zc_hash == 0);
+ ASSERT0(zc->zc_hash);
hb = zap_hashbits(zc->zc_zap);
zc->zc_hash = zc->zc_serialized << (64 - hb);
zc->zc_cd += zc->zc_serialized >> hb;
diff --git a/sys/contrib/openzfs/module/zfs/zcp.c b/sys/contrib/openzfs/module/zfs/zcp.c
index 6960ea360b15..c6684f453e95 100644
--- a/sys/contrib/openzfs/module/zfs/zcp.c
+++ b/sys/contrib/openzfs/module/zfs/zcp.c
@@ -765,7 +765,7 @@ zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
return (NULL);
}
(void) memcpy(luabuf, ptr, osize);
- VERIFY3P(zcp_lua_alloc(ud, ptr, osize, 0), ==, NULL);
+ VERIFY0P(zcp_lua_alloc(ud, ptr, osize, 0));
return (luabuf);
}
}
@@ -1175,7 +1175,7 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync,
for (nvpair_t *pair = nvlist_next_nvpair(runinfo.zri_new_zvols, NULL);
pair != NULL;
pair = nvlist_next_nvpair(runinfo.zri_new_zvols, pair)) {
- zvol_create_minor(nvpair_name(pair));
+ zvol_create_minors(nvpair_name(pair));
}
fnvlist_free(runinfo.zri_new_zvols);
diff --git a/sys/contrib/openzfs/module/zfs/zfeature.c b/sys/contrib/openzfs/module/zfs/zfeature.c
index 7dfe00d42a08..0816ea134bf3 100644
--- a/sys/contrib/openzfs/module/zfs/zfeature.c
+++ b/sys/contrib/openzfs/module/zfs/zfeature.c
@@ -210,8 +210,8 @@ spa_features_check(spa_t *spa, boolean_t for_write,
za->za_name, 1, MAXPATHLEN, buf) == 0)
desc = buf;
- VERIFY(nvlist_add_string(unsup_feat,
- za->za_name, desc) == 0);
+ VERIFY0(nvlist_add_string(unsup_feat,
+ za->za_name, desc));
}
}
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_chksum.c b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
index 5c92be21c0c8..21852bf3d865 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_chksum.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_chksum.c
@@ -32,9 +32,6 @@
#include <sys/blake3.h>
#include <sys/sha2.h>
-/* limit benchmarking to max 256KiB, when EdonR is slower then this: */
-#define LIMIT_PERF_MBS 300
-
typedef struct {
const char *name;
const char *impl;
@@ -52,9 +49,15 @@ typedef struct {
zio_checksum_tmpl_free_t *(free);
} chksum_stat_t;
+#define AT_STARTUP 0
+#define AT_BENCHMARK 1
+#define AT_DONE 2
+
static chksum_stat_t *chksum_stat_data = 0;
-static int chksum_stat_cnt = 0;
static kstat_t *chksum_kstat = NULL;
+static int chksum_stat_limit = AT_STARTUP;
+static int chksum_stat_cnt = 0;
+static void chksum_benchmark(void);
/*
* Sample output on i3-1005G1 System:
@@ -129,6 +132,9 @@ chksum_kstat_data(char *buf, size_t size, void *data)
static void *
chksum_kstat_addr(kstat_t *ksp, loff_t n)
{
+	/* run the full benchmark on first kstat access */
+ chksum_benchmark();
+
if (n < chksum_stat_cnt)
ksp->ks_private = (void *)(chksum_stat_data + n);
else
@@ -176,47 +182,36 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
kpreempt_enable();
run_bw = size * run_count * NANOSEC;
- run_bw /= run_time_ns; /* B/s */
+ run_bw /= run_time_ns; /* B/s */
*result = run_bw/1024/1024; /* MiB/s */
}
-#define LIMIT_INIT 0
-#define LIMIT_NEEDED 1
-#define LIMIT_NOLIMIT 2
-
static void
chksum_benchit(chksum_stat_t *cs)
{
abd_t *abd;
void *ctx = 0;
void *salt = &cs->salt.zcs_bytes;
- static int chksum_stat_limit = LIMIT_INIT;
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
if (cs->init)
ctx = cs->init(&cs->salt);
+ /* benchmarks in startup mode */
+ if (chksum_stat_limit == AT_STARTUP) {
+ abd = abd_alloc_linear(1<<18, B_FALSE);
+ chksum_run(cs, abd, ctx, 5, &cs->bs256k);
+ goto done;
+ }
+
/* allocate test memory via abd linear interface */
abd = abd_alloc_linear(1<<20, B_FALSE);
+
+ /* benchmarks when requested */
chksum_run(cs, abd, ctx, 1, &cs->bs1k);
chksum_run(cs, abd, ctx, 2, &cs->bs4k);
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
- chksum_run(cs, abd, ctx, 5, &cs->bs256k);
-
- /* check if we ran on a slow cpu */
- if (chksum_stat_limit == LIMIT_INIT) {
- if (cs->bs1k < LIMIT_PERF_MBS) {
- chksum_stat_limit = LIMIT_NEEDED;
- } else {
- chksum_stat_limit = LIMIT_NOLIMIT;
- }
- }
-
- /* skip benchmarks >= 1MiB when the CPU is to slow */
- if (chksum_stat_limit == LIMIT_NEEDED)
- goto abort;
-
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
abd_free(abd);
@@ -225,7 +220,7 @@ chksum_benchit(chksum_stat_t *cs)
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
chksum_run(cs, abd, ctx, 8, &cs->bs16m);
-abort:
+done:
abd_free(abd);
/* free up temp memory */
@@ -243,7 +238,6 @@ chksum_benchmark(void)
/* we need the benchmark only for the kernel module */
return;
#endif
-
chksum_stat_t *cs;
uint64_t max;
uint32_t id, cbid = 0, id_save;
@@ -251,8 +245,14 @@ chksum_benchmark(void)
const zfs_impl_t *sha256 = zfs_impl_get_ops("sha256");
const zfs_impl_t *sha512 = zfs_impl_get_ops("sha512");
+ /* benchmarks are done */
+ if (chksum_stat_limit == AT_DONE)
+ return;
+
+
/* count implementations */
- chksum_stat_cnt = 2;
+ chksum_stat_cnt = 1; /* edonr */
+ chksum_stat_cnt += 1; /* skein */
chksum_stat_cnt += sha256->getcnt();
chksum_stat_cnt += sha512->getcnt();
chksum_stat_cnt += blake3->getcnt();
@@ -332,6 +332,17 @@ chksum_benchmark(void)
}
}
blake3->setid(id_save);
+
+ switch (chksum_stat_limit) {
+ case AT_STARTUP:
+ /* next time we want a full benchmark */
+ chksum_stat_limit = AT_BENCHMARK;
+ break;
+ case AT_BENCHMARK:
+ /* no further benchmarks */
+ chksum_stat_limit = AT_DONE;
+ break;
+ }
}
void
@@ -341,7 +352,7 @@ chksum_init(void)
blake3_per_cpu_ctx_init();
#endif
- /* Benchmark supported implementations */
+	/* quick 256 KiB benchmark; the full run happens on first kstat read */
chksum_benchmark();
/* Install kstats for all implementations */
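The reworked benchmark above is staged: chksum_init() measures only the 256 KiB case at module load (AT_STARTUP), the remaining block sizes are measured lazily the first time the kstat is read (AT_BENCHMARK), and any later read is a no-op (AT_DONE). This replaces the removed LIMIT_PERF_MBS heuristic by deferring the expensive runs until someone actually asks for the numbers. A minimal standalone sketch of that state machine, illustrative only and using hypothetical names in place of the real chksum_stat_t plumbing:

#include <stdio.h>

/* Benchmark stages, mirroring the AT_* values introduced above. */
enum bench_stage { BENCH_STARTUP, BENCH_FULL, BENCH_DONE };

static enum bench_stage stage = BENCH_STARTUP;

/* Placeholder for a single timed checksum run (hypothetical). */
static void
bench_one(const char *label, unsigned kib)
{
	printf("  benchmarking %s at %u KiB\n", label, kib);
}

/*
 * Run either the quick startup benchmark (256 KiB only) or the full
 * sweep, then advance the stage so later calls become no-ops.
 */
static void
bench_run(void)
{
	if (stage == BENCH_DONE)
		return;

	if (stage == BENCH_STARTUP) {
		bench_one("quick", 256);
		stage = BENCH_FULL;	/* full sweep on next request */
	} else {
		unsigned sizes[] = { 1, 4, 16, 64, 1024, 4096, 16384 };
		for (unsigned i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++)
			bench_one("full", sizes[i]);
		stage = BENCH_DONE;	/* never benchmark again */
	}
}

int
main(void)
{
	printf("module load:\n");
	bench_run();		/* quick 256 KiB pass */
	printf("first kstat read:\n");
	bench_run();		/* full sweep, on demand */
	printf("later kstat reads:\n");
	bench_run();		/* no-op */
	return (0);
}
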
diff --git a/sys/contrib/openzfs/module/zfs/zfs_crrd.c b/sys/contrib/openzfs/module/zfs/zfs_crrd.c
new file mode 100644
index 000000000000..f9267ed41d71
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/zfs_crrd.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2024 Klara Inc.
+ *
+ * This software was developed by
+ * Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
+ * Fred Weigel <fred.weigel@klarasystems.com>
+ * under sponsorship from Wasabi Technology, Inc. and Klara Inc.
+ */
+/*
+ * This file implements a round-robin database that stores timestamps and txg
+ * numbers. Due to limited space, we use a round-robin approach, where
+ * the oldest records are overwritten when there is no longer enough room.
+ * This is a best-effort mechanism, and the database should be treated as
+ * an approximation. Consider this before consuming it.
+ *
+ * The database is linear, meaning we assume each new entry is newer than the
+ * ones already stored. Because of this, if time is manipulated, the database
+ * will only accept records that are newer than the existing ones.
+ * (For example, jumping 10 years into the future and then back can lead to
+ * a situation where we won't write anything to the database for 10 years.)
+ *
+ * All times stored in the database use UTC, which makes it easy to convert to
+ * and from local time.
+ *
+ * Each database holds 256 records (as defined in the `RRD_MAX_ENTRIES` macro).
+ * This limit comes from the maximum size of a ZAP object, where we store the
+ * binary blob.
+ *
+ * We've split the database into three smaller ones.
+ * The `minute database` provides high resolution (default: every 10 minutes),
+ * but only covers approximately 1.5 days. This gives a detailed view of recent
+ * activity, useful, for example, when performing a scrub of the last hour.
+ * The `daily database` records one txg per day. With 256 entries, it retains
+ * roughly 8 months of data. This allows users to scrub or analyze txgs across
+ * a range of days.
+ * The `monthly database` stores one record per month, giving approximately
+ * 21 years of history.
+ * All these calculations assume the worst-case scenario: the pool is always
+ * online and actively written to.
+ *
+ * A potential source of confusion is that the database does not store data
+ * while the pool is offline, leading to potential gaps in timeline. Also,
+ * the database contains no records from before this feature was enabled.
+ * Both, upon reflection, are expected.
+ */
+#include <sys/zfs_context.h>
+
+#include "zfs_crrd.h"
+
+rrd_data_t *
+rrd_tail_entry(rrd_t *rrd)
+{
+ size_t n;
+
+ if (rrd_len(rrd) == 0)
+ return (NULL);
+
+ if (rrd->rrd_tail == 0)
+ n = RRD_MAX_ENTRIES - 1;
+ else
+ n = rrd->rrd_tail - 1;
+
+ return (&rrd->rrd_entries[n]);
+}
+
+uint64_t
+rrd_tail(rrd_t *rrd)
+{
+ const rrd_data_t *tail;
+
+ tail = rrd_tail_entry(rrd);
+
+ return (tail == NULL ? 0 : tail->rrdd_time);
+}
+
+/*
+ * Return length of data in the rrd.
+ * rrd_get works from 0..rrd_len()-1.
+ */
+size_t
+rrd_len(rrd_t *rrd)
+{
+
+ return (rrd->rrd_length);
+}
+
+const rrd_data_t *
+rrd_entry(rrd_t *rrd, size_t i)
+{
+ size_t n;
+
+ if (i >= rrd_len(rrd)) {
+ return (0);
+ }
+
+ n = (rrd->rrd_head + i) % RRD_MAX_ENTRIES;
+ return (&rrd->rrd_entries[n]);
+}
+
+uint64_t
+rrd_get(rrd_t *rrd, size_t i)
+{
+ const rrd_data_t *data = rrd_entry(rrd, i);
+
+ return (data == NULL ? 0 : data->rrdd_txg);
+}
+
+/* Add value to database. */
+void
+rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg)
+{
+ rrd_data_t *tail;
+
+ tail = rrd_tail_entry(rrd);
+ if (tail != NULL && tail->rrdd_time == time) {
+ if (tail->rrdd_txg < txg) {
+ tail->rrdd_txg = txg;
+ } else {
+ return;
+ }
+ }
+
+ rrd->rrd_entries[rrd->rrd_tail].rrdd_time = time;
+ rrd->rrd_entries[rrd->rrd_tail].rrdd_txg = txg;
+
+ rrd->rrd_tail = (rrd->rrd_tail + 1) % RRD_MAX_ENTRIES;
+
+ if (rrd->rrd_length < RRD_MAX_ENTRIES) {
+ rrd->rrd_length++;
+ } else {
+ rrd->rrd_head = (rrd->rrd_head + 1) % RRD_MAX_ENTRIES;
+ }
+}
+
+void
+dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg)
+{
+ hrtime_t daydiff, monthdiff, minutedif;
+
+ minutedif = time - rrd_tail(&db->dbr_minutes);
+ daydiff = time - rrd_tail(&db->dbr_days);
+ monthdiff = time - rrd_tail(&db->dbr_months);
+
+ if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60))
+ rrd_add(&db->dbr_months, time, txg);
+ else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60))
+ rrd_add(&db->dbr_days, time, txg);
+ else if (minutedif >= 0)
+ rrd_add(&db->dbr_minutes, time, txg);
+}
+
+/*
+ * We could do a binary search here, but the routine isn't frequently
+ * called and the data is small so we stick to a simple loop.
+ */
+static const rrd_data_t *
+rrd_query(rrd_t *rrd, hrtime_t tv, dbrrd_rounding_t rounding)
+{
+ const rrd_data_t *data = NULL;
+
+ for (size_t i = 0; i < rrd_len(rrd); i++) {
+ const rrd_data_t *cur = rrd_entry(rrd, i);
+
+ if (rounding == DBRRD_FLOOR) {
+ if (tv < cur->rrdd_time) {
+ break;
+ }
+ data = cur;
+ } else {
+ /* DBRRD_CEILING */
+ if (tv <= cur->rrdd_time) {
+ data = cur;
+ break;
+ }
+ }
+ }
+
+ return (data);
+}
+
+static const rrd_data_t *
+dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2)
+{
+
+ if (r1 == NULL)
+ return (r2);
+ if (r2 == NULL)
+ return (r1);
+
+ return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2);
+}
+
+uint64_t
+dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rounding)
+{
+ const rrd_data_t *data, *dm, *dd, *dy;
+
+ data = NULL;
+ dm = rrd_query(&r->dbr_minutes, tv, rounding);
+ dd = rrd_query(&r->dbr_days, tv, rounding);
+ dy = rrd_query(&r->dbr_months, tv, rounding);
+
+ data = dbrrd_closest(tv, dbrrd_closest(tv, dd, dm), dy);
+
+ return (data == NULL ? 0 : data->rrdd_txg);
+}
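For orientation, the retention figures in the header comment follow directly from the 256-entry limit: one record every 10 minutes covers about 42.7 hours (~1.8 days), one per day about 8.4 months, and one per month about 21 years. The wrap-around behaviour itself can be modelled with a scaled-down, standalone ring (4 entries instead of RRD_MAX_ENTRIES) that uses the same head/tail arithmetic as rrd_add() and rrd_entry(); this is an illustration only, not code from the change:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define RING_MAX	4	/* stand-in for RRD_MAX_ENTRIES (256) */

typedef struct {
	uint64_t time;
	uint64_t txg;
} entry_t;

typedef struct {
	entry_t	entries[RING_MAX];
	size_t	head;		/* oldest record */
	size_t	tail;		/* next slot to write */
	size_t	length;
} ring_t;

/* Append a record, overwriting the oldest one once the ring is full. */
static void
ring_add(ring_t *r, uint64_t time, uint64_t txg)
{
	r->entries[r->tail].time = time;
	r->entries[r->tail].txg = txg;
	r->tail = (r->tail + 1) % RING_MAX;

	if (r->length < RING_MAX)
		r->length++;
	else
		r->head = (r->head + 1) % RING_MAX;	/* drop oldest */
}

/* i-th oldest record, 0 .. length-1, as in rrd_entry(). */
static const entry_t *
ring_get(const ring_t *r, size_t i)
{
	if (i >= r->length)
		return (NULL);
	return (&r->entries[(r->head + i) % RING_MAX]);
}

int
main(void)
{
	ring_t r = { 0 };

	/* Six inserts into a 4-slot ring: the two oldest are overwritten. */
	for (uint64_t t = 1; t <= 6; t++)
		ring_add(&r, t * 600, t * 100);	/* one record per 10 minutes */

	for (size_t i = 0; i < r.length; i++) {
		const entry_t *e = ring_get(&r, i);
		printf("slot %zu: time=%llu txg=%llu\n", i,
		    (unsigned long long)e->time, (unsigned long long)e->txg);
	}
	return (0);
}
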
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
index 10a6d289fbf8..2af1efe82e62 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
@@ -112,8 +112,7 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
uint64_t fuid_size;
ASSERT(fuid_obj != 0);
- VERIFY(0 == dmu_bonus_hold(os, fuid_obj,
- FTAG, &db));
+ VERIFY0(dmu_bonus_hold(os, fuid_obj, FTAG, &db));
fuid_size = *(uint64_t *)db->db_data;
dmu_buf_rele(db, FTAG);
@@ -125,22 +124,21 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
int i;
packed = kmem_alloc(fuid_size, KM_SLEEP);
- VERIFY(dmu_read(os, fuid_obj, 0,
- fuid_size, packed, DMU_READ_PREFETCH) == 0);
- VERIFY(nvlist_unpack(packed, fuid_size,
- &nvp, 0) == 0);
- VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
- &fuidnvp, &count) == 0);
+ VERIFY0(dmu_read(os, fuid_obj, 0,
+ fuid_size, packed, DMU_READ_PREFETCH));
+ VERIFY0(nvlist_unpack(packed, fuid_size, &nvp, 0));
+ VERIFY0(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
+ &fuidnvp, &count));
for (i = 0; i != count; i++) {
fuid_domain_t *domnode;
const char *domain;
uint64_t idx;
- VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
- &domain) == 0);
- VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
- &idx) == 0);
+ VERIFY0(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
+ &domain));
+ VERIFY0(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
+ &idx));
domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
@@ -246,35 +244,33 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
&zfsvfs->z_fuid_obj, tx) == 0);
}
- VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP));
numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
- VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
- domnode->f_idx) == 0);
- VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
- VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
- domnode->f_ksid->kd_name) == 0);
+ VERIFY0(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP));
+ VERIFY0(nvlist_add_uint64(fuids[i], FUID_IDX,
+ domnode->f_idx));
+ VERIFY0(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0));
+ VERIFY0(nvlist_add_string(fuids[i], FUID_DOMAIN,
+ domnode->f_ksid->kd_name));
}
fnvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
(const nvlist_t * const *)fuids, numnodes);
for (i = 0; i != numnodes; i++)
nvlist_free(fuids[i]);
kmem_free(fuids, numnodes * sizeof (void *));
- VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
+ VERIFY0(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR));
packed = kmem_alloc(nvsize, KM_SLEEP);
- VERIFY(nvlist_pack(nvp, &packed, &nvsize,
- NV_ENCODE_XDR, KM_SLEEP) == 0);
+ VERIFY0(nvlist_pack(nvp, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP));
nvlist_free(nvp);
zfsvfs->z_fuid_size = nvsize;
dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
zfsvfs->z_fuid_size, packed, tx);
kmem_free(packed, zfsvfs->z_fuid_size);
- VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
- FTAG, &db));
+ VERIFY0(dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db));
dmu_buf_will_dirty(db, tx);
*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
dmu_buf_rele(db, FTAG);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index ebb1cfd07125..121b966b9864 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -1493,7 +1493,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
goto pool_props_bad;
(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
- VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP));
error = zfs_fill_zplprops_root(version, rootprops,
zplprops, NULL);
if (error != 0)
@@ -1704,6 +1704,8 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
static const zfs_ioc_key_t zfs_keys_pool_scrub[] = {
{"scan_type", DATA_TYPE_UINT64, 0},
{"scan_command", DATA_TYPE_UINT64, 0},
+ {"scan_date_start", DATA_TYPE_UINT64, ZK_OPTIONAL},
+ {"scan_date_end", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
static int
@@ -1712,6 +1714,7 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
spa_t *spa;
int error;
uint64_t scan_type, scan_cmd;
+ uint64_t date_start, date_end;
if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0)
return (SET_ERROR(EINVAL));
@@ -1721,6 +1724,11 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
if (scan_cmd >= POOL_SCRUB_FLAGS_END)
return (SET_ERROR(EINVAL));
+ if (nvlist_lookup_uint64(innvl, "scan_date_start", &date_start) != 0)
+ date_start = 0;
+ if (nvlist_lookup_uint64(innvl, "scan_date_end", &date_end) != 0)
+ date_end = 0;
+
if ((error = spa_open(poolname, &spa, FTAG)) != 0)
return (error);
@@ -1732,7 +1740,24 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
error = spa_scan_range(spa, scan_type,
spa_get_last_scrubbed_txg(spa), 0);
} else {
- error = spa_scan(spa, scan_type);
+ uint64_t txg_start, txg_end;
+
+ txg_start = txg_end = 0;
+ if (date_start != 0 || date_end != 0) {
+ mutex_enter(&spa->spa_txg_log_time_lock);
+ if (date_start != 0) {
+ txg_start = dbrrd_query(&spa->spa_txg_log_time,
+ date_start, DBRRD_FLOOR);
+ }
+
+ if (date_end != 0) {
+ txg_end = dbrrd_query(&spa->spa_txg_log_time,
+ date_end, DBRRD_CEILING);
+ }
+ mutex_exit(&spa->spa_txg_log_time_lock);
+ }
+
+ error = spa_scan_range(spa, scan_type, txg_start, txg_end);
}
spa_close(spa, FTAG);
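With the optional scan_date_start/scan_date_end keys, userland can request a scrub bounded by wall-clock dates; the handler maps each date to a txg through dbrrd_query() and passes the resulting range to spa_scan_range(). A hedged userland sketch of how such a request could be assembled, assuming the existing libzfs_core lzc_scrub() entry point and the POOL_SCAN_SCRUB / POOL_SCRUB_NORMAL constants; scrub_between() is a hypothetical helper, and the exact timestamp encoding should be checked against the matching userland change, which is not shown in this file:

#include <libzfs_core.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>

/*
 * Request a scrub of "pool" limited to data written between two
 * timestamps.  Unset (zero) bounds fall back to a full-pool scrub,
 * as in zfs_ioc_pool_scrub() above.  The caller is expected to have
 * called libzfs_core_init() already.
 */
static int
scrub_between(const char *pool, uint64_t date_start, uint64_t date_end)
{
	nvlist_t *innvl = fnvlist_alloc();
	int err;

	fnvlist_add_uint64(innvl, "scan_type", POOL_SCAN_SCRUB);
	fnvlist_add_uint64(innvl, "scan_command", POOL_SCRUB_NORMAL);
	if (date_start != 0)
		fnvlist_add_uint64(innvl, "scan_date_start", date_start);
	if (date_end != 0)
		fnvlist_add_uint64(innvl, "scan_date_end", date_end);

	err = lzc_scrub(ZFS_IOC_POOL_SCRUB, pool, innvl, NULL);
	fnvlist_free(innvl);
	return (err);
}
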
@@ -2220,7 +2245,7 @@ nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
*/
if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
+ VERIFY0(nvlist_add_uint64(props, zfs_prop_to_name(prop), value));
return (0);
}
@@ -2255,7 +2280,7 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
dmu_objset_type(os) == DMU_OST_ZFS) {
nvlist_t *nv;
- VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP));
if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
(err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
(err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
@@ -2458,7 +2483,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
+ VERIFY0(nvpair_value_nvlist(pair, &attrs));
if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
&pair) != 0)
return (SET_ERROR(EINVAL));
@@ -2513,9 +2538,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &pair) == 0);
+ VERIFY0(nvpair_value_nvlist(pair, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
}
/* all special properties are numeric except for keylocation */
@@ -2907,14 +2931,14 @@ props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
{
nvpair_t *pair;
- VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP));
pair = NULL;
while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
if (nvlist_exists(skipped, nvpair_name(pair)))
continue;
- VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
+ VERIFY0(nvlist_add_nvpair(*newprops, pair));
}
}
@@ -3039,11 +3063,11 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
switch (type) {
case PROP_TYPE_STRING:
- VERIFY(0 == nvlist_add_string(dummy, propname, ""));
+ VERIFY0(nvlist_add_string(dummy, propname, ""));
break;
case PROP_TYPE_NUMBER:
case PROP_TYPE_INDEX:
- VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
+ VERIFY0(nvlist_add_uint64(dummy, propname, 0));
break;
default:
err = SET_ERROR(EINVAL);
@@ -3429,14 +3453,14 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
/*
* Put the version in the zplprops
*/
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_VERSION), zplver));
if (norm == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm));
/*
* If we're normalizing, names must always be valid UTF-8 strings.
@@ -3446,55 +3470,55 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
if (u8 == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8));
if (sense == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_CASE), sense));
if (duq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSERQUOTA, &duq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq));
if (dgq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPQUOTA,
&dgq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq));
if (dpq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTQUOTA,
&dpq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq));
if (duoq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSEROBJQUOTA,
&duoq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq));
if (dgoq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPOBJQUOTA,
&dgoq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq));
if (dpoq == ZFS_PROP_UNDEFINED &&
(error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTOBJQUOTA,
&dpoq)) != 0)
return (error);
- VERIFY(nvlist_add_uint64(zplprops,
- zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq) == 0);
+ VERIFY0(nvlist_add_uint64(zplprops,
+ zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq));
if (is_ci)
*is_ci = (sense == ZFS_CASE_INSENSITIVE);
@@ -3643,8 +3667,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
* file system creation, so go figure them out
* now.
*/
- VERIFY(nvlist_alloc(&zct.zct_zplprops,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&zct.zct_zplprops,
+ NV_UNIQUE_NAME, KM_SLEEP));
error = zfs_fill_zplprops(fsname, nvprops,
zct.zct_zplprops, &is_insensitive);
if (error != 0) {
@@ -4891,9 +4915,8 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
* format.
*/
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &pair) == 0);
+ VERIFY0(nvpair_value_nvlist(pair, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
}
/*
@@ -5000,15 +5023,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
}
break;
- case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
- /*
- * This property could require the allocation classes
- * feature to be active for setting, however we allow
- * it so that tests of settable properties succeed.
- * The CLI will issue a warning in this case.
- */
- break;
-
case ZFS_PROP_SHARESMB:
if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
return (SET_ERROR(ENOTSUP));
@@ -5087,7 +5101,7 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
if (props == NULL)
return (0);
- VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP));
zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
@@ -5099,9 +5113,8 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
sizeof (zc->zc_value));
if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
(err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
- VERIFY(nvlist_remove_nvpair(props, pair) == 0);
- VERIFY(nvlist_add_int32(errors,
- zc->zc_value, err) == 0);
+ VERIFY0(nvlist_remove_nvpair(props, pair));
+ VERIFY0(nvlist_add_int32(errors, zc->zc_value, err));
}
pair = next_pair;
}
@@ -5111,7 +5124,7 @@ zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
nvlist_free(errors);
errors = NULL;
} else {
- VERIFY(nvpair_value_int32(pair, &rv) == 0);
+ VERIFY0(nvpair_value_int32(pair, &rv));
}
if (errlist == NULL)
@@ -5128,16 +5141,14 @@ propval_equals(nvpair_t *p1, nvpair_t *p2)
if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
/* dsl_prop_get_all_impl() format */
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &p1) == 0);
+ VERIFY0(nvpair_value_nvlist(p1, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1));
}
if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &p2) == 0);
+ VERIFY0(nvpair_value_nvlist(p2, &attrs));
+ VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2));
}
if (nvpair_type(p1) != nvpair_type(p2))
@@ -5146,14 +5157,14 @@ propval_equals(nvpair_t *p1, nvpair_t *p2)
if (nvpair_type(p1) == DATA_TYPE_STRING) {
const char *valstr1, *valstr2;
- VERIFY(nvpair_value_string(p1, &valstr1) == 0);
- VERIFY(nvpair_value_string(p2, &valstr2) == 0);
+ VERIFY0(nvpair_value_string(p1, &valstr1));
+ VERIFY0(nvpair_value_string(p2, &valstr2));
return (strcmp(valstr1, valstr2) == 0);
} else {
uint64_t intval1, intval2;
- VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
- VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
+ VERIFY0(nvpair_value_uint64(p1, &intval1));
+ VERIFY0(nvpair_value_uint64(p2, &intval2));
return (intval1 == intval2);
}
}
@@ -5221,7 +5232,7 @@ extract_delay_props(nvlist_t *props)
};
int i;
- VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY0(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP));
for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
nvp = nvlist_next_nvpair(props, nvp)) {
@@ -5237,8 +5248,8 @@ extract_delay_props(nvlist_t *props)
}
if (delayable[i] != 0) {
tmp = nvlist_prev_nvpair(props, nvp);
- VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
- VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
+ VERIFY0(nvlist_add_nvpair(delayprops, nvp));
+ VERIFY0(nvlist_remove_nvpair(props, nvp));
nvp = tmp;
}
}
@@ -5469,15 +5480,15 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
* using ASSERT() will be just like a VERIFY.
*/
if (recv_delayprops != NULL) {
- ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
+ ASSERT0(nvlist_merge(recvprops, recv_delayprops, 0));
nvlist_free(recv_delayprops);
}
if (local_delayprops != NULL) {
- ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
+ ASSERT0(nvlist_merge(localprops, local_delayprops, 0));
nvlist_free(local_delayprops);
}
if (inherited_delayprops != NULL) {
- ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
+ ASSERT0(nvlist_merge(localprops, inherited_delayprops, 0));
nvlist_free(inherited_delayprops);
}
*read_bytes = off - noff;
@@ -7326,8 +7337,8 @@ zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
ASSERT3U(ioc, <, ZFS_IOC_LAST);
- ASSERT3P(vec->zvec_legacy_func, ==, NULL);
- ASSERT3P(vec->zvec_func, ==, NULL);
+ ASSERT0P(vec->zvec_legacy_func);
+ ASSERT0P(vec->zvec_func);
vec->zvec_legacy_func = func;
vec->zvec_secpolicy = secpolicy;
@@ -7350,8 +7361,8 @@ zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
ASSERT3U(ioc, <, ZFS_IOC_LAST);
- ASSERT3P(vec->zvec_legacy_func, ==, NULL);
- ASSERT3P(vec->zvec_func, ==, NULL);
+ ASSERT0P(vec->zvec_legacy_func);
+ ASSERT0P(vec->zvec_func);
/* if we are logging, the name must be valid */
ASSERT(!allow_log || namecheck != NO_NAME);
@@ -8132,7 +8143,7 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
spa_t *spa;
nvlist_t *lognv = NULL;
- ASSERT(vec->zvec_legacy_func == NULL);
+ ASSERT0P(vec->zvec_legacy_func);
/*
* Add the innvl to the lognv before calling the func,
diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c
index 2ce25b72b288..ea17e049279f 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_log.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_log.c
@@ -607,8 +607,6 @@ zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
* called as soon as the write is on stable storage (be it via a DMU sync or a
* ZIL commit).
*/
-static uint_t zfs_immediate_write_sz = 32768;
-
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, boolean_t commit,
@@ -622,19 +620,12 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
if (callback != NULL)
- callback(callback_data);
+ callback(callback_data, 0);
return;
}
- if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT || o_direct)
- write_state = WR_INDIRECT;
- else if (!spa_has_slogs(zilog->zl_spa) &&
- resid >= zfs_immediate_write_sz)
- write_state = WR_INDIRECT;
- else if (commit)
- write_state = WR_COPIED;
- else
- write_state = WR_NEED_COPY;
+ write_state = zil_write_state(zilog, resid, blocksize, o_direct,
+ commit);
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
sizeof (gen));
@@ -672,7 +663,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
DMU_KEEP_CACHING);
DB_DNODE_EXIT(db);
if (err != 0) {
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
wr_state = WR_NEED_COPY;
@@ -938,6 +929,3 @@ zfs_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp,
len -= partlen;
}
}
-
-ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, UINT, ZMOD_RW,
- "Largest data block to write to zil");
diff --git a/sys/contrib/openzfs/module/zfs/zfs_quota.c b/sys/contrib/openzfs/module/zfs/zfs_quota.c
index b8fe512d4f09..2e91ccc27d6d 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_quota.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_quota.c
@@ -374,7 +374,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
if (*objp == 0) {
*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
DMU_OT_NONE, 0, tx);
- VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
+ VERIFY0(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
}
mutex_exit(&zfsvfs->z_lock);
@@ -386,7 +386,7 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
} else {
err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
}
- ASSERT(err == 0);
+ ASSERT0(err);
if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);
dmu_tx_commit(tx);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_rlock.c b/sys/contrib/openzfs/module/zfs/zfs_rlock.c
index 53eb3ef1b66e..4035baff77d6 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_rlock.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_rlock.c
@@ -666,7 +666,7 @@ zfs_rangelock_reduce(zfs_locked_range_t *lr, uint64_t off, uint64_t len)
/* Ensure there are no other locks */
ASSERT3U(avl_numnodes(&rl->rl_tree), ==, 1);
- ASSERT3U(lr->lr_offset, ==, 0);
+ ASSERT0(lr->lr_offset);
ASSERT3U(lr->lr_type, ==, RL_WRITER);
ASSERT(!lr->lr_proxy);
ASSERT3U(lr->lr_length, ==, UINT64_MAX);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_sa.c b/sys/contrib/openzfs/module/zfs/zfs_sa.c
index 59b6ae4e4203..8b4fc6fd7fbd 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_sa.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_sa.c
@@ -169,7 +169,7 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
ASSERT(MUTEX_HELD(&zp->z_lock));
VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
if (zp->z_is_sa)
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
+ VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
&xoap->xoa_av_scanstamp,
sizeof (xoap->xoa_av_scanstamp), tx));
else {
@@ -181,12 +181,12 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
len = sizeof (xoap->xoa_av_scanstamp) +
ZFS_OLD_ZNODE_PHYS_SIZE;
if (len > doi.doi_bonus_size)
- VERIFY(dmu_set_bonus(db, len, tx) == 0);
+ VERIFY0(dmu_set_bonus(db, len, tx));
(void) memcpy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp));
zp->z_pflags |= ZFS_BONUS_SCANSTAMP;
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+ VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
&zp->z_pflags, sizeof (uint64_t), tx));
}
}
@@ -286,7 +286,7 @@ zfs_sa_set_xattr(znode_t *zp, const char *name, const void *value, size_t vsize)
dmu_tx_commit(tx);
if (logsaxattr && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
- zil_commit(zilog, 0);
+ error = zil_commit(zilog, 0);
}
out_free:
vmem_free(obj, size);
@@ -427,11 +427,10 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
}
- VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
- VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
- count, tx) == 0);
+ VERIFY0(dmu_set_bonustype(db, DMU_OT_SA, tx));
+ VERIFY0(sa_replace_all_by_template_locked(hdl, sa_attrs, count, tx));
if (znode_acl.z_acl_extern_obj)
- VERIFY(0 == dmu_object_free(zfsvfs->z_os,
+ VERIFY0(dmu_object_free(zfsvfs->z_os,
znode_acl.z_acl_extern_obj, tx));
zp->z_is_sa = B_TRUE;
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 656ca4dc22ff..7bb9ba57c69e 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -27,6 +27,7 @@
* Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek
* Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
+ * Copyright (c) 2025, Klara, Inc.
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -49,6 +50,7 @@
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_crypt.h>
+#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
@@ -67,13 +69,14 @@
int zfs_bclone_enabled = 1;
/*
- * When set zfs_clone_range() waits for dirty data to be written to disk.
- * This allows the clone operation to reliably succeed when a file is modified
- * and then immediately cloned. For small files this may be slower than making
- * a copy of the file and is therefore not the default. However, in certain
- * scenarios this behavior may be desirable so a tunable is provided.
+ * When set to 1 the FICLONE and FICLONERANGE ioctls will wait for any dirty
+ * data to be written to disk before proceeding. This ensures that the clone
+ * operation reliably succeeds, even if a file is modified and then immediately
+ * cloned. Note that for small files this may be slower than simply copying
+ * the file. When set to 0 the clone operation will immediately fail if it
+ * encounters any dirty blocks. By default waiting is enabled.
*/
-int zfs_bclone_wait_dirty = 0;
+int zfs_bclone_wait_dirty = 1;
/*
* Enable Direct I/O. If this setting is 0, then all I/O requests will be
@@ -114,9 +117,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
- atomic_inc_32(&zp->z_sync_writes_cnt);
- zil_commit(zfsvfs->z_log, zp->z_id);
- atomic_dec_32(&zp->z_sync_writes_cnt);
+ error = zil_commit(zfsvfs->z_log, zp->z_id);
zfs_exit(zfsvfs, FTAG);
}
return (error);
@@ -375,8 +376,13 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
frsync = !!(ioflag & FRSYNC);
#endif
if (zfsvfs->z_log &&
- (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
- zil_commit(zfsvfs->z_log, zp->z_id);
+ (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) {
+ error = zil_commit(zfsvfs->z_log, zp->z_id);
+ if (error != 0) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
+ }
/*
* Lock the range against changes.
@@ -1074,8 +1080,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
return (error);
}
- if (commit)
- zil_commit(zilog, zp->z_id);
+ if (commit) {
+ error = zil_commit(zilog, zp->z_id);
+ if (error != 0) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
+ }
int64_t nwritten = start_resid - zfs_uio_resid(uio);
dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
@@ -1102,13 +1113,21 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags,
{
int error;
- if (flags != 0 || arg != 0)
+ if ((flags & ~ZFS_REWRITE_PHYSICAL) != 0 || arg != 0)
return (SET_ERROR(EINVAL));
zfsvfs_t *zfsvfs = ZTOZSB(zp);
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
+ /* Check if physical rewrite is allowed */
+ spa_t *spa = zfsvfs->z_os->os_spa;
+ if ((flags & ZFS_REWRITE_PHYSICAL) &&
+ !spa_feature_is_enabled(spa, SPA_FEATURE_PHYSICAL_REWRITE)) {
+ zfs_exit(zfsvfs, FTAG);
+ return (SET_ERROR(ENOTSUP));
+ }
+
if (zfs_is_readonly(zfsvfs)) {
zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EROFS));
@@ -1196,7 +1215,10 @@ zfs_rewrite(znode_t *zp, uint64_t off, uint64_t len, uint64_t flags,
if (dmu_buf_is_dirty(dbp[i], tx))
continue;
nw += dbp[i]->db_size;
- dmu_buf_will_dirty(dbp[i], tx);
+ if (flags & ZFS_REWRITE_PHYSICAL)
+ dmu_buf_will_rewrite(dbp[i], tx);
+ else
+ dmu_buf_will_dirty(dbp[i], tx);
}
dmu_buf_rele_array(dbp, numbufs, FTAG);
@@ -1249,8 +1271,8 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
zilog = zfsvfs->z_log;
error = zfs_setacl(zp, vsecp, skipaclchk, cr);
- if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
- zil_commit(zilog, 0);
+ if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ error = zil_commit(zilog, 0);
zfs_exit(zfsvfs, FTAG);
return (error);
@@ -1935,7 +1957,7 @@ unlock:
ZFS_ACCESSTIME_STAMP(inzfsvfs, inzp);
if (outos->os_sync == ZFS_SYNC_ALWAYS) {
- zil_commit(zilog, outzp->z_id);
+ error = zil_commit(zilog, outzp->z_id);
}
*inoffp += done;
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index 00059b2c6de0..31b59c55f17b 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -24,6 +24,7 @@
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright (c) 2018 Datto Inc.
+ * Copyright (c) 2025, Klara, Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -103,6 +104,7 @@ static zil_kstat_values_t zil_stats = {
{ "zil_commit_error_count", KSTAT_DATA_UINT64 },
{ "zil_commit_stall_count", KSTAT_DATA_UINT64 },
{ "zil_commit_suspend_count", KSTAT_DATA_UINT64 },
+ { "zil_commit_crash_count", KSTAT_DATA_UINT64 },
{ "zil_itx_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_count", KSTAT_DATA_UINT64 },
{ "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 },
@@ -145,7 +147,7 @@ static uint64_t zil_slog_bulk = 64 * 1024 * 1024;
static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
-static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
+static int zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
static itx_t *zil_itx_clone(itx_t *oitx);
static uint64_t zil_max_waste_space(zilog_t *zilog);
@@ -367,6 +369,7 @@ zil_sums_init(zil_sums_t *zs)
wmsum_init(&zs->zil_commit_error_count, 0);
wmsum_init(&zs->zil_commit_stall_count, 0);
wmsum_init(&zs->zil_commit_suspend_count, 0);
+ wmsum_init(&zs->zil_commit_crash_count, 0);
wmsum_init(&zs->zil_itx_count, 0);
wmsum_init(&zs->zil_itx_indirect_count, 0);
wmsum_init(&zs->zil_itx_indirect_bytes, 0);
@@ -392,6 +395,7 @@ zil_sums_fini(zil_sums_t *zs)
wmsum_fini(&zs->zil_commit_error_count);
wmsum_fini(&zs->zil_commit_stall_count);
wmsum_fini(&zs->zil_commit_suspend_count);
+ wmsum_fini(&zs->zil_commit_crash_count);
wmsum_fini(&zs->zil_itx_count);
wmsum_fini(&zs->zil_itx_indirect_count);
wmsum_fini(&zs->zil_itx_indirect_bytes);
@@ -422,6 +426,8 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums)
wmsum_value(&zil_sums->zil_commit_stall_count);
zs->zil_commit_suspend_count.value.ui64 =
wmsum_value(&zil_sums->zil_commit_suspend_count);
+ zs->zil_commit_crash_count.value.ui64 =
+ wmsum_value(&zil_sums->zil_commit_crash_count);
zs->zil_itx_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_count);
zs->zil_itx_indirect_count.value.ui64 =
@@ -589,7 +595,7 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
* that we rewind to is invalid. Thus, we return -1 so
* zil_parse() doesn't attempt to read it.
*/
- if (BP_GET_LOGICAL_BIRTH(bp) >= first_txg)
+ if (BP_GET_BIRTH(bp) >= first_txg)
return (-1);
if (zil_bp_tree_add(zilog, bp) != 0)
@@ -615,7 +621,7 @@ zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx,
* Claim log block if not already committed and not already claimed.
* If tx == NULL, just verify that the block is claimable.
*/
- if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) < first_txg ||
+ if (BP_IS_HOLE(bp) || BP_GET_BIRTH(bp) < first_txg ||
zil_bp_tree_add(zilog, bp) != 0)
return (0);
@@ -640,7 +646,7 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg)
* waited for all writes to be stable first), so it is semantically
* correct to declare this the end of the log.
*/
- if (BP_GET_LOGICAL_BIRTH(&lr->lr_blkptr) >= first_txg) {
+ if (BP_GET_BIRTH(&lr->lr_blkptr) >= first_txg) {
error = zil_read_log_data(zilog, lr, NULL);
if (error != 0)
return (error);
@@ -687,7 +693,7 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx,
* just in case lets be safe and just stop here now instead of
* corrupting the pool.
*/
- if (BP_GET_BIRTH(bp) >= first_txg)
+ if (BP_GET_PHYSICAL_BIRTH(bp) >= first_txg)
return (SET_ERROR(ENOENT));
/*
@@ -742,7 +748,7 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg)
/*
* If we previously claimed it, we need to free it.
*/
- if (BP_GET_LOGICAL_BIRTH(bp) >= claim_txg &&
+ if (BP_GET_BIRTH(bp) >= claim_txg &&
zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) {
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
}
@@ -864,9 +870,9 @@ zil_free_lwb(zilog_t *zilog, lwb_t *lwb)
ASSERT(MUTEX_HELD(&zilog->zl_lock));
ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
lwb->lwb_state == LWB_STATE_FLUSH_DONE);
- ASSERT3P(lwb->lwb_child_zio, ==, NULL);
- ASSERT3P(lwb->lwb_write_zio, ==, NULL);
- ASSERT3P(lwb->lwb_root_zio, ==, NULL);
+ ASSERT0P(lwb->lwb_child_zio);
+ ASSERT0P(lwb->lwb_write_zio);
+ ASSERT0P(lwb->lwb_root_zio);
ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa));
ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa));
VERIFY(list_is_empty(&lwb->lwb_itxs));
@@ -991,8 +997,8 @@ zil_create(zilog_t *zilog)
*/
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
- ASSERT(zh->zh_claim_txg == 0);
- ASSERT(zh->zh_replay_seq == 0);
+ ASSERT0(zh->zh_claim_txg);
+ ASSERT0(zh->zh_replay_seq);
blk = zh->zh_log;
@@ -1104,7 +1110,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
zilog->zl_keep_first = keep_first;
if (!list_is_empty(&zilog->zl_lwb_list)) {
- ASSERT(zh->zh_claim_txg == 0);
+ ASSERT0(zh->zh_claim_txg);
VERIFY(!keep_first);
while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
if (lwb->lwb_buf != NULL)
@@ -1250,7 +1256,7 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
blkptr_t *bp;
int error;
- ASSERT(tx == NULL);
+ ASSERT0P(tx);
error = dmu_objset_from_ds(ds, &os);
if (error != 0) {
@@ -1351,7 +1357,7 @@ zil_commit_waiter_link_lwb(zil_commit_waiter_t *zcw, lwb_t *lwb)
ASSERT(!list_link_active(&zcw->zcw_node));
list_insert_tail(&lwb->lwb_waiters, zcw);
- ASSERT3P(zcw->zcw_lwb, ==, NULL);
+ ASSERT0P(zcw->zcw_lwb);
zcw->zcw_lwb = lwb;
}
@@ -1365,7 +1371,7 @@ zil_commit_waiter_link_nolwb(zil_commit_waiter_t *zcw, list_t *nolwb)
{
ASSERT(!list_link_active(&zcw->zcw_node));
list_insert_tail(nolwb, zcw);
- ASSERT3P(zcw->zcw_lwb, ==, NULL);
+ ASSERT0P(zcw->zcw_lwb);
}
void
@@ -1482,7 +1488,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
}
while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
mutex_enter(&zcw->zcw_lock);
@@ -1895,7 +1901,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state)
/*
* Finalize previously closed block and issue the write zio.
*/
-static void
+static int
zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
{
spa_t *spa = zilog->zl_spa;
@@ -1909,8 +1915,13 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
/* Actually fill the lwb with the data. */
for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
- itx = list_next(&lwb->lwb_itxs, itx))
- zil_lwb_commit(zilog, lwb, itx);
+ itx = list_next(&lwb->lwb_itxs, itx)) {
+ error = zil_lwb_commit(zilog, lwb, itx);
+ if (error != 0) {
+ ASSERT3U(error, ==, ESHUTDOWN);
+ return (error);
+ }
+ }
lwb->lwb_nused = lwb->lwb_nfilled;
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax);
@@ -1928,7 +1939,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
lwb->lwb_state = LWB_STATE_READY;
if (BP_IS_HOLE(&lwb->lwb_blk) && lwb->lwb_error == 0) {
mutex_exit(&zilog->zl_lock);
- return;
+ return (0);
}
mutex_exit(&zilog->zl_lock);
@@ -1997,7 +2008,7 @@ next_lwb:
&slog);
}
if (error == 0) {
- ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), ==, txg);
+ ASSERT3U(BP_GET_BIRTH(bp), ==, txg);
BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 :
ZIO_CHECKSUM_ZILOG);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@@ -2065,6 +2076,8 @@ next_lwb:
lwb = nlwb;
if (lwb)
goto next_lwb;
+
+ return (0);
}
/*
@@ -2095,6 +2108,19 @@ zil_max_waste_space(zilog_t *zilog)
*/
static uint_t zil_maxcopied = 7680;
+/*
+ * Largest write size to store the data directly into the ZIL.
+ */
+uint_t zfs_immediate_write_sz = 32768;
+
+/*
+ * When enabled and data blocks go to the normal vdev, treat special
+ * vdevs as SLOG, writing the data to the ZIL (WR_COPIED/WR_NEED_COPY).
+ * Disabling this forces indirect writes (WR_INDIRECT) to preserve
+ * special vdev throughput and endurance, likely at the cost of normal
+ * vdev latency.
+ */
+int zil_special_is_slog = 1;
+
uint64_t
zil_max_copied_data(zilog_t *zilog)
{
@@ -2102,6 +2128,46 @@ zil_max_copied_data(zilog_t *zilog)
return (MIN(max_data, zil_maxcopied));
}
+/*
+ * Determine the appropriate write state for ZIL transactions based on
+ * pool configuration, data placement, write size, and logbias settings.
+ */
+itx_wr_state_t
+zil_write_state(zilog_t *zilog, uint64_t size, uint32_t blocksize,
+ boolean_t o_direct, boolean_t commit)
+{
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT || o_direct)
+ return (WR_INDIRECT);
+
+ /*
+ * Don't use indirect for small writes, to reduce overhead.
+ * Also don't use indirect if less than half of the block is
+ * written and we are going to commit it immediately, since the
+ * next write might rewrite the same block again, causing
+ * inflation. If no commit is planned, then later writes might
+ * coalesce, and so indirect may be a good fit.
+ */
+ boolean_t indirect = (size >= zfs_immediate_write_sz &&
+ (size >= blocksize / 2 || !commit));
+
+ if (spa_has_slogs(zilog->zl_spa)) {
+ /* Dedicated slogs: never use indirect */
+ indirect = B_FALSE;
+ } else if (spa_has_special(zilog->zl_spa)) {
+ /* Special vdevs: only when beneficial */
+ boolean_t on_special = (blocksize <=
+ zilog->zl_os->os_zpl_special_smallblock);
+ indirect &= (on_special || !zil_special_is_slog);
+ }
+
+ if (indirect)
+ return (WR_INDIRECT);
+ else if (commit)
+ return (WR_COPIED);
+ else
+ return (WR_NEED_COPY);
+}
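
The decision above reads naturally as a small pure function. Below is a minimal standalone sketch of just that logic (not part of this change; the _x names and boolean inputs are stand-ins for the real zilog/spa state, and the logbias=throughput / O_DIRECT early return is folded into a single flag):

#include <stdint.h>
#include <stdio.h>

enum wr_state_x { WR_INDIRECT_X, WR_COPIED_X, WR_NEED_COPY_X };

static enum wr_state_x
pick_write_state_x(uint64_t size, uint32_t blocksize, int commit,
    int force_indirect, int has_slog, int has_special, int on_special,
    int special_is_slog, uint64_t immediate_write_sz)
{
	if (force_indirect)		/* logbias=throughput or O_DIRECT */
		return (WR_INDIRECT_X);

	int indirect = (size >= immediate_write_sz &&
	    (size >= blocksize / 2 || !commit));

	if (has_slog)			/* dedicated slog: never indirect */
		indirect = 0;
	else if (has_special)		/* special vdev: only when beneficial */
		indirect = indirect && (on_special || !special_is_slog);

	if (indirect)
		return (WR_INDIRECT_X);
	return (commit ? WR_COPIED_X : WR_NEED_COPY_X);
}

int
main(void)
{
	/* 64 KiB O_SYNC write into a 128 KiB record, pool with a slog. */
	printf("%d\n", pick_write_state_x(65536, 131072, 1, 0, 1, 0, 0, 1,
	    32768));	/* prints 1 (WR_COPIED_X) */
	return (0);
}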
+
static uint64_t
zil_itx_record_size(itx_t *itx)
{
@@ -2255,11 +2321,13 @@ cont:
return (lwb);
}
+static void zil_crash(zilog_t *zilog);
+
/*
* Fill the actual transaction data into the lwb, following zil_lwb_assign().
* Does not require locking.
*/
-static void
+static int
zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
{
lr_t *lr, *lrb;
@@ -2271,7 +2339,7 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
lrw = (lr_write_t *)lr;
if (lr->lrc_txtype == TX_COMMIT)
- return;
+ return (0);
reclen = lr->lrc_reclen;
dlen = zil_itx_data_size(itx);
@@ -2357,16 +2425,35 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
". Falling back to txg_wait_synced().",
error);
zfs_fallthrough;
- case EIO:
- txg_wait_synced(zilog->zl_dmu_pool,
- lr->lrc_txg);
+ case EIO: {
+ int error = txg_wait_synced_flags(
+ zilog->zl_dmu_pool,
+ lr->lrc_txg, TXG_WAIT_SUSPEND);
+ if (error != 0) {
+ ASSERT3U(error, ==, ESHUTDOWN);
+ /*
+ * zil_lwb_commit() is called from a
+ * loop over a list of itxs at the
+ * top of zil_lwb_write_issue(), which
+ * itself is called from a loop over a
+ * list of lwbs in various places.
+ * zil_crash() will free those itxs
+ * and sometimes the lwbs, so they
+ * are invalid when zil_crash() returns.
+ * Callers must pretty much abort
+ * immediately.
+ */
+ zil_crash(zilog);
+ return (error);
+ }
zfs_fallthrough;
+ }
case ENOENT:
zfs_fallthrough;
case EEXIST:
zfs_fallthrough;
case EALREADY:
- return;
+ return (0);
}
}
}
@@ -2374,6 +2461,8 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
lwb->lwb_nfilled += reclen + dlen;
ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused);
ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t)));
+
+ return (0);
}
itx_t *
@@ -2415,7 +2504,7 @@ zil_itx_clone(itx_t *oitx)
}
void
-zil_itx_destroy(itx_t *itx)
+zil_itx_destroy(itx_t *itx, int err)
{
ASSERT3U(itx->itx_size, >=, sizeof (itx_t));
ASSERT3U(itx->itx_lr.lrc_reclen, ==,
@@ -2424,7 +2513,7 @@ zil_itx_destroy(itx_t *itx)
IMPLY(itx->itx_callback != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT);
if (itx->itx_callback != NULL)
- itx->itx_callback(itx->itx_callback_data);
+ itx->itx_callback(itx->itx_callback_data, err);
zio_data_buf_free(itx, itx->itx_size);
}
@@ -2467,7 +2556,7 @@ zil_itxg_clean(void *arg)
if (itx->itx_lr.lrc_txtype == TX_COMMIT)
zil_commit_waiter_skip(itx->itx_private);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
cookie = NULL;
@@ -2477,7 +2566,7 @@ zil_itxg_clean(void *arg)
while ((itx = list_remove_head(list)) != NULL) {
/* commit itxs should never be on the async lists. */
ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
list_destroy(list);
kmem_free(ian, sizeof (itx_async_node_t));
@@ -2539,7 +2628,7 @@ zil_remove_async(zilog_t *zilog, uint64_t oid)
while ((itx = list_remove_head(&clean_list)) != NULL) {
/* commit itxs should never be on the async lists. */
ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
list_destroy(&clean_list);
}
@@ -2624,6 +2713,67 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
}
/*
+ * Post-crash cleanup. This is called from zil_clean() because cleanup
+ * needs to happen after every txg until the ZIL is restarted, and
+ * zilog_dirty() can arrange that easily, unlike zil_sync(), which is
+ * harder to get called without actual dirty data.
+ */
+static void
+zil_crash_clean(zilog_t *zilog, uint64_t synced_txg)
+{
+ ASSERT(MUTEX_HELD(&zilog->zl_lock));
+ ASSERT3U(zilog->zl_restart_txg, >, 0);
+
+ /* Clean up anything on the crash list from earlier txgs */
+ lwb_t *lwb;
+ while ((lwb = list_head(&zilog->zl_lwb_crash_list)) != NULL) {
+ if (lwb->lwb_alloc_txg >= synced_txg ||
+ lwb->lwb_max_txg >= synced_txg) {
+ /*
+ * This lwb was allocated or updated on this txg, or
+ * in the future. We stop processing here, to avoid
+ * the strange situation of freeing a ZIL block in
+ * the same or an earlier txg than the one it was
+ * allocated for.
+ *
+ * We'll take care of it on the next txg.
+ */
+ break;
+ }
+
+ /* This LWB is from the past, so we can clean it up now. */
+ list_remove(&zilog->zl_lwb_crash_list, lwb);
+ if (lwb->lwb_buf != NULL)
+ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
+ if (!BP_IS_HOLE(&lwb->lwb_blk))
+ /*
+ * Free on the next txg, since zil_clean() is called
+ * after synced_txg has already completed.
+ */
+ zio_free(zilog->zl_spa, synced_txg+1, &lwb->lwb_blk);
+ zil_free_lwb(zilog, lwb);
+ }
+
+ if (zilog->zl_restart_txg > synced_txg) {
+ /*
+ * We haven't reached the restart txg yet, so mark the ZIL dirty
+ * for the next txg and we'll consider it all again then.
+ */
+ zilog_dirty(zilog, synced_txg+1);
+ return;
+ }
+
+ /*
+ * We've reached the restart txg, so we can allow new calls to
+ * zil_commit(). All ZIL txgs have long since passed, so there should
+ * be no IO waiting.
+ */
+ ASSERT(list_is_empty(&zilog->zl_lwb_list));
+ ASSERT(list_is_empty(&zilog->zl_lwb_crash_list));
+
+ zilog->zl_restart_txg = 0;
+}
+
+/*
* If there are any in-memory intent log transactions which have now been
* synced then start up a taskq to free them. We should only do this after we
* have written out the uberblocks (i.e. txg has been committed) so that
@@ -2638,6 +2788,15 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
ASSERT3U(synced_txg, <, ZILTEST_TXG);
+ /* Do cleanup and restart after crash. */
+ if (zilog->zl_restart_txg > 0) {
+ mutex_enter(&zilog->zl_lock);
+ /* Make sure we didn't lose a race. */
+ if (zilog->zl_restart_txg > 0)
+ zil_crash_clean(zilog, synced_txg);
+ mutex_exit(&zilog->zl_lock);
+ }
+
mutex_enter(&itxg->itxg_lock);
if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) {
mutex_exit(&itxg->itxg_lock);
@@ -2830,13 +2989,13 @@ zil_prune_commit_list(zilog_t *zilog)
mutex_exit(&zilog->zl_lock);
list_remove(&zilog->zl_itx_commit_list, itx);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
IMPLY(itx != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT);
}
-static void
+static int
zil_commit_writer_stall(zilog_t *zilog)
{
/*
@@ -2861,8 +3020,22 @@ zil_commit_writer_stall(zilog_t *zilog)
*/
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
ZIL_STAT_BUMP(zilog, zil_commit_stall_count);
- txg_wait_synced(zilog->zl_dmu_pool, 0);
+
+ int err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ if (err != 0) {
+ ASSERT3U(err, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ }
+
+ /*
+ * Either zil_sync() has been called to wait for and clean up any
+ * in-flight LWBs, or zil_crash() has emptied out the list and arranged
+ * for them to be cleaned up later.
+ */
ASSERT(list_is_empty(&zilog->zl_lwb_list));
+
+ return (err);
}
static void
@@ -2902,19 +3075,14 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
- /*
- * Return if there's nothing to commit before we dirty the fs by
- * calling zil_create().
- */
- if (list_is_empty(&zilog->zl_itx_commit_list))
- return;
-
- list_create(&nolwb_itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
- list_create(&nolwb_waiters, sizeof (zil_commit_waiter_t),
- offsetof(zil_commit_waiter_t, zcw_node));
-
lwb = list_tail(&zilog->zl_lwb_list);
if (lwb == NULL) {
+ /*
+ * Return if there's nothing to commit before we dirty the fs.
+ */
+ if (list_is_empty(&zilog->zl_itx_commit_list))
+ return;
+
lwb = zil_create(zilog);
} else {
/*
@@ -2942,6 +3110,10 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
}
}
+ list_create(&nolwb_itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
+ list_create(&nolwb_waiters, sizeof (zil_commit_waiter_t),
+ offsetof(zil_commit_waiter_t, zcw_node));
+
while ((itx = list_remove_head(&zilog->zl_itx_commit_list)) != NULL) {
lr_t *lrc = &itx->itx_lr;
uint64_t txg = lrc->lrc_txg;
@@ -3030,7 +3202,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
} else {
ASSERT3S(lrc->lrc_txtype, !=, TX_COMMIT);
zilog->zl_cur_left -= zil_itx_full_size(itx);
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
}
}
@@ -3041,9 +3213,14 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* the ZIL write pipeline; see the comment within
* zil_commit_writer_stall() for more details.
*/
- while ((lwb = list_remove_head(ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
- zil_commit_writer_stall(zilog);
+ int err = 0;
+ while ((lwb = list_remove_head(ilwbs)) != NULL) {
+ err = zil_lwb_write_issue(zilog, lwb);
+ if (err != 0)
+ break;
+ }
+ if (err == 0)
+ err = zil_commit_writer_stall(zilog);
/*
* Additionally, we have to signal and mark the "nolwb"
@@ -3061,7 +3238,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* the itx's callback if one exists for the itx.
*/
while ((itx = list_remove_head(&nolwb_itxs)) != NULL)
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
} else {
ASSERT(list_is_empty(&nolwb_waiters));
ASSERT3P(lwb, !=, NULL);
@@ -3111,14 +3288,21 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* possible, without significantly impacting the latency
* of each individual itx.
*/
- if (lwb->lwb_state == LWB_STATE_OPENED && !zilog->zl_parallel) {
+ if (lwb->lwb_state == LWB_STATE_OPENED &&
+ (!zilog->zl_parallel || zilog->zl_suspend > 0)) {
zil_burst_done(zilog);
list_insert_tail(ilwbs, lwb);
lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW);
if (lwb == NULL) {
- while ((lwb = list_remove_head(ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
- zil_commit_writer_stall(zilog);
+ int err = 0;
+ while ((lwb =
+ list_remove_head(ilwbs)) != NULL) {
+ err = zil_lwb_write_issue(zilog, lwb);
+ if (err != 0)
+ break;
+ }
+ if (err == 0)
+ zil_commit_writer_stall(zilog);
}
}
}
@@ -3177,10 +3361,23 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
zil_prune_commit_list(zilog);
zil_process_commit_list(zilog, zcw, &ilwbs);
+ /*
+ * If the ZIL failed somewhere inside zil_process_commit_list(), it
+ * will be because a fallback to txg_wait_synced_flags() happened at
+ * some point (e.g. zil_commit_writer_stall()). All such cases issue
+ * and empty ilwbs, so there will be nothing to do in the issue loop
+ * below. That's why we don't have to plumb the error value back from
+ * zil_process_commit_list(), and don't have to skip it.
+ */
+ IMPLY(zilog->zl_restart_txg > 0, list_is_empty(&ilwbs));
+
out:
mutex_exit(&zilog->zl_issuer_lock);
- while ((lwb = list_remove_head(&ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
+ int err = 0;
+ while ((lwb = list_remove_head(&ilwbs)) != NULL) {
+ if (err == 0)
+ err = zil_lwb_write_issue(zilog, lwb);
+ }
list_destroy(&ilwbs);
return (wtxg);
}
@@ -3436,7 +3633,7 @@ static void
zil_free_commit_waiter(zil_commit_waiter_t *zcw)
{
ASSERT(!list_link_active(&zcw->zcw_node));
- ASSERT3P(zcw->zcw_lwb, ==, NULL);
+ ASSERT0P(zcw->zcw_lwb);
ASSERT3B(zcw->zcw_done, ==, B_TRUE);
mutex_destroy(&zcw->zcw_lock);
cv_destroy(&zcw->zcw_cv);
@@ -3473,6 +3670,96 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
}
/*
+ * Crash the ZIL. This is something like suspending, but abandons the ZIL
+ * without further IO until the wanted txg completes. No effort is made to
+ * close the on-disk chain or do any other on-disk work, as the pool may
+ * have suspended. zil_sync() will handle cleanup as normal and restart the
+ * ZIL once enough txgs have passed.
+ */
+static void
+zil_crash(zilog_t *zilog)
+{
+ mutex_enter(&zilog->zl_lock);
+
+ uint64_t txg = spa_syncing_txg(zilog->zl_spa);
+ uint64_t restart_txg =
+ spa_syncing_txg(zilog->zl_spa) + TXG_CONCURRENT_STATES;
+
+ if (zilog->zl_restart_txg > 0) {
+ /*
+ * If the ZIL is already crashed, it's almost certainly because
+ * we lost a race involving multiple callers from
+ * zil_commit_impl().
+ */
+
+ /*
+ * This sanity check is to support my understanding that in the
+ * event of multiple callers to zil_crash(), only one of them
+ * can possibly be in the codepath to issue lwbs; the rest
+ * should be calling from zil_commit_impl() after their waiters
+ * have completed. As I understand it, a second thread trying
+ * to issue will eventually wait on zl_issuer_lock, and then
+ * have no work to do and leave.
+ *
+ * If more lwbs had been created and issued between zil_crash()
+ * calls, then we would probably just need to take those too, add
+ * them to the crash list and clean them up, but it complicates
+ * this function and I don't think it can happen.
+ */
+ ASSERT(list_is_empty(&zilog->zl_lwb_list));
+
+ mutex_exit(&zilog->zl_lock);
+ return;
+ }
+
+ zilog->zl_restart_txg = restart_txg;
+
+ /*
+ * Capture any live LWBs. Depending on the state of the pool they may
+ * represent in-flight IO that won't return for some time, and we want
+ * to make sure they don't get in the way of normal ZIL operation.
+ */
+ ASSERT(list_is_empty(&zilog->zl_lwb_crash_list));
+ list_move_tail(&zilog->zl_lwb_crash_list, &zilog->zl_lwb_list);
+
+ /*
+ * Run through the LWB list, destroying all itxs with an error and
+ * signalling that error to all waiters.
+ */
+ for (lwb_t *lwb = list_head(&zilog->zl_lwb_crash_list); lwb != NULL;
+ lwb = list_next(&zilog->zl_lwb_crash_list, lwb)) {
+ itx_t *itx;
+ while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
+ zil_itx_destroy(itx, EIO);
+
+ zil_commit_waiter_t *zcw;
+ while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
+ mutex_enter(&zcw->zcw_lock);
+ zcw->zcw_lwb = NULL;
+ zcw->zcw_zio_error = EIO;
+ zcw->zcw_done = B_TRUE;
+ cv_broadcast(&zcw->zcw_cv);
+ mutex_exit(&zcw->zcw_lock);
+ }
+ }
+
+ /*
+ * Zero the ZIL header bp after the ZIL restarts. We'll free it in
+ * zil_clean() when we clean up the lwbs.
+ */
+ zil_header_t *zh = zil_header_in_syncing_context(zilog);
+ BP_ZERO(&zh->zh_log);
+
+ /*
+ * Mark this ZIL dirty on the next txg, so that zil_clean() will be
+ * called for cleanup.
+ */
+ zilog_dirty(zilog, txg+1);
+
+ mutex_exit(&zilog->zl_lock);
+}
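
For intuition about the restart point computed above, here is a tiny standalone timeline sketch (illustrative only; it assumes TXG_CONCURRENT_STATES is 3, as in current OpenZFS, so a crash observed while txg 100 is syncing restarts the ZIL once txg 103 has synced):

#include <stdint.h>
#include <stdio.h>

#define	TXG_CONCURRENT_STATES_X	3	/* stand-in for the real constant */

int
main(void)
{
	uint64_t syncing_txg = 100;	/* txg that was syncing at crash time */
	uint64_t restart_txg = syncing_txg + TXG_CONCURRENT_STATES_X;

	for (uint64_t synced = syncing_txg; synced <= restart_txg; synced++) {
		if (restart_txg > synced)
			printf("txg %llu synced: still crashed, stay dirty\n",
			    (unsigned long long)synced);
		else
			printf("txg %llu synced: restart the ZIL\n",
			    (unsigned long long)synced);
	}
	return (0);
}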
+
+/*
* Commit ZFS Intent Log transactions (itxs) to stable storage.
*
* When writing ZIL transactions to the on-disk representation of the
@@ -3587,9 +3874,17 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
* but the order in which they complete will be the same order in
* which they were created.
*/
-void
+static int zil_commit_impl(zilog_t *zilog, uint64_t foid);
+
+int
zil_commit(zilog_t *zilog, uint64_t foid)
{
+ return (zil_commit_flags(zilog, foid, ZIL_COMMIT_FAILMODE));
+}
+
+int
+zil_commit_flags(zilog_t *zilog, uint64_t foid, zil_commit_flag_t flags)
+{
/*
* We should never attempt to call zil_commit on a snapshot for
* a couple of reasons:
@@ -3606,7 +3901,7 @@ zil_commit(zilog_t *zilog, uint64_t foid)
ASSERT3B(dmu_objset_is_snapshot(zilog->zl_os), ==, B_FALSE);
if (zilog->zl_sync == ZFS_SYNC_DISABLED)
- return;
+ return (0);
if (!spa_writeable(zilog->zl_spa)) {
/*
@@ -3617,10 +3912,23 @@ zil_commit(zilog_t *zilog, uint64_t foid)
* verifying that truth before we return to the caller.
*/
ASSERT(list_is_empty(&zilog->zl_lwb_list));
- ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL);
+ ASSERT0P(zilog->zl_last_lwb_opened);
for (int i = 0; i < TXG_SIZE; i++)
- ASSERT3P(zilog->zl_itxg[i].itxg_itxs, ==, NULL);
- return;
+ ASSERT0P(zilog->zl_itxg[i].itxg_itxs);
+ return (0);
+ }
+
+ int err = 0;
+
+ /*
+ * If the ZIL crashed, bypass it entirely, and rely on
+ * txg_wait_synced_flags() to get the data out to disk.
+ */
+ if (zilog->zl_restart_txg > 0) {
+ ZIL_STAT_BUMP(zilog, zil_commit_crash_count);
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ goto out;
}
/*
@@ -3632,14 +3940,43 @@ zil_commit(zilog_t *zilog, uint64_t foid)
*/
if (zilog->zl_suspend > 0) {
ZIL_STAT_BUMP(zilog, zil_commit_suspend_count);
- txg_wait_synced(zilog->zl_dmu_pool, 0);
- return;
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ if (err != 0) {
+ ASSERT3U(err, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ }
+ goto out;
}
- zil_commit_impl(zilog, foid);
+ err = zil_commit_impl(zilog, foid);
+
+out:
+ if (err == 0)
+ return (0);
+
+ /*
+ * The ZIL write failed and the pool is suspended. There's nothing else
+ * we can do except return or block.
+ */
+ ASSERT3U(err, ==, ESHUTDOWN);
+
+ /*
+ * Return the error if failmode=continue or the caller will handle
+ * it directly.
+ */
+ if (!(flags & ZIL_COMMIT_FAILMODE) ||
+ spa_get_failmode(zilog->zl_spa) == ZIO_FAILURE_MODE_CONTINUE)
+ return (SET_ERROR(EIO));
+
+ /*
+ * Block until the pool returns to service. We assume that the data
+ * will make it out to disk in the end, and so return success.
+ */
+ txg_wait_synced(zilog->zl_dmu_pool, 0);
+ return (0);
}
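
The post-crash policy above reduces to one check. A standalone sketch of just that decision (illustrative only; the flag value and failmode enum are local stand-ins for ZIL_COMMIT_FAILMODE and the pool failmode property):

#include <stdio.h>

#define	ZIL_COMMIT_FAILMODE_X	0x1	/* stand-in for the real flag */
enum failmode_x { FAILMODE_WAIT_X, FAILMODE_CONTINUE_X };

static const char *
commit_outcome_x(int flags, enum failmode_x failmode)
{
	if (!(flags & ZIL_COMMIT_FAILMODE_X) ||
	    failmode == FAILMODE_CONTINUE_X)
		return ("return EIO to the caller");
	return ("block in txg_wait_synced(), then return 0");
}

int
main(void)
{
	printf("zil_commit(), failmode=wait:     %s\n",
	    commit_outcome_x(ZIL_COMMIT_FAILMODE_X, FAILMODE_WAIT_X));
	printf("zil_commit(), failmode=continue: %s\n",
	    commit_outcome_x(ZIL_COMMIT_FAILMODE_X, FAILMODE_CONTINUE_X));
	printf("caller handling errors itself:   %s\n",
	    commit_outcome_x(0, FAILMODE_WAIT_X));
	return (0);
}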
-void
+static int
zil_commit_impl(zilog_t *zilog, uint64_t foid)
{
ZIL_STAT_BUMP(zilog, zil_commit_count);
@@ -3676,6 +4013,7 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid)
uint64_t wtxg = zil_commit_writer(zilog, zcw);
zil_commit_waiter(zilog, zcw);
+ int err = 0;
if (zcw->zcw_zio_error != 0) {
/*
* If there was an error writing out the ZIL blocks that
@@ -3688,13 +4026,29 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid)
ZIL_STAT_BUMP(zilog, zil_commit_error_count);
DTRACE_PROBE2(zil__commit__io__error,
zilog_t *, zilog, zil_commit_waiter_t *, zcw);
- txg_wait_synced(zilog->zl_dmu_pool, 0);
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
} else if (wtxg != 0) {
ZIL_STAT_BUMP(zilog, zil_commit_suspend_count);
- txg_wait_synced(zilog->zl_dmu_pool, wtxg);
+ err = txg_wait_synced_flags(zilog->zl_dmu_pool, wtxg,
+ TXG_WAIT_SUSPEND);
}
zil_free_commit_waiter(zcw);
+
+ if (err == 0)
+ return (0);
+
+ /*
+ * The ZIL write failed, and the pool also failed in the fallback to
+ * txg_wait_synced_flags(). Right now we have no idea whether the data
+ * is on disk; the pool is probably suspended, so we have no idea when
+ * it's coming back. All we can do is shut down and return the error
+ * to the caller.
+ */
+ ASSERT3U(err, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ return (err);
}
/*
@@ -3720,7 +4074,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
mutex_enter(&zilog->zl_lock);
- ASSERT(zilog->zl_stop_sync == 0);
+ ASSERT0(zilog->zl_stop_sync);
if (*replayed_seq != 0) {
ASSERT(zh->zh_replay_seq < *replayed_seq);
@@ -3890,6 +4244,8 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
list_create(&zilog->zl_lwb_list, sizeof (lwb_t),
offsetof(lwb_t, lwb_node));
+ list_create(&zilog->zl_lwb_crash_list, sizeof (lwb_t),
+ offsetof(lwb_t, lwb_node));
list_create(&zilog->zl_itx_commit_list, sizeof (itx_t),
offsetof(itx_t, itx_node));
@@ -3914,9 +4270,12 @@ zil_free(zilog_t *zilog)
ASSERT0(zilog->zl_suspend);
ASSERT0(zilog->zl_suspending);
+ ASSERT0(zilog->zl_restart_txg);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
list_destroy(&zilog->zl_lwb_list);
+ ASSERT(list_is_empty(&zilog->zl_lwb_crash_list));
+ list_destroy(&zilog->zl_lwb_crash_list);
ASSERT(list_is_empty(&zilog->zl_itx_commit_list));
list_destroy(&zilog->zl_itx_commit_list);
@@ -3952,8 +4311,8 @@ zil_open(objset_t *os, zil_get_data_t *get_data, zil_sums_t *zil_sums)
{
zilog_t *zilog = dmu_objset_zil(os);
- ASSERT3P(zilog->zl_get_data, ==, NULL);
- ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL);
+ ASSERT0P(zilog->zl_get_data);
+ ASSERT0P(zilog->zl_last_lwb_opened);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
zilog->zl_get_data = get_data;
@@ -3972,7 +4331,8 @@ zil_close(zilog_t *zilog)
uint64_t txg;
if (!dmu_objset_is_snapshot(zilog->zl_os)) {
- zil_commit(zilog, 0);
+ if (zil_commit_flags(zilog, 0, ZIL_COMMIT_NOW) != 0)
+ txg_wait_synced(zilog->zl_dmu_pool, 0);
} else {
ASSERT(list_is_empty(&zilog->zl_lwb_list));
ASSERT0(zilog->zl_dirty_max_txg);
@@ -4073,6 +4433,17 @@ zil_suspend(const char *osname, void **cookiep)
return (SET_ERROR(EBUSY));
}
+ if (zilog->zl_restart_txg > 0) {
+ /*
+ * ZIL crashed. It effectively _is_ suspended, but callers
+ * are usually trying to make sure it's empty on-disk, which
+ * we can't guarantee right now.
+ */
+ mutex_exit(&zilog->zl_lock);
+ dmu_objset_rele(os, suspend_tag);
+ return (SET_ERROR(EBUSY));
+ }
+
/*
* Don't put a long hold in the cases where we can avoid it. This
* is when there is no cookie so we are doing a suspend & resume
@@ -4105,6 +4476,11 @@ zil_suspend(const char *osname, void **cookiep)
zil_resume(os);
else
*cookiep = os;
+
+ if (zilog->zl_restart_txg > 0)
+ /* ZIL crashed while we were waiting. */
+ return (SET_ERROR(EBUSY));
+
return (0);
}
@@ -4146,17 +4522,34 @@ zil_suspend(const char *osname, void **cookiep)
* would just call txg_wait_synced(), because zl_suspend is set.
* txg_wait_synced() doesn't wait for these lwb's to be
* LWB_STATE_FLUSH_DONE before returning.
+ *
+ * However, zil_commit_impl() itself can return an error if any of the
+ * lwbs fail, or if the pool suspends in the fallback to
+ * txg_wait_synced_flags(), which affects what we do next, so we
+ * capture that error.
*/
- zil_commit_impl(zilog, 0);
+ error = zil_commit_impl(zilog, 0);
+ if (error == ESHUTDOWN)
+ /* zil_commit_impl() has called zil_crash() already */
+ error = SET_ERROR(EBUSY);
/*
* Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE, we
* use txg_wait_synced() to ensure the data from the zilog has
* migrated to the main pool before calling zil_destroy().
*/
- txg_wait_synced(zilog->zl_dmu_pool, 0);
+ if (error == 0) {
+ error = txg_wait_synced_flags(zilog->zl_dmu_pool, 0,
+ TXG_WAIT_SUSPEND);
+ if (error != 0) {
+ ASSERT3U(error, ==, ESHUTDOWN);
+ zil_crash(zilog);
+ error = SET_ERROR(EBUSY);
+ }
+ }
- zil_destroy(zilog, B_FALSE);
+ if (error == 0)
+ zil_destroy(zilog, B_FALSE);
mutex_enter(&zilog->zl_lock);
zilog->zl_suspending = B_FALSE;
@@ -4170,7 +4563,8 @@ zil_suspend(const char *osname, void **cookiep)
zil_resume(os);
else
*cookiep = os;
- return (0);
+
+ return (error);
}
void
@@ -4333,7 +4727,7 @@ zil_replay(objset_t *os, void *arg,
zilog->zl_replay = B_TRUE;
zilog->zl_replay_time = ddi_get_lbolt();
- ASSERT(zilog->zl_replay_blks == 0);
+ ASSERT0(zilog->zl_replay_blks);
(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
zh->zh_claim_txg, B_TRUE);
vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE);
@@ -4418,3 +4812,9 @@ ZFS_MODULE_PARAM(zfs_zil, zil_, maxblocksize, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_zil, zil_, maxcopied, UINT, ZMOD_RW,
"Limit in bytes WR_COPIED size");
+
+ZFS_MODULE_PARAM(zfs, zfs_, immediate_write_sz, UINT, ZMOD_RW,
+ "Largest write size to store data into ZIL");
+
+ZFS_MODULE_PARAM(zfs_zil, zil_, special_is_slog, INT, ZMOD_RW,
+ "Treat special vdevs as SLOG");
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 6d7bce8b0e10..3f0ddb63249d 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -339,8 +339,8 @@ zio_fini(void)
}
for (size_t i = 0; i < n; i++) {
- VERIFY3P(zio_buf_cache[i], ==, NULL);
- VERIFY3P(zio_data_buf_cache[i], ==, NULL);
+ VERIFY0P(zio_buf_cache[i]);
+ VERIFY0P(zio_data_buf_cache[i]);
}
if (zio_ksp != NULL) {
@@ -692,7 +692,7 @@ error:
zio->io_error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, &zio->io_bookmark,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, &zio->io_bookmark, zio, 0);
}
@@ -771,7 +771,7 @@ zio_add_child_impl(zio_t *pio, zio_t *cio, boolean_t first)
else
mutex_enter(&cio->io_lock);
- ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
+ ASSERT0(pio->io_state[ZIO_WAIT_DONE]);
uint64_t *countp = pio->io_children[cio->io_child_type];
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
@@ -821,7 +821,7 @@ zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait)
boolean_t waiting = B_FALSE;
mutex_enter(&zio->io_lock);
- ASSERT(zio->io_stall == NULL);
+ ASSERT0P(zio->io_stall);
for (int c = 0; c < ZIO_CHILD_TYPES; c++) {
if (!(ZIO_CHILD_BIT_IS_SET(childbits, c)))
continue;
@@ -850,15 +850,9 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait,
mutex_enter(&pio->io_lock);
if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
*errorp = zio_worst_error(*errorp, zio->io_error);
- pio->io_reexecute |= zio->io_reexecute;
+ pio->io_post |= zio->io_post;
ASSERT3U(*countp, >, 0);
- /*
- * Propogate the Direct I/O checksum verify failure to the parent.
- */
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
- pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
-
(*countp)--;
if (*countp == 0 && pio->io_stall == countp) {
@@ -961,8 +955,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_t *zio;
IMPLY(type != ZIO_TYPE_TRIM, psize <= SPA_MAXBLOCKSIZE);
- ASSERT(P2PHASE(psize, SPA_MINBLOCKSIZE) == 0);
- ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
+ ASSERT0(P2PHASE(psize, SPA_MINBLOCKSIZE));
+ ASSERT0(P2PHASE(offset, SPA_MINBLOCKSIZE));
ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
@@ -1110,7 +1104,8 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
"DVA[1]=%#llx/%#llx "
"DVA[2]=%#llx/%#llx "
"prop=%#llx "
- "pad=%#llx,%#llx "
+ "prop2=%#llx "
+ "pad=%#llx "
"phys_birth=%#llx "
"birth=%#llx "
"fill=%#llx "
@@ -1123,9 +1118,9 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
(long long)bp->blk_dva[2].dva_word[0],
(long long)bp->blk_dva[2].dva_word[1],
(long long)bp->blk_prop,
- (long long)bp->blk_pad[0],
- (long long)bp->blk_pad[1],
- (long long)BP_GET_PHYSICAL_BIRTH(bp),
+ (long long)bp->blk_prop2,
+ (long long)bp->blk_pad,
+ (long long)BP_GET_RAW_PHYSICAL_BIRTH(bp),
(long long)BP_GET_LOGICAL_BIRTH(bp),
(long long)bp->blk_fill,
(long long)bp->blk_cksum.zc_word[0],
@@ -1340,7 +1335,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
{
zio_t *zio;
- zio = zio_create(pio, spa, BP_GET_BIRTH(bp), bp,
+ zio = zio_create(pio, spa, BP_GET_PHYSICAL_BIRTH(bp), bp,
data, size, size, done, private,
ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
@@ -1456,7 +1451,7 @@ zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
metaslab_check_free(spa, bp);
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
} else {
- VERIFY3P(zio_free_sync(NULL, spa, txg, bp, 0), ==, NULL);
+ VERIFY0P(zio_free_sync(NULL, spa, txg, bp, 0));
}
}
@@ -1564,7 +1559,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
{
zio_t *zio;
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
@@ -1585,7 +1580,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
{
zio_t *zio;
- ASSERT(vd->vdev_children == 0);
+ ASSERT0(vd->vdev_children);
ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
@@ -1649,7 +1644,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
* through the mirror during self healing. See comment in
* vdev_mirror_io_done() for more details.
*/
- ASSERT0(pio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+ ASSERT0(pio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
} else if (type == ZIO_TYPE_WRITE &&
pio->io_prop.zp_direct_write == B_TRUE) {
/*
@@ -1685,7 +1680,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
* If this is a retried I/O then we ignore it since we will
* have already processed the original allocating I/O.
*/
- if (flags & ZIO_FLAG_IO_ALLOCATING &&
+ if (flags & ZIO_FLAG_ALLOC_THROTTLED &&
(vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) {
ASSERT(pio->io_metaslab_class != NULL);
ASSERT(pio->io_metaslab_class->mc_alloc_throttle_enabled);
@@ -1695,7 +1690,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) ||
pio->io_child_type == ZIO_CHILD_GANG);
- flags &= ~ZIO_FLAG_IO_ALLOCATING;
+ flags &= ~ZIO_FLAG_ALLOC_THROTTLED;
}
zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size,
@@ -1752,7 +1747,7 @@ zio_flush(zio_t *pio, vdev_t *vd)
void
zio_shrink(zio_t *zio, uint64_t size)
{
- ASSERT3P(zio->io_executor, ==, NULL);
+ ASSERT0P(zio->io_executor);
ASSERT3U(zio->io_orig_size, ==, zio->io_size);
ASSERT3U(size, <=, zio->io_size);
@@ -1860,7 +1855,7 @@ zio_write_bp_init(zio_t *zio)
blkptr_t *bp = zio->io_bp;
zio_prop_t *zp = &zio->io_prop;
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) != zio->io_txg);
+ ASSERT(BP_GET_BIRTH(bp) != zio->io_txg);
*bp = *zio->io_bp_override;
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@@ -1946,9 +1941,9 @@ zio_write_compress(zio_t *zio)
}
ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
- ASSERT(zio->io_bp_override == NULL);
+ ASSERT0P(zio->io_bp_override);
- if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg) {
+ if (!BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) == zio->io_txg) {
/*
* We're rewriting an existing block, which means we're
* working on behalf of spa_sync(). For spa_sync() to
@@ -2085,7 +2080,7 @@ zio_write_compress(zio_t *zio)
* spa_sync() to allocate new blocks, but force rewrites after that.
* There should only be a handful of blocks after pass 1 in any case.
*/
- if (!BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg &&
+ if (!BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) == zio->io_txg &&
BP_GET_PSIZE(bp) == psize &&
pass >= zfs_sync_pass_rewrite) {
VERIFY3U(psize, !=, 0);
@@ -2441,7 +2436,7 @@ __zio_execute(zio_t *zio)
ASSERT(!MUTEX_HELD(&zio->io_lock));
ASSERT(ISP2(stage));
- ASSERT(zio->io_stall == NULL);
+ ASSERT0P(zio->io_stall);
do {
stage <<= 1;
@@ -2514,7 +2509,7 @@ zio_wait(zio_t *zio)
int error;
ASSERT3S(zio->io_stage, ==, ZIO_STAGE_OPEN);
- ASSERT3P(zio->io_executor, ==, NULL);
+ ASSERT0P(zio->io_executor);
zio->io_waiter = curthread;
ASSERT0(zio->io_queued_timestamp);
@@ -2556,7 +2551,7 @@ zio_nowait(zio_t *zio)
if (zio == NULL)
return;
- ASSERT3P(zio->io_executor, ==, NULL);
+ ASSERT0P(zio->io_executor);
if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
list_is_empty(&zio->io_parent_list)) {
@@ -2595,14 +2590,14 @@ zio_reexecute(void *arg)
ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL);
ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN);
- ASSERT(pio->io_gang_leader == NULL);
- ASSERT(pio->io_gang_tree == NULL);
+ ASSERT0P(pio->io_gang_leader);
+ ASSERT0P(pio->io_gang_tree);
mutex_enter(&pio->io_lock);
pio->io_flags = pio->io_orig_flags;
pio->io_stage = pio->io_orig_stage;
pio->io_pipeline = pio->io_orig_pipeline;
- pio->io_reexecute = 0;
+ pio->io_post = 0;
pio->io_flags |= ZIO_FLAG_REEXECUTED;
pio->io_pipeline_trace = 0;
pio->io_error = 0;
@@ -2694,7 +2689,7 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
ASSERT(zio != spa->spa_suspend_zio_root);
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
- ASSERT(zio_unique_parent(zio) == NULL);
+ ASSERT0P(zio_unique_parent(zio));
ASSERT(zio->io_stage == ZIO_STAGE_DONE);
zio_add_child(spa->spa_suspend_zio_root, zio);
}
@@ -2749,11 +2744,14 @@ zio_resume_wait(spa_t *spa)
* being nearly full, it calls zio_write_gang_block() to construct the
* block from smaller fragments.
*
- * A gang block consists of a gang header (zio_gbh_phys_t) and up to
- * three (SPA_GBH_NBLKPTRS) gang members. The gang header is just like
- * an indirect block: it's an array of block pointers. It consumes
- * only one sector and hence is allocatable regardless of fragmentation.
- * The gang header's bps point to its gang members, which hold the data.
+ * A gang block consists of a gang header and up to gbh_nblkptrs(size)
+ * gang members. The gang header is like an indirect block: it's an array
+ * of block pointers, though the header has a small tail (a zio_eck_t)
+ * that stores an embedded checksum. It is allocated using only a single
+ * sector as the requested size, and hence is allocatable regardless of
+ * fragmentation. Its size is determined by the smallest allocatable
+ * asize of the vdevs it was allocated on. The gang header's bps point
+ * to its gang members, which hold the data.
*
* Gang blocks are self-checksumming, using the bp's <vdev, offset, txg>
* as the verifier to ensure uniqueness of the SHA256 checksum.
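
As a back-of-envelope check of the capacity described above (illustrative only; it assumes gbh_nblkptrs() is roughly (size - sizeof (zio_eck_t)) / sizeof (blkptr_t), with a 128-byte blkptr_t and a 40-byte zio_eck_t):

#include <stdio.h>

int
main(void)
{
	const unsigned blkptr_sz = 128;		/* assumed sizeof (blkptr_t) */
	const unsigned eck_sz = 40;		/* assumed sizeof (zio_eck_t) */
	const unsigned old_gbh = 512;		/* SPA_OLD_GANGBLOCKSIZE */
	const unsigned big_gbh = 4096;		/* e.g. a 4K-sector header */

	printf("512-byte header:  %u block pointers\n",
	    (old_gbh - eck_sz) / blkptr_sz);	/* 3, the old limit */
	printf("4096-byte header: %u block pointers\n",
	    (big_gbh - eck_sz) / blkptr_sz);	/* 31 */
	return (0);
}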
@@ -2832,10 +2830,10 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
if (gn != NULL) {
abd_t *gbh_abd =
- abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE);
+ abd_get_from_buf(gn->gn_gbh, gn->gn_gangblocksize);
zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp,
- gbh_abd, SPA_GANGBLOCKSIZE, zio_gang_issue_func_done, NULL,
- pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
+ gbh_abd, gn->gn_gangblocksize, zio_gang_issue_func_done,
+ NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
&pio->io_bookmark);
/*
* As we rewrite each gang header, the pipeline will compute
@@ -2906,14 +2904,16 @@ static zio_gang_issue_func_t *zio_gang_issue_func[ZIO_TYPES] = {
static void zio_gang_tree_assemble_done(zio_t *zio);
static zio_gang_node_t *
-zio_gang_node_alloc(zio_gang_node_t **gnpp)
+zio_gang_node_alloc(zio_gang_node_t **gnpp, uint64_t gangblocksize)
{
zio_gang_node_t *gn;
- ASSERT(*gnpp == NULL);
+ ASSERT0P(*gnpp);
- gn = kmem_zalloc(sizeof (*gn), KM_SLEEP);
- gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE);
+ gn = kmem_zalloc(sizeof (*gn) +
+ (gbh_nblkptrs(gangblocksize) * sizeof (gn)), KM_SLEEP);
+ gn->gn_gangblocksize = gn->gn_allocsize = gangblocksize;
+ gn->gn_gbh = zio_buf_alloc(gangblocksize);
*gnpp = gn;
return (gn);
@@ -2924,11 +2924,12 @@ zio_gang_node_free(zio_gang_node_t **gnpp)
{
zio_gang_node_t *gn = *gnpp;
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++)
- ASSERT(gn->gn_child[g] == NULL);
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_allocsize); g++)
+ ASSERT0P(gn->gn_child[g]);
- zio_buf_free(gn->gn_gbh, SPA_GANGBLOCKSIZE);
- kmem_free(gn, sizeof (*gn));
+ zio_buf_free(gn->gn_gbh, gn->gn_allocsize);
+ kmem_free(gn, sizeof (*gn) +
+ (gbh_nblkptrs(gn->gn_allocsize) * sizeof (gn)));
*gnpp = NULL;
}
@@ -2940,7 +2941,7 @@ zio_gang_tree_free(zio_gang_node_t **gnpp)
if (gn == NULL)
return;
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++)
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_allocsize); g++)
zio_gang_tree_free(&gn->gn_child[g]);
zio_gang_node_free(gnpp);
@@ -2949,13 +2950,28 @@ zio_gang_tree_free(zio_gang_node_t **gnpp)
static void
zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp)
{
- zio_gang_node_t *gn = zio_gang_node_alloc(gnpp);
- abd_t *gbh_abd = abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE);
+ uint64_t gangblocksize = UINT64_MAX;
+ if (spa_feature_is_active(gio->io_spa,
+ SPA_FEATURE_DYNAMIC_GANG_HEADER)) {
+ spa_config_enter(gio->io_spa, SCL_VDEV, FTAG, RW_READER);
+ for (int dva = 0; dva < BP_GET_NDVAS(bp); dva++) {
+ vdev_t *vd = vdev_lookup_top(gio->io_spa,
+ DVA_GET_VDEV(&bp->blk_dva[dva]));
+ uint64_t psize = vdev_gang_header_psize(vd);
+ gangblocksize = MIN(gangblocksize, psize);
+ }
+ spa_config_exit(gio->io_spa, SCL_VDEV, FTAG);
+ } else {
+ gangblocksize = SPA_OLD_GANGBLOCKSIZE;
+ }
+ ASSERT3U(gangblocksize, !=, UINT64_MAX);
+ zio_gang_node_t *gn = zio_gang_node_alloc(gnpp, gangblocksize);
+ abd_t *gbh_abd = abd_get_from_buf(gn->gn_gbh, gangblocksize);
ASSERT(gio->io_gang_leader == gio);
ASSERT(BP_IS_GANG(bp));
- zio_nowait(zio_read(gio, gio->io_spa, bp, gbh_abd, SPA_GANGBLOCKSIZE,
+ zio_nowait(zio_read(gio, gio->io_spa, bp, gbh_abd, gangblocksize,
zio_gang_tree_assemble_done, gn, gio->io_priority,
ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark));
}
@@ -2978,13 +2994,17 @@ zio_gang_tree_assemble_done(zio_t *zio)
byteswap_uint64_array(abd_to_buf(zio->io_abd), zio->io_size);
ASSERT3P(abd_to_buf(zio->io_abd), ==, gn->gn_gbh);
- ASSERT(zio->io_size == SPA_GANGBLOCKSIZE);
- ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
+ /*
+ * If this was an old-style gangblock, the gangblocksize should have
+ * been updated in zio_checksum_error to reflect that.
+ */
+ ASSERT3U(gbh_eck(gn->gn_gbh, gn->gn_gangblocksize)->zec_magic,
+ ==, ZEC_MAGIC);
abd_free(zio->io_abd);
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
- blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_gangblocksize); g++) {
+ blkptr_t *gbp = gbh_bp(gn->gn_gbh, g);
if (!BP_IS_GANG(gbp))
continue;
zio_gang_tree_assemble(gio, gbp, &gn->gn_child[g]);
@@ -3009,10 +3029,11 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, abd_t *data,
zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data, offset);
if (gn != NULL) {
- ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
+ ASSERT3U(gbh_eck(gn->gn_gbh,
+ gn->gn_gangblocksize)->zec_magic, ==, ZEC_MAGIC);
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
- blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_gangblocksize); g++) {
+ blkptr_t *gbp = gbh_bp(gn->gn_gbh, g);
if (BP_IS_HOLE(gbp))
continue;
zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data,
@@ -3119,6 +3140,13 @@ zio_write_gang_done(zio_t *zio)
abd_free(zio->io_abd);
}
+static void
+zio_update_feature(void *arg, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ spa_feature_incr(spa, (spa_feature_t)(uintptr_t)arg, tx);
+}
+
static zio_t *
zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
{
@@ -3157,20 +3185,24 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
ASSERT(ZIO_HAS_ALLOCATOR(pio));
int flags = METASLAB_GANG_HEADER;
- if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (pio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(has_data);
flags |= METASLAB_ASYNC_ALLOC;
}
- error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE,
+ uint64_t gangblocksize = SPA_OLD_GANGBLOCKSIZE;
+ uint64_t candidate = gangblocksize;
+ error = metaslab_alloc_range(spa, mc, gangblocksize, gangblocksize,
bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags,
- &pio->io_alloc_list, pio->io_allocator, pio);
+ &pio->io_alloc_list, pio->io_allocator, pio, &candidate);
if (error) {
pio->io_error = error;
return (pio);
}
+ if (spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER))
+ gangblocksize = candidate;
if (pio == gio) {
gnpp = &gio->io_gang_tree;
@@ -3179,23 +3211,24 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
ASSERT(pio->io_ready == zio_write_gang_member_ready);
}
- gn = zio_gang_node_alloc(gnpp);
+ gn = zio_gang_node_alloc(gnpp, gangblocksize);
gbh = gn->gn_gbh;
- memset(gbh, 0, SPA_GANGBLOCKSIZE);
- gbh_abd = abd_get_from_buf(gbh, SPA_GANGBLOCKSIZE);
+ memset(gbh, 0, gangblocksize);
+ gbh_abd = abd_get_from_buf(gbh, gangblocksize);
/*
* Create the gang header.
*/
- zio = zio_rewrite(pio, spa, txg, bp, gbh_abd, SPA_GANGBLOCKSIZE,
+ zio = zio_rewrite(pio, spa, txg, bp, gbh_abd, gangblocksize,
zio_write_gang_done, NULL, pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
zio_gang_inherit_allocator(pio, zio);
- if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (pio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
boolean_t more;
- VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies,
- zio, B_TRUE, &more));
+ VERIFY(metaslab_class_throttle_reserve(mc, zio->io_allocator,
+ gbh_copies, zio->io_size, B_TRUE, &more));
+ zio->io_flags |= ZIO_FLAG_ALLOC_THROTTLED;
}
/*
@@ -3203,7 +3236,9 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
* opportunistic allocations. If that fails to generate enough
* space, we fall back to normal zio_write calls for nested gang.
*/
- for (int g = 0; resid != 0; g++) {
+ int g;
+ boolean_t any_failed = B_FALSE;
+ for (g = 0; resid != 0; g++) {
flags &= METASLAB_ASYNC_ALLOC;
flags |= METASLAB_GANG_CHILD;
zp.zp_checksum = gio->io_prop.zp_checksum;
@@ -3224,9 +3259,9 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
memset(zp.zp_mac, 0, ZIO_DATA_MAC_LEN);
uint64_t min_size = zio_roundup_alloc_size(spa,
- resid / (SPA_GBH_NBLKPTRS - g));
+ resid / (gbh_nblkptrs(gangblocksize) - g));
min_size = MIN(min_size, resid);
- bp = &gbh->zg_blkptr[g];
+ bp = &((blkptr_t *)gbh)[g];
zio_alloc_list_t cio_list;
metaslab_trace_init(&cio_list);
@@ -3236,6 +3271,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
flags, &cio_list, zio->io_allocator, NULL, &allocated_size);
boolean_t allocated = error == 0;
+ any_failed |= !allocated;
uint64_t psize = allocated ? MIN(resid, allocated_size) :
min_size;
@@ -3268,6 +3304,29 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
}
/*
+ * If we used more gang children than the old limit, we must already be
+ * using the new headers. No need to update anything, just move on.
+ *
+ * Otherwise, we might be in a case where we need to turn on the new
+ * feature, so we check that. We enable the new feature if we didn't
+ * manage to fit everything into 3 gang children and we could have
+ * written more than that.
+ */
+ if (g > gbh_nblkptrs(SPA_OLD_GANGBLOCKSIZE)) {
+ ASSERT(spa_feature_is_active(spa,
+ SPA_FEATURE_DYNAMIC_GANG_HEADER));
+ } else if (any_failed && candidate > SPA_OLD_GANGBLOCKSIZE &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER) &&
+ !spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER)) {
+ dmu_tx_t *tx =
+ dmu_tx_create_assigned(spa->spa_dsl_pool, txg + 1);
+ dsl_sync_task_nowait(spa->spa_dsl_pool,
+ zio_update_feature,
+ (void *)SPA_FEATURE_DYNAMIC_GANG_HEADER, tx);
+ dmu_tx_commit(tx);
+ }
+
+ /*
* Set pio's pipeline to just wait for zio to finish.
*/
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@@ -3303,11 +3362,11 @@ zio_nop_write(zio_t *zio)
zio_prop_t *zp = &zio->io_prop;
ASSERT(BP_IS_HOLE(bp));
- ASSERT(BP_GET_LEVEL(bp) == 0);
+ ASSERT0(BP_GET_LEVEL(bp));
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
ASSERT(zp->zp_nopwrite);
ASSERT(!zp->zp_dedup);
- ASSERT(zio->io_bp_override == NULL);
+ ASSERT0P(zio->io_bp_override);
ASSERT(IO_IS_ALLOCATING(zio));
/*
@@ -3436,7 +3495,7 @@ zio_ddt_read_start(zio_t *zio)
ddt_univ_phys_t *ddp = dde->dde_phys;
blkptr_t blk;
- ASSERT(zio->io_vsd == NULL);
+ ASSERT0P(zio->io_vsd);
zio->io_vsd = dde;
if (v_self == DDT_PHYS_NONE)
@@ -3501,7 +3560,7 @@ zio_ddt_read_done(zio_t *zio)
zio->io_vsd = NULL;
}
- ASSERT(zio->io_vsd == NULL);
+ ASSERT0P(zio->io_vsd);
return (zio);
}
@@ -3836,7 +3895,7 @@ zio_ddt_write(zio_t *zio)
* block and leave.
*/
if (have_dvas == 0) {
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) == txg);
+ ASSERT(BP_GET_BIRTH(bp) == txg);
ASSERT(BP_EQUAL(bp, zio->io_bp_override));
ddt_phys_extend(ddp, v, bp);
ddt_phys_addref(ddp, v);
@@ -3864,6 +3923,23 @@ zio_ddt_write(zio_t *zio)
* then we can just use them as-is.
*/
if (have_dvas >= need_dvas) {
+ /*
+ * For rewrite operations, try preserving the original
+ * logical birth time. If the result matches the
+ * original BP, this becomes a NOP.
+ */
+ if (zp->zp_rewrite) {
+ uint64_t orig_logical_birth =
+ BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig);
+ ddt_bp_fill(ddp, v, bp, orig_logical_birth);
+ if (BP_EQUAL(bp, &zio->io_bp_orig)) {
+ /* We can skip accounting. */
+ zio->io_flags |= ZIO_FLAG_NOPWRITE;
+ ddt_exit(ddt);
+ return (zio);
+ }
+ }
+
ddt_bp_fill(ddp, v, bp, txg);
ddt_phys_addref(ddp, v);
ddt_exit(ddt);
@@ -4078,9 +4154,11 @@ zio_io_to_allocate(metaslab_class_allocator_t *mca, boolean_t *more)
* reserve then we throttle.
*/
if (!metaslab_class_throttle_reserve(zio->io_metaslab_class,
- zio->io_prop.zp_copies, zio, B_FALSE, more)) {
+ zio->io_allocator, zio->io_prop.zp_copies, zio->io_size,
+ B_FALSE, more)) {
return (NULL);
}
+ zio->io_flags |= ZIO_FLAG_ALLOC_THROTTLED;
avl_remove(&mca->mca_tree, zio);
ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
@@ -4164,8 +4242,10 @@ zio_dva_allocate(zio_t *zio)
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_GANG);
memcpy(zio->io_bp->blk_dva, zio->io_bp_orig.blk_dva,
3 * sizeof (dva_t));
- BP_SET_BIRTH(zio->io_bp, BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig),
- BP_GET_PHYSICAL_BIRTH(&zio->io_bp_orig));
+ BP_SET_LOGICAL_BIRTH(zio->io_bp,
+ BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig));
+ BP_SET_PHYSICAL_BIRTH(zio->io_bp,
+ BP_GET_RAW_PHYSICAL_BIRTH(&zio->io_bp_orig));
return (zio);
}
@@ -4236,13 +4316,14 @@ again:
* If we are holding old class reservation, drop it.
* Dispatch the next ZIO(s) there if some are waiting.
*/
- if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
if (metaslab_class_throttle_unreserve(mc,
- zio->io_prop.zp_copies, zio)) {
+ zio->io_allocator, zio->io_prop.zp_copies,
+ zio->io_size)) {
zio_allocate_dispatch(zio->io_metaslab_class,
zio->io_allocator);
}
- zio->io_flags &= ~ZIO_FLAG_IO_ALLOCATING;
+ zio->io_flags &= ~ZIO_FLAG_ALLOC_THROTTLED;
}
if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
@@ -4291,6 +4372,15 @@ again:
error);
}
zio->io_error = error;
+ } else if (zio->io_prop.zp_rewrite) {
+ /*
+ * For rewrite operations, preserve the logical birth time
+ * but set the physical birth time to the current txg.
+ */
+ uint64_t logical_birth = BP_GET_LOGICAL_BIRTH(&zio->io_bp_orig);
+ ASSERT3U(logical_birth, <=, zio->io_txg);
+ BP_SET_BIRTH(zio->io_bp, logical_birth, zio->io_txg);
+ BP_SET_REWRITE(zio->io_bp, 1);
}
return (zio);
@@ -4324,18 +4414,17 @@ zio_dva_claim(zio_t *zio)
static void
zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
{
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp));
- ASSERT(zio->io_bp_override == NULL);
+ ASSERT(BP_GET_BIRTH(bp) == zio->io_txg || BP_IS_HOLE(bp));
+ ASSERT0P(zio->io_bp_override);
if (!BP_IS_HOLE(bp)) {
- metaslab_free(zio->io_spa, bp, BP_GET_LOGICAL_BIRTH(bp),
- B_TRUE);
+ metaslab_free(zio->io_spa, bp, BP_GET_BIRTH(bp), B_TRUE);
}
if (gn != NULL) {
- for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
+ for (int g = 0; g < gbh_nblkptrs(gn->gn_gangblocksize); g++) {
zio_dva_unallocate(zio, gn->gn_child[g],
- &gn->gn_gbh->zg_blkptr[g]);
+ gbh_bp(gn->gn_gbh, g));
}
}
}
@@ -4347,7 +4436,7 @@ int
zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
uint64_t size, boolean_t *slog)
{
- int error = 1;
+ int error;
zio_alloc_list_t io_alloc_list;
ASSERT(txg > spa_syncing_txg(spa));
@@ -4372,14 +4461,34 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object)
% spa->spa_alloc_count;
ZIOSTAT_BUMP(ziostat_total_allocations);
+
+ /* Try log class (dedicated slog devices) first */
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
txg, NULL, flags, &io_alloc_list, allocator, NULL);
*slog = (error == 0);
+
+ /* Try special_embedded_log class (reserved on special vdevs) */
+ if (error != 0) {
+ error = metaslab_alloc(spa, spa_special_embedded_log_class(spa),
+ size, new_bp, 1, txg, NULL, flags, &io_alloc_list,
+ allocator, NULL);
+ }
+
+ /* Try special class (general special vdev allocation) */
+ if (error != 0) {
+ error = metaslab_alloc(spa, spa_special_class(spa), size,
+ new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator,
+ NULL);
+ }
+
+ /* Try embedded_log class (reserved on normal vdevs) */
if (error != 0) {
error = metaslab_alloc(spa, spa_embedded_log_class(spa), size,
new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator,
NULL);
}
+
+ /* Finally fall back to normal class */
if (error != 0) {
ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks);
error = metaslab_alloc(spa, spa_normal_class(spa), size,
@@ -4450,8 +4559,8 @@ zio_vdev_io_start(zio_t *zio)
zio->io_delay = 0;
- ASSERT(zio->io_error == 0);
- ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0);
+ ASSERT0(zio->io_error);
+ ASSERT0(zio->io_child_error[ZIO_CHILD_VDEV]);
if (vd == NULL) {
if (!(zio->io_flags & ZIO_FLAG_CONFIG_WRITER))
@@ -4642,7 +4751,7 @@ zio_vdev_io_done(zio_t *zio)
ops->vdev_op_io_done(zio);
if (unexpected_error && vd->vdev_remove_wanted == B_FALSE)
- VERIFY(vdev_probe(vd, zio) == NULL);
+ VERIFY0P(vdev_probe(vd, zio));
return (zio);
}
@@ -4722,7 +4831,7 @@ zio_vdev_io_assess(zio_t *zio)
* If a Direct I/O operation has a checksum verify error then this I/O
* should not attempt to be issued again.
*/
- if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
+ if (zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) {
if (zio->io_type == ZIO_TYPE_WRITE) {
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
ASSERT3U(zio->io_error, ==, EIO);
@@ -4794,7 +4903,7 @@ void
zio_vdev_io_reissue(zio_t *zio)
{
ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START);
- ASSERT(zio->io_error == 0);
+ ASSERT0(zio->io_error);
zio->io_stage >>= 1;
}
@@ -4811,7 +4920,7 @@ void
zio_vdev_io_bypass(zio_t *zio)
{
ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START);
- ASSERT(zio->io_error == 0);
+ ASSERT0(zio->io_error);
zio->io_flags |= ZIO_FLAG_IO_BYPASS;
zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1;
@@ -5031,7 +5140,7 @@ zio_checksum_verify(zio_t *zio)
ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL);
}
- ASSERT0(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+ ASSERT0(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
IMPLY(zio->io_flags & ZIO_FLAG_DIO_READ,
!(zio->io_flags & ZIO_FLAG_SPECULATIVE));
@@ -5040,7 +5149,7 @@ zio_checksum_verify(zio_t *zio)
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
if (zio->io_flags & ZIO_FLAG_DIO_READ) {
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_t *pio = zio_unique_parent(zio);
/*
* Any Direct I/O read that has a checksum
@@ -5090,7 +5199,7 @@ zio_dio_checksum_verify(zio_t *zio)
if ((error = zio_checksum_error(zio, NULL)) != 0) {
zio->io_error = error;
if (error == ECKSUM) {
- zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
+ zio->io_post |= ZIO_POST_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
}
}
@@ -5115,7 +5224,7 @@ zio_checksum_verified(zio_t *zio)
void
zio_dio_chksum_verify_error_report(zio_t *zio)
{
- ASSERT(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
+ ASSERT(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
if (zio->io_child_type == ZIO_CHILD_LOGICAL)
return;
@@ -5187,9 +5296,9 @@ zio_ready(zio_t *zio)
if (zio->io_ready) {
ASSERT(IO_IS_ALLOCATING(zio));
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) == zio->io_txg ||
+ ASSERT(BP_GET_BIRTH(bp) == zio->io_txg ||
BP_IS_HOLE(bp) || (zio->io_flags & ZIO_FLAG_NOPWRITE));
- ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0);
+ ASSERT0(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY]);
zio->io_ready(zio);
}
@@ -5202,7 +5311,7 @@ zio_ready(zio_t *zio)
if (zio->io_error != 0) {
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
- if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
ASSERT(IO_IS_ALLOCATING(zio));
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(zio->io_metaslab_class != NULL);
@@ -5213,8 +5322,8 @@ zio_ready(zio_t *zio)
* issue the next I/O to allocate.
*/
if (metaslab_class_throttle_unreserve(
- zio->io_metaslab_class, zio->io_prop.zp_copies,
- zio)) {
+ zio->io_metaslab_class, zio->io_allocator,
+ zio->io_prop.zp_copies, zio->io_size)) {
zio_allocate_dispatch(zio->io_metaslab_class,
zio->io_allocator);
}
@@ -5264,6 +5373,7 @@ zio_dva_throttle_done(zio_t *zio)
vdev_t *vd = zio->io_vd;
int flags = METASLAB_ASYNC_ALLOC;
const void *tag = pio;
+ uint64_t size = pio->io_size;
ASSERT3P(zio->io_bp, !=, NULL);
ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
@@ -5273,16 +5383,19 @@ zio_dva_throttle_done(zio_t *zio)
ASSERT3P(vd, ==, vd->vdev_top);
ASSERT(zio_injection_enabled || !(zio->io_flags & ZIO_FLAG_IO_RETRY));
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
- ASSERT(zio->io_flags & ZIO_FLAG_IO_ALLOCATING);
+ ASSERT(zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED);
/*
* Parents of gang children can have two flavors -- ones that allocated
* the gang header (will have ZIO_FLAG_IO_REWRITE set) and ones that
* allocated the constituent blocks. The first use their parent as tag.
+ * We set the size to match the original allocation call for that case.
*/
if (pio->io_child_type == ZIO_CHILD_GANG &&
- (pio->io_flags & ZIO_FLAG_IO_REWRITE))
+ (pio->io_flags & ZIO_FLAG_IO_REWRITE)) {
tag = zio_unique_parent(pio);
+ size = SPA_OLD_GANGBLOCKSIZE;
+ }
ASSERT(IO_IS_ALLOCATING(pio) || (pio->io_child_type == ZIO_CHILD_GANG &&
(pio->io_flags & ZIO_FLAG_IO_REWRITE)));
@@ -5295,9 +5408,10 @@ zio_dva_throttle_done(zio_t *zio)
ASSERT(zio->io_metaslab_class->mc_alloc_throttle_enabled);
metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id,
- pio->io_allocator, flags, pio->io_size, tag);
+ pio->io_allocator, flags, size, tag);
- if (metaslab_class_throttle_unreserve(zio->io_metaslab_class, 1, pio)) {
+ if (metaslab_class_throttle_unreserve(pio->io_metaslab_class,
+ pio->io_allocator, 1, pio->io_size)) {
zio_allocate_dispatch(zio->io_metaslab_class,
pio->io_allocator);
}
@@ -5328,17 +5442,15 @@ zio_done(zio_t *zio)
* write. We must do this since the allocation is performed
* by the logical I/O but the actual write is done by child I/Os.
*/
- if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING &&
+ if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED &&
zio->io_child_type == ZIO_CHILD_VDEV)
zio_dva_throttle_done(zio);
for (int c = 0; c < ZIO_CHILD_TYPES; c++)
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
- ASSERT(zio->io_children[c][w] == 0);
+ ASSERT0(zio->io_children[c][w]);
if (zio->io_bp != NULL && !BP_IS_EMBEDDED(zio->io_bp)) {
- ASSERT(zio->io_bp->blk_pad[0] == 0);
- ASSERT(zio->io_bp->blk_pad[1] == 0);
ASSERT(memcmp(zio->io_bp, &zio->io_bp_copy,
sizeof (blkptr_t)) == 0 ||
(zio->io_bp == zio_unique_parent(zio)->io_bp));
@@ -5431,7 +5543,7 @@ zio_done(zio_t *zio)
*/
if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
!vdev_is_dead(zio->io_vd) &&
- !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
+ !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) {
int ret = zfs_ereport_post(FM_EREPORT_ZFS_IO,
zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
if (ret != EALREADY) {
@@ -5446,14 +5558,14 @@ zio_done(zio_t *zio)
if ((zio->io_error == EIO || !(zio->io_flags &
(ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&
- !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) &&
+ !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR) &&
zio == zio->io_logical) {
/*
* For logical I/O requests, tell the SPA to log the
* error and generate a logical data ereport.
*/
spa_log_error(zio->io_spa, &zio->io_bookmark,
- BP_GET_LOGICAL_BIRTH(zio->io_bp));
+ BP_GET_PHYSICAL_BIRTH(zio->io_bp));
(void) zfs_ereport_post(FM_EREPORT_ZFS_DATA,
zio->io_spa, NULL, &zio->io_bookmark, zio, 0);
}
@@ -5467,7 +5579,7 @@ zio_done(zio_t *zio)
*/
if (zio->io_error == EAGAIN && IO_IS_ALLOCATING(zio) &&
zio->io_prop.zp_dedup) {
- zio->io_reexecute |= ZIO_REEXECUTE_NOW;
+ zio->io_post |= ZIO_POST_REEXECUTE;
zio->io_prop.zp_dedup = B_FALSE;
}
/*
@@ -5479,11 +5591,11 @@ zio_done(zio_t *zio)
if (IO_IS_ALLOCATING(zio) &&
!(zio->io_flags & ZIO_FLAG_CANFAIL) &&
- !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
+ !(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR)) {
if (zio->io_error != ENOSPC)
- zio->io_reexecute |= ZIO_REEXECUTE_NOW;
+ zio->io_post |= ZIO_POST_REEXECUTE;
else
- zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;
+ zio->io_post |= ZIO_POST_SUSPEND;
}
if ((zio->io_type == ZIO_TYPE_READ ||
@@ -5492,10 +5604,11 @@ zio_done(zio_t *zio)
zio->io_error == ENXIO &&
spa_load_state(zio->io_spa) == SPA_LOAD_NONE &&
spa_get_failmode(zio->io_spa) != ZIO_FAILURE_MODE_CONTINUE)
- zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;
+ zio->io_post |= ZIO_POST_SUSPEND;
- if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute)
- zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;
+ if (!(zio->io_flags & ZIO_FLAG_CANFAIL) &&
+ !(zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND)))
+ zio->io_post |= ZIO_POST_SUSPEND;
/*
* Here is a possibly good place to attempt to do
@@ -5514,7 +5627,8 @@ zio_done(zio_t *zio)
*/
zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL);
- if ((zio->io_error || zio->io_reexecute) &&
+ if ((zio->io_error ||
+ (zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND))) &&
IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio &&
!(zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)))
zio_dva_unallocate(zio, zio->io_gang_tree, zio->io_bp);
@@ -5525,16 +5639,16 @@ zio_done(zio_t *zio)
* Godfather I/Os should never suspend.
*/
if ((zio->io_flags & ZIO_FLAG_GODFATHER) &&
- (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND))
- zio->io_reexecute &= ~ZIO_REEXECUTE_SUSPEND;
+ (zio->io_post & ZIO_POST_SUSPEND))
+ zio->io_post &= ~ZIO_POST_SUSPEND;
- if (zio->io_reexecute) {
+ if (zio->io_post & (ZIO_POST_REEXECUTE|ZIO_POST_SUSPEND)) {
/*
* A Direct I/O operation that has a checksum verify error
* should not attempt to reexecute. Instead, the error should
* just be propagated back.
*/
- ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR));
+ ASSERT0(zio->io_post & ZIO_POST_DIO_CHKSUM_ERR);
/*
* This is a logical I/O that wants to reexecute.
@@ -5571,7 +5685,7 @@ zio_done(zio_t *zio)
pio_next = zio_walk_parents(zio, &zl);
if ((pio->io_flags & ZIO_FLAG_GODFATHER) &&
- (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) {
+ (zio->io_post & ZIO_POST_SUSPEND)) {
zio_remove_child(pio, zio, remove_zl);
/*
* This is a rare code path, so we don't
@@ -5595,13 +5709,14 @@ zio_done(zio_t *zio)
* "next_to_execute".
*/
zio_notify_parent(pio, zio, ZIO_WAIT_DONE, NULL);
- } else if (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND) {
+ } else if (zio->io_post & ZIO_POST_SUSPEND) {
/*
* We'd fail again if we reexecuted now, so suspend
* until conditions improve (e.g. device comes online).
*/
zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
} else {
+ ASSERT(zio->io_post & ZIO_POST_REEXECUTE);
/*
* Reexecution is potentially a huge amount of work.
* Hand it off to the otherwise-unused claim taskq.
@@ -5614,7 +5729,8 @@ zio_done(zio_t *zio)
}
ASSERT(list_is_empty(&zio->io_child_list));
- ASSERT(zio->io_reexecute == 0);
+ ASSERT0(zio->io_post & ZIO_POST_REEXECUTE);
+ ASSERT0(zio->io_post & ZIO_POST_SUSPEND);
ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL));
/*
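
Throughout zio_done() the old io_reexecute field and the ZIO_FLAG_DIO_CHKSUM_ERR flag are folded into a single io_post bitmask (ZIO_POST_REEXECUTE, ZIO_POST_SUSPEND, ZIO_POST_DIO_CHKSUM_ERR). A minimal sketch of that post-processing pattern follows; the flag values are illustrative, not the definitions from the zio headers:

/* Illustrative flag values only. */
#include <stdint.h>

#define	POST_REEXECUTE		(1u << 0)
#define	POST_SUSPEND		(1u << 1)
#define	POST_DIO_CHKSUM_ERR	(1u << 2)

static void
post_process(uint32_t *post, int godfather)
{
	/* Godfather I/Os are never allowed to suspend. */
	if (godfather && (*post & POST_SUSPEND))
		*post &= ~POST_SUSPEND;

	if (*post & (POST_REEXECUTE | POST_SUSPEND)) {
		if (*post & POST_SUSPEND) {
			/* park the I/O until the pool is resumed */
		} else {
			/* reissue the I/O through the pipeline */
		}
	}
}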
diff --git a/sys/contrib/openzfs/module/zfs/zio_checksum.c b/sys/contrib/openzfs/module/zfs/zio_checksum.c
index a91775b04af2..1d0646a61185 100644
--- a/sys/contrib/openzfs/module/zfs/zio_checksum.c
+++ b/sys/contrib/openzfs/module/zfs/zio_checksum.c
@@ -215,7 +215,7 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
spa_feature_t
zio_checksum_to_feature(enum zio_checksum cksum)
{
- VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0);
+ VERIFY0((cksum & ~ZIO_CHECKSUM_MASK));
switch (cksum) {
case ZIO_CHECKSUM_BLAKE3:
@@ -279,7 +279,7 @@ static void
zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
{
const dva_t *dva = BP_IDENTITY(bp);
- uint64_t txg = BP_GET_BIRTH(bp);
+ uint64_t txg = BP_GET_PHYSICAL_BIRTH(bp);
ASSERT(BP_IS_GANG(bp));
@@ -545,14 +545,39 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
(BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
int error;
- uint64_t size = (bp == NULL ? zio->io_size :
- (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
+ uint64_t size = bp ? BP_GET_PSIZE(bp) : zio->io_size;
uint64_t offset = zio->io_offset;
abd_t *data = zio->io_abd;
spa_t *spa = zio->io_spa;
+ if (bp && BP_IS_GANG(bp)) {
+ if (spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER))
+ size = zio->io_size;
+ else
+ size = SPA_OLD_GANGBLOCKSIZE;
+ }
+
error = zio_checksum_error_impl(spa, bp, checksum, data, size,
offset, info);
+ if (error && bp && BP_IS_GANG(bp) && size > SPA_OLD_GANGBLOCKSIZE) {
+ /*
+ * It's possible that this is an old gang block. Rerun
+ * the checksum with the old size; if that passes, then
+ * update the gangblocksize appropriately.
+ */
+ error = zio_checksum_error_impl(spa, bp, checksum, data,
+ SPA_OLD_GANGBLOCKSIZE, offset, info);
+ if (error == 0) {
+ ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
+ zio_t *pio;
+ for (pio = zio_unique_parent(zio);
+ pio->io_child_type != ZIO_CHILD_GANG;
+ pio = zio_unique_parent(pio))
+ ;
+ zio_gang_node_t *gn = pio->io_private;
+ gn->gn_gangblocksize = SPA_OLD_GANGBLOCKSIZE;
+ }
+ }
if (zio_injection_enabled && error == 0 && zio->io_error == 0) {
error = zio_handle_fault_injection(zio, ECKSUM);
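
The zio_checksum_error() change above sizes gang headers from the dynamic_gang_header feature state and, if verification fails at the larger size, retries at the legacy header size before giving up. A rough sketch of that fallback, with a hypothetical verify callback standing in for zio_checksum_error_impl() and a stand-in constant for SPA_OLD_GANGBLOCKSIZE:

#include <stdint.h>

#define	OLD_GANG_SIZE	512	/* stand-in for SPA_OLD_GANGBLOCKSIZE */

typedef int (*verify_fn_t)(const void *buf, uint64_t size);

static int
verify_gang_header(const void *buf, uint64_t size, verify_fn_t verify,
    uint64_t *actual_size)
{
	int error = verify(buf, size);

	if (error != 0 && size > OLD_GANG_SIZE) {
		/* Possibly an old, smaller gang header; retry. */
		error = verify(buf, OLD_GANG_SIZE);
		if (error == 0)
			*actual_size = OLD_GANG_SIZE;
	}
	return (error);
}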
diff --git a/sys/contrib/openzfs/module/zfs/zio_compress.c b/sys/contrib/openzfs/module/zfs/zio_compress.c
index 9f0ac1b63146..89ceeb58ad91 100644
--- a/sys/contrib/openzfs/module/zfs/zio_compress.c
+++ b/sys/contrib/openzfs/module/zfs/zio_compress.c
@@ -38,12 +38,6 @@
#include <sys/zstd/zstd.h>
/*
- * If nonzero, every 1/X decompression attempts will fail, simulating
- * an undetected memory error.
- */
-static unsigned long zio_decompress_fail_fraction = 0;
-
-/*
* Compression vectors.
*/
zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
@@ -171,15 +165,6 @@ zio_decompress_data(enum zio_compress c, abd_t *src, abd_t *dst,
else
err = ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level);
- /*
- * Decompression shouldn't fail, because we've already verified
- * the checksum. However, for extra protection (e.g. against bitflips
- * in non-ECC RAM), we handle this error (and test it).
- */
- if (zio_decompress_fail_fraction != 0 &&
- random_in_range(zio_decompress_fail_fraction) == 0)
- err = SET_ERROR(EINVAL);
-
return (err);
}
diff --git a/sys/contrib/openzfs/module/zfs/zio_inject.c b/sys/contrib/openzfs/module/zfs/zio_inject.c
index df7b01ba879e..981a1be4847c 100644
--- a/sys/contrib/openzfs/module/zfs/zio_inject.c
+++ b/sys/contrib/openzfs/module/zfs/zio_inject.c
@@ -1119,7 +1119,7 @@ zio_clear_fault(int id)
kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) *
handler->zi_record.zi_nlanes);
} else {
- ASSERT3P(handler->zi_lanes, ==, NULL);
+ ASSERT0P(handler->zi_lanes);
}
if (handler->zi_spa_name != NULL)
diff --git a/sys/contrib/openzfs/module/zfs/zrlock.c b/sys/contrib/openzfs/module/zfs/zrlock.c
index 3c0f1b7bbbc1..09c110945c97 100644
--- a/sys/contrib/openzfs/module/zfs/zrlock.c
+++ b/sys/contrib/openzfs/module/zfs/zrlock.c
@@ -129,7 +129,7 @@ zrl_tryenter(zrlock_t *zrl)
(uint32_t *)&zrl->zr_refcount, 0, ZRL_LOCKED);
if (cas == 0) {
#ifdef ZFS_DEBUG
- ASSERT3P(zrl->zr_owner, ==, NULL);
+ ASSERT0P(zrl->zr_owner);
zrl->zr_owner = curthread;
#endif
return (1);
diff --git a/sys/contrib/openzfs/module/zfs/zthr.c b/sys/contrib/openzfs/module/zfs/zthr.c
index 597a510528ea..d245ce4946e0 100644
--- a/sys/contrib/openzfs/module/zfs/zthr.c
+++ b/sys/contrib/openzfs/module/zfs/zthr.c
@@ -316,7 +316,7 @@ zthr_destroy(zthr_t *t)
{
ASSERT(!MUTEX_HELD(&t->zthr_state_lock));
ASSERT(!MUTEX_HELD(&t->zthr_request_lock));
- VERIFY3P(t->zthr_thread, ==, NULL);
+ VERIFY0P(t->zthr_thread);
mutex_destroy(&t->zthr_request_lock);
mutex_destroy(&t->zthr_state_lock);
cv_destroy(&t->zthr_cv);
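
The zio_inject.c, zrlock.c, and zthr.c hunks are mechanical conversions to the ASSERT0P()/VERIFY0P() forms for NULL-pointer checks. As a rough illustration only (the real macros live in the SPL/ZFS debug headers and also print the failing expression), they behave like:

#include <assert.h>
#include <stddef.h>

/* Simplified stand-ins; not the actual OpenZFS definitions. */
#define	ASSERT0P(p)	assert((p) == NULL)
#define	VERIFY0P(p)	assert((p) == NULL)

struct handler { void *zi_lanes; };

static void
check(struct handler *h)
{
	ASSERT0P(h->zi_lanes);	/* reads as: assert the pointer is NULL */
}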
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 3568d4f43fcb..29f51e230a37 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -102,6 +102,7 @@ extern int zfs_bclone_wait_dirty;
zv_taskq_t zvol_taskqs;
typedef enum {
+ ZVOL_ASYNC_CREATE_MINORS,
ZVOL_ASYNC_REMOVE_MINORS,
ZVOL_ASYNC_RENAME_MINORS,
ZVOL_ASYNC_SET_SNAPDEV,
@@ -110,10 +111,14 @@ typedef enum {
} zvol_async_op_t;
typedef struct {
- zvol_async_op_t op;
- char name1[MAXNAMELEN];
- char name2[MAXNAMELEN];
- uint64_t value;
+ zvol_async_op_t zt_op;
+ char zt_name1[MAXNAMELEN];
+ char zt_name2[MAXNAMELEN];
+ uint64_t zt_value;
+ uint32_t zt_total;
+ uint32_t zt_done;
+ int32_t zt_status;
+ int zt_error;
} zvol_task_t;
zv_request_task_t *
@@ -210,8 +215,8 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
int error;
uint64_t volblocksize, volsize;
- VERIFY(nvlist_lookup_uint64(nvprops,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
+ VERIFY0(nvlist_lookup_uint64(nvprops,
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize));
if (nvlist_lookup_uint64(nvprops,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
@@ -220,21 +225,20 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
* These properties must be removed from the list so the generic
* property setting step won't apply to them.
*/
- VERIFY(nvlist_remove_all(nvprops,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
+ VERIFY0(nvlist_remove_all(nvprops, zfs_prop_to_name(ZFS_PROP_VOLSIZE)));
(void) nvlist_remove_all(nvprops,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
- ASSERT(error == 0);
+ ASSERT0(error);
}
/*
@@ -249,7 +253,7 @@ zvol_get_stats(objset_t *os, nvlist_t *nv)
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
if (error)
- return (SET_ERROR(error));
+ return (error);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
@@ -262,7 +266,7 @@ zvol_get_stats(objset_t *os, nvlist_t *nv)
kmem_free(doi, sizeof (dmu_object_info_t));
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -300,7 +304,7 @@ zvol_update_volsize(uint64_t volsize, objset_t *os)
error = dmu_tx_assign(tx, DMU_TX_WAIT);
if (error) {
dmu_tx_abort(tx);
- return (SET_ERROR(error));
+ return (error);
}
txg = dmu_tx_get_txg(tx);
@@ -332,7 +336,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
error = dsl_prop_get_integer(name,
zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
if (error != 0)
- return (SET_ERROR(error));
+ return (error);
if (readonly)
return (SET_ERROR(EROFS));
@@ -348,7 +352,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
FTAG, &os)) != 0) {
if (zv != NULL)
mutex_exit(&zv->zv_state_lock);
- return (SET_ERROR(error));
+ return (error);
}
owned = B_TRUE;
if (zv != NULL)
@@ -385,7 +389,7 @@ out:
if (error == 0 && zv != NULL)
zvol_os_update_volsize(zv, volsize);
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -396,7 +400,7 @@ zvol_set_volthreading(const char *name, boolean_t value)
{
zvol_state_t *zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL)
- return (ENOENT);
+ return (SET_ERROR(ENOENT));
zv->zv_threading = value;
mutex_exit(&zv->zv_state_lock);
return (0);
@@ -445,8 +449,10 @@ zvol_check_volblocksize(const char *name, uint64_t volblocksize)
* We don't allow setting the property above 1MB,
* unless the tunable has been changed.
*/
- if (volblocksize > zfs_max_recordsize)
+ if (volblocksize > zfs_max_recordsize) {
+ spa_close(spa, FTAG);
return (SET_ERROR(EDOM));
+ }
spa_close(spa, FTAG);
}
@@ -613,7 +619,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
dmu_tx_t *tx;
blkptr_t *bps;
size_t maxblocks;
- int error = EINVAL;
+ int error = 0;
rw_enter(&zv_dst->zv_suspend_lock, RW_READER);
if (zv_dst->zv_zilog == NULL) {
@@ -639,23 +645,22 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
*/
if (!spa_feature_is_enabled(dmu_objset_spa(outos),
SPA_FEATURE_BLOCK_CLONING)) {
- error = EOPNOTSUPP;
+ error = SET_ERROR(EOPNOTSUPP);
goto out;
}
if (dmu_objset_spa(inos) != dmu_objset_spa(outos)) {
- error = EXDEV;
+ error = SET_ERROR(EXDEV);
goto out;
}
if (inos->os_encrypted != outos->os_encrypted) {
- error = EXDEV;
+ error = SET_ERROR(EXDEV);
goto out;
}
if (zv_src->zv_volblocksize != zv_dst->zv_volblocksize) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
if (inoff >= zv_src->zv_volsize || outoff >= zv_dst->zv_volsize) {
- error = 0;
goto out;
}
@@ -666,17 +671,15 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
len = zv_src->zv_volsize - inoff;
if (len > zv_dst->zv_volsize - outoff)
len = zv_dst->zv_volsize - outoff;
- if (len == 0) {
- error = 0;
+ if (len == 0)
goto out;
- }
/*
* No overlapping if we are cloning within the same file
*/
if (zv_src == zv_dst) {
if (inoff < outoff + len && outoff < inoff + len) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
}
@@ -686,7 +689,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
*/
if ((inoff % zv_src->zv_volblocksize) != 0 ||
(outoff % zv_dst->zv_volblocksize) != 0) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
@@ -694,7 +697,7 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
* Length must be multiple of block size
*/
if ((len % zv_src->zv_volblocksize) != 0) {
- error = EINVAL;
+ error = SET_ERROR(EINVAL);
goto out;
}
@@ -766,13 +769,13 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
zfs_rangelock_exit(outlr);
zfs_rangelock_exit(inlr);
if (error == 0 && zv_dst->zv_objset->os_sync == ZFS_SYNC_ALWAYS) {
- zil_commit(zilog_dst, ZVOL_OBJ);
+ error = zil_commit(zilog_dst, ZVOL_OBJ);
}
out:
if (zv_src != zv_dst)
rw_exit(&zv_src->zv_suspend_lock);
rw_exit(&zv_dst->zv_suspend_lock);
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -859,13 +862,8 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
};
/*
- * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
- *
- * We store data in the log buffers if it's small enough.
- * Otherwise we will later flush the data out via dmu_sync().
+ * zvol_log_write() handles TX_WRITE transactions.
*/
-static const ssize_t zvol_immediate_write_sz = 32768;
-
void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, boolean_t commit)
@@ -878,15 +876,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
if (zil_replaying(zilog, tx))
return;
- if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- write_state = WR_INDIRECT;
- else if (!spa_has_slogs(zilog->zl_spa) &&
- size >= blocksize && blocksize > zvol_immediate_write_sz)
- write_state = WR_INDIRECT;
- else if (commit)
- write_state = WR_COPIED;
- else
- write_state = WR_NEED_COPY;
+ write_state = zil_write_state(zilog, size, blocksize, B_FALSE, commit);
while (size) {
itx_t *itx;
@@ -905,7 +895,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
if (wr_state == WR_COPIED &&
dmu_read_by_dnode(zv->zv_dn, offset, len, lr + 1,
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING) != 0) {
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
wr_state = WR_NEED_COPY;
@@ -924,7 +914,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
itx->itx_private = zv;
- (void) zil_itx_assign(zilog, itx, tx);
+ zil_itx_assign(zilog, itx, tx);
offset += len;
size -= len;
@@ -1034,7 +1024,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
zvol_get_done(zgd, error);
- return (SET_ERROR(error));
+ return (error);
}
/*
@@ -1079,15 +1069,15 @@ zvol_setup_zv(zvol_state_t *zv)
error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
if (error)
- return (SET_ERROR(error));
+ return (error);
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error)
- return (SET_ERROR(error));
+ return (error);
error = dnode_hold(os, ZVOL_OBJ, zv, &zv->zv_dn);
if (error)
- return (SET_ERROR(error));
+ return (error);
zvol_os_set_capacity(zv, volsize >> 9);
zv->zv_volsize = volsize;
@@ -1129,7 +1119,7 @@ zvol_shutdown_zv(zvol_state_t *zv)
*/
if (zv->zv_flags & ZVOL_WRITTEN_TO)
txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
- (void) dmu_objset_evict_dbufs(zv->zv_objset);
+ dmu_objset_evict_dbufs(zv->zv_objset);
}
/*
@@ -1206,7 +1196,7 @@ zvol_resume(zvol_state_t *zv)
if (zv->zv_flags & ZVOL_REMOVING)
cv_broadcast(&zv->zv_removing_cv);
- return (SET_ERROR(error));
+ return (error);
}
int
@@ -1222,7 +1212,7 @@ zvol_first_open(zvol_state_t *zv, boolean_t readonly)
boolean_t ro = (readonly || (strchr(zv->zv_name, '@') != NULL));
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os);
if (error)
- return (SET_ERROR(error));
+ return (error);
zv->zv_objset = os;
@@ -1434,6 +1424,48 @@ zvol_create_minors_cb(const char *dsname, void *arg)
return (0);
}
+static void
+zvol_task_update_status(zvol_task_t *task, uint64_t total, uint64_t done,
+ int error)
+{
+
+ task->zt_total += total;
+ task->zt_done += done;
+ if (task->zt_total != task->zt_done) {
+ task->zt_status = -1;
+ if (error)
+ task->zt_error = error;
+ }
+}
+
+static void
+zvol_task_report_status(zvol_task_t *task)
+{
+#ifdef ZFS_DEBUG
+ static const char *const msg[] = {
+ "create",
+ "remove",
+ "rename",
+ "set snapdev",
+ "set volmode",
+ "unknown",
+ };
+
+ if (task->zt_status == 0)
+ return;
+
+ zvol_async_op_t op = MIN(task->zt_op, ZVOL_ASYNC_MAX);
+ if (task->zt_error) {
+ dprintf("The %s minors zvol task was not ok, last error %d\n",
+ msg[op], task->zt_error);
+ } else {
+ dprintf("The %s minors zvol task was not ok\n", msg[op]);
+ }
+#else
+ (void) task;
+#endif
+}
+
/*
* Create minors for the specified dataset, including children and snapshots.
* Pay attention to the 'snapdev' property and iterate over the snapshots
@@ -1451,14 +1483,27 @@ zvol_create_minors_cb(const char *dsname, void *arg)
* 'visible' (which also verifies that the parent is a zvol), and if so,
* a minor node for that snapshot is created.
*/
-void
-zvol_create_minors_recursive(const char *name)
+static void
+zvol_create_minors_impl(zvol_task_t *task)
{
+ const char *name = task->zt_name1;
list_t minors_list;
minors_job_t *job;
+ uint64_t snapdev;
+ int total = 0, done = 0, last_error = 0, error;
- if (zvol_inhibit_dev)
+ /*
+ * Note: the dsl_pool_config_lock must not be held.
+ * Minor node creation needs to obtain the zvol_state_lock.
+ * zvol_open() obtains the zvol_state_lock and then the dsl pool
+ * config lock. Therefore, we can't have the config lock now if
+ * we are going to wait for the zvol_state_lock, because it
+ * would be a lock order inversion which could lead to deadlock.
+ */
+
+ if (zvol_inhibit_dev) {
return;
+ }
/*
* This is the list for prefetch jobs. Whenever we found a match
@@ -1474,13 +1519,16 @@ zvol_create_minors_recursive(const char *name)
if (strchr(name, '@') != NULL) {
- uint64_t snapdev;
-
- int error = dsl_prop_get_integer(name, "snapdev",
- &snapdev, NULL);
-
- if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
- (void) zvol_os_create_minor(name);
+ error = dsl_prop_get_integer(name, "snapdev", &snapdev, NULL);
+ if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE) {
+ error = zvol_os_create_minor(name);
+ if (error == 0) {
+ done++;
+ } else {
+ last_error = error;
+ }
+ total++;
+ }
} else {
fstrans_cookie_t cookie = spl_fstrans_mark();
(void) dmu_objset_find(name, zvol_create_minors_cb,
@@ -1495,41 +1543,30 @@ zvol_create_minors_recursive(const char *name)
* sequentially.
*/
while ((job = list_remove_head(&minors_list)) != NULL) {
- if (!job->error)
- (void) zvol_os_create_minor(job->name);
+ if (!job->error) {
+ error = zvol_os_create_minor(job->name);
+ if (error == 0) {
+ done++;
+ } else {
+ last_error = error;
+ }
+ } else if (job->error == EINVAL) {
+ /*
+ * The objset with the name requested by the current
+ * job exists, but its type is not zvol. Ignore this
+ * sort of error.
+ */
+ done++;
+ } else {
+ last_error = job->error;
+ }
+ total++;
kmem_strfree(job->name);
kmem_free(job, sizeof (minors_job_t));
}
list_destroy(&minors_list);
-}
-
-void
-zvol_create_minor(const char *name)
-{
- /*
- * Note: the dsl_pool_config_lock must not be held.
- * Minor node creation needs to obtain the zvol_state_lock.
- * zvol_open() obtains the zvol_state_lock and then the dsl pool
- * config lock. Therefore, we can't have the config lock now if
- * we are going to wait for the zvol_state_lock, because it
- * would be a lock order inversion which could lead to deadlock.
- */
-
- if (zvol_inhibit_dev)
- return;
-
- if (strchr(name, '@') != NULL) {
- uint64_t snapdev;
-
- int error = dsl_prop_get_integer(name,
- "snapdev", &snapdev, NULL);
-
- if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
- (void) zvol_os_create_minor(name);
- } else {
- (void) zvol_os_create_minor(name);
- }
+ zvol_task_update_status(task, total, done, last_error);
}
/*
@@ -1577,10 +1614,11 @@ zvol_free_task(void *arg)
zvol_os_free(arg);
}
-void
-zvol_remove_minors_impl(const char *name)
+static void
+zvol_remove_minors_impl(zvol_task_t *task)
{
zvol_state_t *zv, *zv_next;
+ const char *name = task ? task->zt_name1 : NULL;
int namelen = ((name) ? strlen(name) : 0);
taskqid_t t;
list_t delay_list, free_list;
@@ -1662,13 +1700,13 @@ zvol_remove_minors_impl(const char *name)
}
/* Remove minor for this specific volume only */
-static void
+static int
zvol_remove_minor_impl(const char *name)
{
zvol_state_t *zv = NULL, *zv_next;
if (zvol_inhibit_dev)
- return;
+ return (0);
rw_enter(&zvol_state_lock, RW_WRITER);
@@ -1684,7 +1722,7 @@ zvol_remove_minor_impl(const char *name)
if (zv == NULL) {
rw_exit(&zvol_state_lock);
- return;
+ return (SET_ERROR(ENOENT));
}
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
@@ -1698,7 +1736,7 @@ zvol_remove_minor_impl(const char *name)
mutex_exit(&zv->zv_state_lock);
rw_exit(&zvol_state_lock);
zvol_remove_minor_task(zv);
- return;
+ return (0);
}
zvol_remove(zv);
@@ -1708,16 +1746,20 @@ zvol_remove_minor_impl(const char *name)
rw_exit(&zvol_state_lock);
zvol_os_free(zv);
+
+ return (0);
}
/*
* Rename minors for specified dataset including children and snapshots.
*/
static void
-zvol_rename_minors_impl(const char *oldname, const char *newname)
+zvol_rename_minors_impl(zvol_task_t *task)
{
zvol_state_t *zv, *zv_next;
- int oldnamelen;
+ const char *oldname = task->zt_name1;
+ const char *newname = task->zt_name2;
+ int total = 0, done = 0, last_error = 0, error = 0, oldnamelen;
if (zvol_inhibit_dev)
return;
@@ -1732,24 +1774,31 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
mutex_enter(&zv->zv_state_lock);
if (strcmp(zv->zv_name, oldname) == 0) {
- zvol_os_rename_minor(zv, newname);
+ error = zvol_os_rename_minor(zv, newname);
} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
(zv->zv_name[oldnamelen] == '/' ||
zv->zv_name[oldnamelen] == '@')) {
char *name = kmem_asprintf("%s%c%s", newname,
zv->zv_name[oldnamelen],
zv->zv_name + oldnamelen + 1);
- zvol_os_rename_minor(zv, name);
+ error = zvol_os_rename_minor(zv, name);
kmem_strfree(name);
}
-
+ if (error) {
+ last_error = error;
+ } else {
+ done++;
+ }
+ total++;
mutex_exit(&zv->zv_state_lock);
}
rw_exit(&zvol_state_lock);
+ zvol_task_update_status(task, total, done, last_error);
}
typedef struct zvol_snapdev_cb_arg {
+ zvol_task_t *task;
uint64_t snapdev;
} zvol_snapdev_cb_arg_t;
@@ -1757,26 +1806,31 @@ static int
zvol_set_snapdev_cb(const char *dsname, void *param)
{
zvol_snapdev_cb_arg_t *arg = param;
+ int error = 0;
if (strchr(dsname, '@') == NULL)
return (0);
switch (arg->snapdev) {
case ZFS_SNAPDEV_VISIBLE:
- (void) zvol_os_create_minor(dsname);
+ error = zvol_os_create_minor(dsname);
break;
case ZFS_SNAPDEV_HIDDEN:
- (void) zvol_remove_minor_impl(dsname);
+ error = zvol_remove_minor_impl(dsname);
break;
}
+ zvol_task_update_status(arg->task, 1, error == 0, error);
return (0);
}
static void
-zvol_set_snapdev_impl(char *name, uint64_t snapdev)
+zvol_set_snapdev_impl(zvol_task_t *task)
{
- zvol_snapdev_cb_arg_t arg = {snapdev};
+ const char *name = task->zt_name1;
+ uint64_t snapdev = task->zt_value;
+
+ zvol_snapdev_cb_arg_t arg = {task, snapdev};
fstrans_cookie_t cookie = spl_fstrans_mark();
/*
* The zvol_set_snapdev_sync() sets snapdev appropriately
@@ -1787,11 +1841,14 @@ zvol_set_snapdev_impl(char *name, uint64_t snapdev)
}
static void
-zvol_set_volmode_impl(char *name, uint64_t volmode)
+zvol_set_volmode_impl(zvol_task_t *task)
{
+ const char *name = task->zt_name1;
+ uint64_t volmode = task->zt_value;
fstrans_cookie_t cookie;
uint64_t old_volmode;
zvol_state_t *zv;
+ int error = 0;
if (strchr(name, '@') != NULL)
return;
@@ -1804,7 +1861,7 @@ zvol_set_volmode_impl(char *name, uint64_t volmode)
*/
zv = zvol_find_by_name(name, RW_NONE);
if (zv == NULL && volmode == ZFS_VOLMODE_NONE)
- return;
+ return;
if (zv != NULL) {
old_volmode = zv->zv_volmode;
mutex_exit(&zv->zv_state_lock);
@@ -1815,51 +1872,34 @@ zvol_set_volmode_impl(char *name, uint64_t volmode)
cookie = spl_fstrans_mark();
switch (volmode) {
case ZFS_VOLMODE_NONE:
- (void) zvol_remove_minor_impl(name);
+ error = zvol_remove_minor_impl(name);
break;
case ZFS_VOLMODE_GEOM:
case ZFS_VOLMODE_DEV:
- (void) zvol_remove_minor_impl(name);
- (void) zvol_os_create_minor(name);
+ error = zvol_remove_minor_impl(name);
+ /*
+ * The remove minor call above may not be needed if
+ * volmode was previously set to 'none'. Ignore the
+ * error in that case.
+ */
+ if (error == ENOENT)
+ error = 0;
+ else if (error)
+ break;
+ error = zvol_os_create_minor(name);
break;
case ZFS_VOLMODE_DEFAULT:
- (void) zvol_remove_minor_impl(name);
+ error = zvol_remove_minor_impl(name);
if (zvol_volmode == ZFS_VOLMODE_NONE)
break;
else /* if zvol_volmode is invalid defaults to "geom" */
- (void) zvol_os_create_minor(name);
+ error = zvol_os_create_minor(name);
break;
}
+ zvol_task_update_status(task, 1, error == 0, error);
spl_fstrans_unmark(cookie);
}
-static zvol_task_t *
-zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
- uint64_t value)
-{
- zvol_task_t *task;
-
- /* Never allow tasks on hidden names. */
- if (name1[0] == '$')
- return (NULL);
-
- task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
- task->op = op;
- task->value = value;
-
- strlcpy(task->name1, name1, sizeof (task->name1));
- if (name2 != NULL)
- strlcpy(task->name2, name2, sizeof (task->name2));
-
- return (task);
-}
-
-static void
-zvol_task_free(zvol_task_t *task)
-{
- kmem_free(task, sizeof (zvol_task_t));
-}
-
/*
* The worker thread function performed asynchronously.
*/
@@ -1868,25 +1908,29 @@ zvol_task_cb(void *arg)
{
zvol_task_t *task = arg;
- switch (task->op) {
+ switch (task->zt_op) {
+ case ZVOL_ASYNC_CREATE_MINORS:
+ zvol_create_minors_impl(task);
+ break;
case ZVOL_ASYNC_REMOVE_MINORS:
- zvol_remove_minors_impl(task->name1);
+ zvol_remove_minors_impl(task);
break;
case ZVOL_ASYNC_RENAME_MINORS:
- zvol_rename_minors_impl(task->name1, task->name2);
+ zvol_rename_minors_impl(task);
break;
case ZVOL_ASYNC_SET_SNAPDEV:
- zvol_set_snapdev_impl(task->name1, task->value);
+ zvol_set_snapdev_impl(task);
break;
case ZVOL_ASYNC_SET_VOLMODE:
- zvol_set_volmode_impl(task->name1, task->value);
+ zvol_set_volmode_impl(task);
break;
default:
VERIFY(0);
break;
}
- zvol_task_free(task);
+ zvol_task_report_status(task);
+ kmem_free(task, sizeof (zvol_task_t));
}
typedef struct zvol_set_prop_int_arg {
@@ -1931,23 +1975,17 @@ zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
if (dsl_prop_get_int_ds(ds, prop_name, &prop) != 0)
return (0);
- switch (zsda->zsda_prop) {
- case ZFS_PROP_VOLMODE:
- task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname,
- NULL, prop);
- break;
- case ZFS_PROP_SNAPDEV:
- task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
- NULL, prop);
- break;
- default:
- task = NULL;
- break;
- }
-
- if (task == NULL)
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ if (zsda->zsda_prop == ZFS_PROP_VOLMODE) {
+ task->zt_op = ZVOL_ASYNC_SET_VOLMODE;
+ } else if (zsda->zsda_prop == ZFS_PROP_SNAPDEV) {
+ task->zt_op = ZVOL_ASYNC_SET_SNAPDEV;
+ } else {
+ kmem_free(task, sizeof (zvol_task_t));
return (0);
-
+ }
+ task->zt_value = prop;
+ strlcpy(task->zt_name1, dsname, sizeof (task->zt_name1));
(void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
task, TQ_SLEEP);
return (0);
@@ -2001,15 +2039,34 @@ zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source,
}
void
-zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
+zvol_create_minors(const char *name)
{
+ spa_t *spa;
zvol_task_t *task;
taskqid_t id;
- task = zvol_task_alloc(ZVOL_ASYNC_REMOVE_MINORS, name, NULL, ~0ULL);
- if (task == NULL)
+ if (spa_open(name, &spa, FTAG) != 0)
return;
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ task->zt_op = ZVOL_ASYNC_CREATE_MINORS;
+ strlcpy(task->zt_name1, name, sizeof (task->zt_name1));
+ id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
+ if (id != TASKQID_INVALID)
+ taskq_wait_id(spa->spa_zvol_taskq, id);
+
+ spa_close(spa, FTAG);
+}
+
+void
+zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
+{
+ zvol_task_t *task;
+ taskqid_t id;
+
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ task->zt_op = ZVOL_ASYNC_REMOVE_MINORS;
+ strlcpy(task->zt_name1, name, sizeof (task->zt_name1));
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
if ((async == B_FALSE) && (id != TASKQID_INVALID))
taskq_wait_id(spa->spa_zvol_taskq, id);
@@ -2022,10 +2079,10 @@ zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
zvol_task_t *task;
taskqid_t id;
- task = zvol_task_alloc(ZVOL_ASYNC_RENAME_MINORS, name1, name2, ~0ULL);
- if (task == NULL)
- return;
-
+ task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
+ task->zt_op = ZVOL_ASYNC_RENAME_MINORS;
+ strlcpy(task->zt_name1, name1, sizeof (task->zt_name1));
+ strlcpy(task->zt_name2, name2, sizeof (task->zt_name2));
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
if ((async == B_FALSE) && (id != TASKQID_INVALID))
taskq_wait_id(spa->spa_zvol_taskq, id);
@@ -2144,7 +2201,7 @@ zvol_fini_impl(void)
rw_destroy(&zvol_state_lock);
if (ztqs->tqs_taskq == NULL) {
- ASSERT3U(ztqs->tqs_cnt, ==, 0);
+ ASSERT0(ztqs->tqs_cnt);
} else {
for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
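
The zvol changes above convert minor creation, removal, rename, snapdev and volmode handling into taskq work items that accumulate a total/done count plus the last error seen, and report once when the task finishes. A rough, self-contained sketch of that accumulate-then-report shape (simplified types, not the zvol_task_t layout):

#include <stdint.h>
#include <stdio.h>

struct task_status {
	uint32_t total;		/* operations attempted */
	uint32_t done;		/* operations that succeeded */
	int last_error;		/* last nonzero errno observed */
};

static void
status_update(struct task_status *ts, uint32_t total, uint32_t done, int err)
{
	ts->total += total;
	ts->done += done;
	if (err != 0)
		ts->last_error = err;
}

static void
status_report(const struct task_status *ts, const char *op)
{
	if (ts->total != ts->done)
		fprintf(stderr, "%s: %u of %u failed, last error %d\n",
		    op, ts->total - ts->done, ts->total, ts->last_error);
}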