aboutsummaryrefslogtreecommitdiff
path: root/cddl/contrib/opensolaris
diff options
context:
space:
mode:
authorAndriy Gapon <avg@FreeBSD.org>2019-11-21 13:35:43 +0000
committerAndriy Gapon <avg@FreeBSD.org>2019-11-21 13:35:43 +0000
commit8491540808d9b00abc2a1ea8e37a697ce4020120 (patch)
tree0a255d1562e375d932fd1fe55fbbbf6f4f9d7b85 /cddl/contrib/opensolaris
parent68cad681496af17482fdd4d9cb71d48fa85e605c (diff)
parent4ca571aba060489dbd628e880af130c26c80b269 (diff)
downloadsrc-8491540808d9b00abc2a1ea8e37a697ce4020120.tar.gz
src-8491540808d9b00abc2a1ea8e37a697ce4020120.zip
MFV r354383: 10592 misc. metaslab and vdev related ZoL bug fixes
illumos/illumos-gate@555d674d5d4b8191dc83723188349d28278b2431 https://github.com/illumos/illumos-gate/commit/555d674d5d4b8191dc83723188349d28278b2431 https://www.illumos.org/issues/10592 This is a collection of recent fixes from ZoL: 8eef997679b Error path in metaslab_load_impl() forgets to drop ms_sync_lock 928e8ad47d3 Introduce auxiliary metaslab histograms 425d3237ee8 Get rid of space_map_update() for ms_synced_length 6c926f426a2 Simplify log vdev removal code 21e7cf5da89 zdb -L should skip leak detection altogether df72b8bebe0 Rename range_tree_verify to range_tree_verify_not_present 75058f33034 Remove unused vdev_t fields Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com> Author: Serapheim Dimitropoulos <serapheim@delphix.com> MFC after: 4 weeks
Notes
Notes: svn path=/head/; revision=354948
Diffstat (limited to 'cddl/contrib/opensolaris')
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb.84
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb.c275
2 files changed, 147 insertions, 132 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.8 b/cddl/contrib/opensolaris/cmd/zdb/zdb.8
index 8561b972ac45..6ec6f7481c03 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.8
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.8
@@ -10,7 +10,7 @@
.\"
.\"
.\" Copyright 2012, Richard Lowe.
-.\" Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\"
.Dd October 06, 2017
@@ -187,7 +187,7 @@ If the
.Fl u
option is also specified, also display the uberblocks on this device.
.It Fl L
-Disable leak tracing and the loading of space maps.
+Disable leak detection and the loading of space maps.
By default,
.Nm
verifies that all non-free blocks are referenced, which can be very expensive.
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index e3ee43a8661c..d51ddc68908c 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -785,18 +785,21 @@ dump_spacemap(objset_t *os, space_map_t *sm)
return;
(void) printf("space map object %llu:\n",
- (longlong_t)sm->sm_phys->smp_object);
- (void) printf(" smp_objsize = 0x%llx\n",
- (longlong_t)sm->sm_phys->smp_objsize);
+ (longlong_t)sm->sm_object);
+ (void) printf(" smp_length = 0x%llx\n",
+ (longlong_t)sm->sm_phys->smp_length);
(void) printf(" smp_alloc = 0x%llx\n",
(longlong_t)sm->sm_phys->smp_alloc);
+ if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
+ return;
+
/*
* Print out the freelist entries in both encoded and decoded form.
*/
uint8_t mapshift = sm->sm_shift;
int64_t alloc = 0;
- uint64_t word;
+ uint64_t word, entry_id = 0;
for (uint64_t offset = 0; offset < space_map_length(sm);
offset += sizeof (word)) {
@@ -804,11 +807,12 @@ dump_spacemap(objset_t *os, space_map_t *sm)
sizeof (word), &word, DMU_READ_PREFETCH));
if (sm_entry_is_debug(word)) {
- (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
- (u_longlong_t)(offset / sizeof (word)),
+ (void) printf("\t [%6llu] %s: txg %llu pass %llu\n",
+ (u_longlong_t)entry_id,
ddata[SM_DEBUG_ACTION_DECODE(word)],
(u_longlong_t)SM_DEBUG_TXG_DECODE(word),
(u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+ entry_id++;
continue;
}
@@ -846,7 +850,7 @@ dump_spacemap(objset_t *os, space_map_t *sm)
(void) printf("\t [%6llu] %c range:"
" %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
- (u_longlong_t)(offset / sizeof (word)),
+ (u_longlong_t)entry_id,
entry_type, (u_longlong_t)entry_off,
(u_longlong_t)(entry_off + entry_run),
(u_longlong_t)entry_run,
@@ -856,8 +860,9 @@ dump_spacemap(objset_t *os, space_map_t *sm)
alloc += entry_run;
else
alloc -= entry_run;
+ entry_id++;
}
- if ((uint64_t)alloc != space_map_allocated(sm)) {
+ if (alloc != space_map_allocated(sm)) {
(void) printf("space_map_object alloc (%lld) INCONSISTENT "
"with space map summary (%lld)\n",
(longlong_t)space_map_allocated(sm), (longlong_t)alloc);
@@ -921,11 +926,8 @@ dump_metaslab(metaslab_t *msp)
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}
- if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
- ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
-
- dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
- }
+ ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
+ dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
}
static void
@@ -3140,6 +3142,8 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
ddt_entry_t dde;
int error;
+ ASSERT(!dump_opt['L']);
+
bzero(&ddb, sizeof (ddb));
while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
blkptr_t blk;
@@ -3163,12 +3167,10 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
zcb->zcb_dedup_blocks++;
}
}
- if (!dump_opt['L']) {
- ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
- ddt_enter(ddt);
- VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
- ddt_exit(ddt);
- }
+ ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+ ddt_enter(ddt);
+ VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
+ ddt_exit(ddt);
}
ASSERT(error == ENOENT);
@@ -3210,6 +3212,9 @@ claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
static void
zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
{
+ if (dump_opt['L'])
+ return;
+
if (spa->spa_vdev_removal == NULL)
return;
@@ -3301,7 +3306,6 @@ zdb_load_obsolete_counts(vdev_t *vd)
space_map_t *prev_obsolete_sm = NULL;
VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
- space_map_update(prev_obsolete_sm);
vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
prev_obsolete_sm);
space_map_close(prev_obsolete_sm);
@@ -3395,9 +3399,9 @@ zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
- space_map_update(checkpoint_sm);
VERIFY0(space_map_iterate(checkpoint_sm,
+ space_map_length(checkpoint_sm),
checkpoint_sm_exclude_entry_cb, &cseea));
space_map_close(checkpoint_sm);
@@ -3407,6 +3411,8 @@ zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
static void
zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
{
+ ASSERT(!dump_opt['L']);
+
vdev_t *rvd = spa->spa_root_vdev;
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
@@ -3503,6 +3509,8 @@ load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
static void
zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
{
+ ASSERT(!dump_opt['L']);
+
vdev_t *rvd = spa->spa_root_vdev;
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
@@ -3549,67 +3557,63 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
{
zcb->zcb_spa = spa;
- if (!dump_opt['L']) {
- dsl_pool_t *dp = spa->spa_dsl_pool;
- vdev_t *rvd = spa->spa_root_vdev;
+ if (dump_opt['L'])
+ return;
- /*
- * We are going to be changing the meaning of the metaslab's
- * ms_allocatable. Ensure that the allocator doesn't try to
- * use the tree.
- */
- spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
- spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
+ dsl_pool_t *dp = spa->spa_dsl_pool;
+ vdev_t *rvd = spa->spa_root_vdev;
- zcb->zcb_vd_obsolete_counts =
- umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
- UMEM_NOFAIL);
+ /*
+ * We are going to be changing the meaning of the metaslab's
+ * ms_allocatable. Ensure that the allocator doesn't try to
+ * use the tree.
+ */
+ spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
+ spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
- /*
- * For leak detection, we overload the ms_allocatable trees
- * to contain allocated segments instead of free segments.
- * As a result, we can't use the normal metaslab_load/unload
- * interfaces.
- */
- zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
- load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
+ zcb->zcb_vd_obsolete_counts =
+ umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
+ UMEM_NOFAIL);
- /*
- * On load_concrete_ms_allocatable_trees() we loaded all the
- * allocated entries from the ms_sm to the ms_allocatable for
- * each metaslab. If the pool has a checkpoint or is in the
- * middle of discarding a checkpoint, some of these blocks
- * may have been freed but their ms_sm may not have been
- * updated because they are referenced by the checkpoint. In
- * order to avoid false-positives during leak-detection, we
- * go through the vdev's checkpoint space map and exclude all
- * its entries from their relevant ms_allocatable.
- *
- * We also aggregate the space held by the checkpoint and add
- * it to zcb_checkpoint_size.
- *
- * Note that at this point we are also verifying that all the
- * entries on the checkpoint_sm are marked as allocated in
- * the ms_sm of their relevant metaslab.
- * [see comment in checkpoint_sm_exclude_entry_cb()]
- */
- zdb_leak_init_exclude_checkpoint(spa, zcb);
+ /*
+ * For leak detection, we overload the ms_allocatable trees
+ * to contain allocated segments instead of free segments.
+ * As a result, we can't use the normal metaslab_load/unload
+ * interfaces.
+ */
+ zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
+ load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
- /* for cleaner progress output */
- (void) fprintf(stderr, "\n");
+ /*
+ * On load_concrete_ms_allocatable_trees() we loaded all the
+ * allocated entries from the ms_sm to the ms_allocatable for
+ * each metaslab. If the pool has a checkpoint or is in the
+ * middle of discarding a checkpoint, some of these blocks
+ * may have been freed but their ms_sm may not have been
+ * updated because they are referenced by the checkpoint. In
+ * order to avoid false-positives during leak-detection, we
+ * go through the vdev's checkpoint space map and exclude all
+ * its entries from their relevant ms_allocatable.
+ *
+ * We also aggregate the space held by the checkpoint and add
+ * it to zcb_checkpoint_size.
+ *
+ * Note that at this point we are also verifying that all the
+ * entries on the checkpoint_sm are marked as allocated in
+ * the ms_sm of their relevant metaslab.
+ * [see comment in checkpoint_sm_exclude_entry_cb()]
+ */
+ zdb_leak_init_exclude_checkpoint(spa, zcb);
+ ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));
- if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
- ASSERT(spa_feature_is_enabled(spa,
- SPA_FEATURE_DEVICE_REMOVAL));
- (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
- increment_indirect_mapping_cb, zcb, NULL);
- }
- } else {
- /*
- * If leak tracing is disabled, we still need to consider
- * any checkpointed space in our space verification.
- */
- zcb->zcb_checkpoint_size += spa_get_checkpoint_space(spa);
+ /* for cleaner progress output */
+ (void) fprintf(stderr, "\n");
+
+ if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
+ ASSERT(spa_feature_is_enabled(spa,
+ SPA_FEATURE_DEVICE_REMOVAL));
+ (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
+ increment_indirect_mapping_cb, zcb, NULL);
}
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
@@ -3690,52 +3694,58 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
static boolean_t
zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
{
+ if (dump_opt['L'])
+ return (B_FALSE);
+
boolean_t leaks = B_FALSE;
- if (!dump_opt['L']) {
- vdev_t *rvd = spa->spa_root_vdev;
- for (unsigned c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- metaslab_group_t *mg = vd->vdev_mg;
-
- if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
- leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
- }
- for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- ASSERT3P(mg, ==, msp->ms_group);
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (unsigned c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+#if DEBUG
+ metaslab_group_t *mg = vd->vdev_mg;
+#endif
- /*
- * ms_allocatable has been overloaded
- * to contain allocated segments. Now that
- * we finished traversing all blocks, any
- * block that remains in the ms_allocatable
- * represents an allocated block that we
- * did not claim during the traversal.
- * Claimed blocks would have been removed
- * from the ms_allocatable. For indirect
- * vdevs, space remaining in the tree
- * represents parts of the mapping that are
- * not referenced, which is not a bug.
- */
- if (vd->vdev_ops == &vdev_indirect_ops) {
- range_tree_vacate(msp->ms_allocatable,
- NULL, NULL);
- } else {
- range_tree_vacate(msp->ms_allocatable,
- zdb_leak, vd);
- }
+ if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
+ leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
+ }
- if (msp->ms_loaded) {
- msp->ms_loaded = B_FALSE;
- }
+ for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ ASSERT3P(mg, ==, msp->ms_group);
+
+ /*
+ * ms_allocatable has been overloaded
+ * to contain allocated segments. Now that
+ * we finished traversing all blocks, any
+ * block that remains in the ms_allocatable
+ * represents an allocated block that we
+ * did not claim during the traversal.
+ * Claimed blocks would have been removed
+ * from the ms_allocatable. For indirect
+ * vdevs, space remaining in the tree
+ * represents parts of the mapping that are
+ * not referenced, which is not a bug.
+ */
+ if (vd->vdev_ops == &vdev_indirect_ops) {
+ range_tree_vacate(msp->ms_allocatable,
+ NULL, NULL);
+ } else {
+ range_tree_vacate(msp->ms_allocatable,
+ zdb_leak, vd);
+ }
+
+ if (msp->ms_loaded) {
+ msp->ms_loaded = B_FALSE;
}
}
- umem_free(zcb->zcb_vd_obsolete_counts,
- rvd->vdev_children * sizeof (uint32_t *));
- zcb->zcb_vd_obsolete_counts = NULL;
}
+
+ umem_free(zcb->zcb_vd_obsolete_counts,
+ rvd->vdev_children * sizeof (uint32_t *));
+ zcb->zcb_vd_obsolete_counts = NULL;
+
return (leaks);
}
@@ -3774,13 +3784,18 @@ dump_block_stats(spa_t *spa)
!dump_opt['L'] ? "nothing leaked " : "");
/*
- * Load all space maps as SM_ALLOC maps, then traverse the pool
- * claiming each block we discover. If the pool is perfectly
- * consistent, the space maps will be empty when we're done.
- * Anything left over is a leak; any block we can't claim (because
- * it's not part of any space map) is a double allocation,
- * reference to a freed block, or an unclaimed log block.
+ * When leak detection is enabled we load all space maps as SM_ALLOC
+ * maps, then traverse the pool claiming each block we discover. If
+ * the pool is perfectly consistent, the segment trees will be empty
+ * when we're done. Anything left over is a leak; any block we can't
+ * claim (because it's not part of any space map) is a double
+ * allocation, reference to a freed block, or an unclaimed log block.
+ *
+ * When leak detection is disabled (-L option) we still traverse the
+ * pool claiming each block we discover, but we skip opening any space
+ * maps.
*/
+ bzero(&zcb, sizeof (zdb_cb_t));
zdb_leak_init(spa, &zcb);
/*
@@ -3859,11 +3874,10 @@ dump_block_stats(spa_t *spa)
total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
- if (total_found == total_alloc) {
- if (!dump_opt['L'])
- (void) printf("\n\tNo leaks (block sum matches space"
- " maps exactly)\n");
- } else {
+ if (total_found == total_alloc && !dump_opt['L']) {
+ (void) printf("\n\tNo leaks (block sum matches space"
+ " maps exactly)\n");
+ } else if (!dump_opt['L']) {
(void) printf("block traversal size %llu != alloc %llu "
"(%s %lld)\n",
(u_longlong_t)total_found,
@@ -4203,7 +4217,6 @@ verify_device_removal_feature_counts(spa_t *spa)
spa->spa_meta_objset,
scip->scip_prev_obsolete_sm_object,
0, vd->vdev_asize, 0));
- space_map_update(prev_obsolete_sm);
dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
(void) printf("\n");
space_map_close(prev_obsolete_sm);
@@ -4409,7 +4422,8 @@ verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg)
* their respective ms_allocateable trees should not contain them.
*/
mutex_enter(&ms->ms_lock);
- range_tree_verify(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
+ range_tree_verify_not_present(ms->ms_allocatable,
+ sme->sme_offset, sme->sme_run);
mutex_exit(&ms->ms_lock);
return (0);
@@ -4472,7 +4486,6 @@ verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
checkpoint_sm_obj, 0, current_vd->vdev_asize,
current_vd->vdev_ashift));
- space_map_update(checkpoint_sm);
verify_checkpoint_sm_entry_cb_arg_t vcsec;
vcsec.vcsec_vd = ckpoint_vd;
@@ -4480,6 +4493,7 @@ verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
vcsec.vcsec_num_entries =
space_map_length(checkpoint_sm) / sizeof (uint64_t);
VERIFY0(space_map_iterate(checkpoint_sm,
+ space_map_length(checkpoint_sm),
verify_checkpoint_sm_entry_cb, &vcsec));
dump_spacemap(current->spa_meta_objset, checkpoint_sm);
space_map_close(checkpoint_sm);
@@ -4559,7 +4573,7 @@ verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
* are part of the checkpoint were freed by mistake.
*/
range_tree_walk(ckpoint_msp->ms_allocatable,
- (range_tree_func_t *)range_tree_verify,
+ (range_tree_func_t *)range_tree_verify_not_present,
current_msp->ms_allocatable);
}
}
@@ -4571,6 +4585,8 @@ verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
static void
verify_checkpoint_blocks(spa_t *spa)
{
+ ASSERT(!dump_opt['L']);
+
spa_t *checkpoint_spa;
char *checkpoint_pool;
nvlist_t *config = NULL;
@@ -4636,7 +4652,6 @@ dump_leftover_checkpoint_blocks(spa_t *spa)
VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
- space_map_update(checkpoint_sm);
dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
space_map_close(checkpoint_sm);
}