diff options
Diffstat (limited to 'sys/contrib/openzfs/module/zfs/metaslab.c')
-rw-r--r-- | sys/contrib/openzfs/module/zfs/metaslab.c | 141 |
1 files changed, 81 insertions, 60 deletions
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 7ed83b305db7..ab32bfec1310 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -48,10 +48,10 @@ /* * Metaslab granularity, in bytes. This is roughly similar to what would be * referred to as the "stripe size" in traditional RAID arrays. In normal - * operation, we will try to write this amount of data to a top-level vdev - * before moving on to the next one. + * operation, we will try to write this amount of data to each disk before + * moving on to the next top-level vdev. */ -static unsigned long metaslab_aliquot = 512 << 10; +static unsigned long metaslab_aliquot = 1024 * 1024; /* * For testing, make some blocks above a certain size be gang blocks. @@ -899,7 +899,8 @@ metaslab_group_activate(metaslab_group_t *mg) if (++mg->mg_activation_count <= 0) return; - mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children); + mg->mg_aliquot = metaslab_aliquot * MAX(1, + vdev_get_ndisks(mg->mg_vd) - vdev_get_nparity(mg->mg_vd)); metaslab_group_alloc_update(mg); if ((mgprev = mc->mc_allocator[0].mca_rotor) == NULL) { @@ -2750,7 +2751,8 @@ metaslab_fini_flush_data(metaslab_t *msp) mutex_exit(&spa->spa_flushed_ms_lock); spa_log_sm_decrement_mscount(spa, metaslab_unflushed_txg(msp)); - spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp)); + spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp), + metaslab_unflushed_dirty(msp)); } uint64_t @@ -3728,50 +3730,45 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx) metaslab_flush_update(msp, tx); } -/* - * Called when the metaslab has been flushed (its own spacemap now reflects - * all the contents of the pool-wide spacemap log). Updates the metaslab's - * metadata and any pool-wide related log space map data (e.g. summary, - * obsolete logs, etc..) to reflect that. - */ static void -metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx) +metaslab_unflushed_add(metaslab_t *msp, dmu_tx_t *tx) { - metaslab_group_t *mg = msp->ms_group; - spa_t *spa = mg->mg_vd->vdev_spa; - - ASSERT(MUTEX_HELD(&msp->ms_lock)); - - ASSERT3U(spa_sync_pass(spa), ==, 1); + spa_t *spa = msp->ms_group->mg_vd->vdev_spa; + ASSERT(spa_syncing_log_sm(spa) != NULL); + ASSERT(msp->ms_sm != NULL); ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs)); ASSERT(range_tree_is_empty(msp->ms_unflushed_frees)); - /* - * Just because a metaslab got flushed, that doesn't mean that - * it will pass through metaslab_sync_done(). Thus, make sure to - * update ms_synced_length here in case it doesn't. - */ - msp->ms_synced_length = space_map_length(msp->ms_sm); + mutex_enter(&spa->spa_flushed_ms_lock); + metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx); + metaslab_set_unflushed_dirty(msp, B_TRUE); + avl_add(&spa->spa_metaslabs_by_flushed, msp); + mutex_exit(&spa->spa_flushed_ms_lock); - /* - * We may end up here from metaslab_condense() without the - * feature being active. In that case this is a no-op. - */ - if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) - return; + spa_log_sm_increment_current_mscount(spa); + spa_log_summary_add_flushed_metaslab(spa, B_TRUE); +} +void +metaslab_unflushed_bump(metaslab_t *msp, dmu_tx_t *tx, boolean_t dirty) +{ + spa_t *spa = msp->ms_group->mg_vd->vdev_spa; ASSERT(spa_syncing_log_sm(spa) != NULL); ASSERT(msp->ms_sm != NULL); ASSERT(metaslab_unflushed_txg(msp) != 0); ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL), ==, msp); + ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs)); + ASSERT(range_tree_is_empty(msp->ms_unflushed_frees)); VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(spa)); /* update metaslab's position in our flushing tree */ uint64_t ms_prev_flushed_txg = metaslab_unflushed_txg(msp); + boolean_t ms_prev_flushed_dirty = metaslab_unflushed_dirty(msp); mutex_enter(&spa->spa_flushed_ms_lock); avl_remove(&spa->spa_metaslabs_by_flushed, msp); metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx); + metaslab_set_unflushed_dirty(msp, dirty); avl_add(&spa->spa_metaslabs_by_flushed, msp); mutex_exit(&spa->spa_flushed_ms_lock); @@ -3779,17 +3776,47 @@ metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx) spa_log_sm_decrement_mscount(spa, ms_prev_flushed_txg); spa_log_sm_increment_current_mscount(spa); + /* update log space map summary */ + spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg, + ms_prev_flushed_dirty); + spa_log_summary_add_flushed_metaslab(spa, dirty); + /* cleanup obsolete logs if any */ - uint64_t log_blocks_before = spa_log_sm_nblocks(spa); spa_cleanup_old_sm_logs(spa, tx); - uint64_t log_blocks_after = spa_log_sm_nblocks(spa); - VERIFY3U(log_blocks_after, <=, log_blocks_before); +} - /* update log space map summary */ - uint64_t blocks_gone = log_blocks_before - log_blocks_after; - spa_log_summary_add_flushed_metaslab(spa); - spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg); - spa_log_summary_decrement_blkcount(spa, blocks_gone); +/* + * Called when the metaslab has been flushed (its own spacemap now reflects + * all the contents of the pool-wide spacemap log). Updates the metaslab's + * metadata and any pool-wide related log space map data (e.g. summary, + * obsolete logs, etc..) to reflect that. + */ +static void +metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx) +{ + metaslab_group_t *mg = msp->ms_group; + spa_t *spa = mg->mg_vd->vdev_spa; + + ASSERT(MUTEX_HELD(&msp->ms_lock)); + + ASSERT3U(spa_sync_pass(spa), ==, 1); + + /* + * Just because a metaslab got flushed, that doesn't mean that + * it will pass through metaslab_sync_done(). Thus, make sure to + * update ms_synced_length here in case it doesn't. + */ + msp->ms_synced_length = space_map_length(msp->ms_sm); + + /* + * We may end up here from metaslab_condense() without the + * feature being active. In that case this is a no-op. + */ + if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP) || + metaslab_unflushed_txg(msp) == 0) + return; + + metaslab_unflushed_bump(msp, tx, B_FALSE); } boolean_t @@ -4005,23 +4032,6 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) ASSERT0(metaslab_allocated_space(msp)); } - if (metaslab_unflushed_txg(msp) == 0 && - spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) { - ASSERT(spa_syncing_log_sm(spa) != NULL); - - metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx); - spa_log_sm_increment_current_mscount(spa); - spa_log_summary_add_flushed_metaslab(spa); - - ASSERT(msp->ms_sm != NULL); - mutex_enter(&spa->spa_flushed_ms_lock); - avl_add(&spa->spa_metaslabs_by_flushed, msp); - mutex_exit(&spa->spa_flushed_ms_lock); - - ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs)); - ASSERT(range_tree_is_empty(msp->ms_unflushed_frees)); - } - if (!range_tree_is_empty(msp->ms_checkpointing) && vd->vdev_checkpoint_sm == NULL) { ASSERT(spa_has_checkpoint(spa)); @@ -4069,6 +4079,10 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) space_map_t *log_sm = spa_syncing_log_sm(spa); if (log_sm != NULL) { ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP)); + if (metaslab_unflushed_txg(msp) == 0) + metaslab_unflushed_add(msp, tx); + else if (!metaslab_unflushed_dirty(msp)) + metaslab_unflushed_bump(msp, tx, B_TRUE); space_map_write(log_sm, alloctree, SM_ALLOC, vd->vdev_id, tx); @@ -6131,6 +6145,12 @@ metaslab_enable(metaslab_t *msp, boolean_t sync, boolean_t unload) mutex_exit(&mg->mg_ms_disabled_lock); } +void +metaslab_set_unflushed_dirty(metaslab_t *ms, boolean_t dirty) +{ + ms->ms_unflushed_dirty = dirty; +} + static void metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx) { @@ -6167,15 +6187,16 @@ metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx) void metaslab_set_unflushed_txg(metaslab_t *ms, uint64_t txg, dmu_tx_t *tx) { - spa_t *spa = ms->ms_group->mg_vd->vdev_spa; - - if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) - return; - ms->ms_unflushed_txg = txg; metaslab_update_ondisk_flush_data(ms, tx); } +boolean_t +metaslab_unflushed_dirty(metaslab_t *ms) +{ + return (ms->ms_unflushed_dirty); +} + uint64_t metaslab_unflushed_txg(metaslab_t *ms) { |