Diffstat (limited to 'sys/contrib/openzfs/module/zfs/metaslab.c')
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c  141
1 file changed, 81 insertions(+), 60 deletions(-)
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 7ed83b305db7..ab32bfec1310 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -48,10 +48,10 @@
/*
* Metaslab granularity, in bytes. This is roughly similar to what would be
* referred to as the "stripe size" in traditional RAID arrays. In normal
- * operation, we will try to write this amount of data to a top-level vdev
- * before moving on to the next one.
+ * operation, we will try to write this amount of data to each disk before
+ * moving on to the next top-level vdev.
*/
-static unsigned long metaslab_aliquot = 512 << 10;
+static unsigned long metaslab_aliquot = 1024 * 1024;
/*
* For testing, make some blocks above a certain size be gang blocks.
@@ -899,7 +899,8 @@ metaslab_group_activate(metaslab_group_t *mg)
if (++mg->mg_activation_count <= 0)
return;
- mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children);
+ mg->mg_aliquot = metaslab_aliquot * MAX(1,
+ vdev_get_ndisks(mg->mg_vd) - vdev_get_nparity(mg->mg_vd));
metaslab_group_alloc_update(mg);
if ((mgprev = mc->mc_allocator[0].mca_rotor) == NULL) {
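[Editorial sketch, not part of the commit above: the two hunks so far change metaslab_aliquot to 1 MiB per disk and make metaslab_group_activate() scale the group aliquot by data disks (vdev_get_ndisks() - vdev_get_nparity()) instead of vdev_children. A minimal, self-contained C example of how the numbers work out, assuming a hypothetical 10-disk raidz2 top-level vdev:]

#include <stdio.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	unsigned long metaslab_aliquot = 1024 * 1024;	/* 1 MiB per data disk */
	unsigned long ndisks = 10, nparity = 2;		/* hypothetical raidz2 geometry */

	/* Mirrors the mg_aliquot formula in the hunk above: scale by data disks. */
	unsigned long mg_aliquot = metaslab_aliquot * MAX(1, ndisks - nparity);

	/* Prints 8388608: roughly 8 MiB go to this group before the rotor advances. */
	printf("mg_aliquot = %lu bytes\n", mg_aliquot);
	return (0);
}
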
@@ -2750,7 +2751,8 @@ metaslab_fini_flush_data(metaslab_t *msp)
mutex_exit(&spa->spa_flushed_ms_lock);
spa_log_sm_decrement_mscount(spa, metaslab_unflushed_txg(msp));
- spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp));
+ spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp),
+ metaslab_unflushed_dirty(msp));
}
uint64_t
@@ -3728,50 +3730,45 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
metaslab_flush_update(msp, tx);
}
-/*
- * Called when the metaslab has been flushed (its own spacemap now reflects
- * all the contents of the pool-wide spacemap log). Updates the metaslab's
- * metadata and any pool-wide related log space map data (e.g. summary,
- * obsolete logs, etc..) to reflect that.
- */
static void
-metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
+metaslab_unflushed_add(metaslab_t *msp, dmu_tx_t *tx)
{
- metaslab_group_t *mg = msp->ms_group;
- spa_t *spa = mg->mg_vd->vdev_spa;
-
- ASSERT(MUTEX_HELD(&msp->ms_lock));
-
- ASSERT3U(spa_sync_pass(spa), ==, 1);
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ ASSERT(spa_syncing_log_sm(spa) != NULL);
+ ASSERT(msp->ms_sm != NULL);
ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
- /*
- * Just because a metaslab got flushed, that doesn't mean that
- * it will pass through metaslab_sync_done(). Thus, make sure to
- * update ms_synced_length here in case it doesn't.
- */
- msp->ms_synced_length = space_map_length(msp->ms_sm);
+ mutex_enter(&spa->spa_flushed_ms_lock);
+ metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+ metaslab_set_unflushed_dirty(msp, B_TRUE);
+ avl_add(&spa->spa_metaslabs_by_flushed, msp);
+ mutex_exit(&spa->spa_flushed_ms_lock);
- /*
- * We may end up here from metaslab_condense() without the
- * feature being active. In that case this is a no-op.
- */
- if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
- return;
+ spa_log_sm_increment_current_mscount(spa);
+ spa_log_summary_add_flushed_metaslab(spa, B_TRUE);
+}
+void
+metaslab_unflushed_bump(metaslab_t *msp, dmu_tx_t *tx, boolean_t dirty)
+{
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
ASSERT(spa_syncing_log_sm(spa) != NULL);
ASSERT(msp->ms_sm != NULL);
ASSERT(metaslab_unflushed_txg(msp) != 0);
ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL), ==, msp);
+ ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
+ ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(spa));
/* update metaslab's position in our flushing tree */
uint64_t ms_prev_flushed_txg = metaslab_unflushed_txg(msp);
+ boolean_t ms_prev_flushed_dirty = metaslab_unflushed_dirty(msp);
mutex_enter(&spa->spa_flushed_ms_lock);
avl_remove(&spa->spa_metaslabs_by_flushed, msp);
metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
+ metaslab_set_unflushed_dirty(msp, dirty);
avl_add(&spa->spa_metaslabs_by_flushed, msp);
mutex_exit(&spa->spa_flushed_ms_lock);
@@ -3779,17 +3776,47 @@ metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
spa_log_sm_decrement_mscount(spa, ms_prev_flushed_txg);
spa_log_sm_increment_current_mscount(spa);
+ /* update log space map summary */
+ spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg,
+ ms_prev_flushed_dirty);
+ spa_log_summary_add_flushed_metaslab(spa, dirty);
+
/* cleanup obsolete logs if any */
- uint64_t log_blocks_before = spa_log_sm_nblocks(spa);
spa_cleanup_old_sm_logs(spa, tx);
- uint64_t log_blocks_after = spa_log_sm_nblocks(spa);
- VERIFY3U(log_blocks_after, <=, log_blocks_before);
+}
- /* update log space map summary */
- uint64_t blocks_gone = log_blocks_before - log_blocks_after;
- spa_log_summary_add_flushed_metaslab(spa);
- spa_log_summary_decrement_mscount(spa, ms_prev_flushed_txg);
- spa_log_summary_decrement_blkcount(spa, blocks_gone);
+/*
+ * Called when the metaslab has been flushed (its own spacemap now reflects
+ * all the contents of the pool-wide spacemap log). Updates the metaslab's
+ * metadata and any pool-wide related log space map data (e.g. summary,
+ * obsolete logs, etc..) to reflect that.
+ */
+static void
+metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
+{
+ metaslab_group_t *mg = msp->ms_group;
+ spa_t *spa = mg->mg_vd->vdev_spa;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+ ASSERT3U(spa_sync_pass(spa), ==, 1);
+
+ /*
+ * Just because a metaslab got flushed, that doesn't mean that
+ * it will pass through metaslab_sync_done(). Thus, make sure to
+ * update ms_synced_length here in case it doesn't.
+ */
+ msp->ms_synced_length = space_map_length(msp->ms_sm);
+
+ /*
+ * We may end up here from metaslab_condense() without the
+ * feature being active. In that case this is a no-op.
+ */
+ if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP) ||
+ metaslab_unflushed_txg(msp) == 0)
+ return;
+
+ metaslab_unflushed_bump(msp, tx, B_FALSE);
}
boolean_t
@@ -4005,23 +4032,6 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
ASSERT0(metaslab_allocated_space(msp));
}
- if (metaslab_unflushed_txg(msp) == 0 &&
- spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
- ASSERT(spa_syncing_log_sm(spa) != NULL);
-
- metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
- spa_log_sm_increment_current_mscount(spa);
- spa_log_summary_add_flushed_metaslab(spa);
-
- ASSERT(msp->ms_sm != NULL);
- mutex_enter(&spa->spa_flushed_ms_lock);
- avl_add(&spa->spa_metaslabs_by_flushed, msp);
- mutex_exit(&spa->spa_flushed_ms_lock);
-
- ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
- ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
- }
-
if (!range_tree_is_empty(msp->ms_checkpointing) &&
vd->vdev_checkpoint_sm == NULL) {
ASSERT(spa_has_checkpoint(spa));
@@ -4069,6 +4079,10 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
space_map_t *log_sm = spa_syncing_log_sm(spa);
if (log_sm != NULL) {
ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_LOG_SPACEMAP));
+ if (metaslab_unflushed_txg(msp) == 0)
+ metaslab_unflushed_add(msp, tx);
+ else if (!metaslab_unflushed_dirty(msp))
+ metaslab_unflushed_bump(msp, tx, B_TRUE);
space_map_write(log_sm, alloctree, SM_ALLOC,
vd->vdev_id, tx);
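[Editorial sketch, not part of the commit above: a hypothetical, self-contained C model of the decision this hunk adds to metaslab_sync(). A metaslab that has never been tracked is registered (metaslab_unflushed_add()), while one whose last flush left it clean is re-keyed to the syncing txg and marked dirty again (metaslab_unflushed_bump() with B_TRUE). The struct and helper below are simplified stand-ins, not ZFS types.]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ms_model {
	uint64_t unflushed_txg;		/* 0 == not in the flushing tree yet */
	bool unflushed_dirty;		/* has changes not yet in its own spacemap */
};

static const char *
track(struct ms_model *ms, uint64_t syncing_txg)
{
	if (ms->unflushed_txg == 0) {
		/* First time this metaslab dirties the log space map: "add". */
		ms->unflushed_txg = syncing_txg;
		ms->unflushed_dirty = true;
		return ("add");
	}
	if (!ms->unflushed_dirty) {
		/* Previously flushed clean; re-key to this txg and mark dirty: "bump". */
		ms->unflushed_txg = syncing_txg;
		ms->unflushed_dirty = true;
		return ("bump");
	}
	return ("already dirty, nothing to do");
}

int
main(void)
{
	struct ms_model ms = { 0, false };

	printf("txg 100: %s\n", track(&ms, 100));	/* add */
	ms.unflushed_dirty = false;			/* later flushed clean */
	printf("txg 103: %s\n", track(&ms, 103));	/* bump */
	printf("txg 103: %s\n", track(&ms, 103));	/* already dirty */
	return (0);
}
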
@@ -6131,6 +6145,12 @@ metaslab_enable(metaslab_t *msp, boolean_t sync, boolean_t unload)
mutex_exit(&mg->mg_ms_disabled_lock);
}
+void
+metaslab_set_unflushed_dirty(metaslab_t *ms, boolean_t dirty)
+{
+ ms->ms_unflushed_dirty = dirty;
+}
+
static void
metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
{
@@ -6167,15 +6187,16 @@ metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
void
metaslab_set_unflushed_txg(metaslab_t *ms, uint64_t txg, dmu_tx_t *tx)
{
- spa_t *spa = ms->ms_group->mg_vd->vdev_spa;
-
- if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
- return;
-
ms->ms_unflushed_txg = txg;
metaslab_update_ondisk_flush_data(ms, tx);
}
+boolean_t
+metaslab_unflushed_dirty(metaslab_t *ms)
+{
+ return (ms->ms_unflushed_dirty);
+}
+
uint64_t
metaslab_unflushed_txg(metaslab_t *ms)
{