diff options
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h | 36 |
1 files changed, 33 insertions, 3 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h index 36d11d9907b7..eb7c93205929 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h @@ -41,6 +41,23 @@ extern "C" { #endif +/* + * A metaslab class encompasses a category of allocatable top-level vdevs. + * Each top-level vdev is associated with a metaslab group which defines + * the allocatable region for that vdev. Examples of these categories include + * "normal" for data block allocations (i.e. main pool allocations) or "log" + * for allocations designated for intent log devices (i.e. slog devices). + * When a block allocation is requested from the SPA it is associated with a + * metaslab_class_t, and only top-level vdevs (i.e. metaslab groups) belonging + * to the class can be used to satisfy that request. Allocations are done + * by traversing the metaslab groups that are linked off of the mc_rotor field. + * This rotor points to the next metaslab group where allocations will be + * attempted. Allocating a block is a 3 step process -- select the metaslab + * group, select the metaslab, and then allocate the block. The metaslab + * class defines the low-level block allocator that will be used as the + * final step in allocation. These allocators are pluggable allowing each class + * to use a block allocator that best suits that class. + */ struct metaslab_class { spa_t *mc_spa; metaslab_group_t *mc_rotor; @@ -52,8 +69,18 @@ struct metaslab_class { uint64_t mc_space; /* total space (alloc + free) */ uint64_t mc_dspace; /* total deflated space */ uint64_t mc_minblocksize; + uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; }; +/* + * Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs) + * of a top-level vdev. They are linked togther to form a circular linked + * list and can belong to only one metaslab class. Metaslab groups may become + * ineligible for allocations for a number of reasons such as limited free + * space, fragmentation, or going offline. When this happens the allocator will + * simply find the next metaslab group in the linked list and attempt + * to allocate from that group instead. + */ struct metaslab_group { kmutex_t mg_lock; avl_tree_t mg_metaslab_tree; @@ -67,12 +94,14 @@ struct metaslab_group { taskq_t *mg_taskq; metaslab_group_t *mg_prev; metaslab_group_t *mg_next; + uint64_t mg_fragmentation; + uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; }; /* * This value defines the number of elements in the ms_lbas array. The value - * of 64 was chosen as it covers to cover all power of 2 buckets up to - * UINT64_MAX. This is the equivalent of highbit(UINT64_MAX). + * of 64 was chosen as it covers all power of 2 buckets up to UINT64_MAX. + * This is the equivalent of highbit(UINT64_MAX). */ #define MAX_LBAS 64 @@ -135,6 +164,7 @@ struct metaslab { uint64_t ms_id; uint64_t ms_start; uint64_t ms_size; + uint64_t ms_fragmentation; range_tree_t *ms_alloctree[TXG_SIZE]; range_tree_t *ms_freetree[TXG_SIZE]; @@ -142,12 +172,12 @@ struct metaslab { range_tree_t *ms_tree; boolean_t ms_condensing; /* condensing? */ + boolean_t ms_condense_wanted; boolean_t ms_loaded; boolean_t ms_loading; int64_t ms_deferspace; /* sum of ms_defermap[] space */ uint64_t ms_weight; /* weight vs. others in group */ - uint64_t ms_factor; uint64_t ms_access_txg; /* |