diff options
author | Xin LI <delphij@FreeBSD.org> | 2014-07-26 10:20:48 +0000 |
---|---|---|
committer | Xin LI <delphij@FreeBSD.org> | 2014-07-26 10:20:48 +0000 |
commit | 7e37b1e609f0e6163a3e9bb24cd76d4ebd9edc5b (patch) | |
tree | 5cafe8ee4c6165148ca59e000281c72b1f70ed51 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys | |
parent | 1bc04f6a8c72079e8cc170f8a764642c464cd4bd (diff) | |
parent | d6fb141e089436367a13a4f02c2ff21c71e1e8cc (diff) | |
download | src-7e37b1e609f0e6163a3e9bb24cd76d4ebd9edc5b.tar.gz src-7e37b1e609f0e6163a3e9bb24cd76d4ebd9edc5b.zip |
MFV r269010:
Import Illumos changes to address the following Illumos issues:
4976 zfs should only avoid writing to a failing non-redundant
top-level vdev
4978 ztest fails in get_metaslab_refcount()
4979 extend free space histogram to device and pool
4980 metaslabs should have a fragmentation metric
4981 remove fragmented ops vector from block allocator
4982 space_map object should proactively upgrade when feature
is enabled
4984 device selection should use fragmentation metric
MFC after: 2 weeks
Notes
Notes:
svn path=/head/; revision=269118
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys')
4 files changed, 81 insertions, 47 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h index fda9fffc70df..deefb196fdef 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_H @@ -38,23 +38,22 @@ extern "C" { typedef struct metaslab_ops { uint64_t (*msop_alloc)(metaslab_t *msp, uint64_t size); - boolean_t (*msop_fragmented)(metaslab_t *msp); } metaslab_ops_t; extern metaslab_ops_t *zfs_metaslab_ops; -metaslab_t *metaslab_init(metaslab_group_t *mg, uint64_t id, - uint64_t object, uint64_t txg); -void metaslab_fini(metaslab_t *msp); +metaslab_t *metaslab_init(metaslab_group_t *, uint64_t, + uint64_t, uint64_t); +void metaslab_fini(metaslab_t *); -void metaslab_load_wait(metaslab_t *msp); -int metaslab_load(metaslab_t *msp); -void metaslab_unload(metaslab_t *msp); +void metaslab_load_wait(metaslab_t *); +int metaslab_load(metaslab_t *); +void metaslab_unload(metaslab_t *); -void metaslab_sync(metaslab_t *msp, uint64_t txg); -void metaslab_sync_done(metaslab_t *msp, uint64_t txg); -void metaslab_sync_reassess(metaslab_group_t *mg); -uint64_t metaslab_block_maxsize(metaslab_t *msp); +void metaslab_sync(metaslab_t *, uint64_t); +void metaslab_sync_done(metaslab_t *, uint64_t); +void metaslab_sync_reassess(metaslab_group_t *); +uint64_t metaslab_block_maxsize(metaslab_t *); #define METASLAB_HINTBP_FAVOR 0x0 #define METASLAB_HINTBP_AVOID 0x1 @@ -62,29 +61,35 @@ uint64_t metaslab_block_maxsize(metaslab_t *msp); #define METASLAB_GANG_CHILD 0x4 #define METASLAB_GANG_AVOID 0x8 -int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, - blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags); -void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now); -int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg); -void metaslab_check_free(spa_t *spa, const blkptr_t *bp); +int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t, + blkptr_t *, int, uint64_t, blkptr_t *, int); +void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t); +int metaslab_claim(spa_t *, const blkptr_t *, uint64_t); +void metaslab_check_free(spa_t *, const blkptr_t *); -metaslab_class_t *metaslab_class_create(spa_t *spa, metaslab_ops_t *ops); -void metaslab_class_destroy(metaslab_class_t *mc); -int metaslab_class_validate(metaslab_class_t *mc); +metaslab_class_t *metaslab_class_create(spa_t *, metaslab_ops_t *); +void metaslab_class_destroy(metaslab_class_t *); +int metaslab_class_validate(metaslab_class_t *); +void metaslab_class_histogram_verify(metaslab_class_t *); +uint64_t metaslab_class_fragmentation(metaslab_class_t *); +uint64_t metaslab_class_expandable_space(metaslab_class_t *); -void metaslab_class_space_update(metaslab_class_t *mc, - int64_t alloc_delta, int64_t defer_delta, - int64_t space_delta, int64_t dspace_delta); -uint64_t metaslab_class_get_alloc(metaslab_class_t *mc); -uint64_t metaslab_class_get_space(metaslab_class_t *mc); -uint64_t metaslab_class_get_dspace(metaslab_class_t *mc); -uint64_t metaslab_class_get_deferred(metaslab_class_t *mc); +void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t, + int64_t, int64_t); +uint64_t metaslab_class_get_alloc(metaslab_class_t *); +uint64_t metaslab_class_get_space(metaslab_class_t *); +uint64_t metaslab_class_get_dspace(metaslab_class_t *); +uint64_t metaslab_class_get_deferred(metaslab_class_t *); uint64_t metaslab_class_get_minblocksize(metaslab_class_t *mc); -metaslab_group_t *metaslab_group_create(metaslab_class_t *mc, vdev_t *vd); -void metaslab_group_destroy(metaslab_group_t *mg); -void metaslab_group_activate(metaslab_group_t *mg); -void metaslab_group_passivate(metaslab_group_t *mg); +metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *); +void metaslab_group_destroy(metaslab_group_t *); +void metaslab_group_activate(metaslab_group_t *); +void metaslab_group_passivate(metaslab_group_t *); +uint64_t metaslab_group_get_space(metaslab_group_t *); +void metaslab_group_histogram_verify(metaslab_group_t *); +uint64_t metaslab_group_fragmentation(metaslab_group_t *); +void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *); #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h index 36d11d9907b7..eb7c93205929 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h @@ -41,6 +41,23 @@ extern "C" { #endif +/* + * A metaslab class encompasses a category of allocatable top-level vdevs. + * Each top-level vdev is associated with a metaslab group which defines + * the allocatable region for that vdev. Examples of these categories include + * "normal" for data block allocations (i.e. main pool allocations) or "log" + * for allocations designated for intent log devices (i.e. slog devices). + * When a block allocation is requested from the SPA it is associated with a + * metaslab_class_t, and only top-level vdevs (i.e. metaslab groups) belonging + * to the class can be used to satisfy that request. Allocations are done + * by traversing the metaslab groups that are linked off of the mc_rotor field. + * This rotor points to the next metaslab group where allocations will be + * attempted. Allocating a block is a 3 step process -- select the metaslab + * group, select the metaslab, and then allocate the block. The metaslab + * class defines the low-level block allocator that will be used as the + * final step in allocation. These allocators are pluggable allowing each class + * to use a block allocator that best suits that class. + */ struct metaslab_class { spa_t *mc_spa; metaslab_group_t *mc_rotor; @@ -52,8 +69,18 @@ struct metaslab_class { uint64_t mc_space; /* total space (alloc + free) */ uint64_t mc_dspace; /* total deflated space */ uint64_t mc_minblocksize; + uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; }; +/* + * Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs) + * of a top-level vdev. They are linked togther to form a circular linked + * list and can belong to only one metaslab class. Metaslab groups may become + * ineligible for allocations for a number of reasons such as limited free + * space, fragmentation, or going offline. When this happens the allocator will + * simply find the next metaslab group in the linked list and attempt + * to allocate from that group instead. + */ struct metaslab_group { kmutex_t mg_lock; avl_tree_t mg_metaslab_tree; @@ -67,12 +94,14 @@ struct metaslab_group { taskq_t *mg_taskq; metaslab_group_t *mg_prev; metaslab_group_t *mg_next; + uint64_t mg_fragmentation; + uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; }; /* * This value defines the number of elements in the ms_lbas array. The value - * of 64 was chosen as it covers to cover all power of 2 buckets up to - * UINT64_MAX. This is the equivalent of highbit(UINT64_MAX). + * of 64 was chosen as it covers all power of 2 buckets up to UINT64_MAX. + * This is the equivalent of highbit(UINT64_MAX). */ #define MAX_LBAS 64 @@ -135,6 +164,7 @@ struct metaslab { uint64_t ms_id; uint64_t ms_start; uint64_t ms_size; + uint64_t ms_fragmentation; range_tree_t *ms_alloctree[TXG_SIZE]; range_tree_t *ms_freetree[TXG_SIZE]; @@ -142,12 +172,12 @@ struct metaslab { range_tree_t *ms_tree; boolean_t ms_condensing; /* condensing? */ + boolean_t ms_condense_wanted; boolean_t ms_loaded; boolean_t ms_loading; int64_t ms_deferspace; /* sum of ms_defermap[] space */ uint64_t ms_weight; /* weight vs. others in group */ - uint64_t ms_factor; uint64_t ms_access_txg; /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h index 3691803306d8..67fa2767b1f8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #ifndef _SYS_SPACE_MAP_H @@ -44,9 +44,7 @@ extern "C" { * maintain backward compatibility. */ #define SPACE_MAP_SIZE_V0 (3 * sizeof (uint64_t)) -#define SPACE_MAP_HISTOGRAM_SIZE(sm) \ - (sizeof ((sm)->sm_phys->smp_histogram) / \ - sizeof ((sm)->sm_phys->smp_histogram[0])) +#define SPACE_MAP_HISTOGRAM_SIZE 32 /* * The space_map_phys is the on-disk representation of the space map. @@ -68,7 +66,7 @@ typedef struct space_map_phys { * whose size is: * 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1) */ - uint64_t smp_histogram[32]; /* histogram of free space */ + uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE]; } space_map_phys_t; /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h index 6ef5246b3036..6dc521baef13 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_DEBUG_H @@ -52,13 +52,14 @@ extern int zfs_flags; extern boolean_t zfs_recover; extern boolean_t zfs_free_leak_on_eio; -#define ZFS_DEBUG_DPRINTF (1<<0) -#define ZFS_DEBUG_DBUF_VERIFY (1<<1) -#define ZFS_DEBUG_DNODE_VERIFY (1<<2) -#define ZFS_DEBUG_SNAPNAMES (1<<3) -#define ZFS_DEBUG_MODIFY (1<<4) -#define ZFS_DEBUG_SPA (1<<5) -#define ZFS_DEBUG_ZIO_FREE (1<<6) +#define ZFS_DEBUG_DPRINTF (1<<0) +#define ZFS_DEBUG_DBUF_VERIFY (1<<1) +#define ZFS_DEBUG_DNODE_VERIFY (1<<2) +#define ZFS_DEBUG_SNAPNAMES (1<<3) +#define ZFS_DEBUG_MODIFY (1<<4) +#define ZFS_DEBUG_SPA (1<<5) +#define ZFS_DEBUG_ZIO_FREE (1<<6) +#define ZFS_DEBUG_HISTOGRAM_VERIFY (1<<7) #ifdef ZFS_DEBUG extern void __dprintf(const char *file, const char *func, |