aboutsummaryrefslogtreecommitdiff
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
diff options
context:
space:
mode:
authorAndriy Gapon <avg@FreeBSD.org>2019-10-16 06:26:51 +0000
committerAndriy Gapon <avg@FreeBSD.org>2019-10-16 06:26:51 +0000
commit6cb9ab2bad02615ce39526ccdf1b58b63da4d155 (patch)
tree8aa9f21e29df0a7268ebf9dfeca9d3938c94e71f /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
parentb399ca755a71c0de0c7963b509a15260aaa07a99 (diff)
parentb7cab79de23a8bfb689bfb5bddb4deb2e09b360d (diff)
downloadsrc-6cb9ab2bad02615ce39526ccdf1b58b63da4d155.tar.gz
src-6cb9ab2bad02615ce39526ccdf1b58b63da4d155.zip
MFC r353611: 10330 merge recent ZoL vdev and metaslab changes
illumos/illumos-gate@a0b03b161c4df3cfc54fbc741db09b3bdc23ffba https://github.com/illumos/illumos-gate/commit/a0b03b161c4df3cfc54fbc741db09b3bdc23ffba https://www.illumos.org/issues/10330 3 recent ZoL changes in the vdev and metaslab code which we can pull over: PR 8324 c853f382db 8324 Change target size of metaslabs from 256GB to 16GB PR 8290 b194fab0fb 8290 Factor metaslab_load_wait() in metaslab_load() PR 8286 419ba59145 8286 Update vdev_is_spacemap_addressable() for new spacemap encoding Author: Serapheim Dimitropoulos <serapheimd@gmail.com> Obtained from: illumos, ZoL MFC after: 2 weeks
Notes
Notes: svn path=/head/; revision=353612
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c100
1 files changed, 59 insertions, 41 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index ffcb8a465216..fd694b833782 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -163,34 +163,34 @@ static vdev_ops_t *vdev_ops_table[] = {
};
-/* target number of metaslabs per top-level vdev */
-int vdev_max_ms_count = 200;
-SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, max_ms_count, CTLFLAG_RWTUN,
- &vdev_max_ms_count, 0,
+/* default target for number of metaslabs per top-level vdev */
+int zfs_vdev_default_ms_count = 200;
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, default_ms_count, CTLFLAG_RWTUN,
+ &zfs_vdev_default_ms_count, 0,
"Target number of metaslabs per top-level vdev");
/* minimum number of metaslabs per top-level vdev */
-int vdev_min_ms_count = 16;
+int zfs_vdev_min_ms_count = 16;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, min_ms_count, CTLFLAG_RWTUN,
- &vdev_min_ms_count, 0,
+ &zfs_vdev_min_ms_count, 0,
"Minimum number of metaslabs per top-level vdev");
/* practical upper limit of total metaslabs per top-level vdev */
-int vdev_ms_count_limit = 1ULL << 17;
+int zfs_vdev_ms_count_limit = 1ULL << 17;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, max_ms_count_limit, CTLFLAG_RWTUN,
- &vdev_ms_count_limit, 0,
+ &zfs_vdev_ms_count_limit, 0,
"Maximum number of metaslabs per top-level vdev");
/* lower limit for metaslab size (512M) */
-int vdev_default_ms_shift = 29;
+int zfs_vdev_default_ms_shift = 29;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, default_ms_shift, CTLFLAG_RWTUN,
- &vdev_default_ms_shift, 0,
+ &zfs_vdev_default_ms_shift, 0,
"Default shift between vdev size and number of metaslabs");
-/* upper limit for metaslab size (256G) */
-int vdev_max_ms_shift = 38;
+/* upper limit for metaslab size (16G) */
+int zfs_vdev_max_ms_shift = 34;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, max_ms_shift, CTLFLAG_RWTUN,
- &vdev_max_ms_shift, 0,
+ &zfs_vdev_max_ms_shift, 0,
"Maximum shift between vdev size and number of metaslabs");
boolean_t vdev_validate_skip = B_FALSE;
@@ -2205,16 +2205,24 @@ void
vdev_metaslab_set_size(vdev_t *vd)
{
uint64_t asize = vd->vdev_asize;
- uint64_t ms_count = asize >> vdev_default_ms_shift;
+ uint64_t ms_count = asize >> zfs_vdev_default_ms_shift;
uint64_t ms_shift;
/*
* There are two dimensions to the metaslab sizing calculation:
* the size of the metaslab and the count of metaslabs per vdev.
- * In general, we aim for vdev_max_ms_count (200) metaslabs. The
- * range of the dimensions are as follows:
*
- * 2^29 <= ms_size <= 2^38
+ * The default values used below are a good balance between memory
+ * usage (larger metaslab size means more memory needed for loaded
+ * metaslabs; more metaslabs means more memory needed for the
+ * metaslab_t structs), metaslab load time (larger metaslabs take
+ * longer to load), and metaslab sync time (more metaslabs means
+ * more time spent syncing all of them).
+ *
+ * In general, we aim for zfs_vdev_default_ms_count (200) metaslabs.
+ * The ranges of the dimensions are as follows:
+ *
+ * 2^29 <= ms_size <= 2^34
* 16 <= ms_count <= 131,072
*
* On the lower end of vdev sizes, we aim for metaslabs sizes of
@@ -2223,35 +2231,41 @@ vdev_metaslab_set_size(vdev_t *vd)
* of at least 16 metaslabs will override this minimum size goal.
*
* On the upper end of vdev sizes, we aim for a maximum metaslab
- * size of 256GB. However, we will cap the total count to 2^17
- * metaslabs to keep our memory footprint in check.
+ * size of 16GB. However, we will cap the total count to 2^17
+ * metaslabs to keep our memory footprint in check and let the
+ * metaslab size grow from there if that limit is hit.
*
* The net effect of applying the above constraints is summarized below.
*
- * vdev size metaslab count
- * -------------|-----------------
- * < 8GB ~16
- * 8GB - 100GB one per 512MB
- * 100GB - 50TB ~200
- * 50TB - 32PB one per 256GB
- * > 32PB ~131,072
- * -------------------------------
+ * vdev size metaslab count
+ * --------------|-----------------
+ * < 8GB ~16
+ * 8GB - 100GB one per 512MB
+ * 100GB - 3TB ~200
+ * 3TB - 2PB one per 16GB
+ * > 2PB ~131,072
+ * --------------------------------
+ *
+ * Finally, note that all of the above calculate the initial
+ * number of metaslabs. Expanding a top-level vdev will result
+ * in additional metaslabs being allocated making it possible
+ * to exceed the zfs_vdev_ms_count_limit.
*/
- if (ms_count < vdev_min_ms_count)
- ms_shift = highbit64(asize / vdev_min_ms_count);
- else if (ms_count > vdev_max_ms_count)
- ms_shift = highbit64(asize / vdev_max_ms_count);
+ if (ms_count < zfs_vdev_min_ms_count)
+ ms_shift = highbit64(asize / zfs_vdev_min_ms_count);
+ else if (ms_count > zfs_vdev_default_ms_count)
+ ms_shift = highbit64(asize / zfs_vdev_default_ms_count);
else
- ms_shift = vdev_default_ms_shift;
+ ms_shift = zfs_vdev_default_ms_shift;
if (ms_shift < SPA_MAXBLOCKSHIFT) {
ms_shift = SPA_MAXBLOCKSHIFT;
- } else if (ms_shift > vdev_max_ms_shift) {
- ms_shift = vdev_max_ms_shift;
+ } else if (ms_shift > zfs_vdev_max_ms_shift) {
+ ms_shift = zfs_vdev_max_ms_shift;
/* cap the total count to constrain memory footprint */
- if ((asize >> ms_shift) > vdev_ms_count_limit)
- ms_shift = highbit64(asize / vdev_ms_count_limit);
+ if ((asize >> ms_shift) > zfs_vdev_ms_count_limit)
+ ms_shift = highbit64(asize / zfs_vdev_ms_count_limit);
}
vd->vdev_ms_shift = ms_shift;
@@ -3611,13 +3625,17 @@ vdev_accessible(vdev_t *vd, zio_t *zio)
boolean_t
vdev_is_spacemap_addressable(vdev_t *vd)
{
+ if (spa_feature_is_active(vd->vdev_spa, SPA_FEATURE_SPACEMAP_V2))
+ return (B_TRUE);
+
/*
- * Assuming 47 bits of the space map entry dedicated for the entry's
- * offset (see description in space_map.h), we calculate the maximum
- * address that can be described by a space map entry for the given
- * device.
+ * If double-word space map entries are not enabled we assume
+ * 47 bits of the space map entry are dedicated to the entry's
+ * offset (see SM_OFFSET_BITS in space_map.h). We then use that
+ * to calculate the maximum address that can be described by a
+ * space map entry for the given device.
*/
- uint64_t shift = vd->vdev_ashift + 47;
+ uint64_t shift = vd->vdev_ashift + SM_OFFSET_BITS;
if (shift >= 63) /* detect potential overflow */
return (B_TRUE);