aboutsummaryrefslogtreecommitdiff
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
diff options
context:
space:
mode:
authorAlexander Motin <mav@FreeBSD.org>2016-09-03 10:04:37 +0000
committerAlexander Motin <mav@FreeBSD.org>2016-09-03 10:04:37 +0000
commitc9fa25c1102b85783482a72560ea4a177859915f (patch)
tree43bfc9a622dc75cbdb473084563be92c8f2a8803 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
parent5535f02daf17369fd8e0703769cf1492f560102e (diff)
parent2e97b962e7b843fe4908ce87fb60dd479036792c (diff)
downloadsrc-c9fa25c1102b85783482a72560ea4a177859915f.tar.gz
src-c9fa25c1102b85783482a72560ea4a177859915f.zip
MFV r304155: 7090 zfs should improve allocation order and throttle allocations
illumos/illumos-gate@0f7643c7376dd69a08acbfc9d1d7d548b10c846a https://github.com/illumos/illumos-gate/commit/0f7643c7376dd69a08acbfc9d1d7d548b 10c846a https://www.illumos.org/issues/7090 When write I/Os are issued, they are issued in block order but the ZIO pipelin e will drive them asynchronously through the allocation stage which can result i n blocks being allocated out-of-order. It would be nice to preserve as much of the logical order as possible. In addition, the allocations are equally scattered across all top-level VDEVs but not all top-level VDEVs are created equally. The pipeline should be able t o detect devices that are more capable of handling allocations and should allocate more blocks to those devices. This allows for dynamic allocation distribution when devices are imbalanced as fuller devices will tend to be slower than empty devices. The change includes a new pool-wide allocation queue which would throttle and order allocations in the ZIO pipeline. The queue would be ordered by issued time and offset and would provide an initial amount of allocation of work to each top-level vdev. The allocation logic utilizes a reservation system to reserve allocations that will be performed by the allocator. Once an allocatio n is successfully completed it's scheduled on a given top-level vdev. Each top- level vdev maintains a maximum number of allocations that it can handle (mg_alloc_queue_depth). The pool-wide reserved allocations (top-levels * mg_alloc_queue_depth) are distributed across the top-level vdevs metaslab groups and round robin across all eligible metaslab groups to distribute the work. As top-levels complete their work, they receive additional work from the pool-wide allocation queue until the allocation queue is emptied. Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Alex Reece <alex@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Dan Kimmel <dan.kimmel@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Sebastien Roy <sebastien.roy@delphix.com> Approved by: Robert Mustacchi <rm@joyent.com> Author: George Wilson <george.wilson@delphix.com>
Notes
Notes: svn path=/head/; revision=305331
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h21
1 files changed, 14 insertions, 7 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
index 74031f15ebe9..592aea5869cb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@@ -55,14 +55,15 @@ void metaslab_sync_done(metaslab_t *, uint64_t);
void metaslab_sync_reassess(metaslab_group_t *);
uint64_t metaslab_block_maxsize(metaslab_t *);
-#define METASLAB_HINTBP_FAVOR 0x0
-#define METASLAB_HINTBP_AVOID 0x1
-#define METASLAB_GANG_HEADER 0x2
-#define METASLAB_GANG_CHILD 0x4
-#define METASLAB_GANG_AVOID 0x8
+#define METASLAB_HINTBP_FAVOR 0x0
+#define METASLAB_HINTBP_AVOID 0x1
+#define METASLAB_GANG_HEADER 0x2
+#define METASLAB_GANG_CHILD 0x4
+#define METASLAB_ASYNC_ALLOC 0x8
+#define METASLAB_DONT_THROTTLE 0x10
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
- blkptr_t *, int, uint64_t, blkptr_t *, int);
+ blkptr_t *, int, uint64_t, blkptr_t *, int, zio_t *);
void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t);
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
void metaslab_check_free(spa_t *, const blkptr_t *);
@@ -73,6 +74,9 @@ int metaslab_class_validate(metaslab_class_t *);
void metaslab_class_histogram_verify(metaslab_class_t *);
uint64_t metaslab_class_fragmentation(metaslab_class_t *);
uint64_t metaslab_class_expandable_space(metaslab_class_t *);
+boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int,
+ zio_t *, int);
+void metaslab_class_throttle_unreserve(metaslab_class_t *, int, zio_t *);
void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t,
int64_t, int64_t);
@@ -86,10 +90,13 @@ metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *);
void metaslab_group_destroy(metaslab_group_t *);
void metaslab_group_activate(metaslab_group_t *);
void metaslab_group_passivate(metaslab_group_t *);
+boolean_t metaslab_group_initialized(metaslab_group_t *);
uint64_t metaslab_group_get_space(metaslab_group_t *);
void metaslab_group_histogram_verify(metaslab_group_t *);
uint64_t metaslab_group_fragmentation(metaslab_group_t *);
void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *);
+void metaslab_group_alloc_decrement(spa_t *, uint64_t, void *, int);
+void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *);
#ifdef __cplusplus
}