about | summary | refs | log | tree | commit | diff
path: root/sys/cddl/contrib/opensolaris/uts/common/sys
diff options
context:
space:
mode:
author Alexander Motin <mav@FreeBSD.org> 2018-02-21 16:51:02 +0000
committer Alexander Motin <mav@FreeBSD.org> 2018-02-21 16:51:02 +0000
commit 24433f00ea3eec91d7a86eea1bdc1565a1287eca (patch)
tree 4e697527dad9e0508ad33ca7b729325375242a73 /sys/cddl/contrib/opensolaris/uts/common/sys
parent 7efc058f76fa7af45860d864f4f9cd93b2c35de4 (diff)
parent 79a23a69442d5f2ba114e37737bd6e9341ce2cab (diff)
download src-24433f00ea3eec91d7a86eea1bdc1565a1287eca.tar.gz
src-24433f00ea3eec91d7a86eea1bdc1565a1287eca.zip
MFV r329502: 7614 zfs device evacuation/removal
illumos/illumos-gate@5cabbc6b49070407fb9610cfe73d4c0e0dea3e77 https://www.illumos.org/issues/7614: This project allows top-level vdevs to be removed from the storage pool with “zpool remove”, reducing the total amount of storage in the pool. This operation copies all allocated regions of the device to be removed onto other devices, recording the mapping from old to new location. After the removal is complete, read and free operations to the removed (now “indirect”) vdev must be remapped and performed at the new location on disk. The indirect mapping table is kept in memory whenever the pool is loaded, so there is minimal performance overhead when doing operations on the indirect vdev. The size of the in-memory mapping table will be reduced when its entries become “obsolete” because they are no longer used by any block pointers in the pool. An entry becomes obsolete when all the blocks that use it are freed. An entry can also become obsolete when all the snapshots that reference it are deleted, and the block pointers that reference it have been “remapped” in all filesystems/zvols (and clones). Whenever an indirect block is written, all the block pointers in it will be “remapped” to their new (concrete) locations if possible. This process can be accelerated by using the “zfs remap” command to proactively rewrite all indirect blocks that reference indirect (removed) vdevs. Note that when a device is removed, we do not verify the checksum of the data that is copied. This makes the process much faster, but if it were used on redundant vdevs (i.e. mirror or raidz vdevs), it would be possible to copy the wrong data, when we have the correct data on e.g. the other side of the mirror. Therefore, mirror and raidz devices can not be removed. 
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Richard Laager <rlaager@wiktel.com>
Reviewed by: Tim Chase <tim@chase2k.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Author: Prashanth Sreenivasa <pks@delphix.com>
Notes
Notes: svn path=/head/; revision=329732
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/sys')
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h 32
1 files changed, 31 insertions, 1 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
index 1f65f398be2e..bc184fe54cb4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
@@ -165,6 +165,7 @@ typedef enum {
ZFS_PROP_REDUNDANT_METADATA,
ZFS_PROP_PREV_SNAP,
ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ ZFS_PROP_REMAPTXG, /* not exposed to the user */
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -517,7 +518,9 @@ typedef struct zpool_rewind_policy {
/*
* The following are configuration names used in the nvlist describing a pool's
- * configuration.
+ * configuration. New on-disk names should be prefixed with "<reverse-DNS>:"
+ * (e.g. "org.open-zfs:") to avoid conflicting names being developed
+ * independently.
*/
#define ZPOOL_CONFIG_VERSION "version"
#define ZPOOL_CONFIG_POOL_NAME "name"
@@ -531,6 +534,9 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_CHILDREN "children"
#define ZPOOL_CONFIG_ID "id"
#define ZPOOL_CONFIG_GUID "guid"
+#define ZPOOL_CONFIG_INDIRECT_OBJECT "com.delphix:indirect_object"
+#define ZPOOL_CONFIG_INDIRECT_BIRTHS "com.delphix:indirect_births"
+#define ZPOOL_CONFIG_PREV_INDIRECT_VDEV "com.delphix:prev_indirect_vdev"
#define ZPOOL_CONFIG_PATH "path"
#define ZPOOL_CONFIG_DEVID "devid"
#define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array"
@@ -539,7 +545,9 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_ASIZE "asize"
#define ZPOOL_CONFIG_DTL "DTL"
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
+#define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored on disk */
#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
+#define ZPOOL_CONFIG_INDIRECT_SIZE "indirect_size" /* not stored on disk */
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
@@ -615,6 +623,13 @@ typedef struct zpool_rewind_policy {
#define VDEV_TYPE_SPARE "spare"
#define VDEV_TYPE_LOG "log"
#define VDEV_TYPE_L2CACHE "l2cache"
+#define VDEV_TYPE_INDIRECT "indirect"
+
+/* VDEV_TOP_ZAP_* are used in top-level vdev ZAP objects. */
+#define VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM \
+ "com.delphix:indirect_obsolete_sm"
+#define VDEV_TOP_ZAP_OBSOLETE_COUNTS_ARE_PRECISE \
+ "com.delphix:obsolete_counts_are_precise"
/*
* This is needed in userland to report the minimum necessary device size.
@@ -751,6 +766,20 @@ typedef struct pool_scan_stat {
uint64_t pss_pass_scrub_spent_paused;
} pool_scan_stat_t;
+typedef struct pool_removal_stat {
+ uint64_t prs_state; /* dsl_scan_state_t */
+ uint64_t prs_removing_vdev;
+ uint64_t prs_start_time;
+ uint64_t prs_end_time;
+ uint64_t prs_to_copy; /* bytes that need to be copied */
+ uint64_t prs_copied; /* bytes copied so far */
+ /*
+ * bytes of memory used for indirect mappings.
+ * This includes all removed vdevs.
+ */
+ uint64_t prs_mapping_memory;
+} pool_removal_stat_t;
+
typedef enum dsl_scan_state {
DSS_NONE,
DSS_SCANNING,
@@ -916,6 +945,7 @@ typedef enum zfs_ioc {
ZFS_IOC_NEXTBOOT,
#endif
ZFS_IOC_CHANNEL_PROGRAM,
+ ZFS_IOC_REMAP,
ZFS_IOC_LAST
} zfs_ioc_t;