diff options
author | Alexander Motin <mav@FreeBSD.org> | 2018-02-21 16:51:02 +0000 |
---|---|---|
committer | Alexander Motin <mav@FreeBSD.org> | 2018-02-21 16:51:02 +0000 |
commit | 24433f00ea3eec91d7a86eea1bdc1565a1287eca (patch) | |
tree | 4e697527dad9e0508ad33ca7b729325375242a73 /sys/cddl/contrib/opensolaris/uts/common/sys | |
parent | 7efc058f76fa7af45860d864f4f9cd93b2c35de4 (diff) | |
parent | 79a23a69442d5f2ba114e37737bd6e9341ce2cab (diff) | |
download | src-24433f00ea3eec91d7a86eea1bdc1565a1287eca.tar.gz src-24433f00ea3eec91d7a86eea1bdc1565a1287eca.zip |
MFV r329502: 7614 zfs device evacuation/removal
illumos/illumos-gate@5cabbc6b49070407fb9610cfe73d4c0e0dea3e77
https://www.illumos.org/issues/7614:
This project allows top-level vdevs to be removed from the storage pool with
“zpool remove”, reducing the total amount of storage in the pool. This
operation copies all allocated regions of the device to be removed onto other
devices, recording the mapping from old to new location. After the removal is
complete, read and free operations to the removed (now “indirect”) vdev must
be remapped and performed at the new location on disk. The indirect mapping
table is kept in memory whenever the pool is loaded, so there is minimal
performance overhead when doing operations on the indirect vdev.
The size of the in-memory mapping table will be reduced when its entries
become “obsolete” because they are no longer used by any block pointers in
the pool. An entry becomes obsolete when all the blocks that use it are
freed. An entry can also become obsolete when all the snapshots that
reference it are deleted, and the block pointers that reference it have been
“remapped” in all filesystems/zvols (and clones). Whenever an indirect block
is written, all the block pointers in it will be “remapped” to their new
(concrete) locations if possible. This process can be accelerated by using
the “zfs remap” command to proactively rewrite all indirect blocks that
reference indirect (removed) vdevs.
Note that when a device is removed, we do not verify the checksum of the data
that is copied. This makes the process much faster, but if it were used on
redundant vdevs (i.e. mirror or raidz vdevs), it would be possible to copy
the wrong data, when we have the correct data on e.g. the other side of the
mirror. Therefore, mirror and raidz devices can not be removed.
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Richard Laager <rlaager@wiktel.com>
Reviewed by: Tim Chase <tim@chase2k.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Author: Prashanth Sreenivasa <pks@delphix.com>
Notes
Notes:
svn path=/head/; revision=329732
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/sys')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h | 32 |
1 files changed, 31 insertions, 1 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h index 1f65f398be2e..bc184fe54cb4 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -165,6 +165,7 @@ typedef enum { ZFS_PROP_REDUNDANT_METADATA, ZFS_PROP_PREV_SNAP, ZFS_PROP_RECEIVE_RESUME_TOKEN, + ZFS_PROP_REMAPTXG, /* not exposed to the user */ ZFS_NUM_PROPS } zfs_prop_t; @@ -517,7 +518,9 @@ typedef struct zpool_rewind_policy { /* * The following are configuration names used in the nvlist describing a pool's - * configuration. + * configuration. New on-disk names should be prefixed with "<reverse-DNS>:" + * (e.g. "org.open-zfs:") to avoid conflicting names being developed + * independently. */ #define ZPOOL_CONFIG_VERSION "version" #define ZPOOL_CONFIG_POOL_NAME "name" @@ -531,6 +534,9 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_CHILDREN "children" #define ZPOOL_CONFIG_ID "id" #define ZPOOL_CONFIG_GUID "guid" +#define ZPOOL_CONFIG_INDIRECT_OBJECT "com.delphix:indirect_object" +#define ZPOOL_CONFIG_INDIRECT_BIRTHS "com.delphix:indirect_births" +#define ZPOOL_CONFIG_PREV_INDIRECT_VDEV "com.delphix:prev_indirect_vdev" #define ZPOOL_CONFIG_PATH "path" #define ZPOOL_CONFIG_DEVID "devid" #define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" @@ -539,7 +545,9 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_ASIZE "asize" #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ +#define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored on disk */ #define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ +#define ZPOOL_CONFIG_INDIRECT_SIZE "indirect_size" /* not stored on disk */ #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" @@ -615,6 +623,13 @@ typedef struct zpool_rewind_policy { #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" +#define VDEV_TYPE_INDIRECT "indirect" + +/* VDEV_TOP_ZAP_* are used in top-level vdev ZAP objects. */ +#define VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM \ + "com.delphix:indirect_obsolete_sm" +#define VDEV_TOP_ZAP_OBSOLETE_COUNTS_ARE_PRECISE \ + "com.delphix:obsolete_counts_are_precise" /* * This is needed in userland to report the minimum necessary device size. @@ -751,6 +766,20 @@ typedef struct pool_scan_stat { uint64_t pss_pass_scrub_spent_paused; } pool_scan_stat_t; +typedef struct pool_removal_stat { + uint64_t prs_state; /* dsl_scan_state_t */ + uint64_t prs_removing_vdev; + uint64_t prs_start_time; + uint64_t prs_end_time; + uint64_t prs_to_copy; /* bytes that need to be copied */ + uint64_t prs_copied; /* bytes copied so far */ + /* + * bytes of memory used for indirect mappings. + * This includes all removed vdevs. + */ + uint64_t prs_mapping_memory; +} pool_removal_stat_t; + typedef enum dsl_scan_state { DSS_NONE, DSS_SCANNING, @@ -916,6 +945,7 @@ typedef enum zfs_ioc { ZFS_IOC_NEXTBOOT, #endif ZFS_IOC_CHANNEL_PROGRAM, + ZFS_IOC_REMAP, ZFS_IOC_LAST } zfs_ioc_t; |