diff options
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c | 109 |
1 files changed, 64 insertions, 45 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c index d993d2aec8e1..d66fa4ef822f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c @@ -33,15 +33,15 @@ * 1. Uniquely identify this device as part of a ZFS pool and confirm its * identity within the pool. * - * 2. Verify that all the devices given in a configuration are present + * 2. Verify that all the devices given in a configuration are present * within the pool. * - * 3. Determine the uberblock for the pool. + * 3. Determine the uberblock for the pool. * - * 4. In case of an import operation, determine the configuration of the + * 4. In case of an import operation, determine the configuration of the * toplevel vdev of which it is a part. * - * 5. If an import operation cannot find all the devices in the pool, + * 5. If an import operation cannot find all the devices in the pool, * provide enough information to the administrator to determine which * devices are missing. * @@ -77,9 +77,9 @@ * In order to identify which labels are valid, the labels are written in the * following manner: * - * 1. For each vdev, update 'L1' to the new label - * 2. Update the uberblock - * 3. For each vdev, update 'L2' to the new label + * 1. For each vdev, update 'L1' to the new label + * 2. Update the uberblock + * 3. For each vdev, update 'L2' to the new label * * Given arbitrary failure, we can determine the correct label to use based on * the transaction group. If we fail after updating L1 but before updating the @@ -117,19 +117,19 @@ * * The nvlist describing the pool and vdev contains the following elements: * - * version ZFS on-disk version - * name Pool name - * state Pool state - * txg Transaction group in which this label was written - * pool_guid Unique identifier for this pool - * vdev_tree An nvlist describing vdev tree. + * version ZFS on-disk version + * name Pool name + * state Pool state + * txg Transaction group in which this label was written + * pool_guid Unique identifier for this pool + * vdev_tree An nvlist describing vdev tree. * features_for_read * An nvlist of the features necessary for reading the MOS. * * Each leaf device label also contains the following: * - * top_guid Unique ID for top-level vdev in which this is contained - * guid Unique ID for the leaf vdev + * top_guid Unique ID for top-level vdev in which this is contained + * guid Unique ID for the leaf vdev * * The 'vs' configuration follows the format described in 'spa_config.c'. */ @@ -396,22 +396,33 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, * histograms. */ uint64_t seg_count = 0; + uint64_t to_alloc = vd->vdev_stat.vs_alloc; /* * There are the same number of allocated segments * as free segments, so we will have at least one - * entry per free segment. + * entry per free segment. However, small free + * segments (smaller than vdev_removal_max_span) + * will be combined with adjacent allocated segments + * as a single mapping. */ for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { - seg_count += vd->vdev_mg->mg_histogram[i]; + if (1ULL << (i + 1) < vdev_removal_max_span) { + to_alloc += + vd->vdev_mg->mg_histogram[i] << + i + 1; + } else { + seg_count += + vd->vdev_mg->mg_histogram[i]; + } } /* - * The maximum length of a mapping is SPA_MAXBLOCKSIZE, - * so we need at least one entry per SPA_MAXBLOCKSIZE - * of allocated data. + * The maximum length of a mapping is + * zfs_remove_max_segment, so we need at least one entry + * per zfs_remove_max_segment of allocated data. */ - seg_count += vd->vdev_stat.vs_alloc / SPA_MAXBLOCKSIZE; + seg_count += to_alloc / zfs_remove_max_segment; fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE, seg_count * @@ -546,6 +557,7 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg) abd_t *vp_abd; zio_t *zio; uint64_t best_txg = 0; + uint64_t label_txg = 0; int error = 0; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; @@ -571,8 +583,6 @@ retry: if (zio_wait(zio) == 0 && nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist), &label, 0) == 0) { - uint64_t label_txg = 0; - /* * Auxiliary vdevs won't have txg values in their * labels and newly added vdevs may not have been @@ -603,6 +613,15 @@ retry: goto retry; } + /* + * We found a valid label but it didn't pass txg restrictions. + */ + if (config == NULL && label_txg != 0) { + vdev_dbgmsg(vd, "label discarded as txg is too large " + "(%llu > %llu)", (u_longlong_t)label_txg, + (u_longlong_t)txg); + } + abd_free(vp_abd); return (config); @@ -1028,19 +1047,13 @@ retry: * among uberblocks with equal txg, choose the one with the latest timestamp. */ static int -vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) +vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2) { - if (ub1->ub_txg < ub2->ub_txg) - return (-1); - if (ub1->ub_txg > ub2->ub_txg) - return (1); - - if (ub1->ub_timestamp < ub2->ub_timestamp) - return (-1); - if (ub1->ub_timestamp > ub2->ub_timestamp) - return (1); + int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg); + if (likely(cmp)) + return (cmp); - return (0); + return (AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp)); } struct ubl_cbdata { @@ -1167,10 +1180,13 @@ vdev_uberblock_sync_done(zio_t *zio) * Write the uberblock to all labels of all leaves of the specified vdev. */ static void -vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) +vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes, + uberblock_t *ub, vdev_t *vd, int flags) { - for (uint64_t c = 0; c < vd->vdev_children; c++) - vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags); + for (uint64_t c = 0; c < vd->vdev_children; c++) { + vdev_uberblock_sync(zio, good_writes, + ub, vd->vdev_child[c], flags); + } if (!vd->vdev_ops->vdev_op_leaf) return; @@ -1188,7 +1204,7 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) for (int l = 0; l < VDEV_LABELS; l++) vdev_label_write(zio, vd, l, ub_abd, VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), - vdev_uberblock_sync_done, zio->io_private, + vdev_uberblock_sync_done, good_writes, flags | ZIO_FLAG_DONT_PROPAGATE); abd_free(ub_abd); @@ -1202,10 +1218,10 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) zio_t *zio; uint64_t good_writes = 0; - zio = zio_root(spa, NULL, &good_writes, flags); + zio = zio_root(spa, NULL, NULL, flags); for (int v = 0; v < svdcount; v++) - vdev_uberblock_sync(zio, ub, svd[v], flags); + vdev_uberblock_sync(zio, &good_writes, ub, svd[v], flags); (void) zio_wait(zio); @@ -1266,7 +1282,8 @@ vdev_label_sync_ignore_done(zio_t *zio) * Write all even or odd labels to all leaves of the specified vdev. */ static void -vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) +vdev_label_sync(zio_t *zio, uint64_t *good_writes, + vdev_t *vd, int l, uint64_t txg, int flags) { nvlist_t *label; vdev_phys_t *vp; @@ -1274,8 +1291,10 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) char *buf; size_t buflen; - for (int c = 0; c < vd->vdev_children; c++) - vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags); + for (int c = 0; c < vd->vdev_children; c++) { + vdev_label_sync(zio, good_writes, + vd->vdev_child[c], l, txg, flags); + } if (!vd->vdev_ops->vdev_op_leaf) return; @@ -1300,7 +1319,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) vdev_label_write(zio, vd, l, vp_abd, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t), - vdev_label_sync_done, zio->io_private, + vdev_label_sync_done, good_writes, flags | ZIO_FLAG_DONT_PROPAGATE); } } @@ -1332,7 +1351,7 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) (vd->vdev_islog || vd->vdev_aux != NULL) ? vdev_label_sync_ignore_done : vdev_label_sync_top_done, good_writes, flags); - vdev_label_sync(vio, vd, l, txg, flags); + vdev_label_sync(vio, good_writes, vd, l, txg, flags); zio_nowait(vio); } |