diff options
Diffstat (limited to 'sys/contrib/openzfs/cmd')
32 files changed, 1519 insertions, 589 deletions
diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am index 96040976e53e..ca94f6b77e06 100644 --- a/sys/contrib/openzfs/cmd/Makefile.am +++ b/sys/contrib/openzfs/cmd/Makefile.am @@ -98,17 +98,16 @@ endif if USING_PYTHON -bin_SCRIPTS += arc_summary arcstat dbufstat zilstat -CLEANFILES += arc_summary arcstat dbufstat zilstat -dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in +bin_SCRIPTS += zarcsummary zarcstat dbufstat zilstat +CLEANFILES += zarcsummary zarcstat dbufstat zilstat +dist_noinst_DATA += %D%/zarcsummary %D%/zarcstat.in %D%/dbufstat.in %D%/zilstat.in -$(call SUBST,arcstat,%D%/) +$(call SUBST,zarcstat,%D%/) $(call SUBST,dbufstat,%D%/) $(call SUBST,zilstat,%D%/) -arc_summary: %D%/arc_summary +zarcsummary: %D%/zarcsummary $(AM_V_at)cp $< $@ endif - PHONY += cmd cmd: $(bin_SCRIPTS) $(bin_PROGRAMS) $(sbin_SCRIPTS) $(sbin_PROGRAMS) $(dist_bin_SCRIPTS) $(zfsexec_PROGRAMS) $(mounthelper_PROGRAMS) diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/zarcstat.in index 6f9abb39c3fb..8ffd20481166 100755 --- a/sys/contrib/openzfs/cmd/arcstat.in +++ b/sys/contrib/openzfs/cmd/zarcstat.in @@ -2,7 +2,7 @@ # SPDX-License-Identifier: CDDL-1.0 # # Print out ZFS ARC Statistics exported via kstat(1) -# For a definition of fields, or usage, use arcstat -v +# For a definition of fields, or usage, use zarcstat -v # # This script was originally a fork of the original arcstat.pl (0.1) # by Neelakanth Nadgir, originally published on his Sun blog on @@ -56,6 +56,7 @@ import time import getopt import re import copy +import os from signal import signal, SIGINT, SIGWINCH, SIG_DFL @@ -171,7 +172,7 @@ cols = { "zactive": [7, 1000, "zfetch prefetches active per second"], } -# ARC structural breakdown from arc_summary +# ARC structural breakdown from zarcsummary structfields = { "cmp": ["compressed", "Compressed"], "ovh": ["overhead", "Overhead"], @@ -187,7 +188,7 @@ structstats = { # size stats "sz": ["_size", "size"], } -# ARC types breakdown from arc_summary +# ARC types breakdown from zarcsummary typefields = { "data": ["data", "ARC data"], "meta": ["metadata", "ARC metadata"], @@ -198,7 +199,7 @@ typestats = { # size stats "sz": ["_size", "size"], } -# ARC states breakdown from arc_summary +# ARC states breakdown from zarcsummary statefields = { "ano": ["anon", "Anonymous"], "mfu": ["mfu", "MFU"], @@ -261,7 +262,7 @@ hdr_intr = 20 # Print header every 20 lines of output opfile = None sep = " " # Default separator is 2 spaces l2exist = False -cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval " +cmd = ("Usage: zarcstat [-havxp] [-f fields] [-o file] [-s string] [interval " "[count]]\n") cur = {} d = {} @@ -348,10 +349,10 @@ def usage(): "character or string\n") sys.stderr.write("\t -p : Disable auto-scaling of numerical fields\n") sys.stderr.write("\nExamples:\n") - sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n") - sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n") - sys.stderr.write("\tarcstat -v\n") - sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n") + sys.stderr.write("\tzarcstat -o /tmp/a.log 2 10\n") + sys.stderr.write("\tzarcstat -s \",\" -o /tmp/a.log 2 10\n") + sys.stderr.write("\tzarcstat -v\n") + sys.stderr.write("\tzarcstat -f time,hit%,dh%,ph%,mh% 1\n") sys.stderr.write("\n") sys.exit(1) @@ -366,7 +367,7 @@ def snap_stats(): cur = kstat - # fill in additional values from arc_summary + # fill in additional values from zarcsummary cur["caches_size"] = caches_size = cur["anon_data"]+cur["anon_metadata"]+\ cur["mfu_data"]+cur["mfu_metadata"]+cur["mru_data"]+cur["mru_metadata"]+\ cur["uncached_data"]+cur["uncached_metadata"] @@ -766,6 +767,7 @@ def calculate(): def main(): + global sint global count global hdr_intr diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/zarcsummary index c1319573220c..24a129d9ca70 100755 --- a/sys/contrib/openzfs/cmd/arc_summary +++ b/sys/contrib/openzfs/cmd/zarcsummary @@ -34,7 +34,7 @@ Provides basic information on the ARC, its efficiency, the L2ARC (if present), the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See the in-source documentation and code at https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details. -The original introduction to arc_summary can be found at +The original introduction to zarcsummary can be found at http://cuddletech.com/?p=454 """ @@ -161,7 +161,7 @@ elif sys.platform.startswith('linux'): return get_params(TUNABLES_PATH) def get_version_impl(request): - # The original arc_summary called /sbin/modinfo/{spl,zfs} to get + # The original zarcsummary called /sbin/modinfo/{spl,zfs} to get # the version information. We switch to /sys/module/{spl,zfs}/version # to make sure we get what is really loaded in the kernel try: @@ -439,7 +439,7 @@ def print_header(): """ # datetime is now recommended over time but we keep the exact formatting - # from the older version of arc_summary in case there are scripts + # from the older version of zarcsummary in case there are scripts # that expect it in this way daydate = time.strftime(DATE_FORMAT) spc_date = LINE_LENGTH-len(daydate) @@ -559,6 +559,7 @@ def section_arc(kstats_dict): print() compressed_size = arc_stats['compressed_size'] + uncompressed_size = arc_stats['uncompressed_size'] overhead_size = arc_stats['overhead_size'] bonus_size = arc_stats['bonus_size'] dnode_size = arc_stats['dnode_size'] @@ -671,6 +672,8 @@ def section_arc(kstats_dict): print() print('ARC misc:') + prt_i2('Uncompressed size:', f_perc(uncompressed_size, compressed_size), + f_bytes(uncompressed_size)) prt_i1('Memory throttles:', arc_stats['memory_throttle_count']) prt_i1('Memory direct reclaims:', arc_stats['memory_direct_count']) prt_i1('Memory indirect reclaims:', arc_stats['memory_indirect_count']) diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 45eb9c783659..2560ad045db3 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -106,8 +106,14 @@ extern boolean_t spa_mode_readable_spacemaps; extern uint_t zfs_reconstruct_indirect_combinations_max; extern uint_t zfs_btree_verify_intensity; +enum { + ARG_ALLOCATED = 256, + ARG_BLOCK_BIN_MODE, + ARG_BLOCK_CLASSES, +}; + static const char cmdname[] = "zdb"; -uint8_t dump_opt[256]; +uint8_t dump_opt[512]; typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); @@ -127,6 +133,21 @@ static zfs_range_tree_t *mos_refd_objs; static spa_t *spa; static objset_t *os; static boolean_t kernel_init_done; +static boolean_t corruption_found = B_FALSE; + +static enum { + BIN_AUTO = 0, + BIN_PSIZE, + BIN_LSIZE, + BIN_ASIZE, +} block_bin_mode = BIN_AUTO; + +static enum { + CLASS_NORMAL = 1 << 1, + CLASS_SPECIAL = 1 << 2, + CLASS_DEDUP = 1 << 3, + CLASS_OTHER = 1 << 4, +} block_classes = 0; static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, boolean_t); @@ -176,7 +197,7 @@ static int sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free, dmu_tx_t *tx) { - ASSERT3P(tx, ==, NULL); + ASSERT0P(tx); struct sublivelist_verify *sv = arg; sublivelist_verify_block_refcnt_t current = { .svbr_blk = *bp, @@ -208,7 +229,7 @@ sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free, sublivelist_verify_block_t svb = { .svb_dva = bp->blk_dva[i], .svb_allocated_txg = - BP_GET_LOGICAL_BIRTH(bp) + BP_GET_BIRTH(bp) }; if (zfs_btree_find(&sv->sv_leftover, &svb, @@ -250,6 +271,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle) &e->svbr_blk, B_TRUE); (void) printf("\tERROR: %d unmatched FREE(s): %s\n", e->svbr_refcnt, blkbuf); + corruption_found = B_TRUE; } zfs_btree_destroy(&sv->sv_pair); @@ -381,7 +403,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg, sublivelist_verify_block_t svb = {{{0}}}; DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid); DVA_SET_OFFSET(&svb.svb_dva, offset); - DVA_SET_ASIZE(&svb.svb_dva, size); + DVA_SET_ASIZE(&svb.svb_dva, 0); zfs_btree_index_t where; uint64_t end_offset = offset + size; @@ -405,6 +427,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg, (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva), (u_longlong_t)found->svb_allocated_txg, (u_longlong_t)txg); + corruption_found = B_TRUE; } } } @@ -426,6 +449,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)txg, (u_longlong_t)offset, (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); + corruption_found = B_TRUE; } else { zfs_range_tree_add(mv->mv_allocated, offset, size); @@ -439,6 +463,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)txg, (u_longlong_t)offset, (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); + corruption_found = B_TRUE; } else { zfs_range_tree_remove(mv->mv_allocated, offset, size); @@ -526,6 +551,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); + corruption_found = B_TRUE; continue; } @@ -542,6 +568,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); + corruption_found = B_TRUE; continue; } @@ -619,8 +646,9 @@ livelist_metaslab_validate(spa_t *spa) metaslab_calculate_range_tree_type(vd, m, &start, &shift); metaslab_verify_t mv; - mv.mv_allocated = zfs_range_tree_create(NULL, - type, NULL, start, shift); + mv.mv_allocated = zfs_range_tree_create_flags( + NULL, type, NULL, start, shift, + 0, "livelist_metaslab_validate:mv_allocated"); mv.mv_vdid = vd->vdev_id; mv.mv_msid = m->ms_id; mv.mv_start = m->ms_start; @@ -654,6 +682,7 @@ livelist_metaslab_validate(spa_t *spa) } (void) printf("ERROR: Found livelist blocks marked as allocated " "for indirect vdevs:\n"); + corruption_found = B_TRUE; zfs_btree_index_t *where = NULL; sublivelist_verify_block_t *svb; @@ -738,6 +767,12 @@ usage(void) (void) fprintf(stderr, " Options to control amount of output:\n"); (void) fprintf(stderr, " -b --block-stats " "block statistics\n"); + (void) fprintf(stderr, " --bin=(lsize|psize|asize) " + "bin blocks based on this size in all three columns\n"); + (void) fprintf(stderr, + " --class=(normal|special|dedup|other)[,...]\n" + " only consider blocks from " + "these allocation classes\n"); (void) fprintf(stderr, " -B --backup " "backup stream\n"); (void) fprintf(stderr, " -c --checksum " @@ -797,8 +832,8 @@ usage(void) "[default is 200]\n"); (void) fprintf(stderr, " -K --key=KEY " "decryption key for encrypted dataset\n"); - (void) fprintf(stderr, " -o --option=\"OPTION=INTEGER\" " - "set global variable to an unsigned 32-bit integer\n"); + (void) fprintf(stderr, " -o --option=\"NAME=VALUE\" " + "set the named tunable to the given value\n"); (void) fprintf(stderr, " -p --path==PATH " "use one or more with -e to specify path to vdev dir\n"); (void) fprintf(stderr, " -P --parseable " @@ -826,7 +861,7 @@ usage(void) (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); - zdb_exit(1); + zdb_exit(2); } static void @@ -891,9 +926,9 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) size_t nvsize = *(uint64_t *)data; char *packed = umem_alloc(nvsize, UMEM_NOFAIL); - VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); + VERIFY0(dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); - VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); + VERIFY0(nvlist_unpack(packed, nvsize, &nv, 0)); umem_free(packed, nvsize); @@ -1454,8 +1489,8 @@ get_obsolete_refcount(vdev_t *vd) refcount++; } } else { - ASSERT3P(vd->vdev_obsolete_sm, ==, NULL); - ASSERT3U(obsolete_sm_object, ==, 0); + ASSERT0P(vd->vdev_obsolete_sm); + ASSERT0(obsolete_sm_object); } for (unsigned c = 0; c < vd->vdev_children; c++) { refcount += get_obsolete_refcount(vd->vdev_child[c]); @@ -1577,9 +1612,8 @@ dump_spacemap(objset_t *os, space_map_t *sm) continue; } - uint8_t words; char entry_type; - uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID; + uint64_t entry_off, entry_run, entry_vdev; if (sm_entry_is_single_word(word)) { entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? @@ -1587,35 +1621,43 @@ dump_spacemap(objset_t *os, space_map_t *sm) entry_off = (SM_OFFSET_DECODE(word) << mapshift) + sm->sm_start; entry_run = SM_RUN_DECODE(word) << mapshift; - words = 1; + + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run); } else { /* it is a two-word entry so we read another word */ ASSERT(sm_entry_is_double_word(word)); uint64_t extra_word; offset += sizeof (extra_word); + ASSERT3U(offset, <, space_map_length(sm)); VERIFY0(dmu_read(os, space_map_object(sm), offset, sizeof (extra_word), &extra_word, DMU_READ_PREFETCH)); - ASSERT3U(offset, <=, space_map_length(sm)); - entry_run = SM2_RUN_DECODE(word) << mapshift; entry_vdev = SM2_VDEV_DECODE(word); entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ? 'A' : 'F'; entry_off = (SM2_OFFSET_DECODE(extra_word) << mapshift) + sm->sm_start; - words = 2; - } - (void) printf("\t [%6llu] %c range:" - " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n", - (u_longlong_t)entry_id, - entry_type, (u_longlong_t)entry_off, - (u_longlong_t)(entry_off + entry_run), - (u_longlong_t)entry_run, - (u_longlong_t)entry_vdev, words); + if (zopt_metaslab_args == 0 || + zopt_metaslab[0] == entry_vdev) { + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx " + "vdev: %llu\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run, + (u_longlong_t)entry_vdev); + } + } if (entry_type == 'A') alloc += entry_run; @@ -1651,6 +1693,16 @@ dump_metaslab_stats(metaslab_t *msp) } static void +dump_allocated(void *arg, uint64_t start, uint64_t size) +{ + uint64_t *off = arg; + if (*off != start) + (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", *off, + start - *off); + *off = start + size; +} + +static void dump_metaslab(metaslab_t *msp) { vdev_t *vd = msp->ms_group->mg_vd; @@ -1666,13 +1718,24 @@ dump_metaslab(metaslab_t *msp) (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, (u_longlong_t)space_map_object(sm), freebuf); - if (dump_opt['m'] > 2 && !dump_opt['L']) { + if (dump_opt[ARG_ALLOCATED] || + (dump_opt['m'] > 2 && !dump_opt['L'])) { mutex_enter(&msp->ms_lock); VERIFY0(metaslab_load(msp)); + } + + if (dump_opt['m'] > 2 && !dump_opt['L']) { zfs_range_tree_stat_verify(msp->ms_allocatable); dump_metaslab_stats(msp); - metaslab_unload(msp); - mutex_exit(&msp->ms_lock); + } + + if (dump_opt[ARG_ALLOCATED]) { + uint64_t off = msp->ms_start; + zfs_range_tree_walk(msp->ms_allocatable, dump_allocated, + &off); + if (off != msp->ms_start + msp->ms_size) + (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", off, + msp->ms_size - off); } if (dump_opt['m'] > 1 && sm != NULL && @@ -1687,6 +1750,12 @@ dump_metaslab(metaslab_t *msp) SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } + if (dump_opt[ARG_ALLOCATED] || + (dump_opt['m'] > 2 && !dump_opt['L'])) { + metaslab_unload(msp); + mutex_exit(&msp->ms_lock); + } + if (vd->vdev_ops == &vdev_draid_ops) ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift); else @@ -1723,8 +1792,9 @@ print_vdev_metaslab_header(vdev_t *vd) } } - (void) printf("\tvdev %10llu %s", - (u_longlong_t)vd->vdev_id, bias_str); + (void) printf("\tvdev %10llu\t%s metaslab shift %4llu", + (u_longlong_t)vd->vdev_id, bias_str, + (u_longlong_t)vd->vdev_ms_shift); if (ms_flush_data_obj != 0) { (void) printf(" ms_unflushed_phys object %llu", @@ -1791,7 +1861,7 @@ print_vdev_indirect(vdev_t *vd) vdev_indirect_births_t *vib = vd->vdev_indirect_births; if (vim == NULL) { - ASSERT3P(vib, ==, NULL); + ASSERT0P(vib); return; } @@ -1864,7 +1934,7 @@ dump_metaslabs(spa_t *spa) (void) printf("\nMetaslabs:\n"); - if (!dump_opt['d'] && zopt_metaslab_args > 0) { + if (zopt_metaslab_args > 0) { c = zopt_metaslab[0]; if (c >= children) @@ -1991,7 +2061,7 @@ dump_ddt_log(ddt_t *ddt) c += strlcpy(&flagstr[c], " UNKNOWN", sizeof (flagstr) - c); flagstr[1] = '['; - flagstr[c++] = ']'; + flagstr[c] = ']'; } uint64_t count = avl_numnodes(&ddl->ddl_tree); @@ -2042,10 +2112,10 @@ dump_ddt_object(ddt_t *ddt, ddt_type_t type, ddt_class_t class) if (error == ENOENT) return; - ASSERT(error == 0); + ASSERT0(error); error = ddt_object_count(ddt, type, class, &count); - ASSERT(error == 0); + ASSERT0(error); if (count == 0) return; @@ -2568,7 +2638,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, (u_longlong_t)BP_GET_PSIZE(bp), (u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), - (u_longlong_t)BP_GET_BIRTH(bp)); + (u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp)); if (bp_freed) (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), " %s", "FREE"); @@ -2582,19 +2652,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, } } -static void +static u_longlong_t print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp) { char blkbuf[BP_SPRINTF_LEN]; + u_longlong_t offset; int l; - if (!BP_IS_EMBEDDED(bp)) { - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - } + offset = (u_longlong_t)blkid2offset(dnp, bp, zb); - (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); + (void) printf("%16llx ", offset); ASSERT(zb->zb_level >= 0); @@ -2609,19 +2677,38 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE); if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); - (void) printf("%s\n", blkbuf); + (void) printf("%s", blkbuf); + + if (!BP_IS_EMBEDDED(bp)) { + if (BP_GET_TYPE(bp) != dnp->dn_type) { + (void) printf(" (ERROR: Block pointer type " + "(%llu) does not match dnode type (%hhu))", + BP_GET_TYPE(bp), dnp->dn_type); + corruption_found = B_TRUE; + } + if (BP_GET_LEVEL(bp) != zb->zb_level) { + (void) printf(" (ERROR: Block pointer level " + "(%llu) does not match bookmark level (%lld))", + BP_GET_LEVEL(bp), (longlong_t)zb->zb_level); + corruption_found = B_TRUE; + } + } + (void) printf("\n"); + + return (offset); } static int visit_indirect(spa_t *spa, const dnode_phys_t *dnp, blkptr_t *bp, const zbookmark_phys_t *zb) { + u_longlong_t offset; int err = 0; - if (BP_GET_LOGICAL_BIRTH(bp) == 0) + if (BP_GET_BIRTH(bp) == 0) return (0); - print_indirect(spa, bp, zb, dnp); + offset = print_indirect(spa, bp, zb, dnp); if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { arc_flags_t flags = ARC_FLAG_WAIT; @@ -2651,8 +2738,15 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, break; fill += BP_GET_FILL(cbp); } - if (!err) - ASSERT3U(fill, ==, BP_GET_FILL(bp)); + if (!err) { + if (fill != BP_GET_FILL(bp)) { + (void) printf("%16llx: Block pointer " + "fill (%llu) does not match calculated " + "value (%llu)\n", offset, BP_GET_FILL(bp), + (u_longlong_t)fill); + corruption_found = B_TRUE; + } + } arc_buf_destroy(buf, &buf); } @@ -2806,7 +2900,7 @@ dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) (void) arg, (void) tx; char blkbuf[BP_SPRINTF_LEN]; - if (BP_GET_LOGICAL_BIRTH(bp) != 0) { + if (BP_GET_BIRTH(bp) != 0) { snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("\t%s\n", blkbuf); } @@ -2847,7 +2941,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) (void) arg, (void) tx; char blkbuf[BP_SPRINTF_LEN]; - ASSERT(BP_GET_LOGICAL_BIRTH(bp) != 0); + ASSERT(BP_GET_BIRTH(bp) != 0); snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed); (void) printf("\t%s\n", blkbuf); return (0); @@ -2908,6 +3002,7 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) (void) printf("ERROR %u while trying to open " "subobj id %llu\n", error, (u_longlong_t)subobj); + corruption_found = B_TRUE; continue; } dump_full_bpobj(&subbpo, "subobj", indent + 1); @@ -3087,6 +3182,7 @@ bpobj_count_refd(bpobj_t *bpo) (void) printf("ERROR %u while trying to open " "subobj id %llu\n", error, (u_longlong_t)subobj); + corruption_found = B_TRUE; continue; } bpobj_count_refd(&subbpo); @@ -3108,7 +3204,7 @@ dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle) static int dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle) { - ASSERT(arg == NULL); + ASSERT0P(arg); if (dump_opt['d'] >= 5) { char buf[128]; (void) snprintf(buf, sizeof (buf), @@ -3229,6 +3325,7 @@ zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out) uint64_t keyformat, salt, iters; int i; unsigned char c; + FILE *f; VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), sizeof (uint64_t), @@ -3261,6 +3358,25 @@ zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out) break; + case ZFS_KEYFORMAT_RAW: + if ((f = fopen(key_material, "r")) == NULL) + return (B_FALSE); + + if (fread(key_out, 1, WRAPPING_KEY_LEN, f) != + WRAPPING_KEY_LEN) { + (void) fclose(f); + return (B_FALSE); + } + + /* Check the key length */ + if (fgetc(f) != EOF) { + (void) fclose(f); + return (B_FALSE); + } + + (void) fclose(f); + break; + default: fatal("no support for key format %u\n", (unsigned int) keyformat); @@ -3346,7 +3462,7 @@ open_objset(const char *path, const void *tag, objset_t **osp) uint64_t sa_attrs = 0; uint64_t version = 0; - VERIFY3P(sa_os, ==, NULL); + VERIFY0P(sa_os); /* * We can't own an objset if it's redacted. Therefore, we do this @@ -3519,8 +3635,8 @@ dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) uint64_t fuid_obj; /* first find the fuid object. It lives in the master node */ - VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, - 8, 1, &fuid_obj) == 0); + VERIFY0(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, + 8, 1, &fuid_obj)); zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); (void) zfs_fuid_table_load(os, fuid_obj, &idx_tree, &domain_tree); @@ -5722,6 +5838,34 @@ dump_size_histograms(zdb_cb_t *zcb) (void) printf("\nBlock Size Histogram\n"); + switch (block_bin_mode) { + case BIN_PSIZE: + printf("(note: all categories are binned by %s)\n", "psize"); + break; + case BIN_LSIZE: + printf("(note: all categories are binned by %s)\n", "lsize"); + break; + case BIN_ASIZE: + printf("(note: all categories are binned by %s)\n", "asize"); + break; + default: + printf("(note: all categories are binned separately)\n"); + break; + } + if (block_classes != 0) { + char buf[256] = ""; + if (block_classes & CLASS_NORMAL) + strlcat(buf, "\"normal\", ", sizeof (buf)); + if (block_classes & CLASS_SPECIAL) + strlcat(buf, "\"special\", ", sizeof (buf)); + if (block_classes & CLASS_DEDUP) + strlcat(buf, "\"dedup\", ", sizeof (buf)); + if (block_classes & CLASS_OTHER) + strlcat(buf, "\"other\", ", sizeof (buf)); + buf[strlen(buf)-2] = '\0'; + printf("(note: only blocks in these classes are counted: %s)\n", + buf); + } /* * Print the first line titles */ @@ -5921,11 +6065,11 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, * entry back to the block pointer before we claim it. */ if (v == DDT_PHYS_FLAT) { - ASSERT3U(BP_GET_BIRTH(bp), ==, + ASSERT3U(BP_GET_PHYSICAL_BIRTH(bp), ==, ddt_phys_birth(dde->dde_phys, v)); tempbp = *bp; ddt_bp_fill(dde->dde_phys, v, &tempbp, - BP_GET_BIRTH(bp)); + BP_GET_PHYSICAL_BIRTH(bp)); bp = &tempbp; } @@ -6070,29 +6214,85 @@ skipped: [BPE_GET_PSIZE(bp)]++; return; } + + if (block_classes != 0) { + spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); + + uint64_t vdev = DVA_GET_VDEV(&bp->blk_dva[0]); + uint64_t offset = DVA_GET_OFFSET(&bp->blk_dva[0]); + vdev_t *vd = vdev_lookup_top(zcb->zcb_spa, vdev); + ASSERT(vd != NULL); + metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; + ASSERT(ms != NULL); + metaslab_group_t *mg = ms->ms_group; + ASSERT(mg != NULL); + metaslab_class_t *mc = mg->mg_class; + ASSERT(mc != NULL); + + spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); + + int class; + if (mc == spa_normal_class(zcb->zcb_spa)) { + class = CLASS_NORMAL; + } else if (mc == spa_special_class(zcb->zcb_spa)) { + class = CLASS_SPECIAL; + } else if (mc == spa_dedup_class(zcb->zcb_spa)) { + class = CLASS_DEDUP; + } else { + class = CLASS_OTHER; + } + + if (!(block_classes & class)) { + goto hist_skipped; + } + } + /* * The binning histogram bins by powers of two up to * SPA_MAXBLOCKSIZE rather than creating bins for * every possible blocksize found in the pool. */ - int bin = highbit64(BP_GET_PSIZE(bp)) - 1; + int bin; + + /* + * Binning strategy: each bin includes blocks up to and including + * the given size (excluding blocks that fit into the previous bin). + * This way, the "4K" bin includes blocks within the (2K; 4K] range. + */ +#define BIN(size) (highbit64((size) - 1)) + + switch (block_bin_mode) { + case BIN_PSIZE: bin = BIN(BP_GET_PSIZE(bp)); break; + case BIN_LSIZE: bin = BIN(BP_GET_LSIZE(bp)); break; + case BIN_ASIZE: bin = BIN(BP_GET_ASIZE(bp)); break; + case BIN_AUTO: break; + default: PANIC("bad block_bin_mode"); abort(); + } + + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_PSIZE(bp)); zcb->zcb_psize_count[bin]++; zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); zcb->zcb_psize_total += BP_GET_PSIZE(bp); - bin = highbit64(BP_GET_LSIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_LSIZE(bp)); zcb->zcb_lsize_count[bin]++; zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); zcb->zcb_lsize_total += BP_GET_LSIZE(bp); - bin = highbit64(BP_GET_ASIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_ASIZE(bp)); zcb->zcb_asize_count[bin]++; zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); zcb->zcb_asize_total += BP_GET_ASIZE(bp); +#undef BIN + +hist_skipped: if (!do_claim) return; @@ -6151,7 +6351,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (zb->zb_level == ZB_DNODE_LEVEL) return (0); - if (dump_opt['b'] >= 5 && BP_GET_LOGICAL_BIRTH(bp) > 0) { + if (dump_opt['b'] >= 5 && BP_GET_BIRTH(bp) > 0) { char blkbuf[BP_SPRINTF_LEN]; snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("objset %llu object %llu " @@ -6322,8 +6522,9 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs)); - zfs_range_tree_t *allocs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, - NULL, 0, 0); + zfs_range_tree_t *allocs = zfs_range_tree_create_flags( + NULL, ZFS_RANGE_SEG64, NULL, 0, 0, + 0, "zdb_claim_removing:allocs"); for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { metaslab_t *msp = vd->vdev_ms[msi]; @@ -6750,6 +6951,7 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; spa->spa_log_class->mc_ops = &zdb_metaslab_ops; spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops; + spa->spa_special_embedded_log_class->mc_ops = &zdb_metaslab_ops; zcb->zcb_vd_obsolete_counts = umem_zalloc(rvd->vdev_children * sizeof (uint32_t *), @@ -6887,7 +7089,9 @@ zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb) for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class == - spa_embedded_log_class(spa)) ? + spa_embedded_log_class(spa) || + msp->ms_group->mg_class == + spa_special_embedded_log_class(spa)) ? vd->vdev_log_mg : vd->vdev_mg); /* @@ -7011,7 +7215,7 @@ deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc) static void dump_livelist_cb(dsl_deadlist_t *ll, void *arg) { - ASSERT3P(arg, ==, NULL); + ASSERT0P(arg); global_feature_count[SPA_FEATURE_LIVELIST]++; dump_blkptr_list(ll, "Deleted Livelist"); dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL); @@ -7121,6 +7325,8 @@ dump_block_stats(spa_t *spa) zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa)); zcb->zcb_totalasize += metaslab_class_get_alloc(spa_embedded_log_class(spa)); + zcb->zcb_totalasize += + metaslab_class_get_alloc(spa_special_embedded_log_class(spa)); zcb->zcb_start = zcb->zcb_lastprint = gethrtime(); err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb); @@ -7169,6 +7375,7 @@ dump_block_stats(spa_t *spa) total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa)) + metaslab_class_get_alloc(spa_embedded_log_class(spa)) + + metaslab_class_get_alloc(spa_special_embedded_log_class(spa)) + metaslab_class_get_alloc(spa_special_class(spa)) + metaslab_class_get_alloc(spa_dedup_class(spa)) + get_unflushed_alloc_space(spa); @@ -7252,6 +7459,18 @@ dump_block_stats(spa_t *spa) 100.0 * alloc / space); } + if (spa_special_embedded_log_class(spa)->mc_allocator[0].mca_rotor + != NULL) { + uint64_t alloc = metaslab_class_get_alloc( + spa_special_embedded_log_class(spa)); + uint64_t space = metaslab_class_get_space( + spa_special_embedded_log_class(spa)); + + (void) printf("\t%-16s %14llu used: %5.2f%%\n", + "Special embedded log", (u_longlong_t)alloc, + 100.0 * alloc / space); + } + for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { if (zcb->zcb_embedded_blocks[i] == 0) continue; @@ -7706,7 +7925,8 @@ zdb_set_skip_mmp(char *target) * applies to the new_path parameter if allocated. */ static char * -import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) +import_checkpointed_state(char *target, nvlist_t *cfg, boolean_t target_is_spa, + char **new_path) { int error = 0; char *poolname, *bogus_name = NULL; @@ -7714,11 +7934,11 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) /* If the target is not a pool, the extract the pool name */ char *path_start = strchr(target, '/'); - if (path_start != NULL) { + if (target_is_spa || path_start == NULL) { + poolname = target; + } else { size_t poolname_len = path_start - target; poolname = strndup(target, poolname_len); - } else { - poolname = target; } if (cfg == NULL) { @@ -7749,10 +7969,11 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) "with error %d\n", bogus_name, error); } - if (new_path != NULL && path_start != NULL) { - if (asprintf(new_path, "%s%s", bogus_name, path_start) == -1) { + if (new_path != NULL && !target_is_spa) { + if (asprintf(new_path, "%s%s", bogus_name, + path_start != NULL ? path_start : "") == -1) { free(bogus_name); - if (path_start != NULL) + if (!target_is_spa && path_start != NULL) free(poolname); return (NULL); } @@ -7891,7 +8112,7 @@ verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current) for (uint64_t c = ckpoint_rvd->vdev_children; c < current_rvd->vdev_children; c++) { vdev_t *current_vd = current_rvd->vdev_child[c]; - VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL); + VERIFY0P(current_vd->vdev_checkpoint_sm); } } @@ -7981,7 +8202,7 @@ verify_checkpoint_blocks(spa_t *spa) * name) so we can do verification on it against the current state * of the pool. */ - checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL, + checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL, B_TRUE, NULL); ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0); @@ -8451,8 +8672,9 @@ dump_zpool(spa_t *spa) if (dump_opt['d'] || dump_opt['i']) { spa_feature_t f; - mos_refd_objs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, - NULL, 0, 0); + mos_refd_objs = zfs_range_tree_create_flags( + NULL, ZFS_RANGE_SEG64, NULL, 0, 0, + 0, "dump_zpool:mos_refd_objs"); dump_objset(dp->dp_meta_objset); if (dump_opt['d'] >= 3) { @@ -8588,9 +8810,9 @@ zdb_dump_indirect(blkptr_t *bp, int nbps, int flags) } static void -zdb_dump_gbh(void *buf, int flags) +zdb_dump_gbh(void *buf, uint64_t size, int flags) { - zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags); + zdb_dump_indirect((blkptr_t *)buf, gbh_nblkptrs(size), flags); } static void @@ -8780,7 +9002,6 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize, (void) buf; uint64_t orig_lsize = lsize; boolean_t tryzle = ((getenv("ZDB_NO_ZLE") == NULL)); - boolean_t found = B_FALSE; /* * We don't know how the data was compressed, so just try * every decompress function at every inflated blocksize. @@ -8823,20 +9044,19 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize, for (cfuncp = cfuncs; *cfuncp; cfuncp++) { if (try_decompress_block(pabd, lsize, psize, flags, *cfuncp, lbuf, lbuf2)) { - found = B_TRUE; + tryzle = B_FALSE; break; } } if (*cfuncp != 0) break; } - if (!found && tryzle) { + if (tryzle) { for (lsize = orig_lsize; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) { if (try_decompress_block(pabd, lsize, psize, flags, ZIO_COMPRESS_ZLE, lbuf, lbuf2)) { *cfuncp = ZIO_COMPRESS_ZLE; - found = B_TRUE; break; } } @@ -9073,7 +9293,7 @@ zdb_read_block(char *thing, spa_t *spa) zdb_dump_indirect((blkptr_t *)buf, orig_lsize / sizeof (blkptr_t), flags); else if (flags & ZDB_FLAG_GBH) - zdb_dump_gbh(buf, flags); + zdb_dump_gbh(buf, lsize, flags); else zdb_dump_block(thing, buf, lsize, flags); @@ -9120,7 +9340,7 @@ zdb_read_block(char *thing, spa_t *spa) ck_zio->io_offset = DVA_GET_OFFSET(&bp->blk_dva[0]); ck_zio->io_bp = bp; - zio_checksum_compute(ck_zio, ck, pabd, lsize); + zio_checksum_compute(ck_zio, ck, pabd, psize); printf( "%12s\t" "cksum=%016llx:%016llx:%016llx:%016llx\n", @@ -9313,6 +9533,12 @@ main(int argc, char **argv) {"all-reconstruction", no_argument, NULL, 'Y'}, {"livelist", no_argument, NULL, 'y'}, {"zstd-headers", no_argument, NULL, 'Z'}, + {"allocated-map", no_argument, NULL, + ARG_ALLOCATED}, + {"bin", required_argument, NULL, + ARG_BLOCK_BIN_MODE}, + {"class", required_argument, NULL, + ARG_BLOCK_CLASSES}, {0, 0, 0, 0} }; @@ -9343,6 +9569,7 @@ main(int argc, char **argv) case 'u': case 'y': case 'Z': + case ARG_ALLOCATED: dump_opt[c]++; dump_all = 0; break; @@ -9377,9 +9604,11 @@ main(int argc, char **argv) while (*optarg != '\0') { *optarg++ = '*'; } break; case 'o': - error = set_global_var(optarg); + dump_opt[c]++; + dump_all = 0; + error = handle_tunable_option(optarg, B_FALSE); if (error != 0) - usage(); + zdb_exit(1); break; case 'p': if (searchdirs == NULL) { @@ -9423,6 +9652,59 @@ main(int argc, char **argv) case 'x': vn_dumpdir = optarg; break; + case ARG_BLOCK_BIN_MODE: + if (strcmp(optarg, "lsize") == 0) { + block_bin_mode = BIN_LSIZE; + } else if (strcmp(optarg, "psize") == 0) { + block_bin_mode = BIN_PSIZE; + } else if (strcmp(optarg, "asize") == 0) { + block_bin_mode = BIN_ASIZE; + } else { + (void) fprintf(stderr, + "--bin=\"%s\" must be one of \"lsize\", " + "\"psize\" or \"asize\"\n", optarg); + usage(); + } + break; + + case ARG_BLOCK_CLASSES: { + char *buf = strdup(optarg), *tok = buf, *next, + *save = NULL; + + while ((next = strtok_r(tok, ",", &save)) != NULL) { + tok = NULL; + + if (strcmp(next, "normal") == 0) { + block_classes |= CLASS_NORMAL; + } else if (strcmp(next, "special") == 0) { + block_classes |= CLASS_SPECIAL; + } else if (strcmp(next, "dedup") == 0) { + block_classes |= CLASS_DEDUP; + } else if (strcmp(next, "other") == 0) { + block_classes |= CLASS_OTHER; + } else { + (void) fprintf(stderr, + "--class=\"%s\" must be a " + "comma-separated list of either " + "\"normal\", \"special\", " + "\"asize\" or \"other\"; " + "got \"%s\"\n", + optarg, next); + usage(); + } + } + + if (block_classes == 0) { + (void) fprintf(stderr, + "--class= must be a comma-separated " + "list of either \"normal\", \"special\", " + "\"asize\" or \"other\"; got empty\n"); + usage(); + } + + free(buf); + break; + } default: usage(); break; @@ -9465,6 +9747,9 @@ main(int argc, char **argv) */ spa_mode_readable_spacemaps = B_TRUE; + libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2)); + zfs_recover = (dump_opt['A'] > 1); + if (dump_all) verbose = MAX(verbose, 1); @@ -9475,9 +9760,6 @@ main(int argc, char **argv) dump_opt[c] += verbose; } - libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2)); - zfs_recover = (dump_opt['A'] > 1); - argc -= optind; argv += optind; if (argc < 2 && dump_opt['R']) @@ -9545,6 +9827,12 @@ main(int argc, char **argv) error = 0; goto fini; } + if (dump_opt['o']) + /* + * Avoid blasting tunable options off the top of the + * screen. + */ + zdb_exit(1); usage(); } @@ -9605,7 +9893,7 @@ main(int argc, char **argv) } else if (objset_str && !zdb_numeric(objset_str + 1) && dump_opt['N']) { printf("Supply a numeric objset ID with -N\n"); - error = 1; + error = 2; goto fini; } } else { @@ -9697,7 +9985,7 @@ main(int argc, char **argv) char *checkpoint_target = NULL; if (dump_opt['k']) { checkpoint_pool = import_checkpointed_state(target, cfg, - &checkpoint_target); + target_is_spa, &checkpoint_target); if (checkpoint_target != NULL) target = checkpoint_target; @@ -9714,7 +10002,7 @@ main(int argc, char **argv) if (error == 0) { if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) { ASSERT(checkpoint_pool != NULL); - ASSERT(checkpoint_target == NULL); + ASSERT0P(checkpoint_target); error = spa_open(checkpoint_pool, &spa, FTAG); if (error != 0) { @@ -9907,5 +10195,8 @@ fini: if (kernel_init_done) kernel_fini(); + if (corruption_found && error == 0) + error = 3; + return (error); } diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.h b/sys/contrib/openzfs/cmd/zdb/zdb.h index 6b6c9169816b..48b561eb202c 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.h +++ b/sys/contrib/openzfs/cmd/zdb/zdb.h @@ -29,6 +29,6 @@ #define _ZDB_H void dump_intent_log(zilog_t *); -extern uint8_t dump_opt[256]; +extern uint8_t dump_opt[512]; #endif /* _ZDB_H */ diff --git a/sys/contrib/openzfs/cmd/zdb/zdb_il.c b/sys/contrib/openzfs/cmd/zdb/zdb_il.c index 6b90b08ca1b1..3d91fb28a4c7 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb_il.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb_il.c @@ -48,8 +48,6 @@ #include "zdb.h" -extern uint8_t dump_opt[256]; - static char tab_prefix[4] = "\t\t\t"; static void @@ -176,7 +174,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { (void) printf("%shas blkptr, %s\n", tab_prefix, - !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >= + !BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa) ? "will claim" : "won't claim"); print_log_bp(bp, tab_prefix); @@ -189,7 +187,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) (void) printf("%s<hole>\n", tab_prefix); return; } - if (BP_GET_LOGICAL_BIRTH(bp) < zilog->zl_header->zh_claim_txg) { + if (BP_GET_BIRTH(bp) < zilog->zl_header->zh_claim_txg) { (void) printf("%s<block already committed>\n", tab_prefix); return; @@ -240,7 +238,7 @@ zil_prt_rec_write_enc(zilog_t *zilog, int txtype, const void *arg) if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { (void) printf("%shas blkptr, %s\n", tab_prefix, - !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >= + !BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa) ? "will claim" : "won't claim"); print_log_bp(bp, tab_prefix); @@ -476,7 +474,7 @@ print_log_block(zilog_t *zilog, const blkptr_t *bp, void *arg, if (claim_txg != 0) claim = "already claimed"; - else if (BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa)) + else if (BP_GET_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa)) claim = "will claim"; else claim = "won't claim"; diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c index 8718dbde03b6..c0590edc7516 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c @@ -134,11 +134,13 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) * of blkid cache and L2ARC VDEV does not contain pool guid in its * blkid, so this is a special case for L2ARC VDEV. */ - else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL && + else if (gsp->gs_vdev_guid != 0 && nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 && gsp->gs_vdev_guid == vdev_guid) { - (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, - &gsp->gs_devid); + if (gsp->gs_devid == NULL) { + (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, + &gsp->gs_devid); + } (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME, &gsp->gs_vdev_expandtime); return (B_TRUE); @@ -156,22 +158,28 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) /* * For each vdev in this pool, look for a match by devid */ - if ((config = zpool_get_config(zhp, NULL)) != NULL) { - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvl) == 0) { - (void) zfs_agent_iter_vdev(zhp, nvl, gsp); - } - } - /* - * if a match was found then grab the pool guid - */ - if (gsp->gs_vdev_guid && gsp->gs_devid) { - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &gsp->gs_pool_guid); - } + boolean_t found = B_FALSE; + uint64_t pool_guid; + /* Get pool configuration and extract pool GUID */ + if ((config = zpool_get_config(zhp, NULL)) == NULL || + nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &pool_guid) != 0) + goto out; + + /* Skip this pool if we're looking for a specific pool */ + if (gsp->gs_pool_guid != 0 && pool_guid != gsp->gs_pool_guid) + goto out; + + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0) + found = zfs_agent_iter_vdev(zhp, nvl, gsp); + + if (found && gsp->gs_pool_guid == 0) + gsp->gs_pool_guid = pool_guid; + +out: zpool_close(zhp); - return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0); + return (found); } void @@ -233,20 +241,17 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or * ZFS_EV_POOL_GUID may be missing so find them. */ - if (devid == NULL || pool_guid == 0 || vdev_guid == 0) { - if (devid == NULL) - search.gs_vdev_guid = vdev_guid; - else - search.gs_devid = devid; - zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); - if (devid == NULL) - devid = search.gs_devid; - if (pool_guid == 0) - pool_guid = search.gs_pool_guid; - if (vdev_guid == 0) - vdev_guid = search.gs_vdev_guid; - devtype = search.gs_vdev_type; - } + search.gs_devid = devid; + search.gs_vdev_guid = vdev_guid; + search.gs_pool_guid = pool_guid; + zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); + if (devid == NULL) + devid = search.gs_devid; + if (pool_guid == 0) + pool_guid = search.gs_pool_guid; + if (vdev_guid == 0) + vdev_guid = search.gs_vdev_guid; + devtype = search.gs_vdev_type; /* * We want to avoid reporting "remove" events coming from diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am index 093a04c4636a..c0b161ecf248 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am +++ b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am @@ -9,18 +9,18 @@ dist_zedexec_SCRIPTS = \ %D%/all-debug.sh \ %D%/all-syslog.sh \ %D%/data-notify.sh \ - %D%/deadman-slot_off.sh \ + %D%/deadman-sync-slot_off.sh \ %D%/generic-notify.sh \ - %D%/pool_import-led.sh \ + %D%/pool_import-sync-led.sh \ %D%/resilver_finish-notify.sh \ %D%/resilver_finish-start-scrub.sh \ %D%/scrub_finish-notify.sh \ - %D%/statechange-led.sh \ + %D%/statechange-sync-led.sh \ %D%/statechange-notify.sh \ - %D%/statechange-slot_off.sh \ + %D%/statechange-sync-slot_off.sh \ %D%/trim_finish-notify.sh \ - %D%/vdev_attach-led.sh \ - %D%/vdev_clear-led.sh + %D%/vdev_attach-sync-led.sh \ + %D%/vdev_clear-sync-led.sh nodist_zedexec_SCRIPTS = \ %D%/history_event-zfs-list-cacher.sh @@ -30,17 +30,17 @@ SUBSTFILES += $(nodist_zedexec_SCRIPTS) zedconfdefaults = \ all-syslog.sh \ data-notify.sh \ - deadman-slot_off.sh \ + deadman-sync-slot_off.sh \ history_event-zfs-list-cacher.sh \ - pool_import-led.sh \ + pool_import-sync-led.sh \ resilver_finish-notify.sh \ resilver_finish-start-scrub.sh \ scrub_finish-notify.sh \ - statechange-led.sh \ + statechange-sync-led.sh \ statechange-notify.sh \ - statechange-slot_off.sh \ - vdev_attach-led.sh \ - vdev_clear-led.sh + statechange-sync-slot_off.sh \ + vdev_attach-sync-led.sh \ + vdev_clear-sync-led.sh dist_noinst_DATA += %D%/README diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh index 7b339b3add01..7b339b3add01 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh deleted file mode 120000 index 7d7404398a4a..000000000000 --- a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh +++ /dev/null @@ -1 +0,0 @@ -statechange-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh new file mode 120000 index 000000000000..8b9c10c11ebb --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh @@ -0,0 +1 @@ +statechange-sync-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh index 40cb61f17307..40cb61f17307 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh index 06acce93b8aa..06acce93b8aa 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh deleted file mode 120000 index 7d7404398a4a..000000000000 --- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh +++ /dev/null @@ -1 +0,0 @@ -statechange-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh new file mode 120000 index 000000000000..8b9c10c11ebb --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh @@ -0,0 +1 @@ +statechange-sync-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh deleted file mode 120000 index 7d7404398a4a..000000000000 --- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh +++ /dev/null @@ -1 +0,0 @@ -statechange-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh new file mode 120000 index 000000000000..8b9c10c11ebb --- /dev/null +++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh @@ -0,0 +1 @@ +statechange-sync-led.sh
\ No newline at end of file diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh index 6e00f153be1c..78d8f658ddd8 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh @@ -441,8 +441,9 @@ zed_notify_slack_webhook() "${pathname}")" # Construct the JSON message for posting. + # shellcheck disable=SC2016 # - msg_json="$(printf '{"text": "*%s*\\n%s"}' "${subject}" "${msg_body}" )" + msg_json="$(printf '{"text": "*%s*\\n```%s```"}' "${subject}" "${msg_body}" )" # Send the POST request and check for errors. # diff --git a/sys/contrib/openzfs/cmd/zed/zed_event.c b/sys/contrib/openzfs/cmd/zed/zed_event.c index 296c222ca382..ba7cba304b1d 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_event.c +++ b/sys/contrib/openzfs/cmd/zed/zed_event.c @@ -110,7 +110,7 @@ zed_event_fini(struct zed_conf *zcp) static void _bump_event_queue_length(void) { - int zzlm = -1, wr; + int zzlm, wr; char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */ long int qlen, orig_qlen; diff --git a/sys/contrib/openzfs/cmd/zed/zed_exec.c b/sys/contrib/openzfs/cmd/zed/zed_exec.c index 036081decd64..a14af4f20a85 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_exec.c +++ b/sys/contrib/openzfs/cmd/zed/zed_exec.c @@ -196,37 +196,29 @@ _nop(int sig) (void) sig; } -static void * -_reap_children(void *arg) +static void +wait_for_children(boolean_t do_pause, boolean_t wait) { - (void) arg; - struct launched_process_node node, *pnode; pid_t pid; - int status; struct rusage usage; - struct sigaction sa = {}; - - (void) sigfillset(&sa.sa_mask); - (void) sigdelset(&sa.sa_mask, SIGCHLD); - (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL); - - (void) sigemptyset(&sa.sa_mask); - sa.sa_handler = _nop; - sa.sa_flags = SA_NOCLDSTOP; - (void) sigaction(SIGCHLD, &sa, NULL); + int status; + struct launched_process_node node, *pnode; for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) { (void) pthread_mutex_lock(&_launched_processes_lock); - pid = wait4(0, &status, WNOHANG, &usage); - + pid = wait4(0, &status, wait ? 0 : WNOHANG, &usage); if (pid == 0 || pid == (pid_t)-1) { (void) pthread_mutex_unlock(&_launched_processes_lock); - if (pid == 0 || errno == ECHILD) - pause(); - else if (errno != EINTR) + if ((pid == 0) || (errno == ECHILD)) { + if (do_pause) + pause(); + } else if (errno != EINTR) zed_log_msg(LOG_WARNING, "Failed to wait for children: %s", strerror(errno)); + if (!do_pause) + return; + } else { memset(&node, 0, sizeof (node)); node.pid = pid; @@ -278,6 +270,25 @@ _reap_children(void *arg) } } +} + +static void * +_reap_children(void *arg) +{ + (void) arg; + struct sigaction sa = {}; + + (void) sigfillset(&sa.sa_mask); + (void) sigdelset(&sa.sa_mask, SIGCHLD); + (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL); + + (void) sigemptyset(&sa.sa_mask); + sa.sa_handler = _nop; + sa.sa_flags = SA_NOCLDSTOP; + (void) sigaction(SIGCHLD, &sa, NULL); + + wait_for_children(B_TRUE, B_FALSE); + return (NULL); } @@ -307,6 +318,45 @@ zed_exec_fini(void) } /* + * Check if the zedlet name indicates if it is a synchronous zedlet + * + * Synchronous zedlets have a "-sync-" immediately following the event name in + * their zedlet filename, like: + * + * EVENT_NAME-sync-ZEDLETNAME.sh + * + * For example, if you wanted a synchronous statechange script: + * + * statechange-sync-myzedlet.sh + * + * Synchronous zedlets are guaranteed to be the only zedlet running. No other + * zedlets may run in parallel with a synchronous zedlet. A synchronous + * zedlet will wait for all previously spawned zedlets to finish before running. + * Users should be careful to only use synchronous zedlets when needed, since + * they decrease parallelism. + */ +static boolean_t +zedlet_is_sync(const char *zedlet, const char *event) +{ + const char *sync_str = "-sync-"; + size_t sync_str_len; + size_t zedlet_len; + size_t event_len; + + sync_str_len = strlen(sync_str); + zedlet_len = strlen(zedlet); + event_len = strlen(event); + + if (event_len + sync_str_len >= zedlet_len) + return (B_FALSE); + + if (strncmp(&zedlet[event_len], sync_str, sync_str_len) == 0) + return (B_TRUE); + + return (B_FALSE); +} + +/* * Process the event [eid] by synchronously invoking all zedlets with a * matching class prefix. * @@ -368,9 +418,28 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass, z = zed_strings_next(zcp->zedlets)) { for (csp = class_strings; *csp; csp++) { n = strlen(*csp); - if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) + if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) { + boolean_t is_sync = zedlet_is_sync(z, *csp); + + if (is_sync) { + /* + * Wait for previous zedlets to + * finish + */ + wait_for_children(B_FALSE, B_TRUE); + } + _zed_exec_fork_child(eid, zcp->zedlet_dir, z, e, zcp->zevent_fd, zcp->do_foreground); + + if (is_sync) { + /* + * Wait for sync zedlet we just launched + * to finish. + */ + wait_for_children(B_FALSE, B_TRUE); + } + } } } free(e); diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 841c356508a5..ccdd5ffef8e6 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -440,7 +440,7 @@ get_usage(zfs_help_t idx) return (gettext("\tredact <snapshot> <bookmark> " "<redaction_snapshot> ...\n")); case HELP_REWRITE: - return (gettext("\trewrite [-rvx] [-o <offset>] [-l <length>] " + return (gettext("\trewrite [-Prvx] [-o <offset>] [-l <length>] " "<directory|file ...>\n")); case HELP_JAIL: return (gettext("\tjail <jailid|jailname> <filesystem>\n")); @@ -914,7 +914,11 @@ zfs_do_clone(int argc, char **argv) log_history = B_FALSE; } - ret = zfs_mount_and_share(g_zfs, argv[1], ZFS_TYPE_DATASET); + /* + * Dataset cloned successfully, mount/share failures are + * non-fatal. + */ + (void) zfs_mount_and_share(g_zfs, argv[1], ZFS_TYPE_DATASET); } zfs_close(zhp); @@ -923,26 +927,22 @@ zfs_do_clone(int argc, char **argv) return (!!ret); usage: - ASSERT3P(zhp, ==, NULL); + ASSERT0P(zhp); nvlist_free(props); usage(B_FALSE); return (-1); } /* - * Return a default volblocksize for the pool which always uses more than - * half of the data sectors. This primarily applies to dRAID which always - * writes full stripe widths. + * Calculate the minimum allocation size based on the top-level vdevs. */ static uint64_t -default_volblocksize(zpool_handle_t *zhp, nvlist_t *props) +calculate_volblocksize(nvlist_t *config) { - uint64_t volblocksize, asize = SPA_MINBLOCKSIZE; + uint64_t asize = SPA_MINBLOCKSIZE; nvlist_t *tree, **vdevs; uint_t nvdevs; - nvlist_t *config = zpool_get_config(zhp, NULL); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 || nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &vdevs, &nvdevs) != 0) { @@ -973,6 +973,24 @@ default_volblocksize(zpool_handle_t *zhp, nvlist_t *props) } } + return (asize); +} + +/* + * Return a default volblocksize for the pool which always uses more than + * half of the data sectors. This primarily applies to dRAID which always + * writes full stripe widths. + */ +static uint64_t +default_volblocksize(zpool_handle_t *zhp, nvlist_t *props) +{ + uint64_t volblocksize, asize = SPA_MINBLOCKSIZE; + + nvlist_t *config = zpool_get_config(zhp, NULL); + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_MAX_ALLOC, &asize) != 0) + asize = calculate_volblocksize(config); + /* * Calculate the target volblocksize such that more than half * of the asize is used. The following table is for 4k sectors. @@ -1319,7 +1337,9 @@ zfs_do_create(int argc, char **argv) goto error; } - ret = zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET); + /* Dataset created successfully, mount/share failures are non-fatal */ + ret = 0; + (void) zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET); error: nvlist_free(props); return (ret); @@ -1974,9 +1994,8 @@ fill_dataset_info(nvlist_t *list, zfs_handle_t *zhp, boolean_t as_int) } if (type == ZFS_TYPE_SNAPSHOT) { - char *ds, *snap; - ds = snap = strdup(zfs_get_name(zhp)); - ds = strsep(&snap, "@"); + char *snap = strdup(zfs_get_name(zhp)); + char *ds = strsep(&snap, "@"); fnvlist_add_string(list, "dataset", ds); fnvlist_add_string(list, "snapshot_name", snap); free(ds); @@ -2019,8 +2038,7 @@ get_callback(zfs_handle_t *zhp, void *data) nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; nvlist_t *propval; - nvlist_t *item, *d, *props; - item = d = props = NULL; + nvlist_t *item, *d = NULL, *props = NULL; const char *strval; const char *sourceval; boolean_t received = is_recvd_column(cbp); @@ -5305,6 +5323,7 @@ zfs_do_receive(int argc, char **argv) #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" +#define ZFS_DELEG_PERM_SEND_RAW "send:raw" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append" #define ZFS_DELEG_PERM_ALLOW "allow" @@ -5347,6 +5366,7 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, + { ZFS_DELEG_PERM_SEND_RAW, ZFS_DELEG_NOTE_SEND_RAW }, { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, @@ -5879,7 +5899,7 @@ parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) static inline const char * deleg_perm_comment(zfs_deleg_note_t note) { - const char *str = ""; + const char *str; /* subcommands */ switch (note) { @@ -5931,6 +5951,10 @@ deleg_perm_comment(zfs_deleg_note_t note) case ZFS_DELEG_NOTE_SEND: str = gettext(""); break; + case ZFS_DELEG_NOTE_SEND_RAW: + str = gettext("Allow sending ONLY encrypted (raw) replication" + "\n\t\t\t\tstreams"); + break; case ZFS_DELEG_NOTE_SHARE: str = gettext("Allows sharing file systems over NFS or SMB" "\n\t\t\t\tprotocols"); @@ -6860,17 +6884,17 @@ print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl, if (scripted) { if (parsable) { - (void) printf("%s\t%s\t%ld\n", zname, - tagname, (unsigned long)time); + (void) printf("%s\t%s\t%lld\n", zname, + tagname, (long long)time); } else { (void) printf("%s\t%s\t%s\n", zname, tagname, tsbuf); } } else { if (parsable) { - (void) printf("%-*s %-*s %ld\n", + (void) printf("%-*s %-*s %lld\n", nwidth, zname, tagwidth, - tagname, (unsigned long)time); + tagname, (long long)time); } else { (void) printf("%-*s %-*s %s\n", nwidth, zname, tagwidth, @@ -7729,6 +7753,7 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) struct extmnttab entry; const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount"; ino_t path_inode; + char *zfs_mntpnt, *entry_mntpnt; /* * Search for the given (major,minor) pair in the mount table. @@ -7770,6 +7795,24 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) goto out; } + /* + * If the filesystem is mounted, check that the mountpoint matches + * the one in the mnttab entry w.r.t. provided path. If it doesn't, + * then we should not proceed further. + */ + entry_mntpnt = strdup(entry.mnt_mountp); + if (zfs_is_mounted(zhp, &zfs_mntpnt)) { + if (strcmp(zfs_mntpnt, entry_mntpnt) != 0) { + (void) fprintf(stderr, gettext("cannot %s '%s': " + "not an original mountpoint\n"), cmdname, path); + free(zfs_mntpnt); + free(entry_mntpnt); + goto out; + } + free(zfs_mntpnt); + } + free(entry_mntpnt); + if (op == OP_SHARE) { char nfs_mnt_prop[ZFS_MAXPROPLEN]; char smbshare_prop[ZFS_MAXPROPLEN]; @@ -9160,8 +9203,11 @@ zfs_do_rewrite(int argc, char **argv) zfs_rewrite_args_t args; memset(&args, 0, sizeof (args)); - while ((c = getopt(argc, argv, "l:o:rvx")) != -1) { + while ((c = getopt(argc, argv, "Pl:o:rvx")) != -1) { switch (c) { + case 'P': + args.flags |= ZFS_REWRITE_PHYSICAL; + break; case 'l': args.len = strtoll(optarg, NULL, 0); break; diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c index 8244bc83fa0d..8ffbf91ffb30 100644 --- a/sys/contrib/openzfs/cmd/zhack.c +++ b/sys/contrib/openzfs/cmd/zhack.c @@ -54,6 +54,7 @@ #include <sys/dmu_tx.h> #include <zfeature_common.h> #include <libzutil.h> +#include <sys/metaslab_impl.h> static importargs_t g_importargs; static char *g_pool; @@ -69,7 +70,8 @@ static __attribute__((noreturn)) void usage(void) { (void) fprintf(stderr, - "Usage: zhack [-c cachefile] [-d dir] <subcommand> <args> ...\n" + "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> " + "<args> ...\n" "where <subcommand> <args> is one of the following:\n" "\n"); @@ -93,7 +95,10 @@ usage(void) " -c repair corrupted label checksums\n" " -u restore the label on a detached device\n" "\n" - " <device> : path to vdev\n"); + " <device> : path to vdev\n" + "\n" + " metaslab leak <pool>\n" + " apply allocation map from zdb to specified pool\n"); exit(1); } @@ -162,9 +167,9 @@ zhack_import(char *target, boolean_t readonly) props = NULL; if (readonly) { - VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0); + VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + VERIFY0(nvlist_add_uint64(props, + zpool_prop_to_name(ZPOOL_PROP_READONLY), 1)); } zfeature_checks_disable = B_TRUE; @@ -218,8 +223,8 @@ dump_obj(objset_t *os, uint64_t obj, const char *name) } else { ASSERT(za->za_integer_length == 1); char val[1024]; - VERIFY(zap_lookup(os, obj, za->za_name, - 1, sizeof (val), val) == 0); + VERIFY0(zap_lookup(os, obj, za->za_name, + 1, sizeof (val), val)); (void) printf("\t%s = %s\n", za->za_name, val); } } @@ -363,10 +368,12 @@ feature_incr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; + mutex_enter(&spa->spa_feat_stats_lock); VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount + 1, tx); spa_history_log_internal(spa, "zhack feature incr", tx, "name=%s", feature->fi_guid); + mutex_exit(&spa->spa_feat_stats_lock); } static void @@ -376,10 +383,12 @@ feature_decr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; + mutex_enter(&spa->spa_feat_stats_lock); VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount - 1, tx); spa_history_log_internal(spa, "zhack feature decr", tx, "name=%s", feature->fi_guid); + mutex_exit(&spa->spa_feat_stats_lock); } static void @@ -496,6 +505,186 @@ zhack_do_feature(int argc, char **argv) return (0); } +static boolean_t +strstarts(const char *a, const char *b) +{ + return (strncmp(a, b, strlen(b)) == 0); +} + +static void +metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size, + dmu_tx_t *tx) +{ + ASSERT(msp->ms_disabled); + ASSERT(MUTEX_HELD(&msp->ms_lock)); + uint64_t txg = dmu_tx_get_txg(tx); + + uint64_t off = start; + while (off < start + size) { + uint64_t ostart, osize; + boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable, + off, start + size - off, &ostart, &osize); + if (!found) + break; + zfs_range_tree_remove(msp->ms_allocatable, ostart, osize); + + if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) + vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp, + txg); + + zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart, + osize); + msp->ms_allocating_total += osize; + off = ostart + osize; + } +} + +static void +zhack_do_metaslab_leak(int argc, char **argv) +{ + int c; + char *target; + spa_t *spa; + + optind = 1; + boolean_t force = B_FALSE; + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, "error: missing pool name\n"); + usage(); + } + target = argv[0]; + + zhack_spa_open(target, B_FALSE, FTAG, &spa); + spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER); + + char *line = NULL; + size_t cap = 0; + + vdev_t *vd = NULL; + metaslab_t *prev = NULL; + dmu_tx_t *tx = NULL; + while (getline(&line, &cap, stdin) > 0) { + if (strstarts(line, "\tvdev ")) { + uint64_t vdev_id, ms_shift; + if (sscanf(line, + "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64, + &vdev_id, &ms_shift) == 1) { + VERIFY3U(sscanf(line, "\tvdev %"PRIu64 + "\t metaslab shift %4"PRIu64, + &vdev_id, &ms_shift), ==, 2); + } + vd = vdev_lookup_top(spa, vdev_id); + if (vd == NULL) { + fprintf(stderr, "error: no such vdev with " + "id %"PRIu64"\n", vdev_id); + break; + } + if (tx) { + dmu_tx_commit(tx); + mutex_exit(&prev->ms_lock); + metaslab_enable(prev, B_FALSE, B_FALSE); + tx = NULL; + prev = NULL; + } + if (vd->vdev_ms_shift != ms_shift) { + fprintf(stderr, "error: ms_shift mismatch: %" + PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift, + ms_shift); + break; + } + } else if (strstarts(line, "\tmetaslabs ")) { + uint64_t ms_count; + VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count), + ==, 1); + ASSERT(vd); + if (!force && vd->vdev_ms_count != ms_count) { + fprintf(stderr, "error: ms_count mismatch: %" + PRIu64" != %"PRIu64"\n", vd->vdev_ms_count, + ms_count); + break; + } + } else if (strstarts(line, "ALLOC:")) { + uint64_t start, size; + VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n", + &start, &size), ==, 2); + + ASSERT(vd); + metaslab_t *cur = + vd->vdev_ms[start >> vd->vdev_ms_shift]; + if (prev != cur) { + if (prev) { + dmu_tx_commit(tx); + mutex_exit(&prev->ms_lock); + metaslab_enable(prev, B_FALSE, B_FALSE); + } + ASSERT(cur); + metaslab_disable(cur); + mutex_enter(&cur->ms_lock); + metaslab_load(cur); + prev = cur; + tx = dmu_tx_create_dd( + spa_get_dsl(vd->vdev_spa)->dp_root_dir); + dmu_tx_assign(tx, DMU_TX_WAIT); + } + + metaslab_force_alloc(cur, start, size, tx); + } else { + continue; + } + } + if (tx) { + dmu_tx_commit(tx); + mutex_exit(&prev->ms_lock); + metaslab_enable(prev, B_FALSE, B_FALSE); + tx = NULL; + prev = NULL; + } + if (line) + free(line); + + spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG); + spa_close(spa, FTAG); +} + +static int +zhack_do_metaslab(int argc, char **argv) +{ + char *subcommand; + + argc--; + argv++; + if (argc == 0) { + (void) fprintf(stderr, + "error: no metaslab operation specified\n"); + usage(); + } + + subcommand = argv[0]; + if (strcmp(subcommand, "leak") == 0) { + zhack_do_metaslab_leak(argc, argv); + } else { + (void) fprintf(stderr, "error: unknown subcommand: %s\n", + subcommand); + usage(); + } + + return (0); +} + #define ASHIFT_UBERBLOCK_SHIFT(ashift) \ MIN(MAX(ashift, UBERBLOCK_SHIFT), \ MAX_UBERBLOCK_SHIFT) @@ -525,6 +714,23 @@ zhack_repair_read_label(const int fd, vdev_label_t *vl, return (0); } +static int +zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap) +{ + if (vdev_eck->zec_magic == ZEC_MAGIC) { + *byteswap = B_FALSE; + } else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) { + *byteswap = B_TRUE; + } else { + (void) fprintf(stderr, "error: label %d: " + "Expected the nvlist checksum magic number but instead got " + "0x%" PRIx64 "\n", + l, vdev_eck->zec_magic); + return (1); + } + return (0); +} + static void zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset, const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum) @@ -551,33 +757,10 @@ zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset, } static int -zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys, - const size_t cfg_keys_len, nvlist_t *cfg, nvlist_t *vdev_tree_cfg, - uint64_t *ashift) +zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift) { int err; - - if (ub->ub_txg != 0) { - (void) fprintf(stderr, - "error: label %d: UB TXG of 0 expected, but got %" - PRIu64 "\n", - l, ub->ub_txg); - (void) fprintf(stderr, "It would appear the device was not " - "properly removed.\n"); - return (1); - } - - for (int i = 0; i < cfg_keys_len; i++) { - uint64_t val; - err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val); - if (err) { - (void) fprintf(stderr, - "error: label %d, %d: " - "cannot find nvlist key %s\n", - l, i, cfg_keys[i]); - return (err); - } - } + nvlist_t *vdev_tree_cfg; err = nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg); @@ -601,7 +784,7 @@ zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys, (void) fprintf(stderr, "error: label %d: nvlist key %s is zero\n", l, ZPOOL_CONFIG_ASHIFT); - return (err); + return (1); } return (0); @@ -616,30 +799,35 @@ zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l) */ if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) { const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp); + int err; + ub->ub_txg = txg; - if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) { + err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG); + if (err) { (void) fprintf(stderr, "error: label %d: " "Failed to remove pool creation TXG\n", l); - return (1); + return (err); } - if (nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG) != 0) { + err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG); + if (err) { (void) fprintf(stderr, "error: label %d: Failed to remove pool TXG to " "be replaced.\n", l); - return (1); + return (err); } - if (nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg) != 0) { + err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg); + if (err) { (void) fprintf(stderr, "error: label %d: " "Failed to add pool TXG of %" PRIu64 "\n", l, txg); - return (1); + return (err); } } @@ -733,6 +921,7 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data, BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; const uint64_t actual_magic = vdev_eck->zec_magic; int err = 0; + if (actual_magic != expected_magic) { (void) fprintf(stderr, "error: label %d: " "Expected " @@ -754,6 +943,36 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data, return (err); } +static int +zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg) +{ + const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, + ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; + int err; + + err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, + VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0); + if (err) { + (void) fprintf(stderr, + "error: cannot unpack nvlist label %d\n", l); + return (err); + } + + for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) { + uint64_t val; + err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val); + if (err) { + (void) fprintf(stderr, + "error: label %d, %d: " + "cannot find nvlist key %s\n", + l, i, cfg_keys[i]); + return (err); + } + } + + return (0); +} + static void zhack_repair_one_label(const zhack_repair_op_t op, const int fd, vdev_label_t *vl, const uint64_t label_offset, const int l, @@ -767,10 +986,7 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1; const uint64_t vdev_phys_offset = label_offset + offsetof(vdev_label_t, vl_vdev_phys); - const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, - ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; nvlist_t *cfg; - nvlist_t *vdev_tree_cfg = NULL; uint64_t ashift; int byteswap; @@ -778,18 +994,9 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, if (err) return; - if (vdev_eck->zec_magic == 0) { - (void) fprintf(stderr, "error: label %d: " - "Expected the nvlist checksum magic number to not be zero" - "\n", - l); - (void) fprintf(stderr, "There should already be a checksum " - "for the label.\n"); + err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap); + if (err) return; - } - - byteswap = - (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)); if (byteswap) { byteswap_uint64_array(&vdev_eck->zec_cksum, @@ -805,16 +1012,7 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, return; } - err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, - VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0); - if (err) { - (void) fprintf(stderr, - "error: cannot unpack nvlist label %d\n", l); - return; - } - - err = zhack_repair_check_label(ub, - l, cfg_keys, ARRAY_SIZE(cfg_keys), cfg, vdev_tree_cfg, &ashift); + err = zhack_repair_unpack_cfg(vl, l, &cfg); if (err) return; @@ -822,6 +1020,19 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd, char *buf; size_t buflen; + if (ub->ub_txg != 0) { + (void) fprintf(stderr, + "error: label %d: UB TXG of 0 expected, but got %" + PRIu64 "\n", l, ub->ub_txg); + (void) fprintf(stderr, "It would appear the device was " + "not properly detached.\n"); + return; + } + + err = zhack_repair_get_ashift(cfg, l, &ashift); + if (err) + return; + err = zhack_repair_undetach(ub, cfg, l); if (err) return; @@ -981,7 +1192,7 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); zfs_prop_init(); - while ((c = getopt(argc, argv, "+c:d:")) != -1) { + while ((c = getopt(argc, argv, "+c:d:o:")) != -1) { switch (c) { case 'c': g_importargs.cachefile = optarg; @@ -990,6 +1201,10 @@ main(int argc, char **argv) assert(g_importargs.paths < MAX_NUM_PATHS); g_importargs.path[g_importargs.paths++] = optarg; break; + case 'o': + if (handle_tunable_option(optarg, B_FALSE) != 0) + exit(1); + break; default: usage(); break; @@ -1011,6 +1226,8 @@ main(int argc, char **argv) rv = zhack_do_feature(argc, argv); } else if (strcmp(subcommand, "label") == 0) { return (zhack_do_label(argc, argv)); + } else if (strcmp(subcommand, "metaslab") == 0) { + rv = zhack_do_metaslab(argc, argv); } else { (void) fprintf(stderr, "error: unknown subcommand: %s\n", subcommand); diff --git a/sys/contrib/openzfs/cmd/zilstat.in b/sys/contrib/openzfs/cmd/zilstat.in index 4140398bf4a3..d01db9b0914b 100755 --- a/sys/contrib/openzfs/cmd/zilstat.in +++ b/sys/contrib/openzfs/cmd/zilstat.in @@ -47,6 +47,7 @@ cols = { "cec": [5, 1000, "zil_commit_error_count"], "csc": [5, 1000, "zil_commit_stall_count"], "cSc": [5, 1000, "zil_commit_suspend_count"], + "cCc": [5, 1000, "zil_commit_crash_count"], "ic": [5, 1000, "zil_itx_count"], "iic": [5, 1000, "zil_itx_indirect_count"], "iib": [5, 1024, "zil_itx_indirect_bytes"], diff --git a/sys/contrib/openzfs/cmd/zinject/zinject.c b/sys/contrib/openzfs/cmd/zinject/zinject.c index 113797c878b9..c2f646f2567d 100644 --- a/sys/contrib/openzfs/cmd/zinject/zinject.c +++ b/sys/contrib/openzfs/cmd/zinject/zinject.c @@ -107,6 +107,8 @@ * zinject * zinject <-a | -u pool> * zinject -c <id|all> + * zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range] + * [-T iotype] [-t type object | -b bookmark pool] * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] * [-r range] <object> * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool @@ -132,14 +134,18 @@ * The '-f' flag controls the frequency of errors injected, expressed as a * real number percentage between 0.0001 and 100. The default is 100. * - * The this form is responsible for actually injecting the handler into the + * The <object> form is responsible for actually injecting the handler into the * framework. It takes the arguments described above, translates them to the * internal tuple using libzpool, and then issues an ioctl() to register the * handler. * - * The final form can target a specific bookmark, regardless of whether a + * The '-b' option can target a specific bookmark, regardless of whether a * human-readable interface has been designed. It allows developers to specify * a particular block by number. + * + * The '-E' option injects pipeline ready stage delays for the given object or + * bookmark. The delay is specified in milliseconds, and it supports I/O type + * and range filters. */ #include <errno.h> @@ -346,6 +352,13 @@ usage(void) "\t\tsuch that the operation takes a minimum of supplied seconds\n" "\t\tto complete.\n" "\n" + "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n" + "\t\t[-T iotype] [-t type object | -b bookmark pool]\n" + "\n" + "\t\tInject pipeline ready stage delays for the given object path\n" + "\t\t(data or dnode) or raw bookmark. The delay is specified in\n" + "\t\tmilliseconds.\n" + "\n" "\tzinject -I [-s <seconds> | -g <txgs>] pool\n" "\t\tCause the pool to stop writing blocks yet not\n" "\t\treport errors for a duration. Simulates buggy hardware\n" @@ -724,12 +737,15 @@ register_handler(const char *pool, int flags, zinject_record_t *record, if (quiet) { (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); } else { + boolean_t show_object = B_FALSE; + boolean_t show_iotype = B_FALSE; (void) printf("Added handler %llu with the following " "properties:\n", (u_longlong_t)zc.zc_guid); (void) printf(" pool: %s\n", pool); if (record->zi_guid) { (void) printf(" vdev: %llx\n", (u_longlong_t)record->zi_guid); + show_iotype = B_TRUE; } else if (record->zi_func[0] != '\0') { (void) printf(" panic function: %s\n", record->zi_func); @@ -742,7 +758,18 @@ register_handler(const char *pool, int flags, zinject_record_t *record, } else if (record->zi_timer > 0) { (void) printf(" timer: %lld ms\n", (u_longlong_t)NSEC2MSEC(record->zi_timer)); + if (record->zi_cmd == ZINJECT_DELAY_READY) { + show_object = B_TRUE; + show_iotype = B_TRUE; + } } else { + show_object = B_TRUE; + } + if (show_iotype) { + (void) printf("iotype: %s\n", + iotype_to_str(record->zi_iotype)); + } + if (show_object) { (void) printf("objset: %llu\n", (u_longlong_t)record->zi_objset); (void) printf("object: %llu\n", @@ -910,6 +937,7 @@ main(int argc, char **argv) int ret; int flags = 0; uint32_t dvas = 0; + hrtime_t ready_delay = -1; if ((g_zfs = libzfs_init()) == NULL) { (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); @@ -940,7 +968,7 @@ main(int argc, char **argv) } while ((c = getopt(argc, argv, - ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) { + ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) { switch (c) { case 'a': flags |= ZINJECT_FLUSH_ARC; @@ -1113,6 +1141,18 @@ main(int argc, char **argv) case 'u': flags |= ZINJECT_UNLOAD_SPA; break; + case 'E': + ready_delay = MSEC2NSEC(strtol(optarg, &end, 10)); + if (ready_delay <= 0 || *end != '\0') { + (void) fprintf(stderr, "invalid delay '%s': " + "must be a positive duration\n", optarg); + usage(); + libzfs_fini(g_zfs); + return (1); + } + record.zi_cmd = ZINJECT_DELAY_READY; + record.zi_timer = ready_delay; + break; case 'L': if ((label = name_to_type(optarg)) == TYPE_INVAL && !LABEL_TYPE(type)) { @@ -1150,7 +1190,7 @@ main(int argc, char **argv) */ if (raw != NULL || range != NULL || type != TYPE_INVAL || level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || - record.zi_freq > 0 || dvas != 0) { + record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) { (void) fprintf(stderr, "cancel (-c) incompatible with " "any other options\n"); usage(); @@ -1186,7 +1226,7 @@ main(int argc, char **argv) */ if (raw != NULL || range != NULL || type != TYPE_INVAL || level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || - dvas != 0) { + dvas != 0 || ready_delay >= 0) { (void) fprintf(stderr, "device (-d) incompatible with " "data error injection\n"); usage(); @@ -1276,13 +1316,23 @@ main(int argc, char **argv) return (1); } - record.zi_cmd = ZINJECT_DATA_FAULT; + if (record.zi_cmd == ZINJECT_UNINITIALIZED) { + record.zi_cmd = ZINJECT_DATA_FAULT; + if (!error) + error = EIO; + } else if (error != 0) { + (void) fprintf(stderr, "error type -e incompatible " + "with delay injection\n"); + libzfs_fini(g_zfs); + return (1); + } else { + record.zi_iotype = io_type; + } + if (translate_raw(raw, &record) != 0) { libzfs_fini(g_zfs); return (1); } - if (!error) - error = EIO; } else if (record.zi_cmd == ZINJECT_PANIC) { if (raw != NULL || range != NULL || type != TYPE_INVAL || level != 0 || device != NULL || record.zi_freq > 0 || @@ -1410,6 +1460,13 @@ main(int argc, char **argv) record.zi_dvas = dvas; } + if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) { + (void) fprintf(stderr, "error type -e incompatible " + "with delay injection\n"); + libzfs_fini(g_zfs); + return (1); + } + if (error == EACCES) { if (type != TYPE_DATA) { (void) fprintf(stderr, "decryption errors " @@ -1425,8 +1482,12 @@ main(int argc, char **argv) * not found. */ error = ECKSUM; - } else { + } else if (record.zi_cmd == ZINJECT_UNINITIALIZED) { record.zi_cmd = ZINJECT_DATA_FAULT; + if (!error) + error = EIO; + } else { + record.zi_iotype = io_type; } if (translate_record(type, argv[0], range, level, &record, pool, @@ -1434,8 +1495,6 @@ main(int argc, char **argv) libzfs_fini(g_zfs); return (1); } - if (!error) - error = EIO; } /* diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am index 2f962408e5a3..5bb6d8160b18 100644 --- a/sys/contrib/openzfs/cmd/zpool/Makefile.am +++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am @@ -148,6 +148,7 @@ dist_zpoolcompat_DATA = \ %D%/compatibility.d/openzfs-2.1-linux \ %D%/compatibility.d/openzfs-2.2 \ %D%/compatibility.d/openzfs-2.3 \ + %D%/compatibility.d/openzfs-2.4 \ %D%/compatibility.d/openzfsonosx-1.7.0 \ %D%/compatibility.d/openzfsonosx-1.8.1 \ %D%/compatibility.d/openzfsonosx-1.9.3 \ @@ -187,7 +188,9 @@ zpoolcompatlinks = \ "openzfs-2.2 openzfs-2.2-linux" \ "openzfs-2.2 openzfs-2.2-freebsd" \ "openzfs-2.3 openzfs-2.3-linux" \ - "openzfs-2.3 openzfs-2.3-freebsd" + "openzfs-2.3 openzfs-2.3-freebsd" \ + "openzfs-2.4 openzfs-2.4-linux" \ + "openzfs-2.4 openzfs-2.4-freebsd" zpoolconfdir = $(sysconfdir)/zfs/zpool.d INSTALL_DATA_HOOKS += zpool-install-data-hook diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 new file mode 100644 index 000000000000..3fbd91014c95 --- /dev/null +++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 @@ -0,0 +1,48 @@ +# Features supported by OpenZFS 2.4 on Linux and FreeBSD +allocation_classes +async_destroy +blake3 +block_cloning +block_cloning_endian +bookmark_v2 +bookmark_written +bookmarks +device_rebuild +device_removal +draid +dynamic_gang_header +edonr +embedded_data +empty_bpobj +enabled_txg +encryption +extensible_dataset +fast_dedup +filesystem_limits +head_errlog +hole_birth +large_blocks +large_dnode +large_microzap +livelist +log_spacemap +longname +lz4_compress +multi_vdev_crash_dump +obsolete_counts +physical_rewrite +project_quota +raidz_expansion +redacted_datasets +redaction_bookmarks +redaction_list_spill +resilver_defer +sha512 +skein +spacemap_histogram +spacemap_v2 +userobj_accounting +vdev_zaps_v2 +zilsaxattr +zpool_checkpoint +zstd_compress diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c index 2ec189b98653..fef602736705 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c @@ -26,6 +26,7 @@ /* * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. + * Copyright (c) 2025, Klara, Inc. */ #include <libintl.h> @@ -52,7 +53,7 @@ typedef struct zpool_node { zpool_handle_t *zn_handle; uu_avl_node_t zn_avlnode; - int zn_mark; + hrtime_t zn_last_refresh; } zpool_node_t; struct zpool_list { @@ -62,6 +63,7 @@ struct zpool_list { uu_avl_pool_t *zl_pool; zprop_list_t **zl_proplist; zfs_type_t zl_type; + hrtime_t zl_last_refresh; }; static int @@ -81,26 +83,30 @@ zpool_compare(const void *larg, const void *rarg, void *unused) * of known pools. */ static int -add_pool(zpool_handle_t *zhp, void *data) +add_pool(zpool_handle_t *zhp, zpool_list_t *zlp) { - zpool_list_t *zlp = data; - zpool_node_t *node = safe_malloc(sizeof (zpool_node_t)); + zpool_node_t *node, *new = safe_malloc(sizeof (zpool_node_t)); uu_avl_index_t idx; - node->zn_handle = zhp; - uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool); - if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { + new->zn_handle = zhp; + uu_avl_node_init(new, &new->zn_avlnode, zlp->zl_pool); + + node = uu_avl_find(zlp->zl_avl, new, NULL, &idx); + if (node == NULL) { if (zlp->zl_proplist && zpool_expand_proplist(zhp, zlp->zl_proplist, zlp->zl_type, zlp->zl_literal) != 0) { zpool_close(zhp); - free(node); + free(new); return (-1); } - uu_avl_insert(zlp->zl_avl, node, idx); + new->zn_last_refresh = zlp->zl_last_refresh; + uu_avl_insert(zlp->zl_avl, new, idx); } else { + zpool_refresh_stats_from_handle(node->zn_handle, zhp); + node->zn_last_refresh = zlp->zl_last_refresh; zpool_close(zhp); - free(node); + free(new); return (-1); } @@ -108,6 +114,18 @@ add_pool(zpool_handle_t *zhp, void *data) } /* + * add_pool(), but always returns 0. This allows zpool_iter() to continue + * even if a pool exists in the tree, or we fail to get the properties for + * a new one. + */ +static int +add_pool_cb(zpool_handle_t *zhp, void *data) +{ + (void) add_pool(zhp, data); + return (0); +} + +/* * Create a list of pools based on the given arguments. If we're given no * arguments, then iterate over all pools in the system and add them to the AVL * tree. Otherwise, add only those pool explicitly specified on the command @@ -135,9 +153,10 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type, zlp->zl_type = type; zlp->zl_literal = literal; + zlp->zl_last_refresh = gethrtime(); if (argc == 0) { - (void) zpool_iter(g_zfs, add_pool, zlp); + (void) zpool_iter(g_zfs, add_pool_cb, zlp); zlp->zl_findall = B_TRUE; } else { int i; @@ -159,15 +178,61 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type, } /* - * Search for any new pools, adding them to the list. We only add pools when no - * options were given on the command line. Otherwise, we keep the list fixed as - * those that were explicitly specified. + * Refresh the state of all pools on the list. Additionally, if no options were + * given on the command line, add any new pools and remove any that are no + * longer available. */ -void -pool_list_update(zpool_list_t *zlp) +int +pool_list_refresh(zpool_list_t *zlp) { - if (zlp->zl_findall) - (void) zpool_iter(g_zfs, add_pool, zlp); + zlp->zl_last_refresh = gethrtime(); + + if (!zlp->zl_findall) { + /* + * This list is a fixed list of pools, so we must not add + * or remove any. Just walk over them and refresh their + * state. + */ + int navail = 0; + for (zpool_node_t *node = uu_avl_first(zlp->zl_avl); + node != NULL; node = uu_avl_next(zlp->zl_avl, node)) { + boolean_t missing; + zpool_refresh_stats(node->zn_handle, &missing); + navail += !missing; + node->zn_last_refresh = zlp->zl_last_refresh; + } + return (navail); + } + + /* Search for any new pools and add them to the list. */ + (void) zpool_iter(g_zfs, add_pool_cb, zlp); + + /* Walk the list of existing pools, and update or remove them. */ + zpool_node_t *node, *next; + for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next) { + next = uu_avl_next(zlp->zl_avl, node); + + /* + * Skip any that were refreshed and are online; they were added + * by zpool_iter() and are already up to date. + */ + if (node->zn_last_refresh == zlp->zl_last_refresh && + zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL) + continue; + + /* Refresh and remove if necessary. */ + boolean_t missing; + zpool_refresh_stats(node->zn_handle, &missing); + if (missing) { + uu_avl_remove(zlp->zl_avl, node); + zpool_close(node->zn_handle); + free(node); + } else { + node->zn_last_refresh = zlp->zl_last_refresh; + } + } + + return (uu_avl_numnodes(zlp->zl_avl)); } /* @@ -191,23 +256,6 @@ pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func, } /* - * Remove the given pool from the list. When running iostat, we want to remove - * those pools that no longer exist. - */ -void -pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp) -{ - zpool_node_t search, *node; - - search.zn_handle = zhp; - if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) { - uu_avl_remove(zlp->zl_avl, node); - zpool_close(node->zn_handle); - free(node); - } -} - -/* * Free all the handles associated with this list. */ void @@ -379,8 +427,8 @@ process_unique_cmd_columns(vdev_cmd_data_list_t *vcdl) static int vdev_process_cmd_output(vdev_cmd_data_t *data, char *line) { - char *col = NULL; - char *val = line; + char *col; + char *val; char *equals; char **tmp; @@ -397,6 +445,7 @@ vdev_process_cmd_output(vdev_cmd_data_t *data, char *line) col = line; val = equals + 1; } else { + col = NULL; val = line; } diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index e62441894cd7..a6658a9c2800 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -33,8 +33,8 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com> * Copyright (c) 2021, Colm Buckley <colm@tuatha.org> - * Copyright (c) 2021, 2023, Klara Inc. - * Copyright [2021] Hewlett Packard Enterprise Development LP + * Copyright (c) 2021, 2023, 2025, Klara, Inc. + * Copyright (c) 2021, 2025 Hewlett Packard Enterprise Development LP. */ #include <assert.h> @@ -456,7 +456,7 @@ get_usage(zpool_help_t idx) "<pool> <vdev> ...\n")); case HELP_ATTACH: return (gettext("\tattach [-fsw] [-o property=value] " - "<pool> <device> <new-device>\n")); + "<pool> <vdev> <new-device>\n")); case HELP_CLEAR: return (gettext("\tclear [[--power]|[-nF]] <pool> [device]\n")); case HELP_CREATE: @@ -510,16 +510,16 @@ get_usage(zpool_help_t idx) case HELP_REOPEN: return (gettext("\treopen [-n] <pool>\n")); case HELP_INITIALIZE: - return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> " - "[<device> ...]\n")); + return (gettext("\tinitialize [-c | -s | -u] [-w] <-a | <pool> " + "[<device> ...]>\n")); case HELP_SCRUB: - return (gettext("\tscrub [-e | -s | -p | -C] [-w] " - "<pool> ...\n")); + return (gettext("\tscrub [-e | -s | -p | -C | -E | -S] [-w] " + "<-a | <pool> [<pool> ...]>\n")); case HELP_RESILVER: return (gettext("\tresilver <pool> ...\n")); case HELP_TRIM: - return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] <pool> " - "[<device> ...]\n")); + return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] " + "<-a | <pool> [<device> ...]>\n")); case HELP_STATUS: return (gettext("\tstatus [-DdegiLPpstvx] " "[-c script1[,script2,...]] ...\n" @@ -560,33 +560,6 @@ get_usage(zpool_help_t idx) } } -static void -zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res) -{ - uint_t children = 0; - nvlist_t **child; - uint_t i; - - (void) nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &child, &children); - - if (children == 0) { - char *path = zpool_vdev_name(g_zfs, zhp, nvroot, - VDEV_NAME_PATH); - - if (strcmp(path, VDEV_TYPE_INDIRECT) != 0 && - strcmp(path, VDEV_TYPE_HOLE) != 0) - fnvlist_add_boolean(res, path); - - free(path); - return; - } - - for (i = 0; i < children; i++) { - zpool_collect_leaves(zhp, child[i], res); - } -} - /* * Callback routine that will print out a pool property value. */ @@ -779,10 +752,11 @@ usage(boolean_t requested) } /* - * zpool initialize [-c | -s | -u] [-w] <pool> [<vdev> ...] + * zpool initialize [-c | -s | -u] [-w] <-a | pool> [<vdev> ...] * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool * if none specified. * + * -a Use all pools. * -c Cancel. Ends active initializing. * -s Suspend. Initializing can then be restarted with no flags. * -u Uninitialize. Clears initialization state. @@ -794,22 +768,26 @@ zpool_do_initialize(int argc, char **argv) int c; char *poolname; zpool_handle_t *zhp; - nvlist_t *vdevs; int err = 0; boolean_t wait = B_FALSE; + boolean_t initialize_all = B_FALSE; struct option long_options[] = { {"cancel", no_argument, NULL, 'c'}, {"suspend", no_argument, NULL, 's'}, {"uninit", no_argument, NULL, 'u'}, {"wait", no_argument, NULL, 'w'}, + {"all", no_argument, NULL, 'a'}, {0, 0, 0, 0} }; pool_initialize_func_t cmd_type = POOL_INITIALIZE_START; - while ((c = getopt_long(argc, argv, "csuw", long_options, + while ((c = getopt_long(argc, argv, "acsuw", long_options, NULL)) != -1) { switch (c) { + case 'a': + initialize_all = B_TRUE; + break; case 'c': if (cmd_type != POOL_INITIALIZE_START && cmd_type != POOL_INITIALIZE_CANCEL) { @@ -856,7 +834,18 @@ zpool_do_initialize(int argc, char **argv) argc -= optind; argv += optind; - if (argc < 1) { + initialize_cbdata_t cbdata = { + .wait = wait, + .cmd_type = cmd_type + }; + + if (initialize_all && argc > 0) { + (void) fprintf(stderr, gettext("-a cannot be combined with " + "individual pools or vdevs\n")); + usage(B_FALSE); + } + + if (argc < 1 && !initialize_all) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); return (-1); @@ -868,30 +857,35 @@ zpool_do_initialize(int argc, char **argv) usage(B_FALSE); } - poolname = argv[0]; - zhp = zpool_open(g_zfs, poolname); - if (zhp == NULL) - return (-1); - - vdevs = fnvlist_alloc(); - if (argc == 1) { - /* no individual leaf vdevs specified, so add them all */ - nvlist_t *config = zpool_get_config(zhp, NULL); - nvlist_t *nvroot = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE); - zpool_collect_leaves(zhp, nvroot, vdevs); + if (argc == 0 && initialize_all) { + /* Initilize each pool recursively */ + err = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, zpool_initialize_one, &cbdata); + return (err); + } else if (argc == 1) { + /* no individual leaf vdevs specified, initialize the pool */ + poolname = argv[0]; + zhp = zpool_open(g_zfs, poolname); + if (zhp == NULL) + return (-1); + err = zpool_initialize_one(zhp, &cbdata); } else { + /* individual leaf vdevs specified, initialize them */ + poolname = argv[0]; + zhp = zpool_open(g_zfs, poolname); + if (zhp == NULL) + return (-1); + nvlist_t *vdevs = fnvlist_alloc(); for (int i = 1; i < argc; i++) { fnvlist_add_boolean(vdevs, argv[i]); } + if (wait) + err = zpool_initialize_wait(zhp, cmd_type, vdevs); + else + err = zpool_initialize(zhp, cmd_type, vdevs); + fnvlist_free(vdevs); } - if (wait) - err = zpool_initialize_wait(zhp, cmd_type, vdevs); - else - err = zpool_initialize(zhp, cmd_type, vdevs); - - fnvlist_free(vdevs); zpool_close(zhp); return (err); @@ -1788,7 +1782,7 @@ zpool_do_labelclear(int argc, char **argv) { char vdev[MAXPATHLEN]; char *name = NULL; - int c, fd = -1, ret = 0; + int c, fd, ret = 0; nvlist_t *config; pool_state_t state; boolean_t inuse = B_FALSE; @@ -5767,24 +5761,6 @@ children: return (ret); } -static int -refresh_iostat(zpool_handle_t *zhp, void *data) -{ - iostat_cbdata_t *cb = data; - boolean_t missing; - - /* - * If the pool has disappeared, remove it from the list and continue. - */ - if (zpool_refresh_stats(zhp, &missing) != 0) - return (-1); - - if (missing) - pool_list_remove(cb->cb_list, zhp); - - return (0); -} - /* * Callback to print out the iostats for the given pool. */ @@ -6157,7 +6133,6 @@ static void get_interval_count_filter_guids(int *argc, char **argv, float *interval, unsigned long *count, iostat_cbdata_t *cb) { - char **tmpargv = argv; int argc_for_interval = 0; /* Is the last arg an interval value? Or a guid? */ @@ -6181,7 +6156,7 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval, } /* Point to our list of possible intervals */ - tmpargv = &argv[*argc - argc_for_interval]; + char **tmpargv = &argv[*argc - argc_for_interval]; *argc = *argc - argc_for_interval; get_interval_count(&argc_for_interval, tmpargv, @@ -6366,18 +6341,16 @@ get_namewidth_iostat(zpool_handle_t *zhp, void *data) * This command can be tricky because we want to be able to deal with pool * creation/destruction as well as vdev configuration changes. The bulk of this * processing is handled by the pool_list_* routines in zpool_iter.c. We rely - * on pool_list_update() to detect the addition of new pools. Configuration - * changes are all handled within libzfs. + * on pool_list_refresh() to detect the addition and removal of pools. + * Configuration changes are all handled within libzfs. */ int zpool_do_iostat(int argc, char **argv) { int c; int ret; - int npools; float interval = 0; unsigned long count = 0; - int winheight = 24; zpool_list_t *list; boolean_t verbose = B_FALSE; boolean_t latency = B_FALSE, l_histo = B_FALSE, rq_histo = B_FALSE; @@ -6626,10 +6599,24 @@ zpool_do_iostat(int argc, char **argv) return (1); } + int last_npools = 0; for (;;) { - if ((npools = pool_list_count(list)) == 0) + /* + * Refresh all pools in list, adding or removing pools as + * necessary. + */ + int npools = pool_list_refresh(list); + if (npools == 0) { (void) fprintf(stderr, gettext("no pools available\n")); - else { + } else { + /* + * If the list of pools has changed since last time + * around, reset the iteration count to force the + * header to be redisplayed. + */ + if (last_npools != npools) + cb.cb_iteration = 0; + /* * If this is the first iteration and -y was supplied * we skip any printing. @@ -6638,15 +6625,6 @@ zpool_do_iostat(int argc, char **argv) cb.cb_iteration == 0); /* - * Refresh all statistics. This is done as an - * explicit step before calculating the maximum name - * width, so that any * configuration changes are - * properly accounted for. - */ - (void) pool_list_iter(list, B_FALSE, refresh_iostat, - &cb); - - /* * Iterate over all pools to determine the maximum width * for the pool / device name column across all pools. */ @@ -6673,7 +6651,7 @@ zpool_do_iostat(int argc, char **argv) * even when terminal window has its height * changed. */ - winheight = terminal_height(); + int winheight = terminal_height(); /* * Are we connected to TTY? If not, headers_once * should be true, to avoid breaking scripts. @@ -6699,6 +6677,7 @@ zpool_do_iostat(int argc, char **argv) if (skip) { (void) fflush(stdout); (void) fsleep(interval); + last_npools = npools; continue; } @@ -6736,6 +6715,8 @@ zpool_do_iostat(int argc, char **argv) (void) fflush(stdout); (void) fsleep(interval); + + last_npools = npools; } pool_list_free(list); @@ -6994,7 +6975,6 @@ collect_vdev_prop(zpool_prop_t prop, uint64_t value, const char *str, /* * print static default line per vdev - * not compatible with '-o' <proplist> option */ static void collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, @@ -7050,48 +7030,98 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, * 'toplevel' boolean value is passed to the print_one_column() * to indicate that the value is valid. */ - if (VDEV_STAT_VALID(vs_pspace, c) && vs->vs_pspace) { - collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL, - scripted, B_TRUE, format, cb->cb_json, props, - cb->cb_json_as_int); - } else { - collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_space, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); + for (zprop_list_t *pl = cb->cb_proplist; pl != NULL; + pl = pl->pl_next) { + switch (pl->pl_prop) { + case ZPOOL_PROP_SIZE: + if (VDEV_STAT_VALID(vs_pspace, c) && + vs->vs_pspace) { + collect_vdev_prop( + ZPOOL_PROP_SIZE, vs->vs_pspace, + NULL, scripted, B_TRUE, format, + cb->cb_json, props, + cb->cb_json_as_int); + } else { + collect_vdev_prop( + ZPOOL_PROP_SIZE, vs->vs_space, NULL, + scripted, toplevel, format, + cb->cb_json, props, + cb->cb_json_as_int); + } + break; + case ZPOOL_PROP_ALLOCATED: + collect_vdev_prop(ZPOOL_PROP_ALLOCATED, + vs->vs_alloc, NULL, scripted, toplevel, + format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_FREE: + collect_vdev_prop(ZPOOL_PROP_FREE, + vs->vs_space - vs->vs_alloc, NULL, scripted, + toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_CHECKPOINT: + collect_vdev_prop(ZPOOL_PROP_CHECKPOINT, + vs->vs_checkpoint_space, NULL, scripted, + toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_EXPANDSZ: + collect_vdev_prop(ZPOOL_PROP_EXPANDSZ, + vs->vs_esize, NULL, scripted, B_TRUE, + format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_FRAGMENTATION: + collect_vdev_prop( + ZPOOL_PROP_FRAGMENTATION, + vs->vs_fragmentation, NULL, scripted, + (vs->vs_fragmentation != ZFS_FRAG_INVALID && + toplevel), + format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_CAPACITY: + cap = (vs->vs_space == 0) ? + 0 : (vs->vs_alloc * 10000 / vs->vs_space); + collect_vdev_prop(ZPOOL_PROP_CAPACITY, cap, + NULL, scripted, toplevel, format, + cb->cb_json, props, cb->cb_json_as_int); + break; + + case ZPOOL_PROP_HEALTH: + state = zpool_state_to_name(vs->vs_state, + vs->vs_aux); + if (isspare) { + if (vs->vs_aux == VDEV_AUX_SPARED) + state = "INUSE"; + else if (vs->vs_state == + VDEV_STATE_HEALTHY) + state = "AVAIL"; + } + collect_vdev_prop(ZPOOL_PROP_HEALTH, 0, state, + scripted, B_TRUE, format, cb->cb_json, + props, cb->cb_json_as_int); + break; + + case ZPOOL_PROP_NAME: + break; + + default: + collect_vdev_prop(pl->pl_prop, 0, + NULL, scripted, B_FALSE, format, + cb->cb_json, props, cb->cb_json_as_int); + + } + + } - collect_vdev_prop(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, - NULL, scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_CHECKPOINT, - vs->vs_checkpoint_space, NULL, scripted, toplevel, format, - cb->cb_json, props, cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, NULL, - scripted, B_TRUE, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_FRAGMENTATION, - vs->vs_fragmentation, NULL, scripted, - (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel), - format, cb->cb_json, props, cb->cb_json_as_int); - cap = (vs->vs_space == 0) ? 0 : - (vs->vs_alloc * 10000 / vs->vs_space); - collect_vdev_prop(ZPOOL_PROP_CAPACITY, cap, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_DEDUPRATIO, 0, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - state = zpool_state_to_name(vs->vs_state, vs->vs_aux); - if (isspare) { - if (vs->vs_aux == VDEV_AUX_SPARED) - state = "INUSE"; - else if (vs->vs_state == VDEV_STATE_HEALTHY) - state = "AVAIL"; - } - collect_vdev_prop(ZPOOL_PROP_HEALTH, 0, state, scripted, - B_TRUE, format, cb->cb_json, props, cb->cb_json_as_int); if (cb->cb_json) { fnvlist_add_nvlist(ent, "properties", props); @@ -7652,7 +7682,7 @@ zpool_do_replace(int argc, char **argv) } /* - * zpool attach [-fsw] [-o property=value] <pool> <device>|<vdev> <new_device> + * zpool attach [-fsw] [-o property=value] <pool> <vdev> <new_device> * * -f Force attach, even if <new_device> appears to be in use. * -s Use sequential instead of healing reconstruction for resilver. @@ -7660,9 +7690,9 @@ zpool_do_replace(int argc, char **argv) * -w Wait for resilvering (mirror) or expansion (raidz) to complete * before returning. * - * Attach <new_device> to a <device> or <vdev>, where the vdev can be of type - * mirror or raidz. If <device> is not part of a mirror, then <device> will - * be transformed into a mirror of <device> and <new_device>. When a mirror + * Attach <new_device> to a <vdev>, where the vdev can be of type + * device, mirror or raidz. If <vdev> is not part of a mirror, then <vdev> will + * be transformed into a mirror of <vdev> and <new_device>. When a mirror * is involved, <new_device> will begin life with a DTL of [0, now], and will * immediately begin to resilver itself. For the raidz case, a expansion will * commence and reflow the raidz data across all the disks including the @@ -8368,6 +8398,8 @@ zpool_do_reopen(int argc, char **argv) typedef struct scrub_cbdata { int cb_type; pool_scrub_cmd_t cb_scrub_cmd; + time_t cb_date_start; + time_t cb_date_end; } scrub_cbdata_t; static boolean_t @@ -8411,8 +8443,8 @@ scrub_callback(zpool_handle_t *zhp, void *data) return (1); } - err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd); - + err = zpool_scan_range(zhp, cb->cb_type, cb->cb_scrub_cmd, + cb->cb_date_start, cb->cb_date_end); if (err == 0 && zpool_has_checkpoint(zhp) && cb->cb_type == POOL_SCAN_SCRUB) { (void) printf(gettext("warning: will not scrub state that " @@ -8430,10 +8462,35 @@ wait_callback(zpool_handle_t *zhp, void *data) return (zpool_wait(zhp, *act)); } +static time_t +date_string_to_sec(const char *timestr, boolean_t rounding) +{ + struct tm tm = {0}; + int adjustment = rounding ? 1 : 0; + + /* Allow mktime to determine timezone. */ + tm.tm_isdst = -1; + + if (strptime(timestr, "%Y-%m-%d %H:%M", &tm) == NULL) { + if (strptime(timestr, "%Y-%m-%d", &tm) == NULL) { + fprintf(stderr, gettext("Failed to parse the date.\n")); + usage(B_FALSE); + } + adjustment *= 24 * 60 * 60; + } else { + adjustment *= 60; + } + + return (mktime(&tm) + adjustment); +} + /* - * zpool scrub [-e | -s | -p | -C] [-w] <pool> ... + * zpool scrub [-e | -s | -p | -C | -E | -S] [-w] [-a | <pool> ...] * + * -a Scrub all pools. * -e Only scrub blocks in the error log. + * -E End date of scrub. + * -S Start date of scrub. * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. * -w Wait. Blocks until scrub has completed. @@ -8449,21 +8506,36 @@ zpool_do_scrub(int argc, char **argv) cb.cb_type = POOL_SCAN_SCRUB; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + cb.cb_date_start = cb.cb_date_end = 0; boolean_t is_error_scrub = B_FALSE; boolean_t is_pause = B_FALSE; boolean_t is_stop = B_FALSE; boolean_t is_txg_continue = B_FALSE; + boolean_t scrub_all = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, "spweC")) != -1) { + while ((c = getopt(argc, argv, "aspweCE:S:")) != -1) { switch (c) { + case 'a': + scrub_all = B_TRUE; + break; case 'e': is_error_scrub = B_TRUE; break; + case 'E': + /* + * Round the date. It's better to scrub more data than + * less. This also makes the date inclusive. + */ + cb.cb_date_end = date_string_to_sec(optarg, B_TRUE); + break; case 's': is_stop = B_TRUE; break; + case 'S': + cb.cb_date_start = date_string_to_sec(optarg, B_FALSE); + break; case 'p': is_pause = B_TRUE; break; @@ -8511,6 +8583,19 @@ zpool_do_scrub(int argc, char **argv) } } + if ((cb.cb_date_start != 0 || cb.cb_date_end != 0) && + cb.cb_scrub_cmd != POOL_SCRUB_NORMAL) { + (void) fprintf(stderr, gettext("invalid option combination: " + "start/end date is available only with normal scrub\n")); + usage(B_FALSE); + } + if (cb.cb_date_start != 0 && cb.cb_date_end != 0 && + cb.cb_date_start > cb.cb_date_end) { + (void) fprintf(stderr, gettext("invalid arguments: " + "end date has to be later than start date\n")); + usage(B_FALSE); + } + if (wait && (cb.cb_type == POOL_SCAN_NONE || cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) { (void) fprintf(stderr, gettext("invalid option combination: " @@ -8521,7 +8606,7 @@ zpool_do_scrub(int argc, char **argv) argc -= optind; argv += optind; - if (argc < 1) { + if (argc < 1 && !scrub_all) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } @@ -8551,6 +8636,7 @@ zpool_do_resilver(int argc, char **argv) cb.cb_type = POOL_SCAN_RESILVER; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + cb.cb_date_start = cb.cb_date_end = 0; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { @@ -8575,8 +8661,9 @@ zpool_do_resilver(int argc, char **argv) } /* - * zpool trim [-d] [-r <rate>] [-c | -s] <pool> [<device> ...] + * zpool trim [-d] [-r <rate>] [-c | -s] <-a | pool> [<device> ...] * + * -a Trim all pools. * -c Cancel. Ends any in-progress trim. * -d Secure trim. Requires kernel and device support. * -r <rate> Sets the TRIM rate in bytes (per second). Supports @@ -8593,6 +8680,7 @@ zpool_do_trim(int argc, char **argv) {"rate", required_argument, NULL, 'r'}, {"suspend", no_argument, NULL, 's'}, {"wait", no_argument, NULL, 'w'}, + {"all", no_argument, NULL, 'a'}, {0, 0, 0, 0} }; @@ -8600,11 +8688,16 @@ zpool_do_trim(int argc, char **argv) uint64_t rate = 0; boolean_t secure = B_FALSE; boolean_t wait = B_FALSE; + boolean_t trimall = B_FALSE; + int error; int c; - while ((c = getopt_long(argc, argv, "cdr:sw", long_options, NULL)) + while ((c = getopt_long(argc, argv, "acdr:sw", long_options, NULL)) != -1) { switch (c) { + case 'a': + trimall = B_TRUE; + break; case 'c': if (cmd_type != POOL_TRIM_START && cmd_type != POOL_TRIM_CANCEL) { @@ -8663,7 +8756,18 @@ zpool_do_trim(int argc, char **argv) argc -= optind; argv += optind; - if (argc < 1) { + trimflags_t trim_flags = { + .secure = secure, + .rate = rate, + .wait = wait, + }; + + trim_cbdata_t cbdata = { + .trim_flags = trim_flags, + .cmd_type = cmd_type + }; + + if (argc < 1 && !trimall) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); return (-1); @@ -8671,41 +8775,46 @@ zpool_do_trim(int argc, char **argv) if (wait && (cmd_type != POOL_TRIM_START)) { (void) fprintf(stderr, gettext("-w cannot be used with -c or " - "-s\n")); + "-s options\n")); usage(B_FALSE); } - char *poolname = argv[0]; - zpool_handle_t *zhp = zpool_open(g_zfs, poolname); - if (zhp == NULL) - return (-1); - - trimflags_t trim_flags = { - .secure = secure, - .rate = rate, - .wait = wait, - }; + if (trimall && argc > 0) { + (void) fprintf(stderr, gettext("-a cannot be combined with " + "individual zpools or vdevs\n")); + usage(B_FALSE); + } - nvlist_t *vdevs = fnvlist_alloc(); - if (argc == 1) { + if (argc == 0 && trimall) { + cbdata.trim_flags.fullpool = B_TRUE; + /* Trim each pool recursively */ + error = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, zpool_trim_one, &cbdata); + } else if (argc == 1) { + char *poolname = argv[0]; + zpool_handle_t *zhp = zpool_open(g_zfs, poolname); + if (zhp == NULL) + return (-1); /* no individual leaf vdevs specified, so add them all */ - nvlist_t *config = zpool_get_config(zhp, NULL); - nvlist_t *nvroot = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE); - zpool_collect_leaves(zhp, nvroot, vdevs); - trim_flags.fullpool = B_TRUE; + error = zpool_trim_one(zhp, &cbdata); + zpool_close(zhp); } else { - trim_flags.fullpool = B_FALSE; + char *poolname = argv[0]; + zpool_handle_t *zhp = zpool_open(g_zfs, poolname); + if (zhp == NULL) + return (-1); + /* leaf vdevs specified, trim only those */ + cbdata.trim_flags.fullpool = B_FALSE; + nvlist_t *vdevs = fnvlist_alloc(); for (int i = 1; i < argc; i++) { fnvlist_add_boolean(vdevs, argv[i]); } + error = zpool_trim(zhp, cbdata.cmd_type, vdevs, + &cbdata.trim_flags); + fnvlist_free(vdevs); + zpool_close(zhp); } - int error = zpool_trim(zhp, cmd_type, vdevs, &trim_flags); - - fnvlist_free(vdevs); - zpool_close(zhp); - return (error); } @@ -10706,7 +10815,6 @@ status_callback_json(zpool_handle_t *zhp, void *data) uint_t c; vdev_stat_t *vs; nvlist_t *item, *d, *load_info, *vds; - item = d = NULL; /* If dedup stats were requested, also fetch dedupcached. */ if (cbp->cb_dedup_stats > 1) @@ -11330,7 +11438,8 @@ upgrade_enable_all(zpool_handle_t *zhp, int *countp) const char *fname = spa_feature_table[i].fi_uname; const char *fguid = spa_feature_table[i].fi_guid; - if (!spa_feature_table[i].fi_zfs_mod_supported) + if (!spa_feature_table[i].fi_zfs_mod_supported || + (spa_feature_table[i].fi_flags & ZFEATURE_FLAG_NO_UPGRADE)) continue; if (!nvlist_exists(enabled, fguid) && requested_features[i]) { @@ -11485,7 +11594,11 @@ upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) "Note that the pool " "'compatibility' feature can be " "used to inhibit\nfeature " - "upgrades.\n\n")); + "upgrades.\n\n" + "Features marked with (*) are not " + "applied automatically on upgrade, " + "and\nmust be applied explicitly " + "with zpool-set(7).\n\n")); (void) printf(gettext("POOL " "FEATURE\n")); (void) printf(gettext("------" @@ -11499,7 +11612,9 @@ upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) poolfirst = B_FALSE; } - (void) printf(gettext(" %s\n"), fname); + (void) printf(gettext(" %s%s\n"), fname, + spa_feature_table[i].fi_flags & + ZFEATURE_FLAG_NO_UPGRADE ? "(*)" : ""); } /* * If they did "zpool upgrade -a", then we could @@ -12300,7 +12415,7 @@ zpool_do_events_next(ev_opts_t *opts) nvlist_free(nvl); } - VERIFY(0 == close(zevent_fd)); + VERIFY0(close(zevent_fd)); return (ret); } diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_util.h b/sys/contrib/openzfs/cmd/zpool/zpool_util.h index 5ab7cb9750f1..3af23c52bd45 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_util.h +++ b/sys/contrib/openzfs/cmd/zpool/zpool_util.h @@ -76,11 +76,10 @@ typedef struct zpool_list zpool_list_t; zpool_list_t *pool_list_get(int, char **, zprop_list_t **, zfs_type_t, boolean_t, int *); -void pool_list_update(zpool_list_t *); +int pool_list_refresh(zpool_list_t *); int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); void pool_list_free(zpool_list_t *); int pool_list_count(zpool_list_t *); -void pool_list_remove(zpool_list_t *, zpool_handle_t *); extern libzfs_handle_t *g_zfs; diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c index 07868a30d7e7..222b5524669e 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c @@ -270,14 +270,13 @@ is_spare(nvlist_t *config, const char *path) * draid* Virtual dRAID spare */ static nvlist_t * -make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) +make_leaf_vdev(const char *arg, boolean_t is_primary, uint64_t ashift) { char path[MAXPATHLEN]; struct stat64 statbuf; nvlist_t *vdev = NULL; const char *type = NULL; boolean_t wholedisk = B_FALSE; - uint64_t ashift = 0; int err; /* @@ -382,31 +381,6 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) (uint64_t)wholedisk) == 0); /* - * Override defaults if custom properties are provided. - */ - if (props != NULL) { - const char *value = NULL; - - if (nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { - if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { - (void) fprintf(stderr, - gettext("ashift must be a number.\n")); - return (NULL); - } - if (ashift != 0 && - (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { - (void) fprintf(stderr, - gettext("invalid 'ashift=%" PRIu64 "' " - "property: only values between %" PRId32 " " - "and %" PRId32 " are allowed.\n"), - ashift, ASHIFT_MIN, ASHIFT_MAX); - return (NULL); - } - } - } - - /* * If the device is known to incorrectly report its physical sector * size explicitly provide the known correct value. */ @@ -574,7 +548,6 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) nvlist_t *cnv = child[c]; const char *path; struct stat64 statbuf; - int64_t size = -1LL; const char *childtype; int fd, err; @@ -610,22 +583,28 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) ZPOOL_CONFIG_PATH, &path) == 0); /* + * Skip active spares they should never cause + * the pool to be evaluated as inconsistent. + */ + if (is_spare(NULL, path)) + continue; + + /* * If we have a raidz/mirror that combines disks - * with files, report it as an error. + * with files, only report it as an error when + * fatal is set to ensure all the replication + * checks aren't skipped in check_replication(). */ - if (!dontreport && type != NULL && + if (fatal && !dontreport && type != NULL && strcmp(type, childtype) != 0) { if (ret != NULL) free(ret); ret = NULL; - if (fatal) - vdev_error(gettext( - "mismatched replication " - "level: %s contains both " - "files and devices\n"), - rep.zprl_type); - else - return (NULL); + vdev_error(gettext( + "mismatched replication " + "level: %s contains both " + "files and devices\n"), + rep.zprl_type); dontreport = B_TRUE; } @@ -656,7 +635,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal) statbuf.st_size == MAXOFFSET_T) continue; - size = statbuf.st_size; + int64_t size = statbuf.st_size; /* * Also make sure that devices and @@ -876,6 +855,18 @@ check_replication(nvlist_t *config, nvlist_t *newroot) (u_longlong_t)mirror->zprl_children); ret = -1; } + } else if (is_raidz_draid(current, new)) { + if (current->zprl_parity != new->zprl_parity) { + vdev_error(gettext( + "mismatched replication level: pool and " + "new vdev with different redundancy, %s " + "and %s vdevs, %llu vs. %llu\n"), + current->zprl_type, + new->zprl_type, + (u_longlong_t)current->zprl_parity, + (u_longlong_t)new->zprl_parity); + ret = -1; + } } else if (strcmp(current->zprl_type, new->zprl_type) != 0) { vdev_error(gettext( "mismatched replication level: pool uses %s " @@ -1353,7 +1344,7 @@ is_grouping(const char *type, int *mindev, int *maxdev) static int draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children) { - uint64_t nparity = 1; + uint64_t nparity; uint64_t nspares = 0; uint64_t ndata = UINT64_MAX; uint64_t ngroups = 1; @@ -1496,6 +1487,29 @@ construct_spec(nvlist_t *props, int argc, char **argv) const char *type, *fulltype; boolean_t is_log, is_special, is_dedup, is_spare; boolean_t seen_logs; + uint64_t ashift = 0; + + if (props != NULL) { + const char *value = NULL; + + if (nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { + if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { + (void) fprintf(stderr, + gettext("ashift must be a number.\n")); + return (NULL); + } + if (ashift != 0 && + (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { + (void) fprintf(stderr, + gettext("invalid 'ashift=%" PRIu64 "' " + "property: only values between %" PRId32 " " + "and %" PRId32 " are allowed.\n"), + ashift, ASHIFT_MIN, ASHIFT_MAX); + return (NULL); + } + } + } top = NULL; toplevels = 0; @@ -1581,13 +1595,12 @@ construct_spec(nvlist_t *props, int argc, char **argv) is_dedup = is_spare = B_FALSE; } - if (is_log || is_special || is_dedup) { + if (is_log) { if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { (void) fprintf(stderr, gettext("invalid vdev " - "specification: unsupported '%s' " - "device: %s\n"), is_log ? "log" : - "special", type); + "specification: unsupported 'log' " + "device: %s\n"), type); goto spec_out; } nlogs++; @@ -1602,9 +1615,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) children * sizeof (nvlist_t *)); if (child == NULL) zpool_no_memory(); - if ((nv = make_leaf_vdev(props, argv[c], + if ((nv = make_leaf_vdev(argv[c], !(is_log || is_special || is_dedup || - is_spare))) == NULL) { + is_spare), ashift)) == NULL) { for (c = 0; c < children - 1; c++) nvlist_free(child[c]); free(child); @@ -1668,6 +1681,10 @@ construct_spec(nvlist_t *props, int argc, char **argv) ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_DEDUP) == 0); } + if (ashift > 0) { + fnvlist_add_uint64(nv, + ZPOOL_CONFIG_ASHIFT, ashift); + } if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { verify(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, @@ -1695,8 +1712,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) * We have a device. Pass off to make_leaf_vdev() to * construct the appropriate nvlist describing the vdev. */ - if ((nv = make_leaf_vdev(props, argv[0], !(is_log || - is_special || is_dedup || is_spare))) == NULL) + if ((nv = make_leaf_vdev(argv[0], !(is_log || + is_special || is_dedup || is_spare), + ashift)) == NULL) goto spec_out; verify(nvlist_add_uint64(nv, diff --git a/sys/contrib/openzfs/cmd/zstream/Makefile.am b/sys/contrib/openzfs/cmd/zstream/Makefile.am index be3539fe905d..80ef1ea7ca11 100644 --- a/sys/contrib/openzfs/cmd/zstream/Makefile.am +++ b/sys/contrib/openzfs/cmd/zstream/Makefile.am @@ -18,6 +18,7 @@ zstream_LDADD = \ libzpool.la \ libnvpair.la -PHONY += install-exec-hook -install-exec-hook: +cmd-zstream-install-exec-hook: cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump + +INSTALL_EXEC_HOOKS += cmd-zstream-install-exec-hook diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c index 89264c97ff10..89752dcb0f0f 100644 --- a/sys/contrib/openzfs/cmd/ztest.c +++ b/sys/contrib/openzfs/cmd/ztest.c @@ -273,7 +273,6 @@ extern int zfs_compressed_arc_enabled; extern int zfs_abd_scatter_enabled; extern uint_t dmu_object_alloc_chunk_shift; extern boolean_t zfs_force_some_double_word_sm_entries; -extern unsigned long zio_decompress_fail_fraction; extern unsigned long zfs_reconstruct_indirect_damage_fraction; extern uint64_t raidz_expand_max_reflow_bytes; extern uint_t raidz_expand_pause_point; @@ -809,8 +808,8 @@ static ztest_option_t option_table[] = { { 'X', "raidz-expansion", NULL, "Perform a dedicated raidz expansion test", NO_DEFAULT, NULL}, - { 'o', "option", "\"OPTION=INTEGER\"", - "Set global variable to an unsigned 32-bit integer value", + { 'o', "option", "\"NAME=VALUE\"", + "Set the named tunable to the given value", NO_DEFAULT, NULL}, { 'G', "dump-debug-msg", NULL, "Dump zfs_dbgmsg buffer before exiting due to an error", @@ -829,8 +828,8 @@ static char *short_opts = NULL; static void init_options(void) { - ASSERT3P(long_opts, ==, NULL); - ASSERT3P(short_opts, ==, NULL); + ASSERT0P(long_opts); + ASSERT0P(short_opts); int count = sizeof (option_table) / sizeof (option_table[0]); long_opts = umem_alloc(sizeof (struct option) * count, UMEM_NOFAIL); @@ -919,7 +918,7 @@ ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo) { char name[32]; char *value; - int state = ZTEST_VDEV_CLASS_RND; + int state; (void) strlcpy(name, input, sizeof (name)); @@ -1686,7 +1685,7 @@ ztest_rll_init(rll_t *rll) static void ztest_rll_destroy(rll_t *rll) { - ASSERT3P(rll->rll_writer, ==, NULL); + ASSERT0P(rll->rll_writer); ASSERT0(rll->rll_readers); mutex_destroy(&rll->rll_lock); cv_destroy(&rll->rll_cv); @@ -1720,7 +1719,7 @@ ztest_rll_unlock(rll_t *rll) rll->rll_writer = NULL; } else { ASSERT3S(rll->rll_readers, >, 0); - ASSERT3P(rll->rll_writer, ==, NULL); + ASSERT0P(rll->rll_writer); rll->rll_readers--; } @@ -1996,7 +1995,7 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING) != 0) { - zil_itx_destroy(itx); + zil_itx_destroy(itx, 0); itx = zil_itx_create(TX_WRITE, sizeof (*lr)); write_state = WR_NEED_COPY; } @@ -2278,8 +2277,8 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) ztest_block_tag_t rbt; - VERIFY(dmu_read(os, lr->lr_foid, offset, - sizeof (rbt), &rbt, flags) == 0); + VERIFY0(dmu_read(os, lr->lr_foid, offset, + sizeof (rbt), &rbt, flags)); if (rbt.bt_magic == BT_MAGIC) { ztest_bt_verify(&rbt, os, lr->lr_foid, 0, offset, gen, txg, crtxg); @@ -2966,7 +2965,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) (void) pthread_rwlock_rdlock(&zd->zd_zilog_lock); - zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); + VERIFY0(zil_commit(zilog, ztest_random(ZTEST_OBJECTS))); /* * Remember the committed values in zd, which is in parent/child @@ -3882,7 +3881,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) * If newvd is too small, it should fail with EOVERFLOW. * * If newvd is a distributed spare and it's being attached to a - * dRAID which is not its parent it should fail with EINVAL. + * dRAID which is not its parent it should fail with ENOTSUP. */ if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops && (!replacing || @@ -3901,7 +3900,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) else if (ashift > oldvd->vdev_top->vdev_ashift) expected_error = EDOM; else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd)) - expected_error = EINVAL; + expected_error = ENOTSUP; else expected_error = 0; @@ -4007,7 +4006,7 @@ raidz_scratch_verify(void) * requested by user, but scratch object was not created. */ case RRSS_SCRATCH_NOT_IN_USE: - ASSERT3U(offset, ==, 0); + ASSERT0(offset); break; /* @@ -5537,8 +5536,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) } if (i == 1) { - VERIFY(dmu_buf_hold(os, bigobj, off, - FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); + VERIFY0(dmu_buf_hold(os, bigobj, off, + FTAG, &dbt, DMU_READ_NO_PREFETCH)); } if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db, @@ -7069,7 +7068,7 @@ ztest_set_global_vars(void) char *kv = ztest_opts.zo_gvars[i]; VERIFY3U(strlen(kv), <=, ZO_GVARS_MAX_ARGLEN); VERIFY3U(strlen(kv), >, 0); - int err = set_global_var(kv); + int err = handle_tunable_option(kv, B_TRUE); if (ztest_opts.zo_verbose > 0) { (void) printf("setting global var %s ... %s\n", kv, err ? "failed" : "ok"); @@ -7813,6 +7812,9 @@ ztest_dataset_open(int d) ztest_dataset_name(name, ztest_opts.zo_pool, d); + if (ztest_opts.zo_verbose >= 6) + (void) printf("Opening %s\n", name); + (void) pthread_rwlock_rdlock(&ztest_name_lock); error = ztest_dataset_create(name); @@ -7934,7 +7936,7 @@ ztest_freeze(void) */ while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { ztest_dmu_object_alloc_free(zd, 0); - zil_commit(zd->zd_zilog, 0); + VERIFY0(zil_commit(zd->zd_zilog, 0)); } txg_wait_synced(spa_get_dsl(spa), 0); @@ -7976,7 +7978,7 @@ ztest_freeze(void) /* * Commit all of the changes we just generated. */ - zil_commit(zd->zd_zilog, 0); + VERIFY0(zil_commit(zd->zd_zilog, 0)); txg_wait_synced(spa_get_dsl(spa), 0); /* @@ -8308,41 +8310,44 @@ static void ztest_generic_run(ztest_shared_t *zs, spa_t *spa) { kthread_t **run_threads; - int t; + int i, ndatasets; run_threads = umem_zalloc(ztest_opts.zo_threads * sizeof (kthread_t *), UMEM_NOFAIL); /* - * Kick off all the tests that run in parallel. + * Actual number of datasets to be used. */ - for (t = 0; t < ztest_opts.zo_threads; t++) { - if (t < ztest_opts.zo_datasets && ztest_dataset_open(t) != 0) { - umem_free(run_threads, ztest_opts.zo_threads * - sizeof (kthread_t *)); - return; - } + ndatasets = MIN(ztest_opts.zo_datasets, ztest_opts.zo_threads); - run_threads[t] = thread_create(NULL, 0, ztest_thread, - (void *)(uintptr_t)t, 0, NULL, TS_RUN | TS_JOINABLE, + /* + * Prepare the datasets first. + */ + for (i = 0; i < ndatasets; i++) + VERIFY0(ztest_dataset_open(i)); + + /* + * Kick off all the tests that run in parallel. + */ + for (i = 0; i < ztest_opts.zo_threads; i++) { + run_threads[i] = thread_create(NULL, 0, ztest_thread, + (void *)(uintptr_t)i, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri); } /* * Wait for all of the tests to complete. */ - for (t = 0; t < ztest_opts.zo_threads; t++) - VERIFY0(thread_join(run_threads[t])); + for (i = 0; i < ztest_opts.zo_threads; i++) + VERIFY0(thread_join(run_threads[i])); /* * Close all datasets. This must be done after all the threads * are joined so we can be sure none of the datasets are in-use * by any of the threads. */ - for (t = 0; t < ztest_opts.zo_threads; t++) { - if (t < ztest_opts.zo_datasets) - ztest_dataset_close(t); - } + for (i = 0; i < ndatasets; i++) + ztest_dataset_close(i); txg_wait_synced(spa_get_dsl(spa), 0); @@ -8465,6 +8470,7 @@ ztest_run(ztest_shared_t *zs) int d = ztest_random(ztest_opts.zo_datasets); ztest_dataset_destroy(d); + txg_wait_synced(spa_get_dsl(spa), 0); } zs->zs_enospc_count = 0; @@ -8972,7 +8978,7 @@ main(int argc, char **argv) exit(EXIT_FAILURE); } else { /* children should not be spawned if setting gvars fails */ - VERIFY3S(err, ==, 0); + VERIFY0(err); } /* Override location of zpool.cache */ |
