aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/cmd')
-rw-r--r-- sys/contrib/openzfs/cmd/Makefile.am | 11
-rwxr-xr-x sys/contrib/openzfs/cmd/zarcstat.in (renamed from sys/contrib/openzfs/cmd/arcstat.in) | 22
-rwxr-xr-x sys/contrib/openzfs/cmd/zarcsummary (renamed from sys/contrib/openzfs/cmd/arc_summary) | 9
-rw-r--r-- sys/contrib/openzfs/cmd/zdb/zdb.c | 475
-rw-r--r-- sys/contrib/openzfs/cmd/zdb/zdb.h | 2
-rw-r--r-- sys/contrib/openzfs/cmd/zdb/zdb_il.c | 10
-rw-r--r-- sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c | 67
-rw-r--r-- sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am | 24
-rwxr-xr-x sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh (renamed from sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh) | 0
l--------- sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh | 1
l--------- sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh | 1
-rwxr-xr-x sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh (renamed from sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh) | 0
-rwxr-xr-x sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh (renamed from sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh) | 0
l--------- sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh | 1
l--------- sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh | 1
l--------- sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh | 1
l--------- sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh | 1
-rw-r--r-- sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh | 3
-rw-r--r-- sys/contrib/openzfs/cmd/zed/zed_event.c | 2
-rw-r--r-- sys/contrib/openzfs/cmd/zed/zed_exec.c | 111
-rw-r--r-- sys/contrib/openzfs/cmd/zfs/zfs_main.c | 90
-rw-r--r-- sys/contrib/openzfs/cmd/zhack.c | 345
-rwxr-xr-x sys/contrib/openzfs/cmd/zilstat.in | 1
-rw-r--r-- sys/contrib/openzfs/cmd/zinject/zinject.c | 81
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/Makefile.am | 5
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 | 48
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/zpool_iter.c | 123
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/zpool_main.c | 471
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/zpool_util.h | 3
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/zpool_vdev.c | 114
-rw-r--r-- sys/contrib/openzfs/cmd/zstream/Makefile.am | 5
-rw-r--r-- sys/contrib/openzfs/cmd/ztest.c | 80
32 files changed, 1519 insertions, 589 deletions
diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am
index 96040976e53e..ca94f6b77e06 100644
--- a/sys/contrib/openzfs/cmd/Makefile.am
+++ b/sys/contrib/openzfs/cmd/Makefile.am
@@ -98,17 +98,16 @@ endif
if USING_PYTHON
-bin_SCRIPTS += arc_summary arcstat dbufstat zilstat
-CLEANFILES += arc_summary arcstat dbufstat zilstat
-dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in
+bin_SCRIPTS += zarcsummary zarcstat dbufstat zilstat
+CLEANFILES += zarcsummary zarcstat dbufstat zilstat
+dist_noinst_DATA += %D%/zarcsummary %D%/zarcstat.in %D%/dbufstat.in %D%/zilstat.in
-$(call SUBST,arcstat,%D%/)
+$(call SUBST,zarcstat,%D%/)
$(call SUBST,dbufstat,%D%/)
$(call SUBST,zilstat,%D%/)
-arc_summary: %D%/arc_summary
+zarcsummary: %D%/zarcsummary
$(AM_V_at)cp $< $@
endif
-
PHONY += cmd
cmd: $(bin_SCRIPTS) $(bin_PROGRAMS) $(sbin_SCRIPTS) $(sbin_PROGRAMS) $(dist_bin_SCRIPTS) $(zfsexec_PROGRAMS) $(mounthelper_PROGRAMS)
diff --git a/sys/contrib/openzfs/cmd/arcstat.in b/sys/contrib/openzfs/cmd/zarcstat.in
index 6f9abb39c3fb..8ffd20481166 100755
--- a/sys/contrib/openzfs/cmd/arcstat.in
+++ b/sys/contrib/openzfs/cmd/zarcstat.in
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: CDDL-1.0
#
# Print out ZFS ARC Statistics exported via kstat(1)
-# For a definition of fields, or usage, use arcstat -v
+# For a definition of fields, or usage, use zarcstat -v
#
# This script was originally a fork of the original arcstat.pl (0.1)
# by Neelakanth Nadgir, originally published on his Sun blog on
@@ -56,6 +56,7 @@ import time
import getopt
import re
import copy
+import os
from signal import signal, SIGINT, SIGWINCH, SIG_DFL
@@ -171,7 +172,7 @@ cols = {
"zactive": [7, 1000, "zfetch prefetches active per second"],
}
-# ARC structural breakdown from arc_summary
+# ARC structural breakdown from zarcsummary
structfields = {
"cmp": ["compressed", "Compressed"],
"ovh": ["overhead", "Overhead"],
@@ -187,7 +188,7 @@ structstats = { # size stats
"sz": ["_size", "size"],
}
-# ARC types breakdown from arc_summary
+# ARC types breakdown from zarcsummary
typefields = {
"data": ["data", "ARC data"],
"meta": ["metadata", "ARC metadata"],
@@ -198,7 +199,7 @@ typestats = { # size stats
"sz": ["_size", "size"],
}
-# ARC states breakdown from arc_summary
+# ARC states breakdown from zarcsummary
statefields = {
"ano": ["anon", "Anonymous"],
"mfu": ["mfu", "MFU"],
@@ -261,7 +262,7 @@ hdr_intr = 20 # Print header every 20 lines of output
opfile = None
sep = " " # Default separator is 2 spaces
l2exist = False
-cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
+cmd = ("Usage: zarcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
"[count]]\n")
cur = {}
d = {}
@@ -348,10 +349,10 @@ def usage():
"character or string\n")
sys.stderr.write("\t -p : Disable auto-scaling of numerical fields\n")
sys.stderr.write("\nExamples:\n")
- sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n")
- sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n")
- sys.stderr.write("\tarcstat -v\n")
- sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n")
+ sys.stderr.write("\tzarcstat -o /tmp/a.log 2 10\n")
+ sys.stderr.write("\tzarcstat -s \",\" -o /tmp/a.log 2 10\n")
+ sys.stderr.write("\tzarcstat -v\n")
+ sys.stderr.write("\tzarcstat -f time,hit%,dh%,ph%,mh% 1\n")
sys.stderr.write("\n")
sys.exit(1)
@@ -366,7 +367,7 @@ def snap_stats():
cur = kstat
- # fill in additional values from arc_summary
+ # fill in additional values from zarcsummary
cur["caches_size"] = caches_size = cur["anon_data"]+cur["anon_metadata"]+\
cur["mfu_data"]+cur["mfu_metadata"]+cur["mru_data"]+cur["mru_metadata"]+\
cur["uncached_data"]+cur["uncached_metadata"]
@@ -766,6 +767,7 @@ def calculate():
def main():
+
global sint
global count
global hdr_intr
diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/zarcsummary
index c1319573220c..24a129d9ca70 100755
--- a/sys/contrib/openzfs/cmd/arc_summary
+++ b/sys/contrib/openzfs/cmd/zarcsummary
@@ -34,7 +34,7 @@ Provides basic information on the ARC, its efficiency, the L2ARC (if present),
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See
the in-source documentation and code at
https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
-The original introduction to arc_summary can be found at
+The original introduction to zarcsummary can be found at
http://cuddletech.com/?p=454
"""
@@ -161,7 +161,7 @@ elif sys.platform.startswith('linux'):
return get_params(TUNABLES_PATH)
def get_version_impl(request):
- # The original arc_summary called /sbin/modinfo/{spl,zfs} to get
+ # The original zarcsummary called /sbin/modinfo/{spl,zfs} to get
# the version information. We switch to /sys/module/{spl,zfs}/version
# to make sure we get what is really loaded in the kernel
try:
@@ -439,7 +439,7 @@ def print_header():
"""
# datetime is now recommended over time but we keep the exact formatting
- # from the older version of arc_summary in case there are scripts
+ # from the older version of zarcsummary in case there are scripts
# that expect it in this way
daydate = time.strftime(DATE_FORMAT)
spc_date = LINE_LENGTH-len(daydate)
@@ -559,6 +559,7 @@ def section_arc(kstats_dict):
print()
compressed_size = arc_stats['compressed_size']
+ uncompressed_size = arc_stats['uncompressed_size']
overhead_size = arc_stats['overhead_size']
bonus_size = arc_stats['bonus_size']
dnode_size = arc_stats['dnode_size']
@@ -671,6 +672,8 @@ def section_arc(kstats_dict):
print()
print('ARC misc:')
+ prt_i2('Uncompressed size:', f_perc(uncompressed_size, compressed_size),
+ f_bytes(uncompressed_size))
prt_i1('Memory throttles:', arc_stats['memory_throttle_count'])
prt_i1('Memory direct reclaims:', arc_stats['memory_direct_count'])
prt_i1('Memory indirect reclaims:', arc_stats['memory_indirect_count'])
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index 45eb9c783659..2560ad045db3 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -106,8 +106,14 @@ extern boolean_t spa_mode_readable_spacemaps;
extern uint_t zfs_reconstruct_indirect_combinations_max;
extern uint_t zfs_btree_verify_intensity;
+enum {
+ ARG_ALLOCATED = 256,
+ ARG_BLOCK_BIN_MODE,
+ ARG_BLOCK_CLASSES,
+};
+
static const char cmdname[] = "zdb";
-uint8_t dump_opt[256];
+uint8_t dump_opt[512];
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
@@ -127,6 +133,21 @@ static zfs_range_tree_t *mos_refd_objs;
static spa_t *spa;
static objset_t *os;
static boolean_t kernel_init_done;
+static boolean_t corruption_found = B_FALSE;
+
+static enum {
+ BIN_AUTO = 0,
+ BIN_PSIZE,
+ BIN_LSIZE,
+ BIN_ASIZE,
+} block_bin_mode = BIN_AUTO;
+
+static enum {
+ CLASS_NORMAL = 1 << 1,
+ CLASS_SPECIAL = 1 << 2,
+ CLASS_DEDUP = 1 << 3,
+ CLASS_OTHER = 1 << 4,
+} block_classes = 0;
static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
boolean_t);
@@ -176,7 +197,7 @@ static int
sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
dmu_tx_t *tx)
{
- ASSERT3P(tx, ==, NULL);
+ ASSERT0P(tx);
struct sublivelist_verify *sv = arg;
sublivelist_verify_block_refcnt_t current = {
.svbr_blk = *bp,
@@ -208,7 +229,7 @@ sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
sublivelist_verify_block_t svb = {
.svb_dva = bp->blk_dva[i],
.svb_allocated_txg =
- BP_GET_LOGICAL_BIRTH(bp)
+ BP_GET_BIRTH(bp)
};
if (zfs_btree_find(&sv->sv_leftover, &svb,
@@ -250,6 +271,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
&e->svbr_blk, B_TRUE);
(void) printf("\tERROR: %d unmatched FREE(s): %s\n",
e->svbr_refcnt, blkbuf);
+ corruption_found = B_TRUE;
}
zfs_btree_destroy(&sv->sv_pair);
@@ -381,7 +403,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
sublivelist_verify_block_t svb = {{{0}}};
DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
DVA_SET_OFFSET(&svb.svb_dva, offset);
- DVA_SET_ASIZE(&svb.svb_dva, size);
+ DVA_SET_ASIZE(&svb.svb_dva, 0);
zfs_btree_index_t where;
uint64_t end_offset = offset + size;
@@ -405,6 +427,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
(u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
(u_longlong_t)found->svb_allocated_txg,
(u_longlong_t)txg);
+ corruption_found = B_TRUE;
}
}
}
@@ -426,6 +449,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
(u_longlong_t)txg, (u_longlong_t)offset,
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
(u_longlong_t)mv->mv_msid);
+ corruption_found = B_TRUE;
} else {
zfs_range_tree_add(mv->mv_allocated,
offset, size);
@@ -439,6 +463,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
(u_longlong_t)txg, (u_longlong_t)offset,
(u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
(u_longlong_t)mv->mv_msid);
+ corruption_found = B_TRUE;
} else {
zfs_range_tree_remove(mv->mv_allocated,
offset, size);
@@ -526,6 +551,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
+ corruption_found = B_TRUE;
continue;
}
@@ -542,6 +568,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
(u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
(u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
(u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
+ corruption_found = B_TRUE;
continue;
}
@@ -619,8 +646,9 @@ livelist_metaslab_validate(spa_t *spa)
metaslab_calculate_range_tree_type(vd, m,
&start, &shift);
metaslab_verify_t mv;
- mv.mv_allocated = zfs_range_tree_create(NULL,
- type, NULL, start, shift);
+ mv.mv_allocated = zfs_range_tree_create_flags(
+ NULL, type, NULL, start, shift,
+ 0, "livelist_metaslab_validate:mv_allocated");
mv.mv_vdid = vd->vdev_id;
mv.mv_msid = m->ms_id;
mv.mv_start = m->ms_start;
@@ -654,6 +682,7 @@ livelist_metaslab_validate(spa_t *spa)
}
(void) printf("ERROR: Found livelist blocks marked as allocated "
"for indirect vdevs:\n");
+ corruption_found = B_TRUE;
zfs_btree_index_t *where = NULL;
sublivelist_verify_block_t *svb;
@@ -738,6 +767,12 @@ usage(void)
(void) fprintf(stderr, " Options to control amount of output:\n");
(void) fprintf(stderr, " -b --block-stats "
"block statistics\n");
+ (void) fprintf(stderr, " --bin=(lsize|psize|asize) "
+ "bin blocks based on this size in all three columns\n");
+ (void) fprintf(stderr,
+ " --class=(normal|special|dedup|other)[,...]\n"
+ " only consider blocks from "
+ "these allocation classes\n");
(void) fprintf(stderr, " -B --backup "
"backup stream\n");
(void) fprintf(stderr, " -c --checksum "
@@ -797,8 +832,8 @@ usage(void)
"[default is 200]\n");
(void) fprintf(stderr, " -K --key=KEY "
"decryption key for encrypted dataset\n");
- (void) fprintf(stderr, " -o --option=\"OPTION=INTEGER\" "
- "set global variable to an unsigned 32-bit integer\n");
+ (void) fprintf(stderr, " -o --option=\"NAME=VALUE\" "
+ "set the named tunable to the given value\n");
(void) fprintf(stderr, " -p --path==PATH "
"use one or more with -e to specify path to vdev dir\n");
(void) fprintf(stderr, " -P --parseable "
@@ -826,7 +861,7 @@ usage(void)
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
- zdb_exit(1);
+ zdb_exit(2);
}
static void
@@ -891,9 +926,9 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
size_t nvsize = *(uint64_t *)data;
char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
- VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
+ VERIFY0(dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
- VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
+ VERIFY0(nvlist_unpack(packed, nvsize, &nv, 0));
umem_free(packed, nvsize);
@@ -1454,8 +1489,8 @@ get_obsolete_refcount(vdev_t *vd)
refcount++;
}
} else {
- ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
- ASSERT3U(obsolete_sm_object, ==, 0);
+ ASSERT0P(vd->vdev_obsolete_sm);
+ ASSERT0(obsolete_sm_object);
}
for (unsigned c = 0; c < vd->vdev_children; c++) {
refcount += get_obsolete_refcount(vd->vdev_child[c]);
@@ -1577,9 +1612,8 @@ dump_spacemap(objset_t *os, space_map_t *sm)
continue;
}
- uint8_t words;
char entry_type;
- uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
+ uint64_t entry_off, entry_run, entry_vdev;
if (sm_entry_is_single_word(word)) {
entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
@@ -1587,35 +1621,43 @@ dump_spacemap(objset_t *os, space_map_t *sm)
entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
sm->sm_start;
entry_run = SM_RUN_DECODE(word) << mapshift;
- words = 1;
+
+ (void) printf("\t [%6llu] %c "
+ "range: %012llx-%012llx size: %08llx\n",
+ (u_longlong_t)entry_id, entry_type,
+ (u_longlong_t)entry_off,
+ (u_longlong_t)(entry_off + entry_run - 1),
+ (u_longlong_t)entry_run);
} else {
/* it is a two-word entry so we read another word */
ASSERT(sm_entry_is_double_word(word));
uint64_t extra_word;
offset += sizeof (extra_word);
+ ASSERT3U(offset, <, space_map_length(sm));
VERIFY0(dmu_read(os, space_map_object(sm), offset,
sizeof (extra_word), &extra_word,
DMU_READ_PREFETCH));
- ASSERT3U(offset, <=, space_map_length(sm));
-
entry_run = SM2_RUN_DECODE(word) << mapshift;
entry_vdev = SM2_VDEV_DECODE(word);
entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
'A' : 'F';
entry_off = (SM2_OFFSET_DECODE(extra_word) <<
mapshift) + sm->sm_start;
- words = 2;
- }
- (void) printf("\t [%6llu] %c range:"
- " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
- (u_longlong_t)entry_id,
- entry_type, (u_longlong_t)entry_off,
- (u_longlong_t)(entry_off + entry_run),
- (u_longlong_t)entry_run,
- (u_longlong_t)entry_vdev, words);
+ if (zopt_metaslab_args == 0 ||
+ zopt_metaslab[0] == entry_vdev) {
+ (void) printf("\t [%6llu] %c "
+ "range: %012llx-%012llx size: %08llx "
+ "vdev: %llu\n",
+ (u_longlong_t)entry_id, entry_type,
+ (u_longlong_t)entry_off,
+ (u_longlong_t)(entry_off + entry_run - 1),
+ (u_longlong_t)entry_run,
+ (u_longlong_t)entry_vdev);
+ }
+ }
if (entry_type == 'A')
alloc += entry_run;
@@ -1651,6 +1693,16 @@ dump_metaslab_stats(metaslab_t *msp)
}
static void
+dump_allocated(void *arg, uint64_t start, uint64_t size)
+{
+ uint64_t *off = arg;
+ if (*off != start)
+ (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", *off,
+ start - *off);
+ *off = start + size;
+}
+
+static void
dump_metaslab(metaslab_t *msp)
{
vdev_t *vd = msp->ms_group->mg_vd;
@@ -1666,13 +1718,24 @@ dump_metaslab(metaslab_t *msp)
(u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
(u_longlong_t)space_map_object(sm), freebuf);
- if (dump_opt['m'] > 2 && !dump_opt['L']) {
+ if (dump_opt[ARG_ALLOCATED] ||
+ (dump_opt['m'] > 2 && !dump_opt['L'])) {
mutex_enter(&msp->ms_lock);
VERIFY0(metaslab_load(msp));
+ }
+
+ if (dump_opt['m'] > 2 && !dump_opt['L']) {
zfs_range_tree_stat_verify(msp->ms_allocatable);
dump_metaslab_stats(msp);
- metaslab_unload(msp);
- mutex_exit(&msp->ms_lock);
+ }
+
+ if (dump_opt[ARG_ALLOCATED]) {
+ uint64_t off = msp->ms_start;
+ zfs_range_tree_walk(msp->ms_allocatable, dump_allocated,
+ &off);
+ if (off != msp->ms_start + msp->ms_size)
+ (void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", off,
+ msp->ms_size - off);
}
if (dump_opt['m'] > 1 && sm != NULL &&
@@ -1687,6 +1750,12 @@ dump_metaslab(metaslab_t *msp)
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}
+ if (dump_opt[ARG_ALLOCATED] ||
+ (dump_opt['m'] > 2 && !dump_opt['L'])) {
+ metaslab_unload(msp);
+ mutex_exit(&msp->ms_lock);
+ }
+
if (vd->vdev_ops == &vdev_draid_ops)
ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
else
@@ -1723,8 +1792,9 @@ print_vdev_metaslab_header(vdev_t *vd)
}
}
- (void) printf("\tvdev %10llu %s",
- (u_longlong_t)vd->vdev_id, bias_str);
+ (void) printf("\tvdev %10llu\t%s metaslab shift %4llu",
+ (u_longlong_t)vd->vdev_id, bias_str,
+ (u_longlong_t)vd->vdev_ms_shift);
if (ms_flush_data_obj != 0) {
(void) printf(" ms_unflushed_phys object %llu",
@@ -1791,7 +1861,7 @@ print_vdev_indirect(vdev_t *vd)
vdev_indirect_births_t *vib = vd->vdev_indirect_births;
if (vim == NULL) {
- ASSERT3P(vib, ==, NULL);
+ ASSERT0P(vib);
return;
}
@@ -1864,7 +1934,7 @@ dump_metaslabs(spa_t *spa)
(void) printf("\nMetaslabs:\n");
- if (!dump_opt['d'] && zopt_metaslab_args > 0) {
+ if (zopt_metaslab_args > 0) {
c = zopt_metaslab[0];
if (c >= children)
@@ -1991,7 +2061,7 @@ dump_ddt_log(ddt_t *ddt)
c += strlcpy(&flagstr[c], " UNKNOWN",
sizeof (flagstr) - c);
flagstr[1] = '[';
- flagstr[c++] = ']';
+ flagstr[c] = ']';
}
uint64_t count = avl_numnodes(&ddl->ddl_tree);
@@ -2042,10 +2112,10 @@ dump_ddt_object(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
if (error == ENOENT)
return;
- ASSERT(error == 0);
+ ASSERT0(error);
error = ddt_object_count(ddt, type, class, &count);
- ASSERT(error == 0);
+ ASSERT0(error);
if (count == 0)
return;
@@ -2568,7 +2638,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
(u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)BP_GET_FILL(bp),
(u_longlong_t)BP_GET_LOGICAL_BIRTH(bp),
- (u_longlong_t)BP_GET_BIRTH(bp));
+ (u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp));
if (bp_freed)
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf), " %s", "FREE");
@@ -2582,19 +2652,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
}
}
-static void
+static u_longlong_t
print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
const dnode_phys_t *dnp)
{
char blkbuf[BP_SPRINTF_LEN];
+ u_longlong_t offset;
int l;
- if (!BP_IS_EMBEDDED(bp)) {
- ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
- ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
- }
+ offset = (u_longlong_t)blkid2offset(dnp, bp, zb);
- (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
+ (void) printf("%16llx ", offset);
ASSERT(zb->zb_level >= 0);
@@ -2609,19 +2677,38 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
- (void) printf("%s\n", blkbuf);
+ (void) printf("%s", blkbuf);
+
+ if (!BP_IS_EMBEDDED(bp)) {
+ if (BP_GET_TYPE(bp) != dnp->dn_type) {
+ (void) printf(" (ERROR: Block pointer type "
+ "(%llu) does not match dnode type (%hhu))",
+ BP_GET_TYPE(bp), dnp->dn_type);
+ corruption_found = B_TRUE;
+ }
+ if (BP_GET_LEVEL(bp) != zb->zb_level) {
+ (void) printf(" (ERROR: Block pointer level "
+ "(%llu) does not match bookmark level (%lld))",
+ BP_GET_LEVEL(bp), (longlong_t)zb->zb_level);
+ corruption_found = B_TRUE;
+ }
+ }
+ (void) printf("\n");
+
+ return (offset);
}
static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
blkptr_t *bp, const zbookmark_phys_t *zb)
{
+ u_longlong_t offset;
int err = 0;
- if (BP_GET_LOGICAL_BIRTH(bp) == 0)
+ if (BP_GET_BIRTH(bp) == 0)
return (0);
- print_indirect(spa, bp, zb, dnp);
+ offset = print_indirect(spa, bp, zb, dnp);
if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
arc_flags_t flags = ARC_FLAG_WAIT;
@@ -2651,8 +2738,15 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
break;
fill += BP_GET_FILL(cbp);
}
- if (!err)
- ASSERT3U(fill, ==, BP_GET_FILL(bp));
+ if (!err) {
+ if (fill != BP_GET_FILL(bp)) {
+ (void) printf("%16llx: Block pointer "
+ "fill (%llu) does not match calculated "
+ "value (%llu)\n", offset, BP_GET_FILL(bp),
+ (u_longlong_t)fill);
+ corruption_found = B_TRUE;
+ }
+ }
arc_buf_destroy(buf, &buf);
}
@@ -2806,7 +2900,7 @@ dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
(void) arg, (void) tx;
char blkbuf[BP_SPRINTF_LEN];
- if (BP_GET_LOGICAL_BIRTH(bp) != 0) {
+ if (BP_GET_BIRTH(bp) != 0) {
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("\t%s\n", blkbuf);
}
@@ -2847,7 +2941,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
(void) arg, (void) tx;
char blkbuf[BP_SPRINTF_LEN];
- ASSERT(BP_GET_LOGICAL_BIRTH(bp) != 0);
+ ASSERT(BP_GET_BIRTH(bp) != 0);
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed);
(void) printf("\t%s\n", blkbuf);
return (0);
@@ -2908,6 +3002,7 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
(void) printf("ERROR %u while trying to open "
"subobj id %llu\n",
error, (u_longlong_t)subobj);
+ corruption_found = B_TRUE;
continue;
}
dump_full_bpobj(&subbpo, "subobj", indent + 1);
@@ -3087,6 +3182,7 @@ bpobj_count_refd(bpobj_t *bpo)
(void) printf("ERROR %u while trying to open "
"subobj id %llu\n",
error, (u_longlong_t)subobj);
+ corruption_found = B_TRUE;
continue;
}
bpobj_count_refd(&subbpo);
@@ -3108,7 +3204,7 @@ dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle)
static int
dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle)
{
- ASSERT(arg == NULL);
+ ASSERT0P(arg);
if (dump_opt['d'] >= 5) {
char buf[128];
(void) snprintf(buf, sizeof (buf),
@@ -3229,6 +3325,7 @@ zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out)
uint64_t keyformat, salt, iters;
int i;
unsigned char c;
+ FILE *f;
VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
zfs_prop_to_name(ZFS_PROP_KEYFORMAT), sizeof (uint64_t),
@@ -3261,6 +3358,25 @@ zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out)
break;
+ case ZFS_KEYFORMAT_RAW:
+ if ((f = fopen(key_material, "r")) == NULL)
+ return (B_FALSE);
+
+ if (fread(key_out, 1, WRAPPING_KEY_LEN, f) !=
+ WRAPPING_KEY_LEN) {
+ (void) fclose(f);
+ return (B_FALSE);
+ }
+
+ /* Check the key length */
+ if (fgetc(f) != EOF) {
+ (void) fclose(f);
+ return (B_FALSE);
+ }
+
+ (void) fclose(f);
+ break;
+
default:
fatal("no support for key format %u\n",
(unsigned int) keyformat);
@@ -3346,7 +3462,7 @@ open_objset(const char *path, const void *tag, objset_t **osp)
uint64_t sa_attrs = 0;
uint64_t version = 0;
- VERIFY3P(sa_os, ==, NULL);
+ VERIFY0P(sa_os);
/*
* We can't own an objset if it's redacted. Therefore, we do this
@@ -3519,8 +3635,8 @@ dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
uint64_t fuid_obj;
/* first find the fuid object. It lives in the master node */
- VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
- 8, 1, &fuid_obj) == 0);
+ VERIFY0(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
+ 8, 1, &fuid_obj));
zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
(void) zfs_fuid_table_load(os, fuid_obj,
&idx_tree, &domain_tree);
@@ -5722,6 +5838,34 @@ dump_size_histograms(zdb_cb_t *zcb)
(void) printf("\nBlock Size Histogram\n");
+ switch (block_bin_mode) {
+ case BIN_PSIZE:
+ printf("(note: all categories are binned by %s)\n", "psize");
+ break;
+ case BIN_LSIZE:
+ printf("(note: all categories are binned by %s)\n", "lsize");
+ break;
+ case BIN_ASIZE:
+ printf("(note: all categories are binned by %s)\n", "asize");
+ break;
+ default:
+ printf("(note: all categories are binned separately)\n");
+ break;
+ }
+ if (block_classes != 0) {
+ char buf[256] = "";
+ if (block_classes & CLASS_NORMAL)
+ strlcat(buf, "\"normal\", ", sizeof (buf));
+ if (block_classes & CLASS_SPECIAL)
+ strlcat(buf, "\"special\", ", sizeof (buf));
+ if (block_classes & CLASS_DEDUP)
+ strlcat(buf, "\"dedup\", ", sizeof (buf));
+ if (block_classes & CLASS_OTHER)
+ strlcat(buf, "\"other\", ", sizeof (buf));
+ buf[strlen(buf)-2] = '\0';
+ printf("(note: only blocks in these classes are counted: %s)\n",
+ buf);
+ }
/*
* Print the first line titles
*/
@@ -5921,11 +6065,11 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
* entry back to the block pointer before we claim it.
*/
if (v == DDT_PHYS_FLAT) {
- ASSERT3U(BP_GET_BIRTH(bp), ==,
+ ASSERT3U(BP_GET_PHYSICAL_BIRTH(bp), ==,
ddt_phys_birth(dde->dde_phys, v));
tempbp = *bp;
ddt_bp_fill(dde->dde_phys, v, &tempbp,
- BP_GET_BIRTH(bp));
+ BP_GET_PHYSICAL_BIRTH(bp));
bp = &tempbp;
}
@@ -6070,29 +6214,85 @@ skipped:
[BPE_GET_PSIZE(bp)]++;
return;
}
+
+ if (block_classes != 0) {
+ spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
+
+ uint64_t vdev = DVA_GET_VDEV(&bp->blk_dva[0]);
+ uint64_t offset = DVA_GET_OFFSET(&bp->blk_dva[0]);
+ vdev_t *vd = vdev_lookup_top(zcb->zcb_spa, vdev);
+ ASSERT(vd != NULL);
+ metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
+ ASSERT(ms != NULL);
+ metaslab_group_t *mg = ms->ms_group;
+ ASSERT(mg != NULL);
+ metaslab_class_t *mc = mg->mg_class;
+ ASSERT(mc != NULL);
+
+ spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG);
+
+ int class;
+ if (mc == spa_normal_class(zcb->zcb_spa)) {
+ class = CLASS_NORMAL;
+ } else if (mc == spa_special_class(zcb->zcb_spa)) {
+ class = CLASS_SPECIAL;
+ } else if (mc == spa_dedup_class(zcb->zcb_spa)) {
+ class = CLASS_DEDUP;
+ } else {
+ class = CLASS_OTHER;
+ }
+
+ if (!(block_classes & class)) {
+ goto hist_skipped;
+ }
+ }
+
/*
* The binning histogram bins by powers of two up to
* SPA_MAXBLOCKSIZE rather than creating bins for
* every possible blocksize found in the pool.
*/
- int bin = highbit64(BP_GET_PSIZE(bp)) - 1;
+ int bin;
+
+ /*
+ * Binning strategy: each bin includes blocks up to and including
+ * the given size (excluding blocks that fit into the previous bin).
+ * This way, the "4K" bin includes blocks within the (2K; 4K] range.
+ */
+#define BIN(size) (highbit64((size) - 1))
+
+ switch (block_bin_mode) {
+ case BIN_PSIZE: bin = BIN(BP_GET_PSIZE(bp)); break;
+ case BIN_LSIZE: bin = BIN(BP_GET_LSIZE(bp)); break;
+ case BIN_ASIZE: bin = BIN(BP_GET_ASIZE(bp)); break;
+ case BIN_AUTO: break;
+ default: PANIC("bad block_bin_mode"); abort();
+ }
+
+ if (block_bin_mode == BIN_AUTO)
+ bin = BIN(BP_GET_PSIZE(bp));
zcb->zcb_psize_count[bin]++;
zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp);
zcb->zcb_psize_total += BP_GET_PSIZE(bp);
- bin = highbit64(BP_GET_LSIZE(bp)) - 1;
+ if (block_bin_mode == BIN_AUTO)
+ bin = BIN(BP_GET_LSIZE(bp));
zcb->zcb_lsize_count[bin]++;
zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp);
zcb->zcb_lsize_total += BP_GET_LSIZE(bp);
- bin = highbit64(BP_GET_ASIZE(bp)) - 1;
+ if (block_bin_mode == BIN_AUTO)
+ bin = BIN(BP_GET_ASIZE(bp));
zcb->zcb_asize_count[bin]++;
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
zcb->zcb_asize_total += BP_GET_ASIZE(bp);
+#undef BIN
+
+hist_skipped:
if (!do_claim)
return;
@@ -6151,7 +6351,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (zb->zb_level == ZB_DNODE_LEVEL)
return (0);
- if (dump_opt['b'] >= 5 && BP_GET_LOGICAL_BIRTH(bp) > 0) {
+ if (dump_opt['b'] >= 5 && BP_GET_BIRTH(bp) > 0) {
char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("objset %llu object %llu "
@@ -6322,8 +6522,9 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs));
- zfs_range_tree_t *allocs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ zfs_range_tree_t *allocs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ 0, "zdb_claim_removing:allocs");
for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
metaslab_t *msp = vd->vdev_ms[msi];
@@ -6750,6 +6951,7 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops;
+ spa->spa_special_embedded_log_class->mc_ops = &zdb_metaslab_ops;
zcb->zcb_vd_obsolete_counts =
umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
@@ -6887,7 +7089,9 @@ zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class ==
- spa_embedded_log_class(spa)) ?
+ spa_embedded_log_class(spa) ||
+ msp->ms_group->mg_class ==
+ spa_special_embedded_log_class(spa)) ?
vd->vdev_log_mg : vd->vdev_mg);
/*
@@ -7011,7 +7215,7 @@ deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc)
static void
dump_livelist_cb(dsl_deadlist_t *ll, void *arg)
{
- ASSERT3P(arg, ==, NULL);
+ ASSERT0P(arg);
global_feature_count[SPA_FEATURE_LIVELIST]++;
dump_blkptr_list(ll, "Deleted Livelist");
dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL);
@@ -7121,6 +7325,8 @@ dump_block_stats(spa_t *spa)
zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
zcb->zcb_totalasize +=
metaslab_class_get_alloc(spa_embedded_log_class(spa));
+ zcb->zcb_totalasize +=
+ metaslab_class_get_alloc(spa_special_embedded_log_class(spa));
zcb->zcb_start = zcb->zcb_lastprint = gethrtime();
err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb);
@@ -7169,6 +7375,7 @@ dump_block_stats(spa_t *spa)
total_alloc = norm_alloc +
metaslab_class_get_alloc(spa_log_class(spa)) +
metaslab_class_get_alloc(spa_embedded_log_class(spa)) +
+ metaslab_class_get_alloc(spa_special_embedded_log_class(spa)) +
metaslab_class_get_alloc(spa_special_class(spa)) +
metaslab_class_get_alloc(spa_dedup_class(spa)) +
get_unflushed_alloc_space(spa);
@@ -7252,6 +7459,18 @@ dump_block_stats(spa_t *spa)
100.0 * alloc / space);
}
+ if (spa_special_embedded_log_class(spa)->mc_allocator[0].mca_rotor
+ != NULL) {
+ uint64_t alloc = metaslab_class_get_alloc(
+ spa_special_embedded_log_class(spa));
+ uint64_t space = metaslab_class_get_space(
+ spa_special_embedded_log_class(spa));
+
+ (void) printf("\t%-16s %14llu used: %5.2f%%\n",
+ "Special embedded log", (u_longlong_t)alloc,
+ 100.0 * alloc / space);
+ }
+
for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
if (zcb->zcb_embedded_blocks[i] == 0)
continue;
@@ -7706,7 +7925,8 @@ zdb_set_skip_mmp(char *target)
* applies to the new_path parameter if allocated.
*/
static char *
-import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
+import_checkpointed_state(char *target, nvlist_t *cfg, boolean_t target_is_spa,
+ char **new_path)
{
int error = 0;
char *poolname, *bogus_name = NULL;
@@ -7714,11 +7934,11 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
/* If the target is not a pool, the extract the pool name */
char *path_start = strchr(target, '/');
- if (path_start != NULL) {
+ if (target_is_spa || path_start == NULL) {
+ poolname = target;
+ } else {
size_t poolname_len = path_start - target;
poolname = strndup(target, poolname_len);
- } else {
- poolname = target;
}
if (cfg == NULL) {
@@ -7749,10 +7969,11 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
"with error %d\n", bogus_name, error);
}
- if (new_path != NULL && path_start != NULL) {
- if (asprintf(new_path, "%s%s", bogus_name, path_start) == -1) {
+ if (new_path != NULL && !target_is_spa) {
+ if (asprintf(new_path, "%s%s", bogus_name,
+ path_start != NULL ? path_start : "") == -1) {
free(bogus_name);
- if (path_start != NULL)
+ if (!target_is_spa && path_start != NULL)
free(poolname);
return (NULL);
}
@@ -7891,7 +8112,7 @@ verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
for (uint64_t c = ckpoint_rvd->vdev_children;
c < current_rvd->vdev_children; c++) {
vdev_t *current_vd = current_rvd->vdev_child[c];
- VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL);
+ VERIFY0P(current_vd->vdev_checkpoint_sm);
}
}
@@ -7981,7 +8202,7 @@ verify_checkpoint_blocks(spa_t *spa)
* name) so we can do verification on it against the current state
* of the pool.
*/
- checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL,
+ checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL, B_TRUE,
NULL);
ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
@@ -8451,8 +8672,9 @@ dump_zpool(spa_t *spa)
if (dump_opt['d'] || dump_opt['i']) {
spa_feature_t f;
- mos_refd_objs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
- NULL, 0, 0);
+ mos_refd_objs = zfs_range_tree_create_flags(
+ NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+ 0, "dump_zpool:mos_refd_objs");
dump_objset(dp->dp_meta_objset);
if (dump_opt['d'] >= 3) {
@@ -8588,9 +8810,9 @@ zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
}
static void
-zdb_dump_gbh(void *buf, int flags)
+zdb_dump_gbh(void *buf, uint64_t size, int flags)
{
- zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
+ zdb_dump_indirect((blkptr_t *)buf, gbh_nblkptrs(size), flags);
}
static void
@@ -8780,7 +9002,6 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
(void) buf;
uint64_t orig_lsize = lsize;
boolean_t tryzle = ((getenv("ZDB_NO_ZLE") == NULL));
- boolean_t found = B_FALSE;
/*
* We don't know how the data was compressed, so just try
* every decompress function at every inflated blocksize.
@@ -8823,20 +9044,19 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
for (cfuncp = cfuncs; *cfuncp; cfuncp++) {
if (try_decompress_block(pabd, lsize, psize, flags,
*cfuncp, lbuf, lbuf2)) {
- found = B_TRUE;
+ tryzle = B_FALSE;
break;
}
}
if (*cfuncp != 0)
break;
}
- if (!found && tryzle) {
+ if (tryzle) {
for (lsize = orig_lsize; lsize <= maxlsize;
lsize += SPA_MINBLOCKSIZE) {
if (try_decompress_block(pabd, lsize, psize, flags,
ZIO_COMPRESS_ZLE, lbuf, lbuf2)) {
*cfuncp = ZIO_COMPRESS_ZLE;
- found = B_TRUE;
break;
}
}
@@ -9073,7 +9293,7 @@ zdb_read_block(char *thing, spa_t *spa)
zdb_dump_indirect((blkptr_t *)buf,
orig_lsize / sizeof (blkptr_t), flags);
else if (flags & ZDB_FLAG_GBH)
- zdb_dump_gbh(buf, flags);
+ zdb_dump_gbh(buf, lsize, flags);
else
zdb_dump_block(thing, buf, lsize, flags);
@@ -9120,7 +9340,7 @@ zdb_read_block(char *thing, spa_t *spa)
ck_zio->io_offset =
DVA_GET_OFFSET(&bp->blk_dva[0]);
ck_zio->io_bp = bp;
- zio_checksum_compute(ck_zio, ck, pabd, lsize);
+ zio_checksum_compute(ck_zio, ck, pabd, psize);
printf(
"%12s\t"
"cksum=%016llx:%016llx:%016llx:%016llx\n",
@@ -9313,6 +9533,12 @@ main(int argc, char **argv)
{"all-reconstruction", no_argument, NULL, 'Y'},
{"livelist", no_argument, NULL, 'y'},
{"zstd-headers", no_argument, NULL, 'Z'},
+ {"allocated-map", no_argument, NULL,
+ ARG_ALLOCATED},
+ {"bin", required_argument, NULL,
+ ARG_BLOCK_BIN_MODE},
+ {"class", required_argument, NULL,
+ ARG_BLOCK_CLASSES},
{0, 0, 0, 0}
};
@@ -9343,6 +9569,7 @@ main(int argc, char **argv)
case 'u':
case 'y':
case 'Z':
+ case ARG_ALLOCATED:
dump_opt[c]++;
dump_all = 0;
break;
@@ -9377,9 +9604,11 @@ main(int argc, char **argv)
while (*optarg != '\0') { *optarg++ = '*'; }
break;
case 'o':
- error = set_global_var(optarg);
+ dump_opt[c]++;
+ dump_all = 0;
+ error = handle_tunable_option(optarg, B_FALSE);
if (error != 0)
- usage();
+ zdb_exit(1);
break;
case 'p':
if (searchdirs == NULL) {
@@ -9423,6 +9652,59 @@ main(int argc, char **argv)
case 'x':
vn_dumpdir = optarg;
break;
+ case ARG_BLOCK_BIN_MODE:
+ if (strcmp(optarg, "lsize") == 0) {
+ block_bin_mode = BIN_LSIZE;
+ } else if (strcmp(optarg, "psize") == 0) {
+ block_bin_mode = BIN_PSIZE;
+ } else if (strcmp(optarg, "asize") == 0) {
+ block_bin_mode = BIN_ASIZE;
+ } else {
+ (void) fprintf(stderr,
+ "--bin=\"%s\" must be one of \"lsize\", "
+ "\"psize\" or \"asize\"\n", optarg);
+ usage();
+ }
+ break;
+
+ case ARG_BLOCK_CLASSES: {
+ char *buf = strdup(optarg), *tok = buf, *next,
+ *save = NULL;
+
+ while ((next = strtok_r(tok, ",", &save)) != NULL) {
+ tok = NULL;
+
+ if (strcmp(next, "normal") == 0) {
+ block_classes |= CLASS_NORMAL;
+ } else if (strcmp(next, "special") == 0) {
+ block_classes |= CLASS_SPECIAL;
+ } else if (strcmp(next, "dedup") == 0) {
+ block_classes |= CLASS_DEDUP;
+ } else if (strcmp(next, "other") == 0) {
+ block_classes |= CLASS_OTHER;
+ } else {
+ (void) fprintf(stderr,
+ "--class=\"%s\" must be a "
+ "comma-separated list of either "
+ "\"normal\", \"special\", "
+ "\"dedup\" or \"other\"; "
+ "got \"%s\"\n",
+ optarg, next);
+ usage();
+ }
+ }
+
+ if (block_classes == 0) {
+ (void) fprintf(stderr,
+ "--class= must be a comma-separated "
+ "list of either \"normal\", \"special\", "
+ "\"dedup\" or \"other\"; got empty\n");
+ usage();
+ }
+
+ free(buf);
+ break;
+ }
default:
usage();
break;
@@ -9465,6 +9747,9 @@ main(int argc, char **argv)
*/
spa_mode_readable_spacemaps = B_TRUE;
+ libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2));
+ zfs_recover = (dump_opt['A'] > 1);
+
if (dump_all)
verbose = MAX(verbose, 1);
@@ -9475,9 +9760,6 @@ main(int argc, char **argv)
dump_opt[c] += verbose;
}
- libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2));
- zfs_recover = (dump_opt['A'] > 1);
-
argc -= optind;
argv += optind;
if (argc < 2 && dump_opt['R'])
@@ -9545,6 +9827,12 @@ main(int argc, char **argv)
error = 0;
goto fini;
}
+ if (dump_opt['o'])
+ /*
+ * Avoid blasting tunable options off the top of the
+ * screen.
+ */
+ zdb_exit(1);
usage();
}
@@ -9605,7 +9893,7 @@ main(int argc, char **argv)
} else if (objset_str && !zdb_numeric(objset_str + 1) &&
dump_opt['N']) {
printf("Supply a numeric objset ID with -N\n");
- error = 1;
+ error = 2;
goto fini;
}
} else {
@@ -9697,7 +9985,7 @@ main(int argc, char **argv)
char *checkpoint_target = NULL;
if (dump_opt['k']) {
checkpoint_pool = import_checkpointed_state(target, cfg,
- &checkpoint_target);
+ target_is_spa, &checkpoint_target);
if (checkpoint_target != NULL)
target = checkpoint_target;
@@ -9714,7 +10002,7 @@ main(int argc, char **argv)
if (error == 0) {
if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
ASSERT(checkpoint_pool != NULL);
- ASSERT(checkpoint_target == NULL);
+ ASSERT0P(checkpoint_target);
error = spa_open(checkpoint_pool, &spa, FTAG);
if (error != 0) {
@@ -9907,5 +10195,8 @@ fini:
if (kernel_init_done)
kernel_fini();
+ if (corruption_found && error == 0)
+ error = 3;
+
return (error);
}
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.h b/sys/contrib/openzfs/cmd/zdb/zdb.h
index 6b6c9169816b..48b561eb202c 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.h
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.h
@@ -29,6 +29,6 @@
#define _ZDB_H
void dump_intent_log(zilog_t *);
-extern uint8_t dump_opt[256];
+extern uint8_t dump_opt[512];
#endif /* _ZDB_H */
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb_il.c b/sys/contrib/openzfs/cmd/zdb/zdb_il.c
index 6b90b08ca1b1..3d91fb28a4c7 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb_il.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb_il.c
@@ -48,8 +48,6 @@
#include "zdb.h"
-extern uint8_t dump_opt[256];
-
static char tab_prefix[4] = "\t\t\t";
static void
@@ -176,7 +174,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("%shas blkptr, %s\n", tab_prefix,
- !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >=
+ !BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) >=
spa_min_claim_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
print_log_bp(bp, tab_prefix);
@@ -189,7 +187,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)
(void) printf("%s<hole>\n", tab_prefix);
return;
}
- if (BP_GET_LOGICAL_BIRTH(bp) < zilog->zl_header->zh_claim_txg) {
+ if (BP_GET_BIRTH(bp) < zilog->zl_header->zh_claim_txg) {
(void) printf("%s<block already committed>\n",
tab_prefix);
return;
@@ -240,7 +238,7 @@ zil_prt_rec_write_enc(zilog_t *zilog, int txtype, const void *arg)
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("%shas blkptr, %s\n", tab_prefix,
- !BP_IS_HOLE(bp) && BP_GET_LOGICAL_BIRTH(bp) >=
+ !BP_IS_HOLE(bp) && BP_GET_BIRTH(bp) >=
spa_min_claim_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
print_log_bp(bp, tab_prefix);
@@ -476,7 +474,7 @@ print_log_block(zilog_t *zilog, const blkptr_t *bp, void *arg,
if (claim_txg != 0)
claim = "already claimed";
- else if (BP_GET_LOGICAL_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa))
+ else if (BP_GET_BIRTH(bp) >= spa_min_claim_txg(zilog->zl_spa))
claim = "will claim";
else
claim = "won't claim";
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
index 8718dbde03b6..c0590edc7516 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
@@ -134,11 +134,13 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
* of blkid cache and L2ARC VDEV does not contain pool guid in its
* blkid, so this is a special case for L2ARC VDEV.
*/
- else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
+ else if (gsp->gs_vdev_guid != 0 &&
nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
gsp->gs_vdev_guid == vdev_guid) {
- (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
- &gsp->gs_devid);
+ if (gsp->gs_devid == NULL) {
+ (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
+ &gsp->gs_devid);
+ }
(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
&gsp->gs_vdev_expandtime);
return (B_TRUE);
@@ -156,22 +158,28 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
/*
* For each vdev in this pool, look for a match by devid
*/
- if ((config = zpool_get_config(zhp, NULL)) != NULL) {
- if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvl) == 0) {
- (void) zfs_agent_iter_vdev(zhp, nvl, gsp);
- }
- }
- /*
- * if a match was found then grab the pool guid
- */
- if (gsp->gs_vdev_guid && gsp->gs_devid) {
- (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &gsp->gs_pool_guid);
- }
+ boolean_t found = B_FALSE;
+ uint64_t pool_guid;
+ /* Get pool configuration and extract pool GUID */
+ if ((config = zpool_get_config(zhp, NULL)) == NULL ||
+ nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ &pool_guid) != 0)
+ goto out;
+
+ /* Skip this pool if we're looking for a specific pool */
+ if (gsp->gs_pool_guid != 0 && pool_guid != gsp->gs_pool_guid)
+ goto out;
+
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0)
+ found = zfs_agent_iter_vdev(zhp, nvl, gsp);
+
+ if (found && gsp->gs_pool_guid == 0)
+ gsp->gs_pool_guid = pool_guid;
+
+out:
zpool_close(zhp);
- return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0);
+ return (found);
}
void
@@ -233,20 +241,17 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
* For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
* ZFS_EV_POOL_GUID may be missing so find them.
*/
- if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
- if (devid == NULL)
- search.gs_vdev_guid = vdev_guid;
- else
- search.gs_devid = devid;
- zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
- if (devid == NULL)
- devid = search.gs_devid;
- if (pool_guid == 0)
- pool_guid = search.gs_pool_guid;
- if (vdev_guid == 0)
- vdev_guid = search.gs_vdev_guid;
- devtype = search.gs_vdev_type;
- }
+ search.gs_devid = devid;
+ search.gs_vdev_guid = vdev_guid;
+ search.gs_pool_guid = pool_guid;
+ zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
+ if (devid == NULL)
+ devid = search.gs_devid;
+ if (pool_guid == 0)
+ pool_guid = search.gs_pool_guid;
+ if (vdev_guid == 0)
+ vdev_guid = search.gs_vdev_guid;
+ devtype = search.gs_vdev_type;
/*
* We want to avoid reporting "remove" events coming from
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am
index 093a04c4636a..c0b161ecf248 100644
--- a/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/Makefile.am
@@ -9,18 +9,18 @@ dist_zedexec_SCRIPTS = \
%D%/all-debug.sh \
%D%/all-syslog.sh \
%D%/data-notify.sh \
- %D%/deadman-slot_off.sh \
+ %D%/deadman-sync-slot_off.sh \
%D%/generic-notify.sh \
- %D%/pool_import-led.sh \
+ %D%/pool_import-sync-led.sh \
%D%/resilver_finish-notify.sh \
%D%/resilver_finish-start-scrub.sh \
%D%/scrub_finish-notify.sh \
- %D%/statechange-led.sh \
+ %D%/statechange-sync-led.sh \
%D%/statechange-notify.sh \
- %D%/statechange-slot_off.sh \
+ %D%/statechange-sync-slot_off.sh \
%D%/trim_finish-notify.sh \
- %D%/vdev_attach-led.sh \
- %D%/vdev_clear-led.sh
+ %D%/vdev_attach-sync-led.sh \
+ %D%/vdev_clear-sync-led.sh
nodist_zedexec_SCRIPTS = \
%D%/history_event-zfs-list-cacher.sh
@@ -30,17 +30,17 @@ SUBSTFILES += $(nodist_zedexec_SCRIPTS)
zedconfdefaults = \
all-syslog.sh \
data-notify.sh \
- deadman-slot_off.sh \
+ deadman-sync-slot_off.sh \
history_event-zfs-list-cacher.sh \
- pool_import-led.sh \
+ pool_import-sync-led.sh \
resilver_finish-notify.sh \
resilver_finish-start-scrub.sh \
scrub_finish-notify.sh \
- statechange-led.sh \
+ statechange-sync-led.sh \
statechange-notify.sh \
- statechange-slot_off.sh \
- vdev_attach-led.sh \
- vdev_clear-led.sh
+ statechange-sync-slot_off.sh \
+ vdev_attach-sync-led.sh \
+ vdev_clear-sync-led.sh
dist_noinst_DATA += %D%/README
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh
index 7b339b3add01..7b339b3add01 100755
--- a/sys/contrib/openzfs/cmd/zed/zed.d/deadman-slot_off.sh
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh
deleted file mode 120000
index 7d7404398a4a..000000000000
--- a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-led.sh
+++ /dev/null
@@ -1 +0,0 @@
-statechange-led.sh
\ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh
new file mode 120000
index 000000000000..8b9c10c11ebb
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/pool_import-sync-led.sh
@@ -0,0 +1 @@
+statechange-sync-led.sh
\ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh
index 40cb61f17307..40cb61f17307 100755
--- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-led.sh
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-led.sh
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh
index 06acce93b8aa..06acce93b8aa 100755
--- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-sync-slot_off.sh
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh
deleted file mode 120000
index 7d7404398a4a..000000000000
--- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-led.sh
+++ /dev/null
@@ -1 +0,0 @@
-statechange-led.sh
\ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh
new file mode 120000
index 000000000000..8b9c10c11ebb
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_attach-sync-led.sh
@@ -0,0 +1 @@
+statechange-sync-led.sh
\ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh
deleted file mode 120000
index 7d7404398a4a..000000000000
--- a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-led.sh
+++ /dev/null
@@ -1 +0,0 @@
-statechange-led.sh
\ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh
new file mode 120000
index 000000000000..8b9c10c11ebb
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/vdev_clear-sync-led.sh
@@ -0,0 +1 @@
+statechange-sync-led.sh
\ No newline at end of file
diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh
index 6e00f153be1c..78d8f658ddd8 100644
--- a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh
+++ b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh
@@ -441,8 +441,9 @@ zed_notify_slack_webhook()
"${pathname}")"
# Construct the JSON message for posting.
+ # shellcheck disable=SC2016
#
- msg_json="$(printf '{"text": "*%s*\\n%s"}' "${subject}" "${msg_body}" )"
+ msg_json="$(printf '{"text": "*%s*\\n```%s```"}' "${subject}" "${msg_body}" )"
# Send the POST request and check for errors.
#
diff --git a/sys/contrib/openzfs/cmd/zed/zed_event.c b/sys/contrib/openzfs/cmd/zed/zed_event.c
index 296c222ca382..ba7cba304b1d 100644
--- a/sys/contrib/openzfs/cmd/zed/zed_event.c
+++ b/sys/contrib/openzfs/cmd/zed/zed_event.c
@@ -110,7 +110,7 @@ zed_event_fini(struct zed_conf *zcp)
static void
_bump_event_queue_length(void)
{
- int zzlm = -1, wr;
+ int zzlm, wr;
char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */
long int qlen, orig_qlen;
diff --git a/sys/contrib/openzfs/cmd/zed/zed_exec.c b/sys/contrib/openzfs/cmd/zed/zed_exec.c
index 036081decd64..a14af4f20a85 100644
--- a/sys/contrib/openzfs/cmd/zed/zed_exec.c
+++ b/sys/contrib/openzfs/cmd/zed/zed_exec.c
@@ -196,37 +196,29 @@ _nop(int sig)
(void) sig;
}
-static void *
-_reap_children(void *arg)
+static void
+wait_for_children(boolean_t do_pause, boolean_t wait)
{
- (void) arg;
- struct launched_process_node node, *pnode;
pid_t pid;
- int status;
struct rusage usage;
- struct sigaction sa = {};
-
- (void) sigfillset(&sa.sa_mask);
- (void) sigdelset(&sa.sa_mask, SIGCHLD);
- (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
-
- (void) sigemptyset(&sa.sa_mask);
- sa.sa_handler = _nop;
- sa.sa_flags = SA_NOCLDSTOP;
- (void) sigaction(SIGCHLD, &sa, NULL);
+ int status;
+ struct launched_process_node node, *pnode;
for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
(void) pthread_mutex_lock(&_launched_processes_lock);
- pid = wait4(0, &status, WNOHANG, &usage);
-
+ pid = wait4(0, &status, wait ? 0 : WNOHANG, &usage);
if (pid == 0 || pid == (pid_t)-1) {
(void) pthread_mutex_unlock(&_launched_processes_lock);
- if (pid == 0 || errno == ECHILD)
- pause();
- else if (errno != EINTR)
+ if ((pid == 0) || (errno == ECHILD)) {
+ if (do_pause)
+ pause();
+ } else if (errno != EINTR)
zed_log_msg(LOG_WARNING,
"Failed to wait for children: %s",
strerror(errno));
+ if (!do_pause)
+ return;
+
} else {
memset(&node, 0, sizeof (node));
node.pid = pid;
@@ -278,6 +270,25 @@ _reap_children(void *arg)
}
}
+}
+
+static void *
+_reap_children(void *arg)
+{
+ (void) arg;
+ struct sigaction sa = {};
+
+ (void) sigfillset(&sa.sa_mask);
+ (void) sigdelset(&sa.sa_mask, SIGCHLD);
+ (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
+
+ (void) sigemptyset(&sa.sa_mask);
+ sa.sa_handler = _nop;
+ sa.sa_flags = SA_NOCLDSTOP;
+ (void) sigaction(SIGCHLD, &sa, NULL);
+
+ wait_for_children(B_TRUE, B_FALSE);
+
return (NULL);
}
@@ -307,6 +318,45 @@ zed_exec_fini(void)
}
/*
+ * Check whether the zedlet name indicates a synchronous zedlet
+ *
+ * Synchronous zedlets have a "-sync-" immediately following the event name in
+ * their zedlet filename, like:
+ *
+ * EVENT_NAME-sync-ZEDLETNAME.sh
+ *
+ * For example, if you wanted a synchronous statechange script:
+ *
+ * statechange-sync-myzedlet.sh
+ *
+ * Synchronous zedlets are guaranteed to be the only zedlet running. No other
+ * zedlets may run in parallel with a synchronous zedlet. A synchronous
+ * zedlet will wait for all previously spawned zedlets to finish before running.
+ * Users should be careful to only use synchronous zedlets when needed, since
+ * they decrease parallelism.
+ */
+static boolean_t
+zedlet_is_sync(const char *zedlet, const char *event)
+{
+ const char *sync_str = "-sync-";
+ size_t sync_str_len;
+ size_t zedlet_len;
+ size_t event_len;
+
+ sync_str_len = strlen(sync_str);
+ zedlet_len = strlen(zedlet);
+ event_len = strlen(event);
+
+ if (event_len + sync_str_len >= zedlet_len)
+ return (B_FALSE);
+
+ if (strncmp(&zedlet[event_len], sync_str, sync_str_len) == 0)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+/*
* Process the event [eid] by synchronously invoking all zedlets with a
* matching class prefix.
*
@@ -368,9 +418,28 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass,
z = zed_strings_next(zcp->zedlets)) {
for (csp = class_strings; *csp; csp++) {
n = strlen(*csp);
- if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
+ if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) {
+ boolean_t is_sync = zedlet_is_sync(z, *csp);
+
+ if (is_sync) {
+ /*
+ * Wait for previous zedlets to
+ * finish
+ */
+ wait_for_children(B_FALSE, B_TRUE);
+ }
+
_zed_exec_fork_child(eid, zcp->zedlet_dir,
z, e, zcp->zevent_fd, zcp->do_foreground);
+
+ if (is_sync) {
+ /*
+ * Wait for sync zedlet we just launched
+ * to finish.
+ */
+ wait_for_children(B_FALSE, B_TRUE);
+ }
+ }
}
}
free(e);
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 841c356508a5..ccdd5ffef8e6 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -440,7 +440,7 @@ get_usage(zfs_help_t idx)
return (gettext("\tredact <snapshot> <bookmark> "
"<redaction_snapshot> ...\n"));
case HELP_REWRITE:
- return (gettext("\trewrite [-rvx] [-o <offset>] [-l <length>] "
+ return (gettext("\trewrite [-Prvx] [-o <offset>] [-l <length>] "
"<directory|file ...>\n"));
case HELP_JAIL:
return (gettext("\tjail <jailid|jailname> <filesystem>\n"));
@@ -914,7 +914,11 @@ zfs_do_clone(int argc, char **argv)
log_history = B_FALSE;
}
- ret = zfs_mount_and_share(g_zfs, argv[1], ZFS_TYPE_DATASET);
+ /*
+ * Dataset cloned successfully, mount/share failures are
+ * non-fatal.
+ */
+ (void) zfs_mount_and_share(g_zfs, argv[1], ZFS_TYPE_DATASET);
}
zfs_close(zhp);
@@ -923,26 +927,22 @@ zfs_do_clone(int argc, char **argv)
return (!!ret);
usage:
- ASSERT3P(zhp, ==, NULL);
+ ASSERT0P(zhp);
nvlist_free(props);
usage(B_FALSE);
return (-1);
}
/*
- * Return a default volblocksize for the pool which always uses more than
- * half of the data sectors. This primarily applies to dRAID which always
- * writes full stripe widths.
+ * Calculate the minimum allocation size based on the top-level vdevs.
*/
static uint64_t
-default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
+calculate_volblocksize(nvlist_t *config)
{
- uint64_t volblocksize, asize = SPA_MINBLOCKSIZE;
+ uint64_t asize = SPA_MINBLOCKSIZE;
nvlist_t *tree, **vdevs;
uint_t nvdevs;
- nvlist_t *config = zpool_get_config(zhp, NULL);
-
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 ||
nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
&vdevs, &nvdevs) != 0) {
@@ -973,6 +973,24 @@ default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
}
}
+ return (asize);
+}
+
+/*
+ * Return a default volblocksize for the pool which always uses more than
+ * half of the data sectors. This primarily applies to dRAID which always
+ * writes full stripe widths.
+ */
+static uint64_t
+default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
+{
+ uint64_t volblocksize, asize = SPA_MINBLOCKSIZE;
+
+ nvlist_t *config = zpool_get_config(zhp, NULL);
+
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_MAX_ALLOC, &asize) != 0)
+ asize = calculate_volblocksize(config);
+
/*
* Calculate the target volblocksize such that more than half
* of the asize is used. The following table is for 4k sectors.
@@ -1319,7 +1337,9 @@ zfs_do_create(int argc, char **argv)
goto error;
}
- ret = zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET);
+ /* Dataset created successfully, mount/share failures are non-fatal */
+ ret = 0;
+ (void) zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET);
error:
nvlist_free(props);
return (ret);
@@ -1974,9 +1994,8 @@ fill_dataset_info(nvlist_t *list, zfs_handle_t *zhp, boolean_t as_int)
}
if (type == ZFS_TYPE_SNAPSHOT) {
- char *ds, *snap;
- ds = snap = strdup(zfs_get_name(zhp));
- ds = strsep(&snap, "@");
+ char *snap = strdup(zfs_get_name(zhp));
+ char *ds = strsep(&snap, "@");
fnvlist_add_string(list, "dataset", ds);
fnvlist_add_string(list, "snapshot_name", snap);
free(ds);
@@ -2019,8 +2038,7 @@ get_callback(zfs_handle_t *zhp, void *data)
nvlist_t *user_props = zfs_get_user_props(zhp);
zprop_list_t *pl = cbp->cb_proplist;
nvlist_t *propval;
- nvlist_t *item, *d, *props;
- item = d = props = NULL;
+ nvlist_t *item, *d = NULL, *props = NULL;
const char *strval;
const char *sourceval;
boolean_t received = is_recvd_column(cbp);
@@ -5305,6 +5323,7 @@ zfs_do_receive(int argc, char **argv)
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
+#define ZFS_DELEG_PERM_SEND_RAW "send:raw"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append"
#define ZFS_DELEG_PERM_ALLOW "allow"
@@ -5347,6 +5366,7 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
{ ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME },
{ ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
{ ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
+ { ZFS_DELEG_PERM_SEND_RAW, ZFS_DELEG_NOTE_SEND_RAW },
{ ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
{ ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
{ ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK },
@@ -5879,7 +5899,7 @@ parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl)
static inline const char *
deleg_perm_comment(zfs_deleg_note_t note)
{
- const char *str = "";
+ const char *str;
/* subcommands */
switch (note) {
@@ -5931,6 +5951,10 @@ deleg_perm_comment(zfs_deleg_note_t note)
case ZFS_DELEG_NOTE_SEND:
str = gettext("");
break;
+ case ZFS_DELEG_NOTE_SEND_RAW:
+ str = gettext("Allow sending ONLY encrypted (raw) replication"
+ "\n\t\t\t\tstreams");
+ break;
case ZFS_DELEG_NOTE_SHARE:
str = gettext("Allows sharing file systems over NFS or SMB"
"\n\t\t\t\tprotocols");
@@ -6860,17 +6884,17 @@ print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl,
if (scripted) {
if (parsable) {
- (void) printf("%s\t%s\t%ld\n", zname,
- tagname, (unsigned long)time);
+ (void) printf("%s\t%s\t%lld\n", zname,
+ tagname, (long long)time);
} else {
(void) printf("%s\t%s\t%s\n", zname,
tagname, tsbuf);
}
} else {
if (parsable) {
- (void) printf("%-*s %-*s %ld\n",
+ (void) printf("%-*s %-*s %lld\n",
nwidth, zname, tagwidth,
- tagname, (unsigned long)time);
+ tagname, (long long)time);
} else {
(void) printf("%-*s %-*s %s\n",
nwidth, zname, tagwidth,
@@ -7729,6 +7753,7 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
struct extmnttab entry;
const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount";
ino_t path_inode;
+ char *zfs_mntpnt, *entry_mntpnt;
/*
* Search for the given (major,minor) pair in the mount table.
@@ -7770,6 +7795,24 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
goto out;
}
+ /*
+ * If the filesystem is mounted, check that the mountpoint matches
+ * the one in the mnttab entry w.r.t. provided path. If it doesn't,
+ * then we should not proceed further.
+ */
+ entry_mntpnt = strdup(entry.mnt_mountp);
+ if (zfs_is_mounted(zhp, &zfs_mntpnt)) {
+ if (strcmp(zfs_mntpnt, entry_mntpnt) != 0) {
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "not an original mountpoint\n"), cmdname, path);
+ free(zfs_mntpnt);
+ free(entry_mntpnt);
+ goto out;
+ }
+ free(zfs_mntpnt);
+ }
+ free(entry_mntpnt);
+
if (op == OP_SHARE) {
char nfs_mnt_prop[ZFS_MAXPROPLEN];
char smbshare_prop[ZFS_MAXPROPLEN];
@@ -9160,8 +9203,11 @@ zfs_do_rewrite(int argc, char **argv)
zfs_rewrite_args_t args;
memset(&args, 0, sizeof (args));
- while ((c = getopt(argc, argv, "l:o:rvx")) != -1) {
+ while ((c = getopt(argc, argv, "Pl:o:rvx")) != -1) {
switch (c) {
+ case 'P':
+ args.flags |= ZFS_REWRITE_PHYSICAL;
+ break;
case 'l':
args.len = strtoll(optarg, NULL, 0);
break;
diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c
index 8244bc83fa0d..8ffbf91ffb30 100644
--- a/sys/contrib/openzfs/cmd/zhack.c
+++ b/sys/contrib/openzfs/cmd/zhack.c
@@ -54,6 +54,7 @@
#include <sys/dmu_tx.h>
#include <zfeature_common.h>
#include <libzutil.h>
+#include <sys/metaslab_impl.h>
static importargs_t g_importargs;
static char *g_pool;
@@ -69,7 +70,8 @@ static __attribute__((noreturn)) void
usage(void)
{
(void) fprintf(stderr,
- "Usage: zhack [-c cachefile] [-d dir] <subcommand> <args> ...\n"
+ "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
+ "<args> ...\n"
"where <subcommand> <args> is one of the following:\n"
"\n");
@@ -93,7 +95,10 @@ usage(void)
" -c repair corrupted label checksums\n"
" -u restore the label on a detached device\n"
"\n"
- " <device> : path to vdev\n");
+ " <device> : path to vdev\n"
+ "\n"
+ " metaslab leak <pool>\n"
+ " apply allocation map from zdb to specified pool\n");
exit(1);
}
@@ -162,9 +167,9 @@ zhack_import(char *target, boolean_t readonly)
props = NULL;
if (readonly) {
- VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
- VERIFY(nvlist_add_uint64(props,
- zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0);
+ VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
+ VERIFY0(nvlist_add_uint64(props,
+ zpool_prop_to_name(ZPOOL_PROP_READONLY), 1));
}
zfeature_checks_disable = B_TRUE;
@@ -218,8 +223,8 @@ dump_obj(objset_t *os, uint64_t obj, const char *name)
} else {
ASSERT(za->za_integer_length == 1);
char val[1024];
- VERIFY(zap_lookup(os, obj, za->za_name,
- 1, sizeof (val), val) == 0);
+ VERIFY0(zap_lookup(os, obj, za->za_name,
+ 1, sizeof (val), val));
(void) printf("\t%s = %s\n", za->za_name, val);
}
}
@@ -363,10 +368,12 @@ feature_incr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount + 1, tx);
spa_history_log_internal(spa, "zhack feature incr", tx,
"name=%s", feature->fi_guid);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
static void
@@ -376,10 +383,12 @@ feature_decr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
+ mutex_enter(&spa->spa_feat_stats_lock);
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount - 1, tx);
spa_history_log_internal(spa, "zhack feature decr", tx,
"name=%s", feature->fi_guid);
+ mutex_exit(&spa->spa_feat_stats_lock);
}
static void
@@ -496,6 +505,186 @@ zhack_do_feature(int argc, char **argv)
return (0);
}
+/*
+ * Prefix test: the result is nonzero when the first strlen(b) characters
+ * of 'a' compare equal to 'b', and zero otherwise.
+ */
+static boolean_t
+strstarts(const char *a, const char *b)
+{
+	const size_t prefix_len = strlen(b);
+
+	return (strncmp(a, b, prefix_len) == 0);
+}
+
+/*
+ * Walk the range [start, start + size) of a metaslab and move each segment
+ * that zfs_range_tree_find_in() reports from ms_allocatable into the
+ * ms_allocating tree belonging to the txg of 'tx'.  The walk ends as soon
+ * as no further segment is reported for the remainder of the range.
+ *
+ * The caller must have disabled the metaslab and must hold its ms_lock;
+ * both conditions are asserted.
+ */
+static void
+metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
+    dmu_tx_t *tx)
+{
+	ASSERT(msp->ms_disabled);
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+	const uint64_t txg = dmu_tx_get_txg(tx);
+	const uint64_t end = start + size;
+	uint64_t cursor = start;
+
+	while (cursor < end) {
+		uint64_t seg_start, seg_size;
+
+		/* Look for the next segment in what is left of the range. */
+		if (!zfs_range_tree_find_in(msp->ms_allocatable, cursor,
+		    end - cursor, &seg_start, &seg_size))
+			break;
+
+		zfs_range_tree_remove(msp->ms_allocatable, seg_start,
+		    seg_size);
+
+		/*
+		 * The vdev is dirtied only while this txg's allocating tree
+		 * is still empty, i.e. before the first segment is added.
+		 */
+		if (zfs_range_tree_is_empty(
+		    msp->ms_allocating[txg & TXG_MASK])) {
+			vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
+			    txg);
+		}
+
+		zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK],
+		    seg_start, seg_size);
+		msp->ms_allocating_total += seg_size;
+
+		/* Resume right after the segment just moved. */
+		cursor = seg_start + seg_size;
+	}
+}
+
+/*
+ * zhack metaslab leak [-f] <pool>
+ *
+ * Read an allocation map from stdin and force every listed range to be
+ * allocated in the given pool.  Three kinds of input line are acted on;
+ * every other line is ignored:
+ *
+ *	"\tvdev <id>\t[<text>] metaslab shift <n>"
+ *		select the top-level vdev that the following lines refer to
+ *		and check that its metaslab shift matches
+ *	"\tmetaslabs <count>"
+ *		check the selected vdev's metaslab count; a mismatch is
+ *		tolerated when -f was given
+ *	"ALLOC: <start> <size>"
+ *		force the range to be allocated on the selected vdev
+ *
+ * ALLOC lines that land in the same metaslab as the previous one share a
+ * single tx.  Moving to another metaslab or vdev commits the tx, drops the
+ * metaslab lock and re-enables the previous metaslab.
+ *
+ * A line that starts with a recognised keyword but cannot be parsed trips
+ * a VERIFY.  Inconsistent input (unknown vdev, shift or count mismatch,
+ * data before any vdev line, offset outside the vdev) prints an error and
+ * stops processing; work already queued is still committed.
+ */
+static void
+zhack_do_metaslab_leak(int argc, char **argv)
+{
+	int c;
+	char *target;
+	spa_t *spa;
+
+	optind = 1;
+	boolean_t force = B_FALSE;
+	while ((c = getopt(argc, argv, "f")) != -1) {
+		switch (c) {
+		case 'f':
+			force = B_TRUE;
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, "error: missing pool name\n");
+		usage();
+	}
+	target = argv[0];
+
+	zhack_spa_open(target, B_FALSE, FTAG, &spa);
+	spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
+
+	char *line = NULL;
+	size_t cap = 0;
+
+	vdev_t *vd = NULL;		/* vdev selected by the last vdev line */
+	metaslab_t *prev = NULL;	/* metaslab currently disabled + locked */
+	dmu_tx_t *tx = NULL;		/* open tx covering 'prev' */
+	while (getline(&line, &cap, stdin) > 0) {
+		if (strstarts(line, "\tvdev ")) {
+			uint64_t vdev_id, ms_shift;
+
+			/*
+			 * Try the form that has a token between the id and
+			 * "metaslab shift" first.  Unless both values were
+			 * converted, fall back to the form without it and
+			 * insist on both, so neither variable is ever used
+			 * uninitialized.
+			 */
+			if (sscanf(line,
+			    "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64,
+			    &vdev_id, &ms_shift) != 2) {
+				VERIFY3U(sscanf(line, "\tvdev %"PRIu64
+				    "\t metaslab shift %4"PRIu64,
+				    &vdev_id, &ms_shift), ==, 2);
+			}
+			vd = vdev_lookup_top(spa, vdev_id);
+			if (vd == NULL) {
+				fprintf(stderr, "error: no such vdev with "
+				    "id %"PRIu64"\n", vdev_id);
+				break;
+			}
+			/* Finish the previous vdev's outstanding work. */
+			if (tx) {
+				dmu_tx_commit(tx);
+				mutex_exit(&prev->ms_lock);
+				metaslab_enable(prev, B_FALSE, B_FALSE);
+				tx = NULL;
+				prev = NULL;
+			}
+			if (vd->vdev_ms_shift != ms_shift) {
+				fprintf(stderr, "error: ms_shift mismatch: %"
+				    PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
+				    ms_shift);
+				break;
+			}
+		} else if (strstarts(line, "\tmetaslabs ")) {
+			uint64_t ms_count;
+			VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
+			    ==, 1);
+			/* An assertion alone vanishes in non-debug builds. */
+			if (vd == NULL) {
+				fprintf(stderr, "error: metaslab count "
+				    "found before any vdev line\n");
+				break;
+			}
+			if (!force && vd->vdev_ms_count != ms_count) {
+				fprintf(stderr, "error: ms_count mismatch: %"
+				    PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
+				    ms_count);
+				break;
+			}
+		} else if (strstarts(line, "ALLOC:")) {
+			uint64_t start, size;
+			VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
+			    &start, &size), ==, 2);
+
+			if (vd == NULL) {
+				fprintf(stderr, "error: allocation found "
+				    "before any vdev line\n");
+				break;
+			}
+
+			/*
+			 * Never index past the metaslab array; with -f the
+			 * count in the input need not match the pool's.
+			 */
+			uint64_t ms_id = start >> vd->vdev_ms_shift;
+			if (ms_id >= vd->vdev_ms_count) {
+				fprintf(stderr, "error: offset %"PRIu64
+				    " is outside the %"PRIu64" metaslabs of "
+				    "the current vdev\n", start,
+				    vd->vdev_ms_count);
+				break;
+			}
+
+			metaslab_t *cur = vd->vdev_ms[ms_id];
+			if (prev != cur) {
+				if (prev) {
+					dmu_tx_commit(tx);
+					mutex_exit(&prev->ms_lock);
+					metaslab_enable(prev, B_FALSE, B_FALSE);
+				}
+				ASSERT(cur);
+				metaslab_disable(cur);
+				mutex_enter(&cur->ms_lock);
+				metaslab_load(cur);
+				prev = cur;
+				tx = dmu_tx_create_dd(
+				    spa_get_dsl(vd->vdev_spa)->dp_root_dir);
+				dmu_tx_assign(tx, DMU_TX_WAIT);
+			}
+
+			metaslab_force_alloc(cur, start, size, tx);
+		} else {
+			continue;
+		}
+	}
+	/* Commit whatever was still open when input ended or was rejected. */
+	if (tx) {
+		dmu_tx_commit(tx);
+		mutex_exit(&prev->ms_lock);
+		metaslab_enable(prev, B_FALSE, B_FALSE);
+		tx = NULL;
+		prev = NULL;
+	}
+	free(line);
+
+	spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
+	spa_close(spa, FTAG);
+}
+
+/*
+ * Entry point for "zhack metaslab <operation> ...".  The only operation
+ * recognised is "leak"; a missing or unknown operation prints an error
+ * and calls usage().  Returns 0.
+ */
+static int
+zhack_do_metaslab(int argc, char **argv)
+{
+	/* Step past the "metaslab" word so argv[0] names the operation. */
+	argc--;
+	argv++;
+
+	if (argc == 0) {
+		(void) fprintf(stderr,
+		    "error: no metaslab operation specified\n");
+		usage();
+	}
+
+	const char *op = argv[0];
+
+	if (strcmp(op, "leak") != 0) {
+		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
+		    op);
+		usage();
+	} else {
+		zhack_do_metaslab_leak(argc, argv);
+	}
+
+	return (0);
+}
+
#define ASHIFT_UBERBLOCK_SHIFT(ashift) \
MIN(MAX(ashift, UBERBLOCK_SHIFT), \
MAX_UBERBLOCK_SHIFT)
@@ -525,6 +714,23 @@ zhack_repair_read_label(const int fd, vdev_label_t *vl,
return (0);
}
+/*
+ * Classify the magic number of a label's embedded checksum.
+ *
+ * Stores B_FALSE in *byteswap when the magic equals ZEC_MAGIC and B_TRUE
+ * when it equals the byte-swapped ZEC_MAGIC, returning 0 in both cases.
+ * For any other value an error naming label 'l' is printed, *byteswap is
+ * left untouched and 1 is returned.
+ */
+static int
+zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
+{
+	const uint64_t magic = vdev_eck->zec_magic;
+
+	if (magic == ZEC_MAGIC) {
+		*byteswap = B_FALSE;
+		return (0);
+	}
+
+	if (magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
+		*byteswap = B_TRUE;
+		return (0);
+	}
+
+	(void) fprintf(stderr, "error: label %d: "
+	    "Expected the nvlist checksum magic number but instead got "
+	    "0x%" PRIx64 "\n",
+	    l, magic);
+	return (1);
+}
+
static void
zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
@@ -551,33 +757,10 @@ zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
}
static int
-zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys,
- const size_t cfg_keys_len, nvlist_t *cfg, nvlist_t *vdev_tree_cfg,
- uint64_t *ashift)
+zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
{
int err;
-
- if (ub->ub_txg != 0) {
- (void) fprintf(stderr,
- "error: label %d: UB TXG of 0 expected, but got %"
- PRIu64 "\n",
- l, ub->ub_txg);
- (void) fprintf(stderr, "It would appear the device was not "
- "properly removed.\n");
- return (1);
- }
-
- for (int i = 0; i < cfg_keys_len; i++) {
- uint64_t val;
- err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
- if (err) {
- (void) fprintf(stderr,
- "error: label %d, %d: "
- "cannot find nvlist key %s\n",
- l, i, cfg_keys[i]);
- return (err);
- }
- }
+ nvlist_t *vdev_tree_cfg;
err = nvlist_lookup_nvlist(cfg,
ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
@@ -601,7 +784,7 @@ zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys,
(void) fprintf(stderr,
"error: label %d: nvlist key %s is zero\n",
l, ZPOOL_CONFIG_ASHIFT);
- return (err);
+ return (1);
}
return (0);
@@ -616,30 +799,35 @@ zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
*/
if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
+ int err;
+
ub->ub_txg = txg;
- if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) {
+ err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
+ if (err) {
(void) fprintf(stderr,
"error: label %d: "
"Failed to remove pool creation TXG\n",
l);
- return (1);
+ return (err);
}
- if (nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG) != 0) {
+ err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
+ if (err) {
(void) fprintf(stderr,
"error: label %d: Failed to remove pool TXG to "
"be replaced.\n",
l);
- return (1);
+ return (err);
}
- if (nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg) != 0) {
+ err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
+ if (err) {
(void) fprintf(stderr,
"error: label %d: "
"Failed to add pool TXG of %" PRIu64 "\n",
l, txg);
- return (1);
+ return (err);
}
}
@@ -733,6 +921,7 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data,
BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
const uint64_t actual_magic = vdev_eck->zec_magic;
int err = 0;
+
if (actual_magic != expected_magic) {
(void) fprintf(stderr, "error: label %d: "
"Expected "
@@ -754,6 +943,36 @@ zhack_repair_test_cksum(const int byteswap, void *vdev_data,
return (err);
}
+/*
+ * Unpack the config nvlist stored in a label and check that it carries
+ * the uint64 keys ZPOOL_CONFIG_VERSION, ZPOOL_CONFIG_POOL_STATE and
+ * ZPOOL_CONFIG_GUID.
+ *
+ * vl	label whose vl_vdev_phys.vp_nvlist is unpacked
+ * l	label number, used only in diagnostics
+ * cfg	receives the unpacked nvlist on success
+ *
+ * Returns 0 on success, in which case the caller owns *cfg.  On failure a
+ * diagnostic is printed, the nvlist error is returned and *cfg is NULL;
+ * an nvlist that was unpacked but is missing a key is freed here, since
+ * callers simply bail out on error.
+ */
+static int
+zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
+{
+	const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
+	    ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
+	int err;
+
+	/* Give the caller a well-defined value on every error path. */
+	*cfg = NULL;
+
+	err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
+	    VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
+	if (err) {
+		(void) fprintf(stderr,
+		    "error: cannot unpack nvlist label %d\n", l);
+		return (err);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
+		uint64_t val;
+		err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
+		if (err) {
+			(void) fprintf(stderr,
+			    "error: label %d, %d: "
+			    "cannot find nvlist key %s\n",
+			    l, i, cfg_keys[i]);
+			/* Do not leak the list we just unpacked. */
+			nvlist_free(*cfg);
+			*cfg = NULL;
+			return (err);
+		}
+	}
+
+	return (0);
+}
+
static void
zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
vdev_label_t *vl, const uint64_t label_offset, const int l,
@@ -767,10 +986,7 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
(zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
const uint64_t vdev_phys_offset =
label_offset + offsetof(vdev_label_t, vl_vdev_phys);
- const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
- ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
nvlist_t *cfg;
- nvlist_t *vdev_tree_cfg = NULL;
uint64_t ashift;
int byteswap;
@@ -778,18 +994,9 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
if (err)
return;
- if (vdev_eck->zec_magic == 0) {
- (void) fprintf(stderr, "error: label %d: "
- "Expected the nvlist checksum magic number to not be zero"
- "\n",
- l);
- (void) fprintf(stderr, "There should already be a checksum "
- "for the label.\n");
+ err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
+ if (err)
return;
- }
-
- byteswap =
- (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC));
if (byteswap) {
byteswap_uint64_array(&vdev_eck->zec_cksum,
@@ -805,16 +1012,7 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
return;
}
- err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
- VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
- if (err) {
- (void) fprintf(stderr,
- "error: cannot unpack nvlist label %d\n", l);
- return;
- }
-
- err = zhack_repair_check_label(ub,
- l, cfg_keys, ARRAY_SIZE(cfg_keys), cfg, vdev_tree_cfg, &ashift);
+ err = zhack_repair_unpack_cfg(vl, l, &cfg);
if (err)
return;
@@ -822,6 +1020,19 @@ zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
char *buf;
size_t buflen;
+ if (ub->ub_txg != 0) {
+ (void) fprintf(stderr,
+ "error: label %d: UB TXG of 0 expected, but got %"
+ PRIu64 "\n", l, ub->ub_txg);
+ (void) fprintf(stderr, "It would appear the device was "
+ "not properly detached.\n");
+ return;
+ }
+
+ err = zhack_repair_get_ashift(cfg, l, &ashift);
+ if (err)
+ return;
+
err = zhack_repair_undetach(ub, cfg, l);
if (err)
return;
@@ -981,7 +1192,7 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
zfs_prop_init();
- while ((c = getopt(argc, argv, "+c:d:")) != -1) {
+ while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
switch (c) {
case 'c':
g_importargs.cachefile = optarg;
@@ -990,6 +1201,10 @@ main(int argc, char **argv)
assert(g_importargs.paths < MAX_NUM_PATHS);
g_importargs.path[g_importargs.paths++] = optarg;
break;
+ case 'o':
+ if (handle_tunable_option(optarg, B_FALSE) != 0)
+ exit(1);
+ break;
default:
usage();
break;
@@ -1011,6 +1226,8 @@ main(int argc, char **argv)
rv = zhack_do_feature(argc, argv);
} else if (strcmp(subcommand, "label") == 0) {
return (zhack_do_label(argc, argv));
+ } else if (strcmp(subcommand, "metaslab") == 0) {
+ rv = zhack_do_metaslab(argc, argv);
} else {
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
subcommand);
diff --git a/sys/contrib/openzfs/cmd/zilstat.in b/sys/contrib/openzfs/cmd/zilstat.in
index 4140398bf4a3..d01db9b0914b 100755
--- a/sys/contrib/openzfs/cmd/zilstat.in
+++ b/sys/contrib/openzfs/cmd/zilstat.in
@@ -47,6 +47,7 @@ cols = {
"cec": [5, 1000, "zil_commit_error_count"],
"csc": [5, 1000, "zil_commit_stall_count"],
"cSc": [5, 1000, "zil_commit_suspend_count"],
+ "cCc": [5, 1000, "zil_commit_crash_count"],
"ic": [5, 1000, "zil_itx_count"],
"iic": [5, 1000, "zil_itx_indirect_count"],
"iib": [5, 1024, "zil_itx_indirect_bytes"],
diff --git a/sys/contrib/openzfs/cmd/zinject/zinject.c b/sys/contrib/openzfs/cmd/zinject/zinject.c
index 113797c878b9..c2f646f2567d 100644
--- a/sys/contrib/openzfs/cmd/zinject/zinject.c
+++ b/sys/contrib/openzfs/cmd/zinject/zinject.c
@@ -107,6 +107,8 @@
* zinject
* zinject <-a | -u pool>
* zinject -c <id|all>
+ * zinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]
+ * [-T iotype] [-t type object | -b bookmark pool]
* zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
* [-r range] <object>
* zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
@@ -132,14 +134,18 @@
* The '-f' flag controls the frequency of errors injected, expressed as a
* real number percentage between 0.0001 and 100. The default is 100.
*
- * The this form is responsible for actually injecting the handler into the
+ * The <object> form is responsible for actually injecting the handler into the
* framework. It takes the arguments described above, translates them to the
* internal tuple using libzpool, and then issues an ioctl() to register the
* handler.
*
- * The final form can target a specific bookmark, regardless of whether a
+ * The '-b' option can target a specific bookmark, regardless of whether a
* human-readable interface has been designed. It allows developers to specify
* a particular block by number.
+ *
+ * The '-E' option injects pipeline ready stage delays for the given object or
+ * bookmark. The delay is specified in milliseconds, and it supports I/O type
+ * and range filters.
*/
#include <errno.h>
@@ -346,6 +352,13 @@ usage(void)
"\t\tsuch that the operation takes a minimum of supplied seconds\n"
"\t\tto complete.\n"
"\n"
+ "\tzinject -E <delay> [-a] [-m] [-f freq] [-l level] [-r range]\n"
+ "\t\t[-T iotype] [-t type object | -b bookmark pool]\n"
+ "\n"
+ "\t\tInject pipeline ready stage delays for the given object path\n"
+ "\t\t(data or dnode) or raw bookmark. The delay is specified in\n"
+ "\t\tmilliseconds.\n"
+ "\n"
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
"\t\tCause the pool to stop writing blocks yet not\n"
"\t\treport errors for a duration. Simulates buggy hardware\n"
@@ -724,12 +737,15 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
if (quiet) {
(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
} else {
+ boolean_t show_object = B_FALSE;
+ boolean_t show_iotype = B_FALSE;
(void) printf("Added handler %llu with the following "
"properties:\n", (u_longlong_t)zc.zc_guid);
(void) printf(" pool: %s\n", pool);
if (record->zi_guid) {
(void) printf(" vdev: %llx\n",
(u_longlong_t)record->zi_guid);
+ show_iotype = B_TRUE;
} else if (record->zi_func[0] != '\0') {
(void) printf(" panic function: %s\n",
record->zi_func);
@@ -742,7 +758,18 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
} else if (record->zi_timer > 0) {
(void) printf(" timer: %lld ms\n",
(u_longlong_t)NSEC2MSEC(record->zi_timer));
+ if (record->zi_cmd == ZINJECT_DELAY_READY) {
+ show_object = B_TRUE;
+ show_iotype = B_TRUE;
+ }
} else {
+ show_object = B_TRUE;
+ }
+ if (show_iotype) {
+ (void) printf("iotype: %s\n",
+ iotype_to_str(record->zi_iotype));
+ }
+ if (show_object) {
(void) printf("objset: %llu\n",
(u_longlong_t)record->zi_objset);
(void) printf("object: %llu\n",
@@ -910,6 +937,7 @@ main(int argc, char **argv)
int ret;
int flags = 0;
uint32_t dvas = 0;
+ hrtime_t ready_delay = -1;
if ((g_zfs = libzfs_init()) == NULL) {
(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
@@ -940,7 +968,7 @@ main(int argc, char **argv)
}
while ((c = getopt(argc, argv,
- ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
+ ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
switch (c) {
case 'a':
flags |= ZINJECT_FLUSH_ARC;
@@ -1113,6 +1141,18 @@ main(int argc, char **argv)
case 'u':
flags |= ZINJECT_UNLOAD_SPA;
break;
+ case 'E':
+ ready_delay = MSEC2NSEC(strtol(optarg, &end, 10));
+ if (ready_delay <= 0 || *end != '\0') {
+ (void) fprintf(stderr, "invalid delay '%s': "
+ "must be a positive duration\n", optarg);
+ usage();
+ libzfs_fini(g_zfs);
+ return (1);
+ }
+ record.zi_cmd = ZINJECT_DELAY_READY;
+ record.zi_timer = ready_delay;
+ break;
case 'L':
if ((label = name_to_type(optarg)) == TYPE_INVAL &&
!LABEL_TYPE(type)) {
@@ -1150,7 +1190,7 @@ main(int argc, char **argv)
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
- record.zi_freq > 0 || dvas != 0) {
+ record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) {
(void) fprintf(stderr, "cancel (-c) incompatible with "
"any other options\n");
usage();
@@ -1186,7 +1226,7 @@ main(int argc, char **argv)
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
- dvas != 0) {
+ dvas != 0 || ready_delay >= 0) {
(void) fprintf(stderr, "device (-d) incompatible with "
"data error injection\n");
usage();
@@ -1276,13 +1316,23 @@ main(int argc, char **argv)
return (1);
}
- record.zi_cmd = ZINJECT_DATA_FAULT;
+ if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
+ record.zi_cmd = ZINJECT_DATA_FAULT;
+ if (!error)
+ error = EIO;
+ } else if (error != 0) {
+ (void) fprintf(stderr, "error type -e incompatible "
+ "with delay injection\n");
+ libzfs_fini(g_zfs);
+ return (1);
+ } else {
+ record.zi_iotype = io_type;
+ }
+
if (translate_raw(raw, &record) != 0) {
libzfs_fini(g_zfs);
return (1);
}
- if (!error)
- error = EIO;
} else if (record.zi_cmd == ZINJECT_PANIC) {
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || device != NULL || record.zi_freq > 0 ||
@@ -1410,6 +1460,13 @@ main(int argc, char **argv)
record.zi_dvas = dvas;
}
+ if (record.zi_cmd != ZINJECT_UNINITIALIZED && error != 0) {
+ (void) fprintf(stderr, "error type -e incompatible "
+ "with delay injection\n");
+ libzfs_fini(g_zfs);
+ return (1);
+ }
+
if (error == EACCES) {
if (type != TYPE_DATA) {
(void) fprintf(stderr, "decryption errors "
@@ -1425,8 +1482,12 @@ main(int argc, char **argv)
* not found.
*/
error = ECKSUM;
- } else {
+ } else if (record.zi_cmd == ZINJECT_UNINITIALIZED) {
record.zi_cmd = ZINJECT_DATA_FAULT;
+ if (!error)
+ error = EIO;
+ } else {
+ record.zi_iotype = io_type;
}
if (translate_record(type, argv[0], range, level, &record, pool,
@@ -1434,8 +1495,6 @@ main(int argc, char **argv)
libzfs_fini(g_zfs);
return (1);
}
- if (!error)
- error = EIO;
}
/*
diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am
index 2f962408e5a3..5bb6d8160b18 100644
--- a/sys/contrib/openzfs/cmd/zpool/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am
@@ -148,6 +148,7 @@ dist_zpoolcompat_DATA = \
%D%/compatibility.d/openzfs-2.1-linux \
%D%/compatibility.d/openzfs-2.2 \
%D%/compatibility.d/openzfs-2.3 \
+ %D%/compatibility.d/openzfs-2.4 \
%D%/compatibility.d/openzfsonosx-1.7.0 \
%D%/compatibility.d/openzfsonosx-1.8.1 \
%D%/compatibility.d/openzfsonosx-1.9.3 \
@@ -187,7 +188,9 @@ zpoolcompatlinks = \
"openzfs-2.2 openzfs-2.2-linux" \
"openzfs-2.2 openzfs-2.2-freebsd" \
"openzfs-2.3 openzfs-2.3-linux" \
- "openzfs-2.3 openzfs-2.3-freebsd"
+ "openzfs-2.3 openzfs-2.3-freebsd" \
+ "openzfs-2.4 openzfs-2.4-linux" \
+ "openzfs-2.4 openzfs-2.4-freebsd"
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
INSTALL_DATA_HOOKS += zpool-install-data-hook
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4
new file mode 100644
index 000000000000..3fbd91014c95
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.4
@@ -0,0 +1,48 @@
+# Features supported by OpenZFS 2.4 on Linux and FreeBSD
+allocation_classes
+async_destroy
+blake3
+block_cloning
+block_cloning_endian
+bookmark_v2
+bookmark_written
+bookmarks
+device_rebuild
+device_removal
+draid
+dynamic_gang_header
+edonr
+embedded_data
+empty_bpobj
+enabled_txg
+encryption
+extensible_dataset
+fast_dedup
+filesystem_limits
+head_errlog
+hole_birth
+large_blocks
+large_dnode
+large_microzap
+livelist
+log_spacemap
+longname
+lz4_compress
+multi_vdev_crash_dump
+obsolete_counts
+physical_rewrite
+project_quota
+raidz_expansion
+redacted_datasets
+redaction_bookmarks
+redaction_list_spill
+resilver_defer
+sha512
+skein
+spacemap_histogram
+spacemap_v2
+userobj_accounting
+vdev_zaps_v2
+zilsaxattr
+zpool_checkpoint
+zstd_compress
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c
index 2ec189b98653..fef602736705 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c
@@ -26,6 +26,7 @@
/*
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ * Copyright (c) 2025, Klara, Inc.
*/
#include <libintl.h>
@@ -52,7 +53,7 @@
typedef struct zpool_node {
zpool_handle_t *zn_handle;
uu_avl_node_t zn_avlnode;
- int zn_mark;
+ hrtime_t zn_last_refresh;
} zpool_node_t;
struct zpool_list {
@@ -62,6 +63,7 @@ struct zpool_list {
uu_avl_pool_t *zl_pool;
zprop_list_t **zl_proplist;
zfs_type_t zl_type;
+ hrtime_t zl_last_refresh;
};
static int
@@ -81,26 +83,30 @@ zpool_compare(const void *larg, const void *rarg, void *unused)
* of known pools.
*/
static int
-add_pool(zpool_handle_t *zhp, void *data)
+add_pool(zpool_handle_t *zhp, zpool_list_t *zlp)
{
- zpool_list_t *zlp = data;
- zpool_node_t *node = safe_malloc(sizeof (zpool_node_t));
+ zpool_node_t *node, *new = safe_malloc(sizeof (zpool_node_t));
uu_avl_index_t idx;
- node->zn_handle = zhp;
- uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool);
- if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) {
+ new->zn_handle = zhp;
+ uu_avl_node_init(new, &new->zn_avlnode, zlp->zl_pool);
+
+ node = uu_avl_find(zlp->zl_avl, new, NULL, &idx);
+ if (node == NULL) {
if (zlp->zl_proplist &&
zpool_expand_proplist(zhp, zlp->zl_proplist,
zlp->zl_type, zlp->zl_literal) != 0) {
zpool_close(zhp);
- free(node);
+ free(new);
return (-1);
}
- uu_avl_insert(zlp->zl_avl, node, idx);
+ new->zn_last_refresh = zlp->zl_last_refresh;
+ uu_avl_insert(zlp->zl_avl, new, idx);
} else {
+ zpool_refresh_stats_from_handle(node->zn_handle, zhp);
+ node->zn_last_refresh = zlp->zl_last_refresh;
zpool_close(zhp);
- free(node);
+ free(new);
return (-1);
}
@@ -108,6 +114,18 @@ add_pool(zpool_handle_t *zhp, void *data)
}
/*
+ * add_pool(), but always returns 0. This allows zpool_iter() to continue
+ * even if a pool exists in the tree, or we fail to get the properties for
+ * a new one.
+ */
+static int
+add_pool_cb(zpool_handle_t *zhp, void *data)
+{
+	zpool_list_t *zlp = data;
+
+	/* add_pool()'s result is dropped on purpose; always report 0. */
+	(void) add_pool(zhp, zlp);
+	return (0);
+}
+}
+
+/*
* Create a list of pools based on the given arguments. If we're given no
* arguments, then iterate over all pools in the system and add them to the AVL
* tree. Otherwise, add only those pool explicitly specified on the command
@@ -135,9 +153,10 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type,
zlp->zl_type = type;
zlp->zl_literal = literal;
+ zlp->zl_last_refresh = gethrtime();
if (argc == 0) {
- (void) zpool_iter(g_zfs, add_pool, zlp);
+ (void) zpool_iter(g_zfs, add_pool_cb, zlp);
zlp->zl_findall = B_TRUE;
} else {
int i;
@@ -159,15 +178,61 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type,
}
/*
- * Search for any new pools, adding them to the list. We only add pools when no
- * options were given on the command line. Otherwise, we keep the list fixed as
- * those that were explicitly specified.
+ * Refresh the state of all pools on the list. Additionally, if no options were
+ * given on the command line, add any new pools and remove any that are no
+ * longer available.
*/
-void
-pool_list_update(zpool_list_t *zlp)
+int
+pool_list_refresh(zpool_list_t *zlp)
{
- if (zlp->zl_findall)
- (void) zpool_iter(g_zfs, add_pool, zlp);
+ zlp->zl_last_refresh = gethrtime();
+
+ if (!zlp->zl_findall) {
+ /*
+ * This list is a fixed list of pools, so we must not add
+ * or remove any. Just walk over them and refresh their
+ * state.
+ */
+ int navail = 0;
+ for (zpool_node_t *node = uu_avl_first(zlp->zl_avl);
+ node != NULL; node = uu_avl_next(zlp->zl_avl, node)) {
+ boolean_t missing;
+ zpool_refresh_stats(node->zn_handle, &missing);
+ navail += !missing;
+ node->zn_last_refresh = zlp->zl_last_refresh;
+ }
+ return (navail);
+ }
+
+ /* Search for any new pools and add them to the list. */
+ (void) zpool_iter(g_zfs, add_pool_cb, zlp);
+
+ /* Walk the list of existing pools, and update or remove them. */
+ zpool_node_t *node, *next;
+ for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next) {
+ next = uu_avl_next(zlp->zl_avl, node);
+
+ /*
+ * Skip any that were refreshed and are online; they were added
+ * by zpool_iter() and are already up to date.
+ */
+ if (node->zn_last_refresh == zlp->zl_last_refresh &&
+ zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL)
+ continue;
+
+ /* Refresh and remove if necessary. */
+ boolean_t missing;
+ zpool_refresh_stats(node->zn_handle, &missing);
+ if (missing) {
+ uu_avl_remove(zlp->zl_avl, node);
+ zpool_close(node->zn_handle);
+ free(node);
+ } else {
+ node->zn_last_refresh = zlp->zl_last_refresh;
+ }
+ }
+
+ return (uu_avl_numnodes(zlp->zl_avl));
}
/*
@@ -191,23 +256,6 @@ pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func,
}
/*
- * Remove the given pool from the list. When running iostat, we want to remove
- * those pools that no longer exist.
- */
-void
-pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp)
-{
- zpool_node_t search, *node;
-
- search.zn_handle = zhp;
- if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) {
- uu_avl_remove(zlp->zl_avl, node);
- zpool_close(node->zn_handle);
- free(node);
- }
-}
-
-/*
* Free all the handles associated with this list.
*/
void
@@ -379,8 +427,8 @@ process_unique_cmd_columns(vdev_cmd_data_list_t *vcdl)
static int
vdev_process_cmd_output(vdev_cmd_data_t *data, char *line)
{
- char *col = NULL;
- char *val = line;
+ char *col;
+ char *val;
char *equals;
char **tmp;
@@ -397,6 +445,7 @@ vdev_process_cmd_output(vdev_cmd_data_t *data, char *line)
col = line;
val = equals + 1;
} else {
+ col = NULL;
val = line;
}
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index e62441894cd7..a6658a9c2800 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -33,8 +33,8 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
- * Copyright (c) 2021, 2023, Klara Inc.
- * Copyright [2021] Hewlett Packard Enterprise Development LP
+ * Copyright (c) 2021, 2023, 2025, Klara, Inc.
+ * Copyright (c) 2021, 2025 Hewlett Packard Enterprise Development LP.
*/
#include <assert.h>
@@ -456,7 +456,7 @@ get_usage(zpool_help_t idx)
"<pool> <vdev> ...\n"));
case HELP_ATTACH:
return (gettext("\tattach [-fsw] [-o property=value] "
- "<pool> <device> <new-device>\n"));
+ "<pool> <vdev> <new-device>\n"));
case HELP_CLEAR:
return (gettext("\tclear [[--power]|[-nF]] <pool> [device]\n"));
case HELP_CREATE:
@@ -510,16 +510,16 @@ get_usage(zpool_help_t idx)
case HELP_REOPEN:
return (gettext("\treopen [-n] <pool>\n"));
case HELP_INITIALIZE:
- return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
- "[<device> ...]\n"));
+ return (gettext("\tinitialize [-c | -s | -u] [-w] <-a | <pool> "
+ "[<device> ...]>\n"));
case HELP_SCRUB:
- return (gettext("\tscrub [-e | -s | -p | -C] [-w] "
- "<pool> ...\n"));
+ return (gettext("\tscrub [-e | -s | -p | -C | -E | -S] [-w] "
+ "<-a | <pool> [<pool> ...]>\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_TRIM:
- return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] <pool> "
- "[<device> ...]\n"));
+ return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] "
+ "<-a | <pool> [<device> ...]>\n"));
case HELP_STATUS:
return (gettext("\tstatus [-DdegiLPpstvx] "
"[-c script1[,script2,...]] ...\n"
@@ -560,33 +560,6 @@ get_usage(zpool_help_t idx)
}
}
-static void
-zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res)
-{
- uint_t children = 0;
- nvlist_t **child;
- uint_t i;
-
- (void) nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- &child, &children);
-
- if (children == 0) {
- char *path = zpool_vdev_name(g_zfs, zhp, nvroot,
- VDEV_NAME_PATH);
-
- if (strcmp(path, VDEV_TYPE_INDIRECT) != 0 &&
- strcmp(path, VDEV_TYPE_HOLE) != 0)
- fnvlist_add_boolean(res, path);
-
- free(path);
- return;
- }
-
- for (i = 0; i < children; i++) {
- zpool_collect_leaves(zhp, child[i], res);
- }
-}
-
/*
* Callback routine that will print out a pool property value.
*/
@@ -779,10 +752,11 @@ usage(boolean_t requested)
}
/*
- * zpool initialize [-c | -s | -u] [-w] <pool> [<vdev> ...]
+ * zpool initialize [-c | -s | -u] [-w] <-a | <pool> [<vdev> ...]>
* Initialize all unused blocks in the specified vdevs, or all vdevs in the pool
* if none specified.
*
+ * -a Use all pools.
* -c Cancel. Ends active initializing.
* -s Suspend. Initializing can then be restarted with no flags.
* -u Uninitialize. Clears initialization state.
@@ -794,22 +768,26 @@ zpool_do_initialize(int argc, char **argv)
int c;
char *poolname;
zpool_handle_t *zhp;
- nvlist_t *vdevs;
int err = 0;
boolean_t wait = B_FALSE;
+ boolean_t initialize_all = B_FALSE;
struct option long_options[] = {
{"cancel", no_argument, NULL, 'c'},
{"suspend", no_argument, NULL, 's'},
{"uninit", no_argument, NULL, 'u'},
{"wait", no_argument, NULL, 'w'},
+ {"all", no_argument, NULL, 'a'},
{0, 0, 0, 0}
};
pool_initialize_func_t cmd_type = POOL_INITIALIZE_START;
- while ((c = getopt_long(argc, argv, "csuw", long_options,
+ while ((c = getopt_long(argc, argv, "acsuw", long_options,
NULL)) != -1) {
switch (c) {
+ case 'a':
+ initialize_all = B_TRUE;
+ break;
case 'c':
if (cmd_type != POOL_INITIALIZE_START &&
cmd_type != POOL_INITIALIZE_CANCEL) {
@@ -856,7 +834,18 @@ zpool_do_initialize(int argc, char **argv)
argc -= optind;
argv += optind;
- if (argc < 1) {
+ initialize_cbdata_t cbdata = {
+ .wait = wait,
+ .cmd_type = cmd_type
+ };
+
+ if (initialize_all && argc > 0) {
+ (void) fprintf(stderr, gettext("-a cannot be combined with "
+ "individual pools or vdevs\n"));
+ usage(B_FALSE);
+ }
+
+ if (argc < 1 && !initialize_all) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
usage(B_FALSE);
return (-1);
@@ -868,30 +857,35 @@ zpool_do_initialize(int argc, char **argv)
usage(B_FALSE);
}
- poolname = argv[0];
- zhp = zpool_open(g_zfs, poolname);
- if (zhp == NULL)
- return (-1);
-
- vdevs = fnvlist_alloc();
- if (argc == 1) {
- /* no individual leaf vdevs specified, so add them all */
- nvlist_t *config = zpool_get_config(zhp, NULL);
- nvlist_t *nvroot = fnvlist_lookup_nvlist(config,
- ZPOOL_CONFIG_VDEV_TREE);
- zpool_collect_leaves(zhp, nvroot, vdevs);
+ if (argc == 0 && initialize_all) {
+ /* Initialize each pool recursively */
+ err = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL,
+ B_FALSE, zpool_initialize_one, &cbdata);
+ return (err);
+ } else if (argc == 1) {
+ /* no individual leaf vdevs specified, initialize the pool */
+ poolname = argv[0];
+ zhp = zpool_open(g_zfs, poolname);
+ if (zhp == NULL)
+ return (-1);
+ err = zpool_initialize_one(zhp, &cbdata);
} else {
+ /* individual leaf vdevs specified, initialize them */
+ poolname = argv[0];
+ zhp = zpool_open(g_zfs, poolname);
+ if (zhp == NULL)
+ return (-1);
+ nvlist_t *vdevs = fnvlist_alloc();
for (int i = 1; i < argc; i++) {
fnvlist_add_boolean(vdevs, argv[i]);
}
+ if (wait)
+ err = zpool_initialize_wait(zhp, cmd_type, vdevs);
+ else
+ err = zpool_initialize(zhp, cmd_type, vdevs);
+ fnvlist_free(vdevs);
}
- if (wait)
- err = zpool_initialize_wait(zhp, cmd_type, vdevs);
- else
- err = zpool_initialize(zhp, cmd_type, vdevs);
-
- fnvlist_free(vdevs);
zpool_close(zhp);
return (err);
@@ -1788,7 +1782,7 @@ zpool_do_labelclear(int argc, char **argv)
{
char vdev[MAXPATHLEN];
char *name = NULL;
- int c, fd = -1, ret = 0;
+ int c, fd, ret = 0;
nvlist_t *config;
pool_state_t state;
boolean_t inuse = B_FALSE;
@@ -5767,24 +5761,6 @@ children:
return (ret);
}
-static int
-refresh_iostat(zpool_handle_t *zhp, void *data)
-{
- iostat_cbdata_t *cb = data;
- boolean_t missing;
-
- /*
- * If the pool has disappeared, remove it from the list and continue.
- */
- if (zpool_refresh_stats(zhp, &missing) != 0)
- return (-1);
-
- if (missing)
- pool_list_remove(cb->cb_list, zhp);
-
- return (0);
-}
-
/*
* Callback to print out the iostats for the given pool.
*/
@@ -6157,7 +6133,6 @@ static void
get_interval_count_filter_guids(int *argc, char **argv, float *interval,
unsigned long *count, iostat_cbdata_t *cb)
{
- char **tmpargv = argv;
int argc_for_interval = 0;
/* Is the last arg an interval value? Or a guid? */
@@ -6181,7 +6156,7 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval,
}
/* Point to our list of possible intervals */
- tmpargv = &argv[*argc - argc_for_interval];
+ char **tmpargv = &argv[*argc - argc_for_interval];
*argc = *argc - argc_for_interval;
get_interval_count(&argc_for_interval, tmpargv,
@@ -6366,18 +6341,16 @@ get_namewidth_iostat(zpool_handle_t *zhp, void *data)
* This command can be tricky because we want to be able to deal with pool
* creation/destruction as well as vdev configuration changes. The bulk of this
* processing is handled by the pool_list_* routines in zpool_iter.c. We rely
- * on pool_list_update() to detect the addition of new pools. Configuration
- * changes are all handled within libzfs.
+ * on pool_list_refresh() to detect the addition and removal of pools.
+ * Configuration changes are all handled within libzfs.
*/
int
zpool_do_iostat(int argc, char **argv)
{
int c;
int ret;
- int npools;
float interval = 0;
unsigned long count = 0;
- int winheight = 24;
zpool_list_t *list;
boolean_t verbose = B_FALSE;
boolean_t latency = B_FALSE, l_histo = B_FALSE, rq_histo = B_FALSE;
@@ -6626,10 +6599,24 @@ zpool_do_iostat(int argc, char **argv)
return (1);
}
+ int last_npools = 0;
for (;;) {
- if ((npools = pool_list_count(list)) == 0)
+ /*
+ * Refresh all pools in list, adding or removing pools as
+ * necessary.
+ */
+ int npools = pool_list_refresh(list);
+ if (npools == 0) {
(void) fprintf(stderr, gettext("no pools available\n"));
- else {
+ } else {
+ /*
+ * If the list of pools has changed since last time
+ * around, reset the iteration count to force the
+ * header to be redisplayed.
+ */
+ if (last_npools != npools)
+ cb.cb_iteration = 0;
+
/*
* If this is the first iteration and -y was supplied
* we skip any printing.
@@ -6638,15 +6625,6 @@ zpool_do_iostat(int argc, char **argv)
cb.cb_iteration == 0);
/*
- * Refresh all statistics. This is done as an
- * explicit step before calculating the maximum name
- * width, so that any * configuration changes are
- * properly accounted for.
- */
- (void) pool_list_iter(list, B_FALSE, refresh_iostat,
- &cb);
-
- /*
* Iterate over all pools to determine the maximum width
* for the pool / device name column across all pools.
*/
@@ -6673,7 +6651,7 @@ zpool_do_iostat(int argc, char **argv)
* even when terminal window has its height
* changed.
*/
- winheight = terminal_height();
+ int winheight = terminal_height();
/*
* Are we connected to TTY? If not, headers_once
* should be true, to avoid breaking scripts.
@@ -6699,6 +6677,7 @@ zpool_do_iostat(int argc, char **argv)
if (skip) {
(void) fflush(stdout);
(void) fsleep(interval);
+ last_npools = npools;
continue;
}
@@ -6736,6 +6715,8 @@ zpool_do_iostat(int argc, char **argv)
(void) fflush(stdout);
(void) fsleep(interval);
+
+ last_npools = npools;
}
pool_list_free(list);
@@ -6994,7 +6975,6 @@ collect_vdev_prop(zpool_prop_t prop, uint64_t value, const char *str,
/*
* print static default line per vdev
- * not compatible with '-o' <proplist> option
*/
static void
collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
@@ -7050,48 +7030,98 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
* 'toplevel' boolean value is passed to the print_one_column()
* to indicate that the value is valid.
*/
- if (VDEV_STAT_VALID(vs_pspace, c) && vs->vs_pspace) {
- collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL,
- scripted, B_TRUE, format, cb->cb_json, props,
- cb->cb_json_as_int);
- } else {
- collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_space, NULL,
- scripted, toplevel, format, cb->cb_json, props,
- cb->cb_json_as_int);
+ for (zprop_list_t *pl = cb->cb_proplist; pl != NULL;
+ pl = pl->pl_next) {
+ switch (pl->pl_prop) {
+ case ZPOOL_PROP_SIZE:
+ if (VDEV_STAT_VALID(vs_pspace, c) &&
+ vs->vs_pspace) {
+ collect_vdev_prop(
+ ZPOOL_PROP_SIZE, vs->vs_pspace,
+ NULL, scripted, B_TRUE, format,
+ cb->cb_json, props,
+ cb->cb_json_as_int);
+ } else {
+ collect_vdev_prop(
+ ZPOOL_PROP_SIZE, vs->vs_space, NULL,
+ scripted, toplevel, format,
+ cb->cb_json, props,
+ cb->cb_json_as_int);
+ }
+ break;
+ case ZPOOL_PROP_ALLOCATED:
+ collect_vdev_prop(ZPOOL_PROP_ALLOCATED,
+ vs->vs_alloc, NULL, scripted, toplevel,
+ format, cb->cb_json, props,
+ cb->cb_json_as_int);
+ break;
+
+ case ZPOOL_PROP_FREE:
+ collect_vdev_prop(ZPOOL_PROP_FREE,
+ vs->vs_space - vs->vs_alloc, NULL, scripted,
+ toplevel, format, cb->cb_json, props,
+ cb->cb_json_as_int);
+ break;
+
+ case ZPOOL_PROP_CHECKPOINT:
+ collect_vdev_prop(ZPOOL_PROP_CHECKPOINT,
+ vs->vs_checkpoint_space, NULL, scripted,
+ toplevel, format, cb->cb_json, props,
+ cb->cb_json_as_int);
+ break;
+
+ case ZPOOL_PROP_EXPANDSZ:
+ collect_vdev_prop(ZPOOL_PROP_EXPANDSZ,
+ vs->vs_esize, NULL, scripted, B_TRUE,
+ format, cb->cb_json, props,
+ cb->cb_json_as_int);
+ break;
+
+ case ZPOOL_PROP_FRAGMENTATION:
+ collect_vdev_prop(
+ ZPOOL_PROP_FRAGMENTATION,
+ vs->vs_fragmentation, NULL, scripted,
+ (vs->vs_fragmentation != ZFS_FRAG_INVALID &&
+ toplevel),
+ format, cb->cb_json, props,
+ cb->cb_json_as_int);
+ break;
+
+ case ZPOOL_PROP_CAPACITY:
+ cap = (vs->vs_space == 0) ?
+ 0 : (vs->vs_alloc * 10000 / vs->vs_space);
+ collect_vdev_prop(ZPOOL_PROP_CAPACITY, cap,
+ NULL, scripted, toplevel, format,
+ cb->cb_json, props, cb->cb_json_as_int);
+ break;
+
+ case ZPOOL_PROP_HEALTH:
+ state = zpool_state_to_name(vs->vs_state,
+ vs->vs_aux);
+ if (isspare) {
+ if (vs->vs_aux == VDEV_AUX_SPARED)
+ state = "INUSE";
+ else if (vs->vs_state ==
+ VDEV_STATE_HEALTHY)
+ state = "AVAIL";
+ }
+ collect_vdev_prop(ZPOOL_PROP_HEALTH, 0, state,
+ scripted, B_TRUE, format, cb->cb_json,
+ props, cb->cb_json_as_int);
+ break;
+
+ case ZPOOL_PROP_NAME:
+ break;
+
+ default:
+ collect_vdev_prop(pl->pl_prop, 0,
+ NULL, scripted, B_FALSE, format,
+ cb->cb_json, props, cb->cb_json_as_int);
+
+ }
+
+
}
- collect_vdev_prop(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL,
- scripted, toplevel, format, cb->cb_json, props,
- cb->cb_json_as_int);
- collect_vdev_prop(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc,
- NULL, scripted, toplevel, format, cb->cb_json, props,
- cb->cb_json_as_int);
- collect_vdev_prop(ZPOOL_PROP_CHECKPOINT,
- vs->vs_checkpoint_space, NULL, scripted, toplevel, format,
- cb->cb_json, props, cb->cb_json_as_int);
- collect_vdev_prop(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, NULL,
- scripted, B_TRUE, format, cb->cb_json, props,
- cb->cb_json_as_int);
- collect_vdev_prop(ZPOOL_PROP_FRAGMENTATION,
- vs->vs_fragmentation, NULL, scripted,
- (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel),
- format, cb->cb_json, props, cb->cb_json_as_int);
- cap = (vs->vs_space == 0) ? 0 :
- (vs->vs_alloc * 10000 / vs->vs_space);
- collect_vdev_prop(ZPOOL_PROP_CAPACITY, cap, NULL,
- scripted, toplevel, format, cb->cb_json, props,
- cb->cb_json_as_int);
- collect_vdev_prop(ZPOOL_PROP_DEDUPRATIO, 0, NULL,
- scripted, toplevel, format, cb->cb_json, props,
- cb->cb_json_as_int);
- state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
- if (isspare) {
- if (vs->vs_aux == VDEV_AUX_SPARED)
- state = "INUSE";
- else if (vs->vs_state == VDEV_STATE_HEALTHY)
- state = "AVAIL";
- }
- collect_vdev_prop(ZPOOL_PROP_HEALTH, 0, state, scripted,
- B_TRUE, format, cb->cb_json, props, cb->cb_json_as_int);
if (cb->cb_json) {
fnvlist_add_nvlist(ent, "properties", props);
@@ -7652,7 +7682,7 @@ zpool_do_replace(int argc, char **argv)
}
/*
- * zpool attach [-fsw] [-o property=value] <pool> <device>|<vdev> <new_device>
+ * zpool attach [-fsw] [-o property=value] <pool> <vdev> <new_device>
*
* -f Force attach, even if <new_device> appears to be in use.
* -s Use sequential instead of healing reconstruction for resilver.
@@ -7660,9 +7690,9 @@ zpool_do_replace(int argc, char **argv)
* -w Wait for resilvering (mirror) or expansion (raidz) to complete
* before returning.
*
- * Attach <new_device> to a <device> or <vdev>, where the vdev can be of type
- * mirror or raidz. If <device> is not part of a mirror, then <device> will
- * be transformed into a mirror of <device> and <new_device>. When a mirror
+ * Attach <new_device> to a <vdev>, where the vdev can be of type
+ * device, mirror or raidz. If <vdev> is not part of a mirror, then <vdev> will
+ * be transformed into a mirror of <vdev> and <new_device>. When a mirror
* is involved, <new_device> will begin life with a DTL of [0, now], and will
* immediately begin to resilver itself. For the raidz case, a expansion will
* commence and reflow the raidz data across all the disks including the
@@ -8368,6 +8398,8 @@ zpool_do_reopen(int argc, char **argv)
typedef struct scrub_cbdata {
int cb_type;
pool_scrub_cmd_t cb_scrub_cmd;
+ time_t cb_date_start;
+ time_t cb_date_end;
} scrub_cbdata_t;
static boolean_t
@@ -8411,8 +8443,8 @@ scrub_callback(zpool_handle_t *zhp, void *data)
return (1);
}
- err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);
-
+ err = zpool_scan_range(zhp, cb->cb_type, cb->cb_scrub_cmd,
+ cb->cb_date_start, cb->cb_date_end);
if (err == 0 && zpool_has_checkpoint(zhp) &&
cb->cb_type == POOL_SCAN_SCRUB) {
(void) printf(gettext("warning: will not scrub state that "
@@ -8430,10 +8462,35 @@ wait_callback(zpool_handle_t *zhp, void *data)
return (zpool_wait(zhp, *act));
}
+static time_t
+date_string_to_sec(const char *timestr, boolean_t rounding)
+{
+ struct tm tm = {0};
+ int adjustment = rounding ? 1 : 0;
+
+ /* Let mktime() determine whether DST is in effect. */
+ tm.tm_isdst = -1;
+
+ if (strptime(timestr, "%Y-%m-%d %H:%M", &tm) == NULL) {
+ if (strptime(timestr, "%Y-%m-%d", &tm) == NULL) {
+ fprintf(stderr, gettext("Failed to parse the date.\n"));
+ usage(B_FALSE);
+ }
+ adjustment *= 24 * 60 * 60;
+ } else {
+ adjustment *= 60;
+ }
+
+ return (mktime(&tm) + adjustment);
+}
+
/*
- * zpool scrub [-e | -s | -p | -C] [-w] <pool> ...
+ * zpool scrub [-e | -s | -p | -C | -E | -S] [-w] [-a | <pool> ...]
*
+ * -a Scrub all pools.
* -e Only scrub blocks in the error log.
+ * -E End date of scrub.
+ * -S Start date of scrub.
* -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
* -w Wait. Blocks until scrub has completed.
@@ -8449,21 +8506,36 @@ zpool_do_scrub(int argc, char **argv)
cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
+ cb.cb_date_start = cb.cb_date_end = 0;
boolean_t is_error_scrub = B_FALSE;
boolean_t is_pause = B_FALSE;
boolean_t is_stop = B_FALSE;
boolean_t is_txg_continue = B_FALSE;
+ boolean_t scrub_all = B_FALSE;
/* check options */
- while ((c = getopt(argc, argv, "spweC")) != -1) {
+ while ((c = getopt(argc, argv, "aspweCE:S:")) != -1) {
switch (c) {
+ case 'a':
+ scrub_all = B_TRUE;
+ break;
case 'e':
is_error_scrub = B_TRUE;
break;
+ case 'E':
+ /*
+ * Round the date. It's better to scrub more data than
+ * less. This also makes the date inclusive.
+ */
+ cb.cb_date_end = date_string_to_sec(optarg, B_TRUE);
+ break;
case 's':
is_stop = B_TRUE;
break;
+ case 'S':
+ cb.cb_date_start = date_string_to_sec(optarg, B_FALSE);
+ break;
case 'p':
is_pause = B_TRUE;
break;
@@ -8511,6 +8583,19 @@ zpool_do_scrub(int argc, char **argv)
}
}
+ if ((cb.cb_date_start != 0 || cb.cb_date_end != 0) &&
+ cb.cb_scrub_cmd != POOL_SCRUB_NORMAL) {
+ (void) fprintf(stderr, gettext("invalid option combination: "
+ "start/end date is available only with normal scrub\n"));
+ usage(B_FALSE);
+ }
+ if (cb.cb_date_start != 0 && cb.cb_date_end != 0 &&
+ cb.cb_date_start > cb.cb_date_end) {
+ (void) fprintf(stderr, gettext("invalid arguments: "
+ "end date has to be later than start date\n"));
+ usage(B_FALSE);
+ }
+
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) {
(void) fprintf(stderr, gettext("invalid option combination: "
@@ -8521,7 +8606,7 @@ zpool_do_scrub(int argc, char **argv)
argc -= optind;
argv += optind;
- if (argc < 1) {
+ if (argc < 1 && !scrub_all) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
usage(B_FALSE);
}
@@ -8551,6 +8636,7 @@ zpool_do_resilver(int argc, char **argv)
cb.cb_type = POOL_SCAN_RESILVER;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
+ cb.cb_date_start = cb.cb_date_end = 0;
/* check options */
while ((c = getopt(argc, argv, "")) != -1) {
@@ -8575,8 +8661,9 @@ zpool_do_resilver(int argc, char **argv)
}
/*
- * zpool trim [-d] [-r <rate>] [-c | -s] <pool> [<device> ...]
+ * zpool trim [-d] [-r <rate>] [-c | -s] <-a | pool> [<device> ...]
*
+ * -a Trim all pools.
* -c Cancel. Ends any in-progress trim.
* -d Secure trim. Requires kernel and device support.
* -r <rate> Sets the TRIM rate in bytes (per second). Supports
@@ -8593,6 +8680,7 @@ zpool_do_trim(int argc, char **argv)
{"rate", required_argument, NULL, 'r'},
{"suspend", no_argument, NULL, 's'},
{"wait", no_argument, NULL, 'w'},
+ {"all", no_argument, NULL, 'a'},
{0, 0, 0, 0}
};
@@ -8600,11 +8688,16 @@ zpool_do_trim(int argc, char **argv)
uint64_t rate = 0;
boolean_t secure = B_FALSE;
boolean_t wait = B_FALSE;
+ boolean_t trimall = B_FALSE;
+ int error;
int c;
- while ((c = getopt_long(argc, argv, "cdr:sw", long_options, NULL))
+ while ((c = getopt_long(argc, argv, "acdr:sw", long_options, NULL))
!= -1) {
switch (c) {
+ case 'a':
+ trimall = B_TRUE;
+ break;
case 'c':
if (cmd_type != POOL_TRIM_START &&
cmd_type != POOL_TRIM_CANCEL) {
@@ -8663,7 +8756,18 @@ zpool_do_trim(int argc, char **argv)
argc -= optind;
argv += optind;
- if (argc < 1) {
+ trimflags_t trim_flags = {
+ .secure = secure,
+ .rate = rate,
+ .wait = wait,
+ };
+
+ trim_cbdata_t cbdata = {
+ .trim_flags = trim_flags,
+ .cmd_type = cmd_type
+ };
+
+ if (argc < 1 && !trimall) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
usage(B_FALSE);
return (-1);
@@ -8671,41 +8775,46 @@ zpool_do_trim(int argc, char **argv)
if (wait && (cmd_type != POOL_TRIM_START)) {
(void) fprintf(stderr, gettext("-w cannot be used with -c or "
- "-s\n"));
+ "-s options\n"));
usage(B_FALSE);
}
- char *poolname = argv[0];
- zpool_handle_t *zhp = zpool_open(g_zfs, poolname);
- if (zhp == NULL)
- return (-1);
-
- trimflags_t trim_flags = {
- .secure = secure,
- .rate = rate,
- .wait = wait,
- };
+ if (trimall && argc > 0) {
+ (void) fprintf(stderr, gettext("-a cannot be combined with "
+ "individual zpools or vdevs\n"));
+ usage(B_FALSE);
+ }
- nvlist_t *vdevs = fnvlist_alloc();
- if (argc == 1) {
+ if (argc == 0 && trimall) {
+ cbdata.trim_flags.fullpool = B_TRUE;
+ /* Trim each pool recursively */
+ error = for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL,
+ B_FALSE, zpool_trim_one, &cbdata);
+ } else if (argc == 1) {
+ char *poolname = argv[0];
+ zpool_handle_t *zhp = zpool_open(g_zfs, poolname);
+ if (zhp == NULL)
+ return (-1);
/* no individual leaf vdevs specified, so add them all */
- nvlist_t *config = zpool_get_config(zhp, NULL);
- nvlist_t *nvroot = fnvlist_lookup_nvlist(config,
- ZPOOL_CONFIG_VDEV_TREE);
- zpool_collect_leaves(zhp, nvroot, vdevs);
- trim_flags.fullpool = B_TRUE;
+ error = zpool_trim_one(zhp, &cbdata);
+ zpool_close(zhp);
} else {
- trim_flags.fullpool = B_FALSE;
+ char *poolname = argv[0];
+ zpool_handle_t *zhp = zpool_open(g_zfs, poolname);
+ if (zhp == NULL)
+ return (-1);
+ /* leaf vdevs specified, trim only those */
+ cbdata.trim_flags.fullpool = B_FALSE;
+ nvlist_t *vdevs = fnvlist_alloc();
for (int i = 1; i < argc; i++) {
fnvlist_add_boolean(vdevs, argv[i]);
}
+ error = zpool_trim(zhp, cbdata.cmd_type, vdevs,
+ &cbdata.trim_flags);
+ fnvlist_free(vdevs);
+ zpool_close(zhp);
}
- int error = zpool_trim(zhp, cmd_type, vdevs, &trim_flags);
-
- fnvlist_free(vdevs);
- zpool_close(zhp);
-
return (error);
}
@@ -10706,7 +10815,6 @@ status_callback_json(zpool_handle_t *zhp, void *data)
uint_t c;
vdev_stat_t *vs;
nvlist_t *item, *d, *load_info, *vds;
- item = d = NULL;
/* If dedup stats were requested, also fetch dedupcached. */
if (cbp->cb_dedup_stats > 1)
@@ -11330,7 +11438,8 @@ upgrade_enable_all(zpool_handle_t *zhp, int *countp)
const char *fname = spa_feature_table[i].fi_uname;
const char *fguid = spa_feature_table[i].fi_guid;
- if (!spa_feature_table[i].fi_zfs_mod_supported)
+ if (!spa_feature_table[i].fi_zfs_mod_supported ||
+ (spa_feature_table[i].fi_flags & ZFEATURE_FLAG_NO_UPGRADE))
continue;
if (!nvlist_exists(enabled, fguid) && requested_features[i]) {
@@ -11485,7 +11594,11 @@ upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg)
"Note that the pool "
"'compatibility' feature can be "
"used to inhibit\nfeature "
- "upgrades.\n\n"));
+ "upgrades.\n\n"
+ "Features marked with (*) are not "
+ "applied automatically on upgrade, "
+ "and\nmust be applied explicitly "
+ "with zpool-set(7).\n\n"));
(void) printf(gettext("POOL "
"FEATURE\n"));
(void) printf(gettext("------"
@@ -11499,7 +11612,9 @@ upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg)
poolfirst = B_FALSE;
}
- (void) printf(gettext(" %s\n"), fname);
+ (void) printf(gettext(" %s%s\n"), fname,
+ spa_feature_table[i].fi_flags &
+ ZFEATURE_FLAG_NO_UPGRADE ? "(*)" : "");
}
/*
* If they did "zpool upgrade -a", then we could
@@ -12300,7 +12415,7 @@ zpool_do_events_next(ev_opts_t *opts)
nvlist_free(nvl);
}
- VERIFY(0 == close(zevent_fd));
+ VERIFY0(close(zevent_fd));
return (ret);
}
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_util.h b/sys/contrib/openzfs/cmd/zpool/zpool_util.h
index 5ab7cb9750f1..3af23c52bd45 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_util.h
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_util.h
@@ -76,11 +76,10 @@ typedef struct zpool_list zpool_list_t;
zpool_list_t *pool_list_get(int, char **, zprop_list_t **, zfs_type_t,
boolean_t, int *);
-void pool_list_update(zpool_list_t *);
+int pool_list_refresh(zpool_list_t *);
int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *);
void pool_list_free(zpool_list_t *);
int pool_list_count(zpool_list_t *);
-void pool_list_remove(zpool_list_t *, zpool_handle_t *);
extern libzfs_handle_t *g_zfs;
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
index 07868a30d7e7..222b5524669e 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
@@ -270,14 +270,13 @@ is_spare(nvlist_t *config, const char *path)
* draid* Virtual dRAID spare
*/
static nvlist_t *
-make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
+make_leaf_vdev(const char *arg, boolean_t is_primary, uint64_t ashift)
{
char path[MAXPATHLEN];
struct stat64 statbuf;
nvlist_t *vdev = NULL;
const char *type = NULL;
boolean_t wholedisk = B_FALSE;
- uint64_t ashift = 0;
int err;
/*
@@ -382,31 +381,6 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
(uint64_t)wholedisk) == 0);
/*
- * Override defaults if custom properties are provided.
- */
- if (props != NULL) {
- const char *value = NULL;
-
- if (nvlist_lookup_string(props,
- zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) {
- if (zfs_nicestrtonum(NULL, value, &ashift) != 0) {
- (void) fprintf(stderr,
- gettext("ashift must be a number.\n"));
- return (NULL);
- }
- if (ashift != 0 &&
- (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) {
- (void) fprintf(stderr,
- gettext("invalid 'ashift=%" PRIu64 "' "
- "property: only values between %" PRId32 " "
- "and %" PRId32 " are allowed.\n"),
- ashift, ASHIFT_MIN, ASHIFT_MAX);
- return (NULL);
- }
- }
- }
-
- /*
* If the device is known to incorrectly report its physical sector
* size explicitly provide the known correct value.
*/
@@ -574,7 +548,6 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
nvlist_t *cnv = child[c];
const char *path;
struct stat64 statbuf;
- int64_t size = -1LL;
const char *childtype;
int fd, err;
@@ -610,22 +583,28 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
ZPOOL_CONFIG_PATH, &path) == 0);
/*
+ * Skip active spares; they should never cause
+ * the pool to be evaluated as inconsistent.
+ */
+ if (is_spare(NULL, path))
+ continue;
+
+ /*
* If we have a raidz/mirror that combines disks
- * with files, report it as an error.
+ * with files, only report it as an error when
+ * fatal is set to ensure all the replication
+ * checks aren't skipped in check_replication().
*/
- if (!dontreport && type != NULL &&
+ if (fatal && !dontreport && type != NULL &&
strcmp(type, childtype) != 0) {
if (ret != NULL)
free(ret);
ret = NULL;
- if (fatal)
- vdev_error(gettext(
- "mismatched replication "
- "level: %s contains both "
- "files and devices\n"),
- rep.zprl_type);
- else
- return (NULL);
+ vdev_error(gettext(
+ "mismatched replication "
+ "level: %s contains both "
+ "files and devices\n"),
+ rep.zprl_type);
dontreport = B_TRUE;
}
@@ -656,7 +635,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
statbuf.st_size == MAXOFFSET_T)
continue;
- size = statbuf.st_size;
+ int64_t size = statbuf.st_size;
/*
* Also make sure that devices and
@@ -876,6 +855,18 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
(u_longlong_t)mirror->zprl_children);
ret = -1;
}
+ } else if (is_raidz_draid(current, new)) {
+ if (current->zprl_parity != new->zprl_parity) {
+ vdev_error(gettext(
+ "mismatched replication level: pool and "
+ "new vdev with different redundancy, %s "
+ "and %s vdevs, %llu vs. %llu\n"),
+ current->zprl_type,
+ new->zprl_type,
+ (u_longlong_t)current->zprl_parity,
+ (u_longlong_t)new->zprl_parity);
+ ret = -1;
+ }
} else if (strcmp(current->zprl_type, new->zprl_type) != 0) {
vdev_error(gettext(
"mismatched replication level: pool uses %s "
@@ -1353,7 +1344,7 @@ is_grouping(const char *type, int *mindev, int *maxdev)
static int
draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
{
- uint64_t nparity = 1;
+ uint64_t nparity;
uint64_t nspares = 0;
uint64_t ndata = UINT64_MAX;
uint64_t ngroups = 1;
@@ -1496,6 +1487,29 @@ construct_spec(nvlist_t *props, int argc, char **argv)
const char *type, *fulltype;
boolean_t is_log, is_special, is_dedup, is_spare;
boolean_t seen_logs;
+ uint64_t ashift = 0;
+
+ if (props != NULL) {
+ const char *value = NULL;
+
+ if (nvlist_lookup_string(props,
+ zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) {
+ if (zfs_nicestrtonum(NULL, value, &ashift) != 0) {
+ (void) fprintf(stderr,
+ gettext("ashift must be a number.\n"));
+ return (NULL);
+ }
+ if (ashift != 0 &&
+ (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) {
+ (void) fprintf(stderr,
+ gettext("invalid 'ashift=%" PRIu64 "' "
+ "property: only values between %" PRId32 " "
+ "and %" PRId32 " are allowed.\n"),
+ ashift, ASHIFT_MIN, ASHIFT_MAX);
+ return (NULL);
+ }
+ }
+ }
top = NULL;
toplevels = 0;
@@ -1581,13 +1595,12 @@ construct_spec(nvlist_t *props, int argc, char **argv)
is_dedup = is_spare = B_FALSE;
}
- if (is_log || is_special || is_dedup) {
+ if (is_log) {
if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
(void) fprintf(stderr,
gettext("invalid vdev "
- "specification: unsupported '%s' "
- "device: %s\n"), is_log ? "log" :
- "special", type);
+ "specification: unsupported 'log' "
+ "device: %s\n"), type);
goto spec_out;
}
nlogs++;
@@ -1602,9 +1615,9 @@ construct_spec(nvlist_t *props, int argc, char **argv)
children * sizeof (nvlist_t *));
if (child == NULL)
zpool_no_memory();
- if ((nv = make_leaf_vdev(props, argv[c],
+ if ((nv = make_leaf_vdev(argv[c],
!(is_log || is_special || is_dedup ||
- is_spare))) == NULL) {
+ is_spare), ashift)) == NULL) {
for (c = 0; c < children - 1; c++)
nvlist_free(child[c]);
free(child);
@@ -1668,6 +1681,10 @@ construct_spec(nvlist_t *props, int argc, char **argv)
ZPOOL_CONFIG_ALLOCATION_BIAS,
VDEV_ALLOC_BIAS_DEDUP) == 0);
}
+ if (ashift > 0) {
+ fnvlist_add_uint64(nv,
+ ZPOOL_CONFIG_ASHIFT, ashift);
+ }
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
verify(nvlist_add_uint64(nv,
ZPOOL_CONFIG_NPARITY,
@@ -1695,8 +1712,9 @@ construct_spec(nvlist_t *props, int argc, char **argv)
* We have a device. Pass off to make_leaf_vdev() to
* construct the appropriate nvlist describing the vdev.
*/
- if ((nv = make_leaf_vdev(props, argv[0], !(is_log ||
- is_special || is_dedup || is_spare))) == NULL)
+ if ((nv = make_leaf_vdev(argv[0], !(is_log ||
+ is_special || is_dedup || is_spare),
+ ashift)) == NULL)
goto spec_out;
verify(nvlist_add_uint64(nv,
diff --git a/sys/contrib/openzfs/cmd/zstream/Makefile.am b/sys/contrib/openzfs/cmd/zstream/Makefile.am
index be3539fe905d..80ef1ea7ca11 100644
--- a/sys/contrib/openzfs/cmd/zstream/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zstream/Makefile.am
@@ -18,6 +18,7 @@ zstream_LDADD = \
libzpool.la \
libnvpair.la
-PHONY += install-exec-hook
-install-exec-hook:
+cmd-zstream-install-exec-hook:
cd $(DESTDIR)$(sbindir) && $(LN_S) -f zstream zstreamdump
+
+INSTALL_EXEC_HOOKS += cmd-zstream-install-exec-hook
diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c
index 89264c97ff10..89752dcb0f0f 100644
--- a/sys/contrib/openzfs/cmd/ztest.c
+++ b/sys/contrib/openzfs/cmd/ztest.c
@@ -273,7 +273,6 @@ extern int zfs_compressed_arc_enabled;
extern int zfs_abd_scatter_enabled;
extern uint_t dmu_object_alloc_chunk_shift;
extern boolean_t zfs_force_some_double_word_sm_entries;
-extern unsigned long zio_decompress_fail_fraction;
extern unsigned long zfs_reconstruct_indirect_damage_fraction;
extern uint64_t raidz_expand_max_reflow_bytes;
extern uint_t raidz_expand_pause_point;
@@ -809,8 +808,8 @@ static ztest_option_t option_table[] = {
{ 'X', "raidz-expansion", NULL,
"Perform a dedicated raidz expansion test",
NO_DEFAULT, NULL},
- { 'o', "option", "\"OPTION=INTEGER\"",
- "Set global variable to an unsigned 32-bit integer value",
+ { 'o', "option", "\"NAME=VALUE\"",
+ "Set the named tunable to the given value",
NO_DEFAULT, NULL},
{ 'G', "dump-debug-msg", NULL,
"Dump zfs_dbgmsg buffer before exiting due to an error",
@@ -829,8 +828,8 @@ static char *short_opts = NULL;
static void
init_options(void)
{
- ASSERT3P(long_opts, ==, NULL);
- ASSERT3P(short_opts, ==, NULL);
+ ASSERT0P(long_opts);
+ ASSERT0P(short_opts);
int count = sizeof (option_table) / sizeof (option_table[0]);
long_opts = umem_alloc(sizeof (struct option) * count, UMEM_NOFAIL);
@@ -919,7 +918,7 @@ ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo)
{
char name[32];
char *value;
- int state = ZTEST_VDEV_CLASS_RND;
+ int state;
(void) strlcpy(name, input, sizeof (name));
@@ -1686,7 +1685,7 @@ ztest_rll_init(rll_t *rll)
static void
ztest_rll_destroy(rll_t *rll)
{
- ASSERT3P(rll->rll_writer, ==, NULL);
+ ASSERT0P(rll->rll_writer);
ASSERT0(rll->rll_readers);
mutex_destroy(&rll->rll_lock);
cv_destroy(&rll->rll_cv);
@@ -1720,7 +1719,7 @@ ztest_rll_unlock(rll_t *rll)
rll->rll_writer = NULL;
} else {
ASSERT3S(rll->rll_readers, >, 0);
- ASSERT3P(rll->rll_writer, ==, NULL);
+ ASSERT0P(rll->rll_writer);
rll->rll_readers--;
}
@@ -1996,7 +1995,7 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH |
DMU_KEEP_CACHING) != 0) {
- zil_itx_destroy(itx);
+ zil_itx_destroy(itx, 0);
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
write_state = WR_NEED_COPY;
}
@@ -2278,8 +2277,8 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
ztest_block_tag_t rbt;
- VERIFY(dmu_read(os, lr->lr_foid, offset,
- sizeof (rbt), &rbt, flags) == 0);
+ VERIFY0(dmu_read(os, lr->lr_foid, offset,
+ sizeof (rbt), &rbt, flags));
if (rbt.bt_magic == BT_MAGIC) {
ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
offset, gen, txg, crtxg);
@@ -2966,7 +2965,7 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
(void) pthread_rwlock_rdlock(&zd->zd_zilog_lock);
- zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
+ VERIFY0(zil_commit(zilog, ztest_random(ZTEST_OBJECTS)));
/*
* Remember the committed values in zd, which is in parent/child
@@ -3882,7 +3881,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
* If newvd is too small, it should fail with EOVERFLOW.
*
* If newvd is a distributed spare and it's being attached to a
- * dRAID which is not its parent it should fail with EINVAL.
+ * dRAID which is not its parent it should fail with ENOTSUP.
*/
if (pvd->vdev_ops != &vdev_mirror_ops &&
pvd->vdev_ops != &vdev_root_ops && (!replacing ||
@@ -3901,7 +3900,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
else if (ashift > oldvd->vdev_top->vdev_ashift)
expected_error = EDOM;
else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd))
- expected_error = EINVAL;
+ expected_error = ENOTSUP;
else
expected_error = 0;
@@ -4007,7 +4006,7 @@ raidz_scratch_verify(void)
* requested by user, but scratch object was not created.
*/
case RRSS_SCRATCH_NOT_IN_USE:
- ASSERT3U(offset, ==, 0);
+ ASSERT0(offset);
break;
/*
@@ -5537,8 +5536,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
}
if (i == 1) {
- VERIFY(dmu_buf_hold(os, bigobj, off,
- FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
+ VERIFY0(dmu_buf_hold(os, bigobj, off,
+ FTAG, &dbt, DMU_READ_NO_PREFETCH));
}
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
@@ -7069,7 +7068,7 @@ ztest_set_global_vars(void)
char *kv = ztest_opts.zo_gvars[i];
VERIFY3U(strlen(kv), <=, ZO_GVARS_MAX_ARGLEN);
VERIFY3U(strlen(kv), >, 0);
- int err = set_global_var(kv);
+ int err = handle_tunable_option(kv, B_TRUE);
if (ztest_opts.zo_verbose > 0) {
(void) printf("setting global var %s ... %s\n", kv,
err ? "failed" : "ok");
@@ -7813,6 +7812,9 @@ ztest_dataset_open(int d)
ztest_dataset_name(name, ztest_opts.zo_pool, d);
+ if (ztest_opts.zo_verbose >= 6)
+ (void) printf("Opening %s\n", name);
+
(void) pthread_rwlock_rdlock(&ztest_name_lock);
error = ztest_dataset_create(name);
@@ -7934,7 +7936,7 @@ ztest_freeze(void)
*/
while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) {
ztest_dmu_object_alloc_free(zd, 0);
- zil_commit(zd->zd_zilog, 0);
+ VERIFY0(zil_commit(zd->zd_zilog, 0));
}
txg_wait_synced(spa_get_dsl(spa), 0);
@@ -7976,7 +7978,7 @@ ztest_freeze(void)
/*
* Commit all of the changes we just generated.
*/
- zil_commit(zd->zd_zilog, 0);
+ VERIFY0(zil_commit(zd->zd_zilog, 0));
txg_wait_synced(spa_get_dsl(spa), 0);
/*
@@ -8308,41 +8310,44 @@ static void
ztest_generic_run(ztest_shared_t *zs, spa_t *spa)
{
kthread_t **run_threads;
- int t;
+ int i, ndatasets;
run_threads = umem_zalloc(ztest_opts.zo_threads * sizeof (kthread_t *),
UMEM_NOFAIL);
/*
- * Kick off all the tests that run in parallel.
+ * Actual number of datasets to be used.
*/
- for (t = 0; t < ztest_opts.zo_threads; t++) {
- if (t < ztest_opts.zo_datasets && ztest_dataset_open(t) != 0) {
- umem_free(run_threads, ztest_opts.zo_threads *
- sizeof (kthread_t *));
- return;
- }
+ ndatasets = MIN(ztest_opts.zo_datasets, ztest_opts.zo_threads);
- run_threads[t] = thread_create(NULL, 0, ztest_thread,
- (void *)(uintptr_t)t, 0, NULL, TS_RUN | TS_JOINABLE,
+ /*
+ * Prepare the datasets first.
+ */
+ for (i = 0; i < ndatasets; i++)
+ VERIFY0(ztest_dataset_open(i));
+
+ /*
+ * Kick off all the tests that run in parallel.
+ */
+ for (i = 0; i < ztest_opts.zo_threads; i++) {
+ run_threads[i] = thread_create(NULL, 0, ztest_thread,
+ (void *)(uintptr_t)i, 0, NULL, TS_RUN | TS_JOINABLE,
defclsyspri);
}
/*
* Wait for all of the tests to complete.
*/
- for (t = 0; t < ztest_opts.zo_threads; t++)
- VERIFY0(thread_join(run_threads[t]));
+ for (i = 0; i < ztest_opts.zo_threads; i++)
+ VERIFY0(thread_join(run_threads[i]));
/*
* Close all datasets. This must be done after all the threads
* are joined so we can be sure none of the datasets are in-use
* by any of the threads.
*/
- for (t = 0; t < ztest_opts.zo_threads; t++) {
- if (t < ztest_opts.zo_datasets)
- ztest_dataset_close(t);
- }
+ for (i = 0; i < ndatasets; i++)
+ ztest_dataset_close(i);
txg_wait_synced(spa_get_dsl(spa), 0);
@@ -8465,6 +8470,7 @@ ztest_run(ztest_shared_t *zs)
int d = ztest_random(ztest_opts.zo_datasets);
ztest_dataset_destroy(d);
+ txg_wait_synced(spa_get_dsl(spa), 0);
}
zs->zs_enospc_count = 0;
@@ -8972,7 +8978,7 @@ main(int argc, char **argv)
exit(EXIT_FAILURE);
} else {
/* children should not be spawned if setting gvars fails */
- VERIFY3S(err, ==, 0);
+ VERIFY0(err);
}
/* Override location of zpool.cache */