about summary refs log tree commit diff
diff options
context:
space:
mode:
author Martin Matuska <mm@FreeBSD.org> 2023-05-12 11:12:59 +0000
committer Martin Matuska <mm@FreeBSD.org> 2023-05-12 11:13:33 +0000
commit e639e0d27cc863ba1b8de20e861e6b5d9b922a8e (patch)
tree d45761e2b719726afe365d28aad7ee663717dd8a
parent 92c23f6d9c2074f6deb0029d13a8c92b32797059 (diff)
parent e6107668385044718b0a73330ed6423650806473 (diff)
download src-e639e0d27cc863ba1b8de20e861e6b5d9b922a8e.tar.gz
src-e639e0d27cc863ba1b8de20e861e6b5d9b922a8e.zip
zfs: merge openzfs/zfs@e61076683
Notable upstream pull request merges:
 #14744 Optimize check_filesystem() and process_error_log()
 #14773 Allow zhack label repair to restore detached devices
 #14794 zpool import -m also removing spare and cache when log device is missing
 #14805 Simplify and optimize random_int_between()
 #14813 Enable the head_errlog feature to remove errors
 #14816 Fix two abd_gang_add_gang() issues
 #14817 Verify block pointers before writing them out
 #14819 Add dmu_tx_hold_append() interface
 #14823 Remove single parent assertion from zio_nowait()
 #14824 Plug memory leak in zfsdev_state
 #14825 Block cloning dbuf fixes
 #14828 Remove duplicate code in l2arc_evict()
 #14837 Fixes in head_errlog feature with encryption
 #14839 Prevent panic during concurrent snapshot rollback and zvol read
 #14853 zil: Don't expect zio_shrink() to succeed

Obtained from: OpenZFS
OpenZFS commit: e6107668385044718b0a73330ed6423650806473
-rw-r--r-- sys/contrib/openzfs/cmd/zdb/zdb.c 19
-rw-r--r-- sys/contrib/openzfs/cmd/zhack.c 508
-rw-r--r-- sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c 13
-rw-r--r-- sys/contrib/openzfs/include/sys/dbuf.h 23
-rw-r--r-- sys/contrib/openzfs/include/sys/dmu.h 7
-rw-r--r-- sys/contrib/openzfs/include/sys/dmu_tx.h 1
-rw-r--r-- sys/contrib/openzfs/include/sys/spa.h 3
-rw-r--r-- sys/contrib/openzfs/include/sys/zio.h 8
-rw-r--r-- sys/contrib/openzfs/man/man1/zhack.1 23
-rw-r--r-- sys/contrib/openzfs/man/man7/zpool-features.7 7
-rw-r--r-- sys/contrib/openzfs/man/man8/zpool-status.8 3
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c 2
-rw-r--r-- sys/contrib/openzfs/module/zfs/abd.c 14
-rw-r--r-- sys/contrib/openzfs/module/zfs/arc.c 26
-rw-r--r-- sys/contrib/openzfs/module/zfs/dbuf.c 119
-rw-r--r-- sys/contrib/openzfs/module/zfs/dmu.c 51
-rw-r--r-- sys/contrib/openzfs/module/zfs/dmu_recv.c 2
-rw-r--r-- sys/contrib/openzfs/module/zfs/dmu_tx.c 105
-rw-r--r-- sys/contrib/openzfs/module/zfs/dsl_scan.c 3
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa.c 12
-rw-r--r-- sys/contrib/openzfs/module/zfs/spa_errlog.c 389
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_ioctl.c 2
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_vnops.c 38
-rw-r--r-- sys/contrib/openzfs/module/zfs/zil.c 1
-rw-r--r-- sys/contrib/openzfs/module/zfs/zio.c 94
-rw-r--r-- sys/contrib/openzfs/tests/runfiles/common.run 5
-rwxr-xr-x sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in 1
-rw-r--r-- sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib 14
-rw-r--r-- sys/contrib/openzfs/tests/zfs-tests/include/math.shlib 11
-rw-r--r-- sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am 7
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh 19
-rw-r--r-- sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib 361
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh 64
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh 30
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh 31
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh 33
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh 30
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh 75
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh 6
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_007_pos.ksh 13
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh 5
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh 2
-rw-r--r-- sys/modules/zfs/zfs_config.h 4
-rw-r--r-- sys/modules/zfs/zfs_gitrev.h 2
44 files changed, 1726 insertions, 460 deletions
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index ec5d1acacf85..5ab13b470dc0 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -3322,13 +3322,22 @@ dump_znode_sa_xattr(sa_handle_t *hdl)
(void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
sa_xattr_size, sa_xattr_entries);
while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
+ boolean_t can_print = !dump_opt['P'];
uchar_t *value;
uint_t cnt, idx;
(void) printf("\t\t%s = ", nvpair_name(elem));
nvpair_value_byte_array(elem, &value, &cnt);
+
+ for (idx = 0; idx < cnt; ++idx) {
+ if (!isprint(value[idx])) {
+ can_print = B_FALSE;
+ break;
+ }
+ }
+
for (idx = 0; idx < cnt; ++idx) {
- if (isprint(value[idx]))
+ if (can_print)
(void) putchar(value[idx]);
else
(void) printf("\\%3.3o", value[idx]);
@@ -8490,8 +8499,8 @@ zdb_read_block(char *thing, spa_t *spa)
!(flags & ZDB_FLAG_DECOMPRESS)) {
const blkptr_t *b = (const blkptr_t *)(void *)
((uintptr_t)buf + (uintptr_t)blkptr_offset);
- if (zfs_blkptr_verify(spa, b, B_FALSE, BLK_VERIFY_ONLY) ==
- B_FALSE) {
+ if (zfs_blkptr_verify(spa, b,
+ BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) {
abd_return_buf_copy(pabd, buf, lsize);
borrowed = B_FALSE;
buf = lbuf;
@@ -8499,8 +8508,8 @@ zdb_read_block(char *thing, spa_t *spa)
lbuf, lsize, psize, flags);
b = (const blkptr_t *)(void *)
((uintptr_t)buf + (uintptr_t)blkptr_offset);
- if (failed || zfs_blkptr_verify(spa, b, B_FALSE,
- BLK_VERIFY_LOG) == B_FALSE) {
+ if (failed || zfs_blkptr_verify(spa, b,
+ BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) {
printf("invalid block pointer at this DVA\n");
goto out;
}
diff --git a/sys/contrib/openzfs/cmd/zhack.c b/sys/contrib/openzfs/cmd/zhack.c
index 0b6da31ec573..44611887dd25 100644
--- a/sys/contrib/openzfs/cmd/zhack.c
+++ b/sys/contrib/openzfs/cmd/zhack.c
@@ -58,6 +58,12 @@ static importargs_t g_importargs;
static char *g_pool;
static boolean_t g_readonly;
+typedef enum {
+ ZHACK_REPAIR_OP_UNKNOWN = 0,
+ ZHACK_REPAIR_OP_CKSUM = (1 << 0),
+ ZHACK_REPAIR_OP_UNDETACH = (1 << 1)
+} zhack_repair_op_t;
+
static __attribute__((noreturn)) void
usage(void)
{
@@ -81,7 +87,10 @@ usage(void)
" <feature> : should be a feature guid\n"
"\n"
" label repair <device>\n"
- " repair corrupted label checksums\n"
+ " repair labels of a specified device according to options\n"
+ " which may be combined to do their functions in one call\n"
+ " -c repair corrupted label checksums\n"
+ " -u restore the label on a detached device\n"
"\n"
" <device> : path to vdev\n");
exit(1);
@@ -485,119 +494,398 @@ zhack_do_feature(int argc, char **argv)
return (0);
}
+#define ASHIFT_UBERBLOCK_SHIFT(ashift) \
+ MIN(MAX(ashift, UBERBLOCK_SHIFT), \
+ MAX_UBERBLOCK_SHIFT)
+#define ASHIFT_UBERBLOCK_SIZE(ashift) \
+ (1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))
+
+#define REPAIR_LABEL_STATUS_CKSUM (1 << 0)
+#define REPAIR_LABEL_STATUS_UB (1 << 1)
+
static int
-zhack_repair_label_cksum(int argc, char **argv)
+zhack_repair_read_label(const int fd, vdev_label_t *vl,
+ const uint64_t label_offset, const int l)
{
- zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
- const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
- ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
- boolean_t labels_repaired[VDEV_LABELS] = {0};
- boolean_t repaired = B_FALSE;
- vdev_label_t labels[VDEV_LABELS] = {{{0}}};
- struct stat st;
- int fd;
+ const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
- abd_init();
+ if (err == -1) {
+ (void) fprintf(stderr,
+ "error: cannot read label %d: %s\n",
+ l, strerror(errno));
+ return (err);
+ } else if (err != sizeof (vdev_label_t)) {
+ (void) fprintf(stderr,
+ "error: bad label %d read size\n", l);
+ return (err);
+ }
- argc -= 1;
- argv += 1;
+ return (0);
+}
- if (argc < 1) {
- (void) fprintf(stderr, "error: missing device\n");
- usage();
- }
+static void
+zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
+ const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
+{
+ zio_cksum_t verifier;
+ zio_cksum_t current_cksum;
+ zio_checksum_info_t *ci;
+ abd_t *abd;
- if ((fd = open(argv[0], O_RDWR)) == -1)
- fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
- strerror(errno));
+ ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
- if (stat(argv[0], &st) != 0)
- fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
- strerror(errno));
+ if (byteswap)
+ byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
- for (int l = 0; l < VDEV_LABELS; l++) {
- uint64_t label_offset, offset;
- zio_cksum_t expected_cksum;
- zio_cksum_t actual_cksum;
- zio_cksum_t verifier;
- zio_eck_t *eck;
- nvlist_t *cfg;
- int byteswap;
+ current_cksum = eck->zec_cksum;
+ eck->zec_cksum = verifier;
+
+ ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
+ abd = abd_get_from_buf(data, abdsize);
+ ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
+ abd_free(abd);
+
+ eck->zec_cksum = current_cksum;
+}
+
+static int
+zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys,
+ const size_t cfg_keys_len, nvlist_t *cfg, nvlist_t *vdev_tree_cfg,
+ uint64_t *ashift)
+{
+ int err;
+
+ if (ub->ub_txg != 0) {
+ (void) fprintf(stderr,
+ "error: label %d: UB TXG of 0 expected, but got %"
+ PRIu64 "\n",
+ l, ub->ub_txg);
+ (void) fprintf(stderr, "It would appear the device was not "
+ "properly removed.\n");
+ return (1);
+ }
+
+ for (int i = 0; i < cfg_keys_len; i++) {
uint64_t val;
- ssize_t err;
-
- vdev_label_t *vl = &labels[l];
-
- label_offset = vdev_label_offset(st.st_size, l, 0);
- err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
- if (err == -1) {
- (void) fprintf(stderr, "error: cannot read "
- "label %d: %s\n", l, strerror(errno));
- continue;
- } else if (err != sizeof (vdev_label_t)) {
- (void) fprintf(stderr, "error: bad label %d read size "
- "\n", l);
- continue;
+ err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
+ if (err) {
+ (void) fprintf(stderr,
+ "error: label %d, %d: "
+ "cannot find nvlist key %s\n",
+ l, i, cfg_keys[i]);
+ return (err);
}
+ }
- err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
- VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
- if (err) {
- (void) fprintf(stderr, "error: cannot unpack nvlist "
- "label %d\n", l);
- continue;
+ err = nvlist_lookup_nvlist(cfg,
+ ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
+ if (err) {
+ (void) fprintf(stderr,
+ "error: label %d: cannot find nvlist key %s\n",
+ l, ZPOOL_CONFIG_VDEV_TREE);
+ return (err);
+ }
+
+ err = nvlist_lookup_uint64(vdev_tree_cfg,
+ ZPOOL_CONFIG_ASHIFT, ashift);
+ if (err) {
+ (void) fprintf(stderr,
+ "error: label %d: cannot find nvlist key %s\n",
+ l, ZPOOL_CONFIG_ASHIFT);
+ return (err);
+ }
+
+ if (*ashift == 0) {
+ (void) fprintf(stderr,
+ "error: label %d: nvlist key %s is zero\n",
+ l, ZPOOL_CONFIG_ASHIFT);
+ return (err);
+ }
+
+ return (0);
+}
+
+static int
+zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
+{
+ /*
+ * Uberblock root block pointer has valid birth TXG.
+ * Copying it to the label NVlist
+ */
+ if (ub->ub_rootbp.blk_birth != 0) {
+ const uint64_t txg = ub->ub_rootbp.blk_birth;
+ ub->ub_txg = txg;
+
+ if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) {
+ (void) fprintf(stderr,
+ "error: label %d: "
+ "Failed to remove pool creation TXG\n",
+ l);
+ return (1);
}
- for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
- err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val);
- if (err) {
- (void) fprintf(stderr, "error: label %d: "
- "cannot find nvlist key %s\n",
- l, cfg_keys[i]);
- continue;
- }
+ if (nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG) != 0) {
+ (void) fprintf(stderr,
+ "error: label %d: Failed to remove pool TXG to "
+ "be replaced.\n",
+ l);
+ return (1);
}
- void *data = (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
- eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1;
+ if (nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg) != 0) {
+ (void) fprintf(stderr,
+ "error: label %d: "
+ "Failed to add pool TXG of %" PRIu64 "\n",
+ l, txg);
+ return (1);
+ }
+ }
+
+ return (0);
+}
- offset = label_offset + offsetof(vdev_label_t, vl_vdev_phys);
- ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
+static boolean_t
+zhack_repair_write_label(const int l, const int fd, const int byteswap,
+ void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
+{
+ zio_cksum_t actual_cksum;
+ zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
+ &actual_cksum);
+ zio_cksum_t expected_cksum = eck->zec_cksum;
+ ssize_t err;
+
+ if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
+ return (B_FALSE);
+
+ eck->zec_cksum = actual_cksum;
+
+ err = pwrite64(fd, data, abdsize, offset);
+ if (err == -1) {
+ (void) fprintf(stderr, "error: cannot write label %d: %s\n",
+ l, strerror(errno));
+ return (B_FALSE);
+ } else if (err != abdsize) {
+ (void) fprintf(stderr, "error: bad write size label %d\n", l);
+ return (B_FALSE);
+ } else {
+ (void) fprintf(stderr,
+ "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
+ l, abdsize, offset);
+ }
- byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
- if (byteswap)
- byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
+ return (B_TRUE);
+}
- expected_cksum = eck->zec_cksum;
- eck->zec_cksum = verifier;
+static void
+zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
+ const uint64_t ashift, const int fd, const int byteswap,
+ const uint64_t label_offset, uint32_t *labels_repaired)
+{
+ void *ub_data =
+ (char *)vl + offsetof(vdev_label_t, vl_uberblock);
+ zio_eck_t *ub_eck =
+ (zio_eck_t *)
+ ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
- abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE);
- ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum);
- abd_free(abd);
+ if (ub_eck->zec_magic != 0) {
+ (void) fprintf(stderr,
+ "error: label %d: "
+ "Expected Uberblock checksum magic number to "
+ "be 0, but got %" PRIu64 "\n",
+ l, ub_eck->zec_magic);
+ (void) fprintf(stderr, "It would appear there's already "
+ "a checksum for the uberblock.\n");
+ return;
+ }
- if (byteswap)
- byteswap_uint64_array(&expected_cksum,
- sizeof (zio_cksum_t));
- if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
- continue;
+ ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
- eck->zec_cksum = actual_cksum;
+ if (zhack_repair_write_label(l, fd, byteswap,
+ ub_data, ub_eck,
+ label_offset + offsetof(vdev_label_t, vl_uberblock),
+ ASHIFT_UBERBLOCK_SIZE(ashift)))
+ labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
+}
- err = pwrite64(fd, data, VDEV_PHYS_SIZE, offset);
- if (err == -1) {
- (void) fprintf(stderr, "error: cannot write "
- "label %d: %s\n", l, strerror(errno));
- continue;
- } else if (err != VDEV_PHYS_SIZE) {
- (void) fprintf(stderr, "error: bad write size "
- "label %d\n", l);
- continue;
+static void
+zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
+{
+ (void) fprintf(stream,
+ "%016llx:%016llx:%016llx:%016llx",
+ (u_longlong_t)cksum->zc_word[0],
+ (u_longlong_t)cksum->zc_word[1],
+ (u_longlong_t)cksum->zc_word[2],
+ (u_longlong_t)cksum->zc_word[3]);
+}
+
+static int
+zhack_repair_test_cksum(const int byteswap, void *vdev_data,
+ zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
+{
+ const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
+ zio_cksum_t actual_cksum;
+ zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
+ VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
+ const uint64_t expected_magic = byteswap ?
+ BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
+ const uint64_t actual_magic = vdev_eck->zec_magic;
+ int err = 0;
+ if (actual_magic != expected_magic) {
+ (void) fprintf(stderr, "error: label %d: "
+ "Expected "
+ "the nvlist checksum magic number to not be %"
+ PRIu64 " not %" PRIu64 "\n",
+ l, expected_magic, actual_magic);
+ err = ECKSUM;
+ }
+ if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
+ (void) fprintf(stderr, "error: label %d: "
+ "Expected the nvlist checksum to be ", l);
+ (void) zhack_repair_print_cksum(stderr,
+ &expected_cksum);
+ (void) fprintf(stderr, " not ");
+ zhack_repair_print_cksum(stderr, &actual_cksum);
+ (void) fprintf(stderr, "\n");
+ err = ECKSUM;
+ }
+ return (err);
+}
+
+static void
+zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
+ vdev_label_t *vl, const uint64_t label_offset, const int l,
+ uint32_t *labels_repaired)
+{
+ ssize_t err;
+ uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
+ void *vdev_data =
+ (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
+ zio_eck_t *vdev_eck =
+ (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
+ const uint64_t vdev_phys_offset =
+ label_offset + offsetof(vdev_label_t, vl_vdev_phys);
+ const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
+ ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
+ nvlist_t *cfg;
+ nvlist_t *vdev_tree_cfg = NULL;
+ uint64_t ashift;
+ int byteswap;
+
+ err = zhack_repair_read_label(fd, vl, label_offset, l);
+ if (err)
+ return;
+
+ if (vdev_eck->zec_magic == 0) {
+ (void) fprintf(stderr, "error: label %d: "
+ "Expected the nvlist checksum magic number to not be zero"
+ "\n",
+ l);
+ (void) fprintf(stderr, "There should already be a checksum "
+ "for the label.\n");
+ return;
+ }
+
+ byteswap =
+ (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC));
+
+ if (byteswap) {
+ byteswap_uint64_array(&vdev_eck->zec_cksum,
+ sizeof (zio_cksum_t));
+ vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
+ }
+
+ if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
+ zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
+ vdev_phys_offset, l) != 0) {
+ (void) fprintf(stderr, "It would appear checksums are "
+ "corrupted. Try zhack repair label -c <device>\n");
+ return;
+ }
+
+ err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
+ VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0);
+ if (err) {
+ (void) fprintf(stderr,
+ "error: cannot unpack nvlist label %d\n", l);
+ return;
+ }
+
+ err = zhack_repair_check_label(ub,
+ l, cfg_keys, ARRAY_SIZE(cfg_keys), cfg, vdev_tree_cfg, &ashift);
+ if (err)
+ return;
+
+ if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
+ char *buf;
+ size_t buflen;
+
+ err = zhack_repair_undetach(ub, cfg, l);
+ if (err)
+ return;
+
+ buf = vl->vl_vdev_phys.vp_nvlist;
+ buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
+ if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
+ (void) fprintf(stderr,
+ "error: label %d: Failed to pack nvlist\n", l);
+ return;
}
- fsync(fd);
+ zhack_repair_write_uberblock(vl,
+ l, ashift, fd, byteswap, label_offset, labels_repaired);
+ }
+
+ if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
+ vdev_phys_offset, VDEV_PHYS_SIZE))
+ labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
+
+ fsync(fd);
+}
+
+static const char *
+zhack_repair_label_status(const uint32_t label_status,
+ const uint32_t to_check)
+{
+ return ((label_status & to_check) != 0 ? "repaired" : "skipped");
+}
+
+static int
+zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
+{
+ uint32_t labels_repaired[VDEV_LABELS] = {0};
+ vdev_label_t labels[VDEV_LABELS] = {{{0}}};
+ struct stat64 st;
+ int fd;
+ off_t filesize;
+ uint32_t repaired = 0;
+
+ abd_init();
+
+ if (argc < 1) {
+ (void) fprintf(stderr, "error: missing device\n");
+ usage();
+ }
+
+ if ((fd = open(argv[0], O_RDWR)) == -1)
+ fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
+ strerror(errno));
+
+ if (fstat64_blk(fd, &st) != 0)
+ fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
+ strerror(errno));
+
+ filesize = st.st_size;
+ (void) fprintf(stderr, "Calculated filesize to be %jd\n",
+ (intmax_t)filesize);
+
+ if (filesize % sizeof (vdev_label_t) != 0)
+ filesize =
+ (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
- labels_repaired[l] = B_TRUE;
+ for (int l = 0; l < VDEV_LABELS; l++) {
+ zhack_repair_one_label(op, fd, &labels[l],
+ vdev_label_offset(filesize, l, 0), l, labels_repaired);
}
close(fd);
@@ -605,18 +893,52 @@ zhack_repair_label_cksum(int argc, char **argv)
abd_fini();
for (int l = 0; l < VDEV_LABELS; l++) {
- (void) printf("label %d: %s\n", l,
- labels_repaired[l] ? "repaired" : "skipped");
- repaired |= labels_repaired[l];
+ const uint32_t lr = labels_repaired[l];
+ (void) printf("label %d: ", l);
+ (void) printf("uberblock: %s ",
+ zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
+ (void) printf("checksum: %s\n",
+ zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
+ repaired |= lr;
}
- if (repaired)
+ if (repaired > 0)
return (0);
return (1);
}
static int
+zhack_do_label_repair(int argc, char **argv)
+{
+ zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
+ int c;
+
+ optind = 1;
+ while ((c = getopt(argc, argv, "+cu")) != -1) {
+ switch (c) {
+ case 'c':
+ op |= ZHACK_REPAIR_OP_CKSUM;
+ break;
+ case 'u':
+ op |= ZHACK_REPAIR_OP_UNDETACH;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (op == ZHACK_REPAIR_OP_UNKNOWN)
+ op = ZHACK_REPAIR_OP_CKSUM;
+
+ return (zhack_label_repair(op, argc, argv));
+}
+
+static int
zhack_do_label(int argc, char **argv)
{
char *subcommand;
@@ -632,7 +954,7 @@ zhack_do_label(int argc, char **argv)
subcommand = argv[0];
if (strcmp(subcommand, "repair") == 0) {
- err = zhack_repair_label_cksum(argc, argv);
+ err = zhack_do_label_repair(argc, argv);
} else {
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
subcommand);
diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
index 27c7d63781c5..979546ab3090 100644
--- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
+++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
@@ -587,16 +587,11 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
errno);
return (-1);
}
- size_t runtime_path_len = strlen(runtime_path);
- size_t counter_path_len = runtime_path_len + 1 + 10;
- char *counter_path = malloc(counter_path_len + 1);
- if (!counter_path) {
+
+ char *counter_path;
+ if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1)
return (-1);
- }
- counter_path[0] = 0;
- strcat(counter_path, runtime_path);
- snprintf(counter_path + runtime_path_len, counter_path_len, "/%d",
- config->uid);
+
const int fd = open(counter_path,
O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,
S_IRUSR | S_IWUSR);
diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h
index fb26a83b1844..1800a7e31da0 100644
--- a/sys/contrib/openzfs/include/sys/dbuf.h
+++ b/sys/contrib/openzfs/include/sys/dbuf.h
@@ -61,16 +61,18 @@ extern "C" {
/*
* The simplified state transition diagram for dbufs looks like:
*
- * +----> READ ----+
- * | |
- * | V
- * (alloc)-->UNCACHED CACHED-->EVICTING-->(free)
- * | ^ ^
- * | | |
- * +----> FILL ----+ |
- * | |
- * | |
- * +--------> NOFILL -------+
+ * +--> READ --+
+ * | |
+ * | V
+ * (alloc)-->UNCACHED CACHED-->EVICTING-->(free)
+ * ^ | ^ ^
+ * | | | |
+ * | +--> FILL --+ |
+ * | | |
+ * | | |
+ * | +------> NOFILL -----+
+ * | |
+ * +---------------+
*
* DB_SEARCH is an invalid state for a dbuf. It is used by dbuf_free_range
* to find all dbufs in a range of a dnode and must be less than any other
@@ -375,6 +377,7 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
uint64_t blkid, uint64_t *hash_out);
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
+void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h
index 1b82ff620f27..5ee6704668a4 100644
--- a/sys/contrib/openzfs/include/sys/dmu.h
+++ b/sys/contrib/openzfs/include/sys/dmu.h
@@ -782,6 +782,9 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
int len);
+void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
+void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+ int len);
void dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
int len);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
@@ -1062,8 +1065,8 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
uint64_t *off);
int dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t length, dmu_tx_t *tx, struct blkptr *bps, size_t *nbpsp);
-void dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t length, struct blkptr *bps, size_t *nbpsp);
+int dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset,
uint64_t length, dmu_tx_t *tx, const struct blkptr *bps, size_t nbps,
boolean_t replay);
diff --git a/sys/contrib/openzfs/include/sys/dmu_tx.h b/sys/contrib/openzfs/include/sys/dmu_tx.h
index ca8514e5d2d0..aa55da626149 100644
--- a/sys/contrib/openzfs/include/sys/dmu_tx.h
+++ b/sys/contrib/openzfs/include/sys/dmu_tx.h
@@ -91,6 +91,7 @@ enum dmu_tx_hold_type {
THT_SPACE,
THT_SPILL,
THT_CLONE,
+ THT_APPEND,
THT_NUMTYPES
};
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index b96a9ef1d42f..460ea2bfee4e 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -1140,7 +1140,8 @@ extern const char *spa_state_to_name(spa_t *spa);
struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb,
const uint64_t *birth);
-extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb);
+extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb,
+ const uint64_t *birth);
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
index 3463682a1065..695bc09e6cb7 100644
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -531,6 +531,12 @@ enum blk_verify_flag {
BLK_VERIFY_HALT
};
+enum blk_config_flag {
+ BLK_CONFIG_HELD, // SCL_VDEV held for writer
+ BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
+ BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
+};
+
extern int zio_bookmark_compare(const void *, const void *);
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
@@ -646,7 +652,7 @@ extern int zio_resume(spa_t *spa);
extern void zio_resume_wait(spa_t *spa);
extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
- boolean_t config_held, enum blk_verify_flag blk_verify);
+ enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
/*
* Initial setup and teardown.
diff --git a/sys/contrib/openzfs/man/man1/zhack.1 b/sys/contrib/openzfs/man/man1/zhack.1
index 26b8156b4008..937f1e9168c2 100644
--- a/sys/contrib/openzfs/man/man1/zhack.1
+++ b/sys/contrib/openzfs/man/man1/zhack.1
@@ -98,10 +98,29 @@ feature is now required to read the pool MOS.
.It Xo
.Nm zhack
.Cm label repair
+.Op Fl cu
.Ar device
.Xc
-Repair corrupted labels by rewriting the checksum using the presumed valid
-contents of the label.
+Repair labels of a specified
+.Ar device
+according to options.
+.Pp
+Flags may be combined to do their functions simultaneously.
+.
+.Pp
+The
+.Fl c
+flag repairs corrupted label checksums
+.
+.Pp
+The
+.Fl u
+flag restores the label on a detached device
+.Pp
+Example:
+.Nm zhack Cm label repair Fl cu Ar device
+ Fix checksums and undetach a device
+.
.El
.
.Sh GLOBAL OPTIONS
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
index efe9e833996a..2b7dcb63829c 100644
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -562,13 +562,12 @@ This feature enables the upgraded version of errlog, which required an on-disk
error log format change.
Now the error log of each head dataset is stored separately in the zap object
and keyed by the head id.
-In case of encrypted filesystems with unloaded keys or unmounted encrypted
-filesystems we are unable to check their snapshots or clones for errors and
-these will not be reported.
-In this case no filenames will be reported either.
With this feature enabled, every dataset affected by an error block is listed
in the output of
.Nm zpool Cm status .
+In case of encrypted filesystems with unloaded keys we are unable to check
+their snapshots or clones for errors and these will not be reported.
+An "access denied" error will be reported.
.Pp
\*[instant-never]
.
diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8
index ed572e29f51f..8f9580cf086e 100644
--- a/sys/contrib/openzfs/man/man8/zpool-status.8
+++ b/sys/contrib/openzfs/man/man8/zpool-status.8
@@ -119,6 +119,9 @@ See
.It Fl v
Displays verbose data error information, printing out a complete list of all
data errors since the last complete pool scrub.
+If the head_errlog feature is enabled and files containing errors have been
+removed then the respective filenames will not be reported in subsequent runs
+of this command.
.It Fl x
Only display status for pools that are exhibiting errors or are otherwise
unavailable.
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index 26578491fd67..2520507b98aa 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -832,6 +832,7 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
(zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
return (SET_ERROR(EIO));
+ rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
ssize_t start_resid = zfs_uio_resid(&uio);
lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
zfs_uio_resid(&uio), RL_READER);
@@ -853,6 +854,7 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
zfs_rangelock_exit(lr);
int64_t nread = start_resid - zfs_uio_resid(&uio);
dataset_kstats_update_read_kstats(&zv->zv_kstat, nread);
+ rw_exit(&zv->zv_suspend_lock);
return (error);
}
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 26222d2efe3f..745ee8f02ed4 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -370,7 +370,20 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
* will retain all the free_on_free settings after being
* added to the parents list.
*/
+#ifdef ZFS_DEBUG
+ /*
+ * If cabd had abd_parent, we have to drop it here. We can't
+ * transfer it to pabd, nor we can clear abd_size leaving it.
+ */
+ if (cabd->abd_parent != NULL) {
+ (void) zfs_refcount_remove_many(
+ &cabd->abd_parent->abd_children,
+ cabd->abd_size, cabd);
+ cabd->abd_parent = NULL;
+ }
+#endif
pabd->abd_size += cabd->abd_size;
+ cabd->abd_size = 0;
list_move_tail(&ABD_GANG(pabd).abd_gang_chain,
&ABD_GANG(cabd).abd_gang_chain);
ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
@@ -408,7 +421,6 @@ abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
*/
if (abd_is_gang(cabd)) {
ASSERT(!list_link_active(&cabd->abd_gang_link));
- ASSERT(!list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
return (abd_gang_add_gang(pabd, cabd, free_on_free));
}
ASSERT(!abd_is_gang(cabd));
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index c50228a2682f..a78f664c4fe8 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -5696,8 +5696,8 @@ top:
* and treat it as a checksum error. This allows an alternate blkptr
* to be tried when one is available (e.g. ditto blocks).
*/
- if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER,
- BLK_VERIFY_LOG)) {
+ if (!zfs_blkptr_verify(spa, bp, (zio_flags & ZIO_FLAG_CONFIG_WRITER) ?
+ BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
rc = SET_ERROR(ECKSUM);
goto done;
}
@@ -8198,10 +8198,17 @@ l2arc_write_size(l2arc_dev_t *dev)
* iteration can occur.
*/
dev_size = dev->l2ad_end - dev->l2ad_start;
+
+ /* We need to add in the worst case scenario of log block overhead. */
tsize = size + l2arc_log_blk_overhead(size, dev);
- if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0)
+ if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
+ /*
+ * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
+ * times the write size, whichever is greater.
+ */
tsize += MAX(64 * 1024 * 1024,
(tsize * l2arc_trim_ahead) / 100);
+ }
if (tsize >= dev_size) {
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
@@ -8836,19 +8843,6 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
buflist = &dev->l2ad_buflist;
- /*
- * We need to add in the worst case scenario of log block overhead.
- */
- distance += l2arc_log_blk_overhead(distance, dev);
- if (vd->vdev_has_trim && l2arc_trim_ahead > 0) {
- /*
- * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
- * times the write size, whichever is greater.
- */
- distance += MAX(64 * 1024 * 1024,
- (distance * l2arc_trim_ahead) / 100);
- }
-
top:
rerun = B_FALSE;
if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index 8193fb244079..049a62c1c171 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -1573,24 +1573,22 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
bpp = &bp;
}
} else {
- struct dirty_leaf *dl;
dbuf_dirty_record_t *dr;
ASSERT3S(db->db_state, ==, DB_NOFILL);
+ /*
+ * Block cloning: If we have a pending block clone,
+ * we don't want to read the underlying block, but the content
+ * of the block being cloned, so we have the most recent data.
+ */
dr = list_head(&db->db_dirty_records);
- if (dr == NULL) {
+ if (dr == NULL || !dr->dt.dl.dr_brtwrite) {
err = EIO;
goto early_unlock;
- } else {
- dl = &dr->dt.dl;
- if (!dl->dr_brtwrite) {
- err = EIO;
- goto early_unlock;
- }
- bp = dl->dr_overridden_by;
- bpp = &bp;
}
+ bp = dr->dt.dl.dr_overridden_by;
+ bpp = &bp;
}
err = dbuf_read_hole(db, dn, bpp);
@@ -1906,6 +1904,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
dmu_buf_impl_t *db = dr->dr_dbuf;
blkptr_t *bp = &dr->dt.dl.dr_overridden_by;
uint64_t txg = dr->dr_txg;
+ boolean_t release;
ASSERT(MUTEX_HELD(&db->db_mtx));
/*
@@ -1926,8 +1925,10 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
zio_free(db->db_objset->os_spa, txg, bp);
+ release = !dr->dt.dl.dr_brtwrite;
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
dr->dt.dl.dr_nopwrite = B_FALSE;
+ dr->dt.dl.dr_brtwrite = B_FALSE;
dr->dt.dl.dr_has_raw_params = B_FALSE;
/*
@@ -1938,7 +1939,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
* the buf thawed to save the effort of freezing &
* immediately re-thawing it.
*/
- if (!dr->dt.dl.dr_brtwrite)
+ if (release)
arc_release(dr->dt.dl.dr_data, db);
}
@@ -2022,11 +2023,6 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
db->db_blkid > dn->dn_maxblkid)
dn->dn_maxblkid = db->db_blkid;
dbuf_unoverride(dr);
- if (dr->dt.dl.dr_brtwrite) {
- ASSERT(db->db.db_data == NULL);
- mutex_exit(&db->db_mtx);
- continue;
- }
} else {
/*
* This dbuf is not dirty in the open context.
@@ -2613,6 +2609,7 @@ static void
dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+ boolean_t undirty = B_FALSE;
ASSERT(tx->tx_txg != 0);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
@@ -2625,7 +2622,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
*/
mutex_enter(&db->db_mtx);
- if (db->db_state == DB_CACHED) {
+ if (db->db_state == DB_CACHED || db->db_state == DB_NOFILL) {
dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
/*
* It's possible that it is already dirty but not cached,
@@ -2633,10 +2630,21 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
* go through dmu_buf_will_dirty().
*/
if (dr != NULL) {
- /* This dbuf is already dirty and cached. */
- dbuf_redirty(dr);
- mutex_exit(&db->db_mtx);
- return;
+ if (dr->dt.dl.dr_brtwrite) {
+ /*
+ * Block cloning: If we are dirtying a cloned
+ * block, we cannot simply redirty it, because
+ * this dr has no data associated with it.
+ * We will go through a full undirtying below,
+ * before dirtying it again.
+ */
+ undirty = B_TRUE;
+ } else {
+ /* This dbuf is already dirty and cached. */
+ dbuf_redirty(dr);
+ mutex_exit(&db->db_mtx);
+ return;
+ }
}
}
mutex_exit(&db->db_mtx);
@@ -2645,7 +2653,20 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
flags |= DB_RF_HAVESTRUCT;
DB_DNODE_EXIT(db);
+
+ /*
+ * Block cloning: Do the dbuf_read() before undirtying the dbuf, as we
+ * want to make sure dbuf_read() will read the pending cloned block and
+ * not the underlying block that is being replaced. dbuf_undirty() will
+ * do dbuf_unoverride(), so we will end up with cloned block content,
+ * without overridden BP.
+ */
(void) dbuf_read(db, NULL, flags);
+ if (undirty) {
+ mutex_enter(&db->db_mtx);
+ VERIFY(!dbuf_undirty(db, tx));
+ mutex_exit(&db->db_mtx);
+ }
(void) dbuf_dirty(db, tx);
}
@@ -2669,13 +2690,37 @@ dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
}
void
+dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+
+ /*
+ * Block cloning: We are going to clone into this block, so undirty
+ * modifications done to this block so far in this txg. This includes
+ * writes and clones into this block.
+ */
+ mutex_enter(&db->db_mtx);
+ VERIFY(!dbuf_undirty(db, tx));
+ ASSERT(list_head(&db->db_dirty_records) == NULL);
+ if (db->db_buf != NULL) {
+ arc_buf_destroy(db->db_buf, db);
+ db->db_buf = NULL;
+ }
+ mutex_exit(&db->db_mtx);
+
+ dmu_buf_will_not_fill(db_fake, tx);
+}
+
+void
dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
db->db_state = DB_NOFILL;
DTRACE_SET_STATE(db, "allocating NOFILL buffer");
- dmu_buf_will_fill(db_fake, tx);
+
+ dbuf_noread(db);
+ (void) dbuf_dirty(db, tx);
}
void
@@ -2691,6 +2736,19 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||
dmu_tx_private_ok(tx));
+ if (db->db_state == DB_NOFILL) {
+ /*
+ * Block cloning: We will be completely overwriting a block
+ * cloned in this transaction group, so let's undirty the
+ * pending clone and mark the block as uncached. This will be
+ * as if the clone was never done.
+ */
+ mutex_enter(&db->db_mtx);
+ VERIFY(!dbuf_undirty(db, tx));
+ mutex_exit(&db->db_mtx);
+ db->db_state = DB_UNCACHED;
+ }
+
dbuf_noread(db);
(void) dbuf_dirty(db, tx);
}
@@ -4636,6 +4694,20 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
i += DNODE_MIN_SIZE;
if (dnp->dn_type != DMU_OT_NONE) {
fill++;
+ for (int j = 0; j < dnp->dn_nblkptr;
+ j++) {
+ (void) zfs_blkptr_verify(spa,
+ &dnp->dn_blkptr[j],
+ BLK_CONFIG_SKIP,
+ BLK_VERIFY_HALT);
+ }
+ if (dnp->dn_flags &
+ DNODE_FLAG_SPILL_BLKPTR) {
+ (void) zfs_blkptr_verify(spa,
+ DN_SPILL_BLKPTR(dnp),
+ BLK_CONFIG_SKIP,
+ BLK_VERIFY_HALT);
+ }
i += dnp->dn_extra_slots *
DNODE_MIN_SIZE;
}
@@ -4653,6 +4725,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) {
if (BP_IS_HOLE(ibp))
continue;
+ (void) zfs_blkptr_verify(spa, ibp,
+ BLK_CONFIG_SKIP, BLK_VERIFY_HALT);
fill += BP_GET_FILL(ibp);
}
}
@@ -5139,6 +5213,7 @@ EXPORT_SYMBOL(dbuf_dirty);
EXPORT_SYMBOL(dmu_buf_set_crypt_params);
EXPORT_SYMBOL(dmu_buf_will_dirty);
EXPORT_SYMBOL(dmu_buf_is_dirty);
+EXPORT_SYMBOL(dmu_buf_will_clone);
EXPORT_SYMBOL(dmu_buf_will_not_fill);
EXPORT_SYMBOL(dmu_buf_will_fill);
EXPORT_SYMBOL(dmu_buf_fill_done);
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index cda1472a77aa..8a13b8f410a1 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -2173,7 +2173,7 @@ restart:
int
dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
- dmu_tx_t *tx, blkptr_t *bps, size_t *nbpsp)
+ blkptr_t *bps, size_t *nbpsp)
{
dmu_buf_t **dbp, *dbuf;
dmu_buf_impl_t *db;
@@ -2197,10 +2197,6 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
mutex_enter(&db->db_mtx);
- /*
- * If the block is not on the disk yet, it has no BP assigned.
- * There is not much we can do...
- */
if (!list_is_empty(&db->db_dirty_records)) {
dbuf_dirty_record_t *dr;
@@ -2235,10 +2231,6 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
error = SET_ERROR(EAGAIN);
goto out;
}
- if (dmu_buf_is_dirty(dbuf, tx)) {
- error = SET_ERROR(EAGAIN);
- goto out;
- }
/*
* Make sure we clone only data blocks.
*/
@@ -2257,7 +2249,7 @@ out:
return (error);
}
-void
+int
dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
dmu_tx_t *tx, const blkptr_t *bps, size_t nbps, boolean_t replay)
{
@@ -2267,7 +2259,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
struct dirty_leaf *dl;
dbuf_dirty_record_t *dr;
const blkptr_t *bp;
- int numbufs;
+ int error = 0, i, numbufs;
spa = os->os_spa;
@@ -2275,27 +2267,37 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
&numbufs, &dbp));
ASSERT3U(nbps, ==, numbufs);
- for (int i = 0; i < numbufs; i++) {
+ /*
+ * Before we start cloning make sure that the dbufs sizes match new BPs
+ * sizes. If they don't, that's a no-go, as we are not able to shrink
+ * dbufs.
+ */
+ for (i = 0; i < numbufs; i++) {
dbuf = dbp[i];
db = (dmu_buf_impl_t *)dbuf;
bp = &bps[i];
ASSERT0(db->db_level);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp));
-
- mutex_enter(&db->db_mtx);
+ ASSERT(db->db_blkid != DMU_SPILL_BLKID);
- VERIFY(!dbuf_undirty(db, tx));
- ASSERT(list_head(&db->db_dirty_records) == NULL);
- if (db->db_buf != NULL) {
- arc_buf_destroy(db->db_buf, db);
- db->db_buf = NULL;
+ if (!BP_IS_HOLE(bp) && BP_GET_LSIZE(bp) != dbuf->db_size) {
+ error = SET_ERROR(EXDEV);
+ goto out;
}
+ }
- mutex_exit(&db->db_mtx);
+ for (i = 0; i < numbufs; i++) {
+ dbuf = dbp[i];
+ db = (dmu_buf_impl_t *)dbuf;
+ bp = &bps[i];
- dmu_buf_will_not_fill(dbuf, tx);
+ ASSERT0(db->db_level);
+ ASSERT(db->db_blkid != DMU_BONUS_BLKID);
+ ASSERT(db->db_blkid != DMU_SPILL_BLKID);
+ ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp));
+
+ dmu_buf_will_clone(dbuf, tx);
mutex_enter(&db->db_mtx);
@@ -2305,7 +2307,6 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
dl = &dr->dt.dl;
dl->dr_overridden_by = *bp;
dl->dr_brtwrite = B_TRUE;
-
dl->dr_override_state = DR_OVERRIDDEN;
if (BP_IS_HOLE(bp)) {
dl->dr_overridden_by.blk_birth = 0;
@@ -2331,8 +2332,10 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
brt_pending_add(spa, bp, tx);
}
}
-
+out:
dmu_buf_rele_array(dbp, numbufs, FTAG);
+
+ return (error);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
index c2ce5ce000ac..c22a95f8647f 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -1353,7 +1353,7 @@ corrective_read_done(zio_t *zio)
cr_cb_data_t *data = zio->io_private;
/* Corruption corrected; update error log if needed */
if (zio->io_error == 0)
- spa_remove_error(data->spa, &data->zb);
+ spa_remove_error(data->spa, &data->zb, &zio->io_bp->blk_birth);
kmem_free(data, sizeof (cr_cb_data_t));
abd_free(zio->io_abd);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
index 1c5608c4541b..c4e274bd4c42 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -295,6 +295,53 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
}
static void
+dmu_tx_count_append(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
+{
+ dnode_t *dn = txh->txh_dnode;
+ int err = 0;
+
+ if (len == 0)
+ return;
+
+ (void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);
+
+ if (dn == NULL)
+ return;
+
+ /*
+ * For i/o error checking, read the block that will be needed to
+ * perform the append: only the first level-0 block, and only if the
+ * write is not block-aligned (i.e. it is a partial-block write).
+ * No additional blocks are read.
+ */
+ if (dn->dn_maxblkid == 0) {
+ if (off < dn->dn_datablksz &&
+ (off > 0 || len < dn->dn_datablksz)) {
+ err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
+ if (err != 0) {
+ txh->txh_tx->tx_err = err;
+ }
+ }
+ } else {
+ zio_t *zio = zio_root(dn->dn_objset->os_spa,
+ NULL, NULL, ZIO_FLAG_CANFAIL);
+
+ /* first level-0 block */
+ uint64_t start = off >> dn->dn_datablkshift;
+ if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
+ err = dmu_tx_check_ioerr(zio, dn, 0, start);
+ if (err != 0) {
+ txh->txh_tx->tx_err = err;
+ }
+ }
+
+ err = zio_wait(zio);
+ if (err != 0) {
+ txh->txh_tx->tx_err = err;
+ }
+ }
+}
+
+static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
(void) zfs_refcount_add_many(&txh->txh_space_towrite,
@@ -335,6 +382,42 @@ dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
}
/*
+ * Should be used when appending to an object and the exact offset is unknown.
+ * The write must occur at or beyond the specified offset. Only the L0 block
+ * at provided offset will be prefetched.
+ */
+void
+dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
+{
+ dmu_tx_hold_t *txh;
+
+ ASSERT0(tx->tx_txg);
+ ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ object, THT_APPEND, off, DMU_OBJECT_END);
+ if (txh != NULL) {
+ dmu_tx_count_append(txh, off, len);
+ dmu_tx_count_dnode(txh);
+ }
+}
+
+void
+dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
+{
+ dmu_tx_hold_t *txh;
+
+ ASSERT0(tx->tx_txg);
+ ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+ txh = dmu_tx_hold_dnode_impl(tx, dn, THT_APPEND, off, DMU_OBJECT_END);
+ if (txh != NULL) {
+ dmu_tx_count_append(txh, off, len);
+ dmu_tx_count_dnode(txh);
+ }
+}
+
+/*
* This function marks the transaction as being a "net free". The end
* result is that refquotas will be disabled for this transaction, and
* this transaction will be able to use half of the pool space overhead
@@ -668,6 +751,26 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
if (blkid == 0)
match_offset = TRUE;
break;
+ case THT_APPEND:
+ if (blkid >= beginblk && (blkid <= endblk ||
+ txh->txh_arg2 == DMU_OBJECT_END))
+ match_offset = TRUE;
+
+ /*
+ * THT_WRITE used for bonus and spill blocks.
+ */
+ ASSERT(blkid != DMU_BONUS_BLKID &&
+ blkid != DMU_SPILL_BLKID);
+
+ /*
+ * They might have to increase nlevels,
+ * thus dirtying the new TLIBs. Or they
+ * might have to change the block size,
+ * thus dirtying the new lvl=0 blk=0.
+ */
+ if (blkid == 0)
+ match_offset = TRUE;
+ break;
case THT_FREE:
/*
* We will dirty all the level 1 blocks in
@@ -1454,6 +1557,8 @@ dmu_tx_fini(void)
EXPORT_SYMBOL(dmu_tx_create);
EXPORT_SYMBOL(dmu_tx_hold_write);
EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
+EXPORT_SYMBOL(dmu_tx_hold_append);
+EXPORT_SYMBOL(dmu_tx_hold_append_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_free);
EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_zap);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index d6a9365df120..d398b6705551 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -1970,7 +1970,8 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
DMU_USERUSED_OBJECT, tx);
}
arc_buf_destroy(buf, &buf);
- } else if (!zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_LOG)) {
+ } else if (!zfs_blkptr_verify(spa, bp,
+ BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
/*
* Sanity check the block pointer contents, this is handled
* by arc_read() for the cases above.
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index dd4a442d97a1..16396170273c 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -2387,7 +2387,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
* When damaged consider it to be a metadata error since we cannot
* trust the BP_GET_TYPE and BP_GET_LEVEL values.
*/
- if (!zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_LOG)) {
+ if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
atomic_inc_64(&sle->sle_meta_count);
return (0);
}
@@ -6378,6 +6378,16 @@ spa_tryimport(nvlist_t *tryconfig)
spa->spa_config_source = SPA_CONFIG_SRC_SCAN;
}
+ /*
+ * spa_import() relies on a pool config fetched by spa_tryimport()
+ * for spare/cache devices. Import flags are not passed to
+ * spa_tryimport(), which makes it return early due to a missing log
+ * device, so the cache and spare devices are never retrieved.
+ * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch
+ * the correct configuration regardless of the missing log device.
+ */
+ spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG;
+
error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING);
/*
diff --git a/sys/contrib/openzfs/module/zfs/spa_errlog.c b/sys/contrib/openzfs/module/zfs/spa_errlog.c
index 3bc8619b51a8..31719063a227 100644
--- a/sys/contrib/openzfs/module/zfs/spa_errlog.c
+++ b/sys/contrib/openzfs/module/zfs/spa_errlog.c
@@ -72,6 +72,11 @@
#define NAME_MAX_LEN 64
+typedef struct clones {
+ uint64_t clone_ds;
+ list_node_t node;
+} clones_t;
+
/*
* spa_upgrade_errlog_limit : A zfs module parameter that controls the number
* of on-disk error log entries that will be converted to the new
@@ -135,10 +140,6 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb)
}
#ifdef _KERNEL
-static int check_clones(spa_t *spa, uint64_t zap_clone, uint64_t snap_count,
- uint64_t *snap_obj_array, zbookmark_err_phys_t *zep, void* uaddr,
- uint64_t *count);
-
static void
zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep, zbookmark_phys_t *zb)
{
@@ -162,15 +163,15 @@ name_to_object(char *buf, uint64_t *obj)
static int get_head_ds(spa_t *spa, uint64_t dsobj, uint64_t *head_ds)
{
dsl_dataset_t *ds;
- int error = dsl_dataset_hold_obj(spa->spa_dsl_pool,
- dsobj, FTAG, &ds);
+ int error = dsl_dataset_hold_obj_flags(spa->spa_dsl_pool,
+ dsobj, DS_HOLD_FLAG_DECRYPT, FTAG, &ds);
if (error != 0)
return (error);
ASSERT(head_ds);
*head_ds = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
return (error);
}
@@ -291,12 +292,13 @@ copyout_entry(const zbookmark_phys_t *zb, void *uaddr, uint64_t *count)
*/
static int
check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
- void *uaddr, uint64_t *count)
+ void *uaddr, uint64_t *count, list_t *clones_list)
{
dsl_dataset_t *ds;
dsl_pool_t *dp = spa->spa_dsl_pool;
- int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds);
+ int error = dsl_dataset_hold_obj_flags(dp, head_ds,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds);
if (error != 0)
return (error);
@@ -306,23 +308,6 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
error = find_birth_txg(ds, zep, &latest_txg);
/*
- * If the filesystem is encrypted and the key is not loaded
- * or the encrypted filesystem is not mounted the error will be EACCES.
- * In that case report an error in the head filesystem and return.
- */
- if (error == EACCES) {
- dsl_dataset_rele(ds, FTAG);
- zbookmark_phys_t zb;
- zep_to_zb(head_ds, zep, &zb);
- error = copyout_entry(&zb, uaddr, count);
- if (error != 0) {
- dsl_dataset_rele(ds, FTAG);
- return (error);
- }
- return (0);
- }
-
- /*
* If find_birth_txg() errors out otherwise, let txg_to_consider be
* equal to the spa's syncing txg: if check_filesystem() errors out
* then affected snapshots or clones will not be checked.
@@ -333,7 +318,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
zep_to_zb(head_ds, zep, &zb);
error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
return (error);
}
check_snapshot = B_FALSE;
@@ -351,14 +336,14 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
if (error != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
return (error);
}
}
if (snap_count == 0) {
/* Filesystem without snapshots. */
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
return (0);
}
@@ -370,20 +355,21 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
uint64_t snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
uint64_t zap_clone = dsl_dir_phys(ds->ds_dir)->dd_clones;
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
/* Check only snapshots created from this file system. */
while (snap_obj != 0 && zep->zb_birth < snap_obj_txg &&
snap_obj_txg <= txg_to_consider) {
- error = dsl_dataset_hold_obj(dp, snap_obj, FTAG, &ds);
+ error = dsl_dataset_hold_obj_flags(dp, snap_obj,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds);
if (error != 0)
goto out;
if (dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj != head_ds) {
snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
continue;
}
@@ -403,33 +389,20 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
zep_to_zb(snap_obj, zep, &zb);
error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT,
+ FTAG);
goto out;
}
}
snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
- dsl_dataset_rele(ds, FTAG);
- }
-
- if (zap_clone != 0 && aff_snap_count > 0) {
- error = check_clones(spa, zap_clone, snap_count, snap_obj_array,
- zep, uaddr, count);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
}
-out:
- kmem_free(snap_obj_array, sizeof (*snap_obj_array));
- return (error);
-}
+ /*
+ * Nothing further to check. Leave through the common exit path so
+ * that snap_obj_array is still released; report success explicitly,
+ * as error may hold a stale value from an earlier step.
+ */
+ if (zap_clone == 0 || aff_snap_count == 0) {
+ error = 0;
+ goto out;
+ }
-/*
- * Clone checking.
- */
-static int check_clones(spa_t *spa, uint64_t zap_clone, uint64_t snap_count,
- uint64_t *snap_obj_array, zbookmark_err_phys_t *zep, void* uaddr,
- uint64_t *count)
-{
- int error = 0;
+ /* Check clones. */
zap_cursor_t *zc;
zap_attribute_t *za;
@@ -440,10 +413,9 @@ static int check_clones(spa_t *spa, uint64_t zap_clone, uint64_t snap_count,
zap_cursor_retrieve(zc, za) == 0;
zap_cursor_advance(zc)) {
- dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_dataset_t *clone;
- error = dsl_dataset_hold_obj(dp, za->za_first_integer,
- FTAG, &clone);
+ error = dsl_dataset_hold_obj_flags(dp, za->za_first_integer,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &clone);
if (error != 0)
break;
@@ -458,22 +430,22 @@ static int check_clones(spa_t *spa, uint64_t zap_clone, uint64_t snap_count,
== snap_obj_array[i])
found = B_TRUE;
}
- dsl_dataset_rele(clone, FTAG);
+ dsl_dataset_rele_flags(clone, DS_HOLD_FLAG_DECRYPT, FTAG);
if (!found)
continue;
- error = check_filesystem(spa, za->za_first_integer, zep,
- uaddr, count);
-
- if (error != 0)
- break;
+ clones_t *ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
+ ct->clone_ds = za->za_first_integer;
+ list_insert_tail(clones_list, ct);
}
zap_cursor_fini(zc);
kmem_free(za, sizeof (*za));
kmem_free(zc, sizeof (*zc));
+out:
+ kmem_free(snap_obj_array, sizeof (*snap_obj_array));
return (error);
}
@@ -488,14 +460,14 @@ find_top_affected_fs(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
return (error);
dsl_dataset_t *ds;
- error = dsl_dataset_hold_obj(spa->spa_dsl_pool, oldest_dsobj,
- FTAG, &ds);
+ error = dsl_dataset_hold_obj_flags(spa->spa_dsl_pool, oldest_dsobj,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds);
if (error != 0)
return (error);
*top_affected_fs =
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
return (0);
}
@@ -521,10 +493,43 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
}
uint64_t top_affected_fs;
+ uint64_t init_count = *count;
int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
if (error == 0) {
+ clones_t *ct;
+ list_t clones_list;
+
+ list_create(&clones_list, sizeof (clones_t),
+ offsetof(clones_t, node));
+
error = check_filesystem(spa, top_affected_fs, zep,
- uaddr, count);
+ uaddr, count, &clones_list);
+
+ while ((ct = list_remove_head(&clones_list)) != NULL) {
+ error = check_filesystem(spa, ct->clone_ds, zep,
+ uaddr, count, &clones_list);
+ kmem_free(ct, sizeof (*ct));
+
+ if (error) {
+ while (!list_is_empty(&clones_list)) {
+ ct = list_remove_head(&clones_list);
+ kmem_free(ct, sizeof (*ct));
+ }
+ break;
+ }
+ }
+
+ list_destroy(&clones_list);
+ }
+ if (error == 0 && init_count == *count) {
+ /*
+ * If we reach this point, no errors have been detected
+ * in the checked filesystems/snapshots. Before returning mark
+ * the error block to be removed from the error lists and logs.
+ */
+ zbookmark_phys_t zb;
+ zep_to_zb(head_ds, zep, &zb);
+ spa_remove_error(spa, &zb, &zep->zb_birth);
}
return (error);
@@ -536,37 +541,111 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
* so that we can later remove the related log entries in sync context.
*/
static void
-spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb)
+spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb,
+ const uint64_t *birth)
{
char name[NAME_MAX_LEN];
if (obj == 0)
return;
- bookmark_to_name(healed_zb, name, sizeof (name));
- mutex_enter(&spa->spa_errlog_lock);
- if (zap_contains(spa->spa_meta_objset, obj, name) == 0) {
- /*
- * Found an error matching healed zb, add zb to our
- * tree of healed errors
- */
- avl_tree_t *tree = &spa->spa_errlist_healed;
- spa_error_entry_t search;
- spa_error_entry_t *new;
- avl_index_t where;
- search.se_bookmark = *healed_zb;
- mutex_enter(&spa->spa_errlist_lock);
- if (avl_find(tree, &search, &where) != NULL) {
- mutex_exit(&spa->spa_errlist_lock);
- mutex_exit(&spa->spa_errlog_lock);
- return;
+ boolean_t held_list = B_FALSE;
+ boolean_t held_log = B_FALSE;
+
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ bookmark_to_name(healed_zb, name, sizeof (name));
+
+ if (zap_contains(spa->spa_meta_objset, healed_zb->zb_objset,
+ name) == 0) {
+ if (!MUTEX_HELD(&spa->spa_errlog_lock)) {
+ mutex_enter(&spa->spa_errlog_lock);
+ held_log = B_TRUE;
+ }
+
+ /*
+ * Found an error matching healed zb, add zb to our
+ * tree of healed errors
+ */
+ avl_tree_t *tree = &spa->spa_errlist_healed;
+ spa_error_entry_t search;
+ spa_error_entry_t *new;
+ avl_index_t where;
+ search.se_bookmark = *healed_zb;
+ if (!MUTEX_HELD(&spa->spa_errlist_lock)) {
+ mutex_enter(&spa->spa_errlist_lock);
+ held_list = B_TRUE;
+ }
+ if (avl_find(tree, &search, &where) != NULL) {
+ if (held_list)
+ mutex_exit(&spa->spa_errlist_lock);
+ if (held_log)
+ mutex_exit(&spa->spa_errlog_lock);
+ return;
+ }
+ new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
+ new->se_bookmark = *healed_zb;
+ avl_insert(tree, new, where);
+ if (held_list)
+ mutex_exit(&spa->spa_errlist_lock);
+ if (held_log)
+ mutex_exit(&spa->spa_errlog_lock);
}
- new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
- new->se_bookmark = *healed_zb;
- avl_insert(tree, new, where);
- mutex_exit(&spa->spa_errlist_lock);
+ return;
}
- mutex_exit(&spa->spa_errlog_lock);
+
+ zbookmark_err_phys_t healed_zep;
+ healed_zep.zb_object = healed_zb->zb_object;
+ healed_zep.zb_level = healed_zb->zb_level;
+ healed_zep.zb_blkid = healed_zb->zb_blkid;
+
+ if (birth != NULL)
+ healed_zep.zb_birth = *birth;
+ else
+ healed_zep.zb_birth = 0;
+
+ errphys_to_name(&healed_zep, name, sizeof (name));
+
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_errlog_last);
+ zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) {
+ if (zap_contains(spa->spa_meta_objset, za.za_first_integer,
+ name) == 0) {
+ if (!MUTEX_HELD(&spa->spa_errlog_lock)) {
+ mutex_enter(&spa->spa_errlog_lock);
+ held_log = B_TRUE;
+ }
+
+ avl_tree_t *tree = &spa->spa_errlist_healed;
+ spa_error_entry_t search;
+ spa_error_entry_t *new;
+ avl_index_t where;
+ search.se_bookmark = *healed_zb;
+
+ if (!MUTEX_HELD(&spa->spa_errlist_lock)) {
+ mutex_enter(&spa->spa_errlist_lock);
+ held_list = B_TRUE;
+ }
+
+ if (avl_find(tree, &search, &where) != NULL) {
+ if (held_list)
+ mutex_exit(&spa->spa_errlist_lock);
+ if (held_log)
+ mutex_exit(&spa->spa_errlog_lock);
+ continue;
+ }
+ new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
+ new->se_bookmark = *healed_zb;
+ new->se_zep = healed_zep;
+ avl_insert(tree, new, where);
+
+ if (held_list)
+ mutex_exit(&spa->spa_errlist_lock);
+ if (held_log)
+ mutex_exit(&spa->spa_errlog_lock);
+ }
+ }
+ zap_cursor_fini(&zc);
}
/*
@@ -604,12 +683,36 @@ spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx)
&cookie)) != NULL) {
remove_error_from_list(spa, s, &se->se_bookmark);
remove_error_from_list(spa, l, &se->se_bookmark);
- bookmark_to_name(&se->se_bookmark, name, sizeof (name));
- kmem_free(se, sizeof (spa_error_entry_t));
- (void) zap_remove(spa->spa_meta_objset,
- spa->spa_errlog_last, name, tx);
- (void) zap_remove(spa->spa_meta_objset,
- spa->spa_errlog_scrub, name, tx);
+
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+ bookmark_to_name(&se->se_bookmark, name, sizeof (name));
+ (void) zap_remove(spa->spa_meta_objset,
+ spa->spa_errlog_last, name, tx);
+ (void) zap_remove(spa->spa_meta_objset,
+ spa->spa_errlog_scrub, name, tx);
+ } else {
+ errphys_to_name(&se->se_zep, name, sizeof (name));
+ zap_cursor_t zc;
+ zap_attribute_t za;
+ for (zap_cursor_init(&zc, spa->spa_meta_objset,
+ spa->spa_errlog_last);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ zap_remove(spa->spa_meta_objset,
+ za.za_first_integer, name, tx);
+ }
+ zap_cursor_fini(&zc);
+
+ for (zap_cursor_init(&zc, spa->spa_meta_objset,
+ spa->spa_errlog_scrub);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ zap_remove(spa->spa_meta_objset,
+ za.za_first_integer, name, tx);
+ }
+ zap_cursor_fini(&zc);
+ }
+ /*
+ * Both branches above read se->se_bookmark / se->se_zep to
+ * build the ZAP entry name, so the entry may only be freed
+ * once that is done.
+ */
+ kmem_free(se, sizeof (spa_error_entry_t));
}
}
@@ -618,14 +721,10 @@ spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx)
* later in spa_remove_healed_errors().
*/
void
-spa_remove_error(spa_t *spa, zbookmark_phys_t *zb)
+spa_remove_error(spa_t *spa, zbookmark_phys_t *zb, const uint64_t *birth)
{
- char name[NAME_MAX_LEN];
-
- bookmark_to_name(zb, name, sizeof (name));
-
- spa_add_healed_error(spa, spa->spa_errlog_last, zb);
- spa_add_healed_error(spa, spa->spa_errlog_scrub, zb);
+ spa_add_healed_error(spa, spa->spa_errlog_last, zb, birth);
+ spa_add_healed_error(spa, spa->spa_errlog_scrub, zb, birth);
}
static uint64_t
@@ -736,7 +835,8 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
dsl_dataset_t *ds;
objset_t *os;
- int error = dsl_dataset_hold_obj(dp, zb.zb_objset, FTAG, &ds);
+ int error = dsl_dataset_hold_obj_flags(dp, zb.zb_objset,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds);
if (error != 0)
continue;
@@ -751,7 +851,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
* truly persistent, it should re-appear after a scan.
*/
if (dmu_objset_from_ds(ds, &os) != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
continue;
}
@@ -759,7 +859,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
blkptr_t bp;
if (dnode_hold(os, zep.zb_object, FTAG, &dn) != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
continue;
}
@@ -773,7 +873,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
if (error != 0 || BP_IS_HOLE(&bp))
continue;
@@ -827,62 +927,84 @@ spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx)
static int
process_error_log(spa_t *spa, uint64_t obj, void *uaddr, uint64_t *count)
{
- zap_cursor_t zc;
- zap_attribute_t za;
-
if (obj == 0)
return (0);
+ zap_cursor_t *zc;
+ zap_attribute_t *za;
+
+ zc = kmem_zalloc(sizeof (zap_cursor_t), KM_SLEEP);
+ za = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP);
+
if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
- for (zap_cursor_init(&zc, spa->spa_meta_objset, obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
+ for (zap_cursor_init(zc, spa->spa_meta_objset, obj);
+ zap_cursor_retrieve(zc, za) == 0;
+ zap_cursor_advance(zc)) {
if (*count == 0) {
- zap_cursor_fini(&zc);
+ zap_cursor_fini(zc);
+ kmem_free(zc, sizeof (*zc));
+ kmem_free(za, sizeof (*za));
return (SET_ERROR(ENOMEM));
}
zbookmark_phys_t zb;
- name_to_bookmark(za.za_name, &zb);
+ name_to_bookmark(za->za_name, &zb);
int error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
- zap_cursor_fini(&zc);
+ zap_cursor_fini(zc);
+ kmem_free(zc, sizeof (*zc));
+ kmem_free(za, sizeof (*za));
return (error);
}
}
- zap_cursor_fini(&zc);
+ zap_cursor_fini(zc);
+ kmem_free(zc, sizeof (*zc));
+ kmem_free(za, sizeof (*za));
return (0);
}
- for (zap_cursor_init(&zc, spa->spa_meta_objset, obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
+ for (zap_cursor_init(zc, spa->spa_meta_objset, obj);
+ zap_cursor_retrieve(zc, za) == 0;
+ zap_cursor_advance(zc)) {
- zap_cursor_t head_ds_cursor;
- zap_attribute_t head_ds_attr;
+ zap_cursor_t *head_ds_cursor;
+ zap_attribute_t *head_ds_attr;
- uint64_t head_ds_err_obj = za.za_first_integer;
+ head_ds_cursor = kmem_zalloc(sizeof (zap_cursor_t), KM_SLEEP);
+ head_ds_attr = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP);
+
+ uint64_t head_ds_err_obj = za->za_first_integer;
uint64_t head_ds;
- name_to_object(za.za_name, &head_ds);
- for (zap_cursor_init(&head_ds_cursor, spa->spa_meta_objset,
- head_ds_err_obj); zap_cursor_retrieve(&head_ds_cursor,
- &head_ds_attr) == 0; zap_cursor_advance(&head_ds_cursor)) {
+ name_to_object(za->za_name, &head_ds);
+ for (zap_cursor_init(head_ds_cursor, spa->spa_meta_objset,
+ head_ds_err_obj); zap_cursor_retrieve(head_ds_cursor,
+ head_ds_attr) == 0; zap_cursor_advance(head_ds_cursor)) {
zbookmark_err_phys_t head_ds_block;
- name_to_errphys(head_ds_attr.za_name, &head_ds_block);
+ name_to_errphys(head_ds_attr->za_name, &head_ds_block);
int error = process_error_block(spa, head_ds,
&head_ds_block, uaddr, count);
if (error != 0) {
- zap_cursor_fini(&head_ds_cursor);
- zap_cursor_fini(&zc);
+ zap_cursor_fini(head_ds_cursor);
+ kmem_free(head_ds_cursor,
+ sizeof (*head_ds_cursor));
+ kmem_free(head_ds_attr, sizeof (*head_ds_attr));
+
+ zap_cursor_fini(zc);
+ kmem_free(za, sizeof (*za));
+ kmem_free(zc, sizeof (*zc));
return (error);
}
}
- zap_cursor_fini(&head_ds_cursor);
+ zap_cursor_fini(head_ds_cursor);
+ kmem_free(head_ds_cursor, sizeof (*head_ds_cursor));
+ kmem_free(head_ds_attr, sizeof (*head_ds_attr));
}
- zap_cursor_fini(&zc);
+ zap_cursor_fini(zc);
+ kmem_free(za, sizeof (*za));
+ kmem_free(zc, sizeof (*zc));
return (0);
}
@@ -1229,7 +1351,8 @@ find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head,
dsl_dataset_t *ds;
dsl_pool_t *dp = spa->spa_dsl_pool;
- int error = dsl_dataset_hold_obj(dp, old_head, FTAG, &ds);
+ int error = dsl_dataset_hold_obj_flags(dp, old_head,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds);
if (error != 0)
return (error);
@@ -1237,9 +1360,9 @@ find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head,
uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
while (prev_obj != 0) {
- dsl_dataset_rele(ds, FTAG);
- if ((error = dsl_dataset_hold_obj(dp, prev_obj,
- FTAG, &ds)) == 0 &&
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
+ if ((error = dsl_dataset_hold_obj_flags(dp, prev_obj,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds)) == 0 &&
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj == new_head)
break;
@@ -1249,7 +1372,7 @@ find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head,
prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
}
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
ASSERT(prev_obj != 0);
*txg = prev_obj_txg;
return (0);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 22e644f75f95..3b1e2ae5fb5d 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -7862,6 +7862,8 @@ zfs_kmod_fini(void)
zfs_onexit_destroy(zs->zs_onexit);
if (zs->zs_zevent)
zfs_zevent_destroy(zs->zs_zevent);
+ if (zs != &zfsdev_state_listhead)
+ kmem_free(zs, sizeof (zfsdev_state_t));
}
zfs_ereport_taskq_fini(); /* run before zfs_fini() on Linux */
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index a6a27222bf4c..86706469acee 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -1072,6 +1072,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
inzfsvfs = ZTOZSB(inzp);
outzfsvfs = ZTOZSB(outzp);
+
+ /*
+ * We need to call zfs_enter() potentially on two different datasets,
+ * so we need a dedicated function for that.
+ */
+ error = zfs_enter_two(inzfsvfs, outzfsvfs, FTAG);
+ if (error != 0)
+ return (error);
+
inos = inzfsvfs->z_os;
outos = outzfsvfs->z_os;
@@ -1083,14 +1092,6 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
return (SET_ERROR(EXDEV));
}
- /*
- * We need to call zfs_enter() potentially on two different datasets,
- * so we need a dedicated function for that.
- */
- error = zfs_enter_two(inzfsvfs, outzfsvfs, FTAG);
- if (error != 0)
- return (error);
-
ASSERT(!outzfsvfs->z_replay);
error = zfs_verify_zp(inzp);
@@ -1246,16 +1247,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
break;
}
- /*
- * Start a transaction.
- */
- tx = dmu_tx_create(outos);
-
nbps = maxblocks;
- error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, tx, bps,
+ error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
&nbps);
if (error != 0) {
- dmu_tx_abort(tx);
/*
* If we are tyring to clone a block that was created
* in the current transaction group. Return an error,
@@ -1276,12 +1271,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
*/
if (BP_IS_PROTECTED(&bps[0])) {
if (inzfsvfs != outzfsvfs) {
- dmu_tx_abort(tx);
error = SET_ERROR(EXDEV);
break;
}
}
+ /*
+ * Start a transaction.
+ */
+ tx = dmu_tx_create(outos);
dmu_tx_hold_sa(tx, outzp->z_sa_hdl, B_FALSE);
db = (dmu_buf_impl_t *)sa_get_db(outzp->z_sa_hdl);
DB_DNODE_ENTER(db);
@@ -1309,8 +1307,12 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
((len - 1) / inblksz + 1) * inblksz);
}
- dmu_brt_clone(outos, outzp->z_id, outoff, size, tx, bps, nbps,
- B_FALSE);
+ error = dmu_brt_clone(outos, outzp->z_id, outoff, size, tx,
+ bps, nbps, B_FALSE);
+ if (error != 0) {
+ dmu_tx_commit(tx);
+ break;
+ }
zfs_clear_setid_bits_if_necessary(outzfsvfs, outzp, cr,
&clear_setid_bits_txg, tx);
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index ec9da706a806..c37da89dd438 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -1866,6 +1866,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
ASSERT3U(wsz, <=, lwb->lwb_sz);
zio_shrink(lwb->lwb_write_zio, wsz);
+ wsz = lwb->lwb_write_zio->io_size;
} else {
wsz = lwb->lwb_sz;
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 0924fb6f40bc..c17ca5e1d651 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -935,9 +935,35 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
(void) vsnprintf(buf, sizeof (buf), fmt, adx);
va_end(adx);
+ zfs_dbgmsg("bad blkptr at %px: "
+ "DVA[0]=%#llx/%#llx "
+ "DVA[1]=%#llx/%#llx "
+ "DVA[2]=%#llx/%#llx "
+ "prop=%#llx "
+ "pad=%#llx,%#llx "
+ "phys_birth=%#llx "
+ "birth=%#llx "
+ "fill=%#llx "
+ "cksum=%#llx/%#llx/%#llx/%#llx",
+ bp,
+ (long long)bp->blk_dva[0].dva_word[0],
+ (long long)bp->blk_dva[0].dva_word[1],
+ (long long)bp->blk_dva[1].dva_word[0],
+ (long long)bp->blk_dva[1].dva_word[1],
+ (long long)bp->blk_dva[2].dva_word[0],
+ (long long)bp->blk_dva[2].dva_word[1],
+ (long long)bp->blk_prop,
+ (long long)bp->blk_pad[0],
+ (long long)bp->blk_pad[1],
+ (long long)bp->blk_phys_birth,
+ (long long)bp->blk_birth,
+ (long long)bp->blk_fill,
+ (long long)bp->blk_cksum.zc_word[0],
+ (long long)bp->blk_cksum.zc_word[1],
+ (long long)bp->blk_cksum.zc_word[2],
+ (long long)bp->blk_cksum.zc_word[3]);
switch (blk_verify) {
case BLK_VERIFY_HALT:
- dprintf_bp(bp, "blkptr at %p dprintf_bp():", bp);
zfs_panic_recover("%s: %s", spa_name(spa), buf);
break;
case BLK_VERIFY_LOG:
@@ -958,47 +984,54 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
* If everything checks out B_TRUE is returned. The zfs_blkptr_verify
* argument controls the behavior when an invalid field is detected.
*
- * Modes for zfs_blkptr_verify:
- * 1) BLK_VERIFY_ONLY (evaluate the block)
- * 2) BLK_VERIFY_LOG (evaluate the block and log problems)
- * 3) BLK_VERIFY_HALT (call zfs_panic_recover on error)
+ * Values for blk_verify_flag:
+ * BLK_VERIFY_ONLY: evaluate the block
+ * BLK_VERIFY_LOG: evaluate the block and log problems
+ * BLK_VERIFY_HALT: call zfs_panic_recover on error
+ *
+ * Values for blk_config_flag:
+ * BLK_CONFIG_HELD: caller holds SCL_VDEV for writer
+ * BLK_CONFIG_NEEDED: caller holds no config lock, SCL_VDEV will be
+ * obtained for reader
+ * BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better
+ * performance
*/
boolean_t
-zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held,
- enum blk_verify_flag blk_verify)
+zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
+ enum blk_config_flag blk_config, enum blk_verify_flag blk_verify)
{
int errors = 0;
if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p has invalid TYPE %llu",
+ "blkptr at %px has invalid TYPE %llu",
bp, (longlong_t)BP_GET_TYPE(bp));
}
if (BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p has invalid CHECKSUM %llu",
+ "blkptr at %px has invalid CHECKSUM %llu",
bp, (longlong_t)BP_GET_CHECKSUM(bp));
}
if (BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p has invalid COMPRESS %llu",
+ "blkptr at %px has invalid COMPRESS %llu",
bp, (longlong_t)BP_GET_COMPRESS(bp));
}
if (BP_GET_LSIZE(bp) > SPA_MAXBLOCKSIZE) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p has invalid LSIZE %llu",
+ "blkptr at %px has invalid LSIZE %llu",
bp, (longlong_t)BP_GET_LSIZE(bp));
}
if (BP_GET_PSIZE(bp) > SPA_MAXBLOCKSIZE) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p has invalid PSIZE %llu",
+ "blkptr at %px has invalid PSIZE %llu",
bp, (longlong_t)BP_GET_PSIZE(bp));
}
if (BP_IS_EMBEDDED(bp)) {
if (BPE_GET_ETYPE(bp) >= NUM_BP_EMBEDDED_TYPES) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p has invalid ETYPE %llu",
+ "blkptr at %px has invalid ETYPE %llu",
bp, (longlong_t)BPE_GET_ETYPE(bp));
}
}
@@ -1010,10 +1043,19 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held,
if (!spa->spa_trust_config)
return (errors == 0);
- if (!config_held)
- spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
- else
+ switch (blk_config) {
+ case BLK_CONFIG_HELD:
ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER));
+ break;
+ case BLK_CONFIG_NEEDED:
+ spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
+ break;
+ case BLK_CONFIG_SKIP:
+ return (errors == 0);
+ default:
+ panic("invalid blk_config %u", blk_config);
+ }
+
/*
* Pool-specific checks.
*
@@ -1028,20 +1070,20 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held,
if (vdevid >= spa->spa_root_vdev->vdev_children) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p DVA %u has invalid VDEV %llu",
+ "blkptr at %px DVA %u has invalid VDEV %llu",
bp, i, (longlong_t)vdevid);
continue;
}
vdev_t *vd = spa->spa_root_vdev->vdev_child[vdevid];
if (vd == NULL) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p DVA %u has invalid VDEV %llu",
+ "blkptr at %px DVA %u has invalid VDEV %llu",
bp, i, (longlong_t)vdevid);
continue;
}
if (vd->vdev_ops == &vdev_hole_ops) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p DVA %u has hole VDEV %llu",
+ "blkptr at %px DVA %u has hole VDEV %llu",
bp, i, (longlong_t)vdevid);
continue;
}
@@ -1059,13 +1101,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held,
asize = vdev_gang_header_asize(vd);
if (offset + asize > vd->vdev_asize) {
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
- "blkptr at %p DVA %u has invalid OFFSET %llu",
+ "blkptr at %px DVA %u has invalid OFFSET %llu",
bp, i, (longlong_t)offset);
}
}
- if (errors > 0)
- dprintf_bp(bp, "blkptr at %p dprintf_bp():", bp);
- if (!config_held)
+ if (blk_config == BLK_CONFIG_NEEDED)
spa_config_exit(spa, SCL_VDEV, bp);
return (errors == 0);
@@ -1203,7 +1243,7 @@ void
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
{
- (void) zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_HALT);
+ (void) zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_HALT);
/*
* The check for EMBEDDED is a performance optimization. We
@@ -1282,8 +1322,8 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
{
zio_t *zio;
- (void) zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER,
- BLK_VERIFY_HALT);
+ (void) zfs_blkptr_verify(spa, bp, (flags & ZIO_FLAG_CONFIG_WRITER) ?
+ BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_HALT);
if (BP_IS_EMBEDDED(bp))
return (zio_null(pio, spa, NULL, NULL, NULL, 0));
@@ -2301,7 +2341,7 @@ zio_nowait(zio_t *zio)
ASSERT3P(zio->io_executor, ==, NULL);
if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
- zio_unique_parent(zio) == NULL) {
+ list_is_empty(&zio->io_parent_list)) {
zio_t *pio;
/*
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 55991cfeaf78..e2137ac596d9 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -325,7 +325,8 @@ tests = ['zfs_wait_deleteq', 'zfs_wait_getsubopt']
tags = ['functional', 'cli_root', 'zfs_wait']
[tests/functional/cli_root/zhack]
-tests = ['zhack_label_checksum']
+tests = ['zhack_label_repair_001', 'zhack_label_repair_002',
+ 'zhack_label_repair_003', 'zhack_label_repair_004']
pre =
post =
tags = ['functional', 'cli_root', 'zhack']
@@ -421,7 +422,7 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
'import_cachefile_mirror_detached',
'import_cachefile_paths_changed',
'import_cachefile_shared_device',
- 'import_devices_missing',
+ 'import_devices_missing', 'import_log_missing',
'import_paths_changed',
'import_rewind_config_changed',
'import_rewind_device_replaced']
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index f3cfca912a57..63470bc041c6 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -257,6 +257,7 @@ if sys.platform.startswith('freebsd'):
'resilver/resilver_restart_001': ['FAIL', known_reason],
'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622],
'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623],
+ 'snapshot/snapshot_002_pos': ['FAIL', '14831'],
})
elif sys.platform.startswith('linux'):
maybe.update({
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
index 02e6a500a71a..8521f271be54 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -1951,6 +1951,7 @@ function check_pool_status # pool token keyword <verbose>
# is_pool_removing - to check if the pool removing is a vdev
# is_pool_removed - to check if the pool remove is completed
# is_pool_discarding - to check if the pool checkpoint is being discarded
+# is_pool_replacing - to check if the pool is performing a replacement
#
function is_pool_resilvering #pool <verbose>
{
@@ -1997,6 +1998,10 @@ function is_pool_discarding #pool
{
check_pool_status "$1" "checkpoint" "discarding"
}
+function is_pool_replacing #pool
+{
+ zpool status "$1" | grep -qE 'replacing-[0-9]+'
+}
function wait_for_degraded
{
@@ -2983,12 +2988,15 @@ function wait_freeing #pool
# Wait for every device replace operation to complete
#
# $1 pool name
+# $2 timeout in seconds (optional, default 300)
#
-function wait_replacing #pool
+function wait_replacing #pool timeout
{
+ typeset timeout=${2:-300}
typeset pool=${1:-$TESTPOOL}
- while zpool status $pool | grep -qE 'replacing-[0-9]+'; do
- log_must sleep 1
+ for (( timer = 0; timer < $timeout; timer++ )); do
+ is_pool_replacing $pool || break;
+ sleep 1;
done
}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib
index 38d9fecea7cf..da1e77e5fb97 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib
@@ -118,9 +118,7 @@ function verify_ne # <a> <b> <type>
# A simple function to get a random number between two bounds (inclusive)
#
-# Probably not the most efficient for large ranges, but it's okay.
-#
-# Note since we're using $RANDOM, 32767 is the largest number we
+# Note since we're using $RANDOM, $min+32767 is the largest number we
# can accept as the upper bound.
#
# $1 lower bound
@@ -129,11 +127,6 @@ function random_int_between
{
typeset -i min=$1
typeset -i max=$2
- typeset -i rand=0
-
- while [[ $rand -lt $min ]] ; do
- rand=$(( $RANDOM % $max + 1))
- done
- echo $rand
+ echo $(( (RANDOM % (max - min + 1)) + min ))
}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 74295b86ddc2..9299a4ca9b47 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -250,6 +250,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
functional/cli_root/zpool_upgrade/zpool_upgrade.cfg \
functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib \
functional/cli_root/zpool_wait/zpool_wait.kshlib \
+ functional/cli_root/zhack/library.kshlib \
functional/cli_user/misc/misc.cfg \
functional/cli_user/zfs_list/zfs_list.cfg \
functional/cli_user/zfs_list/zfs_list.kshlib \
@@ -932,7 +933,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zfs/zfs_001_neg.ksh \
functional/cli_root/zfs/zfs_002_pos.ksh \
functional/cli_root/zfs/zfs_003_neg.ksh \
- functional/cli_root/zhack/zhack_label_checksum.ksh \
+ functional/cli_root/zhack/zhack_label_repair_001.ksh \
+ functional/cli_root/zhack/zhack_label_repair_002.ksh \
+ functional/cli_root/zhack/zhack_label_repair_003.ksh \
+ functional/cli_root/zhack/zhack_label_repair_004.ksh \
functional/cli_root/zpool_add/add_nested_replacing_spare.ksh \
functional/cli_root/zpool_add/add-o_ashift.ksh \
functional/cli_root/zpool_add/add_prop_ashift.ksh \
@@ -1052,6 +1056,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_import/import_cachefile_paths_changed.ksh \
functional/cli_root/zpool_import/import_cachefile_shared_device.ksh \
functional/cli_root/zpool_import/import_devices_missing.ksh \
+ functional/cli_root/zpool_import/import_log_missing.ksh \
functional/cli_root/zpool_import/import_paths_changed.ksh \
functional/cli_root/zpool_import/import_rewind_config_changed.ksh \
functional/cli_root/zpool_import/import_rewind_device_replaced.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh
index 9ebde1cd9d32..261fc5eed8cb 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh
@@ -163,7 +163,24 @@ corrupt_blocks_at_level "/$TESTPOOL/testfs5/$TESTFILE0" 0
log_must zfs unmount $TESTPOOL/testfs5
log_must zfs unload-key $TESTPOOL/testfs5
# test healing recv (on an encrypted dataset) using a raw send file
-test_corrective_recv "$TESTPOOL/testfs5@snap1" $raw_backup
+# This is a special case since with unloaded keys we cannot report errors
+# in the filesystem.
+log_must zpool scrub -w $TESTPOOL
+log_must zpool status -v $TESTPOOL
+log_mustnot eval "zpool status -v $TESTPOOL | \
+ grep \"permission denied\""
+# make sure we will read the corruption from disk by flushing the ARC
+log_must zinject -a
+log_must eval "zfs recv -c $TESTPOOL/testfs5@snap1 < $raw_backup"
+
+log_must zpool scrub -w $TESTPOOL
+log_must zpool status -v $TESTPOOL
+log_mustnot eval "zpool status -v $TESTPOOL | \
+ grep \"Permanent errors have been detected\""
+typeset cksum=$(md5digest $file)
+[[ "$cksum" == "$checksum" ]] || \
+ log_fail "Checksums differ ($cksum != $checksum)"
+
# non raw send file healing an encrypted dataset with an unloaded key will fail
log_mustnot eval "zfs recv -c $TESTPOOL/testfs5@snap1 < $backup"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib
new file mode 100644
index 000000000000..880a78861630
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib
@@ -0,0 +1,361 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by vStack. All rights reserved.
+#
+
+. "$STF_SUITE"/include/libtest.shlib
+. "$STF_SUITE"/include/blkdev.shlib
+
+#
+# Description:
+#
+# Test whether zhack label repair commands can recover detached devices
+# and corrupted checksums with a variety of sizes, and ensure
+# the purpose of each command is cleanly separated from the others.
+#
+# Strategy:
+#
+# Tests are done on loopback devices with sizes divisible by label size and sizes that are not.
+#
+# Test one:
+#
+# 1. Create pool on a loopback device with some test data
+# 2. Export the pool.
+# 3. Corrupt all label checksums in the pool
+# 4. Check that pool cannot be imported
+# 5. Verify that it cannot be imported after using zhack label repair -u
+# to ensure that the -u option will quit on corrupted checksums.
+# 6. Use zhack label repair -c on device
+# 7. Check that pool can be imported and that data is intact
+#
+# Test two:
+#
+# 1. Create a mirrored pool on two loopback devices with some test data
+# 2. Detach the second (mirror) device from the pool
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Verify that the remaining detached device cannot be imported
+# 6. Verify that it cannot be imported after using zhack label repair -c
+# to ensure that the -c option will not undetach a device.
+# 7. Use zhack label repair -u on device
+# 8. Verify that the detached device can be imported and that data is intact
+#
+# Test three:
+#
+# 1. Create a mirrored pool on two loopback devices with some test data
+# 2. Detach the second (mirror) device from the pool
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Corrupt all label checksums on the remaining device
+# 6. Verify that the remaining detached device cannot be imported
+# 7. Verify that it cannot be imported after using zhack label repair -u
+# to ensure that the -u option will quit on corrupted checksums.
+# 8. Verify that it cannot be imported after using zhack label repair -c
+# -c should repair the checksums, but not undetach a device.
+# 9. Use zhack label repair -u on device
+# 10. Verify that the detached device can be imported and that data is intact
+#
+# Test four:
+#
+# 1. Create a mirrored pool on two loopback devices with some test data
+# 2. Detach the second (mirror) device from the pool
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Corrupt all label checksums on the remaining device
+# 6. Verify that the remaining detached device cannot be imported
+# 7. Use zhack label repair -cu on device to attempt to fix checksums and
+# undetach the device in a single operation.
+# 8. Verify that the detached device can be imported and that data is intact
+#
+
+log_assert "Verify zhack label repair <operation> <vdev> will repair label checksums and uberblocks"
+log_onexit cleanup
+
+LABEL_SIZE="$((2**18))"
+LABEL_NVLIST_END="$((LABEL_SIZE / 2))"
+LABEL_CKSUM_SIZE="32"
+LABEL_CKSUM_START="$(( LABEL_NVLIST_END - LABEL_CKSUM_SIZE ))"
+
+VIRTUAL_DISK=$TEST_BASE_DIR/disk
+VIRTUAL_MIRROR_DISK=$TEST_BASE_DIR/mirrordisk
+
+VIRTUAL_DEVICE=
+VIRTUAL_MIRROR_DEVICE=
+
+function cleanup_lo
+{
+ L_DEVICE="$1"
+
+ if [[ -e $L_DEVICE ]]; then
+ if is_linux; then
+ log_must losetup -d "$L_DEVICE"
+ elif is_freebsd; then
+ log_must mdconfig -d -u "$L_DEVICE"
+ else
+ log_must lofiadm -d "$L_DEVICE"
+ fi
+ fi
+}
+
+function cleanup
+{
+ poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+ cleanup_lo "$VIRTUAL_DEVICE"
+ cleanup_lo "$VIRTUAL_MIRROR_DEVICE"
+ VIRTUAL_DEVICE=
+ VIRTUAL_MIRROR_DEVICE=
+ [[ -f "$VIRTUAL_DISK" ]] && log_must rm "$VIRTUAL_DISK"
+ [[ -f "$VIRTUAL_MIRROR_DISK" ]] && log_must rm "$VIRTUAL_MIRROR_DISK"
+}
+
+RAND_MAX="$((2**15 - 1))"
+function get_devsize
+{
+ if [ "$RANDOM" -gt "$(( RAND_MAX / 2 ))" ]; then
+ echo "$(( MINVDEVSIZE + RANDOM ))"
+ else
+ echo "$MINVDEVSIZE"
+ fi
+}
+
+function pick_logop
+{
+ L_SHOULD_SUCCEED="$1"
+
+ l_logop="log_mustnot"
+ if [ "$L_SHOULD_SUCCEED" == true ]; then
+ l_logop="log_must"
+ fi
+
+ echo "$l_logop"
+}
+
+function check_dataset
+{
+ L_SHOULD_SUCCEED="$1"
+ L_LOGOP="$(pick_logop "$L_SHOULD_SUCCEED")"
+
+ "$L_LOGOP" mounted "$TESTPOOL"/"$TESTFS"
+
+ "$L_LOGOP" test -f "$TESTDIR"/"test"
+}
+
+function setup_dataset
+{
+ log_must zfs create "$TESTPOOL"/"$TESTFS"
+
+ log_must mkdir -p "$TESTDIR"
+ log_must zfs set mountpoint="$TESTDIR" "$TESTPOOL"/"$TESTFS"
+
+ log_must mounted "$TESTPOOL"/"$TESTFS"
+
+ log_must touch "$TESTDIR"/"test"
+ log_must test -f "$TESTDIR"/"test"
+
+ log_must zpool sync "$TESTPOOL"
+
+ check_dataset true
+}
+
+function get_practical_size
+{
+ L_SIZE="$1"
+
+ if [ "$((L_SIZE % LABEL_SIZE))" -ne 0 ]; then
+ echo "$(((L_SIZE / LABEL_SIZE) * LABEL_SIZE))"
+ else
+ echo "$L_SIZE"
+ fi
+}
+
+function corrupt_sized_label_checksum
+{
+ L_SIZE="$1"
+ L_LABEL="$2"
+ L_DEVICE="$3"
+
+ L_PRACTICAL_SIZE="$(get_practical_size "$L_SIZE")"
+
+ typeset -a L_OFFSETS=("$LABEL_CKSUM_START" \
+ "$((LABEL_SIZE + LABEL_CKSUM_START))" \
+ "$(((L_PRACTICAL_SIZE - LABEL_SIZE*2) + LABEL_CKSUM_START))" \
+ "$(((L_PRACTICAL_SIZE - LABEL_SIZE) + LABEL_CKSUM_START))")
+
+ dd if=/dev/urandom of="$L_DEVICE" \
+ seek="${L_OFFSETS["$L_LABEL"]}" bs=1 count="$LABEL_CKSUM_SIZE" \
+ conv=notrunc
+}
+
+function corrupt_labels
+{
+ L_SIZE="$1"
+ L_DISK="$2"
+
+ corrupt_sized_label_checksum "$L_SIZE" 0 "$L_DISK"
+ corrupt_sized_label_checksum "$L_SIZE" 1 "$L_DISK"
+ corrupt_sized_label_checksum "$L_SIZE" 2 "$L_DISK"
+ corrupt_sized_label_checksum "$L_SIZE" 3 "$L_DISK"
+}
+
+function try_import_and_repair
+{
+ L_REPAIR_SHOULD_SUCCEED="$1"
+ L_IMPORT_SHOULD_SUCCEED="$2"
+ L_OP="$3"
+ L_POOLDISK="$4"
+ L_REPAIR_LOGOP="$(pick_logop "$L_REPAIR_SHOULD_SUCCEED")"
+ L_IMPORT_LOGOP="$(pick_logop "$L_IMPORT_SHOULD_SUCCEED")"
+
+ log_mustnot zpool import "$TESTPOOL" -d "$L_POOLDISK"
+
+ "$L_REPAIR_LOGOP" zhack label repair "$L_OP" "$L_POOLDISK"
+
+ "$L_IMPORT_LOGOP" zpool import "$TESTPOOL" -d "$L_POOLDISK"
+
+ check_dataset "$L_IMPORT_SHOULD_SUCCEED"
+}
+
+function prepare_vdev
+{
+ L_SIZE="$1"
+ L_BACKFILE="$2"
+
+ l_devname=
+ if truncate -s "$L_SIZE" "$L_BACKFILE"; then
+ if is_linux; then
+ l_devname="$(losetup -f "$L_BACKFILE" --show)"
+ elif is_freebsd; then
+ l_devname=/dev/"$(mdconfig -a -t vnode -f "$L_BACKFILE")"
+ else
+ l_devname="$(lofiadm -a "$L_BACKFILE")"
+ fi
+ fi
+ echo "$l_devname"
+}
+
+function run_test_one
+{
+ L_SIZE="$1"
+
+ VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")"
+ log_must test -e "$VIRTUAL_DEVICE"
+
+ log_must zpool create "$TESTPOOL" "$VIRTUAL_DEVICE"
+
+ setup_dataset
+
+ log_must zpool export "$TESTPOOL"
+
+ corrupt_labels "$L_SIZE" "$VIRTUAL_DISK"
+
+ try_import_and_repair false false "-u" "$VIRTUAL_DEVICE"
+
+ try_import_and_repair true true "-c" "$VIRTUAL_DEVICE"
+
+ cleanup
+
+ log_pass "zhack label repair corruption test passed with a randomized size of $L_SIZE"
+}
+
+function make_mirrored_pool
+{
+ L_SIZE="$1"
+
+ VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")"
+ log_must test -e "$VIRTUAL_DEVICE"
+ VIRTUAL_MIRROR_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_MIRROR_DISK")"
+ log_must test -e "$VIRTUAL_MIRROR_DEVICE"
+
+ log_must zpool create "$TESTPOOL" "$VIRTUAL_DEVICE"
+ log_must zpool attach "$TESTPOOL" "$VIRTUAL_DEVICE" "$VIRTUAL_MIRROR_DEVICE"
+}
+
+function export_and_cleanup_vdisk
+{
+ log_must zpool export "$TESTPOOL"
+
+ cleanup_lo "$VIRTUAL_DEVICE"
+
+ VIRTUAL_DEVICE=
+
+ log_must rm "$VIRTUAL_DISK"
+}
+
+function run_test_two
+{
+ L_SIZE="$1"
+
+ make_mirrored_pool "$L_SIZE"
+
+ setup_dataset
+
+ log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE"
+
+ export_and_cleanup_vdisk
+
+ try_import_and_repair false false "-c" "$VIRTUAL_MIRROR_DEVICE"
+
+ try_import_and_repair true true "-u" "$VIRTUAL_MIRROR_DEVICE"
+
+ cleanup
+
+ log_pass "zhack label repair detached test passed with a randomized size of $L_SIZE"
+}
+
+function run_test_three
+{
+ L_SIZE="$1"
+
+ make_mirrored_pool "$L_SIZE"
+
+ setup_dataset
+
+ log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE"
+
+ export_and_cleanup_vdisk
+
+ corrupt_labels "$L_SIZE" "$VIRTUAL_MIRROR_DISK"
+
+ try_import_and_repair false false "-u" "$VIRTUAL_MIRROR_DEVICE"
+
+ try_import_and_repair true false "-c" "$VIRTUAL_MIRROR_DEVICE"
+
+ try_import_and_repair true true "-u" "$VIRTUAL_MIRROR_DEVICE"
+
+ cleanup
+
+ log_pass "zhack label repair corruption and detached test passed with a randomized size of $L_SIZE"
+}
+
+function run_test_four
+{
+ L_SIZE="$1"
+
+ make_mirrored_pool "$L_SIZE"
+
+ setup_dataset
+
+ log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE"
+
+ export_and_cleanup_vdisk
+
+ corrupt_labels "$L_SIZE" "$VIRTUAL_MIRROR_DISK"
+
+ try_import_and_repair true true "-cu" "$VIRTUAL_MIRROR_DEVICE"
+
+ cleanup
+
+ log_pass "zhack label repair corruption and detached single-command test passed with a randomized size of $L_SIZE."
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh
deleted file mode 100755
index 67c7e7c4487d..000000000000
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/ksh
-
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source. A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2021 by vStack. All rights reserved.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/include/blkdev.shlib
-
-#
-# Description:
-# zhack label repair <vdev> will calculate and rewrite label checksum if invalid
-#
-# Strategy:
-# 1. Create pool with some number of vdevs and export it
-# 2. Corrupt all labels checksums
-# 3. Check that pool cannot be imported
-# 4. Use zhack to repair labels checksums
-# 5. Check that pool can be imported
-#
-
-log_assert "Verify zhack label repair <vdev> will repair labels checksums"
-log_onexit cleanup
-
-VIRTUAL_DISK=$TEST_BASE_DIR/disk
-
-function cleanup
-{
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
- [[ -f $VIRTUAL_DISK ]] && log_must rm $VIRTUAL_DISK
-}
-
-log_must truncate -s $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK
-
-log_must zpool create $TESTPOOL $VIRTUAL_DISK
-log_must zpool export $TESTPOOL
-
-log_mustnot zhack label repair $VIRTUAL_DISK
-
-corrupt_label_checksum 0 $VIRTUAL_DISK
-corrupt_label_checksum 1 $VIRTUAL_DISK
-corrupt_label_checksum 2 $VIRTUAL_DISK
-corrupt_label_checksum 3 $VIRTUAL_DISK
-
-log_mustnot zpool import $TESTPOOL -d $TEST_BASE_DIR
-
-log_must zhack label repair $VIRTUAL_DISK
-
-log_must zpool import $TESTPOOL -d $TEST_BASE_DIR
-
-cleanup
-
-log_pass "zhack label repair works correctly."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh
new file mode 100755
index 000000000000..2a511e9efcb6
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+
+#
+# Description:
+#
+# Test whether zhack label repair can recover
+# corrupted checksums on devices of varied size,
+# but not undetached devices.
+#
+# Strategy:
+#
+# 1. Create pool on a loopback device with some test data
+# 2. Export the pool.
+# 3. Corrupt all label checksums in the pool
+# 4. Check that pool cannot be imported
+# 5. Verify that it cannot be imported after using zhack label repair -u
+# to ensure that the -u option will quit on corrupted checksums.
+# 6. Use zhack label repair -c on device
+# 7. Check that pool can be imported and that data is intact
+
+# Shared helper library for the zhack tests.
+. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib
+
+# run_test_one and get_devsize are not defined in this script; presumably
+# both come from the library sourced above -- confirm.
+run_test_one "$(get_devsize)"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh
new file mode 100755
index 000000000000..4f1e61a39857
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh
@@ -0,0 +1,31 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+
+#
+# Description:
+#
+# Test whether zhack label repair can recover
+# detached drives on devices of varied size, but not
+# repair corrupted checksums.
+#
+# Strategy:
+#
+# 1. Create pool on a loopback device with some test data
+# 2. Detach either device from the mirror
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Verify that the remaining detached device cannot be imported
+# 6. Verify that it cannot be imported after using zhack label repair -c
+# to ensure that the -c option will not undetach a device.
+# 7. Use zhack label repair -u on device
+# 8. Verify that the detached device can be imported and that data is intact
+
+# Shared helper library for the zhack tests.
+. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib
+
+# run_test_two and get_devsize are not defined in this script; presumably
+# both come from the library sourced above -- confirm.
+run_test_two "$(get_devsize)"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh
new file mode 100755
index 000000000000..7e82363d2f46
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh
@@ -0,0 +1,33 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+
+#
+# Description:
+#
+# Test whether zhack label repair can recover a device of varied size with
+# corrupted checksums and which has been detached.
+#
+# Strategy:
+#
+# 1. Create pool on a loopback device with some test data
+# 2. Detach either device from the mirror
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Corrupt all label checksums on the remaining device
+# 6. Verify that the remaining detached device cannot be imported
+# 7. Verify that it cannot be imported after using zhack label repair -u
+# to ensure that the -u option will quit on corrupted checksums.
+# 8. Verify that it cannot be imported after using zhack label repair -c
+# -c should repair the checksums, but not undetach a device.
+# 9. Use zhack label repair -u on device
+# 10. Verify that the detached device can be imported and that data is intact
+
+# Shared helper library for the zhack tests.
+. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib
+
+# run_test_three and get_devsize are not defined in this script; presumably
+# both come from the library sourced above -- confirm.
+run_test_three "$(get_devsize)"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh
new file mode 100755
index 000000000000..0b739402b199
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+
+#
+# Description:
+#
+# Test whether zhack label repair can recover a device of varied size with
+# corrupted checksums and which has been detached (in one command).
+#
+# Strategy:
+#
+# 1. Create pool on a loopback device with some test data
+# 2. Detach either device from the mirror
+# 3. Export the pool
+# 4. Remove the non-detached device and its backing file
+# 5. Corrupt all label checksums on the remaining device
+# 6. Verify that the remaining detached device cannot be imported
+# 7. Use zhack label repair -cu on device to attempt to fix checksums and
+# undetach the device in a single operation.
+# 8. Verify that the detached device can be imported and that data is intact
+
+# Shared helper library for the zhack tests.
+. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib
+
+# run_test_four and get_devsize are not defined in this script; presumably
+# both come from the library sourced above -- confirm.
+run_test_four "$(get_devsize)"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh
new file mode 100755
index 000000000000..f12cac78540f
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh
@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
+
+#
+# DESCRIPTION:
+# Import with missing log device should not remove spare/cache.
+#
+# STRATEGY:
+# 1. Create a pool.
+# 2. Add spare, cache and log devices to the pool.
+# 3. Export the pool.
+# 4. Remove the log device.
+# 5. Import the pool with -m flag.
+# 6. Verify that spare and cache are still present in the pool.
+#
+
+verify_runnable "global"
+
+# cleanup is not defined in this script; presumably it is provided by
+# zpool_import.kshlib sourced above and registered here to run on exit
+# -- confirm.
+log_onexit cleanup
+
+#
+# Create a pool with cache, spare and log vdevs, export it, move the log
+# vdev away, re-import the pool with -m and fail the test if the cache or
+# spare vdev no longer appears in 'zpool status -v'.
+#
+# $1 - vdev specification passed to 'zpool create'
+# $2 - cache vdev
+# $3 - spare vdev
+# $4 - log vdev; also the vdev that is moved to $BACKUP_DEVICE_DIR
+#
+function test_missing_log
+{
+ typeset poolcreate="$1"
+ typeset cachevdev="$2"
+ typeset sparevdev="$3"
+ typeset logvdev="$4"
+ typeset missingvdev="$4"
+
+ log_note "$0: pool '$poolcreate', adding $cachevdev, $sparevdev," \
+ "$logvdev then moving away $missingvdev."
+
+ log_must zpool create $TESTPOOL1 $poolcreate
+
+ log_must zpool add $TESTPOOL1 cache $cachevdev spare $sparevdev \
+ log $logvdev
+
+ log_must_busy zpool export $TESTPOOL1
+
+ # Make the log vdev unavailable for the import below.
+ log_must mv $missingvdev $BACKUP_DEVICE_DIR
+
+ log_must zpool import -m -d $DEVICE_DIR $TESTPOOL1
+
+ # grep prints nothing when the vdev is not listed, which leaves the
+ # corresponding variable empty.
+ CACHE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $cachevdev)
+
+ SPARE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $sparevdev)
+
+ # Both operands need their '$' and a space before the closing ']';
+ # otherwise '[' exits with an error and this branch is never taken.
+ if [ -z "$CACHE_PRESENT" ] || [ -z "$SPARE_PRESENT" ]
+ then
+ log_fail "cache/spare vdev missing after importing with missing" \
+ "log device"
+ fi
+
+ # Cleanup
+ log_must zpool destroy $TESTPOOL1
+
+ log_note ""
+}
+
+# Destination directory for the log vdev that test_missing_log moves away.
+log_must mkdir -p $BACKUP_DEVICE_DIR
+
+# Pool on $VDEV0, cache on $VDEV1, spare on $VDEV2, log (the vdev that goes
+# missing) on $VDEV3.
+test_missing_log "$VDEV0" "$VDEV1" "$VDEV2" "$VDEV3"
+
+log_pass "zpool import succeeded with missing log device"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh
index 04cd1892380d..ec4c67fb42f5 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh
@@ -29,7 +29,7 @@
# Verify correct output with 'zpool status -v' after corrupting a file
#
# STRATEGY:
-# 1. Create a pool, an ancrypted filesystem and a file
+# 1. Create a pool, an encrypted filesystem and a file
# 2. zinject checksum errors
# 3. Unmount the filesystem and unload the key
# 4. Scrub the pool
@@ -76,8 +76,8 @@ log_must zpool sync $TESTPOOL2
log_must zpool scrub $TESTPOOL2
log_must zpool wait -t scrub $TESTPOOL2
log_must zpool status -v $TESTPOOL2
-log_must eval "zpool status -v $TESTPOOL2 | \
- grep \"Permanent errors have been detected\""
+log_mustnot eval "zpool status -v $TESTPOOL2 | \
+ grep \"permission denied\""
log_mustnot eval "zpool status -v $TESTPOOL2 | grep '$file'"
log_must eval "cat /$TESTPOOL2/pwd | zfs load-key $TESTPOOL2/$TESTFS1"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_007_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_007_pos.ksh
index c9849379f779..666ac9bfc9dd 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_007_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_007_pos.ksh
@@ -39,6 +39,9 @@
# 7. Verify we report errors in the pool in 'zpool status -v'
# 8. Promote clone1
# 9. Verify we report errors in the pool in 'zpool status -v'
+# 10. Delete the corrupted file and origin snapshots.
+# 11. Verify we do not report data errors anymore, without requiring
+# a scrub.
. $STF_SUITE/include/libtest.shlib
@@ -95,4 +98,14 @@ log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clonexx/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone3/$TESTFILE0'"
+log_must rm /$TESTPOOL2/clone1/$TESTFILE0
+log_must zfs destroy -R $TESTPOOL2/clone1@snap1
+log_must zfs destroy -R $TESTPOOL2/clone1@snap2
+log_must zfs list -r $TESTPOOL2
+log_must zpool status -v $TESTPOOL2
+log_must zpool sync
+log_must zpool status -v $TESTPOOL2
+log_must eval "zpool status -v $TESTPOOL2 | \
+ grep \"No known data errors\""
+
log_pass "Verify reporting errors when deleting corrupted files after scrub"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
index 2846192d08eb..081e6c18430d 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
@@ -54,6 +54,7 @@ fi
function cleanup
{
+ zpool status $TESTPOOL
destroy_pool $TESTPOOL
sed -i '/alias scsidebug/d' $VDEVID_CONF
unload_scsi_debug
@@ -99,8 +100,8 @@ block_device_wait
insert_disk $SD $SD_HOST
# Wait for the new disk to be online and replaced
-log_must wait_vdev_state $TESTPOOL "scsidebug" "ONLINE" $MAXTIMEOUT
-log_must wait_replacing $TESTPOOL
+log_must wait_vdev_state $TESTPOOL "scsidebug" "ONLINE" 60
+log_must wait_replacing $TESTPOOL 60
# Validate auto-replace was successful
log_must check_state $TESTPOOL "" "ONLINE"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh
index 0bbd08acdd3f..a93d0b3cc803 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh
@@ -67,7 +67,7 @@ typeset VDEV_MIN_MB=$((MINVDEVSIZE * 0.30 / 1024 / 1024))
log_must zpool create -f $TESTPOOL $TRIM_VDEV1 cache $TRIM_VDEV2
verify_vdevs "-le" "$VDEV_MIN_MB" $TRIM_VDEV2
-typeset fill_mb=$(( floor(2 * MINVDEVSIZE) ))
+typeset fill_mb=$(( floor(3 * MINVDEVSIZE) ))
export DIRECTORY=/$TESTPOOL
export NUMJOBS=1
export FILE_SIZE=${fill_mb}
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index f489f0b6c69a..ec45185797b4 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1048,7 +1048,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gd96e29576"
+#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_ge61076683"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1078,7 +1078,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_gd96e29576"
+#define ZFS_META_RELEASE "FreeBSD_ge61076683"
/* Define the project version. */
#define ZFS_META_VERSION "2.1.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index e0a986d41c02..29954b8d4719 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.1.99-1919-gd96e29576"
+#define ZFS_META_GITREV "zfs-2.1.99-1944-ge61076683"