about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Martin Matuska <mm@FreeBSD.org> 2024-02-08 12:39:04 +0000
committer Martin Matuska <mm@FreeBSD.org> 2024-02-08 15:51:08 +0000
commit a4e5e0106ac7145f56eb39a691e302cabb4635be (patch)
tree 8dac13394184eb6bc756212b00c57c85c1e7f276
parent 4594eb454891e6247a6ea786f467a0b960ddd835 (diff)
parent 229b9f4ed05e6d14fb4d73fa04a71e99b01bb534 (diff)
download src-a4e5e0106ac7145f56eb39a691e302cabb4635be.tar.gz
src-a4e5e0106ac7145f56eb39a691e302cabb4635be.zip
zfs: merge openzfs/zfs@229b9f4ed
Notable upstream pull request merges:
 #15769 082338875 Add 'zpool status -e' flag to see unhealthy vdevs
 #15804 a0d3fe72b libzdb: Initial breakout of libzdb
 #15847 229b9f4ed LUA: Backport CVE-2020-24370's patch

Obtained from: OpenZFS
OpenZFS commit: 229b9f4ed05e6d14fb4d73fa04a71e99b01bb534
-rw-r--r-- cddl/lib/Makefile 1
-rw-r--r-- cddl/lib/libzdb/Makefile 25
-rw-r--r-- cddl/lib/libzdb/Makefile.depend 14
-rw-r--r-- cddl/usr.sbin/zdb/Makefile 2
-rw-r--r-- rescue/rescue/Makefile 2
-rw-r--r-- share/mk/bsd.libnames.mk 1
-rw-r--r-- share/mk/src.libnames.mk 3
-rw-r--r-- sys/contrib/openzfs/cmd/zdb/Makefile.am 1
-rw-r--r-- sys/contrib/openzfs/cmd/zdb/zdb.c 108
-rw-r--r-- sys/contrib/openzfs/cmd/zpool/zpool_main.c 58
-rw-r--r-- sys/contrib/openzfs/config/kernel-blkdev.m4 34
-rw-r--r-- sys/contrib/openzfs/include/Makefile.am 1
-rw-r--r-- sys/contrib/openzfs/include/libzdb.h 68
-rw-r--r-- sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h 1
-rw-r--r-- sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h 2
-rw-r--r-- sys/contrib/openzfs/include/sys/zfs_vnops.h 3
-rw-r--r-- sys/contrib/openzfs/lib/Makefile.am 5
-rw-r--r-- sys/contrib/openzfs/lib/libzdb/Makefile.am 7
-rw-r--r-- sys/contrib/openzfs/lib/libzdb/libzdb.c 102
-rw-r--r-- sys/contrib/openzfs/man/man4/zfs.4 9
-rw-r--r-- sys/contrib/openzfs/man/man8/zpool-status.8 4
-rw-r--r-- sys/contrib/openzfs/module/lua/ldebug.c 7
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c 4
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c 72
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c 5
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c 48
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_vnops.c 43
-rw-r--r-- sys/contrib/openzfs/tests/runfiles/common.run 5
-rwxr-xr-x sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in 2
-rw-r--r-- sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg 1
-rw-r--r-- sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am 2
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh 4
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh 2
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh 104
-rwxr-xr-x sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh 161
-rw-r--r-- sys/modules/zfs/zfs_config.h 4
-rw-r--r-- sys/modules/zfs/zfs_gitrev.h 2
37 files changed, 736 insertions, 181 deletions
diff --git a/cddl/lib/Makefile b/cddl/lib/Makefile
index 68250c8300b1..e6a0763544f7 100644
--- a/cddl/lib/Makefile
+++ b/cddl/lib/Makefile
@@ -20,6 +20,7 @@ SUBDIR.${MK_ZFS}+= \
libtpool \
libumem \
libuutil \
+ libzdb \
libzfs \
libzfs_core \
libzfsbootenv \
diff --git a/cddl/lib/libzdb/Makefile b/cddl/lib/libzdb/Makefile
new file mode 100644
index 000000000000..63248399b91c
--- /dev/null
+++ b/cddl/lib/libzdb/Makefile
@@ -0,0 +1,25 @@
+.PATH: ${SRCTOP}/sys/contrib/openzfs/lib/libzdb
+.PATH: ${SRCTOP}/sys/contrib/openzfs/include
+
+LIB= zdb
+PACKAGE= zfs
+
+INCS = libzdb.h
+
+SRCS = libzdb.c
+
+WARNS?= 2
+CSTD= c99
+
+CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include
+CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include
+CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd
+CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/zfs
+CFLAGS+= -I${SRCTOP}/sys
+CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
+CFLAGS+= -include ${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
+CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzutil
+CFLAGS+= -DHAVE_ISSETUGID -DIN_BASE
+CFLAGS+= -include ${SRCTOP}/sys/modules/zfs/zfs_config.h
+
+.include <bsd.lib.mk>
diff --git a/cddl/lib/libzdb/Makefile.depend b/cddl/lib/libzdb/Makefile.depend
new file mode 100644
index 000000000000..93249906da4f
--- /dev/null
+++ b/cddl/lib/libzdb/Makefile.depend
@@ -0,0 +1,14 @@
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ include \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/cddl/usr.sbin/zdb/Makefile b/cddl/usr.sbin/zdb/Makefile
index 744db789772c..e41f4afce82f 100644
--- a/cddl/usr.sbin/zdb/Makefile
+++ b/cddl/usr.sbin/zdb/Makefile
@@ -23,7 +23,7 @@ CFLAGS+= \
-include ${ZFSTOP}/include/os/freebsd/spl/sys/ccompile.h \
-DHAVE_ISSETUGID
-LIBADD= nvpair umem uutil zfs spl avl zutil zpool crypto
+LIBADD= nvpair umem uutil zdb zfs spl avl zutil zpool crypto
CFLAGS.gcc+= -fms-extensions
# Since there are many asserts in this program, it makes no sense to compile
diff --git a/rescue/rescue/Makefile b/rescue/rescue/Makefile
index 7bf3299f4d48..0a8d142ef83a 100644
--- a/rescue/rescue/Makefile
+++ b/rescue/rescue/Makefile
@@ -153,7 +153,7 @@ CRUNCH_LIBS_zfs+= ${LIBBE} \
${LIBNVPAIR}
CRUNCH_LIBS_bectl+= ${CRUNCH_LIBS_zfs}
CRUNCH_LIBS_zpool+= ${CRUNCH_LIBS_zfs}
-CRUNCH_LIBS_zdb+= ${CRUNCH_LIBS_zfs}
+CRUNCH_LIBS_zdb+= ${CRUNCH_LIBS_zfs} ${LIBZDB}
.else
# liblzma needs pthread
CRUNCH_LIBS+= -lpthread
diff --git a/share/mk/bsd.libnames.mk b/share/mk/bsd.libnames.mk
index db08a5ac718c..414ae3164066 100644
--- a/share/mk/bsd.libnames.mk
+++ b/share/mk/bsd.libnames.mk
@@ -167,6 +167,7 @@ LIBXPG4?= ${LIBDESTDIR}${LIBDIR_BASE}/libxpg4.a
LIBY?= ${LIBDESTDIR}${LIBDIR_BASE}/liby.a
LIBYPCLNT?= ${LIBDESTDIR}${LIBDIR_BASE}/libypclnt.a
LIBZ?= ${LIBDESTDIR}${LIBDIR_BASE}/libz.a
+LIBZDB?= ${LIBDESTDIR}${LIBDIR_BASE}/libzdb.a
LIBZFS?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs.a
LIBZFS_CORE?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs_core.a
LIBZFSBOOTENV?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfsbootenv.a
diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk
index 5a6932614b29..658dd1c3d699 100644
--- a/share/mk/src.libnames.mk
+++ b/share/mk/src.libnames.mk
@@ -223,6 +223,7 @@ _LIBRARIES= \
y \
ypclnt \
z \
+ zdb \
zfs_core \
zfs \
zfsbootenv \
@@ -683,6 +684,8 @@ LIBNVPAIRDIR= ${_LIB_OBJTOP}/cddl/lib/libnvpair
LIBNVPAIR?= ${LIBNVPAIRDIR}/libnvpair${PIE_SUFFIX}.a
LIBUMEMDIR= ${_LIB_OBJTOP}/cddl/lib/libumem
LIBUUTILDIR= ${_LIB_OBJTOP}/cddl/lib/libuutil
+LIBZDBDIR= ${_LIB_OBJTOP}/cddl/lib/libzdb
+LIBZDB?= ${LIBZDBDIR}/libzdb${PIE_SUFFIX}.a
LIBZFSDIR= ${_LIB_OBJTOP}/cddl/lib/libzfs
LIBZFS?= ${LIBZFSDIR}/libzfs${PIE_SUFFIX}.a
LIBZFS_COREDIR= ${_LIB_OBJTOP}/cddl/lib/libzfs_core
diff --git a/sys/contrib/openzfs/cmd/zdb/Makefile.am b/sys/contrib/openzfs/cmd/zdb/Makefile.am
index c93c9c37cd8d..ebdc19128e1a 100644
--- a/sys/contrib/openzfs/cmd/zdb/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zdb/Makefile.am
@@ -10,6 +10,7 @@ zdb_SOURCES = \
%D%/zdb_il.c
zdb_LDADD = \
+ libzdb.la \
libzpool.la \
libzfs_core.la \
libnvpair.la
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index 2062f4fa1026..afdc5a2c8b54 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -88,36 +88,10 @@
#include <libnvpair.h>
#include <libzutil.h>
-#include "zdb.h"
+#include <libzdb.h>
-#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
- zio_compress_table[(idx)].ci_name : "UNKNOWN")
-#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
- zio_checksum_table[(idx)].ci_name : "UNKNOWN")
-#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
- (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \
- DMU_OT_ZAP_OTHER : \
- (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
- DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
-
-/* Some platforms require part of inode IDs to be remapped */
-#ifdef __APPLE__
-#define ZDB_MAP_OBJECT_ID(obj) INO_XNUTOZFS(obj, 2)
-#else
-#define ZDB_MAP_OBJECT_ID(obj) (obj)
-#endif
+#include "zdb.h"
-static const char *
-zdb_ot_name(dmu_object_type_t type)
-{
- if (type < DMU_OT_NUMTYPES)
- return (dmu_ot[type].ot_name);
- else if ((type & DMU_OT_NEWTYPE) &&
- ((type & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS))
- return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
- else
- return ("UNKNOWN");
-}
extern int reference_tracking_enable;
extern int zfs_recover;
@@ -135,35 +109,12 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
static uint64_t *zopt_metaslab = NULL;
static unsigned zopt_metaslab_args = 0;
-typedef struct zopt_object_range {
- uint64_t zor_obj_start;
- uint64_t zor_obj_end;
- uint64_t zor_flags;
-} zopt_object_range_t;
static zopt_object_range_t *zopt_object_ranges = NULL;
static unsigned zopt_object_args = 0;
static int flagbits[256];
-#define ZOR_FLAG_PLAIN_FILE 0x0001
-#define ZOR_FLAG_DIRECTORY 0x0002
-#define ZOR_FLAG_SPACE_MAP 0x0004
-#define ZOR_FLAG_ZAP 0x0008
-#define ZOR_FLAG_ALL_TYPES -1
-#define ZOR_SUPPORTED_FLAGS (ZOR_FLAG_PLAIN_FILE | \
- ZOR_FLAG_DIRECTORY | \
- ZOR_FLAG_SPACE_MAP | \
- ZOR_FLAG_ZAP)
-
-#define ZDB_FLAG_CHECKSUM 0x0001
-#define ZDB_FLAG_DECOMPRESS 0x0002
-#define ZDB_FLAG_BSWAP 0x0004
-#define ZDB_FLAG_GBH 0x0008
-#define ZDB_FLAG_INDIRECT 0x0010
-#define ZDB_FLAG_RAW 0x0020
-#define ZDB_FLAG_PRINT_BLKPTR 0x0040
-#define ZDB_FLAG_VERBOSE 0x0080
static uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
@@ -176,62 +127,7 @@ static void mos_obj_refd_multiple(uint64_t);
static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
dmu_tx_t *tx);
-typedef struct sublivelist_verify {
- /* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
- zfs_btree_t sv_pair;
-
- /* ALLOC's without a matching FREE, accumulates across sub-livelists */
- zfs_btree_t sv_leftover;
-} sublivelist_verify_t;
-
-static int
-livelist_compare(const void *larg, const void *rarg)
-{
- const blkptr_t *l = larg;
- const blkptr_t *r = rarg;
- /* Sort them according to dva[0] */
- uint64_t l_dva0_vdev, r_dva0_vdev;
- l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
- r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
- if (l_dva0_vdev < r_dva0_vdev)
- return (-1);
- else if (l_dva0_vdev > r_dva0_vdev)
- return (+1);
-
- /* if vdevs are equal, sort by offsets. */
- uint64_t l_dva0_offset;
- uint64_t r_dva0_offset;
- l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
- r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
- if (l_dva0_offset < r_dva0_offset) {
- return (-1);
- } else if (l_dva0_offset > r_dva0_offset) {
- return (+1);
- }
-
- /*
- * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
- * it's possible the offsets are equal. In that case, sort by txg
- */
- if (l->blk_birth < r->blk_birth) {
- return (-1);
- } else if (l->blk_birth > r->blk_birth) {
- return (+1);
- }
- return (0);
-}
-
-typedef struct sublivelist_verify_block {
- dva_t svb_dva;
-
- /*
- * We need this to check if the block marked as allocated
- * in the livelist was freed (and potentially reallocated)
- * in the metaslab spacemaps at a later TXG.
- */
- uint64_t svb_allocated_txg;
-} sublivelist_verify_block_t;
static void zdb_print_blkptr(const blkptr_t *bp, int flags);
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index 11486f3f185e..8753d7263914 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -2161,6 +2161,7 @@ typedef struct status_cbdata {
boolean_t cb_explain;
boolean_t cb_first;
boolean_t cb_dedup_stats;
+ boolean_t cb_print_unhealthy;
boolean_t cb_print_status;
boolean_t cb_print_slow_ios;
boolean_t cb_print_vdev_init;
@@ -2358,6 +2359,35 @@ health_str_to_color(const char *health)
}
/*
+ * Called for each leaf vdev. Returns 0 if the vdev is healthy.
+ * A vdev is unhealthy if any of the following are true:
+ * 1) there are read, write, or checksum errors,
+ * 2) its state is not ONLINE, or
+ * 3) slow IO reporting was requested (-s) and there are slow IOs.
+ */
+static int
+vdev_health_check_cb(void *hdl_data, nvlist_t *nv, void *data)
+{
+ status_cbdata_t *cb = data;
+ vdev_stat_t *vs;
+ uint_t vsc;
+ (void) hdl_data;
+
+ if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &vsc) != 0)
+ return (1);
+
+ if (vs->vs_checksum_errors || vs->vs_read_errors ||
+ vs->vs_write_errors || vs->vs_state != VDEV_STATE_HEALTHY)
+ return (1);
+
+ if (cb->cb_print_slow_ios && vs->vs_slow_ios)
+ return (1);
+
+ return (0);
+}
+
+/*
* Print out configuration state as requested by status_callback.
*/
static void
@@ -2375,7 +2405,8 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
const char *state;
const char *type;
const char *path = NULL;
- const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL;
+ const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL,
+ *scolor = NULL;
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
@@ -2402,6 +2433,15 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
state = gettext("AVAIL");
}
+ /*
+ * If '-e' is specified then top-level vdevs and their children
+ * can be pruned if all of their leaves are healthy.
+ */
+ if (cb->cb_print_unhealthy && depth > 0 &&
+ for_each_vdev_in_nvlist(nv, vdev_health_check_cb, cb) == 0) {
+ return;
+ }
+
printf_color(health_str_to_color(state),
"\t%*s%-*s %-8s", depth, "", cb->cb_namewidth - depth,
name, state);
@@ -2416,6 +2456,9 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
if (vs->vs_checksum_errors)
ccolor = ANSI_RED;
+ if (vs->vs_slow_ios)
+ scolor = ANSI_BLUE;
+
if (cb->cb_literal) {
fputc(' ', stdout);
printf_color(rcolor, "%5llu",
@@ -2448,9 +2491,10 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
}
if (cb->cb_literal)
- printf(" %5llu", (u_longlong_t)vs->vs_slow_ios);
+ printf_color(scolor, " %5llu",
+ (u_longlong_t)vs->vs_slow_ios);
else
- printf(" %5s", rbuf);
+ printf_color(scolor, " %5s", rbuf);
}
if (cb->cb_print_power) {
if (children == 0) {
@@ -9106,9 +9150,11 @@ status_callback(zpool_handle_t *zhp, void *data)
(void) printf(gettext(
"errors: No known data errors\n"));
} else if (!cbp->cb_verbose) {
+ color_start(ANSI_RED);
(void) printf(gettext("errors: %llu data "
"errors, use '-v' for a list\n"),
(u_longlong_t)nerr);
+ color_end();
} else {
print_error_log(zhp);
}
@@ -9129,6 +9175,7 @@ status_callback(zpool_handle_t *zhp, void *data)
* [pool] [interval [count]]
*
* -c CMD For each vdev, run command CMD
+ * -e Display only unhealthy vdevs
* -i Display vdev initialization status.
* -g Display guid for individual vdev name.
* -L Follow links when resolving vdev path name.
@@ -9160,7 +9207,7 @@ zpool_do_status(int argc, char **argv)
};
/* check options */
- while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options,
+ while ((c = getopt_long(argc, argv, "c:eigLpPsvxDtT:", long_options,
NULL)) != -1) {
switch (c) {
case 'c':
@@ -9187,6 +9234,9 @@ zpool_do_status(int argc, char **argv)
}
cmd = optarg;
break;
+ case 'e':
+ cb.cb_print_unhealthy = B_TRUE;
+ break;
case 'i':
cb.cb_print_vdev_init = B_TRUE;
break;
diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4
index 8e9e638b125a..c5a353ca9203 100644
--- a/sys/contrib/openzfs/config/kernel-blkdev.m4
+++ b/sys/contrib/openzfs/config/kernel-blkdev.m4
@@ -524,6 +524,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [
dnl #
dnl # 5.19 API: blkdev_issue_secure_erase()
+dnl # 4.7 API: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
@@ -539,6 +540,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
sector, nr_sects, GFP_KERNEL);
])
+ ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [
+ #include <linux/blkdev.h>
+ ],[
+ struct block_device *bdev = NULL;
+ sector_t sector = 0;
+ sector_t nr_sects = 0;
+ unsigned long flags = 0;
+ struct bio *biop = NULL;
+ int error __attribute__ ((unused));
+
+ error = __blkdev_issue_discard(bdev,
+ sector, nr_sects, GFP_KERNEL, flags, &biop);
+ ])
+
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
#include <linux/blkdev.h>
],[
@@ -562,13 +577,22 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
],[
AC_MSG_RESULT(no)
- AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
- ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
+ AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
+ ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
- [blkdev_issue_discard() is available])
+ AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC, 1,
+ [__blkdev_issue_discard() is available])
],[
- ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
+ AC_MSG_RESULT(no)
+
+ AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
+ ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
+ [blkdev_issue_discard() is available])
+ ],[
+ ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
+ ])
])
])
])
diff --git a/sys/contrib/openzfs/include/Makefile.am b/sys/contrib/openzfs/include/Makefile.am
index 5f38f6ac6ddb..cb28a2d6c96c 100644
--- a/sys/contrib/openzfs/include/Makefile.am
+++ b/sys/contrib/openzfs/include/Makefile.am
@@ -186,6 +186,7 @@ USER_H = \
libuutil.h \
libuutil_common.h \
libuutil_impl.h \
+ libzdb.h \
libzfs.h \
libzfs_core.h \
libzfsbootenv.h \
diff --git a/sys/contrib/openzfs/include/libzdb.h b/sys/contrib/openzfs/include/libzdb.h
new file mode 100644
index 000000000000..ef910d0a2c5a
--- /dev/null
+++ b/sys/contrib/openzfs/include/libzdb.h
@@ -0,0 +1,68 @@
+#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
+ zio_compress_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
+ zio_checksum_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
+ (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \
+ DMU_OT_ZAP_OTHER : \
+ (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
+ DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
+
+/* Some platforms require part of inode IDs to be remapped */
+#ifdef __APPLE__
+#define ZDB_MAP_OBJECT_ID(obj) INO_XNUTOZFS(obj, 2)
+#else
+#define ZDB_MAP_OBJECT_ID(obj) (obj)
+#endif
+
+#define ZOR_FLAG_PLAIN_FILE 0x0001
+#define ZOR_FLAG_DIRECTORY 0x0002
+#define ZOR_FLAG_SPACE_MAP 0x0004
+#define ZOR_FLAG_ZAP 0x0008
+#define ZOR_FLAG_ALL_TYPES -1
+#define ZOR_SUPPORTED_FLAGS (ZOR_FLAG_PLAIN_FILE | \
+ ZOR_FLAG_DIRECTORY | \
+ ZOR_FLAG_SPACE_MAP | \
+ ZOR_FLAG_ZAP)
+
+#define ZDB_FLAG_CHECKSUM 0x0001
+#define ZDB_FLAG_DECOMPRESS 0x0002
+#define ZDB_FLAG_BSWAP 0x0004
+#define ZDB_FLAG_GBH 0x0008
+#define ZDB_FLAG_INDIRECT 0x0010
+#define ZDB_FLAG_RAW 0x0020
+#define ZDB_FLAG_PRINT_BLKPTR 0x0040
+#define ZDB_FLAG_VERBOSE 0x0080
+
+
+typedef struct zdb_ctx {
+} zdb_ctx_t;
+
+typedef struct zopt_object_range {
+ uint64_t zor_obj_start;
+ uint64_t zor_obj_end;
+ uint64_t zor_flags;
+} zopt_object_range_t;
+
+
+typedef struct sublivelist_verify {
+ /* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
+ zfs_btree_t sv_pair;
+
+ /* ALLOC's without a matching FREE, accumulates across sub-livelists */
+ zfs_btree_t sv_leftover;
+} sublivelist_verify_t;
+
+typedef struct sublivelist_verify_block {
+ dva_t svb_dva;
+
+ /*
+ * We need this to check if the block marked as allocated
+ * in the livelist was freed (and potentially reallocated)
+ * in the metaslab spacemaps at a later TXG.
+ */
+ uint64_t svb_allocated_txg;
+} sublivelist_verify_block_t;
+
+const char *zdb_ot_name(dmu_object_type_t type);
+int livelist_compare(const void *larg, const void *rarg);
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
index e7ebcccbe0ce..7f0f24325d59 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
@@ -285,7 +285,6 @@ typedef struct zfid_long {
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
extern int zfs_super_owner;
-extern int zfs_bclone_enabled;
extern void zfs_init(void);
extern void zfs_fini(void);
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
index 220466550258..b4d5db21f5e5 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -45,8 +45,6 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t;
struct znode;
-extern int zfs_bclone_enabled;
-
/*
* This structure emulates the vfs_t from other platforms. It's purpose
* is to facilitate the handling of mount options and minimize structural
diff --git a/sys/contrib/openzfs/include/sys/zfs_vnops.h b/sys/contrib/openzfs/include/sys/zfs_vnops.h
index 5da103f17783..e60b99bed192 100644
--- a/sys/contrib/openzfs/include/sys/zfs_vnops.h
+++ b/sys/contrib/openzfs/include/sys/zfs_vnops.h
@@ -24,8 +24,11 @@
#ifndef _SYS_FS_ZFS_VNOPS_H
#define _SYS_FS_ZFS_VNOPS_H
+
#include <sys/zfs_vnops_os.h>
+extern int zfs_bclone_enabled;
+
extern int zfs_fsync(znode_t *, int, cred_t *);
extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);
diff --git a/sys/contrib/openzfs/lib/Makefile.am b/sys/contrib/openzfs/lib/Makefile.am
index 499ebdaeba9b..050a6cac0a37 100644
--- a/sys/contrib/openzfs/lib/Makefile.am
+++ b/sys/contrib/openzfs/lib/Makefile.am
@@ -9,11 +9,11 @@
# These library interfaces are subject to change at any time.
#
#
-# CMDS: zhack/ztest/zdb/ zfs/zpool/zed/
+# CMDS: zhack/ztest/ zfs/zpool/zed/
# raidz_{test,bench} zinject/zstream
# | |
# LIBS: | | libzfsbootenv*
-# | | |
+# |--libzdb--zdb | |
# | | |
# libzpool libzfs* ----------------+
# | | | \ / | | |
@@ -62,6 +62,7 @@ include $(srcdir)/%D%/libspl/Makefile.am
include $(srcdir)/%D%/libtpool/Makefile.am
include $(srcdir)/%D%/libunicode/Makefile.am
include $(srcdir)/%D%/libuutil/Makefile.am
+include $(srcdir)/%D%/libzdb/Makefile.am
include $(srcdir)/%D%/libzfs_core/Makefile.am
include $(srcdir)/%D%/libzfs/Makefile.am
include $(srcdir)/%D%/libzfsbootenv/Makefile.am
diff --git a/sys/contrib/openzfs/lib/libzdb/Makefile.am b/sys/contrib/openzfs/lib/libzdb/Makefile.am
new file mode 100644
index 000000000000..ec4fd92b984e
--- /dev/null
+++ b/sys/contrib/openzfs/lib/libzdb/Makefile.am
@@ -0,0 +1,7 @@
+libzdb_la_CFLAGS = $(AM_CFLAGS) $(LIBRARY_CFLAGS)
+libzdb_la_CFLAGS += -fvisibility=hidden
+
+noinst_LTLIBRARIES += libzdb.la
+
+libzdb_la_SOURCES = \
+ %D%/libzdb.c
diff --git a/sys/contrib/openzfs/lib/libzdb/libzdb.c b/sys/contrib/openzfs/lib/libzdb/libzdb.c
new file mode 100644
index 000000000000..9989fa1eb80f
--- /dev/null
+++ b/sys/contrib/openzfs/lib/libzdb/libzdb.c
@@ -0,0 +1,102 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <openssl/evp.h>
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/dmu.h>
+#include <sys/zap.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_sa.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
+#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
+#include <sys/metaslab_impl.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_bookmark.h>
+#include <sys/dbuf.h>
+#include <sys/zil.h>
+#include <sys/zil_impl.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <sys/dmu_send.h>
+#include <sys/dmu_traverse.h>
+#include <sys/zio_checksum.h>
+#include <sys/zio_compress.h>
+#include <sys/zfs_fuid.h>
+#include <sys/arc.h>
+#include <sys/arc_impl.h>
+#include <sys/ddt.h>
+#include <sys/zfeature.h>
+#include <sys/abd.h>
+#include <sys/blkptr.h>
+#include <sys/dsl_crypt.h>
+#include <sys/dsl_scan.h>
+#include <sys/btree.h>
+#include <sys/brt.h>
+#include <sys/brt_impl.h>
+#include <zfs_comutil.h>
+#include <sys/zstd/zstd.h>
+
+#include <libnvpair.h>
+#include <libzutil.h>
+
+#include <libzdb.h>
+
+const char *
+zdb_ot_name(dmu_object_type_t type)
+{
+ if (type < DMU_OT_NUMTYPES)
+ return (dmu_ot[type].ot_name);
+ else if ((type & DMU_OT_NEWTYPE) &&
+ ((type & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS))
+ return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
+ else
+ return ("UNKNOWN");
+}
+
+int
+livelist_compare(const void *larg, const void *rarg)
+{
+ const blkptr_t *l = larg;
+ const blkptr_t *r = rarg;
+
+ /* Sort them according to dva[0] */
+ uint64_t l_dva0_vdev, r_dva0_vdev;
+ l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
+ r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
+ if (l_dva0_vdev < r_dva0_vdev)
+ return (-1);
+ else if (l_dva0_vdev > r_dva0_vdev)
+ return (+1);
+
+ /* if vdevs are equal, sort by offsets. */
+ uint64_t l_dva0_offset;
+ uint64_t r_dva0_offset;
+ l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
+ r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
+ if (l_dva0_offset < r_dva0_offset) {
+ return (-1);
+ } else if (l_dva0_offset > r_dva0_offset) {
+ return (+1);
+ }
+
+ /*
+ * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
+ * it's possible the offsets are equal. In that case, sort by txg
+ */
+ if (l->blk_birth < r->blk_birth) {
+ return (-1);
+ } else if (l->blk_birth > r->blk_birth) {
+ return (+1);
+ }
+ return (0);
+}
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 47471a805907..30c168253f96 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -1159,6 +1159,15 @@ Enable the experimental block cloning feature.
If this setting is 0, then even if feature@block_cloning is enabled,
attempts to clone blocks will act as though the feature is disabled.
.
+.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int
+When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be
+written to disk.
+This allows the clone operation to reliably succeed when a file is
+modified and then immediately cloned.
+For small files this may be slower than making a copy of the file.
+Therefore, this setting defaults to 0 which causes a clone operation to
+immediately fail when encountering a dirty block.
+.
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
Select a BLAKE3 implementation.
.Pp
diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8
index 56fa4aed057b..24ad6e643cae 100644
--- a/sys/contrib/openzfs/man/man8/zpool-status.8
+++ b/sys/contrib/openzfs/man/man8/zpool-status.8
@@ -36,7 +36,7 @@
.Sh SYNOPSIS
.Nm zpool
.Cm status
-.Op Fl DigLpPstvx
+.Op Fl DeigLpPstvx
.Op Fl T Sy u Ns | Ns Sy d
.Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns …
.Oo Ar pool Oc Ns …
@@ -69,6 +69,8 @@ See the
option of
.Nm zpool Cm iostat
for complete details.
+.It Fl e
+Only show unhealthy vdevs (not-ONLINE or with errors).
.It Fl i
Display vdev initialization status.
.It Fl g
diff --git a/sys/contrib/openzfs/module/lua/ldebug.c b/sys/contrib/openzfs/module/lua/ldebug.c
index 0092474c762d..23e321bb1247 100644
--- a/sys/contrib/openzfs/module/lua/ldebug.c
+++ b/sys/contrib/openzfs/module/lua/ldebug.c
@@ -111,10 +111,11 @@ static const char *upvalname (Proto *p, int uv) {
static const char *findvararg (CallInfo *ci, int n, StkId *pos) {
int nparams = clLvalue(ci->func)->p->numparams;
- if (n >= ci->u.l.base - ci->func - nparams)
+ int nvararg = cast_int(ci->u.l.base - ci->func) - nparams;
+ if (n <= -nvararg)
return NULL; /* no such vararg */
else {
- *pos = ci->func + nparams + n;
+ *pos = ci->func + nparams - n;
return "(*vararg)"; /* generic name for any vararg */
}
}
@@ -126,7 +127,7 @@ static const char *findlocal (lua_State *L, CallInfo *ci, int n,
StkId base;
if (isLua(ci)) {
if (n < 0) /* access to vararg values? */
- return findvararg(ci, -n, pos);
+ return findvararg(ci, n, pos);
else {
base = ci->u.l.base;
name = luaF_getlocalname(ci_func(ci)->p, n, currentpc(ci));
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
index f2d5391037c4..a972c720dfdb 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -89,10 +89,6 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level");
-int zfs_bclone_enabled = 1;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
- &zfs_bclone_enabled, 0, "Enable block cloning");
-
struct zfs_jailparam {
int mount_snapshot;
};
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index e7f0aa573848..b0bda5fa2012 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -862,27 +862,66 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
return (0);
}
+#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
+ defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
+BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
+{
+ zio_t *zio = bio->bi_private;
+#ifdef HAVE_1ARG_BIO_END_IO_T
+ zio->io_error = BIO_END_IO_ERROR(bio);
+#else
+ zio->io_error = -error;
+#endif
+ bio_put(bio);
+ if (zio->io_error)
+ vdev_disk_error(zio);
+ zio_interrupt(zio);
+}
+
static int
-vdev_disk_io_trim(zio_t *zio)
+vdev_issue_discard_trim(zio_t *zio, unsigned long flags)
{
- vdev_t *v = zio->io_vd;
- vdev_disk_t *vd = v->vdev_tsd;
+ int ret;
+ struct bio *bio = NULL;
-#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
- if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
- return (-blkdev_issue_secure_erase(BDH_BDEV(vd->vd_bdh),
- zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
- } else {
- return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh),
- zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
+#if defined(BLKDEV_DISCARD_SECURE)
+ ret = - __blkdev_issue_discard(
+ BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
+ zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, flags, &bio);
+#else
+ (void) flags;
+ ret = - __blkdev_issue_discard(
+ BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
+ zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, &bio);
+#endif
+ if (!ret && bio) {
+ bio->bi_private = zio;
+ bio->bi_end_io = vdev_disk_discard_end_io;
+ vdev_submit_bio(bio);
}
-#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
+ return (ret);
+}
+#endif
+
+static int
+vdev_disk_io_trim(zio_t *zio)
+{
unsigned long trim_flags = 0;
-#if defined(BLKDEV_DISCARD_SECURE)
- if (zio->io_trim_flags & ZIO_TRIM_SECURE)
+ if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
+#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
+ return (-blkdev_issue_secure_erase(
+ BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
+ zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
+#elif defined(BLKDEV_DISCARD_SECURE)
trim_flags |= BLKDEV_DISCARD_SECURE;
#endif
- return (-blkdev_issue_discard(BDH_BDEV(vd->vd_bdh),
+ }
+#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
+ defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
+ return (vdev_issue_discard_trim(zio, trim_flags));
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
+ return (-blkdev_issue_discard(
+ BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags));
#else
#error "Unsupported kernel"
@@ -968,7 +1007,12 @@ vdev_disk_io_start(zio_t *zio)
case ZIO_TYPE_TRIM:
zio->io_error = vdev_disk_io_trim(zio);
rw_exit(&vd->vd_lock);
+#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
+ if (zio->io_trim_flags & ZIO_TRIM_SECURE)
+ zio_interrupt(zio);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
zio_interrupt(zio);
+#endif
return;
default:
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index b7b89b8afc56..a32307c39331 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -4255,9 +4255,4 @@ EXPORT_SYMBOL(zfs_map);
/* CSTYLED */
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
-
-/* CSTYLED */
-module_param(zfs_bclone_enabled, uint, 0644);
-MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
-
#endif
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
index 73476ff40ebf..3065d54fa9da 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
@@ -31,8 +31,6 @@
#include <sys/zfs_vnops.h>
#include <sys/zfeature.h>
-int zfs_bclone_enabled = 1;
-
/*
* Clone part of a file via block cloning.
*
@@ -40,7 +38,7 @@ int zfs_bclone_enabled = 1;
* care of that depending on how it was called.
*/
static ssize_t
-__zpl_clone_file_range(struct file *src_file, loff_t src_off,
+zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, size_t len)
{
struct inode *src_i = file_inode(src_file);
@@ -96,11 +94,12 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
{
ssize_t ret;
+ /* Flags is reserved for future extensions and must be zero. */
if (flags != 0)
return (-EINVAL);
- /* Try to do it via zfs_clone_range() */
- ret = __zpl_clone_file_range(src_file, src_off,
+ /* Try to do it via zfs_clone_range() and allow shortening. */
+ ret = zpl_clone_file_range_impl(src_file, src_off,
dst_file, dst_off, len);
#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
@@ -137,6 +136,11 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
* FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
* range in both files and if they're the same, arrange for them to be backed
* by the same storage.
+ *
+ * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
+ * if we want. It's designed for filesystems that may need to shorten the
+ * length for alignment, EOF, or any other requirement. ZFS may shorten the
+ * request when there is outstanding dirty data which hasn't been written.
*/
loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
@@ -145,24 +149,21 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off,
if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
return (-EINVAL);
- /*
- * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given
- * range if we want. Its designed for filesystems that make data past
- * EOF available, and don't want it to be visible in both files. ZFS
- * doesn't do that, so we just turn the flag off.
- */
- flags &= ~REMAP_FILE_CAN_SHORTEN;
-
+ /* No support for dedup yet */
if (flags & REMAP_FILE_DEDUP)
- /* No support for dedup yet */
return (-EOPNOTSUPP);
/* Zero length means to clone everything to the end of the file */
if (len == 0)
len = i_size_read(file_inode(src_file)) - src_off;
- return (__zpl_clone_file_range(src_file, src_off,
- dst_file, dst_off, len));
+ ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
+ dst_file, dst_off, len);
+
+ if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
+ ret = -EINVAL;
+
+ return (ret);
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
@@ -179,8 +180,14 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off,
if (len == 0)
len = i_size_read(file_inode(src_file)) - src_off;
- return (__zpl_clone_file_range(src_file, src_off,
- dst_file, dst_off, len));
+ /* The entire length must be cloned or this is an error. */
+ ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
+ dst_file, dst_off, len);
+
+ if (ret >= 0 && ret != len)
+ ret = -EINVAL;
+
+ return (ret);
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
@@ -214,8 +221,7 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg)
size_t len = i_size_read(file_inode(src_file));
- ssize_t ret =
- __zpl_clone_file_range(src_file, 0, dst_file, 0, len);
+ ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
fput(src_file);
@@ -253,7 +259,7 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
if (len == 0)
len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
- ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
+ ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
dst_file, fcr.fcr_dest_offset, len);
fput(src_file);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index c8ff7b6432fd..7f39ad6fc775 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -58,6 +58,26 @@
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
+/*
+ * Enable the experimental block cloning feature. If this setting is 0, then
+ * even if feature@block_cloning is enabled, attempts to clone blocks will act
+ * as though the feature is disabled.
+ */
+int zfs_bclone_enabled = 1;
+
+/*
+ * When set, zfs_clone_range() waits for dirty data to be written to disk.
+ * This allows the clone operation to reliably succeed when a file is modified
+ * and then immediately cloned. For small files this may be slower than making
+ * a copy of the file and is therefore not the default. However, in certain
+ * scenarios this behavior may be desirable so a tunable is provided.
+ */
+static int zfs_bclone_wait_dirty = 0;
+
+/*
+ * Maximum bytes to read per chunk in zfs_read().
+ */
+static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024;
int
zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
@@ -182,8 +202,6 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
return (error);
}
-static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
-
/*
* Read bytes from specified file into supplied buffer.
*
@@ -1049,6 +1067,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
size_t maxblocks, nbps;
uint_t inblksz;
uint64_t clear_setid_bits_txg = 0;
+ uint64_t last_synced_txg = 0;
inoff = *inoffp;
outoff = *outoffp;
@@ -1287,15 +1306,23 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
}
nbps = maxblocks;
+ last_synced_txg = spa_last_synced_txg(dmu_objset_spa(inos));
error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
&nbps);
if (error != 0) {
/*
* If we are trying to clone a block that was created
- * in the current transaction group, error will be
- * EAGAIN here, which we can just return to the caller
- * so it can fallback if it likes.
+ * in the current transaction group, the error will be
+ * EAGAIN here. Based on zfs_bclone_wait_dirty either
+ * return a shortened range to the caller so it can
+ * fallback, or wait for the next TXG and check again.
*/
+ if (error == EAGAIN && zfs_bclone_wait_dirty) {
+ txg_wait_synced(dmu_objset_pool(inos),
+ last_synced_txg + 1);
+ continue;
+ }
+
break;
}
@@ -1517,3 +1544,9 @@ EXPORT_SYMBOL(zfs_clone_range_replay);
ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
"Bytes to read per chunk");
+
+ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW,
+ "Enable block cloning");
+
+ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW,
+ "Wait for dirty blocks when cloning");
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 7e0990b5d9f9..502b4de2bae9 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -536,7 +536,8 @@ tags = ['functional', 'cli_root', 'zpool_split']
tests = ['zpool_status_001_pos', 'zpool_status_002_pos',
'zpool_status_003_pos', 'zpool_status_004_pos',
'zpool_status_005_pos', 'zpool_status_006_pos',
- 'zpool_status_007_pos', 'zpool_status_features_001_pos']
+ 'zpool_status_007_pos', 'zpool_status_008_pos',
+ 'zpool_status_features_001_pos']
tags = ['functional', 'cli_root', 'zpool_status']
[tests/functional/cli_root/zpool_sync]
@@ -631,7 +632,7 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos',
tags = ['functional', 'compression']
[tests/functional/cp_files]
-tests = ['cp_files_001_pos', 'cp_stress']
+tests = ['cp_files_001_pos', 'cp_files_002_pos', 'cp_stress']
tags = ['functional', 'cp_files']
[tests/functional/crtime]
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index ae4aa6275465..edfdd47ee6d7 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -176,6 +176,7 @@ if sys.platform.startswith('freebsd'):
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
'cli_root/zfs_unshare/zfs_unshare_008_pos': ['SKIP', na_reason],
+ 'cp_files/cp_files_002_pos': ['SKIP', na_reason],
'link_count/link_count_001': ['SKIP', na_reason],
'casenorm/mixed_create_failure': ['FAIL', 13215],
'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
@@ -312,6 +313,7 @@ elif sys.platform.startswith('linux'):
['SKIP', cfr_reason],
'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
+ 'cp_files/cp_files_002_pos': ['SKIP', cfr_reason],
'fault/auto_online_002_pos': ['FAIL', 11889],
'fault/auto_replace_001_pos': ['FAIL', 14851],
'fault/auto_spare_002_pos': ['FAIL', 11889],
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index e4e380aa7fd5..a619b846dd11 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -94,6 +94,7 @@ VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
+BCLONE_WAIT_DIRTY zfs_bclone_wait_dirty zfs_bclone_wait_dirty
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 4040e60434a7..01af258d59fe 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -1239,6 +1239,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_status/zpool_status_005_pos.ksh \
functional/cli_root/zpool_status/zpool_status_006_pos.ksh \
functional/cli_root/zpool_status/zpool_status_007_pos.ksh \
+ functional/cli_root/zpool_status/zpool_status_008_pos.ksh \
functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \
functional/cli_root/zpool_sync/cleanup.ksh \
functional/cli_root/zpool_sync/setup.ksh \
@@ -1394,6 +1395,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/compression/setup.ksh \
functional/cp_files/cleanup.ksh \
functional/cp_files/cp_files_001_pos.ksh \
+ functional/cp_files/cp_files_002_pos.ksh \
functional/cp_files/cp_stress.ksh \
functional/cp_files/setup.ksh \
functional/crtime/cleanup.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh
index 3bdd7db649f9..d6f32cdc7ac6 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh
@@ -51,7 +51,7 @@ else
fi
set -A args "" "-x" "-v" "-x $testpool" "-v $testpool" "-xv $testpool" \
- "-vx $testpool"
+ "-vx $testpool" "-e $testpool" "-es $testpool"
log_assert "Executing 'zpool status' with correct options succeeds"
@@ -64,4 +64,6 @@ while [[ $i -lt ${#args[*]} ]]; do
(( i = i + 1 ))
done
+cleanup
+
log_pass "'zpool status' with correct options succeeded"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh
index b501aac5ad6d..52b22dd833f0 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh
@@ -37,6 +37,7 @@
# 3. Read the file
# 4. Take a snapshot and make a clone
# 5. Verify we see "snapshot, clone and filesystem" output in 'zpool status -v'
+# and 'zpool status -ev'
function cleanup
{
@@ -68,6 +69,7 @@ log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
+log_must eval "zpool status -ev | grep '$TESTPOOL2/10m_file'"
log_mustnot eval "zpool status -v | grep '$TESTFS1'"
log_pass "'zpool status -v' outputs affected filesystem, snapshot & clone"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh
new file mode 100755
index 000000000000..6be2ad5a7410
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh
@@ -0,0 +1,104 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify 'zpool status -e' only shows unhealthy devices.
+#
+# STRATEGY:
+# 1. Create zpool
+# 2. Force DEGRADE, FAULT, or inject slow IOs for vdevs
+# 3. Verify vdevs are reported correctly with -e and -s
+# 4. Verify parents are reported as DEGRADED
+# 5. Verify healthy children are not reported
+#
+
+function cleanup
+{
+ log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
+ zinject -c all
+ poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2
+ log_must rm -f $all_vdevs
+}
+
+log_assert "Verify 'zpool status -e'"
+
+log_onexit cleanup
+
+all_vdevs=$(echo $TESTDIR/vdev{1..6})
+log_must mkdir -p $TESTDIR
+log_must truncate -s $MINVDEVSIZE $all_vdevs
+
+OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
+
+for raid_type in "draid2:3d:6c:1s" "raidz2"; do
+
+ log_must zpool create -f $TESTPOOL2 $raid_type $all_vdevs
+
+ # Check DEGRADED vdevs are shown.
+ log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev4 "ONLINE"
+ log_must zinject -d $TESTDIR/vdev4 -A degrade $TESTPOOL2
+ log_must eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev4 | grep DEGRADED"
+
+ # Check FAULTED vdevs are shown.
+ log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev5 "ONLINE"
+ log_must zinject -d $TESTDIR/vdev5 -A fault $TESTPOOL2
+ log_must eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev5 | grep FAULTED"
+
+ # Check no ONLINE vdevs are shown
+ log_mustnot eval "zpool status -e $TESTPOOL2 | grep ONLINE"
+
+ # Check no ONLINE slow vdevs are shown. Then mark IOs greater than
+ # 10ms slow, delay IOs 20ms to vdev6, check slow IOs.
+ log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev6 "ONLINE"
+ log_mustnot eval "zpool status -es $TESTPOOL2 | grep ONLINE"
+
+ log_must set_tunable64 ZIO_SLOW_IO_MS 10
+ log_must zinject -d $TESTDIR/vdev6 -D20:100 $TESTPOOL2
+ log_must mkfile 1048576 /$TESTPOOL2/testfile
+ sync_pool $TESTPOOL2
+ log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
+
+ # Check vdev6 slow IOs are only shown when requested with -s.
+ log_mustnot eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev6 | grep ONLINE"
+ log_must eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev6 | grep ONLINE"
+
+ # Pool level and top-vdev level status must be DEGRADED.
+ log_must eval "zpool status -e $TESTPOOL2 | grep $TESTPOOL2 | grep DEGRADED"
+ log_must eval "zpool status -e $TESTPOOL2 | grep $raid_type | grep DEGRADED"
+
+ # Check that healthy vdevs[1-3] aren't shown with -e.
+ log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev1 "ONLINE"
+ log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev2 "ONLINE"
+ log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev3 "ONLINE"
+ log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev1 | grep ONLINE"
+ log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev2 | grep ONLINE"
+ log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev3 | grep ONLINE"
+
+ log_must zinject -c all
+ log_must zpool status -es $TESTPOOL2
+
+ zpool destroy $TESTPOOL2
+done
+
+log_pass "Verify zpool status -e shows only unhealthy vdevs"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh
new file mode 100755
index 000000000000..60817449ab03
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh
@@ -0,0 +1,161 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify all cp --reflink modes work with modified file.
+#
+# STRATEGY:
+# 1. Verify "cp --reflink=never|auto|always" behaves as expected.
+# Two different modes of operation are tested.
+#
+# a. zfs_bclone_wait_dirty=0: FICLONE and FICLONERANGE fail with EINVAL
+# when there are dirty blocks which cannot be immediately cloned.
+# This is the default behavior.
+#
+# b. zfs_bclone_wait_dirty=1: FICLONE and FICLONERANGE wait for
+# dirty blocks to be written to disk allowing the clone to succeed.
+# The downside to this is it may be slow which, depending on the
+# situation, may defeat the point of making a clone.
+#
+
+verify_runnable "global"
+verify_block_cloning
+
+if ! is_linux; then
+ log_unsupported "cp --reflink is a GNU coreutils option"
+fi
+
+function cleanup
+{
+ datasetexists $TESTPOOL/cp-reflink && \
+ destroy_dataset $TESTPOOL/cp-reflink -f
+ log_must set_tunable32 BCLONE_WAIT_DIRTY 0 # back to the default (no wait)
+}
+
+function verify_copy
+{
+ src_cksum=$(sha256digest $1)
+ dst_cksum=$(sha256digest $2)
+
+ if [[ "$src_cksum" != "$dst_cksum" ]]; then
+ log_must ls -l $CP_TESTDIR
+ log_fail "checksum mismatch ($src_cksum != $dst_cksum)"
+ fi
+}
+
+log_assert "Verify all cp --reflink modes work with modified file"
+
+log_onexit cleanup
+
+SRC_FILE=src.data
+DST_FILE=dst.data
+SRC_SIZE=$((1 + $RANDOM % 2047)) # in records; never 0, it is a dd count and a modulus below
+
+# A smaller recordsize is used merely to speed up the test.
+RECORDSIZE=4096
+
+log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink
+CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink)
+
+log_must cd $CP_TESTDIR
+
+# Never wait on dirty blocks (zfs_bclone_wait_dirty=0)
+log_must set_tunable32 BCLONE_WAIT_DIRTY 0
+
+for mode in "never" "auto" "always"; do
+ log_note "Checking 'cp --reflink=$mode'"
+
+ # Create a new file and immediately copy it.
+ log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
+
+ if [[ "$mode" == "always" ]]; then
+ log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
+ log_must ls -l $CP_TESTDIR
+ else
+ log_must cp --reflink=$mode $SRC_FILE $DST_FILE
+ verify_copy $SRC_FILE $DST_FILE
+ fi
+ log_must rm -f $DST_FILE
+
+ # Append to an existing file and immediately copy it.
+ sync_pool $TESTPOOL
+ log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
+ count=1 conv=notrunc
+ if [[ "$mode" == "always" ]]; then
+ log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
+ log_must ls -l $CP_TESTDIR
+ else
+ log_must cp --reflink=$mode $SRC_FILE $DST_FILE
+ verify_copy $SRC_FILE $DST_FILE
+ fi
+ log_must rm -f $DST_FILE
+
+ # Overwrite a random range of an existing file and immediately copy it.
+ sync_pool $TESTPOOL
+ log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
+ seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
+ if [[ "$mode" == "always" ]]; then
+ log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
+ log_must ls -l $CP_TESTDIR
+ else
+ log_must cp --reflink=$mode $SRC_FILE $DST_FILE
+ verify_copy $SRC_FILE $DST_FILE
+ fi
+ log_must rm -f $SRC_FILE $DST_FILE
+done
+
+# Wait on dirty blocks (zfs_bclone_wait_dirty=1)
+log_must set_tunable32 BCLONE_WAIT_DIRTY 1
+
+for mode in "never" "auto" "always"; do
+ log_note "Checking 'cp --reflink=$mode'"
+
+ # Create a new file and immediately copy it.
+ log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
+ log_must cp --reflink=$mode $SRC_FILE $DST_FILE
+ verify_copy $SRC_FILE $DST_FILE
+ log_must rm -f $DST_FILE
+
+ # Append to an existing file and immediately copy it.
+ log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
+ count=1 conv=notrunc
+ log_must cp --reflink=$mode $SRC_FILE $DST_FILE
+ verify_copy $SRC_FILE $DST_FILE
+ log_must rm -f $DST_FILE
+
+ # Overwrite a random range of an existing file and immediately copy it.
+ log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
+ seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
+ log_must cp --reflink=$mode $SRC_FILE $DST_FILE
+ verify_copy $SRC_FILE $DST_FILE
+ log_must rm -f $SRC_FILE $DST_FILE
+done
+
+log_pass
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 0c7cff4c796e..f452cffa20c8 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1152,7 +1152,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.99-333-FreeBSD_g2e6b3c4d9"
+#define ZFS_META_ALIAS "zfs-2.2.99-338-FreeBSD_g229b9f4ed"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1182,7 +1182,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "333-FreeBSD_g2e6b3c4d9"
+#define ZFS_META_RELEASE "338-FreeBSD_g229b9f4ed"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 5e86c5ebf6d0..04ced657e728 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.99-333-g2e6b3c4d9"
+#define ZFS_META_GITREV "zfs-2.2.99-338-g229b9f4ed"