aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Matuska <mm@FreeBSD.org>2023-12-19 22:17:48 +0000
committerMartin Matuska <mm@FreeBSD.org>2023-12-19 22:19:48 +0000
commit188408da9f7c19f476c1afe9becb0d373088da31 (patch)
treef6d6bed507853d0eb4e7049396a329b2e7d669a6
parent45835894bd980c7cd77213a238b6dc7264c76de0 (diff)
parentdbda45160ffa43e5ecf0498a609230f1afee7b3f (diff)
downloadsrc-188408da9f7c19f476c1afe9becb0d373088da31.tar.gz
src-188408da9f7c19f476c1afe9becb0d373088da31.zip
zfs: merge openzfs/zfs@dbda45160
Notable upstream pull request merges: #15665 9b1677fb5 dmu: Allow buffer fills to fail Obtained from: OpenZFS OpenZFS commit: dbda45160ffa43e5ecf0498a609230f1afee7b3f
-rw-r--r--sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h2
-rw-r--r--sys/contrib/openzfs/include/os/linux/spl/sys/uio.h2
-rw-r--r--sys/contrib/openzfs/include/sys/dbuf.h4
-rw-r--r--sys/contrib/openzfs/lib/libspl/include/sys/uio.h2
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c4
-rw-r--r--sys/contrib/openzfs/module/zfs/dbuf.c33
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu.c21
-rw-r--r--sys/contrib/openzfs/module/zfs/dmu_recv.c2
-rw-r--r--sys/contrib/openzfs/module/zfs/dsl_bookmark.c2
-rw-r--r--sys/contrib/openzfs/tests/runfiles/linux.run3
-rwxr-xr-xsys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in2
-rw-r--r--sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am1
-rwxr-xr-xsys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh89
-rw-r--r--sys/modules/zfs/zfs_config.h4
-rw-r--r--sys/modules/zfs/zfs_gitrev.h2
15 files changed, 139 insertions, 34 deletions
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h
index b71f2f2e5625..b9d41903ea63 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h
@@ -62,7 +62,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
}
static inline void
-zfs_uio_advance(zfs_uio_t *uio, size_t size)
+zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
zfs_uio_resid(uio) -= size;
zfs_uio_offset(uio) += size;
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
index a4b600004c9f..5e6ea8d3c221 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
@@ -95,7 +95,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
}
static inline void
-zfs_uio_advance(zfs_uio_t *uio, size_t size)
+zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
uio->uio_resid -= size;
uio->uio_loffset += size;
diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h
index 2ff0bc72b270..3808a04cba80 100644
--- a/sys/contrib/openzfs/include/sys/dbuf.h
+++ b/sys/contrib/openzfs/include/sys/dbuf.h
@@ -380,8 +380,8 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
-void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
-void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
+void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
+boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h
index e9e21819d4f8..665bfc42301b 100644
--- a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h
+++ b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h
@@ -90,7 +90,7 @@ zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len)
}
static inline void
-zfs_uio_advance(zfs_uio_t *uio, size_t size)
+zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
uio->uio_resid -= size;
uio->uio_loffset += size;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
index ee6fb2dc657b..48ea37cbad59 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
@@ -107,7 +107,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
- dmu_buf_will_fill(db, tx);
+ dmu_buf_will_fill(db, tx, B_FALSE);
else
dmu_buf_will_dirty(db, tx);
@@ -123,7 +123,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
}
if (tocpy == db->db_size)
- dmu_buf_fill_done(db, tx);
+ dmu_buf_fill_done(db, tx, B_FALSE);
offset += tocpy;
size -= tocpy;
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index 03c97941d6d3..e9d5abca3324 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -2751,7 +2751,7 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
}
void
-dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
+dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
@@ -2769,8 +2769,14 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
* Block cloning: We will be completely overwriting a block
* cloned in this transaction group, so let's undirty the
* pending clone and mark the block as uncached. This will be
- * as if the clone was never done.
+ * as if the clone was never done. But if the fill can fail
+ * we should have a way to return back to the cloned data.
*/
+ if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
+ mutex_exit(&db->db_mtx);
+ dmu_buf_will_dirty(db_fake, tx);
+ return;
+ }
VERIFY(!dbuf_undirty(db, tx));
db->db_state = DB_UNCACHED;
}
@@ -2831,32 +2837,41 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
dl->dr_overridden_by.blk_birth = dr->dr_txg;
}
-void
-dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx)
+boolean_t
+dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
{
(void) tx;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
- dbuf_states_t old_state;
mutex_enter(&db->db_mtx);
DBUF_VERIFY(db);
- old_state = db->db_state;
- db->db_state = DB_CACHED;
- if (old_state == DB_FILL) {
+ if (db->db_state == DB_FILL) {
if (db->db_level == 0 && db->db_freed_in_flight) {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
/* we were freed while filling */
/* XXX dbuf_undirty? */
memset(db->db.db_data, 0, db->db.db_size);
db->db_freed_in_flight = FALSE;
+ db->db_state = DB_CACHED;
DTRACE_SET_STATE(db,
"fill done handling freed in flight");
+ failed = B_FALSE;
+ } else if (failed) {
+ VERIFY(!dbuf_undirty(db, tx));
+ db->db_buf = NULL;
+ dbuf_clear_data(db);
+ DTRACE_SET_STATE(db, "fill failed");
} else {
+ db->db_state = DB_CACHED;
DTRACE_SET_STATE(db, "fill done");
}
cv_broadcast(&db->db_changed);
+ } else {
+ db->db_state = DB_CACHED;
+ failed = B_FALSE;
}
mutex_exit(&db->db_mtx);
+ return (failed);
}
void
@@ -3001,7 +3016,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
DTRACE_SET_STATE(db, "filling assigned arcbuf");
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
- dmu_buf_fill_done(&db->db, tx);
+ dmu_buf_fill_done(&db->db, tx, B_FALSE);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index f5a5d0fc437f..d82211e6d4c7 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -1134,14 +1134,14 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
- dmu_buf_will_fill(db, tx);
+ dmu_buf_will_fill(db, tx, B_FALSE);
else
dmu_buf_will_dirty(db, tx);
(void) memcpy((char *)db->db_data + bufoff, buf, tocpy);
if (tocpy == db->db_size)
- dmu_buf_fill_done(db, tx);
+ dmu_buf_fill_done(db, tx, B_FALSE);
offset += tocpy;
size -= tocpy;
@@ -1349,27 +1349,24 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
ASSERT(size > 0);
- bufoff = zfs_uio_offset(uio) - db->db_offset;
+ offset_t off = zfs_uio_offset(uio);
+ bufoff = off - db->db_offset;
tocpy = MIN(db->db_size - bufoff, size);
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
- dmu_buf_will_fill(db, tx);
+ dmu_buf_will_fill(db, tx, B_TRUE);
else
dmu_buf_will_dirty(db, tx);
- /*
- * XXX zfs_uiomove could block forever (eg.nfs-backed
- * pages). There needs to be a uiolockdown() function
- * to lock the pages in memory, so that zfs_uiomove won't
- * block.
- */
err = zfs_uio_fault_move((char *)db->db_data + bufoff,
tocpy, UIO_WRITE, uio);
- if (tocpy == db->db_size)
- dmu_buf_fill_done(db, tx);
+ if (tocpy == db->db_size && dmu_buf_fill_done(db, tx, err)) {
+ /* The fill was reverted. Undo any uio progress. */
+ zfs_uio_advance(uio, off - zfs_uio_offset(uio));
+ }
if (err)
break;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
index 05ca91717c2f..54aa60259ea1 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -2532,7 +2532,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
* size of the provided arc_buf_t.
*/
if (db_spill->db_size != drrs->drr_length) {
- dmu_buf_will_fill(db_spill, tx);
+ dmu_buf_will_fill(db_spill, tx, B_FALSE);
VERIFY0(dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index 03d9420dbdb9..4faefecbadbb 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
dmu_buf_t *db;
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
DB_RF_MUST_SUCCEED, FTAG, &db));
- dmu_buf_will_fill(db, tx);
+ dmu_buf_will_fill(db, tx, B_FALSE);
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
SPA_MINBLOCKSIZE), tx));
local_rl->rl_phys = db->db_data;
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index 17ba23352422..c7c17f271762 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -44,7 +44,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
'block_cloning_copyfilerange_cross_dataset',
'block_cloning_cross_enc_dataset',
'block_cloning_copyfilerange_fallback_same_txg',
- 'block_cloning_replay', 'block_cloning_replay_encrypted']
+ 'block_cloning_replay', 'block_cloning_replay_encrypted',
+ 'block_cloning_lwb_buffer_overflow']
tags = ['functional', 'block_cloning']
[tests/functional/chattr:Linux]
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index 3b5eeacb6bad..708b7be91767 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'):
['SKIP', cfr_reason],
'block_cloning/block_cloning_replay_encrypted':
['SKIP', cfr_reason],
+ 'block_cloning/block_cloning_lwb_buffer_overflow':
+ ['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_cross_dataset':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 3c9f09382424..f2e28b92f1a2 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -454,6 +454,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/block_cloning/block_cloning_cross_enc_dataset.ksh \
functional/block_cloning/block_cloning_replay.ksh \
functional/block_cloning/block_cloning_replay_encrypted.ksh \
+ functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \
functional/bootfs/bootfs_001_pos.ksh \
functional/bootfs/bootfs_002_neg.ksh \
functional/bootfs/bootfs_003_pos.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh
new file mode 100755
index 000000000000..0ae76b7e54a5
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh
@@ -0,0 +1,89 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023 by iXsystems, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
+
+#
+# DESCRIPTION:
+# Test for LWB buffer overflow with multiple VDEVs ZIL when 128KB
+# block write is split into two 68KB ones, trying to write maximum
+# sizes 128KB TX_CLONE_RANGE record with 1022 block pointers into
+# 68KB buffer.
+#
+# STRATEGY:
+# 1. Create a pool with multiple VDEVs ZIL
+# 2. Write maximum sizes TX_CLONE_RANGE record with 1022 block
+# pointers into 68KB buffer
+# 3. Sync TXG
+# 4. Clone the file
+# 5. Synchronize cached writes
+#
+
+verify_runnable "global"
+
+if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
+ log_unsupported "copy_file_range not available before Linux 4.5"
+fi
+
+VDIR=$TEST_BASE_DIR/disk-bclone
+VDEV="$VDIR/a $VDIR/b $VDIR/c"
+LDEV="$VDIR/e $VDIR/f"
+
+function cleanup
+{
+ datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+ rm -rf $VDIR
+}
+
+log_onexit cleanup
+
+log_assert "Test for LWB buffer overflow with multiple VDEVs ZIL"
+
+log_must rm -rf $VDIR
+log_must mkdir -p $VDIR
+log_must truncate -s $MINVDEVSIZE $VDEV $LDEV
+
+log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \
+ log mirror $LDEV
+log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS
+# Each ZIL log entry can fit 130816 bytes for a block cloning operation,
+# so it can store 1022 block pointers. When LWB optimization is enabled,
+# an assert is hit when 128KB block write is split into two 68KB ones
+# for 2 SLOG devices
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \
+ conv=fsync
+sync_pool $TESTPOOL
+log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
+log_must sync
+
+sync_pool $TESTPOOL
+log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
+typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2)
+log_must [ "$blocks" = "$(seq -s " " 0 1021)" ]
+
+log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL"
+
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index b6e93c8eadf7..f6b8ad1fc472 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1113,7 +1113,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.99-268-FreeBSD_g86e115e21"
+#define ZFS_META_ALIAS "zfs-2.2.99-270-FreeBSD_gdbda45160"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1143,7 +1143,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "268-FreeBSD_g86e115e21"
+#define ZFS_META_RELEASE "270-FreeBSD_gdbda45160"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index f87735ca2dfe..a52a7d957ff4 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.99-268-g86e115e21"
+#define ZFS_META_GITREV "zfs-2.2.99-270-gdbda45160"