author    Martin Matuska <mm@FreeBSD.org>  2021-02-16 00:39:34 +0000
committer Martin Matuska <mm@FreeBSD.org>  2021-02-16 01:46:28 +0000
commit    184c1b943937986c81e1996d999d21626ec7a4ff (patch)
tree      f7321df93d0bd5ffb8cf9245c84745dac7e81ce1 /sys/contrib/openzfs/module
parent    10fc4c3218381fef7189a5b8d46a757cd1989dff (diff)
parent    83dd4a9252fd2044038a399d7afc68259d483b8e (diff)
zfs: merge OpenZFS master-436ab35a5

- speed up writing to ZFS pools without ZIL devices (aa755b3)
- speed up importing ZFS pools (2d8f72d, a0e0199, cf0977a)
...

MFC after:              2 weeks
Reviewed by:            mjg (partial)
Tested by:              pho
Differential Revision:  https://reviews.freebsd.org/D28677
Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r--  sys/contrib/openzfs/module/Makefile.in | 22
-rw-r--r--  sys/contrib/openzfs/module/avl/avl.c | 3
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/modes.c | 12
-rw-r--r--  sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c | 32
-rw-r--r--  sys/contrib/openzfs/module/icp/io/sha1_mod.c | 48
-rw-r--r--  sys/contrib/openzfs/module/icp/io/sha2_mod.c | 48
-rw-r--r--  sys/contrib/openzfs/module/icp/io/skein_mod.c | 40
-rw-r--r--  sys/contrib/openzfs/module/lua/ldebug.c | 1
-rw-r--r--  sys/contrib/openzfs/module/lua/ldo.c | 1
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c | 38
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c | 31
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c | 26
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c | 40
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c | 89
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c | 6
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c | 77
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c | 44
-rw-r--r--  sys/contrib/openzfs/module/os/linux/spl/spl-generic.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c | 3
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/abd_os.c | 49
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c | 49
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c (renamed from sys/contrib/openzfs/module/zcommon/zfs_uio.c) | 95
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c | 90
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c | 18
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c | 30
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c | 18
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c | 4
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c | 6
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/abd.c | 293
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c | 42
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu.c | 54
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_objset.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_tx.c | 16
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dataset.c | 13
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_destroy.c | 15
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c | 100
-rw-r--r--  sys/contrib/openzfs/module/zfs/sa.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c | 51
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_history.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c | 67
-rw-r--r--  sys/contrib/openzfs/module/zfs/txg.c | 24
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev.c | 225
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_draid.c | 34
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_indirect.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_label.c | 38
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_queue.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_raidz.c | 49
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_removal.c | 13
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 3
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_sa.c | 11
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_vnops.c | 69
-rw-r--r--  sys/contrib/openzfs/module/zfs/zil.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c | 75
59 files changed, 1276 insertions(+), 878 deletions(-)
diff --git a/sys/contrib/openzfs/module/Makefile.in b/sys/contrib/openzfs/module/Makefile.in
index 0ee2c447221a..69caf48570e9 100644
--- a/sys/contrib/openzfs/module/Makefile.in
+++ b/sys/contrib/openzfs/module/Makefile.in
@@ -14,7 +14,8 @@ check:
modules modules-Linux modules-FreeBSD modules-unknown \
clean clean-Linux clean-FreeBSD \
modules_install modules_install-Linux modules_install-FreeBSD \
- modules_uninstall modules_uninstall-Linux modules_uninstall-FreeBSD
+ modules_uninstall modules_uninstall-Linux modules_uninstall-FreeBSD \
+ cppcheck cppcheck-Linux cppcheck-FreeBSD
# Filter out options that FreeBSD make doesn't understand
getflags = ( \
@@ -106,6 +107,25 @@ modules_uninstall-FreeBSD:
modules_uninstall: modules_uninstall-@ac_system@
+cppcheck-Linux:
+ @CPPCHECK@ -j@CPU_COUNT@ --std=c99 --quiet --force --error-exitcode=2 \
+ --inline-suppr --suppress=noValidConfiguration \
+ --enable=warning,information -D_KERNEL \
+ --include=@LINUX_OBJ@/include/generated/autoconf.h \
+ --include=@top_srcdir@/zfs_config.h \
+ --config-exclude=@LINUX_OBJ@/include \
+ -I @LINUX_OBJ@/include \
+ -I @top_srcdir@/include/os/linux/kernel \
+ -I @top_srcdir@/include/os/linux/spl \
+ -I @top_srcdir@/include/os/linux/zfs \
+ -I @top_srcdir@/include \
+ avl icp lua nvpair spl unicode zcommon zfs zstd os/linux
+
+cppcheck-FreeBSD:
+ @true
+
+cppcheck: cppcheck-@ac_system@
+
distdir:
(cd @srcdir@ && find $(ZFS_MODULES) os -name '*.[chS]') | \
while read path; do \
diff --git a/sys/contrib/openzfs/module/avl/avl.c b/sys/contrib/openzfs/module/avl/avl.c
index 48865365d8e3..d0473d883b3d 100644
--- a/sys/contrib/openzfs/module/avl/avl.c
+++ b/sys/contrib/openzfs/module/avl/avl.c
@@ -492,7 +492,6 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
int which_child = AVL_INDEX2CHILD(where);
size_t off = tree->avl_offset;
- ASSERT(tree);
#ifdef _LP64
ASSERT(((uintptr_t)new_data & 0x7) == 0);
#endif
@@ -680,8 +679,6 @@ avl_remove(avl_tree_t *tree, void *data)
int which_child;
size_t off = tree->avl_offset;
- ASSERT(tree);
-
delete = AVL_DATA2NODE(data, off);
/*
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/modes.c b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
index faae9722bd04..59743c7d6829 100644
--- a/sys/contrib/openzfs/module/icp/algs/modes/modes.c
+++ b/sys/contrib/openzfs/module/icp/algs/modes/modes.c
@@ -43,11 +43,11 @@ crypto_init_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset)
break;
case CRYPTO_DATA_UIO: {
- uio_t *uiop = out->cd_uio;
+ zfs_uio_t *uiop = out->cd_uio;
uint_t vec_idx;
offset = out->cd_offset;
- offset = uio_index_at_offset(uiop, offset, &vec_idx);
+ offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx);
*current_offset = offset;
*iov_or_mp = (void *)(uintptr_t)vec_idx;
@@ -85,7 +85,7 @@ crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
}
case CRYPTO_DATA_UIO: {
- uio_t *uio = out->cd_uio;
+ zfs_uio_t *uio = out->cd_uio;
offset_t offset;
uint_t vec_idx;
uint8_t *p;
@@ -94,7 +94,7 @@ crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
offset = *current_offset;
vec_idx = (uintptr_t)(*iov_or_mp);
- uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
+ zfs_uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
p = (uint8_t *)iov_base + offset;
*out_data_1 = p;
@@ -106,10 +106,10 @@ crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
} else {
/* one block spans two iovecs */
*out_data_1_len = iov_len - offset;
- if (vec_idx == uio_iovcnt(uio))
+ if (vec_idx == zfs_uio_iovcnt(uio))
return;
vec_idx++;
- uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
+ zfs_uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len);
*out_data_2 = (uint8_t *)iov_base;
*current_offset = amt - *out_data_1_len;
}
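
The hunks above are part of a tree-wide rename of the illumos-style uio_*() accessors to zfs_uio_*(), letting uio_t become the platform-neutral zfs_uio_t. The logic of crypto_get_ptrs() itself is untouched: it returns one or two pointer/length pairs depending on whether a cipher block spans an iovec boundary. A minimal sketch of that case, using only accessors visible in this diff (the helper name and signature are hypothetical):

static void
get_two_ptrs(zfs_uio_t *uio, uint_t vec_idx, offset_t offset, size_t amt,
    uint8_t **p1, size_t *len1, uint8_t **p2, size_t *len2)
{
	size_t iov_len = zfs_uio_iovlen(uio, vec_idx);

	*p1 = (uint8_t *)zfs_uio_iovbase(uio, vec_idx) + offset;
	*p2 = NULL;
	*len2 = 0;
	if (offset + amt <= iov_len) {
		/* the whole block fits in the current iovec */
		*len1 = amt;
	} else {
		/* the block spans into the following iovec */
		*len1 = iov_len - offset;
		if (++vec_idx == zfs_uio_iovcnt(uio))
			return;
		*p2 = (uint8_t *)zfs_uio_iovbase(uio, vec_idx);
		*len2 = amt - *len1;
	}
}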
diff --git a/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c b/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c
index 905ef6657336..1b115d976232 100644
--- a/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c
+++ b/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c
@@ -40,7 +40,7 @@ int
crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
void *digest_ctx, void (*update)(void))
{
- uio_t *uiop = data->cd_uio;
+ zfs_uio_t *uiop = data->cd_uio;
off_t offset = data->cd_offset;
size_t length = len;
uint_t vec_idx;
@@ -48,7 +48,7 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
uchar_t *datap;
ASSERT(data->cd_format == CRYPTO_DATA_UIO);
- if (uio_segflg(uiop) != UIO_SYSSPACE) {
+ if (zfs_uio_segflg(uiop) != UIO_SYSSPACE) {
return (CRYPTO_ARGUMENTS_BAD);
}
@@ -56,9 +56,9 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
* Jump to the first iovec containing data to be
* processed.
*/
- offset = uio_index_at_offset(uiop, offset, &vec_idx);
+ offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(uiop) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
/*
* The caller specified an offset that is larger than
* the total size of the buffers it provided.
@@ -66,11 +66,11 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
return (CRYPTO_DATA_LEN_RANGE);
}
- while (vec_idx < uio_iovcnt(uiop) && length > 0) {
- cur_len = MIN(uio_iovlen(uiop, vec_idx) -
+ while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) -
offset, length);
- datap = (uchar_t *)(uio_iovbase(uiop, vec_idx) + offset);
+ datap = (uchar_t *)(zfs_uio_iovbase(uiop, vec_idx) + offset);
switch (cmd) {
case COPY_FROM_DATA:
bcopy(datap, buf, cur_len);
@@ -97,7 +97,7 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
offset = 0;
}
- if (vec_idx == uio_iovcnt(uiop) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed.
@@ -166,7 +166,7 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
void (*copy_block)(uint8_t *, uint64_t *))
{
common_ctx_t *common_ctx = ctx;
- uio_t *uiop = input->cd_uio;
+ zfs_uio_t *uiop = input->cd_uio;
off_t offset = input->cd_offset;
size_t length = input->cd_length;
uint_t vec_idx;
@@ -178,7 +178,7 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
&common_ctx->cc_iv[0]);
}
- if (uio_segflg(input->cd_uio) != UIO_SYSSPACE) {
+ if (zfs_uio_segflg(input->cd_uio) != UIO_SYSSPACE) {
return (CRYPTO_ARGUMENTS_BAD);
}
@@ -186,8 +186,8 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
* Jump to the first iovec containing data to be
* processed.
*/
- offset = uio_index_at_offset(uiop, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(uiop) && length > 0) {
+ offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
/*
* The caller specified an offset that is larger than the
* total size of the buffers it provided.
@@ -198,11 +198,11 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
/*
* Now process the iovecs.
*/
- while (vec_idx < uio_iovcnt(uiop) && length > 0) {
- cur_len = MIN(uio_iovlen(uiop, vec_idx) -
+ while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) -
offset, length);
- int rv = (cipher)(ctx, uio_iovbase(uiop, vec_idx) + offset,
+ int rv = (cipher)(ctx, zfs_uio_iovbase(uiop, vec_idx) + offset,
cur_len, output);
if (rv != CRYPTO_SUCCESS) {
@@ -213,7 +213,7 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
offset = 0;
}
- if (vec_idx == uio_iovcnt(uiop) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed, i.e.
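
crypto_uio_data() and crypto_update_uio() share the traversal that recurs in every consumer this patch touches: seek to the iovec containing cd_offset with zfs_uio_index_at_offset(), then walk forward consuming the requested length, with only the first iovec starting mid-buffer. A condensed sketch under the same assumptions (process_chunk() is a hypothetical callback; everything else appears in the hunks above):

static int
walk_uio(zfs_uio_t *uiop, off_t offset, size_t length,
    int (*process_chunk)(void *base, size_t len))
{
	uint_t vec_idx;
	size_t cur_len;

	/* seek to the iovec containing the starting offset */
	offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx);
	if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0)
		return (CRYPTO_DATA_LEN_RANGE);	/* offset past end of buffers */

	while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) {
		cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) - offset, length);
		int rv = process_chunk(
		    zfs_uio_iovbase(uiop, vec_idx) + offset, cur_len);
		if (rv != 0)
			return (rv);
		length -= cur_len;
		vec_idx++;
		offset = 0;	/* only the first iovec starts mid-buffer */
	}
	/* ran out of iovecs before running out of requested length */
	if (length > 0)
		return (CRYPTO_DATA_LEN_RANGE);
	return (CRYPTO_SUCCESS);
}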
diff --git a/sys/contrib/openzfs/module/icp/io/sha1_mod.c b/sys/contrib/openzfs/module/icp/io/sha1_mod.c
index ffae143cded0..6dcee6b2ecf2 100644
--- a/sys/contrib/openzfs/module/icp/io/sha1_mod.c
+++ b/sys/contrib/openzfs/module/icp/io/sha1_mod.c
@@ -271,15 +271,15 @@ sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data)
size_t cur_len;
/* we support only kernel buffer */
- if (uio_segflg(data->cd_uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/*
* Jump to the first iovec containing data to be
* digested.
*/
- offset = uio_index_at_offset(data->cd_uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(data->cd_uio)) {
+ offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) {
/*
* The caller specified an offset that is larger than the
* total size of the buffers it provided.
@@ -290,12 +290,12 @@ sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data)
/*
* Now do the digesting on the iovecs.
*/
- while (vec_idx < uio_iovcnt(data->cd_uio) && length > 0) {
- cur_len = MIN(uio_iovlen(data->cd_uio, vec_idx) -
+ while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) -
offset, length);
SHA1Update(sha1_ctx,
- (uint8_t *)uio_iovbase(data->cd_uio, vec_idx) + offset,
+ (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset,
cur_len);
length -= cur_len;
@@ -303,7 +303,7 @@ sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data)
offset = 0;
}
- if (vec_idx == uio_iovcnt(data->cd_uio) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed, i.e.
@@ -330,15 +330,15 @@ sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
uint_t vec_idx = 0;
/* we support only kernel buffer */
- if (uio_segflg(digest->cd_uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/*
* Jump to the first iovec containing ptr to the digest to
* be returned.
*/
- offset = uio_index_at_offset(digest->cd_uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(digest->cd_uio)) {
+ offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) {
/*
* The caller specified an offset that is
* larger than the total size of the buffers
@@ -348,7 +348,7 @@ sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
}
if (offset + digest_len <=
- uio_iovlen(digest->cd_uio, vec_idx)) {
+ zfs_uio_iovlen(digest->cd_uio, vec_idx)) {
/*
* The computed SHA1 digest will fit in the current
* iovec.
@@ -360,11 +360,11 @@ sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
* the user only what was requested.
*/
SHA1Final(digest_scratch, sha1_ctx);
- bcopy(digest_scratch, (uchar_t *)uio_iovbase(digest->
- cd_uio, vec_idx) + offset,
+ bcopy(digest_scratch, (uchar_t *)
+ zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
digest_len);
} else {
- SHA1Final((uchar_t *)uio_iovbase(digest->
+ SHA1Final((uchar_t *)zfs_uio_iovbase(digest->
cd_uio, vec_idx) + offset,
sha1_ctx);
}
@@ -382,11 +382,11 @@ sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
SHA1Final(digest_tmp, sha1_ctx);
- while (vec_idx < uio_iovcnt(digest->cd_uio) && length > 0) {
- cur_len = MIN(uio_iovlen(digest->cd_uio, vec_idx) -
+ while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) -
offset, length);
bcopy(digest_tmp + scratch_offset,
- uio_iovbase(digest->cd_uio, vec_idx) + offset,
+ zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
cur_len);
length -= cur_len;
@@ -395,7 +395,7 @@ sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
offset = 0;
}
- if (vec_idx == uio_iovcnt(digest->cd_uio) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed, i.e.
@@ -1096,12 +1096,12 @@ sha1_mac_verify_atomic(crypto_provider_handle_t provider,
size_t cur_len;
/* we support only kernel buffer */
- if (uio_segflg(mac->cd_uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/* jump to the first iovec containing the expected digest */
- offset = uio_index_at_offset(mac->cd_uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(mac->cd_uio)) {
+ offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) {
/*
* The caller specified an offset that is
* larger than the total size of the buffers
@@ -1112,12 +1112,12 @@ sha1_mac_verify_atomic(crypto_provider_handle_t provider,
}
/* do the comparison of computed digest vs specified one */
- while (vec_idx < uio_iovcnt(mac->cd_uio) && length > 0) {
- cur_len = MIN(uio_iovlen(mac->cd_uio, vec_idx) -
+ while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) -
offset, length);
if (bcmp(digest + scratch_offset,
- uio_iovbase(mac->cd_uio, vec_idx) + offset,
+ zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset,
cur_len) != 0) {
ret = CRYPTO_INVALID_MAC;
break;
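
sha1_digest_final_uio() above — and its sha2 and skein counterparts below — handle the output side of the same pattern: if the computed digest fits in the current iovec it is finalized in place, otherwise it is finalized into a scratch buffer and scattered across the remaining iovecs. A minimal sketch of the scatter step (helper name hypothetical; only the zfs_uio_* calls are taken from the diff):

static int
scatter_digest(zfs_uio_t *uio, uint_t vec_idx, off_t offset,
    uint8_t *digest_tmp, size_t digest_len)
{
	off_t scratch_offset = 0;
	size_t length = digest_len, cur_len;

	while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) {
		cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length);
		/* copy the next slice of the finalized digest */
		bcopy(digest_tmp + scratch_offset,
		    zfs_uio_iovbase(uio, vec_idx) + offset, cur_len);
		length -= cur_len;
		vec_idx++;
		scratch_offset += cur_len;
		offset = 0;
	}
	/* iovecs exhausted before the whole digest was written */
	return (length > 0 ? CRYPTO_DATA_LEN_RANGE : CRYPTO_SUCCESS);
}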
diff --git a/sys/contrib/openzfs/module/icp/io/sha2_mod.c b/sys/contrib/openzfs/module/icp/io/sha2_mod.c
index a4a5c6041dd0..d690cd0bcb05 100644
--- a/sys/contrib/openzfs/module/icp/io/sha2_mod.c
+++ b/sys/contrib/openzfs/module/icp/io/sha2_mod.c
@@ -296,15 +296,15 @@ sha2_digest_update_uio(SHA2_CTX *sha2_ctx, crypto_data_t *data)
size_t cur_len;
/* we support only kernel buffer */
- if (uio_segflg(data->cd_uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/*
* Jump to the first iovec containing data to be
* digested.
*/
- offset = uio_index_at_offset(data->cd_uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(data->cd_uio)) {
+ offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) {
/*
* The caller specified an offset that is larger than the
* total size of the buffers it provided.
@@ -315,18 +315,18 @@ sha2_digest_update_uio(SHA2_CTX *sha2_ctx, crypto_data_t *data)
/*
* Now do the digesting on the iovecs.
*/
- while (vec_idx < uio_iovcnt(data->cd_uio) && length > 0) {
- cur_len = MIN(uio_iovlen(data->cd_uio, vec_idx) -
+ while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) -
offset, length);
- SHA2Update(sha2_ctx, (uint8_t *)uio_iovbase(data->cd_uio,
+ SHA2Update(sha2_ctx, (uint8_t *)zfs_uio_iovbase(data->cd_uio,
vec_idx) + offset, cur_len);
length -= cur_len;
vec_idx++;
offset = 0;
}
- if (vec_idx == uio_iovcnt(data->cd_uio) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed, i.e.
@@ -353,15 +353,15 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
uint_t vec_idx = 0;
/* we support only kernel buffer */
- if (uio_segflg(digest->cd_uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/*
* Jump to the first iovec containing ptr to the digest to
* be returned.
*/
- offset = uio_index_at_offset(digest->cd_uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(digest->cd_uio)) {
+ offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) {
/*
* The caller specified an offset that is
* larger than the total size of the buffers
@@ -371,7 +371,7 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
}
if (offset + digest_len <=
- uio_iovlen(digest->cd_uio, vec_idx)) {
+ zfs_uio_iovlen(digest->cd_uio, vec_idx)) {
/*
* The computed SHA2 digest will fit in the current
* iovec.
@@ -387,11 +387,11 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
*/
SHA2Final(digest_scratch, sha2_ctx);
- bcopy(digest_scratch, (uchar_t *)uio_iovbase(digest->
- cd_uio, vec_idx) + offset,
+ bcopy(digest_scratch, (uchar_t *)
+ zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
digest_len);
} else {
- SHA2Final((uchar_t *)uio_iovbase(digest->
+ SHA2Final((uchar_t *)zfs_uio_iovbase(digest->
cd_uio, vec_idx) + offset,
sha2_ctx);
@@ -410,12 +410,12 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
SHA2Final(digest_tmp, sha2_ctx);
- while (vec_idx < uio_iovcnt(digest->cd_uio) && length > 0) {
+ while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
cur_len =
- MIN(uio_iovlen(digest->cd_uio, vec_idx) -
+ MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) -
offset, length);
bcopy(digest_tmp + scratch_offset,
- uio_iovbase(digest->cd_uio, vec_idx) + offset,
+ zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset,
cur_len);
length -= cur_len;
@@ -424,7 +424,7 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
offset = 0;
}
- if (vec_idx == uio_iovcnt(digest->cd_uio) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed, i.e.
@@ -1251,12 +1251,12 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider,
size_t cur_len;
/* we support only kernel buffer */
- if (uio_segflg(mac->cd_uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/* jump to the first iovec containing the expected digest */
- offset = uio_index_at_offset(mac->cd_uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(mac->cd_uio)) {
+ offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) {
/*
* The caller specified an offset that is
* larger than the total size of the buffers
@@ -1267,12 +1267,12 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider,
}
/* do the comparison of computed digest vs specified one */
- while (vec_idx < uio_iovcnt(mac->cd_uio) && length > 0) {
- cur_len = MIN(uio_iovlen(mac->cd_uio, vec_idx) -
+ while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) -
offset, length);
if (bcmp(digest + scratch_offset,
- uio_iovbase(mac->cd_uio, vec_idx) + offset,
+ zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset,
cur_len) != 0) {
ret = CRYPTO_INVALID_MAC;
break;
diff --git a/sys/contrib/openzfs/module/icp/io/skein_mod.c b/sys/contrib/openzfs/module/icp/io/skein_mod.c
index 18026807fd84..5ee36af12bcb 100644
--- a/sys/contrib/openzfs/module/icp/io/skein_mod.c
+++ b/sys/contrib/openzfs/module/icp/io/skein_mod.c
@@ -272,18 +272,18 @@ skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data)
size_t length = data->cd_length;
uint_t vec_idx = 0;
size_t cur_len;
- uio_t *uio = data->cd_uio;
+ zfs_uio_t *uio = data->cd_uio;
/* we support only kernel buffer */
- if (uio_segflg(uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/*
* Jump to the first iovec containing data to be
* digested.
*/
- offset = uio_index_at_offset(uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(uio)) {
+ offset = zfs_uio_index_at_offset(uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(uio)) {
/*
* The caller specified an offset that is larger than the
* total size of the buffers it provided.
@@ -294,16 +294,16 @@ skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data)
/*
* Now do the digesting on the iovecs.
*/
- while (vec_idx < uio_iovcnt(uio) && length > 0) {
- cur_len = MIN(uio_iovlen(uio, vec_idx) - offset, length);
- SKEIN_OP(ctx, Update, (uint8_t *)uio_iovbase(uio, vec_idx)
+ while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length);
+ SKEIN_OP(ctx, Update, (uint8_t *)zfs_uio_iovbase(uio, vec_idx)
+ offset, cur_len);
length -= cur_len;
vec_idx++;
offset = 0;
}
- if (vec_idx == uio_iovcnt(uio) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed, i.e.
@@ -322,19 +322,19 @@ static int
skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
crypto_req_handle_t req)
{
- off_t offset = digest->cd_offset;
- uint_t vec_idx = 0;
- uio_t *uio = digest->cd_uio;
+ off_t offset = digest->cd_offset;
+ uint_t vec_idx = 0;
+ zfs_uio_t *uio = digest->cd_uio;
/* we support only kernel buffer */
- if (uio_segflg(uio) != UIO_SYSSPACE)
+ if (zfs_uio_segflg(uio) != UIO_SYSSPACE)
return (CRYPTO_ARGUMENTS_BAD);
/*
* Jump to the first iovec containing ptr to the digest to be returned.
*/
- offset = uio_index_at_offset(uio, offset, &vec_idx);
- if (vec_idx == uio_iovcnt(uio)) {
+ offset = zfs_uio_index_at_offset(uio, offset, &vec_idx);
+ if (vec_idx == zfs_uio_iovcnt(uio)) {
/*
* The caller specified an offset that is larger than the
* total size of the buffers it provided.
@@ -342,10 +342,10 @@ skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
return (CRYPTO_DATA_LEN_RANGE);
}
if (offset + CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen) <=
- uio_iovlen(uio, vec_idx)) {
+ zfs_uio_iovlen(uio, vec_idx)) {
/* The computed digest will fit in the current iovec. */
SKEIN_OP(ctx, Final,
- (uchar_t *)uio_iovbase(uio, vec_idx) + offset);
+ (uchar_t *)zfs_uio_iovbase(uio, vec_idx) + offset);
} else {
uint8_t *digest_tmp;
off_t scratch_offset = 0;
@@ -357,11 +357,11 @@ skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
if (digest_tmp == NULL)
return (CRYPTO_HOST_MEMORY);
SKEIN_OP(ctx, Final, digest_tmp);
- while (vec_idx < uio_iovcnt(uio) && length > 0) {
- cur_len = MIN(uio_iovlen(uio, vec_idx) - offset,
+ while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) {
+ cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset,
length);
bcopy(digest_tmp + scratch_offset,
- uio_iovbase(uio, vec_idx) + offset, cur_len);
+ zfs_uio_iovbase(uio, vec_idx) + offset, cur_len);
length -= cur_len;
vec_idx++;
@@ -370,7 +370,7 @@ skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
}
kmem_free(digest_tmp, CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen));
- if (vec_idx == uio_iovcnt(uio) && length > 0) {
+ if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) {
/*
* The end of the specified iovec's was reached but
* the length requested could not be processed, i.e.
diff --git a/sys/contrib/openzfs/module/lua/ldebug.c b/sys/contrib/openzfs/module/lua/ldebug.c
index 2e1efa4e7250..da005c44376e 100644
--- a/sys/contrib/openzfs/module/lua/ldebug.c
+++ b/sys/contrib/openzfs/module/lua/ldebug.c
@@ -324,7 +324,6 @@ static void kname (Proto *p, int pc, int c, const char **name) {
if (ISK(c)) { /* is 'c' a constant? */
TValue *kvalue = &p->k[INDEXK(c)];
if (ttisstring(kvalue)) { /* literal constant? */
- // cppcheck-suppress autoVariables
*name = svalue(kvalue); /* it is its own name */
return;
}
diff --git a/sys/contrib/openzfs/module/lua/ldo.c b/sys/contrib/openzfs/module/lua/ldo.c
index 474fe659bcef..f3c3dcb4d81a 100644
--- a/sys/contrib/openzfs/module/lua/ldo.c
+++ b/sys/contrib/openzfs/module/lua/ldo.c
@@ -196,7 +196,6 @@ int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) {
struct lua_longjmp lj;
lj.status = LUA_OK;
lj.previous = L->errorJmp; /* chain new error handler */
- // cppcheck-suppress autoVariables
L->errorJmp = &lj;
LUAI_TRY(L, &lj,
(*f)(L, ud);
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c
index c6b610394718..f5f3524f7b9d 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c
@@ -43,31 +43,32 @@
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/vnode.h>
+#include <sys/zfs_znode.h>
/*
- * same as uiomove() but doesn't modify uio structure.
+ * same as zfs_uiomove() but doesn't modify uio structure.
* return in cbytes how many bytes were copied.
*/
int
-uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
+zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
struct iovec small_iovec[1];
struct uio small_uio_clone;
struct uio *uio_clone;
int error;
- ASSERT3U(uio->uio_rw, ==, rw);
- if (uio->uio_iovcnt == 1) {
- small_uio_clone = *uio;
- small_iovec[0] = *uio->uio_iov;
+ ASSERT3U(zfs_uio_rw(uio), ==, rw);
+ if (zfs_uio_iovcnt(uio) == 1) {
+ small_uio_clone = *(GET_UIO_STRUCT(uio));
+ small_iovec[0] = *(GET_UIO_STRUCT(uio)->uio_iov);
small_uio_clone.uio_iov = small_iovec;
uio_clone = &small_uio_clone;
} else {
- uio_clone = cloneuio(uio);
+ uio_clone = cloneuio(GET_UIO_STRUCT(uio));
}
error = vn_io_fault_uiomove(p, n, uio_clone);
- *cbytes = uio->uio_resid - uio_clone->uio_resid;
+ *cbytes = zfs_uio_resid(uio) - uio_clone->uio_resid;
if (uio_clone != &small_uio_clone)
free(uio_clone, M_IOV);
return (error);
@@ -77,16 +78,23 @@ uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
* Drop the next n chars out of *uiop.
*/
void
-uioskip(uio_t *uio, size_t n)
+zfs_uioskip(zfs_uio_t *uio, size_t n)
{
- enum uio_seg segflg;
+ zfs_uio_seg_t segflg;
/* For the full compatibility with illumos. */
- if (n > uio->uio_resid)
+ if (n > zfs_uio_resid(uio))
return;
- segflg = uio->uio_segflg;
- uio->uio_segflg = UIO_NOCOPY;
- uiomove(NULL, n, uio->uio_rw, uio);
- uio->uio_segflg = segflg;
+ segflg = zfs_uio_segflg(uio);
+ zfs_uio_segflg(uio) = UIO_NOCOPY;
+ zfs_uiomove(NULL, n, zfs_uio_rw(uio), uio);
+ zfs_uio_segflg(uio) = segflg;
+}
+
+int
+zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio)
+{
+ ASSERT(zfs_uio_rw(uio) == dir);
+ return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio)));
}
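
spl_uio.c is where the FreeBSD side of the new abstraction lives. On FreeBSD, zfs_uio_t is essentially a thin wrapper around the native struct uio: GET_UIO_STRUCT() recovers the kernel object for native interfaces such as vn_io_fault_uiomove() and cloneuio(), while the zfs_uio_*() accessors keep the shared ZFS code platform-neutral. A rough sketch of the wrapper's shape for orientation — not the verbatim SPL header:

typedef struct zfs_uio {
	struct uio	*uio;	/* the native FreeBSD object, borrowed */
} zfs_uio_t;

#define	GET_UIO_STRUCT(u)	((u)->uio)
#define	zfs_uio_segflg(u)	GET_UIO_STRUCT(u)->uio_segflg
#define	zfs_uio_resid(u)	GET_UIO_STRUCT(u)->uio_resid
#define	zfs_uio_iovcnt(u)	GET_UIO_STRUCT(u)->uio_iovcnt
#define	zfs_uio_rw(u)		GET_UIO_STRUCT(u)->uio_rw

static inline void
zfs_uio_init(zfs_uio_t *uio, struct uio *uio_s)
{
	uio->uio = uio_s;	/* wrap, no copy */
}

Because the accessors expand to the underlying struct members, they work as lvalues too, which is why zfs_uioskip() above can assign through zfs_uio_segflg().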
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c
index 9e16c0029087..09c8401267df 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c
@@ -240,7 +240,9 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
#endif
VI_LOCK(vp);
vp->v_iflag &= ~VI_MOUNT;
+#ifdef VIRF_MOUNTPOINT
vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
+#endif
vp->v_mountedhere = mp;
VI_UNLOCK(vp);
/* Put the new filesystem on the mount list. */
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
index 0a323e8856a3..ab82b2aaeb78 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/abd_os.c
@@ -202,7 +202,7 @@ abd_free_chunks(abd_t *abd)
}
abd_t *
-abd_alloc_struct(size_t size)
+abd_alloc_struct_impl(size_t size)
{
uint_t chunkcnt = abd_chunkcnt_for_bytes(size);
/*
@@ -216,22 +216,18 @@ abd_alloc_struct(size_t size)
offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
ASSERT3P(abd, !=, NULL);
- list_link_init(&abd->abd_gang_link);
- mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
ABDSTAT_INCR(abdstat_struct_size, abd_size);
return (abd);
}
void
-abd_free_struct(abd_t *abd)
+abd_free_struct_impl(abd_t *abd)
{
uint_t chunkcnt = abd_is_linear(abd) || abd_is_gang(abd) ? 0 :
abd_scatter_chunkcnt(abd);
ssize_t size = MAX(sizeof (abd_t),
offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
- mutex_destroy(&abd->abd_mtx);
- ASSERT(!list_link_active(&abd->abd_gang_link));
kmem_free(abd, size);
ABDSTAT_INCR(abdstat_struct_size, -size);
}
@@ -249,10 +245,8 @@ abd_alloc_zero_scatter(void)
abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP);
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
- abd_zero_scatter->abd_flags = ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
+ abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
- abd_zero_scatter->abd_parent = NULL;
- zfs_refcount_create(&abd_zero_scatter->abd_children);
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
ABD_SCATTER(abd_zero_scatter).abd_chunk_size =
@@ -270,7 +264,6 @@ abd_alloc_zero_scatter(void)
static void
abd_free_zero_scatter(void)
{
- zfs_refcount_destroy(&abd_zero_scatter->abd_children);
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size);
@@ -355,10 +348,8 @@ abd_alloc_scatter_offset_chunkcnt(size_t chunkcnt)
}
abd_t *
-abd_get_offset_scatter(abd_t *sabd, size_t off)
+abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
{
- abd_t *abd = NULL;
-
abd_verify(sabd);
ASSERT3U(off, <=, sabd->abd_size);
@@ -366,14 +357,24 @@ abd_get_offset_scatter(abd_t *sabd, size_t off)
uint_t chunkcnt = abd_scatter_chunkcnt(sabd) -
(new_offset / zfs_abd_chunk_size);
- abd = abd_alloc_scatter_offset_chunkcnt(chunkcnt);
+ /*
+ * If an abd struct is provided, it is only the minimum size. If we
+ * need additional chunks, we need to allocate a new struct.
+ */
+ if (abd != NULL &&
+ offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]) >
+ sizeof (abd_t)) {
+ abd = NULL;
+ }
+
+ if (abd == NULL)
+ abd = abd_alloc_struct(chunkcnt * zfs_abd_chunk_size);
/*
* Even if this buf is filesystem metadata, we only track that
* if we own the underlying data buffer, which is not true in
* this case. Therefore, we don't ever use ABD_FLAG_META here.
*/
- abd->abd_flags = 0;
ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size;
ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
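
These hunks split ABD struct management: the OS layer shrinks to abd_alloc_struct_impl()/abd_free_struct_impl(), and the cross-platform initialization (gang list link, mutex, flags) moves into common code — see the large module/zfs/abd.c entry in the diffstat. That is also why abd_zero_scatter->abd_flags switches from "=" to "|=". An approximate sketch of the common wrapper, hedged rather than quoted from abd.c:

abd_t *
abd_alloc_struct(size_t size)
{
	abd_t *abd = abd_alloc_struct_impl(size);

	/* shared fields, formerly initialized per-OS */
	abd->abd_flags = 0;	/* callers now OR in ABD_FLAG_* bits */
	list_link_init(&abd->abd_gang_link);
	mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
	return (abd);
}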
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
index b86ffc59a21d..fbf998416234 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/crypto_os.c
@@ -197,7 +197,7 @@ static void
freebsd_crypt_uio_debug_log(boolean_t encrypt,
freebsd_crypt_session_t *input_sessionp,
struct zio_crypt_info *c_info,
- uio_t *data_uio,
+ zfs_uio_t *data_uio,
crypto_key_t *key,
uint8_t *ivbuf,
size_t datalen,
@@ -222,13 +222,13 @@ freebsd_crypt_uio_debug_log(boolean_t encrypt,
printf("%02x ", b[i]);
}
printf("}\n");
- for (int i = 0; i < data_uio->uio_iovcnt; i++) {
+ for (int i = 0; i < zfs_uio_iovcnt(data_uio); i++) {
printf("\tiovec #%d: <%p, %u>\n", i,
- data_uio->uio_iov[i].iov_base,
- (unsigned int)data_uio->uio_iov[i].iov_len);
- total += data_uio->uio_iov[i].iov_len;
+ zfs_uio_iovbase(data_uio, i),
+ (unsigned int)zfs_uio_iovlen(data_uio, i));
+ total += zfs_uio_iovlen(data_uio, i);
}
- data_uio->uio_resid = total;
+ zfs_uio_resid(data_uio) = total;
#endif
}
/*
@@ -310,7 +310,7 @@ int
freebsd_crypt_uio(boolean_t encrypt,
freebsd_crypt_session_t *input_sessionp,
struct zio_crypt_info *c_info,
- uio_t *data_uio,
+ zfs_uio_t *data_uio,
crypto_key_t *key,
uint8_t *ivbuf,
size_t datalen,
@@ -323,9 +323,9 @@ freebsd_crypt_uio(boolean_t encrypt,
freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio,
key, ivbuf, datalen, auth_len);
- for (int i = 0; i < data_uio->uio_iovcnt; i++)
- total += data_uio->uio_iov[i].iov_len;
- data_uio->uio_resid = total;
+ for (int i = 0; i < zfs_uio_iovcnt(data_uio); i++)
+ total += zfs_uio_iovlen(data_uio, i);
+ zfs_uio_resid(data_uio) = total;
if (input_sessionp == NULL) {
session = kmem_zalloc(sizeof (*session), KM_SLEEP);
error = freebsd_crypt_newsession(session, c_info, key);
@@ -343,7 +343,7 @@ freebsd_crypt_uio(boolean_t encrypt,
CRYPTO_OP_VERIFY_DIGEST;
}
crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_IV_SEPARATE;
- crypto_use_uio(crp, data_uio);
+ crypto_use_uio(crp, GET_UIO_STRUCT(data_uio));
crp->crp_aad_start = 0;
crp->crp_aad_length = auth_len;
@@ -480,7 +480,7 @@ int
freebsd_crypt_uio(boolean_t encrypt,
freebsd_crypt_session_t *input_sessionp,
struct zio_crypt_info *c_info,
- uio_t *data_uio,
+ zfs_uio_t *data_uio,
crypto_key_t *key,
uint8_t *ivbuf,
size_t datalen,
@@ -564,7 +564,7 @@ freebsd_crypt_uio(boolean_t encrypt,
crp->crp_session = session->fs_sid;
crp->crp_ilen = auth_len + datalen;
- crp->crp_buf = (void*)data_uio;
+ crp->crp_buf = (void*)GET_UIO_STRUCT(data_uio);
crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC;
auth_desc->crd_skip = 0;
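
Both freebsd_crypt_uio() variants (and the debug logger) recompute the residual byte count by summing the iovec lengths before handing the uio to OpenCrypto. A tiny hypothetical helper capturing that idiom — zfs_uio_resid() is assignable because, as in the hunks above, the accessor expands to the underlying struct uio field:

static void
zfs_uio_recalc_resid(zfs_uio_t *uio)	/* hypothetical name */
{
	size_t total = 0;

	for (int i = 0; i < zfs_uio_iovcnt(uio); i++)
		total += zfs_uio_iovlen(uio, i);
	zfs_uio_resid(uio) = total;	/* lvalue accessor, as in the diff */
}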
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c
index 6901f1ca915a..f472aecdbafb 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c
@@ -251,7 +251,7 @@ sfs_reclaim_vnode(vnode_t *vp)
static int
sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
- uio_t *uio, off_t *offp)
+ zfs_uio_t *uio, off_t *offp)
{
struct dirent entry;
int error;
@@ -260,26 +260,26 @@ sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
if (ap->a_ncookies != NULL)
*ap->a_ncookies = 0;
- if (uio->uio_resid < sizeof (entry))
+ if (zfs_uio_resid(uio) < sizeof (entry))
return (SET_ERROR(EINVAL));
- if (uio->uio_offset < 0)
+ if (zfs_uio_offset(uio) < 0)
return (SET_ERROR(EINVAL));
- if (uio->uio_offset == 0) {
+ if (zfs_uio_offset(uio) == 0) {
entry.d_fileno = id;
entry.d_type = DT_DIR;
entry.d_name[0] = '.';
entry.d_name[1] = '\0';
entry.d_namlen = 1;
entry.d_reclen = sizeof (entry);
- error = vfs_read_dirent(ap, &entry, uio->uio_offset);
+ error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
if (error != 0)
return (SET_ERROR(error));
}
- if (uio->uio_offset < sizeof (entry))
+ if (zfs_uio_offset(uio) < sizeof (entry))
return (SET_ERROR(EINVAL));
- if (uio->uio_offset == sizeof (entry)) {
+ if (zfs_uio_offset(uio) == sizeof (entry)) {
entry.d_fileno = parent_id;
entry.d_type = DT_DIR;
entry.d_name[0] = '.';
@@ -287,7 +287,7 @@ sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
entry.d_name[2] = '\0';
entry.d_namlen = 2;
entry.d_reclen = sizeof (entry);
- error = vfs_read_dirent(ap, &entry, uio->uio_offset);
+ error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio));
if (error != 0)
return (SET_ERROR(error));
}
@@ -666,21 +666,23 @@ zfsctl_root_readdir(struct vop_readdir_args *ap)
vnode_t *vp = ap->a_vp;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
zfsctl_root_t *node = vp->v_data;
- uio_t *uio = ap->a_uio;
+ zfs_uio_t uio;
int *eofp = ap->a_eofflag;
off_t dots_offset;
int error;
+ zfs_uio_init(&uio, ap->a_uio);
+
ASSERT(vp->v_type == VDIR);
- error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, uio,
+ error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio,
&dots_offset);
if (error != 0) {
if (error == ENAMETOOLONG) /* ran out of destination space */
error = 0;
return (error);
}
- if (uio->uio_offset != dots_offset)
+ if (zfs_uio_offset(&uio) != dots_offset)
return (SET_ERROR(EINVAL));
CTASSERT(sizeof (node->snapdir->sn_name) <= sizeof (entry.d_name));
@@ -689,7 +691,7 @@ zfsctl_root_readdir(struct vop_readdir_args *ap)
strcpy(entry.d_name, node->snapdir->sn_name);
entry.d_namlen = strlen(entry.d_name);
entry.d_reclen = sizeof (entry);
- error = vfs_read_dirent(ap, &entry, uio->uio_offset);
+ error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
if (error != 0) {
if (error == ENAMETOOLONG)
error = 0;
@@ -1030,15 +1032,17 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
struct dirent entry;
vnode_t *vp = ap->a_vp;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
- uio_t *uio = ap->a_uio;
+ zfs_uio_t uio;
int *eofp = ap->a_eofflag;
off_t dots_offset;
int error;
+ zfs_uio_init(&uio, ap->a_uio);
+
ASSERT(vp->v_type == VDIR);
- error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, uio,
- &dots_offset);
+ error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap,
+ &uio, &dots_offset);
if (error != 0) {
if (error == ENAMETOOLONG) /* ran out of destination space */
error = 0;
@@ -1050,7 +1054,7 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
uint64_t cookie;
uint64_t id;
- cookie = uio->uio_offset - dots_offset;
+ cookie = zfs_uio_offset(&uio) - dots_offset;
dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname),
@@ -1071,14 +1075,14 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
strcpy(entry.d_name, snapname);
entry.d_namlen = strlen(entry.d_name);
entry.d_reclen = sizeof (entry);
- error = vfs_read_dirent(ap, &entry, uio->uio_offset);
+ error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio));
if (error != 0) {
if (error == ENAMETOOLONG)
error = 0;
ZFS_EXIT(zfsvfs);
return (SET_ERROR(error));
}
- uio->uio_offset = cookie + dots_offset;
+ zfs_uio_setoffset(&uio, cookie + dots_offset);
}
/* NOTREACHED */
}
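
zfsctl_root_readdir() and zfsctl_snapdir_readdir() show the conversion pattern used by every FreeBSD vnode operation in this merge: the VOP handler receives a native struct uio, wraps it in a stack zfs_uio_t with zfs_uio_init(), and passes the wrapper down to the shared ZFS code. The shape, condensed (zfs_freebsd_read() later in this diff is a real instance; the function name here is illustrative):

static int
example_vop_read(struct vop_read_args *ap)
{
	zfs_uio_t uio;

	zfs_uio_init(&uio, ap->a_uio);	/* wrap the native uio, no copy */
	return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
	    ap->a_cred));
}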
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
index 8fb259f4ba76..06546c12e420 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
@@ -290,7 +290,7 @@ zfs_file_private(zfs_file_t *fp)
int
zfs_file_unlink(const char *fnamep)
{
- enum uio_seg seg = UIO_SYSSPACE;
+ zfs_uio_seg_t seg = UIO_SYSSPACE;
int rc;
#if __FreeBSD_version >= 1300018
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index 42f5786ce5c7..d5f0da9ecd4b 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -518,7 +518,7 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
* in one single dmu_read() call.
*/
int
-mappedread_sf(znode_t *zp, int nbytes, uio_t *uio)
+mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
vnode_t *vp = ZTOV(zp);
objset_t *os = zp->z_zfsvfs->z_os;
@@ -530,14 +530,14 @@ mappedread_sf(znode_t *zp, int nbytes, uio_t *uio)
int len = nbytes;
int error = 0;
- ASSERT(uio->uio_segflg == UIO_NOCOPY);
+ ASSERT(zfs_uio_segflg(uio) == UIO_NOCOPY);
ASSERT(vp->v_mount != NULL);
obj = vp->v_object;
ASSERT(obj != NULL);
- ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);
+ ASSERT((zfs_uio_offset(uio) & PAGEOFFSET) == 0);
zfs_vmobject_wlock_12(obj);
- for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
+ for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
int bytes = MIN(PAGESIZE, len);
pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
@@ -584,8 +584,7 @@ mappedread_sf(znode_t *zp, int nbytes, uio_t *uio)
}
if (error)
break;
- uio->uio_resid -= bytes;
- uio->uio_offset += bytes;
+ zfs_uio_advance(uio, bytes);
len -= bytes;
}
zfs_vmobject_wunlock_12(obj);
@@ -603,7 +602,7 @@ mappedread_sf(znode_t *zp, int nbytes, uio_t *uio)
* the file is memory mapped.
*/
int
-mappedread(znode_t *zp, int nbytes, uio_t *uio)
+mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
vnode_t *vp = ZTOV(zp);
vm_object_t obj;
@@ -616,7 +615,7 @@ mappedread(znode_t *zp, int nbytes, uio_t *uio)
obj = vp->v_object;
ASSERT(obj != NULL);
- start = uio->uio_loffset;
+ start = zfs_uio_offset(uio);
off = start & PAGEOFFSET;
zfs_vmobject_wlock_12(obj);
for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
@@ -629,7 +628,8 @@ mappedread(znode_t *zp, int nbytes, uio_t *uio)
zfs_vmobject_wunlock_12(obj);
va = zfs_map_page(pp, &sf);
- error = vn_io_fault_uiomove(va + off, bytes, uio);
+ error = vn_io_fault_uiomove(va + off, bytes,
+ GET_UIO_STRUCT(uio));
zfs_unmap_page(sf);
zfs_vmobject_wlock_12(obj);
page_unhold(pp);
@@ -1649,7 +1649,7 @@ zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
*/
/* ARGSUSED */
static int
-zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
+zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
int *ncookies, ulong_t **cookies)
{
znode_t *zp = VTOZ(vp);
@@ -1694,7 +1694,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
/*
* Check for valid iov_len.
*/
- if (uio->uio_iov->iov_len <= 0) {
+ if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
@@ -1709,7 +1709,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
error = 0;
os = zfsvfs->z_os;
- offset = uio->uio_loffset;
+ offset = zfs_uio_offset(uio);
prefetch = zp->z_zn_prefetch;
/*
@@ -1730,9 +1730,9 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
/*
* Get space to change directory entries into fs independent format.
*/
- iovp = uio->uio_iov;
+ iovp = GET_UIO_STRUCT(uio)->uio_iov;
bytes_wanted = iovp->iov_len;
- if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
+ if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
bufsize = bytes_wanted;
outbuf = kmem_alloc(bufsize, KM_SLEEP);
odp = (struct dirent64 *)outbuf;
@@ -1747,7 +1747,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
/*
* Minimum entry size is dirent size and 1 byte for a file name.
*/
- ncooks = uio->uio_resid / (sizeof (struct dirent) -
+ ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
sizeof (((struct dirent *)NULL)->d_name) + 1);
cooks = malloc(ncooks * sizeof (ulong_t), M_TEMP, M_WAITOK);
*cookies = cooks;
@@ -1927,20 +1927,21 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
if (ncookies != NULL)
*ncookies -= ncooks;
- if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
+ if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
iovp->iov_base += outcount;
iovp->iov_len -= outcount;
- uio->uio_resid -= outcount;
- } else if ((error = uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
+ zfs_uio_resid(uio) -= outcount;
+ } else if ((error =
+ zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
/*
* Reset the pointer.
*/
- offset = uio->uio_loffset;
+ offset = zfs_uio_offset(uio);
}
update:
zap_cursor_fini(&zc);
- if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
+ if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
kmem_free(outbuf, bufsize);
if (error == ENOENT)
@@ -1948,7 +1949,7 @@ update:
ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
- uio->uio_loffset = offset;
+ zfs_uio_setoffset(uio, offset);
ZFS_EXIT(zfsvfs);
if (error != 0 && cookies != NULL) {
free(*cookies, M_TEMP);
@@ -3631,7 +3632,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
*/
/* ARGSUSED */
static int
-zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
+zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
@@ -4414,8 +4415,9 @@ struct vop_read_args {
static int
zfs_freebsd_read(struct vop_read_args *ap)
{
-
- return (zfs_read(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag),
+ zfs_uio_t uio;
+ zfs_uio_init(&uio, ap->a_uio);
+ return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
ap->a_cred));
}
@@ -4431,8 +4433,9 @@ struct vop_write_args {
static int
zfs_freebsd_write(struct vop_write_args *ap)
{
-
- return (zfs_write(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag),
+ zfs_uio_t uio;
+ zfs_uio_init(&uio, ap->a_uio);
+ return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
ap->a_cred));
}
@@ -4704,8 +4707,9 @@ struct vop_readdir_args {
static int
zfs_freebsd_readdir(struct vop_readdir_args *ap)
{
-
- return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
+ zfs_uio_t uio;
+ zfs_uio_init(&uio, ap->a_uio);
+ return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies));
}
@@ -5008,26 +5012,27 @@ struct vop_readlink_args {
static int
zfs_freebsd_readlink(struct vop_readlink_args *ap)
{
+ zfs_uio_t uio;
znode_t *zp = VTOZ(ap->a_vp);
- struct uio *auio;
char *symlink, *base;
size_t symlink_len;
int error;
bool trycache;
- auio = ap->a_uio;
+ zfs_uio_init(&uio, ap->a_uio);
trycache = false;
- if (auio->uio_segflg == UIO_SYSSPACE && auio->uio_iovcnt == 1) {
- base = auio->uio_iov->iov_base;
- symlink_len = auio->uio_iov->iov_len;
+ if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
+ zfs_uio_iovcnt(&uio) == 1) {
+ base = zfs_uio_iovbase(&uio, 0);
+ symlink_len = zfs_uio_iovlen(&uio, 0);
trycache = true;
}
- error = zfs_readlink(ap->a_vp, auio, ap->a_cred, NULL);
+ error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
error != 0 || !trycache) {
return (error);
}
- symlink_len -= auio->uio_resid;
+ symlink_len -= zfs_uio_resid(&uio);
symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
if (symlink != NULL) {
memcpy(symlink, base, symlink_len);
@@ -5504,11 +5509,14 @@ zfs_listextattr(struct vop_listextattr_args *ap)
uint8_t dirbuf[sizeof (struct dirent)];
struct dirent *dp;
struct iovec aiov;
- struct uio auio, *uio = ap->a_uio;
+ struct uio auio;
size_t *sizep = ap->a_size;
size_t plen;
vnode_t *xvp = NULL, *vp;
int done, error, eof, pos;
+ zfs_uio_t uio;
+
+ zfs_uio_init(&uio, ap->a_uio);
/*
* If the xattr property is off, refuse the request.
@@ -5590,15 +5598,16 @@ zfs_listextattr(struct vop_listextattr_args *ap)
nlen = dp->d_namlen - plen;
if (sizep != NULL)
*sizep += 1 + nlen;
- else if (uio != NULL) {
+ else if (GET_UIO_STRUCT(&uio) != NULL) {
/*
* Format of extattr name entry is one byte for
* length and the rest for name.
*/
- error = uiomove(&nlen, 1, uio->uio_rw, uio);
+ error = zfs_uiomove(&nlen, 1, zfs_uio_rw(&uio),
+ &uio);
if (error == 0) {
- error = uiomove(dp->d_name + plen, nlen,
- uio->uio_rw, uio);
+ error = zfs_uiomove(dp->d_name + plen,
+ nlen, zfs_uio_rw(&uio), &uio);
}
if (error != 0)
break;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
index d9f2635b0129..0491b2ff3e28 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
@@ -1912,8 +1912,10 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
size_t complen;
int is_xattrdir;
- if (prevdb)
+ if (prevdb) {
+ ASSERT(prevhdl != NULL);
zfs_release_sa_handle(prevhdl, prevdb, FTAG);
+ }
if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
&is_xattrdir)) != 0)
@@ -2020,7 +2022,7 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
void
-zfs_inode_update(znode_t *zp)
+zfs_znode_update_vfs(znode_t *zp)
{
vm_object_t object;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c
index fd2beee7bdd2..9fe678d2574f 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c
@@ -404,7 +404,7 @@ int failed_decrypt_size;
static int
zio_do_crypt_uio_opencrypto(boolean_t encrypt, freebsd_crypt_session_t *sess,
uint64_t crypt, crypto_key_t *key, uint8_t *ivbuf, uint_t datalen,
- uio_t *uio, uint_t auth_len)
+ zfs_uio_t *uio, uint_t auth_len)
{
zio_crypt_info_t *ci;
int ret;
@@ -439,7 +439,8 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
* input and output. Also, the AAD (for AES-GMC at least)
* needs to logically go in front.
*/
- uio_t cuio;
+ zfs_uio_t cuio;
+ struct uio cuio_s;
iovec_t iovecs[4];
uint64_t crypt = key->zk_crypt;
uint_t enc_len, keydata_len, aad_len;
@@ -447,6 +448,8 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+ zfs_uio_init(&cuio, &cuio_s);
+
keydata_len = zio_crypt_table[crypt].ci_keylen;
/* generate iv for wrapping the master and hmac key */
@@ -489,9 +492,9 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
iovecs[0].iov_len = aad_len;
enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN;
- cuio.uio_iov = iovecs;
- cuio.uio_iovcnt = 4;
- cuio.uio_segflg = UIO_SYSSPACE;
+ GET_UIO_STRUCT(&cuio)->uio_iov = iovecs;
+ zfs_uio_iovcnt(&cuio) = 4;
+ zfs_uio_segflg(&cuio) = UIO_SYSSPACE;
/* encrypt the keys and store the resulting ciphertext and mac */
ret = zio_do_crypt_uio_opencrypto(B_TRUE, NULL, crypt, cwkey,
@@ -517,7 +520,8 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
* input and output. Also, the AAD (for AES-GMC at least)
* needs to logically go in front.
*/
- uio_t cuio;
+ zfs_uio_t cuio;
+ struct uio cuio_s;
iovec_t iovecs[4];
void *src, *dst;
uint_t enc_len, keydata_len, aad_len;
@@ -528,6 +532,8 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
keydata_len = zio_crypt_table[crypt].ci_keylen;
rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+ zfs_uio_init(&cuio, &cuio_s);
+
/*
* Since we only support one buffer, we need to copy
* the encrypted buffer (source) to the plain buffer
@@ -565,9 +571,9 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
iovecs[0].iov_base = aad;
iovecs[0].iov_len = aad_len;
- cuio.uio_iov = iovecs;
- cuio.uio_iovcnt = 4;
- cuio.uio_segflg = UIO_SYSSPACE;
+ GET_UIO_STRUCT(&cuio)->uio_iov = iovecs;
+ zfs_uio_iovcnt(&cuio) = 4;
+ zfs_uio_segflg(&cuio) = UIO_SYSSPACE;
/* decrypt the keys and store the result in the output buffers */
ret = zio_do_crypt_uio_opencrypto(B_FALSE, NULL, crypt, cwkey,
@@ -1150,10 +1156,11 @@ error:
}
static void
-zio_crypt_destroy_uio(uio_t *uio)
+zio_crypt_destroy_uio(zfs_uio_t *uio)
{
- if (uio->uio_iov)
- kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t));
+ if (GET_UIO_STRUCT(uio)->uio_iov)
+ kmem_free(GET_UIO_STRUCT(uio)->uio_iov,
+ zfs_uio_iovcnt(uio) * sizeof (iovec_t));
}
/*
@@ -1247,14 +1254,14 @@ zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
* accommodate some of the drivers, the authbuf needs to be logically before
* the data. This means that we need to copy the source to the destination,
* and set up an extra iovec_t at the beginning to handle the authbuf.
- * It also means we'll only return one uio_t.
+ * It also means we'll only return one zfs_uio_t.
*/
/* ARGSUSED */
static int
zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
- uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio,
- uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
+ uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio,
+ zfs_uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
boolean_t *no_crypt)
{
uint8_t *aadbuf = zio_buf_alloc(datalen);
@@ -1398,8 +1405,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
*enc_len = total_len;
*authbuf = aadbuf;
*auth_len = aad_len;
- out_uio->uio_iov = dst_iovecs;
- out_uio->uio_iovcnt = nr_iovecs;
+ GET_UIO_STRUCT(out_uio)->uio_iov = dst_iovecs;
+ zfs_uio_iovcnt(out_uio) = nr_iovecs;
return (0);
}
@@ -1410,7 +1417,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
static int
zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
- uio_t *puio, uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf,
+ zfs_uio_t *puio, zfs_uio_t *out_uio, uint_t *enc_len, uint8_t **authbuf,
uint_t *auth_len, boolean_t *no_crypt)
{
uint8_t *aadbuf = zio_buf_alloc(datalen);
@@ -1547,8 +1554,8 @@ zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
*enc_len = total_len;
*authbuf = aadbuf;
*auth_len = aad_len;
- out_uio->uio_iov = dst_iovecs;
- out_uio->uio_iovcnt = nr_iovecs;
+ GET_UIO_STRUCT(out_uio)->uio_iov = dst_iovecs;
+ zfs_uio_iovcnt(out_uio) = nr_iovecs;
return (0);
}
@@ -1556,7 +1563,7 @@ zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
/* ARGSUSED */
static int
zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
- uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *out_uio,
+ uint8_t *cipherbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *out_uio,
uint_t *enc_len)
{
int ret;
@@ -1584,8 +1591,8 @@ zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
cipher_iovecs[0].iov_len = datalen;
*enc_len = datalen;
- out_uio->uio_iov = cipher_iovecs;
- out_uio->uio_iovcnt = nr_cipher;
+ GET_UIO_STRUCT(out_uio)->uio_iov = cipher_iovecs;
+ zfs_uio_iovcnt(out_uio) = nr_cipher;
return (0);
@@ -1596,8 +1603,8 @@ error:
kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t));
*enc_len = 0;
- out_uio->uio_iov = NULL;
- out_uio->uio_iovcnt = 0;
+ GET_UIO_STRUCT(out_uio)->uio_iov = NULL;
+ zfs_uio_iovcnt(out_uio) = 0;
return (ret);
}
@@ -1613,8 +1620,8 @@ error:
static int
zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot,
uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
- uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
- uint_t *auth_len, boolean_t *no_crypt)
+ uint8_t *mac, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len,
+ uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt)
{
int ret;
iovec_t *mac_iov;
@@ -1646,9 +1653,11 @@ zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot,
goto error;
/* populate the uios */
- cuio->uio_segflg = UIO_SYSSPACE;
+ zfs_uio_segflg(cuio) = UIO_SYSSPACE;
- mac_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]);
+ mac_iov =
+ ((iovec_t *)&(GET_UIO_STRUCT(cuio)->
+ uio_iov[zfs_uio_iovcnt(cuio) - 1]));
mac_iov->iov_base = (void *)mac;
mac_iov->iov_len = ZIO_DATA_MAC_LEN;
@@ -1675,14 +1684,18 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
uint64_t crypt = key->zk_crypt;
uint_t keydata_len = zio_crypt_table[crypt].ci_keylen;
uint_t enc_len, auth_len;
- uio_t puio, cuio;
+ zfs_uio_t puio, cuio;
+ struct uio puio_s, cuio_s;
uint8_t enc_keydata[MASTER_KEY_MAX_LEN];
crypto_key_t tmp_ckey, *ckey = NULL;
freebsd_crypt_session_t *tmpl = NULL;
uint8_t *authbuf = NULL;
- bzero(&puio, sizeof (uio_t));
- bzero(&cuio, sizeof (uio_t));
+
+ zfs_uio_init(&puio, &puio_s);
+ zfs_uio_init(&cuio, &cuio_s);
+ bzero(GET_UIO_STRUCT(&puio), sizeof (struct uio));
+ bzero(GET_UIO_STRUCT(&cuio), sizeof (struct uio));
#ifdef FCRYPTO_DEBUG
printf("%s(%s, %p, %p, %d, %p, %p, %u, %s, %p, %p, %p)\n",
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index 6c44e3681709..2389b1a06355 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -746,12 +746,15 @@ out:
*/
static int
-zvol_cdev_read(struct cdev *dev, struct uio *uio, int ioflag)
+zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
{
zvol_state_t *zv;
uint64_t volsize;
zfs_locked_range_t *lr;
int error = 0;
+ zfs_uio_t uio;
+
+ zfs_uio_init(&uio, uio_s);
zv = dev->si_drv2;
@@ -760,20 +763,20 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio, int ioflag)
* uio_loffset == volsize isn't an error as
* it's required for EOF processing.
*/
- if (uio->uio_resid > 0 &&
- (uio->uio_loffset < 0 || uio->uio_loffset > volsize))
+ if (zfs_uio_resid(&uio) > 0 &&
+ (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
return (SET_ERROR(EIO));
- lr = zfs_rangelock_enter(&zv->zv_rangelock, uio->uio_loffset,
- uio->uio_resid, RL_READER);
- while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
- uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
+ lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
+ zfs_uio_resid(&uio), RL_READER);
+ while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
+ uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);
/* don't read past the end */
- if (bytes > volsize - uio->uio_loffset)
- bytes = volsize - uio->uio_loffset;
+ if (bytes > volsize - zfs_uio_offset(&uio))
+ bytes = volsize - zfs_uio_offset(&uio);
- error = dmu_read_uio_dnode(zv->zv_dn, uio, bytes);
+ error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
if (error) {
/* convert checksum errors into IO errors */
if (error == ECKSUM)
@@ -787,20 +790,23 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio, int ioflag)
}
static int
-zvol_cdev_write(struct cdev *dev, struct uio *uio, int ioflag)
+zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
{
zvol_state_t *zv;
uint64_t volsize;
zfs_locked_range_t *lr;
int error = 0;
boolean_t sync;
+ zfs_uio_t uio;
zv = dev->si_drv2;
volsize = zv->zv_volsize;
- if (uio->uio_resid > 0 &&
- (uio->uio_loffset < 0 || uio->uio_loffset > volsize))
+ zfs_uio_init(&uio, uio_s);
+
+ if (zfs_uio_resid(&uio) > 0 &&
+ (zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
return (SET_ERROR(EIO));
sync = (ioflag & IO_SYNC) ||
@@ -809,11 +815,11 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio, int ioflag)
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
zvol_ensure_zilog(zv);
- lr = zfs_rangelock_enter(&zv->zv_rangelock, uio->uio_loffset,
- uio->uio_resid, RL_WRITER);
- while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
- uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
- uint64_t off = uio->uio_loffset;
+ lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
+ zfs_uio_resid(&uio), RL_WRITER);
+ while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) {
+ uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1);
+ uint64_t off = zfs_uio_offset(&uio);
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
if (bytes > volsize - off) /* don't write past the end */
@@ -825,7 +831,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio, int ioflag)
dmu_tx_abort(tx);
break;
}
- error = dmu_write_uio_dnode(zv->zv_dn, uio, bytes, tx);
+ error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
if (error == 0)
zvol_log_write(zv, tx, off, bytes, sync);
dmu_tx_commit(tx);
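Both cdev entry points above are converted the same way: the FreeBSD struct uio arrives as uio_s, is wrapped once with zfs_uio_init(), and every later access goes through zfs_uio_*() accessors. A self-contained userland sketch of that wrapper pattern, with invented stand-ins for the real types and macros:

#include <stdio.h>

struct uio { long uio_resid; long uio_loffset; };	/* fake platform type */
typedef struct zfs_uio { struct uio *uio; } zfs_uio_t;	/* fake wrapper */

#define	GET_UIO_STRUCT(u)	((u)->uio)
#define	zfs_uio_resid(u)	(GET_UIO_STRUCT(u)->uio_resid)
#define	zfs_uio_offset(u)	(GET_UIO_STRUCT(u)->uio_loffset)

static void
zfs_uio_init(zfs_uio_t *zu, struct uio *u)
{
	zu->uio = u;
}

int
main(void)
{
	struct uio u = { .uio_resid = 4096, .uio_loffset = 0 };
	zfs_uio_t zu;

	zfs_uio_init(&zu, &u);
	printf("resid=%ld off=%ld\n", zfs_uio_resid(&zu), zfs_uio_offset(&zu));
	return (0);
}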
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
index 1da7618185ec..36fdff72a133 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c
@@ -284,9 +284,7 @@ int64_t
__divdi3(int64_t u, int64_t v)
{
int64_t q, t;
- // cppcheck-suppress shiftTooManyBitsSigned
q = __udivdi3(abs64(u), abs64(v));
- // cppcheck-suppress shiftTooManyBitsSigned
t = (u ^ v) >> 63; // If u, v have different
return ((q ^ t) - t); // signs, negate q.
}
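With the cppcheck suppressions gone, the sign handling in __divdi3() deserves a note: t = (u ^ v) >> 63 evaluates to 0 when u and v share a sign and to -1 otherwise (assuming the usual arithmetic right shift of signed values, which is what cppcheck objected to), and (q ^ t) - t is then either a no-op or a two's-complement negation. A small userland check:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int64_t
sign_fix(int64_t q, int64_t u, int64_t v)
{
	int64_t t = (u ^ v) >> 63;	/* assumes arithmetic shift: 0 or -1 */
	return ((q ^ t) - t);		/* two's-complement negate if t == -1 */
}

int
main(void)
{
	assert(sign_fix(7, 21, 3) == 7);	/* same signs: unchanged */
	assert(sign_fix(7, -21, 3) == -7);	/* mixed signs: negated */
	printf("ok\n");
	return (0);
}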
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
index dbbf72c8569d..c7f1aadf784e 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c
@@ -486,7 +486,7 @@ proc_kstat_open(struct inode *inode, struct file *filp)
f = filp->private_data;
f->private = PDE_DATA(inode);
- return (rc);
+ return (0);
}
static ssize_t
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
index e8d89bfeabe5..61631256c858 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -274,8 +274,6 @@ taskq_lowest_id(taskq_t *tq)
taskq_ent_t *t;
taskq_thread_t *tqt;
- ASSERT(tq);
-
if (!list_empty(&tq->tq_pend_list)) {
t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list);
lowest_id = MIN(lowest_id, t->tqent_id);
@@ -995,6 +993,7 @@ error:
spin_unlock_irqrestore(&tq->tq_lock, flags);
tsd_set(taskq_tsd, NULL);
+ thread_exit();
return (0);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
index 0abac228447f..d82e5f4dcf15 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@@ -185,7 +185,7 @@ abd_chunkcnt_for_bytes(size_t size)
}
abd_t *
-abd_alloc_struct(size_t size)
+abd_alloc_struct_impl(size_t size)
{
/*
* In Linux we do not use the size passed in during ABD
@@ -193,18 +193,14 @@ abd_alloc_struct(size_t size)
*/
abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
ASSERT3P(abd, !=, NULL);
- list_link_init(&abd->abd_gang_link);
- mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));
return (abd);
}
void
-abd_free_struct(abd_t *abd)
+abd_free_struct_impl(abd_t *abd)
{
- mutex_destroy(&abd->abd_mtx);
- ASSERT(!list_link_active(&abd->abd_gang_link));
kmem_cache_free(abd_cache, abd);
ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
}
@@ -472,14 +468,12 @@ abd_alloc_zero_scatter(void)
ASSERT3U(table.nents, ==, nr_pages);
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
- abd_zero_scatter->abd_flags = ABD_FLAG_OWNER;
+ abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl;
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
- abd_zero_scatter->abd_parent = NULL;
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
- zfs_refcount_create(&abd_zero_scatter->abd_children);
abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) {
sg_set_page(sg, abd_zero_page, PAGESIZE, 0);
@@ -599,12 +593,11 @@ abd_alloc_zero_scatter(void)
abd_zero_page = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
memset(abd_zero_page, 0, PAGESIZE);
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
- abd_zero_scatter->abd_flags = ABD_FLAG_OWNER;
+ abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
- abd_zero_scatter->abd_parent = NULL;
zfs_refcount_create(&abd_zero_scatter->abd_children);
ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(nr_pages *
sizeof (struct scatterlist), KM_SLEEP);
@@ -678,7 +671,6 @@ abd_verify_scatter(abd_t *abd)
static void
abd_free_zero_scatter(void)
{
- zfs_refcount_destroy(&abd_zero_scatter->abd_children);
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE);
ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);
@@ -747,9 +739,7 @@ abd_free_linear_page(abd_t *abd)
ABD_SCATTER(abd).abd_sgl = sg;
abd_free_chunks(abd);
- zfs_refcount_destroy(&abd->abd_children);
abd_update_scatter_stats(abd, ABDSTAT_DECR);
- abd_free_struct(abd);
}
/*
@@ -770,9 +760,8 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata)
}
abd_t *
-abd_get_offset_scatter(abd_t *sabd, size_t off)
+abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
{
- abd_t *abd = NULL;
int i = 0;
struct scatterlist *sg = NULL;
@@ -781,14 +770,14 @@ abd_get_offset_scatter(abd_t *sabd, size_t off)
size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
- abd = abd_alloc_struct(0);
+ if (abd == NULL)
+ abd = abd_alloc_struct(0);
/*
* Even if this buf is filesystem metadata, we only track that
* if we own the underlying data buffer, which is not true in
* this case. Therefore, we don't ever use ABD_FLAG_META here.
*/
- abd->abd_flags = 0;
abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
if (new_offset < sg->length)
@@ -936,17 +925,28 @@ abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
{
unsigned long pos;
- while (abd_is_gang(abd))
- abd = abd_gang_get_offset(abd, &off);
+ if (abd_is_gang(abd)) {
+ unsigned long count = 0;
+
+ for (abd_t *cabd = abd_gang_get_offset(abd, &off);
+ cabd != NULL && size != 0;
+ cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
+ ASSERT3U(off, <, cabd->abd_size);
+ int mysize = MIN(size, cabd->abd_size - off);
+ count += abd_nr_pages_off(cabd, mysize, off);
+ size -= mysize;
+ off = 0;
+ }
+ return (count);
+ }
- ASSERT(!abd_is_gang(abd));
if (abd_is_linear(abd))
pos = (unsigned long)abd_to_buf(abd) + off;
else
pos = ABD_SCATTER(abd).abd_offset + off;
- return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
- (pos >> PAGE_SHIFT);
+ return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
+ (pos >> PAGE_SHIFT));
}
static unsigned int
@@ -1021,7 +1021,6 @@ unsigned int
abd_bio_map_off(struct bio *bio, abd_t *abd,
unsigned int io_size, size_t off)
{
- int i;
struct abd_iter aiter;
ASSERT3U(io_size, <=, abd->abd_size - off);
@@ -1035,7 +1034,7 @@ abd_bio_map_off(struct bio *bio, abd_t *abd,
abd_iter_init(&aiter, abd);
abd_iter_advance(&aiter, off);
- for (i = 0; i < bio->bi_max_vecs; i++) {
+ for (int i = 0; i < bio->bi_max_vecs; i++) {
struct page *pg;
size_t len, sgoff, pgoff;
struct scatterlist *sg;
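The return expression of abd_nr_pages_off() computes the number of pages spanned by [pos, pos + size) as ceil((pos + size) / PAGESIZE) - floor(pos / PAGESIZE), and the new gang branch simply sums that per child. A quick userland check of the formula, assuming 4K pages:

#include <stdio.h>

#define	PAGE_SHIFT	12
#define	PAGESIZE	(1UL << PAGE_SHIFT)	/* 4K pages assumed */

static unsigned long
nr_pages(unsigned long pos, unsigned long size)
{
	return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
	    (pos >> PAGE_SHIFT));
}

int
main(void)
{
	printf("%lu\n", nr_pages(0, 4096));	/* 1: exactly one page */
	printf("%lu\n", nr_pages(4095, 2));	/* 2: straddles a boundary */
	printf("%lu\n", nr_pages(100, 8192));	/* 3: 8K starting mid-page */
	return (0);
}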
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 4bd27d1b516f..b373f2c2e83c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -350,19 +350,14 @@ vdev_disk_close(vdev_t *v)
static dio_request_t *
vdev_disk_dio_alloc(int bio_count)
{
- dio_request_t *dr;
- int i;
-
- dr = kmem_zalloc(sizeof (dio_request_t) +
+ dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
sizeof (struct bio *) * bio_count, KM_SLEEP);
- if (dr) {
- atomic_set(&dr->dr_ref, 0);
- dr->dr_bio_count = bio_count;
- dr->dr_error = 0;
+ atomic_set(&dr->dr_ref, 0);
+ dr->dr_bio_count = bio_count;
+ dr->dr_error = 0;
- for (i = 0; i < dr->dr_bio_count; i++)
- dr->dr_bio[i] = NULL;
- }
+ for (int i = 0; i < dr->dr_bio_count; i++)
+ dr->dr_bio[i] = NULL;
return (dr);
}
@@ -536,8 +531,9 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
dio_request_t *dr;
uint64_t abd_offset;
uint64_t bio_offset;
- int bio_size, bio_count = 16;
- int i = 0, error = 0;
+ int bio_size;
+ int bio_count = 16;
+ int error = 0;
struct blk_plug plug;
/*
@@ -552,8 +548,6 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
retry:
dr = vdev_disk_dio_alloc(bio_count);
- if (dr == NULL)
- return (SET_ERROR(ENOMEM));
if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
bio_set_flags_failfast(bdev, &flags);
@@ -561,26 +555,28 @@ retry:
dr->dr_zio = zio;
/*
- * When the IO size exceeds the maximum bio size for the request
- * queue we are forced to break the IO in multiple bio's and wait
- * for them all to complete. Ideally, all pool users will set
- * their volume block size to match the maximum request size and
- * the common case will be one bio per vdev IO request.
+ * Since bio's can have up to BIO_MAX_PAGES=256 iovec's, each of which
+ * is at least 512 bytes and at most PAGESIZE (typically 4K), one bio
+ * can cover at least 128KB and at most 1MB. When the required number
+ * of iovec's exceeds this, we are forced to break the IO into multiple
+ * bio's and wait for them all to complete. This is likely if the
+ * recordsize property is increased beyond 1MB. The default
+ * bio_count=16 should typically accommodate the maximum-size zio of
+ * 16MB.
*/
abd_offset = 0;
bio_offset = io_offset;
- bio_size = io_size;
- for (i = 0; i <= dr->dr_bio_count; i++) {
+ bio_size = io_size;
+ for (int i = 0; i <= dr->dr_bio_count; i++) {
/* Finished constructing bio's for given buffer */
if (bio_size <= 0)
break;
/*
- * By default only 'bio_count' bio's per dio are allowed.
- * However, if we find ourselves in a situation where more
- * are needed we allocate a larger dio and warn the user.
+ * If additional bio's are required, we have to retry, but
+ * this should be rare - see the comment above.
*/
if (dr->dr_bio_count == i) {
vdev_disk_dio_free(dr);
@@ -622,9 +618,10 @@ retry:
blk_start_plug(&plug);
/* Submit all bio's associated with this dio */
- for (i = 0; i < dr->dr_bio_count; i++)
+ for (int i = 0; i < dr->dr_bio_count; i++) {
if (dr->dr_bio[i])
vdev_submit_bio(dr->dr_bio[i]);
+ }
if (dr->dr_bio_count > 1)
blk_finish_plug(&plug);
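The rewritten comment in __vdev_disk_physio() rests on simple arithmetic: up to 256 iovecs per bio, each 512 bytes to one page, gives 128KB to 1MB per bio, so the default 16 bios cover a 16MB zio. A throwaway userland check of those numbers (BIO_MAX_PAGES = 256 and 4K pages are assumed here to match the comment, not queried from a kernel):

#include <stdio.h>

int
main(void)
{
	unsigned long max_pages = 256;		/* BIO_MAX_PAGES assumed */
	unsigned long min_seg = 512, max_seg = 4096;

	printf("one bio: %luKB..%luKB\n",
	    max_pages * min_seg / 1024,		/* 128KB */
	    max_pages * max_seg / 1024);	/* 1024KB, i.e. 1MB */
	printf("16 bios at 1MB: %luMB\n",
	    16UL * max_pages * max_seg / (1024 * 1024));	/* 16MB */
	return (0);
}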
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
index e435e1a9f78a..3b0f824115f8 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_uio.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
@@ -55,7 +55,7 @@
* a non-zero errno on failure.
*/
static int
-uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
+zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
const struct iovec *iov = uio->uio_iov;
size_t skip = uio->uio_skip;
@@ -126,7 +126,7 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
}
static int
-uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
+zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
const struct bio_vec *bv = uio->uio_bvec;
size_t skip = uio->uio_skip;
@@ -160,7 +160,7 @@ uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
#if defined(HAVE_VFS_IOV_ITER)
static int
-uiomove_iter(void *p, size_t n, enum uio_rw rw, struct uio *uio,
+zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
boolean_t revert)
{
size_t cnt = MIN(n, uio->uio_resid);
@@ -182,7 +182,7 @@ uiomove_iter(void *p, size_t n, enum uio_rw rw, struct uio *uio,
return (EFAULT);
/*
- * Revert advancing the uio_iter. This is set by uiocopy()
+ * Revert advancing the uio_iter. This is set by zfs_uiocopy()
* to avoid consuming the uio and its iov_iter structure.
*/
if (revert)
@@ -196,33 +196,69 @@ uiomove_iter(void *p, size_t n, enum uio_rw rw, struct uio *uio,
#endif
int
-uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
+zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
if (uio->uio_segflg == UIO_BVEC)
- return (uiomove_bvec(p, n, rw, uio));
+ return (zfs_uiomove_bvec(p, n, rw, uio));
#if defined(HAVE_VFS_IOV_ITER)
else if (uio->uio_segflg == UIO_ITER)
- return (uiomove_iter(p, n, rw, uio, B_FALSE));
+ return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
#endif
else
- return (uiomove_iov(p, n, rw, uio));
+ return (zfs_uiomove_iov(p, n, rw, uio));
}
-EXPORT_SYMBOL(uiomove);
+EXPORT_SYMBOL(zfs_uiomove);
+/*
+ * Fault in the pages of the first n bytes specified by the uio structure.
+ * 1 byte in each page is touched and the uio struct is unmodified. Any
+ * error will terminate the process as this is only a best attempt to get
+ * the pages resident.
+ */
int
-uio_prefaultpages(ssize_t n, struct uio *uio)
+zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
- struct iov_iter iter, *iterp = NULL;
-
-#if defined(HAVE_IOV_ITER_FAULT_IN_READABLE)
- if (uio->uio_segflg == UIO_USERSPACE) {
- iterp = &iter;
- iov_iter_init_compat(iterp, READ, uio->uio_iov,
- uio->uio_iovcnt, uio->uio_resid);
+ if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
+ /* There's never a need to fault in kernel pages */
+ return (0);
#if defined(HAVE_VFS_IOV_ITER)
} else if (uio->uio_segflg == UIO_ITER) {
- iterp = uio->uio_iter;
+ /*
+ * On at least a Linux 4.9 kernel, iov_iter_fault_in_readable()
+ * can be relied on to fault in user pages when referenced.
+ */
+ if (iov_iter_fault_in_readable(uio->uio_iter, n))
+ return (EFAULT);
#endif
+ } else {
+ /* Fault in all user pages */
+ ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
+ const struct iovec *iov = uio->uio_iov;
+ int iovcnt = uio->uio_iovcnt;
+ size_t skip = uio->uio_skip;
+ uint8_t tmp;
+ caddr_t p;
+
+ for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
+ ulong_t cnt = MIN(iov->iov_len - skip, n);
+ /* empty iov */
+ if (cnt == 0)
+ continue;
+ n -= cnt;
+ /* touch each page in this segment. */
+ p = iov->iov_base + skip;
+ while (cnt) {
+ if (get_user(tmp, (uint8_t *)p))
+ return (EFAULT);
+ ulong_t incr = MIN(cnt, PAGESIZE);
+ p += incr;
+ cnt -= incr;
+ }
+ /* touch the last byte in case it straddles a page. */
+ p--;
+ if (get_user(tmp, (uint8_t *)p))
+ return (EFAULT);
+ }
}
- if (iterp && iov_iter_fault_in_readable(iterp, n))
- return (EFAULT);
@@ -230,40 +266,40 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
-#endif
return (0);
}
-EXPORT_SYMBOL(uio_prefaultpages);
+EXPORT_SYMBOL(zfs_uio_prefaultpages);
/*
- * The same as uiomove() but doesn't modify uio structure.
+ * The same as zfs_uiomove() but doesn't modify uio structure.
* return in cbytes how many bytes were copied.
*/
int
-uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
+zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
- struct uio uio_copy;
+ zfs_uio_t uio_copy;
int ret;
- bcopy(uio, &uio_copy, sizeof (struct uio));
+ bcopy(uio, &uio_copy, sizeof (zfs_uio_t));
if (uio->uio_segflg == UIO_BVEC)
- ret = uiomove_bvec(p, n, rw, &uio_copy);
+ ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
#if defined(HAVE_VFS_IOV_ITER)
else if (uio->uio_segflg == UIO_ITER)
- ret = uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
+ ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
#endif
else
- ret = uiomove_iov(p, n, rw, &uio_copy);
+ ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
*cbytes = uio->uio_resid - uio_copy.uio_resid;
return (ret);
}
-EXPORT_SYMBOL(uiocopy);
+EXPORT_SYMBOL(zfs_uiocopy);
/*
* Drop the next n chars out of *uio.
*/
void
-uioskip(uio_t *uio, size_t n)
+zfs_uioskip(zfs_uio_t *uio, size_t n)
{
if (n > uio->uio_resid)
return;
@@ -292,5 +328,6 @@ uioskip(uio_t *uio, size_t n)
uio->uio_loffset += n;
uio->uio_resid -= n;
}
-EXPORT_SYMBOL(uioskip);
+EXPORT_SYMBOL(zfs_uioskip);
+
#endif /* _KERNEL */
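All three zfs_uiomove_*() backends above share one shape: walk the segment list, copy MIN(segment length, bytes remaining) per segment, and advance. A self-contained userland sketch of that loop over plain iovecs (mini_uiomove() is an invented stand-in; the real zfs_uiomove_iov() additionally handles uio_skip, resid/offset accounting, and user-space faults):

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

static size_t
mini_uiomove(void *p, size_t n, struct iovec *iov, int iovcnt)
{
	char *src = p;
	size_t done = 0;

	for (int i = 0; i < iovcnt && done < n; i++) {
		size_t cnt = iov[i].iov_len;	/* per-segment quota */
		if (cnt > n - done)
			cnt = n - done;
		memcpy(iov[i].iov_base, src + done, cnt);
		done += cnt;
	}
	return (done);
}

int
main(void)
{
	char a[4], b[8], src[] = "hello, world";
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = sizeof (a) },
		{ .iov_base = b, .iov_len = sizeof (b) },
	};

	size_t n = mini_uiomove(src, sizeof (src), iov, 2);
	printf("copied %zu bytes: %.4s|%.8s\n", n, a, b);
	return (0);
}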
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index 165c1218ae79..3cc4b560e477 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1772,7 +1772,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
*ipp = ZTOI(zp);
if (*ipp)
- zfs_inode_update(ITOZ(*ipp));
+ zfs_znode_update_vfs(ITOZ(*ipp));
ZFS_EXIT(zfsvfs);
return (0);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index 3be387a30e5c..84c33b541ea3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -87,15 +87,18 @@
* must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
* can return EIO from the calling function.
*
- * (2) zrele() should always be the last thing except for zil_commit()
- * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
- * First, if it's the last reference, the vnode/znode
- * can be freed, so the zp may point to freed memory. Second, the last
- * reference will call zfs_zinactive(), which may induce a lot of work --
- * pushing cached pages (which acquires range locks) and syncing out
- * cached atime changes. Third, zfs_zinactive() may require a new tx,
- * which could deadlock the system if you were already holding one.
- * If you must call zrele() within a tx then use zfs_zrele_async().
+ * (2) zrele() should always be the last thing except for zil_commit() (if
+ * necessary) and ZFS_EXIT(). This is for 3 reasons: First, if it's the
+ * last reference, the vnode/znode can be freed, so the zp may point to
+ * freed memory. Second, the last reference will call zfs_zinactive(),
+ * which may induce a lot of work -- pushing cached pages (which acquires
+ * range locks) and syncing out cached atime changes. Third,
+ * zfs_zinactive() may require a new tx, which could deadlock the system
+ * if you were already holding one. This deadlock occurs because the tx
+ * currently being operated on prevents a txg from syncing, which
+ * prevents the new tx from progressing. If you
+ * must call zrele() within a tx, use zfs_zrele_async(). Note that iput()
+ * is a synonym for zrele().
*
* (3) All range locks must be grabbed before calling dmu_tx_assign(),
* as they can span dmu_tx_assign() calls.
@@ -298,7 +301,7 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
* the file is memory mapped.
*/
int
-mappedread(znode_t *zp, int nbytes, uio_t *uio)
+mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
struct inode *ip = ZTOI(zp);
struct address_space *mp = ip->i_mapping;
@@ -320,7 +323,7 @@ mappedread(znode_t *zp, int nbytes, uio_t *uio)
unlock_page(pp);
pb = kmap(pp);
- error = uiomove(pb + off, bytes, UIO_READ, uio);
+ error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
kunmap(pp);
if (mapping_writably_mapped(mp))
@@ -372,8 +375,8 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len,
iov.iov_base = (void *)data;
iov.iov_len = len;
- uio_t uio;
- uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
+ zfs_uio_t uio;
+ zfs_uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
cookie = spl_fstrans_mark();
error = zfs_write(zp, &uio, 0, kcred);
@@ -381,8 +384,8 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len,
if (error == 0) {
if (residp != NULL)
- *residp = uio_resid(&uio);
- else if (uio_resid(&uio) != 0)
+ *residp = zfs_uio_resid(&uio);
+ else if (zfs_uio_resid(&uio) != 0)
error = SET_ERROR(EIO);
}
@@ -398,11 +401,18 @@ zfs_zrele_async(znode_t *zp)
ASSERT(atomic_read(&ip->i_count) > 0);
ASSERT(os != NULL);
- if (atomic_read(&ip->i_count) == 1)
+ /*
+ * If decrementing the count would put us at 0, we can't do it inline
+ * here, because that would be synchronous. Instead, dispatch an iput
+ * to run later.
+ *
+ * For more information on the dangers of a synchronous iput, see the
+ * header comment of this file.
+ */
+ if (!atomic_add_unless(&ip->i_count, -1, 1)) {
VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
(task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
- else
- zrele(zp);
+ }
}
@@ -516,7 +526,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp);
if ((error == 0) && (*zpp))
- zfs_inode_update(*zpp);
+ zfs_znode_update_vfs(*zpp);
ZFS_EXIT(zfsvfs);
return (error);
@@ -779,8 +789,8 @@ out:
if (zp)
zrele(zp);
} else {
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
*zpp = zp;
}
@@ -902,8 +912,8 @@ out:
if (zp)
zrele(zp);
} else {
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
*ipp = ZTOI(zp);
}
@@ -1129,8 +1139,8 @@ out:
pn_free(realnmp);
zfs_dirent_unlock(dl);
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
if (delete_now)
zrele(zp);
@@ -1138,7 +1148,7 @@ out:
zfs_zrele_async(zp);
if (xzp) {
- zfs_inode_update(xzp);
+ zfs_znode_update_vfs(xzp);
zfs_zrele_async(xzp);
}
@@ -1335,8 +1345,8 @@ out:
if (error != 0) {
zrele(zp);
} else {
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
}
ZFS_EXIT(zfsvfs);
return (error);
@@ -1461,8 +1471,8 @@ top:
out:
zfs_dirent_unlock(dl);
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
zrele(zp);
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
@@ -2532,7 +2542,7 @@ out:
err2 = zfs_setattr_dir(attrzp);
zrele(attrzp);
}
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(zp);
}
out2:
@@ -2990,17 +3000,17 @@ out:
zfs_dirent_unlock(sdl);
zfs_dirent_unlock(tdl);
- zfs_inode_update(sdzp);
+ zfs_znode_update_vfs(sdzp);
if (sdzp == tdzp)
rw_exit(&sdzp->z_name_lock);
if (sdzp != tdzp)
- zfs_inode_update(tdzp);
+ zfs_znode_update_vfs(tdzp);
- zfs_inode_update(szp);
+ zfs_znode_update_vfs(szp);
zrele(szp);
if (tzp) {
- zfs_inode_update(tzp);
+ zfs_znode_update_vfs(tzp);
zrele(tzp);
}
@@ -3159,8 +3169,8 @@ top:
txtype |= TX_CI;
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
}
zfs_acl_ids_free(&acl_ids);
@@ -3198,7 +3208,7 @@ top:
*/
/* ARGSUSED */
int
-zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
+zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
@@ -3409,8 +3419,8 @@ top:
if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
- zfs_inode_update(tdzp);
- zfs_inode_update(szp);
+ zfs_znode_update_vfs(tdzp);
+ zfs_znode_update_vfs(szp);
ZFS_EXIT(zfsvfs);
return (error);
}
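The new zfs_zrele_async() hinges on atomic_add_unless() semantics: the decrement happens inline unless it would land on the forbidden value, in which case the call fails and the final iput() is deferred to a taskq. A userland sketch of that pattern with C11 atomics (add_unless() here imitates the kernel primitive, returning false when the old value equals the 'unless' argument):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool
add_unless(atomic_int *v, int a, int u)
{
	int c = atomic_load(v);

	while (c != u) {
		/* on failure the CAS reloads c and we retry */
		if (atomic_compare_exchange_weak(v, &c, c + a))
			return (true);
	}
	return (false);	/* count was 'u'; caller must defer the drop */
}

int
main(void)
{
	atomic_int count = 2;

	if (!add_unless(&count, -1, 1))
		printf("would hit zero: defer to taskq\n");
	else
		printf("dropped inline, count=%d\n", atomic_load(&count));
	return (0);
}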
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
index b33594488ee0..d59c1bb0716a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -479,14 +479,10 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip)
}
/*
- * Update the embedded inode given the znode. We should work toward
- * eliminating this function as soon as possible by removing values
- * which are duplicated between the znode and inode. If the generic
- * inode has the correct field it should be used, and the ZFS code
- * updated to access the inode. This can be done incrementally.
+ * Update the embedded inode given the znode.
*/
void
-zfs_inode_update(znode_t *zp)
+zfs_znode_update_vfs(znode_t *zp)
{
zfsvfs_t *zfsvfs;
struct inode *ip;
@@ -602,7 +598,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
ZFS_TIME_DECODE(&ip->i_ctime, ctime);
ip->i_ino = zp->z_id;
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(zp);
zfs_inode_set_ops(zfsvfs, ip);
/*
@@ -1278,7 +1274,7 @@ zfs_rezget(znode_t *zp)
zp->z_blksz = doi.doi_data_block_size;
zp->z_atime_dirty = B_FALSE;
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(zp);
/*
* If the file has zero links, then it has been unlinked on the send
@@ -1796,7 +1792,7 @@ log:
dmu_tx_commit(tx);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(zp);
error = 0;
out:
@@ -2127,8 +2123,10 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
size_t complen;
int is_xattrdir = 0;
- if (prevdb)
+ if (prevdb) {
+ ASSERT(prevhdl != NULL);
zfs_release_sa_handle(prevhdl, prevdb, FTAG);
+ }
if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
&is_xattrdir)) != 0)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
index 8106359e1c77..284ca706ede5 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c
@@ -376,7 +376,7 @@ error:
static int
zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key,
crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen,
- uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len)
+ zfs_uio_t *puio, zfs_uio_t *cuio, uint8_t *authbuf, uint_t auth_len)
{
int ret;
crypto_data_t plaindata, cipherdata;
@@ -479,7 +479,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out)
{
int ret;
- uio_t puio, cuio;
+ zfs_uio_t puio, cuio;
uint64_t aad[3];
iovec_t plain_iovecs[2], cipher_iovecs[3];
uint64_t crypt = key->zk_crypt;
@@ -495,7 +495,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
if (ret != 0)
goto error;
- /* initialize uio_ts */
+ /* initialize zfs_uio_ts */
plain_iovecs[0].iov_base = key->zk_master_keydata;
plain_iovecs[0].iov_len = keydata_len;
plain_iovecs[1].iov_base = key->zk_hmac_keydata;
@@ -550,7 +550,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
uint8_t *mac, zio_crypt_key_t *key)
{
crypto_mechanism_t mech;
- uio_t puio, cuio;
+ zfs_uio_t puio, cuio;
uint64_t aad[3];
iovec_t plain_iovecs[2], cipher_iovecs[3];
uint_t enc_len, keydata_len, aad_len;
@@ -563,7 +563,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
keydata_len = zio_crypt_table[crypt].ci_keylen;
- /* initialize uio_ts */
+ /* initialize zfs_uio_ts */
plain_iovecs[0].iov_base = key->zk_master_keydata;
plain_iovecs[0].iov_len = keydata_len;
plain_iovecs[1].iov_base = key->zk_hmac_keydata;
@@ -1296,7 +1296,7 @@ error:
}
static void
-zio_crypt_destroy_uio(uio_t *uio)
+zio_crypt_destroy_uio(zfs_uio_t *uio)
{
if (uio->uio_iov)
kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t));
@@ -1386,8 +1386,8 @@ zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
*/
static int
zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
- uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio,
- uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
+ uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio,
+ zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
boolean_t *no_crypt)
{
int ret;
@@ -1581,7 +1581,7 @@ error:
static int
zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
- uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
+ zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
uint_t *auth_len, boolean_t *no_crypt)
{
int ret;
@@ -1764,7 +1764,7 @@ error:
static int
zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
- uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio,
+ uint8_t *cipherbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *cuio,
uint_t *enc_len)
{
int ret;
@@ -1824,8 +1824,8 @@ error:
static int
zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot,
uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
- uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
- uint_t *auth_len, boolean_t *no_crypt)
+ uint8_t *mac, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len,
+ uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt)
{
int ret;
iovec_t *mac_iov;
@@ -1884,7 +1884,7 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
uint64_t crypt = key->zk_crypt;
uint_t keydata_len = zio_crypt_table[crypt].ci_keylen;
uint_t enc_len, auth_len;
- uio_t puio, cuio;
+ zfs_uio_t puio, cuio;
uint8_t enc_keydata[MASTER_KEY_MAX_LEN];
crypto_key_t tmp_ckey, *ckey = NULL;
crypto_ctx_template_t tmpl;
@@ -1950,8 +1950,8 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
/* If the hardware implementation fails fall back to software */
}
- bzero(&puio, sizeof (uio_t));
- bzero(&cuio, sizeof (uio_t));
+ bzero(&puio, sizeof (zfs_uio_t));
+ bzero(&cuio, sizeof (zfs_uio_t));
/* create uios for encryption */
ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index 9e08c94e2147..970db4a8b73a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -242,13 +242,13 @@ zpl_file_accessed(struct file *filp)
* Otherwise, for older kernels extract the iovec and pass it instead.
*/
static void
-zpl_uio_init(uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
+zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
loff_t pos, ssize_t count, size_t skip)
{
#if defined(HAVE_VFS_IOV_ITER)
- uio_iov_iter_init(uio, to, pos, count, skip);
+ zfs_uio_iov_iter_init(uio, to, pos, count, skip);
#else
- uio_iovec_init(uio, to->iov, to->nr_segs, pos,
+ zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
count, skip);
#endif
@@ -261,7 +261,7 @@ zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
fstrans_cookie_t cookie;
struct file *filp = kiocb->ki_filp;
ssize_t count = iov_iter_count(to);
- uio_t uio;
+ zfs_uio_t uio;
zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, count, 0);
@@ -317,7 +317,7 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
fstrans_cookie_t cookie;
struct file *filp = kiocb->ki_filp;
struct inode *ip = filp->f_mapping->host;
- uio_t uio;
+ zfs_uio_t uio;
size_t count = 0;
ssize_t ret;
@@ -364,8 +364,8 @@ zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov,
if (ret)
return (ret);
- uio_t uio;
- uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
+ zfs_uio_t uio;
+ zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
count, 0);
crhold(cr);
@@ -407,8 +407,8 @@ zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov,
if (ret)
return (ret);
- uio_t uio;
- uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
+ zfs_uio_t uio;
+ zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
count, 0);
crhold(cr);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
index f336fbb1272b..e79d334edc9b 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@@ -499,8 +499,8 @@ zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link)
iov.iov_len = MAXPATHLEN;
iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
- uio_t uio;
- uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, MAXPATHLEN - 1, 0);
+ zfs_uio_t uio;
+ zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, MAXPATHLEN - 1, 0);
cookie = spl_fstrans_mark();
error = -zfs_readlink(ip, &uio, cr);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
index 1ec3dae2bb81..83812f2dcba8 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@@ -306,15 +306,15 @@ zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
iov.iov_base = (void *)value;
iov.iov_len = size;
- uio_t uio;
- uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
+ zfs_uio_t uio;
+ zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
cookie = spl_fstrans_mark();
error = -zfs_read(ITOZ(xip), &uio, 0, cr);
spl_fstrans_unmark(cookie);
if (error == 0)
- error = size - uio_resid(&uio);
+ error = size - zfs_uio_resid(&uio);
out:
if (xzp)
zrele(xzp);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index cdc2076702af..0caf31307718 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -85,9 +85,9 @@ zvol_write(void *arg)
zv_request_t *zvr = arg;
struct bio *bio = zvr->bio;
int error = 0;
- uio_t uio;
+ zfs_uio_t uio;
- uio_bvec_init(&uio, bio);
+ zfs_uio_bvec_init(&uio, bio);
zvol_state_t *zv = zvr->zv;
ASSERT3P(zv, !=, NULL);
@@ -247,9 +247,9 @@ zvol_read(void *arg)
zv_request_t *zvr = arg;
struct bio *bio = zvr->bio;
int error = 0;
- uio_t uio;
+ zfs_uio_t uio;
- uio_bvec_init(&uio, bio);
+ zfs_uio_bvec_init(&uio, bio);
zvol_state_t *zv = zvr->zv;
ASSERT3P(zv, !=, NULL);
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 68d4aa5f5cb4..7d3a2f6d69e2 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -105,26 +105,6 @@
/* see block comment above for description */
int zfs_abd_scatter_enabled = B_TRUE;
-boolean_t
-abd_is_linear(abd_t *abd)
-{
- return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE);
-}
-
-boolean_t
-abd_is_linear_page(abd_t *abd)
-{
- return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ?
- B_TRUE : B_FALSE);
-}
-
-boolean_t
-abd_is_gang(abd_t *abd)
-{
- return ((abd->abd_flags & ABD_FLAG_GANG) != 0 ? B_TRUE :
- B_FALSE);
-}
-
void
abd_verify(abd_t *abd)
{
@@ -133,8 +113,10 @@ abd_verify(abd_t *abd)
ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG |
- ABD_FLAG_GANG_FREE | ABD_FLAG_ZEROS));
+ ABD_FLAG_GANG_FREE | ABD_FLAG_ZEROS | ABD_FLAG_ALLOCD));
+#ifdef ZFS_DEBUG
IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
+#endif
IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
if (abd_is_linear(abd)) {
ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
@@ -153,11 +135,43 @@ abd_verify(abd_t *abd)
}
}
-uint_t
-abd_get_size(abd_t *abd)
+static void
+abd_init_struct(abd_t *abd)
{
- abd_verify(abd);
- return (abd->abd_size);
+ list_link_init(&abd->abd_gang_link);
+ mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
+ abd->abd_flags = 0;
+#ifdef ZFS_DEBUG
+ zfs_refcount_create(&abd->abd_children);
+ abd->abd_parent = NULL;
+#endif
+ abd->abd_size = 0;
+}
+
+static void
+abd_fini_struct(abd_t *abd)
+{
+ mutex_destroy(&abd->abd_mtx);
+ ASSERT(!list_link_active(&abd->abd_gang_link));
+#ifdef ZFS_DEBUG
+ zfs_refcount_destroy(&abd->abd_children);
+#endif
+}
+
+abd_t *
+abd_alloc_struct(size_t size)
+{
+ abd_t *abd = abd_alloc_struct_impl(size);
+ abd_init_struct(abd);
+ abd->abd_flags |= ABD_FLAG_ALLOCD;
+ return (abd);
+}
+
+void
+abd_free_struct(abd_t *abd)
+{
+ abd_fini_struct(abd);
+ abd_free_struct_impl(abd);
}
/*
@@ -173,7 +187,7 @@ abd_alloc(size_t size, boolean_t is_metadata)
VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
abd_t *abd = abd_alloc_struct(size);
- abd->abd_flags = ABD_FLAG_OWNER;
+ abd->abd_flags |= ABD_FLAG_OWNER;
abd->abd_u.abd_scatter.abd_offset = 0;
abd_alloc_chunks(abd, size);
@@ -181,65 +195,12 @@ abd_alloc(size_t size, boolean_t is_metadata)
abd->abd_flags |= ABD_FLAG_META;
}
abd->abd_size = size;
- abd->abd_parent = NULL;
- zfs_refcount_create(&abd->abd_children);
abd_update_scatter_stats(abd, ABDSTAT_INCR);
return (abd);
}
-static void
-abd_free_scatter(abd_t *abd)
-{
- abd_free_chunks(abd);
-
- zfs_refcount_destroy(&abd->abd_children);
- abd_update_scatter_stats(abd, ABDSTAT_DECR);
- abd_free_struct(abd);
-}
-
-static void
-abd_put_gang_abd(abd_t *abd)
-{
- ASSERT(abd_is_gang(abd));
- abd_t *cabd;
-
- while ((cabd = list_remove_head(&ABD_GANG(abd).abd_gang_chain))
- != NULL) {
- ASSERT0(cabd->abd_flags & ABD_FLAG_GANG_FREE);
- abd->abd_size -= cabd->abd_size;
- abd_put(cabd);
- }
- ASSERT0(abd->abd_size);
- list_destroy(&ABD_GANG(abd).abd_gang_chain);
-}
-
-/*
- * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
- * free the underlying scatterlist or buffer.
- */
-void
-abd_put(abd_t *abd)
-{
- if (abd == NULL)
- return;
-
- abd_verify(abd);
- ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
-
- if (abd->abd_parent != NULL) {
- (void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
- abd->abd_size, abd);
- }
-
- if (abd_is_gang(abd))
- abd_put_gang_abd(abd);
-
- zfs_refcount_destroy(&abd->abd_children);
- abd_free_struct(abd);
-}
-
/*
* Allocate an ABD that must be linear, along with its own underlying data
* buffer. Only use this when it would be very annoying to write your ABD
@@ -252,13 +213,11 @@ abd_alloc_linear(size_t size, boolean_t is_metadata)
VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
- abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
+ abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
if (is_metadata) {
abd->abd_flags |= ABD_FLAG_META;
}
abd->abd_size = size;
- abd->abd_parent = NULL;
- zfs_refcount_create(&abd->abd_children);
if (is_metadata) {
ABD_LINEAR_BUF(abd) = zio_buf_alloc(size);
@@ -284,19 +243,16 @@ abd_free_linear(abd_t *abd)
zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
}
- zfs_refcount_destroy(&abd->abd_children);
abd_update_linear_stats(abd, ABDSTAT_DECR);
-
- abd_free_struct(abd);
}
static void
-abd_free_gang_abd(abd_t *abd)
+abd_free_gang(abd_t *abd)
{
ASSERT(abd_is_gang(abd));
- abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
+ abd_t *cabd;
- while (cabd != NULL) {
+ while ((cabd = list_head(&ABD_GANG(abd).abd_gang_chain)) != NULL) {
/*
* We must acquire the child ABDs mutex to ensure that if it
* is being added to another gang ABD we will set the link
@@ -307,24 +263,29 @@ abd_free_gang_abd(abd_t *abd)
ASSERT(list_link_active(&cabd->abd_gang_link));
list_remove(&ABD_GANG(abd).abd_gang_chain, cabd);
mutex_exit(&cabd->abd_mtx);
- abd->abd_size -= cabd->abd_size;
- if (cabd->abd_flags & ABD_FLAG_GANG_FREE) {
- if (cabd->abd_flags & ABD_FLAG_OWNER)
- abd_free(cabd);
- else
- abd_put(cabd);
- }
- cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
+ if (cabd->abd_flags & ABD_FLAG_GANG_FREE)
+ abd_free(cabd);
}
- ASSERT0(abd->abd_size);
list_destroy(&ABD_GANG(abd).abd_gang_chain);
- zfs_refcount_destroy(&abd->abd_children);
- abd_free_struct(abd);
+}
+
+static void
+abd_free_scatter(abd_t *abd)
+{
+ abd_free_chunks(abd);
+ abd_update_scatter_stats(abd, ABDSTAT_DECR);
}
/*
- * Free an ABD. Only use this on ABDs allocated with abd_alloc(),
- * abd_alloc_linear(), or abd_alloc_gang_abd().
+ * Free an ABD. Use with any kind of abd: those created with abd_alloc_*()
+ * and abd_get_*(), including abd_get_offset_struct().
+ *
+ * If the ABD was created with abd_alloc_*(), the underlying data
+ * (scatterlist or linear buffer) will also be freed. (Subject to ownership
+ * changes via abd_*_ownership_of_buf().)
+ *
+ * Unless the ABD was created with abd_get_offset_struct(), the abd_t will
+ * also be freed.
*/
void
abd_free(abd_t *abd)
@@ -333,14 +294,30 @@ abd_free(abd_t *abd)
return;
abd_verify(abd);
- ASSERT3P(abd->abd_parent, ==, NULL);
- ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
- if (abd_is_linear(abd))
- abd_free_linear(abd);
- else if (abd_is_gang(abd))
- abd_free_gang_abd(abd);
- else
- abd_free_scatter(abd);
+#ifdef ZFS_DEBUG
+ IMPLY(abd->abd_flags & ABD_FLAG_OWNER, abd->abd_parent == NULL);
+#endif
+
+ if (abd_is_gang(abd)) {
+ abd_free_gang(abd);
+ } else if (abd_is_linear(abd)) {
+ if (abd->abd_flags & ABD_FLAG_OWNER)
+ abd_free_linear(abd);
+ } else {
+ if (abd->abd_flags & ABD_FLAG_OWNER)
+ abd_free_scatter(abd);
+ }
+
+#ifdef ZFS_DEBUG
+ if (abd->abd_parent != NULL) {
+ (void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
+ abd->abd_size, abd);
+ }
+#endif
+
+ abd_fini_struct(abd);
+ if (abd->abd_flags & ABD_FLAG_ALLOCD)
+ abd_free_struct_impl(abd);
}
/*
@@ -359,24 +336,18 @@ abd_alloc_sametype(abd_t *sabd, size_t size)
}
}
-
/*
* Create gang ABD that will be the head of a list of ABD's. This is used
* to "chain" scatter/gather lists together when constructing aggregated
* IO's. To free this abd, abd_free() must be called.
*/
abd_t *
-abd_alloc_gang_abd(void)
+abd_alloc_gang(void)
{
- abd_t *abd;
-
- abd = abd_alloc_struct(0);
- abd->abd_flags = ABD_FLAG_GANG | ABD_FLAG_OWNER;
- abd->abd_size = 0;
- abd->abd_parent = NULL;
+ abd_t *abd = abd_alloc_struct(0);
+ abd->abd_flags |= ABD_FLAG_GANG | ABD_FLAG_OWNER;
list_create(&ABD_GANG(abd).abd_gang_chain,
sizeof (abd_t), offsetof(abd_t, abd_gang_link));
- zfs_refcount_create(&abd->abd_children);
return (abd);
}
@@ -392,8 +363,8 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
if (free_on_free) {
/*
* If the parent is responsible for freeing the child gang
- * ABD we will just splice the childs children ABD list to
- * the parents list and immediately free the child gang ABD
+ * ABD we will just splice the child's children ABD list to
+ * the parent's list and immediately free the child gang ABD
* struct. The parent gang ABD's children from the child gang
* will retain all the free_on_free settings after being
* added to the parent's list.
@@ -403,7 +374,7 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
&ABD_GANG(cabd).abd_gang_chain);
ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
abd_verify(pabd);
- abd_free_struct(cabd);
+ abd_free(cabd);
} else {
for (abd_t *child = list_head(&ABD_GANG(cabd).abd_gang_chain);
child != NULL;
@@ -431,7 +402,7 @@ abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
/*
* If the child being added is a gang ABD, we will add the
- * childs ABDs to the parent gang ABD. This alllows us to account
+ * child's ABDs to the parent gang ABD. This allows us to account
* for the offset correctly in the parent gang ABD.
*/
if (abd_is_gang(cabd)) {
@@ -458,7 +429,7 @@ abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
* allocated ABD with ABD_FLAG_GANG_FREE, before
* adding it to the gang ABD's list, to make the
* gang ABD aware that it is responsible to call
- * abd_put(). We use abd_get_offset() in order
+ * abd_free(). We use abd_get_offset() in order
* to just allocate a new ABD but avoid copying the
* data over into the newly allocated ABD.
*
@@ -515,73 +486,96 @@ abd_gang_get_offset(abd_t *abd, size_t *off)
}
/*
- * Allocate a new ABD to point to offset off of sabd. It shares the underlying
- * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
- * any derived ABDs exist.
+ * Allocate a new ABD, using the provided struct (if non-NULL, and if
+ * circumstances allow - otherwise allocate the struct). The returned ABD will
+ * point to offset off of sabd. It shares the underlying buffer data with sabd.
+ * Use abd_free() to free. sabd must not be freed while any derived ABDs exist.
*/
static abd_t *
-abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
+abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size)
{
- abd_t *abd = NULL;
-
abd_verify(sabd);
- ASSERT3U(off, <=, sabd->abd_size);
+ ASSERT3U(off + size, <=, sabd->abd_size);
if (abd_is_linear(sabd)) {
- abd = abd_alloc_struct(0);
-
+ if (abd == NULL)
+ abd = abd_alloc_struct(0);
/*
* Even if this buf is filesystem metadata, we only track that
* if we own the underlying data buffer, which is not true in
* this case. Therefore, we don't ever use ABD_FLAG_META here.
*/
- abd->abd_flags = ABD_FLAG_LINEAR;
+ abd->abd_flags |= ABD_FLAG_LINEAR;
ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off;
} else if (abd_is_gang(sabd)) {
size_t left = size;
- abd = abd_alloc_gang_abd();
+ if (abd == NULL) {
+ abd = abd_alloc_gang();
+ } else {
+ abd->abd_flags |= ABD_FLAG_GANG;
+ list_create(&ABD_GANG(abd).abd_gang_chain,
+ sizeof (abd_t), offsetof(abd_t, abd_gang_link));
+ }
+
abd->abd_flags &= ~ABD_FLAG_OWNER;
for (abd_t *cabd = abd_gang_get_offset(sabd, &off);
cabd != NULL && left > 0;
cabd = list_next(&ABD_GANG(sabd).abd_gang_chain, cabd)) {
int csize = MIN(left, cabd->abd_size - off);
- abd_t *nabd = abd_get_offset_impl(cabd, off, csize);
- abd_gang_add(abd, nabd, B_FALSE);
+ abd_t *nabd = abd_get_offset_size(cabd, off, csize);
+ abd_gang_add(abd, nabd, B_TRUE);
left -= csize;
off = 0;
}
ASSERT3U(left, ==, 0);
} else {
- abd = abd_get_offset_scatter(sabd, off);
+ abd = abd_get_offset_scatter(abd, sabd, off);
}
+ ASSERT3P(abd, !=, NULL);
abd->abd_size = size;
+#ifdef ZFS_DEBUG
abd->abd_parent = sabd;
- zfs_refcount_create(&abd->abd_children);
(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd);
+#endif
return (abd);
}
+/*
+ * Like abd_get_offset_size(), but memory for the abd_t is provided by the
+ * caller. Using this routine can improve performance by avoiding the cost
+ * of allocating memory for the abd_t struct, and updating the abd stats.
+ * Usually, the provided abd is returned, but in some circumstances (on
+ * FreeBSD, if sabd is scatter and size is more than 2 pages) a new abd_t
+ * may need to be allocated. Therefore callers should be careful to use
+ * the returned abd_t*.
+ */
+abd_t *
+abd_get_offset_struct(abd_t *abd, abd_t *sabd, size_t off, size_t size)
+{
+ abd_init_struct(abd);
+ return (abd_get_offset_impl(abd, sabd, off, size));
+}
+
abd_t *
abd_get_offset(abd_t *sabd, size_t off)
{
size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
VERIFY3U(size, >, 0);
- return (abd_get_offset_impl(sabd, off, size));
+ return (abd_get_offset_impl(NULL, sabd, off, size));
}
abd_t *
abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
{
ASSERT3U(off + size, <=, sabd->abd_size);
- return (abd_get_offset_impl(sabd, off, size));
+ return (abd_get_offset_impl(NULL, sabd, off, size));
}
/*
- * Return a size scatter ABD. In order to free the returned
- * ABD abd_put() must be called.
+ * Return a scatter ABD of size 'size' containing only zeros.
*/
abd_t *
abd_get_zeros(size_t size)
@@ -592,8 +586,7 @@ abd_get_zeros(size_t size)
}
/*
- * Allocate a linear ABD structure for buf. You must free this with abd_put()
- * since the resulting ABD doesn't own its own buffer.
+ * Allocate a linear ABD structure for buf.
*/
abd_t *
abd_get_from_buf(void *buf, size_t size)
@@ -607,10 +600,8 @@ abd_get_from_buf(void *buf, size_t size)
* own the underlying data buffer, which is not true in this case.
* Therefore, we don't ever use ABD_FLAG_META here.
*/
- abd->abd_flags = ABD_FLAG_LINEAR;
+ abd->abd_flags |= ABD_FLAG_LINEAR;
abd->abd_size = size;
- abd->abd_parent = NULL;
- zfs_refcount_create(&abd->abd_children);
ABD_LINEAR_BUF(abd) = buf;
@@ -645,7 +636,9 @@ abd_borrow_buf(abd_t *abd, size_t n)
} else {
buf = zio_buf_alloc(n);
}
+#ifdef ZFS_DEBUG
(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
+#endif
return (buf);
}
@@ -676,7 +669,9 @@ abd_return_buf(abd_t *abd, void *buf, size_t n)
ASSERT0(abd_cmp_buf(abd, buf, n));
zio_buf_free(buf, n);
}
+#ifdef ZFS_DEBUG
(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
+#endif
}
void
@@ -790,12 +785,12 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size,
abd_verify(abd);
ASSERT3U(off + size, <=, abd->abd_size);
- boolean_t abd_multi = abd_is_gang(abd);
+ boolean_t gang = abd_is_gang(abd);
abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
while (size > 0) {
/* If we are at the end of the gang ABD we are done */
- if (abd_multi && !c_abd)
+ if (gang && !c_abd)
break;
abd_iter_map(&aiter);
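abd_get_offset_struct() above lets hot paths supply the abd_t storage (often on the stack) and skip the allocator, but as its comment warns, the returned pointer can differ from the one passed in. A userland sketch of that caller-provided-struct contract (view_t and view_get_offset() are invented stand-ins, not the ABD API):

#include <stdio.h>
#include <stdlib.h>

typedef struct view { const char *base; size_t off, size; } view_t;

static view_t *
view_get_offset(view_t *v, const char *base, size_t off, size_t size)
{
	if (v == NULL)
		v = malloc(sizeof (*v));	/* heap fallback */
	v->base = base;
	v->off = off;
	v->size = size;
	return (v);	/* may differ from the argument passed in */
}

int
main(void)
{
	static const char buf[] = "0123456789";
	view_t stack_view;	/* avoids an allocation on a hot path */

	/* always use the returned pointer, never assume &stack_view */
	view_t *v = view_get_offset(&stack_view, buf, 4, 3);
	printf("%.*s\n", (int)v->size, v->base + v->off);	/* "456" */
	return (0);
}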
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index fecc752e16ff..b4f0c8a85b64 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -3065,7 +3065,7 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
arc_hdr_size(hdr), hdr, buf);
arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd);
- abd_put(hdr->b_l1hdr.b_pabd);
+ abd_free(hdr->b_l1hdr.b_pabd);
hdr->b_l1hdr.b_pabd = NULL;
buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
@@ -4163,7 +4163,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
mutex_enter(&arc_evict_lock);
arc_evict_count += bytes_evicted;
- if ((int64_t)(arc_free_memory() - arc_sys_free / 2) > 0) {
+ if (arc_free_memory() > arc_sys_free / 2) {
arc_evict_waiter_t *aw;
while ((aw = list_head(&arc_evict_waiters)) != NULL &&
aw->aew_count <= arc_evict_count) {
@@ -5242,14 +5242,20 @@ arc_wait_for_eviction(uint64_t amount)
list_link_init(&aw.aew_node);
cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
- arc_evict_waiter_t *last =
- list_tail(&arc_evict_waiters);
- if (last != NULL) {
- ASSERT3U(last->aew_count, >, arc_evict_count);
- aw.aew_count = last->aew_count + amount;
- } else {
- aw.aew_count = arc_evict_count + amount;
+ uint64_t last_count = 0;
+ if (!list_is_empty(&arc_evict_waiters)) {
+ arc_evict_waiter_t *last =
+ list_tail(&arc_evict_waiters);
+ last_count = last->aew_count;
}
+ /*
+ * Note, the last waiter's count may be less than
+ * arc_evict_count if we are low on memory in which
+ * case arc_evict_state_impl() may have deferred
+ * wakeups (but still incremented arc_evict_count).
+ */
+ aw.aew_count =
+ MAX(last_count, arc_evict_count) + amount;
list_insert_tail(&arc_evict_waiters, &aw);
@@ -7041,7 +7047,7 @@ arc_write_done(zio_t *zio)
ASSERT(!zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
callback->awcb_done(zio, buf, callback->awcb_private);
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
kmem_free(callback, sizeof (arc_write_callback_t));
}
@@ -9037,7 +9043,7 @@ l2arc_blk_fetch_done(zio_t *zio)
cb = zio->io_private;
if (cb->l2rcb_abd != NULL)
- abd_put(cb->l2rcb_abd);
+ abd_free(cb->l2rcb_abd);
kmem_free(cb, sizeof (l2arc_read_callback_t));
}
@@ -9075,17 +9081,17 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
/*
* Copy buffers for L2ARC writing.
*/
- for (int try = 0; try < L2ARC_FEED_TYPES; try++) {
+ for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
/*
- * If try == 1 or 3, we cache MRU metadata and data
+ * If pass == 1 or 3, we cache MRU metadata and data
* respectively.
*/
if (l2arc_mfuonly) {
- if (try == 1 || try == 3)
+ if (pass == 1 || pass == 3)
continue;
}
- multilist_sublist_t *mls = l2arc_sublist_lock(try);
+ multilist_sublist_t *mls = l2arc_sublist_lock(pass);
uint64_t passed_sz = 0;
VERIFY3P(mls, !=, NULL);
@@ -10011,7 +10017,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_SPECULATIVE, B_FALSE));
- abd_put(abd);
+ abd_free(abd);
if (err != 0) {
ARCSTAT_BUMP(arcstat_l2_rebuild_abort_dh_errors);
@@ -10379,7 +10385,7 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
VDEV_LABEL_START_SIZE, l2dhdr_asize, abd, ZIO_CHECKSUM_LABEL, NULL,
NULL, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE));
- abd_put(abd);
+ abd_free(abd);
if (err != 0) {
zfs_dbgmsg("L2ARC IO error (%d) while writing device header, "
@@ -10468,7 +10474,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
fletcher_4_native(tmpbuf, asize, NULL,
&l2dhdr->dh_start_lbps[0].lbp_cksum);
- abd_put(abd_buf->abd);
+ abd_free(abd_buf->abd);
/* perform the write itself */
abd_buf->abd = abd_get_from_buf(tmpbuf, sizeof (*lb));
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index 93445a80294b..a6cdc017cd21 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -4656,7 +4656,7 @@ dbuf_write_override_done(zio_t *zio)
dbuf_write_done(zio, NULL, db);
if (zio->io_abd != NULL)
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
}
typedef struct dbuf_remap_impl_callback_arg {
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index a02f43df13fd..ed345f0b6ec3 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -1170,7 +1170,7 @@ dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
#ifdef _KERNEL
int
-dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
+dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size)
{
dmu_buf_t **dbp;
int numbufs, i, err;
@@ -1179,7 +1179,7 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
* NB: we could do this block-at-a-time, but it's nice
* to be reading in parallel.
*/
- err = dmu_buf_hold_array_by_dnode(dn, uio_offset(uio), size,
+ err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size,
TRUE, FTAG, &numbufs, &dbp, 0);
if (err)
return (err);
@@ -1191,16 +1191,12 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
ASSERT(size > 0);
- bufoff = uio_offset(uio) - db->db_offset;
+ bufoff = zfs_uio_offset(uio) - db->db_offset;
tocpy = MIN(db->db_size - bufoff, size);
-#ifdef __FreeBSD__
- err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
- tocpy, uio);
-#else
- err = uiomove((char *)db->db_data + bufoff, tocpy,
- UIO_READ, uio);
-#endif
+ err = zfs_uio_fault_move((char *)db->db_data + bufoff, tocpy,
+ UIO_READ, uio);
+
if (err)
break;
@@ -1214,14 +1210,14 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
/*
* Read 'size' bytes into the uio buffer.
* From object zdb->db_object.
- * Starting at offset uio->uio_loffset.
+ * Starting at offset zfs_uio_offset(uio).
*
* If the caller already has a dbuf in the target object
* (e.g. its bonus buffer), this routine is faster than dmu_read_uio(),
* because we don't have to find the dnode_t for the object.
*/
int
-dmu_read_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size)
+dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
dnode_t *dn;
@@ -1241,10 +1237,10 @@ dmu_read_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size)
/*
* Read 'size' bytes into the uio buffer.
* From the specified object
- * Starting at offset uio->uio_loffset.
+ * Starting at offset zfs_uio_offset(uio).
*/
int
-dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
+dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size)
{
dnode_t *dn;
int err;
@@ -1264,14 +1260,14 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
}
int
-dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
+dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
{
dmu_buf_t **dbp;
int numbufs;
int err = 0;
int i;
- err = dmu_buf_hold_array_by_dnode(dn, uio_offset(uio), size,
+ err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size,
FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
if (err)
return (err);
@@ -1283,7 +1279,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
ASSERT(size > 0);
- bufoff = uio_offset(uio) - db->db_offset;
+ bufoff = zfs_uio_offset(uio) - db->db_offset;
tocpy = MIN(db->db_size - bufoff, size);
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
@@ -1294,18 +1290,14 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
dmu_buf_will_dirty(db, tx);
/*
- * XXX uiomove could block forever (eg.nfs-backed
+ * XXX zfs_uiomove could block forever (e.g. nfs-backed
* pages). There needs to be a uiolockdown() function
- * to lock the pages in memory, so that uiomove won't
+ * to lock the pages in memory, so that zfs_uiomove won't
* block.
*/
-#ifdef __FreeBSD__
- err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
- tocpy, uio);
-#else
- err = uiomove((char *)db->db_data + bufoff, tocpy,
- UIO_WRITE, uio);
-#endif
+ err = zfs_uio_fault_move((char *)db->db_data + bufoff,
+ tocpy, UIO_WRITE, uio);
+
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
@@ -1322,14 +1314,14 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
/*
* Write 'size' bytes from the uio buffer.
* To object zdb->db_object.
- * Starting at offset uio->uio_loffset.
+ * Starting at offset zfs_uio_offset(uio).
*
* If the caller already has a dbuf in the target object
* (e.g. its bonus buffer), this routine is faster than dmu_write_uio(),
* because we don't have to find the dnode_t for the object.
*/
int
-dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
+dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
@@ -1350,10 +1342,10 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
/*
* Write 'size' bytes from the uio buffer.
* To the specified object.
- * Starting at offset uio->uio_loffset.
+ * Starting at offset zfs_uio_offset(uio).
*/
int
-dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
+dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
dmu_tx_t *tx)
{
dnode_t *dn;
@@ -1600,7 +1592,7 @@ dmu_sync_late_arrival_done(zio_t *zio)
dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
kmem_free(dsa, sizeof (*dsa));
}
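
The dmu.c hunks above convert raw uio_t field access (uio->uio_loffset, uio->uio_resid) to the zfs_uio_t accessor functions. A minimal sketch of the resulting calling convention, assuming the zfs_uio_iovec_init() initializer from the zfs_uio.h introduced by this merge:

    struct iovec iov = { .iov_base = buf, .iov_len = len };
    zfs_uio_t uio;
    zfs_uio_iovec_init(&uio, &iov, 1, offset, UIO_SYSSPACE, len, 0);
    error = dmu_read_uio(os, object, &uio, zfs_uio_resid(&uio));

The offset and residual count are only read through zfs_uio_offset()/zfs_uio_resid(), which lets each platform keep its native uio layout behind the zfs_uio_t wrapper.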
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index 66a8f20092e0..bfb4adf262d5 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -326,7 +326,7 @@ smallblk_changed_cb(void *arg, uint64_t newval)
/*
* Inheritance and range checking should have been done by now.
*/
- ASSERT(newval <= SPA_OLD_MAXBLOCKSIZE);
+ ASSERT(newval <= SPA_MAXBLOCKSIZE);
ASSERT(ISP2(newval));
os->os_zpl_special_smallblock = newval;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
index 0ebed4e6fbdf..73667915df0f 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -1012,6 +1012,22 @@ dmu_tx_unassign(dmu_tx_t *tx)
* details on the throttle). This is used by the VFS operations, after
* they have already called dmu_tx_wait() (though most likely on a
* different tx).
+ *
+ * It is guaranteed that subsequent successful calls to dmu_tx_assign()
+ * will assign the tx to monotonically increasing txgs. Of course this is
+ * not strong monotonicity, because the same txg can be returned multiple
+ * times in a row. This guarantee holds both for subsequent calls from
+ * one thread and for multiple threads. For example, it is impossible to
+ * observe the following sequence of events:
+ *
+ * Thread 1 Thread 2
+ *
+ * dmu_tx_assign(T1, ...)
+ * 1 <- dmu_tx_get_txg(T1)
+ * dmu_tx_assign(T2, ...)
+ * 2 <- dmu_tx_get_txg(T2)
+ * dmu_tx_assign(T3, ...)
+ * 1 <- dmu_tx_get_txg(T3)
*/
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
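
The monotonicity guarantee documented above covers the normal assign/commit cycle. A minimal sketch of that cycle, given an objset_t *os and a held object (error handling abbreviated; all calls are existing DMU interfaces):

    dmu_tx_t *tx = dmu_tx_create(os);
    dmu_tx_hold_write(tx, object, offset, size);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error != 0) {
        dmu_tx_abort(tx);
        return (error);
    }
    /* Per the comment above, txgs observed here never go backwards. */
    uint64_t txg = dmu_tx_get_txg(tx);
    zfs_dbgmsg("assigned txg %llu", (u_longlong_t)txg);
    dmu_write(os, object, offset, size, buf, tx);
    dmu_tx_commit(tx);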
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index de60c33589e3..6da5faf01edf 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -2322,18 +2322,7 @@ void
get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
{
nvlist_t *propval = fnvlist_alloc();
- nvlist_t *val;
-
- /*
- * We use nvlist_alloc() instead of fnvlist_alloc() because the
- * latter would allocate the list with NV_UNIQUE_NAME flag.
- * As a result, every time a clone name is appended to the list
- * it would be (linearly) searched for a duplicate name.
- * We already know that all clone names must be unique and we
- * want avoid the quadratic complexity of double-checking that
- * because we can have a large number of clones.
- */
- VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP));
+ nvlist_t *val = fnvlist_alloc();
if (get_clones_stat_impl(ds, val) == 0) {
fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_destroy.c b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
index 26fdf96341b9..837d78987e75 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_destroy.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_destroy.c
@@ -600,26 +600,21 @@ dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
/*
* lzc_destroy_snaps() is documented to take an nvlist whose
* values "don't matter". We need to convert that nvlist to
- * one that we know can be converted to LUA. We also don't
- * care about any duplicate entries because the nvlist will
- * be converted to a LUA table which should take care of this.
+ * one that we know can be converted to LUA.
*/
- nvlist_t *snaps_normalized;
- VERIFY0(nvlist_alloc(&snaps_normalized, 0, KM_SLEEP));
+ nvlist_t *snaps_normalized = fnvlist_alloc();
for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
fnvlist_add_boolean_value(snaps_normalized,
nvpair_name(pair), B_TRUE);
}
- nvlist_t *arg;
- VERIFY0(nvlist_alloc(&arg, 0, KM_SLEEP));
+ nvlist_t *arg = fnvlist_alloc();
fnvlist_add_nvlist(arg, "snaps", snaps_normalized);
fnvlist_free(snaps_normalized);
fnvlist_add_boolean_value(arg, "defer", defer);
- nvlist_t *wrapper;
- VERIFY0(nvlist_alloc(&wrapper, 0, KM_SLEEP));
+ nvlist_t *wrapper = fnvlist_alloc();
fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg);
fnvlist_free(arg);
@@ -654,7 +649,7 @@ dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
B_TRUE,
0,
zfs_lua_max_memlimit,
- nvlist_next_nvpair(wrapper, NULL), result);
+ fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result);
if (error != 0) {
char *errorstr = NULL;
(void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr);
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index bed6bf64c928..bc4f007b61a1 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -522,9 +522,10 @@ metaslab_class_histogram_verify(metaslab_class_t *mc)
mc_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE,
KM_SLEEP);
+ mutex_enter(&mc->mc_lock);
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
- metaslab_group_t *mg = tvd->vdev_mg;
+ metaslab_group_t *mg = vdev_get_mg(tvd, mc);
/*
* Skip any holes, uninitialized top-levels, or
@@ -535,13 +536,18 @@ metaslab_class_histogram_verify(metaslab_class_t *mc)
continue;
}
+ IMPLY(mg == mg->mg_vd->vdev_log_mg,
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
+
for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
mc_hist[i] += mg->mg_histogram[i];
}
- for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
+ for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
VERIFY3U(mc_hist[i], ==, mc->mc_histogram[i]);
+ }
+ mutex_exit(&mc->mc_lock);
kmem_free(mc_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE);
}
@@ -1004,16 +1010,22 @@ metaslab_group_initialized(metaslab_group_t *mg)
uint64_t
metaslab_group_get_space(metaslab_group_t *mg)
{
- return ((1ULL << mg->mg_vd->vdev_ms_shift) * mg->mg_vd->vdev_ms_count);
+ /*
+ * Note that the number of nodes in mg_metaslab_tree may be one less
+ * than vdev_ms_count, due to the embedded log metaslab.
+ */
+ mutex_enter(&mg->mg_lock);
+ uint64_t ms_count = avl_numnodes(&mg->mg_metaslab_tree);
+ mutex_exit(&mg->mg_lock);
+ return ((1ULL << mg->mg_vd->vdev_ms_shift) * ms_count);
}
void
metaslab_group_histogram_verify(metaslab_group_t *mg)
{
uint64_t *mg_hist;
- vdev_t *vd = mg->mg_vd;
- uint64_t ashift = vd->vdev_ashift;
- int i;
+ avl_tree_t *t = &mg->mg_metaslab_tree;
+ uint64_t ashift = mg->mg_vd->vdev_ashift;
if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0)
return;
@@ -1024,21 +1036,25 @@ metaslab_group_histogram_verify(metaslab_group_t *mg)
ASSERT3U(RANGE_TREE_HISTOGRAM_SIZE, >=,
SPACE_MAP_HISTOGRAM_SIZE + ashift);
- for (int m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
-
- /* skip if not active or not a member */
- if (msp->ms_sm == NULL || msp->ms_group != mg)
+ mutex_enter(&mg->mg_lock);
+ for (metaslab_t *msp = avl_first(t);
+ msp != NULL; msp = AVL_NEXT(t, msp)) {
+ VERIFY3P(msp->ms_group, ==, mg);
+ /* skip if not active */
+ if (msp->ms_sm == NULL)
continue;
- for (i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++)
+ for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
mg_hist[i + ashift] +=
msp->ms_sm->sm_phys->smp_histogram[i];
+ }
}
- for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i ++)
+ for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i ++)
VERIFY3U(mg_hist[i], ==, mg->mg_histogram[i]);
+ mutex_exit(&mg->mg_lock);
+
kmem_free(mg_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE);
}
@@ -1053,12 +1069,16 @@ metaslab_group_histogram_add(metaslab_group_t *mg, metaslab_t *msp)
return;
mutex_enter(&mg->mg_lock);
+ mutex_enter(&mc->mc_lock);
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
+ IMPLY(mg == mg->mg_vd->vdev_log_mg,
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
mg->mg_histogram[i + ashift] +=
msp->ms_sm->sm_phys->smp_histogram[i];
mc->mc_histogram[i + ashift] +=
msp->ms_sm->sm_phys->smp_histogram[i];
}
+ mutex_exit(&mc->mc_lock);
mutex_exit(&mg->mg_lock);
}
@@ -1073,17 +1093,21 @@ metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp)
return;
mutex_enter(&mg->mg_lock);
+ mutex_enter(&mc->mc_lock);
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
ASSERT3U(mg->mg_histogram[i + ashift], >=,
msp->ms_sm->sm_phys->smp_histogram[i]);
ASSERT3U(mc->mc_histogram[i + ashift], >=,
msp->ms_sm->sm_phys->smp_histogram[i]);
+ IMPLY(mg == mg->mg_vd->vdev_log_mg,
+ mc == spa_embedded_log_class(mg->mg_vd->vdev_spa));
mg->mg_histogram[i + ashift] -=
msp->ms_sm->sm_phys->smp_histogram[i];
mc->mc_histogram[i + ashift] -=
msp->ms_sm->sm_phys->smp_histogram[i];
}
+ mutex_exit(&mc->mc_lock);
mutex_exit(&mg->mg_lock);
}
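
One detail worth noting in the two hunks above: mg_lock is always acquired before mc_lock and released in the reverse order. That ordering is what allows metaslab_class_histogram_verify(), earlier in this patch, to read mc_histogram while holding mc_lock alone.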
@@ -2741,37 +2765,47 @@ metaslab_fini(metaslab_t *msp)
mutex_enter(&msp->ms_lock);
VERIFY(msp->ms_group == NULL);
- metaslab_space_update(vd, mg->mg_class,
- -metaslab_allocated_space(msp), 0, -msp->ms_size);
+ /*
+ * If the range trees haven't been allocated, this metaslab hasn't
+ * been through metaslab_sync_done() for the first time yet, so its
+ * space hasn't been accounted for in its vdev and doesn't need to be
+ * subtracted.
+ */
+ if (msp->ms_freed != NULL) {
+ metaslab_space_update(vd, mg->mg_class,
+ -metaslab_allocated_space(msp), 0, -msp->ms_size);
+ }
space_map_close(msp->ms_sm);
msp->ms_sm = NULL;
metaslab_unload(msp);
+
range_tree_destroy(msp->ms_allocatable);
- range_tree_destroy(msp->ms_freeing);
- range_tree_destroy(msp->ms_freed);
- ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
- metaslab_unflushed_changes_memused(msp));
- spa->spa_unflushed_stats.sus_memused -=
- metaslab_unflushed_changes_memused(msp);
- range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
- range_tree_destroy(msp->ms_unflushed_allocs);
- range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
- range_tree_destroy(msp->ms_unflushed_frees);
+ if (msp->ms_freed != NULL) {
+ range_tree_destroy(msp->ms_freeing);
+ range_tree_destroy(msp->ms_freed);
- for (int t = 0; t < TXG_SIZE; t++) {
- range_tree_destroy(msp->ms_allocating[t]);
- }
+ ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
+ metaslab_unflushed_changes_memused(msp));
+ spa->spa_unflushed_stats.sus_memused -=
+ metaslab_unflushed_changes_memused(msp);
+ range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
+ range_tree_destroy(msp->ms_unflushed_allocs);
+ range_tree_destroy(msp->ms_checkpointing);
+ range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
+ range_tree_destroy(msp->ms_unflushed_frees);
- for (int t = 0; t < TXG_DEFER_SIZE; t++) {
- range_tree_destroy(msp->ms_defer[t]);
+ for (int t = 0; t < TXG_SIZE; t++) {
+ range_tree_destroy(msp->ms_allocating[t]);
+ }
+ for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+ range_tree_destroy(msp->ms_defer[t]);
+ }
}
ASSERT0(msp->ms_deferspace);
- range_tree_destroy(msp->ms_checkpointing);
-
for (int t = 0; t < TXG_SIZE; t++)
ASSERT(!txg_list_member(&vd->vdev_ms_list, msp, t));
@@ -5113,7 +5147,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
* all else fails.
*/
if (vd != NULL && vd->vdev_mg != NULL) {
- mg = vd->vdev_mg;
+ mg = vdev_get_mg(vd, mc);
if (flags & METASLAB_HINTBP_AVOID &&
mg->mg_next != NULL)
diff --git a/sys/contrib/openzfs/module/zfs/sa.c b/sys/contrib/openzfs/module/zfs/sa.c
index 83a10e7b4548..5af0aaa7d0aa 100644
--- a/sys/contrib/openzfs/module/zfs/sa.c
+++ b/sys/contrib/openzfs/module/zfs/sa.c
@@ -1502,7 +1502,7 @@ sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
#ifdef _KERNEL
int
-sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
+sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, zfs_uio_t *uio)
{
int error;
sa_bulk_attr_t bulk;
@@ -1515,8 +1515,8 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
mutex_enter(&hdl->sa_lock);
if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) {
- error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
- uio_resid(uio)), UIO_READ, uio);
+ error = zfs_uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
+ zfs_uio_resid(uio)), UIO_READ, uio);
}
mutex_exit(&hdl->sa_lock);
return (error);
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 53ffbc31c186..56354a107e66 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -303,10 +303,12 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
alloc = metaslab_class_get_alloc(mc);
alloc += metaslab_class_get_alloc(spa_special_class(spa));
alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
+ alloc += metaslab_class_get_alloc(spa_embedded_log_class(spa));
size = metaslab_class_get_space(mc);
size += metaslab_class_get_space(spa_special_class(spa));
size += metaslab_class_get_space(spa_dedup_class(spa));
+ size += metaslab_class_get_space(spa_embedded_log_class(spa));
spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
@@ -1196,6 +1198,8 @@ spa_activate(spa_t *spa, spa_mode_t mode)
spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
+ spa->spa_embedded_log_class =
+ metaslab_class_create(spa, zfs_metaslab_ops);
spa->spa_special_class = metaslab_class_create(spa, zfs_metaslab_ops);
spa->spa_dedup_class = metaslab_class_create(spa, zfs_metaslab_ops);
@@ -1347,6 +1351,9 @@ spa_deactivate(spa_t *spa)
metaslab_class_destroy(spa->spa_log_class);
spa->spa_log_class = NULL;
+ metaslab_class_destroy(spa->spa_embedded_log_class);
+ spa->spa_embedded_log_class = NULL;
+
metaslab_class_destroy(spa->spa_special_class);
spa->spa_special_class = NULL;
@@ -2103,6 +2110,9 @@ spa_check_logs(spa_t *spa)
return (rv);
}
+/*
+ * Passivate any log vdevs (note, does not apply to embedded log metaslabs).
+ */
static boolean_t
spa_passivate_log(spa_t *spa)
{
@@ -2113,10 +2123,10 @@ spa_passivate_log(spa_t *spa)
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
- metaslab_group_t *mg = tvd->vdev_mg;
if (tvd->vdev_islog) {
- metaslab_group_passivate(mg);
+ ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ metaslab_group_passivate(tvd->vdev_mg);
slog_found = B_TRUE;
}
}
@@ -2124,6 +2134,9 @@ spa_passivate_log(spa_t *spa)
return (slog_found);
}
+/*
+ * Activate any log vdevs (note, does not apply to embedded log metaslabs).
+ */
static void
spa_activate_log(spa_t *spa)
{
@@ -2133,10 +2146,11 @@ spa_activate_log(spa_t *spa)
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
- metaslab_group_t *mg = tvd->vdev_mg;
- if (tvd->vdev_islog)
- metaslab_group_activate(mg);
+ if (tvd->vdev_islog) {
+ ASSERT3P(tvd->vdev_log_mg, ==, NULL);
+ metaslab_group_activate(tvd->vdev_mg);
+ }
}
}
@@ -6236,6 +6250,7 @@ static int
spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
boolean_t force, boolean_t hardforce)
{
+ int error;
spa_t *spa;
if (oldconfig)
@@ -6288,13 +6303,9 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
* references. If we are resetting a pool, allow references by
* fault injection handlers.
*/
- if (!spa_refcount_zero(spa) ||
- (spa->spa_inject_ref != 0 &&
- new_state != POOL_STATE_UNINITIALIZED)) {
- spa_async_resume(spa);
- spa->spa_is_exporting = B_FALSE;
- mutex_exit(&spa_namespace_lock);
- return (SET_ERROR(EBUSY));
+ if (!spa_refcount_zero(spa) || (spa->spa_inject_ref != 0)) {
+ error = SET_ERROR(EBUSY);
+ goto fail;
}
if (spa->spa_sync_on) {
@@ -6306,10 +6317,8 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
*/
if (!force && new_state == POOL_STATE_EXPORTED &&
spa_has_active_shared_spare(spa)) {
- spa_async_resume(spa);
- spa->spa_is_exporting = B_FALSE;
- mutex_exit(&spa_namespace_lock);
- return (SET_ERROR(EXDEV));
+ error = SET_ERROR(EXDEV);
+ goto fail;
}
/*
@@ -6371,6 +6380,12 @@ export_spa:
mutex_exit(&spa_namespace_lock);
return (0);
+
+fail:
+ spa->spa_is_exporting = B_FALSE;
+ spa_async_resume(spa);
+ mutex_exit(&spa_namespace_lock);
+ return (error);
}
/*
@@ -8033,12 +8048,16 @@ spa_async_thread(void *arg)
old_space = metaslab_class_get_space(spa_normal_class(spa));
old_space += metaslab_class_get_space(spa_special_class(spa));
old_space += metaslab_class_get_space(spa_dedup_class(spa));
+ old_space += metaslab_class_get_space(
+ spa_embedded_log_class(spa));
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
new_space = metaslab_class_get_space(spa_normal_class(spa));
new_space += metaslab_class_get_space(spa_special_class(spa));
new_space += metaslab_class_get_space(spa_dedup_class(spa));
+ new_space += metaslab_class_get_space(
+ spa_embedded_log_class(spa));
mutex_exit(&spa_namespace_lock);
/*
diff --git a/sys/contrib/openzfs/module/zfs/spa_history.c b/sys/contrib/openzfs/module/zfs/spa_history.c
index 2939c0366504..0482e0f6c39d 100644
--- a/sys/contrib/openzfs/module/zfs/spa_history.c
+++ b/sys/contrib/openzfs/module/zfs/spa_history.c
@@ -288,7 +288,6 @@ spa_history_log_sync(void *arg, dmu_tx_t *tx)
}
#endif
- fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname()->nodename);
if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
@@ -396,6 +395,12 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
}
fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
+ /*
+ * Since the history is recorded asynchronously, the effective time is
+ * now, which may be considerably before the change is made on disk.
+ */
+ fnvlist_add_uint64(nvarg, ZPOOL_HIST_TIME, gethrestime_sec());
+
/* Kick this off asynchronously; errors are ignored. */
dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, tx);
dmu_tx_commit(tx);
@@ -526,6 +531,7 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
+ fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
if (dmu_tx_is_syncing(tx)) {
spa_history_log_sync(nvl, tx);
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index f49be8eec01a..b4c73f58d3bc 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -349,9 +349,11 @@ int spa_asize_inflation = 24;
* Normally, we don't allow the last 3.2% (1/(2^spa_slop_shift)) of space in
* the pool to be consumed. This ensures that we don't run the pool
* completely out of space, due to unaccounted changes (e.g. to the MOS).
- * It also limits the worst-case time to allocate space. If we have
- * less than this amount of free space, most ZPL operations (e.g. write,
- * create) will return ENOSPC.
+ * It also limits the worst-case time to allocate space. If we have less than
+ * this amount of free space, most ZPL operations (e.g. write, create) will
+ * return ENOSPC. The ZIL metaslabs (spa_embedded_log_class) are also part of
+ * this 3.2% of space which can't be consumed by normal writes; the slop space
+ * "proper" (spa_get_slop_space()) is decreased by the embedded log space.
*
* Certain operations (e.g. file removal, most administrative actions) can
* use half the slop space. They will only return ENOSPC if less than half
@@ -1026,10 +1028,10 @@ spa_aux_activate(vdev_t *vd, avl_tree_t *avl)
/*
* Spares are tracked globally due to the following constraints:
*
- * - A spare may be part of multiple pools.
- * - A spare may be added to a pool even if it's actively in use within
+ * - A spare may be part of multiple pools.
+ * - A spare may be added to a pool even if it's actively in use within
* another pool.
- * - A spare in use in any pool can only be the source of a replacement if
+ * - A spare in use in any pool can only be the source of a replacement if
* the target is a spare in the same pool.
*
* We keep track of all spares on the system through the use of a reference
@@ -1236,6 +1238,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
*/
ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
+ ASSERT(metaslab_class_validate(spa_embedded_log_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_special_class(spa)) == 0);
ASSERT(metaslab_class_validate(spa_dedup_class(spa)) == 0);
@@ -1776,17 +1779,37 @@ spa_get_worst_case_asize(spa_t *spa, uint64_t lsize)
}
/*
- * Return the amount of slop space in bytes. It is 1/32 of the pool (3.2%),
- * or at least 128MB, unless that would cause it to be more than half the
- * pool size.
- *
- * See the comment above spa_slop_shift for details.
+ * Return the amount of slop space in bytes. It is typically 1/32 of the pool
+ * (3.2%), minus the embedded log space. On very small pools, it may be
+ * slightly larger than this. The embedded log space is not included in
+ * spa_dspace. By subtracting it, the usable space (per "zfs list") is a
+ * constant 97% of the total space, regardless of metaslab size (assuming the
+ * default spa_slop_shift=5 and a non-tiny pool).
+ *
+ * See the comment above spa_slop_shift for more details.
*/
uint64_t
spa_get_slop_space(spa_t *spa)
{
uint64_t space = spa_get_dspace(spa);
- return (MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop)));
+ uint64_t slop = space >> spa_slop_shift;
+
+ /*
+ * Subtract the embedded log space, but no more than half the (3.2%)
+ * unusable space. Note, the "no more than half" is only relevant if
+ * zfs_embedded_slog_min_ms >> spa_slop_shift < 2, which is not true by
+ * default.
+ */
+ uint64_t embedded_log =
+ metaslab_class_get_dspace(spa_embedded_log_class(spa));
+ slop -= MIN(embedded_log, slop >> 1);
+
+ /*
+ * Slop space should be at least spa_min_slop, but no more than half
+ * the entire pool.
+ */
+ slop = MAX(slop, MIN(space >> 1, spa_min_slop));
+ return (slop);
}
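
A worked example with assumed numbers: take spa_get_dspace() == 1024 GiB (the embedded log space is already excluded from dspace), the default spa_slop_shift == 5 and spa_min_slop == 128 MiB, and one 8 GiB embedded log metaslab:

    slop  = 1024 GiB >> 5                      = 32 GiB
    slop -= MIN(8 GiB, 32 GiB >> 1)            -> 32 GiB - 8 GiB = 24 GiB
    slop  = MAX(24 GiB, MIN(512 GiB, 128 MiB)) = 24 GiB

The pool thus reserves 24 GiB of slop plus the 8 GiB embedded log metaslab, the same 32 GiB (1/32 of the pool) that the old formula set aside, which is how the usable space stays a constant ~97%.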
uint64_t
@@ -1873,6 +1896,12 @@ spa_log_class(spa_t *spa)
}
metaslab_class_t *
+spa_embedded_log_class(spa_t *spa)
+{
+ return (spa->spa_embedded_log_class);
+}
+
+metaslab_class_t *
spa_special_class(spa_t *spa)
{
return (spa->spa_special_class);
@@ -1891,12 +1920,10 @@ metaslab_class_t *
spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype,
uint_t level, uint_t special_smallblk)
{
- if (DMU_OT_IS_ZIL(objtype)) {
- if (spa->spa_log_class->mc_groups != 0)
- return (spa_log_class(spa));
- else
- return (spa_normal_class(spa));
- }
+ /*
+ * ZIL allocations determine their class in zio_alloc_zil().
+ */
+ ASSERT(objtype != DMU_OT_INTENT_LOG);
boolean_t has_special_class = spa->spa_special_class->mc_groups != 0;
@@ -2432,9 +2459,9 @@ spa_fini(void)
}
/*
- * Return whether this pool has slogs. No locking needed.
+ * Return whether this pool has a dedicated slog device. No locking needed.
* It's not a problem if the wrong answer is returned as it's only for
- * performance and not correctness
+ * performance and not correctness.
*/
boolean_t
spa_has_slogs(spa_t *spa)
diff --git a/sys/contrib/openzfs/module/zfs/txg.c b/sys/contrib/openzfs/module/zfs/txg.c
index 3efd26155014..497e19dd58eb 100644
--- a/sys/contrib/openzfs/module/zfs/txg.c
+++ b/sys/contrib/openzfs/module/zfs/txg.c
@@ -292,6 +292,27 @@ txg_sync_stop(dsl_pool_t *dp)
mutex_exit(&tx->tx_sync_lock);
}
+/*
+ * Get a handle on the currently open txg and keep it open.
+ *
+ * The txg is guaranteed to stay open until txg_rele_to_quiesce() is called for
+ * the handle. Once txg_rele_to_quiesce() has been called, the txg stays
+ * in quiescing state until txg_rele_to_sync() is called for the handle.
+ *
+ * It is guaranteed that subsequent calls return monotonically increasing
+ * txgs for the same dsl_pool_t. Of course this is not strong monotonicity,
+ * because the same txg can be returned multiple times in a row. This
+ * guarantee holds both for subsequent calls from one thread and for multiple
+ * threads. For example, it is impossible to observe the following sequence
+ * of events:
+ *
+ * Thread 1 Thread 2
+ *
+ * 1 <- txg_hold_open(P, ...)
+ * 2 <- txg_hold_open(P, ...)
+ * 1 <- txg_hold_open(P, ...)
+ */
uint64_t
txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
{
@@ -393,7 +414,8 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
spa_txg_history_add(dp->dp_spa, txg + 1, tx_open_time);
/*
- * Quiesce the transaction group by waiting for everyone to txg_exit().
+ * Quiesce the transaction group by waiting for everyone to
+ * call txg_rele_to_sync() for their open transaction handles.
*/
for (c = 0; c < max_ncpus; c++) {
tx_cpu_t *tc = &tx->tx_cpu[c];
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 7ffe924212da..36001e0a6626 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -59,6 +59,27 @@
#include <sys/zvol.h>
#include <sys/zfs_ratelimit.h>
+/*
+ * One metaslab from each (normal-class) vdev is used by the ZIL. These are
+ * called "embedded slog metaslabs", are referenced by vdev_log_mg, and are
+ * part of the spa_embedded_log_class. The metaslab with the most free space
+ * in each vdev is selected for this purpose when the pool is opened (or a
+ * vdev is added). See vdev_metaslab_init().
+ *
+ * Log blocks can be allocated from the following locations. Each one is tried
+ * in order until the allocation succeeds:
+ * 1. dedicated log vdevs, aka "slog" (spa_log_class)
+ * 2. embedded slog metaslabs (spa_embedded_log_class)
+ * 3. other metaslabs in normal vdevs (spa_normal_class)
+ *
+ * zfs_embedded_slog_min_ms disables the embedded slog if there are fewer
+ * than this number of metaslabs in the vdev. This ensures that we don't set
+ * aside an unreasonable amount of space for the ZIL. If set to less than
+ * 1 << (spa_slop_shift + 1), on small pools the usable space may be reduced
+ * (by more than 1<<spa_slop_shift) due to the embedded slog metaslab.
+ */
+int zfs_embedded_slog_min_ms = 64;
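
The three-step fallback listed above reduces to the following, sketched with a hypothetical try_alloc_from() standing in for the per-class metaslab_alloc() call (the real logic lives in zio_alloc_zil()):

    error = try_alloc_from(spa_log_class(spa));              /* 1. slog */
    if (error != 0)
        error = try_alloc_from(spa_embedded_log_class(spa)); /* 2. embedded */
    if (error != 0)
        error = try_alloc_from(spa_normal_class(spa));       /* 3. normal */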
+
/* default target for number of metaslabs per top-level vdev */
int zfs_vdev_default_ms_count = 200;
@@ -223,6 +244,22 @@ vdev_getops(const char *type)
return (ops);
}
+/*
+ * Given a vdev and a metaslab class, find which metaslab group we're
+ * interested in. All vdevs may belong to two different metaslab classes.
+ * Dedicated slog devices use only the primary metaslab group, rather than a
+ * separate log group. For embedded slogs, the vdev_log_mg will be non-NULL.
+ */
+metaslab_group_t *
+vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
+{
+ if (mc == spa_embedded_log_class(vd->vdev_spa) &&
+ vd->vdev_log_mg != NULL)
+ return (vd->vdev_log_mg);
+ else
+ return (vd->vdev_mg);
+}
+
/* ARGSUSED */
void
vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
@@ -978,6 +1015,11 @@ vdev_free(vdev_t *vd)
metaslab_group_destroy(vd->vdev_mg);
vd->vdev_mg = NULL;
}
+ if (vd->vdev_log_mg != NULL) {
+ ASSERT0(vd->vdev_ms_count);
+ metaslab_group_destroy(vd->vdev_log_mg);
+ vd->vdev_log_mg = NULL;
+ }
ASSERT0(vd->vdev_stat.vs_space);
ASSERT0(vd->vdev_stat.vs_dspace);
@@ -1098,14 +1140,20 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
if (tvd->vdev_mg)
ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg);
+ if (tvd->vdev_log_mg)
+ ASSERT3P(tvd->vdev_log_mg, ==, svd->vdev_log_mg);
tvd->vdev_mg = svd->vdev_mg;
+ tvd->vdev_log_mg = svd->vdev_log_mg;
tvd->vdev_ms = svd->vdev_ms;
svd->vdev_mg = NULL;
+ svd->vdev_log_mg = NULL;
svd->vdev_ms = NULL;
if (tvd->vdev_mg != NULL)
tvd->vdev_mg->mg_vd = tvd;
+ if (tvd->vdev_log_mg != NULL)
+ tvd->vdev_log_mg->mg_vd = tvd;
tvd->vdev_checkpoint_sm = svd->vdev_checkpoint_sm;
svd->vdev_checkpoint_sm = NULL;
@@ -1283,7 +1331,7 @@ vdev_remove_parent(vdev_t *cvd)
vdev_free(mvd);
}
-static void
+void
vdev_metaslab_group_create(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
@@ -1317,6 +1365,11 @@ vdev_metaslab_group_create(vdev_t *vd)
vd->vdev_mg = metaslab_group_create(mc, vd,
spa->spa_alloc_count);
+ if (!vd->vdev_islog) {
+ vd->vdev_log_mg = metaslab_group_create(
+ spa_embedded_log_class(spa), vd, 1);
+ }
+
/*
* The spa ashift min/max only apply for the normal metaslab
* class. Class destination is late binding so ashift boundary
@@ -1340,8 +1393,6 @@ int
vdev_metaslab_init(vdev_t *vd, uint64_t txg)
{
spa_t *spa = vd->vdev_spa;
- objset_t *mos = spa->spa_meta_objset;
- uint64_t m;
uint64_t oldc = vd->vdev_ms_count;
uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
metaslab_t **mspp;
@@ -1369,16 +1420,17 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
vd->vdev_ms = mspp;
vd->vdev_ms_count = newc;
- for (m = oldc; m < newc; m++) {
- uint64_t object = 0;
+ for (uint64_t m = oldc; m < newc; m++) {
+ uint64_t object = 0;
/*
* vdev_ms_array may be 0 if we are creating the "fake"
* metaslabs for an indirect vdev for zdb's leak detection.
* See zdb_leak_init().
*/
if (txg == 0 && vd->vdev_ms_array != 0) {
- error = dmu_read(mos, vd->vdev_ms_array,
+ error = dmu_read(spa->spa_meta_objset,
+ vd->vdev_ms_array,
m * sizeof (uint64_t), sizeof (uint64_t), &object,
DMU_READ_PREFETCH);
if (error != 0) {
@@ -1388,17 +1440,6 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
}
}
-#ifndef _KERNEL
- /*
- * To accommodate zdb_leak_init() fake indirect
- * metaslabs, we allocate a metaslab group for
- * indirect vdevs which normally don't have one.
- */
- if (vd->vdev_mg == NULL) {
- ASSERT0(vdev_is_concrete(vd));
- vdev_metaslab_group_create(vd);
- }
-#endif
error = metaslab_init(vd->vdev_mg, m, object, txg,
&(vd->vdev_ms[m]));
if (error != 0) {
@@ -1408,6 +1449,47 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
}
}
+ /*
+ * Find the emptiest metaslab on the vdev and mark it for use for
+ * embedded slog by moving it from the regular to the log metaslab
+ * group.
+ */
+ if (vd->vdev_mg->mg_class == spa_normal_class(spa) &&
+ vd->vdev_ms_count > zfs_embedded_slog_min_ms &&
+ avl_is_empty(&vd->vdev_log_mg->mg_metaslab_tree)) {
+ uint64_t slog_msid = 0;
+ uint64_t smallest = UINT64_MAX;
+
+ /*
+ * Note, we only search the new metaslabs, because the old
+ * (pre-existing) ones may be active (e.g. have non-empty
+ * range_tree's), and we don't move them to the new
+ * metaslab_t.
+ */
+ for (uint64_t m = oldc; m < newc; m++) {
+ uint64_t alloc =
+ space_map_allocated(vd->vdev_ms[m]->ms_sm);
+ if (alloc < smallest) {
+ slog_msid = m;
+ smallest = alloc;
+ }
+ }
+ metaslab_t *slog_ms = vd->vdev_ms[slog_msid];
+ /*
+ * The metaslab was marked as dirty at the end of
+ * metaslab_init(). Remove it from the dirty list so that we
+ * can uninitialize and reinitialize it to the new class.
+ */
+ if (txg != 0) {
+ (void) txg_list_remove_this(&vd->vdev_ms_list,
+ slog_ms, txg);
+ }
+ uint64_t sm_obj = space_map_object(slog_ms->ms_sm);
+ metaslab_fini(slog_ms);
+ VERIFY0(metaslab_init(vd->vdev_log_mg, slog_msid, sm_obj, txg,
+ &vd->vdev_ms[slog_msid]));
+ }
+
if (txg == 0)
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
@@ -1418,6 +1500,8 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
*/
if (!expanding && !vd->vdev_removing) {
metaslab_group_activate(vd->vdev_mg);
+ if (vd->vdev_log_mg != NULL)
+ metaslab_group_activate(vd->vdev_log_mg);
}
if (txg == 0)
@@ -1453,7 +1537,12 @@ vdev_metaslab_fini(vdev_t *vd)
if (vd->vdev_ms != NULL) {
metaslab_group_t *mg = vd->vdev_mg;
+
metaslab_group_passivate(mg);
+ if (vd->vdev_log_mg != NULL) {
+ ASSERT(!vd->vdev_islog);
+ metaslab_group_passivate(vd->vdev_log_mg);
+ }
uint64_t count = vd->vdev_ms_count;
for (uint64_t m = 0; m < count; m++) {
@@ -1463,11 +1552,13 @@ vdev_metaslab_fini(vdev_t *vd)
}
vmem_free(vd->vdev_ms, count * sizeof (metaslab_t *));
vd->vdev_ms = NULL;
-
vd->vdev_ms_count = 0;
- for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++)
+ for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
ASSERT0(mg->mg_histogram[i]);
+ if (vd->vdev_log_mg != NULL)
+ ASSERT0(vd->vdev_log_mg->mg_histogram[i]);
+ }
}
ASSERT0(vd->vdev_ms_count);
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
@@ -1634,6 +1725,14 @@ vdev_probe(vdev_t *vd, zio_t *zio)
}
static void
+vdev_load_child(void *arg)
+{
+ vdev_t *vd = arg;
+
+ vd->vdev_load_error = vdev_load(vd);
+}
+
+static void
vdev_open_child(void *arg)
{
vdev_t *vd = arg;
@@ -2037,6 +2136,16 @@ vdev_open(vdev_t *vd)
return (0);
}
+static void
+vdev_validate_child(void *arg)
+{
+ vdev_t *vd = arg;
+
+ vd->vdev_validate_thread = curthread;
+ vd->vdev_validate_error = vdev_validate(vd);
+ vd->vdev_validate_thread = NULL;
+}
+
/*
* Called once the vdevs are all opened, this routine validates the label
* contents. This needs to be done before vdev_load() so that we don't
@@ -2051,18 +2160,43 @@ int
vdev_validate(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
+ taskq_t *tq = NULL;
nvlist_t *label;
uint64_t guid = 0, aux_guid = 0, top_guid;
uint64_t state;
nvlist_t *nvl;
uint64_t txg;
+ int children = vd->vdev_children;
if (vdev_validate_skip)
return (0);
- for (uint64_t c = 0; c < vd->vdev_children; c++)
- if (vdev_validate(vd->vdev_child[c]) != 0)
+ if (children > 0) {
+ tq = taskq_create("vdev_validate", children, minclsyspri,
+ children, children, TASKQ_PREPOPULATE);
+ }
+
+ for (uint64_t c = 0; c < children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+
+ if (tq == NULL || vdev_uses_zvols(cvd)) {
+ vdev_validate_child(cvd);
+ } else {
+ VERIFY(taskq_dispatch(tq, vdev_validate_child, cvd,
+ TQ_SLEEP) != TASKQID_INVALID);
+ }
+ }
+ if (tq != NULL) {
+ taskq_wait(tq);
+ taskq_destroy(tq);
+ }
+ for (int c = 0; c < children; c++) {
+ int error = vd->vdev_child[c]->vdev_validate_error;
+
+ if (error != 0)
return (SET_ERROR(EBADF));
+ }
+
/*
* If the device has already failed, or was marked offline, don't do
@@ -3259,18 +3393,46 @@ vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj)
int
vdev_load(vdev_t *vd)
{
+ int children = vd->vdev_children;
int error = 0;
+ taskq_t *tq = NULL;
+
+ /*
+ * It's only worthwhile to use the taskq for the root vdev, because the
+ * slow part is metaslab_init, and that only happens for top-level
+ * vdevs.
+ */
+ if (vd->vdev_ops == &vdev_root_ops && vd->vdev_children > 0) {
+ tq = taskq_create("vdev_load", children, minclsyspri,
+ children, children, TASKQ_PREPOPULATE);
+ }
/*
* Recursively load all children.
*/
for (int c = 0; c < vd->vdev_children; c++) {
- error = vdev_load(vd->vdev_child[c]);
- if (error != 0) {
- return (error);
+ vdev_t *cvd = vd->vdev_child[c];
+
+ if (tq == NULL || vdev_uses_zvols(cvd)) {
+ cvd->vdev_load_error = vdev_load(cvd);
+ } else {
+ VERIFY(taskq_dispatch(tq, vdev_load_child,
+ cvd, TQ_SLEEP) != TASKQID_INVALID);
}
}
+ if (tq != NULL) {
+ taskq_wait(tq);
+ taskq_destroy(tq);
+ }
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ int error = vd->vdev_child[c]->vdev_load_error;
+
+ if (error != 0)
+ return (error);
+ }
+
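
Both vdev_validate() and vdev_load() now follow the same fan-out pattern: dispatch one task per child, falling back to direct recursion for zvol-backed children (presumably to avoid deadlocking against the zvol's own I/O path), taskq_wait(), destroy the taskq, and only then collect the per-child error codes, so a failing child never leaves tasks running.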
vdev_set_deflate_ratio(vd);
/*
@@ -3531,8 +3693,11 @@ vdev_sync_done(vdev_t *vd, uint64_t txg)
!= NULL)
metaslab_sync_done(msp, txg);
- if (reassess)
+ if (reassess) {
metaslab_sync_reassess(vd->vdev_mg);
+ if (vd->vdev_log_mg != NULL)
+ metaslab_sync_reassess(vd->vdev_log_mg);
+ }
}
void
@@ -3856,6 +4021,7 @@ top:
/*
* Prevent any future allocations.
*/
+ ASSERT3P(tvd->vdev_log_mg, ==, NULL);
metaslab_group_passivate(mg);
(void) spa_vdev_state_exit(spa, vd, 0);
@@ -4256,6 +4422,12 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
*/
if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
vdev_is_concrete(vd)) {
+ /*
+ * The vdev fragmentation rating doesn't take into
+ * account the embedded slog metaslab (vdev_log_mg).
+ * Since it's only one metaslab, it would have a tiny
+ * impact on the overall fragmentation.
+ */
vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
vd->vdev_mg->mg_fragmentation : 0;
}
@@ -5234,6 +5406,9 @@ ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW,
"Disable cache flushes");
+ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, INT, ZMOD_RW,
+ "Minimum number of metaslabs required to dedicate one for log blocks");
+
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift,
param_set_min_auto_ashift, param_get_ulong, ZMOD_RW,
"Minimum ashift used when creating new top-level vdevs");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c
index 6b7ad7021a50..a4f48cf744b0 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@@ -716,7 +716,7 @@ vdev_draid_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
offset = 0;
for (; x < rr->rr_cols; x++) {
- abd_put(rr->rr_col[x].rc_abd);
+ abd_free(rr->rr_col[x].rc_abd);
if (offset == good_size) {
/* empty data column (small write) */
@@ -754,11 +754,7 @@ vdev_draid_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
offset = 0;
for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) {
- if (offset == good_size || x < rr->rr_bigcols)
- abd_put(rr->rr_col[x].rc_abd);
- else
- abd_free(rr->rr_col[x].rc_abd);
-
+ abd_free(rr->rr_col[x].rc_abd);
rr->rr_col[x].rc_abd = abd_get_offset_size(
rr->rr_abd_copy, offset,
rr->rr_col[x].rc_size);
@@ -797,7 +793,7 @@ vdev_draid_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
/* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
- abd_put((abd_t *)good);
+ abd_free((abd_t *)good);
}
/*
@@ -852,11 +848,7 @@ vdev_draid_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
offset, col->rc_size);
abd_copy(tmp, col->rc_abd, col->rc_size);
-
- if (abd_is_gang(col->rc_abd))
- abd_free(col->rc_abd);
- else
- abd_put(col->rc_abd);
+ abd_free(col->rc_abd);
col->rc_abd = tmp;
offset += col->rc_size;
@@ -902,12 +894,12 @@ vdev_draid_map_alloc_write(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
rc->rc_abd = abd_get_zeros(skip_size);
} else if (rc->rc_size == parity_size) {
/* this is a "big column" */
- rc->rc_abd = abd_get_offset_size(zio->io_abd,
- abd_off, rc->rc_size);
+ rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+ zio->io_abd, abd_off, rc->rc_size);
} else {
/* short data column, add a skip sector */
ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
- rc->rc_abd = abd_alloc_gang_abd();
+ rc->rc_abd = abd_alloc_gang();
abd_gang_add(rc->rc_abd, abd_get_offset_size(
zio->io_abd, abd_off, rc->rc_size), B_TRUE);
abd_gang_add(rc->rc_abd, abd_get_zeros(skip_size),
@@ -958,13 +950,13 @@ vdev_draid_map_alloc_scrub(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
skip_off += skip_size;
} else if (rc->rc_size == parity_size) {
/* this is a "big column" */
- rc->rc_abd = abd_get_offset_size(zio->io_abd,
- abd_off, rc->rc_size);
+ rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+ zio->io_abd, abd_off, rc->rc_size);
} else {
/* short data column, add a skip sector */
ASSERT3U(rc->rc_size + skip_size, ==, parity_size);
ASSERT3U(rr->rr_nempty, !=, 0);
- rc->rc_abd = abd_alloc_gang_abd();
+ rc->rc_abd = abd_alloc_gang();
abd_gang_add(rc->rc_abd, abd_get_offset_size(
zio->io_abd, abd_off, rc->rc_size), B_TRUE);
abd_gang_add(rc->rc_abd, abd_get_offset_size(
@@ -1006,8 +998,8 @@ vdev_draid_map_alloc_read(zio_t *zio, uint64_t abd_offset, raidz_row_t *rr)
raidz_col_t *rc = &rr->rr_col[c];
if (rc->rc_size > 0) {
- rc->rc_abd = abd_get_offset_size(zio->io_abd,
- abd_off, rc->rc_size);
+ rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+ zio->io_abd, abd_off, rc->rc_size);
abd_off += rc->rc_size;
}
}
@@ -1056,7 +1048,7 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
ASSERT3P(rc->rc_abd, !=, NULL);
ASSERT(!abd_is_gang(rc->rc_abd));
abd_t *read_abd = rc->rc_abd;
- rc->rc_abd = abd_alloc_gang_abd();
+ rc->rc_abd = abd_alloc_gang();
abd_gang_add(rc->rc_abd, read_abd, B_TRUE);
abd_gang_add(rc->rc_abd, abd_get_offset_size(
rr->rr_abd_empty, skip_off, skip_size), B_TRUE);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
index 07d1c922a50c..b26d0993711a 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -1187,7 +1187,7 @@ vdev_indirect_child_io_done(zio_t *zio)
pio->io_error = zio_worst_error(pio->io_error, zio->io_error);
mutex_exit(&pio->io_lock);
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
index fbd117d2d9ae..04202a9f8960 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -754,16 +754,17 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
{
spa_t *spa = vd->vdev_spa;
nvlist_t *config = NULL;
- vdev_phys_t *vp;
- abd_t *vp_abd;
- zio_t *zio;
+ vdev_phys_t *vp[VDEV_LABELS];
+ abd_t *vp_abd[VDEV_LABELS];
+ zio_t *zio[VDEV_LABELS];
uint64_t best_txg = 0;
uint64_t label_txg = 0;
int error = 0;
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SPECULATIVE;
- ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
+ ASSERT(vd->vdev_validate_thread == curthread ||
+ spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
if (!vdev_readable(vd))
return (NULL);
@@ -776,21 +777,24 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
if (vd->vdev_ops == &vdev_draid_spare_ops)
return (vdev_draid_read_config_spare(vd));
- vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
- vp = abd_to_buf(vp_abd);
+ for (int l = 0; l < VDEV_LABELS; l++) {
+ vp_abd[l] = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
+ vp[l] = abd_to_buf(vp_abd[l]);
+ }
retry:
for (int l = 0; l < VDEV_LABELS; l++) {
- nvlist_t *label = NULL;
-
- zio = zio_root(spa, NULL, NULL, flags);
+ zio[l] = zio_root(spa, NULL, NULL, flags);
- vdev_label_read(zio, vd, l, vp_abd,
- offsetof(vdev_label_t, vl_vdev_phys),
- sizeof (vdev_phys_t), NULL, NULL, flags);
+ vdev_label_read(zio[l], vd, l, vp_abd[l],
+ offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t),
+ NULL, NULL, flags);
+ }
+ for (int l = 0; l < VDEV_LABELS; l++) {
+ nvlist_t *label = NULL;
- if (zio_wait(zio) == 0 &&
- nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist),
+ if (zio_wait(zio[l]) == 0 &&
+ nvlist_unpack(vp[l]->vp_nvlist, sizeof (vp[l]->vp_nvlist),
&label, 0) == 0) {
/*
* Auxiliary vdevs won't have txg values in their
@@ -803,6 +807,8 @@ retry:
ZPOOL_CONFIG_POOL_TXG, &label_txg);
if ((error || label_txg == 0) && !config) {
config = label;
+ for (l++; l < VDEV_LABELS; l++)
+ zio_wait(zio[l]);
break;
} else if (label_txg <= txg && label_txg > best_txg) {
best_txg = label_txg;
@@ -831,7 +837,9 @@ retry:
(u_longlong_t)txg);
}
- abd_free(vp_abd);
+ for (int l = 0; l < VDEV_LABELS; l++) {
+ abd_free(vp_abd[l]);
+ }
return (config);
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index 02040c3ee198..25a4bc69cc23 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -789,7 +789,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
size = IO_SPAN(first, last);
ASSERT3U(size, <=, maxblocksize);
- abd = abd_alloc_gang_abd();
+ abd = abd_alloc_gang();
if (abd == NULL)
return (NULL);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index 5b152f38bd63..f4812e61252c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -138,30 +138,15 @@
static void
vdev_raidz_row_free(raidz_row_t *rr)
{
- int c;
-
- for (c = 0; c < rr->rr_firstdatacol && c < rr->rr_cols; c++) {
- abd_free(rr->rr_col[c].rc_abd);
+ for (int c = 0; c < rr->rr_cols; c++) {
+ raidz_col_t *rc = &rr->rr_col[c];
- if (rr->rr_col[c].rc_gdata != NULL) {
- abd_free(rr->rr_col[c].rc_gdata);
- }
- if (rr->rr_col[c].rc_orig_data != NULL) {
- zio_buf_free(rr->rr_col[c].rc_orig_data,
- rr->rr_col[c].rc_size);
- }
- }
- for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
- if (rr->rr_col[c].rc_size != 0) {
- if (abd_is_gang(rr->rr_col[c].rc_abd))
- abd_free(rr->rr_col[c].rc_abd);
- else
- abd_put(rr->rr_col[c].rc_abd);
- }
- if (rr->rr_col[c].rc_orig_data != NULL) {
- zio_buf_free(rr->rr_col[c].rc_orig_data,
- rr->rr_col[c].rc_size);
- }
+ if (rc->rc_size != 0)
+ abd_free(rc->rc_abd);
+ if (rc->rc_gdata != NULL)
+ abd_free(rc->rc_gdata);
+ if (rc->rc_orig_data != NULL)
+ zio_buf_free(rc->rc_orig_data, rc->rc_size);
}
if (rr->rr_abd_copy != NULL)
@@ -249,7 +234,7 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
/* fill in the data columns from good_data */
offset = 0;
for (; x < rr->rr_cols; x++) {
- abd_put(rr->rr_col[x].rc_abd);
+ abd_free(rr->rr_col[x].rc_abd);
rr->rr_col[x].rc_abd =
abd_get_offset_size((abd_t *)good_data,
@@ -268,7 +253,7 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
offset = 0;
for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) {
- abd_put(rr->rr_col[x].rc_abd);
+ abd_free(rr->rr_col[x].rc_abd);
rr->rr_col[x].rc_abd = abd_get_offset_size(
rr->rr_abd_copy, offset,
rr->rr_col[x].rc_size);
@@ -291,7 +276,7 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
/* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
- abd_put((abd_t *)good);
+ abd_free((abd_t *)good);
}
/*
@@ -344,7 +329,7 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
abd_copy(tmp, col->rc_abd, col->rc_size);
- abd_put(col->rc_abd);
+ abd_free(col->rc_abd);
col->rc_abd = tmp;
offset += col->rc_size;
@@ -379,7 +364,6 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
/* The starting byte offset on each child vdev. */
uint64_t o = (b / dcols) << ashift;
uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot;
- uint64_t off = 0;
raidz_map_t *rm =
kmem_zalloc(offsetof(raidz_map_t, rm_row[1]), KM_SLEEP);
@@ -477,13 +461,10 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
rr->rr_col[c].rc_abd =
abd_alloc_linear(rr->rr_col[c].rc_size, B_FALSE);
- rr->rr_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0,
- rr->rr_col[c].rc_size);
- off = rr->rr_col[c].rc_size;
-
- for (c = c + 1; c < acols; c++) {
+ for (uint64_t off = 0; c < acols; c++) {
raidz_col_t *rc = &rr->rr_col[c];
- rc->rc_abd = abd_get_offset_size(zio->io_abd, off, rc->rc_size);
+ rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct,
+ zio->io_abd, off, rc->rc_size);
off += rc->rc_size;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index 6eaaddd3979f..a758fe4fb343 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -1207,6 +1207,11 @@ vdev_remove_complete(spa_t *spa)
vd->vdev_mg = NULL;
spa_log_sm_set_blocklimit(spa);
}
+ if (vd->vdev_log_mg != NULL) {
+ ASSERT0(vd->vdev_ms_count);
+ metaslab_group_destroy(vd->vdev_log_mg);
+ vd->vdev_log_mg = NULL;
+ }
ASSERT0(vd->vdev_stat.vs_space);
ASSERT0(vd->vdev_stat.vs_dspace);
@@ -1780,6 +1785,8 @@ spa_vdev_remove_cancel_impl(spa_t *spa)
spa_config_enter(spa, SCL_ALLOC | SCL_VDEV, FTAG, RW_WRITER);
vdev_t *vd = vdev_lookup_top(spa, vdid);
metaslab_group_activate(vd->vdev_mg);
+ ASSERT(!vd->vdev_islog);
+ metaslab_group_activate(vd->vdev_log_mg);
spa_config_exit(spa, SCL_ALLOC | SCL_VDEV, FTAG);
}
@@ -1858,6 +1865,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
ASSERT(vd->vdev_islog);
ASSERT(vd == vd->vdev_top);
+ ASSERT3P(vd->vdev_log_mg, ==, NULL);
ASSERT(MUTEX_HELD(&spa_namespace_lock));
/*
@@ -1893,6 +1901,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
if (error != 0) {
metaslab_group_activate(mg);
+ ASSERT3P(vd->vdev_log_mg, ==, NULL);
return (error);
}
ASSERT0(vd->vdev_stat.vs_alloc);
@@ -2121,6 +2130,8 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
*/
metaslab_group_t *mg = vd->vdev_mg;
metaslab_group_passivate(mg);
+ ASSERT(!vd->vdev_islog);
+ metaslab_group_passivate(vd->vdev_log_mg);
/*
* Wait for the youngest allocations and frees to sync,
@@ -2157,6 +2168,8 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
if (error != 0) {
metaslab_group_activate(mg);
+ ASSERT(!vd->vdev_islog);
+ metaslab_group_activate(vd->vdev_log_mg);
spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART);
spa_async_request(spa, SPA_ASYNC_TRIM_RESTART);
spa_async_request(spa, SPA_ASYNC_AUTOTRIM_RESTART);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 8eb9474cadb0..0e35fd069cbb 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -7418,6 +7418,7 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
size_t saved_poolname_len = 0;
nvlist_t *innvl = NULL;
fstrans_cookie_t cookie;
+ hrtime_t start_time = gethrtime();
cmd = vecnum;
error = 0;
@@ -7576,6 +7577,8 @@ zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
error);
}
+ fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
+ gethrtime() - start_time);
(void) spa_history_log_nvl(spa, lognv);
spa_close(spa, FTAG);
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_sa.c b/sys/contrib/openzfs/module/zfs/zfs_sa.c
index cbb773ffbdfa..67be131da63b 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_sa.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_sa.c
@@ -71,7 +71,7 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
#ifdef _KERNEL
int
-zfs_sa_readlink(znode_t *zp, uio_t *uio)
+zfs_sa_readlink(znode_t *zp, zfs_uio_t *uio)
{
dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
size_t bufsz;
@@ -79,15 +79,16 @@ zfs_sa_readlink(znode_t *zp, uio_t *uio)
bufsz = zp->z_size;
if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE <= db->db_size) {
- error = uiomove((caddr_t)db->db_data +
+ error = zfs_uiomove((caddr_t)db->db_data +
ZFS_OLD_ZNODE_PHYS_SIZE,
- MIN((size_t)bufsz, uio_resid(uio)), UIO_READ, uio);
+ MIN((size_t)bufsz, zfs_uio_resid(uio)), UIO_READ, uio);
} else {
dmu_buf_t *dbp;
if ((error = dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id,
0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) {
- error = uiomove(dbp->db_data,
- MIN((size_t)bufsz, uio_resid(uio)), UIO_READ, uio);
+ error = zfs_uiomove(dbp->db_data,
+ MIN((size_t)bufsz, zfs_uio_resid(uio)), UIO_READ,
+ uio);
dmu_buf_rele(dbp, FTAG);
}
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
index 2dcc231b30b6..61d5f06c6455 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -187,7 +187,7 @@ static unsigned long zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
*/
/* ARGSUSED */
int
-zfs_read(struct znode *zp, uio_t *uio, int ioflag, cred_t *cr)
+zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
{
int error = 0;
boolean_t frsync = B_FALSE;
@@ -210,7 +210,7 @@ zfs_read(struct znode *zp, uio_t *uio, int ioflag, cred_t *cr)
/*
* Validate file offset
*/
- if (uio->uio_loffset < (offset_t)0) {
+ if (zfs_uio_offset(uio) < (offset_t)0) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
@@ -218,7 +218,7 @@ zfs_read(struct znode *zp, uio_t *uio, int ioflag, cred_t *cr)
/*
* Fasttrack empty reads
*/
- if (uio->uio_resid == 0) {
+ if (zfs_uio_resid(uio) == 0) {
ZFS_EXIT(zfsvfs);
return (0);
}
@@ -242,26 +242,26 @@ zfs_read(struct znode *zp, uio_t *uio, int ioflag, cred_t *cr)
* Lock the range against changes.
*/
zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
- uio->uio_loffset, uio->uio_resid, RL_READER);
+ zfs_uio_offset(uio), zfs_uio_resid(uio), RL_READER);
/*
* If we are reading past end-of-file we can skip
* to the end; but we might still need to set atime.
*/
- if (uio->uio_loffset >= zp->z_size) {
+ if (zfs_uio_offset(uio) >= zp->z_size) {
error = 0;
goto out;
}
- ASSERT(uio->uio_loffset < zp->z_size);
- ssize_t n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
+ ASSERT(zfs_uio_offset(uio) < zp->z_size);
+ ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio));
ssize_t start_resid = n;
while (n > 0) {
ssize_t nbytes = MIN(n, zfs_vnops_read_chunk_size -
- P2PHASE(uio->uio_loffset, zfs_vnops_read_chunk_size));
+ P2PHASE(zfs_uio_offset(uio), zfs_vnops_read_chunk_size));
#ifdef UIO_NOCOPY
- if (uio->uio_segflg == UIO_NOCOPY)
+ if (zfs_uio_segflg(uio) == UIO_NOCOPY)
error = mappedread_sf(zp, nbytes, uio);
else
#endif
@@ -314,10 +314,10 @@ out:
/* ARGSUSED */
int
-zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
+zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
{
int error = 0;
- ssize_t start_resid = uio->uio_resid;
+ ssize_t start_resid = zfs_uio_resid(uio);
/*
* Fasttrack empty write
@@ -354,7 +354,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
*/
if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
- (uio->uio_loffset < zp->z_size))) {
+ (zfs_uio_offset(uio) < zp->z_size))) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EPERM));
}
@@ -362,7 +362,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
/*
* Validate file offset
*/
- offset_t woff = ioflag & O_APPEND ? zp->z_size : uio->uio_loffset;
+ offset_t woff = ioflag & O_APPEND ? zp->z_size : zfs_uio_offset(uio);
if (woff < 0) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
@@ -375,7 +375,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
* don't hold up txg.
* Skip this if uio contains loaned arc_buf.
*/
- if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EFAULT));
}
@@ -399,7 +399,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
*/
woff = zp->z_size;
}
- uio->uio_loffset = woff;
+ zfs_uio_setoffset(uio, woff);
} else {
/*
* Note that if the file block size will change as a result of
@@ -409,7 +409,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
}
- if (zn_rlimit_fsize(zp, uio, uio->uio_td)) {
+ if (zn_rlimit_fsize(zp, uio)) {
zfs_rangelock_exit(lr);
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EFBIG));
@@ -439,7 +439,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
* and allows us to do more fine-grained space accounting.
*/
while (n > 0) {
- woff = uio->uio_loffset;
+ woff = zfs_uio_offset(uio);
if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, uid) ||
zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, gid) ||
@@ -467,7 +467,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
max_blksz);
ASSERT(abuf != NULL);
ASSERT(arc_buf_size(abuf) == max_blksz);
- if ((error = uiocopy(abuf->b_data, max_blksz,
+ if ((error = zfs_uiocopy(abuf->b_data, max_blksz,
UIO_WRITE, uio, &cbytes))) {
dmu_return_arcbuf(abuf);
break;
@@ -528,11 +528,11 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
ssize_t tx_bytes;
if (abuf == NULL) {
- tx_bytes = uio->uio_resid;
- uio_fault_disable(uio, B_TRUE);
+ tx_bytes = zfs_uio_resid(uio);
+ zfs_uio_fault_disable(uio, B_TRUE);
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
- uio_fault_disable(uio, B_FALSE);
+ zfs_uio_fault_disable(uio, B_FALSE);
#ifdef __linux__
if (error == EFAULT) {
dmu_tx_commit(tx);
@@ -540,12 +540,13 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
* Account for partial writes before
* continuing the loop.
* Update needs to occur before the next
- * uio_prefaultpages, or prefaultpages may
+ * zfs_uio_prefaultpages, or prefaultpages may
* error, and we may break the loop early.
*/
- if (tx_bytes != uio->uio_resid)
- n -= tx_bytes - uio->uio_resid;
- if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ if (tx_bytes != zfs_uio_resid(uio))
+ n -= tx_bytes - zfs_uio_resid(uio);
+ if (zfs_uio_prefaultpages(MIN(n, max_blksz),
+ uio)) {
break;
}
continue;
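The #ifdef __linux__ path above is the interesting part of this hunk pair: page faults are disabled around dmu_write_uio_dbuf() because faulting while the transaction is assigned could deadlock, so an EFAULT is handled by committing the tx, charging the partial progress against n, prefaulting the pages back in, and retrying. A toy model of that retry shape, with hypothetical stand-ins (fault_disable, copy_chunk, prefault_pages) rather than the real DMU calls:

    #include <errno.h>
    #include <stdbool.h>
    #include <sys/types.h>

    /*
     * Hypothetical stand-ins for zfs_uio_fault_disable(),
     * dmu_write_uio_dbuf(), and zfs_uio_prefaultpages().
     */
    extern void fault_disable(bool on);
    extern int copy_chunk(ssize_t *resid);  /* may return EFAULT */
    extern int prefault_pages(ssize_t n);

    static int
    write_with_retry(ssize_t n)
    {
        while (n > 0) {
            /* Faulting here could deadlock, so forbid it. */
            fault_disable(true);
            int error = copy_chunk(&n);  /* charges partial progress */
            fault_disable(false);

            if (error == EFAULT) {
                /* Fault the pages back in, then retry the chunk. */
                if (prefault_pages(n) != 0)
                    return (EFAULT);
                continue;
            }
            if (error != 0)
                return (error);
        }
        return (0);
    }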
@@ -555,7 +556,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
dmu_tx_commit(tx);
break;
}
- tx_bytes -= uio->uio_resid;
+ tx_bytes -= zfs_uio_resid(uio);
} else {
/* Implied by abuf != NULL: */
ASSERT3S(n, >=, max_blksz);
@@ -580,8 +581,8 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
dmu_tx_commit(tx);
break;
}
- ASSERT3S(nbytes, <=, uio->uio_resid);
- uioskip(uio, nbytes);
+ ASSERT3S(nbytes, <=, zfs_uio_resid(uio));
+ zfs_uioskip(uio, nbytes);
tx_bytes = nbytes;
}
if (tx_bytes && zn_has_cached_data(zp) &&
@@ -632,9 +633,9 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
* Update the file size (zp_size) if it has changed;
* account for possible concurrent updates.
*/
- while ((end_size = zp->z_size) < uio->uio_loffset) {
+ while ((end_size = zp->z_size) < zfs_uio_offset(uio)) {
(void) atomic_cas_64(&zp->z_size, end_size,
- uio->uio_loffset);
+ zfs_uio_offset(uio));
ASSERT(error == 0);
}
/*
@@ -657,14 +658,14 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
n -= nbytes;
if (n > 0) {
- if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
error = SET_ERROR(EFAULT);
break;
}
}
}
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(zp);
zfs_rangelock_exit(lr);
/*
@@ -672,7 +673,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
* uio data is inaccessible return an error. Otherwise, it's
* at least a partial write, so it's successful.
*/
- if (zfsvfs->z_replay || uio->uio_resid == start_resid ||
+ if (zfsvfs->z_replay || zfs_uio_resid(uio) == start_resid ||
error == EFAULT) {
ZFS_EXIT(zfsvfs);
return (error);
@@ -682,7 +683,7 @@ zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr)
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, zp->z_id);
- const int64_t nwritten = start_resid - uio->uio_resid;
+ const int64_t nwritten = start_resid - zfs_uio_resid(uio);
dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
task_io_account_write(nwritten);
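One detail worth calling out from the zfs_write() hunks: the file size is grown without a lock. The atomic_cas_64() loop near the end of the function retries until either this write's end offset is installed in zp->z_size or a concurrent writer has already pushed the size past it. A minimal self-contained sketch of that grow-only update (grow_file_size is a hypothetical name; atomic_cas_64() is the real SPL primitive, declared here by hand):

    #include <stdint.h>

    /* SPL primitive: returns the old value of *target. */
    extern uint64_t atomic_cas_64(volatile uint64_t *target,
        uint64_t cmp, uint64_t newval);

    /*
     * Lock-free, grow-only size update: loop until our end offset
     * is installed or a concurrent writer has already exceeded it.
     */
    static void
    grow_file_size(volatile uint64_t *sizep, uint64_t end_off)
    {
        uint64_t cur;

        while ((cur = *sizep) < end_off)
            (void) atomic_cas_64(sizep, cur, end_off);
    }

Because the size only ever grows here, a failed CAS simply means another writer made progress, so rechecking the condition is safe.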
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index 632fef29bff4..7b52f9249298 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -1230,7 +1230,7 @@ zil_lwb_write_done(zio_t *zio)
ASSERT(!BP_IS_HOLE(zio->io_bp));
ASSERT(BP_GET_FILL(zio->io_bp) == 0);
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
mutex_enter(&zilog->zl_lock);
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
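Every abd_put() in the zil.c and zio.c hunks becomes abd_free(); this merge folds the two release paths together so a single function frees both owned ABDs and ABDs that merely borrow an existing buffer. A hedged sketch of the borrowed-buffer lifecycle after the change (abd_get_from_buf() and abd_free() are the real API; checksum_borrowed and the surrounding buffer handling are illustrative):

    #include <sys/types.h>

    /* Opaque scatter/gather buffer type from the ABD API. */
    typedef struct abd abd_t;
    extern abd_t *abd_get_from_buf(void *buf, size_t size);
    extern void abd_free(abd_t *abd);

    static void
    checksum_borrowed(void *buf, size_t size)
    {
        /* Wrap an existing buffer without copying it. */
        abd_t *abd = abd_get_from_buf(buf, size);

        /*
         * ... hand abd to zio/checksum code, as callers like the
         * lwb write path do ...
         */

        /*
         * Formerly abd_put(); abd_free() now releases borrowed
         * wrappers too, leaving buf itself untouched.
         */
        abd_free(abd);
    }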
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index dfd92b893b9f..7f3cb19d46db 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -2481,7 +2481,7 @@ zio_resume_wait(spa_t *spa)
static void
zio_gang_issue_func_done(zio_t *zio)
{
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
}
static zio_t *
@@ -2525,7 +2525,7 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
zio_checksum_compute(zio, BP_GET_CHECKSUM(bp),
buf, BP_GET_PSIZE(bp));
- abd_put(buf);
+ abd_free(buf);
}
/*
* If we are here to damage data for testing purposes,
@@ -2653,7 +2653,7 @@ zio_gang_tree_assemble_done(zio_t *zio)
ASSERT(zio->io_size == SPA_GANGBLOCKSIZE);
ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) {
blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
@@ -2777,14 +2777,13 @@ zio_write_gang_done(zio_t *zio)
* check for it here as it is cleared in zio_ready.
*/
if (zio->io_abd != NULL)
- abd_put(zio->io_abd);
+ abd_free(zio->io_abd);
}
static zio_t *
-zio_write_gang_block(zio_t *pio)
+zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
{
spa_t *spa = pio->io_spa;
- metaslab_class_t *mc = spa_normal_class(spa);
blkptr_t *bp = pio->io_bp;
zio_t *gio = pio->io_gang_leader;
zio_t *zio;
@@ -3501,6 +3500,17 @@ zio_dva_allocate(zio_t *zio)
zio->io_metaslab_class = mc;
}
+ /*
+ * Try allocating the block in the usual metaslab class.
+ * If that's full, allocate it in the normal class.
+ * If that's full, allocate as a gang block,
+ * and if all are full, the allocation fails (which shouldn't happen).
+ *
+ * Note that we do not fall back on embedded slog (ZIL) space, to
+ * preserve unfragmented slog space, which is critical for decent
+ * sync write performance. If a log allocation fails, we will fall
+ * back to spa_sync() which is abysmal for performance.
+ */
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
&zio->io_alloc_list, zio, zio->io_allocator);
@@ -3520,26 +3530,38 @@ zio_dva_allocate(zio_t *zio)
zio->io_prop.zp_copies, zio->io_allocator, zio);
zio->io_flags &= ~ZIO_FLAG_IO_ALLOCATING;
- mc = spa_normal_class(spa);
- VERIFY(metaslab_class_throttle_reserve(mc,
+ VERIFY(metaslab_class_throttle_reserve(
+ spa_normal_class(spa),
zio->io_prop.zp_copies, zio->io_allocator, zio,
flags | METASLAB_MUST_RESERVE));
- } else {
- mc = spa_normal_class(spa);
}
- zio->io_metaslab_class = mc;
+ zio->io_metaslab_class = mc = spa_normal_class(spa);
+ if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
+ zfs_dbgmsg("%s: metaslab allocation failure, "
+ "trying normal class: zio %px, size %llu, error %d",
+ spa_name(spa), zio, zio->io_size, error);
+ }
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
&zio->io_alloc_list, zio, zio->io_allocator);
}
+ if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) {
+ if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
+ zfs_dbgmsg("%s: metaslab allocation failure, "
+ "trying ganging: zio %px, size %llu, error %d",
+ spa_name(spa), zio, zio->io_size, error);
+ }
+ return (zio_write_gang_block(zio, mc));
+ }
if (error != 0) {
- zfs_dbgmsg("%s: metaslab allocation failure: zio %px, "
- "size %llu, error %d", spa_name(spa), zio, zio->io_size,
- error);
- if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
- return (zio_write_gang_block(zio));
+ if (error != ENOSPC ||
+ (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC)) {
+ zfs_dbgmsg("%s: metaslab allocation failure: zio %px, "
+ "size %llu, error %d",
+ spa_name(spa), zio, zio->io_size, error);
+ }
zio->io_error = error;
}
@@ -3619,15 +3641,18 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
int allocator = cityhash4(0, 0, 0, os->os_dsl_dataset->ds_object) %
spa->spa_alloc_count;
- error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp,
- 1, txg, NULL, flags, &io_alloc_list, NULL, allocator);
- if (error == 0) {
- *slog = TRUE;
- } else {
- error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp,
- 1, txg, NULL, flags, &io_alloc_list, NULL, allocator);
- if (error == 0)
- *slog = FALSE;
+ error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
+ txg, NULL, flags, &io_alloc_list, NULL, allocator);
+ *slog = (error == 0);
+ if (error != 0) {
+ error = metaslab_alloc(spa, spa_embedded_log_class(spa), size,
+ new_bp, 1, txg, NULL, flags,
+ &io_alloc_list, NULL, allocator);
+ }
+ if (error != 0) {
+ error = metaslab_alloc(spa, spa_normal_class(spa), size,
+ new_bp, 1, txg, NULL, flags,
+ &io_alloc_list, NULL, allocator);
}
metaslab_trace_fini(&io_alloc_list);
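zio_alloc_zil() correspondingly gains a third tier: dedicated SLOG devices first, then the new embedded log class (the metaslab set aside on normal vdevs for ZIL blocks), and the general normal class only as a last resort. Note that *slog is latched after the first attempt alone, so per this diff a block placed in the embedded log class is reported as non-slog. A toy model of the order (try_alloc and the enum are stand-ins for metaslab_alloc() against the three classes):

    #include <errno.h>
    #include <stdbool.h>

    enum log_class { LOG, EMBEDDED_LOG, NORMAL };

    /* Stand-in for a metaslab_alloc() attempt in one class. */
    extern int try_alloc(enum log_class mc);

    static int
    alloc_zil_block(bool *slog)
    {
        int error = try_alloc(LOG);          /* dedicated SLOG devices */

        *slog = (error == 0);                /* latched here only */
        if (error != 0)
            error = try_alloc(EMBEDDED_LOG); /* reserved metaslab */
        if (error != 0)
            error = try_alloc(NORMAL);       /* last resort */
        return (error);
    }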