Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r--  sys/contrib/openzfs/module/Kbuild.in | 3
-rw-r--r--  sys/contrib/openzfs/module/Makefile.bsd | 24
-rw-r--r--  sys/contrib/openzfs/module/icp/spi/kcf_spi.c | 1
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c | 5
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c | 10
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c | 8
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c | 3
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c | 4
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c | 20
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c | 12
-rw-r--r--  sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c | 14
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c | 3
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c | 3
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c | 93
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zcommon/zpool_prop.c | 3
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/bpobj.c | 7
-rw-r--r--  sys/contrib/openzfs/module/zfs/bptree.c | 9
-rw-r--r--  sys/contrib/openzfs/module/zfs/brt.c | 48
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c | 5
-rw-r--r--  sys/contrib/openzfs/module/zfs/ddt_log.c | 7
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu.c | 27
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_redact.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode.c | 156
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/mmp.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c | 281
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_checkpoint.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_config.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_history.c | 5
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c | 110
-rw-r--r--  sys/contrib/openzfs/module/zfs/space_map.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev.c | 86
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_indirect_births.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c | 5
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_initialize.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_label.c | 33
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_raidz.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_rebuild.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_removal.c | 20
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_trim.c | 12
-rw-r--r--  sys/contrib/openzfs/module/zfs/zap_micro.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_fm.c | 9
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_fuid.c | 4
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 41
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c | 13
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio_inject.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/zvol.c | 9
-rw-r--r--  sys/contrib/openzfs/module/zstd/include/aarch64_compat.h | 38
-rw-r--r--  sys/contrib/openzfs/module/zstd/lib/common/compiler.h | 3
-rw-r--r--  sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h | 9
53 files changed, 701 insertions, 516 deletions
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index 58a80dc4402c..95313c984178 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -293,10 +293,9 @@ ZSTD_UPSTREAM_OBJS := \
zfs-objs += $(addprefix zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS))
-# Disable aarch64 neon SIMD instructions for kernel mode
$(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -I$(zstd_include) $(ZFS_ZSTD_FLAGS)
$(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : asflags-y += -I$(zstd_include)
-$(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w
+$(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w
$(obj)/zstd/zfs_zstd.o : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h
diff --git a/sys/contrib/openzfs/module/Makefile.bsd b/sys/contrib/openzfs/module/Makefile.bsd
index 3ba38c43f25b..c20fdc0c483b 100644
--- a/sys/contrib/openzfs/module/Makefile.bsd
+++ b/sys/contrib/openzfs/module/Makefile.bsd
@@ -521,30 +521,6 @@ CFLAGS.zstd_ldm.c= -U__BMI__ -fno-tree-vectorize ${NO_WBITWISE_INSTEAD_OF_LOGICA
CFLAGS.zstd_opt.c= -U__BMI__ -fno-tree-vectorize ${NO_WBITWISE_INSTEAD_OF_LOGICAL}
.if ${MACHINE_ARCH} == "aarch64"
-__ZFS_ZSTD_AARCH64_FLAGS= -include ${SRCDIR}/zstd/include/aarch64_compat.h
-CFLAGS.zstd.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.entropy_common.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.error_private.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.fse_compress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.fse_decompress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.hist.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.huf_compress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.huf_decompress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.pool.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.xxhash.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_common.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_compress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_compress_literals.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_compress_sequences.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_compress_superblock.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_ddict.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_decompress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_decompress_block.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_double_fast.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_fast.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_lazy.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_ldm.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
-CFLAGS.zstd_opt.c+= ${__ZFS_ZSTD_AARCH64_FLAGS}
sha256-armv8.o: sha256-armv8.S
${CC} -c ${CFLAGS:N-mgeneral-regs-only} ${WERROR} ${.IMPSRC} \
diff --git a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c
index 806c0b028017..35fe55b2595d 100644
--- a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c
+++ b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c
@@ -31,7 +31,6 @@
*/
-#include <sys/zfs_context.h>
#include <sys/crypto/common.h>
#include <sys/crypto/impl.h>
#include <sys/crypto/sched_impl.h>
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c
index 54d4029c5e6f..b92be3710f3c 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c
@@ -238,7 +238,7 @@ zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages)
zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]);
if (res != n)
- return (SET_ERROR(EFAULT));
+ return (EFAULT);
ASSERT3U(len, ==, res * PAGE_SIZE);
*numpages = res;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
index 26cc7981bfcd..1990ec677d37 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
@@ -76,7 +76,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
return (0);
err = dmu_buf_hold_array(os, object, offset, size,
- FALSE, FTAG, &numbufs, &dbp);
+ FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
if (err)
return (err);
@@ -147,7 +147,8 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
ASSERT3S(last_size, <=, PAGE_SIZE);
err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
- IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
+ IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp,
+ DMU_READ_PREFETCH);
if (err != 0)
return (err);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c
index 2d04ccf95fbf..d918b26521a7 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c
@@ -193,7 +193,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
*/
config = spa_generate_rootconf(name);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if (config != NULL) {
pname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
VERIFY0(strcmp(name, pname));
@@ -204,7 +204,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
* e.g., after reboot -r.
*/
if (spa->spa_state == POOL_STATE_ACTIVE) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
fnvlist_free(config);
return (0);
}
@@ -226,7 +226,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
&spa->spa_ubsync.ub_version) != 0)
spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;
} else if ((spa = spa_lookup(name)) == NULL) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
fnvlist_free(config);
cmn_err(CE_NOTE, "Cannot find the pool label for '%s'",
name);
@@ -249,7 +249,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
VDEV_ALLOC_ROOTPOOL);
spa_config_exit(spa, SCL_ALL, FTAG);
if (error) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
fnvlist_free(config);
cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
name);
@@ -259,7 +259,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
vdev_free(rvd);
spa_config_exit(spa, SCL_ALL, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
fnvlist_free(config);
return (0);
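
[Annotation: the mutex_enter(&spa_namespace_lock) -> spa_namespace_enter(FTAG)
substitutions above, together with the spa_namespace_exit(), spa_namespace_tryenter(),
spa_namespace_held() and spa_namespace_broadcast() calls appearing throughout the
rest of this diff, replace direct use of the global namespace mutex with tagged
wrappers. The following is a minimal sketch of what such wrappers presumably look
like, written against the SPL mutex API and inferred from their call sites in this
diff; it is not the actual spa_misc.c implementation, and the holder-tracking
variable is purely illustrative.]

/*
 * Hypothetical sketch of tagged wrappers around spa_namespace_lock.
 * FTAG expands to the calling function's name; the tag is only used
 * for debugging/lock-tracking here, the locking semantics are those
 * of the underlying kmutex.
 */
static kmutex_t spa_namespace_lock;
static const void *spa_namespace_holder;    /* illustrative only */

void
spa_namespace_enter(const void *tag)
{
    mutex_enter(&spa_namespace_lock);
    spa_namespace_holder = tag;
}

boolean_t
spa_namespace_tryenter(const void *tag)
{
    if (!mutex_tryenter(&spa_namespace_lock))
        return (B_FALSE);
    spa_namespace_holder = tag;
    return (B_TRUE);
}

boolean_t
spa_namespace_held(void)
{
    /* Same check the old MUTEX_HELD()/mutex_owned() call sites did. */
    return (MUTEX_HELD(&spa_namespace_lock));
}

void
spa_namespace_exit(const void *tag)
{
    ASSERT3P(spa_namespace_holder, ==, tag);
    spa_namespace_holder = NULL;
    mutex_exit(&spa_namespace_lock);
}
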
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c
index 11e93b800a54..9663f05cb354 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c
@@ -42,7 +42,8 @@ vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size)
spa_t *spa = vd->vdev_spa;
zio_t *zio;
abd_t *pad2;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_TRYHARD;
int error;
if (size > VDEV_PAD_SIZE)
@@ -59,16 +60,11 @@ vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size)
abd_copy_from_buf(pad2, buf, size);
abd_zero_off(pad2, size, VDEV_PAD_SIZE - size);
-retry:
zio = zio_root(spa, NULL, NULL, flags);
vdev_label_write(zio, vd, 0, pad2,
offsetof(vdev_label_t, vl_be),
VDEV_PAD_SIZE, NULL, NULL, flags);
error = zio_wait(zio);
- if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
- flags |= ZIO_FLAG_TRYHARD;
- goto retry;
- }
abd_free(pad2);
return (error);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
index cb5787269db2..c98ccd756405 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
@@ -1262,7 +1262,8 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
if (aclnode->z_ace_count == 0)
continue;
dmu_write(zfsvfs->z_os, aoid, off,
- aclnode->z_size, aclnode->z_acldata, tx);
+ aclnode->z_size, aclnode->z_acldata, tx,
+ DMU_READ_NO_PREFETCH);
off += aclnode->z_size;
}
} else {
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c
index dcdefae56639..29711fcf5d2c 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c
@@ -108,11 +108,11 @@ zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
"command", &command) != 0)
return (EINVAL);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa = spa_by_guid(pool_guid, vdev_guid);
if (spa != NULL)
strcpy(name, spa_name(spa));
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
if (spa == NULL)
return (ENOENT);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index f34a2fd37a77..8a9d23d0d554 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -278,7 +278,7 @@ zfs_ioctl_getxattr(vnode_t *vp, zfsxattr_t *fsx)
memset(fsx, 0, sizeof (*fsx));
fsx->fsx_xflags = (zp->z_pflags & ZFS_PROJINHERIT) ?
- ZFS_PROJINHERIT_FL : 0;
+ FS_PROJINHERIT_FL : 0;
fsx->fsx_projid = zp->z_projid;
return (0);
@@ -290,7 +290,7 @@ zfs_ioctl_setflags(vnode_t *vp, uint32_t ioctl_flags, xvattr_t *xva)
uint64_t zfs_flags = VTOZ(vp)->z_pflags;
xoptattr_t *xoap;
- if (ioctl_flags & ~(ZFS_PROJINHERIT_FL))
+ if (ioctl_flags & ~(FS_PROJINHERIT_FL))
return (SET_ERROR(EOPNOTSUPP));
xva_init(xva);
@@ -304,7 +304,7 @@ zfs_ioctl_setflags(vnode_t *vp, uint32_t ioctl_flags, xvattr_t *xva)
} \
} while (0)
- FLAG_CHANGE(ZFS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT,
+ FLAG_CHANGE(FS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT,
xoap->xoa_projinherit);
#undef FLAG_CHANGE
@@ -4479,7 +4479,8 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
for (i = 0; wlen > 0; woff += tocopy, wlen -= tocopy, i++) {
tocopy = MIN(PAGE_SIZE, wlen);
va = zfs_map_page(ma[i], &sf);
- dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx);
+ dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx,
+ DMU_READ_PREFETCH);
zfs_unmap_page(sf);
}
} else {
@@ -5757,7 +5758,7 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
{
ulong_t val;
int error;
-#ifdef _PC_CLONE_BLKSIZE
+#if defined(_PC_CLONE_BLKSIZE) || defined(_PC_CASE_INSENSITIVE)
zfsvfs_t *zfsvfs;
#endif
@@ -5821,6 +5822,15 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
*ap->a_retval = 0;
return (0);
#endif
+#ifdef _PC_CASE_INSENSITIVE
+ case _PC_CASE_INSENSITIVE:
+ zfsvfs = (zfsvfs_t *)ap->a_vp->v_mount->mnt_data;
+ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
+ *ap->a_retval = 1;
+ else
+ *ap->a_retval = 0;
+ return (0);
+#endif
default:
return (vop_stdpathconf(ap));
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index 3ddbfcb97184..dc30f6dd939c 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -283,8 +283,8 @@ retry:
* Take spa_namespace_lock to prevent lock inversion when
* zvols from one pool are opened as vdevs in another.
*/
- if (!mutex_owned(&spa_namespace_lock)) {
- if (!mutex_tryenter(&spa_namespace_lock)) {
+ if (!spa_namespace_held()) {
+ if (!spa_namespace_tryenter(FTAG)) {
mutex_exit(&zv->zv_state_lock);
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
@@ -296,7 +296,7 @@ retry:
}
err = zvol_first_open(zv, !(flag & FWRITE));
if (drop_namespace)
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
if (err)
goto out_locked;
pp->mediasize = zv->zv_volsize;
@@ -963,8 +963,8 @@ retry:
* Take spa_namespace_lock to prevent lock inversion when
* zvols from one pool are opened as vdevs in another.
*/
- if (!mutex_owned(&spa_namespace_lock)) {
- if (!mutex_tryenter(&spa_namespace_lock)) {
+ if (!spa_namespace_held()) {
+ if (!spa_namespace_tryenter(FTAG)) {
mutex_exit(&zv->zv_state_lock);
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
@@ -976,7 +976,7 @@ retry:
}
err = zvol_first_open(zv, !(flags & FWRITE));
if (drop_namespace)
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
if (err)
goto out_locked;
}
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
index 092f090d934b..00ff789265c6 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -32,7 +32,6 @@
#include <sys/taskq.h>
#include <sys/kmem.h>
#include <sys/tsd.h>
-#include <sys/trace_spl.h>
#include <sys/time.h>
#include <sys/atomic.h>
#include <sys/kstat.h>
@@ -325,7 +324,6 @@ task_expire_impl(taskq_ent_t *t)
}
t->tqent_birth = jiffies;
- DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
/*
* The priority list must be maintained in strict task id order
@@ -713,9 +711,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
t->tqent_taskq = tq;
t->tqent_timer.function = NULL;
t->tqent_timer.expires = 0;
-
t->tqent_birth = jiffies;
- DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
@@ -840,9 +836,7 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
t->tqent_func = func;
t->tqent_arg = arg;
t->tqent_taskq = tq;
-
t->tqent_birth = jiffies;
- DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
spin_unlock(&t->tqent_lock);
@@ -1054,11 +1048,6 @@ taskq_thread(void *args)
* A TQENT_FLAG_PREALLOC task may be reused or freed
* during the task function call. Store tqent_id and
* tqent_flags here.
- *
- * Also use an on stack taskq_ent_t for tqt_task
- * assignment in this case; we want to make sure
- * to duplicate all fields, so the values are
- * correct when it's accessed via DTRACE_PROBE*.
*/
tqt->tqt_id = t->tqent_id;
tqt->tqt_flags = t->tqent_flags;
@@ -1074,13 +1063,10 @@ taskq_thread(void *args)
spin_unlock_irqrestore(&tq->tq_lock, flags);
TQSTAT_INC(tq, threads_active);
- DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);
/* Perform the requested task */
t->tqent_func(t->tqent_arg);
- DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);
-
TQSTAT_DEC(tq, threads_active);
if ((t->tqent_flags & TQENT_LIST_MASK) ==
TQENT_LIST_PENDING)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
index 934d74a112fd..4c929a4642b1 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -1447,7 +1447,8 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
if (aclnode->z_ace_count == 0)
continue;
dmu_write(zfsvfs->z_os, aoid, off,
- aclnode->z_size, aclnode->z_acldata, tx);
+ aclnode->z_size, aclnode->z_acldata, tx,
+ DMU_READ_NO_PREFETCH);
off += aclnode->z_size;
}
} else {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index e845ad69ad78..02465adf36d5 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -3892,7 +3892,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
va = kmap(pp);
ASSERT3U(pglen, <=, PAGE_SIZE);
- dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx);
+ dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx,
+ DMU_READ_PREFETCH);
kunmap(pp);
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index 02965ac8cbee..f7691c02d163 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -811,28 +811,44 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
return (error);
}
-#define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
-#define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)
+#define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL)
+#define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | FS_PROJINHERIT_FL)
+
+
+static struct {
+ uint64_t zfs_flag;
+ uint32_t fs_flag;
+ uint32_t xflag;
+} flags_lookup[] = {
+ {ZFS_IMMUTABLE, FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE},
+ {ZFS_APPENDONLY, FS_APPEND_FL, FS_XFLAG_APPEND},
+ {ZFS_NODUMP, FS_NODUMP_FL, FS_XFLAG_NODUMP},
+ {ZFS_PROJINHERIT, FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT}
+};
static uint32_t
__zpl_ioctl_getflags(struct inode *ip)
{
uint64_t zfs_flags = ITOZ(ip)->z_pflags;
uint32_t ioctl_flags = 0;
+ for (int i = 0; i < ARRAY_SIZE(flags_lookup); i++)
+ if (zfs_flags & flags_lookup[i].zfs_flag)
+ ioctl_flags |= flags_lookup[i].fs_flag;
- if (zfs_flags & ZFS_IMMUTABLE)
- ioctl_flags |= FS_IMMUTABLE_FL;
-
- if (zfs_flags & ZFS_APPENDONLY)
- ioctl_flags |= FS_APPEND_FL;
+ return (ioctl_flags);
+}
- if (zfs_flags & ZFS_NODUMP)
- ioctl_flags |= FS_NODUMP_FL;
+static uint32_t
+__zpl_ioctl_getxflags(struct inode *ip)
+{
+ uint64_t zfs_flags = ITOZ(ip)->z_pflags;
+ uint32_t ioctl_flags = 0;
- if (zfs_flags & ZFS_PROJINHERIT)
- ioctl_flags |= ZFS_PROJINHERIT_FL;
+ for (int i = 0; i < ARRAY_SIZE(flags_lookup); i++)
+ if (zfs_flags & flags_lookup[i].zfs_flag)
+ ioctl_flags |= flags_lookup[i].xflag;
- return (ioctl_flags & ZFS_FL_USER_VISIBLE);
+ return (ioctl_flags);
}
/*
@@ -846,6 +862,7 @@ zpl_ioctl_getflags(struct file *filp, void __user *arg)
int err;
flags = __zpl_ioctl_getflags(file_inode(filp));
+ flags = flags & ZFS_FL_USER_VISIBLE;
err = copy_to_user(arg, &flags, sizeof (flags));
return (err);
@@ -869,7 +886,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
xoptattr_t *xoap;
if (ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL |
- ZFS_PROJINHERIT_FL))
+ FS_PROJINHERIT_FL))
return (-EOPNOTSUPP);
if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE)
@@ -900,7 +917,51 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
xoap->xoa_appendonly);
FLAG_CHANGE(FS_NODUMP_FL, ZFS_NODUMP, XAT_NODUMP,
xoap->xoa_nodump);
- FLAG_CHANGE(ZFS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT,
+ FLAG_CHANGE(FS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT,
+ xoap->xoa_projinherit);
+
+#undef FLAG_CHANGE
+
+ return (0);
+}
+
+static int
+__zpl_ioctl_setxflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
+{
+ uint64_t zfs_flags = ITOZ(ip)->z_pflags;
+ xoptattr_t *xoap;
+
+ if (ioctl_flags & ~(FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND |
+ FS_XFLAG_NODUMP | FS_XFLAG_PROJINHERIT))
+ return (-EOPNOTSUPP);
+
+ if ((fchange(ioctl_flags, zfs_flags, FS_XFLAG_IMMUTABLE,
+ ZFS_IMMUTABLE) ||
+ fchange(ioctl_flags, zfs_flags, FS_XFLAG_APPEND, ZFS_APPENDONLY)) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return (-EPERM);
+
+ if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+ return (-EACCES);
+
+ xva_init(xva);
+ xoap = xva_getxoptattr(xva);
+
+#define FLAG_CHANGE(iflag, zflag, xflag, xfield) do { \
+ if (((ioctl_flags & (iflag)) && !(zfs_flags & (zflag))) || \
+ ((zfs_flags & (zflag)) && !(ioctl_flags & (iflag)))) { \
+ XVA_SET_REQ(xva, (xflag)); \
+ (xfield) = ((ioctl_flags & (iflag)) != 0); \
+ } \
+} while (0)
+
+ FLAG_CHANGE(FS_XFLAG_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
+ xoap->xoa_immutable);
+ FLAG_CHANGE(FS_XFLAG_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
+ xoap->xoa_appendonly);
+ FLAG_CHANGE(FS_XFLAG_NODUMP, ZFS_NODUMP, XAT_NODUMP,
+ xoap->xoa_nodump);
+ FLAG_CHANGE(FS_XFLAG_PROJINHERIT, ZFS_PROJINHERIT, XAT_PROJINHERIT,
xoap->xoa_projinherit);
#undef FLAG_CHANGE
@@ -941,7 +1002,7 @@ zpl_ioctl_getxattr(struct file *filp, void __user *arg)
struct inode *ip = file_inode(filp);
int err;
- fsx.fsx_xflags = __zpl_ioctl_getflags(ip);
+ fsx.fsx_xflags = __zpl_ioctl_getxflags(ip);
fsx.fsx_projid = ITOZ(ip)->z_projid;
err = copy_to_user(arg, &fsx, sizeof (fsx));
@@ -965,7 +1026,7 @@ zpl_ioctl_setxattr(struct file *filp, void __user *arg)
if (!zpl_is_valid_projid(fsx.fsx_projid))
return (-EINVAL);
- err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva);
+ err = __zpl_ioctl_setxflags(ip, fsx.fsx_xflags, &xva);
if (err)
return (err);
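
[Annotation: the zpl_file.c change above replaces two hand-rolled if-chains with a
single flags_lookup table that drives both the FS_IOC_GETFLAGS (FS_*_FL) and
FS_IOC_FSGETXATTR (FS_XFLAG_*) translations, fixing the earlier leakage of internal
ZFS_PROJINHERIT_FL values into the kernel's flag namespaces. Below is a
self-contained userspace sketch of the same table-driven pattern; the ZFS_* bit
values are placeholders, only the FS_* constants come from <linux/fs.h>.]

#include <linux/fs.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ZFS_IMMUTABLE   (1ULL << 0)   /* placeholder bit values */
#define ZFS_APPENDONLY  (1ULL << 1)
#define ZFS_NODUMP      (1ULL << 2)
#define ZFS_PROJINHERIT (1ULL << 3)

static const struct {
    uint64_t zfs_flag;
    uint32_t fs_flag;   /* FS_IOC_GETFLAGS namespace */
    uint32_t xflag;     /* FS_IOC_FSGETXATTR namespace */
} flags_lookup[] = {
    { ZFS_IMMUTABLE,   FS_IMMUTABLE_FL,   FS_XFLAG_IMMUTABLE },
    { ZFS_APPENDONLY,  FS_APPEND_FL,      FS_XFLAG_APPEND },
    { ZFS_NODUMP,      FS_NODUMP_FL,      FS_XFLAG_NODUMP },
    { ZFS_PROJINHERIT, FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT },
};

int
main(void)
{
    uint64_t zfs_flags = ZFS_IMMUTABLE | ZFS_PROJINHERIT;
    uint32_t fl = 0, xfl = 0;

    /* One table drives both translations, as in the new zpl_file.c. */
    for (size_t i = 0; i < sizeof (flags_lookup) / sizeof (flags_lookup[0]); i++) {
        if (zfs_flags & flags_lookup[i].zfs_flag) {
            fl |= flags_lookup[i].fs_flag;
            xfl |= flags_lookup[i].xflag;
        }
    }
    printf("FS_*_FL=0x%x FS_XFLAG_*=0x%x\n", fl, xfl);
    return (0);
}
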
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index fe939150b641..89f9bc555fcf 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -809,8 +809,8 @@ retry:
* the kernel so the only option is to return the error for
* the caller to handle it.
*/
- if (!mutex_owned(&spa_namespace_lock)) {
- if (!mutex_tryenter(&spa_namespace_lock)) {
+ if (!spa_namespace_held()) {
+ if (!spa_namespace_tryenter(FTAG)) {
mutex_exit(&zv->zv_state_lock);
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
@@ -834,7 +834,7 @@ retry:
error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag)));
if (drop_namespace)
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
if (error == 0) {
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
index 07819ba2be8b..4826237b23e8 100644
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -481,6 +481,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
sfeatures);
+ zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
+ B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
+ "SLOW_IO_EVENTS", boolean_table, sfeatures);
/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index dbb5e942e2e6..48bf99f1aeb7 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -8548,7 +8548,7 @@ l2arc_dev_get_next(void)
* of cache devices (l2arc_dev_mtx). Once a device has been selected,
* both locks will be dropped and a spa config lock held instead.
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
mutex_enter(&l2arc_dev_mtx);
/* if there are no vdevs, there is nothing to do */
@@ -8591,7 +8591,7 @@ out:
*/
if (next != NULL)
spa_config_enter(next->l2ad_spa, SCL_L2ARC, next, RW_READER);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (next);
}
@@ -10231,7 +10231,7 @@ l2arc_stop(void)
void
l2arc_spa_rebuild_start(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
/*
* Locate the spa's l2arc devices and kick off rebuild threads.
@@ -10256,7 +10256,7 @@ l2arc_spa_rebuild_start(spa_t *spa)
void
l2arc_spa_rebuild_stop(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c
index ea9fbd036c6e..afcb2374f824 100644
--- a/sys/contrib/openzfs/module/zfs/bpobj.c
+++ b/sys/contrib/openzfs/module/zfs/bpobj.c
@@ -752,7 +752,8 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
}
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
- numsubsub * sizeof (subobj), subdb->db_data, tx);
+ numsubsub * sizeof (subobj), subdb->db_data, tx,
+ DMU_READ_NO_PREFETCH);
dmu_buf_rele(subdb, FTAG);
bpo->bpo_phys->bpo_num_subobjs += numsubsub;
@@ -777,7 +778,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
dmu_write(bpo->bpo_os, bpo->bpo_object,
bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t),
numbps * sizeof (blkptr_t),
- bps->db_data, tx);
+ bps->db_data, tx, DMU_READ_NO_PREFETCH);
dmu_buf_rele(bps, FTAG);
bpo->bpo_phys->bpo_num_blkptrs += numbps;
@@ -794,7 +795,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
- sizeof (subobj), &subobj, tx);
+ sizeof (subobj), &subobj, tx, DMU_READ_NO_PREFETCH);
bpo->bpo_phys->bpo_num_subobjs++;
}
diff --git a/sys/contrib/openzfs/module/zfs/bptree.c b/sys/contrib/openzfs/module/zfs/bptree.c
index a98bba3eb259..1274278e8e91 100644
--- a/sys/contrib/openzfs/module/zfs/bptree.c
+++ b/sys/contrib/openzfs/module/zfs/bptree.c
@@ -137,7 +137,8 @@ bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
bte = kmem_zalloc(sizeof (*bte), KM_SLEEP);
bte->be_birth_txg = birth_txg;
bte->be_bp = *bp;
- dmu_write(os, obj, bt->bt_end * sizeof (*bte), sizeof (*bte), bte, tx);
+ dmu_write(os, obj, bt->bt_end * sizeof (*bte), sizeof (*bte), bte, tx,
+ DMU_READ_NO_PREFETCH);
kmem_free(bte, sizeof (*bte));
dmu_buf_will_dirty(db, tx);
@@ -247,7 +248,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
ZB_DESTROYED_OBJSET);
ASSERT0(bte.be_zb.zb_level);
dmu_write(os, obj, i * sizeof (bte),
- sizeof (bte), &bte, tx);
+ sizeof (bte), &bte, tx,
+ DMU_READ_NO_PREFETCH);
if (err == EIO || err == ECKSUM ||
err == ENXIO) {
/*
@@ -269,7 +271,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
*/
bte.be_birth_txg = UINT64_MAX;
dmu_write(os, obj, i * sizeof (bte),
- sizeof (bte), &bte, tx);
+ sizeof (bte), &bte, tx,
+ DMU_READ_NO_PREFETCH);
}
if (!ioerr) {
diff --git a/sys/contrib/openzfs/module/zfs/brt.c b/sys/contrib/openzfs/module/zfs/brt.c
index 40664354aa73..08a6bd52ab31 100644
--- a/sys/contrib/openzfs/module/zfs/brt.c
+++ b/sys/contrib/openzfs/module/zfs/brt.c
@@ -260,8 +260,8 @@ static int brt_zap_prefetch = 1;
#define BRT_DEBUG(...) do { } while (0)
#endif
-static int brt_zap_default_bs = 12;
-static int brt_zap_default_ibs = 12;
+static int brt_zap_default_bs = 13;
+static int brt_zap_default_ibs = 13;
static kstat_t *brt_ksp;
@@ -454,6 +454,7 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
VERIFY(mos_entries != 0);
VERIFY0(dnode_hold(spa->spa_meta_objset, mos_entries, brtvd,
&brtvd->bv_mos_entries_dnode));
+ dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP);
rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER);
brtvd->bv_mos_entries = mos_entries;
rw_exit(&brtvd->bv_mos_entries_lock);
@@ -508,8 +509,8 @@ brt_vdev_realloc(spa_t *spa, brt_vdev_t *brtvd)
size = (vdev_get_min_asize(vd) - 1) / spa->spa_brt_rangesize + 1;
spa_config_exit(spa, SCL_VDEV, FTAG);
- entcount = vmem_zalloc(sizeof (entcount[0]) * size, KM_SLEEP);
nblocks = BRT_RANGESIZE_TO_NBLOCKS(size);
+ entcount = vmem_zalloc(nblocks * BRT_BLOCKSIZE, KM_SLEEP);
bitmap = kmem_zalloc(BT_SIZEOFMAP(nblocks), KM_SLEEP);
if (!brtvd->bv_initiated) {
@@ -530,9 +531,8 @@ brt_vdev_realloc(spa_t *spa, brt_vdev_t *brtvd)
memcpy(entcount, brtvd->bv_entcount,
sizeof (entcount[0]) * MIN(size, brtvd->bv_size));
- vmem_free(brtvd->bv_entcount,
- sizeof (entcount[0]) * brtvd->bv_size);
onblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size);
+ vmem_free(brtvd->bv_entcount, onblocks * BRT_BLOCKSIZE);
memcpy(bitmap, brtvd->bv_bitmap, MIN(BT_SIZEOFMAP(nblocks),
BT_SIZEOFMAP(onblocks)));
kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(onblocks));
@@ -581,13 +581,14 @@ brt_vdev_load(spa_t *spa, brt_vdev_t *brtvd)
*/
error = dmu_read(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0,
MIN(brtvd->bv_size, bvphys->bvp_size) * sizeof (uint16_t),
- brtvd->bv_entcount, DMU_READ_NO_PREFETCH);
+ brtvd->bv_entcount, DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO);
if (error != 0)
return (error);
ASSERT(bvphys->bvp_mos_entries != 0);
VERIFY0(dnode_hold(spa->spa_meta_objset, bvphys->bvp_mos_entries, brtvd,
&brtvd->bv_mos_entries_dnode));
+ dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP);
rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER);
brtvd->bv_mos_entries = bvphys->bvp_mos_entries;
rw_exit(&brtvd->bv_mos_entries_lock);
@@ -613,9 +614,9 @@ brt_vdev_dealloc(brt_vdev_t *brtvd)
ASSERT(brtvd->bv_initiated);
ASSERT0(avl_numnodes(&brtvd->bv_tree));
- vmem_free(brtvd->bv_entcount, sizeof (uint16_t) * brtvd->bv_size);
- brtvd->bv_entcount = NULL;
uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size);
+ vmem_free(brtvd->bv_entcount, nblocks * BRT_BLOCKSIZE);
+ brtvd->bv_entcount = NULL;
kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(nblocks));
brtvd->bv_bitmap = NULL;
@@ -807,10 +808,10 @@ brt_vdev_sync(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
/*
* TODO: Walk brtvd->bv_bitmap and write only the dirty blocks.
*/
- dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0,
- brtvd->bv_size * sizeof (brtvd->bv_entcount[0]),
- brtvd->bv_entcount, tx);
uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size);
+ dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0,
+ nblocks * BRT_BLOCKSIZE, brtvd->bv_entcount, tx,
+ DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO);
memset(brtvd->bv_bitmap, 0, BT_SIZEOFMAP(nblocks));
brtvd->bv_entcount_dirty = FALSE;
}
@@ -1510,6 +1511,31 @@ brt_load(spa_t *spa)
}
void
+brt_prefetch_all(spa_t *spa)
+{
+ /*
+ * Load all BRT entries for each vdev. This is intended to perform
+ * a prefetch on all such blocks. For the same reason that brt_prefetch
+ * (called from brt_pending_add) isn't locked, this is also not locked.
+ */
+ brt_rlock(spa);
+ for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
+ brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
+ brt_unlock(spa);
+
+ rw_enter(&brtvd->bv_mos_entries_lock, RW_READER);
+ if (brtvd->bv_mos_entries != 0) {
+ (void) zap_prefetch_object(spa->spa_meta_objset,
+ brtvd->bv_mos_entries);
+ }
+ rw_exit(&brtvd->bv_mos_entries_lock);
+
+ brt_rlock(spa);
+ }
+ brt_unlock(spa);
+}
+
+void
brt_unload(spa_t *spa)
{
if (spa->spa_brt_rangesize == 0)
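
[Annotation: the brt.c sizing changes above switch bv_entcount allocation and
freeing from an exact entry count to whole BRT_BLOCKSIZE blocks, matching the
whole-block reads and writes brt_vdev_sync()/brt_vdev_load() now issue with
DMU_UNCACHEDIO. A standalone sketch of the arithmetic, assuming the usual OpenZFS
definitions (BRT_BLOCKSIZE of 32 KiB, one uint16_t entry per range); the macro
below is a local stand-in, not a copy of brt_impl.h.]

#include <stdint.h>
#include <stdio.h>

#define BRT_BLOCKSIZE   (32 * 1024)
#define BRT_RANGESIZE_TO_NBLOCKS(size) \
    (((size) * sizeof (uint16_t) + BRT_BLOCKSIZE - 1) / BRT_BLOCKSIZE)

int
main(void)
{
    uint64_t size = 100000;    /* number of entcount entries */
    uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(size);

    /* Old allocation was exact; new one rounds up to whole blocks. */
    printf("entries=%llu bytes(old)=%llu bytes(new)=%llu nblocks=%llu\n",
        (unsigned long long)size,
        (unsigned long long)(size * sizeof (uint16_t)),
        (unsigned long long)(nblocks * BRT_BLOCKSIZE),
        (unsigned long long)nblocks);
    return (0);
}
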
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index fccc4c5b5b94..72c597609ade 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -446,7 +446,10 @@ static boolean_t
dbuf_include_in_metadata_cache(dmu_buf_impl_t *db)
{
DB_DNODE_ENTER(db);
- dmu_object_type_t type = DB_DNODE(db)->dn_type;
+ dnode_t *dn = DB_DNODE(db);
+ dmu_object_type_t type = dn->dn_storage_type;
+ if (type == DMU_OT_NONE)
+ type = dn->dn_type;
DB_DNODE_EXIT(db);
/* Check if this dbuf is one of the types we care about */
diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c
index c7a2426f3a77..3d42c51365a8 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_log.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_log.c
@@ -222,7 +222,7 @@ ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu)
VERIFY0(dmu_buf_hold_array_by_dnode(dlu->dlu_dn, offset, length,
B_FALSE, FTAG, &dlu->dlu_ndbp, &dlu->dlu_dbp,
- DMU_READ_NO_PREFETCH));
+ DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO));
dlu->dlu_tx = tx;
dlu->dlu_block = dlu->dlu_offset = 0;
@@ -298,7 +298,8 @@ ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, ddt_log_update_t *dlu)
* we will fill it, and zero it out.
*/
if (dlu->dlu_offset == 0) {
- dmu_buf_will_fill(db, dlu->dlu_tx, B_FALSE);
+ dmu_buf_will_fill_flags(db, dlu->dlu_tx, B_FALSE,
+ DMU_UNCACHEDIO);
memset(db->db_data, 0, db->db_size);
}
@@ -597,7 +598,7 @@ ddt_log_load_one(ddt_t *ddt, uint_t n)
for (uint64_t offset = 0; offset < hdr.dlh_length;
offset += dn->dn_datablksz) {
err = dmu_buf_hold_by_dnode(dn, offset, FTAG, &db,
- DMU_READ_PREFETCH);
+ DMU_READ_PREFETCH | DMU_UNCACHEDIO);
if (err != 0) {
dnode_rele(dn, FTAG);
ddt_log_empty(ddt, ddl);
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index a7a5c89bdafb..5690f8afad00 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -635,7 +635,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
int
dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
uint64_t length, int read, const void *tag, int *numbufsp,
- dmu_buf_t ***dbpp)
+ dmu_buf_t ***dbpp, dmu_flags_t flags)
{
dnode_t *dn;
int err;
@@ -645,7 +645,7 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
return (err);
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
- numbufsp, dbpp, DMU_READ_PREFETCH);
+ numbufsp, dbpp, flags);
dnode_rele(dn, FTAG);
@@ -655,14 +655,14 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
int
dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
- dmu_buf_t ***dbpp)
+ dmu_buf_t ***dbpp, dmu_flags_t flags)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
int err;
DB_DNODE_ENTER(db);
err = dmu_buf_hold_array_by_dnode(DB_DNODE(db), offset, length, read,
- tag, numbufsp, dbpp, DMU_READ_PREFETCH);
+ tag, numbufsp, dbpp, flags);
DB_DNODE_EXIT(db);
return (err);
@@ -850,12 +850,15 @@ dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, uint64_t size)
return (err);
/*
- * Chunk the requests (16 indirects worth) so that we can be interrupted
+ * Chunk the requests (16 indirects worth) so that we can be
+ * interrupted. Prefetch at least SPA_MAXBLOCKSIZE at a time
+ * to better utilize pools with smaller block sizes.
*/
uint64_t chunksize;
if (dn->dn_indblkshift) {
uint64_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1);
chunksize = (nbps * 16) << dn->dn_datablkshift;
+ chunksize = MAX(chunksize, SPA_MAXBLOCKSIZE);
} else {
chunksize = dn->dn_datablksz;
}
@@ -1293,7 +1296,7 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
void
dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- const void *buf, dmu_tx_t *tx)
+ const void *buf, dmu_tx_t *tx, dmu_flags_t flags)
{
dmu_buf_t **dbp;
int numbufs;
@@ -1302,8 +1305,8 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
return;
VERIFY0(dmu_buf_hold_array(os, object, offset, size,
- FALSE, FTAG, &numbufs, &dbp));
- dmu_write_impl(dbp, numbufs, offset, size, buf, tx, DMU_READ_PREFETCH);
+ FALSE, FTAG, &numbufs, &dbp, flags));
+ dmu_write_impl(dbp, numbufs, offset, size, buf, tx, flags);
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
@@ -1346,7 +1349,7 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
return;
VERIFY0(dmu_buf_hold_array(os, object, offset, size,
- FALSE, FTAG, &numbufs, &dbp));
+ FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
for (i = 0; i < numbufs; i++) {
dmu_buf_t *db = dbp[i];
@@ -1383,7 +1386,7 @@ dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_buf_t **dbp;
VERIFY0(dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG,
- &numbufs, &dbp));
+ &numbufs, &dbp, DMU_READ_PREFETCH));
for (i = 0; i < numbufs; i++)
dmu_buf_redact(dbp[i], tx);
dmu_buf_rele_array(dbp, numbufs, FTAG);
@@ -2592,7 +2595,7 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
int error, numbufs;
error = dmu_buf_hold_array(os, object, offset, length, FALSE, FTAG,
- &numbufs, &dbp);
+ &numbufs, &dbp, DMU_READ_PREFETCH);
if (error != 0) {
if (error == ESRCH) {
error = SET_ERROR(ENXIO);
@@ -2693,7 +2696,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
spa = os->os_spa;
VERIFY0(dmu_buf_hold_array(os, object, offset, length, FALSE, FTAG,
- &numbufs, &dbp));
+ &numbufs, &dbp, DMU_READ_PREFETCH));
ASSERT3U(nbps, ==, numbufs);
/*
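
[Annotation: the dmu.c API change above threads an explicit dmu_flags_t through
dmu_buf_hold_array(), dmu_buf_hold_array_by_bonus() and dmu_write(), so callers
choose prefetch behavior instead of always implying DMU_READ_PREFETCH. A
hypothetical caller updated for the new signatures, with error handling trimmed;
copy_range() itself is an illustration, not a function in this diff.]

static int
copy_range(objset_t *os, uint64_t object, uint64_t off, uint64_t len,
    const void *buf, dmu_tx_t *tx)
{
    dmu_buf_t **dbp;
    int numbufs, err;

    /* Read path: keep prefetch, matching the old implicit behavior. */
    err = dmu_buf_hold_array(os, object, off, len, TRUE, FTAG,
        &numbufs, &dbp, DMU_READ_PREFETCH);
    if (err != 0)
        return (err);
    dmu_buf_rele_array(dbp, numbufs, FTAG);

    /* Overwrite path: skip prefetch when old contents are irrelevant. */
    dmu_write(os, object, off, len, buf, tx, DMU_READ_NO_PREFETCH);
    return (0);
}
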
diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c
index 5a22ed71a5fe..c087be4c811d 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_redact.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c
@@ -544,7 +544,8 @@ redaction_list_update_sync(void *arg, dmu_tx_t *tx)
if (index == bufsize) {
dmu_write(mos, rl->rl_object,
rl->rl_phys->rlp_num_entries * sizeof (*buf),
- bufsize * sizeof (*buf), buf, tx);
+ bufsize * sizeof (*buf), buf, tx,
+ DMU_READ_NO_PREFETCH);
rl->rl_phys->rlp_num_entries += bufsize;
index = 0;
}
@@ -552,7 +553,8 @@ redaction_list_update_sync(void *arg, dmu_tx_t *tx)
}
if (index > 0) {
dmu_write(mos, rl->rl_object, rl->rl_phys->rlp_num_entries *
- sizeof (*buf), index * sizeof (*buf), buf, tx);
+ sizeof (*buf), index * sizeof (*buf), buf, tx,
+ DMU_READ_NO_PREFETCH);
rl->rl_phys->rlp_num_entries += index;
}
kmem_free(buf, bufsize * sizeof (*buf));
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c
index e88d394b5229..e0cc4a7e13e0 100644
--- a/sys/contrib/openzfs/module/zfs/dnode.c
+++ b/sys/contrib/openzfs/module/zfs/dnode.c
@@ -2496,26 +2496,27 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
}
/*
- * Scans a block at the indicated "level" looking for a hole or data,
- * depending on 'flags'.
+ * Scans the block at the indicated "level" looking for a hole or data,
+ * depending on 'flags' starting from array position given by *index.
*
- * If level > 0, then we are scanning an indirect block looking at its
- * pointers. If level == 0, then we are looking at a block of dnodes.
+ * If lvl > 0, then we are scanning an indirect block looking at its
+ * pointers. If lvl == 0, then we are looking at a block of dnodes.
*
* If we don't find what we are looking for in the block, we return ESRCH.
- * Otherwise, return with *offset pointing to the beginning (if searching
- * forwards) or end (if searching backwards) of the range covered by the
- * block pointer we matched on (or dnode).
+ * Otherwise, return with *index set to the matching array position.
*
- * The basic search algorithm used below by dnode_next_offset() is to
- * use this function to search up the block tree (widen the search) until
- * we find something (i.e., we don't return ESRCH) and then search back
- * down the tree (narrow the search) until we reach our original search
- * level.
+ * In both cases, *offset is updated to point at the matched BP/dnode or
+ * the next offset to search (unless at the limit of possible offsets).
+ *
+ * The basic search algorithm used below by dnode_next_offset() uses this
+ * function to perform a block-order tree traversal. We search up the block
+ * tree (widen the search) until we find something (i.e., we don't return
+ * ESRCH) and then search back down the tree (narrow the search) until we
+ * reach our original search level or backtrack up because nothing matches.
*/
static int
-dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
- int lvl, uint64_t blkfill, uint64_t txg)
+dnode_next_offset_level(dnode_t *dn, int flags, int lvl, uint64_t blkid,
+ int *index, uint64_t blkfill, uint64_t txg, uint64_t *offset)
{
dmu_buf_impl_t *db = NULL;
void *data = NULL;
@@ -2541,20 +2542,12 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
rrw_enter(&dmu_objset_ds(dn->dn_objset)->ds_bp_rwlock,
RW_READER, FTAG);
} else {
- uint64_t blkid = dbuf_whichblock(dn, lvl, *offset);
error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FALSE, FTAG, &db);
if (error) {
if (error != ENOENT)
return (error);
if (hole)
return (0);
- /*
- * This can only happen when we are searching up
- * the block tree for data. We don't really need to
- * adjust the offset, as we will just end up looking
- * at the pointer to this block in its parent, and its
- * going to be unallocated, so we will skip over it.
- */
return (SET_ERROR(ESRCH));
}
error = dbuf_read(db, NULL,
@@ -2582,8 +2575,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
ASSERT(dn->dn_type == DMU_OT_DNODE);
ASSERT(!(flags & DNODE_FIND_BACKWARDS));
- for (i = (*offset >> DNODE_SHIFT) & (blkfill - 1);
- i < blkfill; i += dnp[i].dn_extra_slots + 1) {
+ for (i = *index; i < blkfill; i += dnp[i].dn_extra_slots + 1) {
if ((dnp[i].dn_type == DMU_OT_NONE) == hole)
break;
}
@@ -2591,11 +2583,11 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
if (i == blkfill)
error = SET_ERROR(ESRCH);
+ *index = i;
*offset = (*offset & ~(DNODE_BLOCK_SIZE - 1)) +
(i << DNODE_SHIFT);
} else {
blkptr_t *bp = data;
- uint64_t start = *offset;
span = (lvl - 1) * epbs + dn->dn_datablkshift;
minfill = 0;
maxfill = blkfill << ((lvl - 1) * epbs);
@@ -2605,40 +2597,27 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
else
minfill++;
- if (span >= 8 * sizeof (*offset)) {
- /* This only happens on the highest indirection level */
- ASSERT3U((lvl - 1), ==, dn->dn_phys->dn_nlevels - 1);
- *offset = 0;
- } else {
- *offset = *offset >> span;
- }
-
- for (i = BF64_GET(*offset, 0, epbs);
- i >= 0 && i < epb; i += inc) {
+ for (i = *index; i >= 0 && i < epb; i += inc) {
if (BP_GET_FILL(&bp[i]) >= minfill &&
BP_GET_FILL(&bp[i]) <= maxfill &&
(hole || BP_GET_LOGICAL_BIRTH(&bp[i]) > txg))
break;
- if (inc > 0 || *offset > 0)
- *offset += inc;
}
- if (span >= 8 * sizeof (*offset)) {
- *offset = start;
- } else {
- *offset = *offset << span;
- }
-
- if (inc < 0) {
- /* traversing backwards; position offset at the end */
- if (span < 8 * sizeof (*offset))
- *offset = MIN(*offset + (1ULL << span) - 1,
- start);
- } else if (*offset < start) {
- *offset = start;
- }
if (i < 0 || i >= epb)
error = SET_ERROR(ESRCH);
+
+ *index = i;
+ if (span < 8 * sizeof (*offset)) {
+ uint64_t nblk = blkid << epbs;
+ if (i >= 0 || blkid != 0)
+ nblk += i;
+ if ((nblk >> (8 * sizeof (*offset) - span)) == 0)
+ *offset = (flags & DNODE_FIND_BACKWARDS) ?
+ /* backwards: position offset at the end */
+ MIN(*offset, ((nblk + 1) << span) - 1) :
+ MAX(*offset, nblk << span);
+ }
}
if (db != NULL) {
@@ -2656,38 +2635,24 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
}
/*
- * Adjust *offset to the next (or previous) block byte offset at lvl.
- * Returns FALSE if *offset would overflow or underflow.
- */
-static boolean_t
-dnode_next_block(dnode_t *dn, int flags, uint64_t *offset, int lvl)
-{
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- int span = lvl * epbs + dn->dn_datablkshift;
- uint64_t blkid, maxblkid;
-
- if (span >= 8 * sizeof (uint64_t))
- return (B_FALSE);
-
- blkid = *offset >> span;
- maxblkid = 1ULL << (8 * sizeof (*offset) - span);
- if (!(flags & DNODE_FIND_BACKWARDS) && blkid + 1 < maxblkid)
- *offset = (blkid + 1) << span;
- else if ((flags & DNODE_FIND_BACKWARDS) && blkid > 0)
- *offset = (blkid << span) - 1;
- else
- return (B_FALSE);
-
- return (B_TRUE);
-}
-
-/*
* Find the next hole, data, or sparse region at or after *offset.
* The value 'blkfill' tells us how many items we expect to find
* in an L0 data block; this value is 1 for normal objects,
* DNODES_PER_BLOCK for the meta dnode, and some fraction of
* DNODES_PER_BLOCK when searching for sparse regions thereof.
*
+ * If minlvl == 0, this searches for dnodes or unallocated dnodes.
+ * If found, *offset points to the first offset of the matched dnode.
+ * Backwards search is not allowed for dnodes.
+ *
+ * If minlvl > 0, this searches for blocks at the given level.
+ * If found, *offset points to the first L0 offset of the block
+ * (or for backwards search, the last offset, inclusive).
+ *
+ * If not found, in both cases, *offset is set to the first (or last)
+ * offset of the unallocated indirect block where the search ended or
+ * the initial offset if no such block was encountered.
+ *
* Examples:
*
* dnode_next_offset(dn, flags, offset, 1, 1, 0);
@@ -2708,7 +2673,8 @@ int
dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
int minlvl, uint64_t blkfill, uint64_t txg)
{
- uint64_t matched = *offset;
+ uint64_t blkid;
+ int index, epbs;
int lvl, maxlvl;
int error = 0;
@@ -2730,18 +2696,31 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
goto out;
}
+ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
maxlvl = dn->dn_phys->dn_nlevels;
+ if (minlvl > 0) {
+ uint64_t n = dbuf_whichblock(dn, minlvl - 1, *offset);
+ blkid = n >> epbs;
+ index = BF64_GET(n, 0, epbs);
+ } else {
+ blkid = dbuf_whichblock(dn, 0, *offset);
+ index = (*offset >> DNODE_SHIFT) & (blkfill - 1);
+ ASSERT3U(BF64_GET(*offset, 0, DNODE_SHIFT), ==, 0);
+ }
+
for (lvl = minlvl; lvl <= maxlvl; ) {
error = dnode_next_offset_level(dn,
- flags, offset, lvl, blkfill, txg);
+ flags, lvl, blkid, &index, blkfill, txg, offset);
+
if (error == 0 && lvl > minlvl) {
+ /* Continue search at matched block in lvl-1. */
+ blkid = (blkid << epbs) + index;
+ index = 0;
--lvl;
- matched = *offset;
- } else if (error == ESRCH && lvl < maxlvl &&
- dnode_next_block(dn, flags, &matched, lvl)) {
+ } else if (error == ESRCH && lvl < maxlvl) {
/*
- * Continue search at next/prev offset in lvl+1 block.
+ * Continue search at next/prev index in lvl+1 block.
*
* Usually we only search upwards at the start of the
* search as higher level blocks point at a matching
@@ -2752,13 +2731,14 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
* happens if we are still syncing out the tree, and
* some BP's at higher levels are not updated yet.
*
- * We must adjust offset to avoid coming back to the
- * same offset and getting stuck looping forever. This
- * also deals with the case where offset is already at
- * the beginning or end of the object.
+ * We must adjust index to avoid coming back to the
+ * same offset and getting stuck looping forever. The
+ * next loop goes up again if index is -1 or (1<<epbs).
*/
+ index = BF64_GET(blkid, 0, epbs) +
+ ((flags & DNODE_FIND_BACKWARDS) ? -1 : 1);
+ blkid = blkid >> epbs;
++lvl;
- *offset = matched;
} else {
break;
}
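
[Annotation: the dnode.c rework above makes dnode_next_offset() track its search
position as a (blkid, index) pair rather than recomputing byte offsets at every
level, which avoids the old overflow special cases and the dnode_next_block()
helper. A worked userspace example of that decomposition, assuming illustrative
values of 128 KiB data blocks (datablkshift = 17) and 128 block pointers per
indirect (epbs = 7), in which case dbuf_whichblock() reduces to a shift.]

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const int datablkshift = 17, epbs = 7;
    uint64_t offset = 74565ULL << datablkshift;    /* start of L0 block 74565 */

    /* dbuf_whichblock(dn, 0, offset): which L0 block covers offset */
    uint64_t n = offset >> datablkshift;

    /* Split into L1 block id and BP slot, as the new minlvl > 0 setup does */
    uint64_t blkid = n >> epbs;
    int index = (int)(n & ((1ULL << epbs) - 1));

    /*
     * Descending after a match at L1 re-expands the pair:
     * blkid = (blkid << epbs) + index, then index restarts at 0.
     */
    printf("L0 block %llu -> L1 blkid %llu, slot %d -> child blkid %llu\n",
        (unsigned long long)n, (unsigned long long)blkid, index,
        (unsigned long long)((blkid << epbs) + index));
    return (0);
}
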
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 9f4399af56bd..3f649ffb44e4 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -3966,7 +3966,8 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
object = space_map_object(msp->ms_sm);
dmu_write(spa->spa_meta_objset,
msp->ms_group->mg_vd->vdev_ms_array, sizeof (uint64_t) *
- msp->ms_id, sizeof (uint64_t), &object, tx);
+ msp->ms_id, sizeof (uint64_t), &object, tx,
+ DMU_READ_NO_PREFETCH);
}
/*
@@ -4292,7 +4293,8 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
VERIFY3U(new_object, !=, 0);
dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) *
- msp->ms_id, sizeof (uint64_t), &new_object, tx);
+ msp->ms_id, sizeof (uint64_t), &new_object, tx,
+ DMU_READ_NO_PREFETCH);
VERIFY0(space_map_open(&msp->ms_sm, mos, new_object,
msp->ms_start, msp->ms_size, vd->vdev_ashift));
@@ -6328,7 +6330,7 @@ metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx)
}
dmu_write(spa_meta_objset(spa), object, entry_offset, entry_size,
- &entry, tx);
+ &entry, tx, DMU_READ_NO_PREFETCH);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c
index fd46127b6068..b8ba40ecdc9d 100644
--- a/sys/contrib/openzfs/module/zfs/mmp.c
+++ b/sys/contrib/openzfs/module/zfs/mmp.c
@@ -729,12 +729,12 @@ mmp_signal_all_threads(void)
{
spa_t *spa = NULL;
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
while ((spa = spa_next(spa))) {
if (spa->spa_state == POOL_STATE_ACTIVE)
mmp_signal_thread(spa);
}
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
ZFS_MODULE_PARAM_CALL(zfs_multihost, zfs_multihost_, interval,
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index b3bb46da263b..34de3f1d9525 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -141,7 +141,7 @@ typedef enum zti_modes {
#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
-#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 }
+#define ZTI_SCALE(min) { ZTI_MODE_SCALE, (min), 1 }
#define ZTI_SYNC { ZTI_MODE_SYNC, 0, 1 }
#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
@@ -180,13 +180,13 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
- { ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */
+ { ZTI_N(8), ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* READ */
#ifdef illumos
- { ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
+ { ZTI_SYNC, ZTI_N(5), ZTI_SCALE(0), ZTI_N(5) }, /* WRITE */
#else
- { ZTI_SYNC, ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* WRITE */
+ { ZTI_SYNC, ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* WRITE */
#endif
- { ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
+ { ZTI_SCALE(32), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FLUSH */
{ ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */
@@ -1082,7 +1082,7 @@ spa_change_guid(spa_t *spa, const uint64_t *guidp)
int error;
mutex_enter(&spa->spa_vdev_top_lock);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if (guidp != NULL) {
guid = *guidp;
@@ -1117,7 +1117,7 @@ spa_change_guid(spa_t *spa, const uint64_t *guidp)
}
out:
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
mutex_exit(&spa->spa_vdev_top_lock);
return (error);
@@ -1170,7 +1170,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
uint_t value = ztip->zti_value;
uint_t count = ztip->zti_count;
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
- uint_t cpus, flags = TASKQ_DYNAMIC;
+ uint_t cpus, threads, flags = TASKQ_DYNAMIC;
switch (mode) {
case ZTI_MODE_FIXED:
@@ -1183,8 +1183,8 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
* Create one wr_iss taskq for every 'zio_taskq_write_tpq' CPUs,
* not to exceed the number of spa allocators, and align to it.
*/
- cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
- count = MAX(1, cpus / MAX(1, zio_taskq_write_tpq));
+ threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
+ count = MAX(1, threads / MAX(1, zio_taskq_write_tpq));
count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
count = MIN(count, spa->spa_alloc_count);
while (spa->spa_alloc_count % count != 0 &&
@@ -1201,14 +1201,14 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
break;
case ZTI_MODE_SCALE:
- flags |= TASKQ_THREADS_CPU_PCT;
/*
* We want more taskqs to reduce lock contention, but we want
* less for better request ordering and CPU utilization.
*/
- cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
+ threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100);
+ threads = MAX(threads, value);
if (zio_taskq_batch_tpq > 0) {
- count = MAX(1, (cpus + zio_taskq_batch_tpq / 2) /
+ count = MAX(1, (threads + zio_taskq_batch_tpq / 2) /
zio_taskq_batch_tpq);
} else {
/*
@@ -1228,13 +1228,23 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
* 128 10 8% 10 100
* 256 14 6% 15 210
*/
- count = 1 + cpus / 6;
+ cpus = MIN(threads, boot_ncpus);
+ count = 1 + threads / 6;
while (count * count > cpus)
count--;
}
- /* Limit each taskq within 100% to not trigger assertion. */
- count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
- value = (zio_taskq_batch_pct + count / 2) / count;
+
+ /*
+ * Try to represent the number of threads per taskq as percent
+ * of online CPUs to allow scaling with later online/offline.
+ * Fall back to absolute numbers if can't.
+ */
+ value = (threads * 100 + boot_ncpus * count / 2) /
+ (boot_ncpus * count);
+ if (value < 5 || value > 100)
+ value = MAX(1, (threads + count / 2) / count);
+ else
+ flags |= TASKQ_THREADS_CPU_PCT;
break;
case ZTI_MODE_NULL:
@@ -1433,8 +1443,30 @@ spa_taskq_param_set(zio_type_t t, char *cfg)
break;
}
+ /*
+ * SCALE is optionally parameterised by minimum number of
+ * threads.
+ */
case ZTI_MODE_SCALE: {
- const zio_taskq_info_t zti = ZTI_SCALE;
+ unsigned long long mint = 0;
+ if (c != NULL && *c != '\0') {
+ /* Need a number */
+ if (!(isdigit(*c)))
+ break;
+ tok = c;
+
+ /* Take digits */
+ err = ddi_strtoull(tok, &tok, 10, &mint);
+ /* Must succeed, and moved forward */
+ if (err != 0 || tok == c || *tok != '\0')
+ break;
+
+ /* Sanity check */
+ if (mint >= 16384)
+ break;
+ }
+
+ const zio_taskq_info_t zti = ZTI_SCALE(mint);
row[q] = zti;
break;
}
@@ -1501,6 +1533,9 @@ spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline)
pos += sprintf(&buf[pos], "%s%s,%u,%u", sep,
modes[zti->zti_mode], zti->zti_count,
zti->zti_value);
+ else if (zti->zti_mode == ZTI_MODE_SCALE && zti->zti_value > 0)
+ pos += sprintf(&buf[pos], "%s%s,%u", sep,
+ modes[zti->zti_mode], zti->zti_value);
else
pos += sprintf(&buf[pos], "%s%s", sep,
modes[zti->zti_mode]);
@@ -1520,9 +1555,10 @@ spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp)
{
char *cfg = kmem_strdup(val);
int err = spa_taskq_param_set(ZIO_TYPE_READ, cfg);
- kmem_free(cfg, strlen(val)+1);
+ kmem_strfree(cfg);
return (-err);
}
+
static int
spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp)
{
@@ -1534,14 +1570,30 @@ spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp)
{
char *cfg = kmem_strdup(val);
int err = spa_taskq_param_set(ZIO_TYPE_WRITE, cfg);
- kmem_free(cfg, strlen(val)+1);
+ kmem_strfree(cfg);
return (-err);
}
+
static int
spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp)
{
return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE));
}
+
+static int
+spa_taskq_free_param_set(const char *val, zfs_kernel_param_t *kp)
+{
+ char *cfg = kmem_strdup(val);
+ int err = spa_taskq_param_set(ZIO_TYPE_FREE, cfg);
+ kmem_strfree(cfg);
+ return (-err);
+}
+
+static int
+spa_taskq_free_param_get(char *buf, zfs_kernel_param_t *kp)
+{
+ return (spa_taskq_param_get(ZIO_TYPE_FREE, buf, TRUE));
+}
#else
/*
* On FreeBSD load-time parameters can be set up before malloc() is available,
@@ -1574,6 +1626,19 @@ spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
return (err);
return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf));
}
+
+static int
+spa_taskq_free_param(ZFS_MODULE_PARAM_ARGS)
+{
+ char buf[SPA_TASKQ_PARAM_MAX];
+ int err;
+
+ (void) spa_taskq_param_get(ZIO_TYPE_FREE, buf, FALSE);
+ err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+ if (err || req->newptr == NULL)
+ return (err);
+ return (spa_taskq_param_set(ZIO_TYPE_FREE, buf));
+}
#endif
#endif /* _KERNEL */
@@ -2187,7 +2252,7 @@ spa_should_sync_time_logger_on_unload(spa_t *spa)
static void
spa_unload(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED);
@@ -5260,7 +5325,7 @@ spa_ld_read_checkpoint_txg(spa_t *spa)
int error = 0;
ASSERT0(spa->spa_checkpoint_txg);
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_load_thread == curthread);
error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -5287,7 +5352,7 @@ spa_ld_mos_init(spa_t *spa, spa_import_type_t type)
{
int error = 0;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
/*
@@ -5363,7 +5428,7 @@ spa_ld_checkpoint_rewind(spa_t *spa)
uberblock_t checkpoint;
int error = 0;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -5510,7 +5575,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
boolean_t update_config_cache = B_FALSE;
hrtime_t load_start = gethrtime();
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
spa_load_note(spa, "LOADING");
@@ -5557,7 +5622,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
* Drop the namespace lock for the rest of the function.
*/
spa->spa_load_thread = curthread;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
/*
* Retrieve the checkpoint txg if the pool has a checkpoint.
@@ -5796,9 +5861,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
spa_load_note(spa, "LOADED");
fail:
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa->spa_load_thread = NULL;
- cv_broadcast(&spa_namespace_cv);
+ spa_namespace_broadcast();
return (error);
@@ -5960,14 +6025,14 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
* up calling spa_open() again. The real fix is to figure out how to
* avoid dsl_dir_open() calling this in the first place.
*/
- if (MUTEX_NOT_HELD(&spa_namespace_lock)) {
- mutex_enter(&spa_namespace_lock);
+ if (!spa_namespace_held()) {
+ spa_namespace_enter(FTAG);
locked = B_TRUE;
}
if ((spa = spa_lookup(pool)) == NULL) {
if (locked)
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (SET_ERROR(ENOENT));
}
@@ -6004,7 +6069,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
spa_remove(spa);
if (locked)
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (SET_ERROR(ENOENT));
}
@@ -6024,7 +6089,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
spa_deactivate(spa);
spa->spa_last_open_failed = error;
if (locked)
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
*spapp = NULL;
return (error);
}
@@ -6048,7 +6113,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
spa->spa_last_open_failed = 0;
spa->spa_last_ubsync_txg = 0;
spa->spa_load_txg = 0;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
if (firstopen)
@@ -6081,13 +6146,13 @@ spa_inject_addref(char *name)
{
spa_t *spa;
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if ((spa = spa_lookup(name)) == NULL) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (NULL);
}
spa->spa_inject_ref++;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (spa);
}
@@ -6095,9 +6160,9 @@ spa_inject_addref(char *name)
void
spa_inject_delref(spa_t *spa)
{
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa->spa_inject_ref--;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
/*
@@ -6341,14 +6406,14 @@ spa_get_stats(const char *name, nvlist_t **config,
*/
if (altroot) {
if (spa == NULL) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa = spa_lookup(name);
if (spa)
spa_altroot(spa, altroot, buflen);
else
altroot[0] = '\0';
spa = NULL;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
} else {
spa_altroot(spa, altroot, buflen);
}
@@ -6568,9 +6633,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
/*
* If this pool already exists, return failure.
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if (spa_lookup(poolname) != NULL) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (SET_ERROR(EEXIST));
}
@@ -6588,7 +6653,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (props && (error = spa_prop_validate(spa, props))) {
spa_deactivate(spa);
spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (error);
}
@@ -6621,14 +6686,14 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (error != 0) {
spa_deactivate(spa);
spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (error);
}
}
if (!has_allocclass && zfs_special_devs(nvroot, NULL)) {
spa_deactivate(spa);
spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (ENOTSUP);
}
@@ -6694,7 +6759,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (error);
}
@@ -6847,7 +6912,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_import_os(spa);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (0);
}
@@ -6872,9 +6937,9 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
/*
* If a pool with this name exists, return failure.
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if (spa_lookup(pool) != NULL) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (SET_ERROR(EEXIST));
}
@@ -6901,7 +6966,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
zfs_dbgmsg("spa_import: verbatim import of %s", pool);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (0);
}
@@ -6960,7 +7025,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (error);
}
@@ -7028,7 +7093,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
zvol_create_minors(pool);
@@ -7060,7 +7125,7 @@ spa_tryimport(nvlist_t *tryconfig)
(void) snprintf(name, MAXPATHLEN, "%s-%llx-%s",
TRYIMPORT_NAME, (u_longlong_t)(uintptr_t)curthread, poolname);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa = spa_add(name, tryconfig, NULL);
spa_activate(spa, SPA_MODE_READ);
kmem_free(name, MAXPATHLEN);
@@ -7158,7 +7223,7 @@ spa_tryimport(nvlist_t *tryconfig)
spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (config);
}
@@ -7186,15 +7251,15 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
if (!(spa_mode_global & SPA_MODE_WRITE))
return (SET_ERROR(EROFS));
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if ((spa = spa_lookup(pool)) == NULL) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (SET_ERROR(ENOENT));
}
if (spa->spa_is_exporting) {
/* the pool is being exported by another thread */
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS));
}
spa->spa_is_exporting = B_TRUE;
@@ -7204,18 +7269,18 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
* and see if we can export.
*/
spa_open_ref(spa, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
spa_async_suspend(spa);
if (spa->spa_zvol_taskq) {
zvol_remove_minors(spa, spa_name(spa), B_TRUE);
taskq_wait(spa->spa_zvol_taskq);
}
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa->spa_export_thread = curthread;
spa_close(spa, FTAG);
if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
goto export_spa;
}
@@ -7239,7 +7304,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
goto fail;
}
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
/*
* At this point we no longer hold the spa_namespace_lock and
* there were no references on the spa. Future spa_lookups will
@@ -7258,7 +7323,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
if (!force && new_state == POOL_STATE_EXPORTED &&
spa_has_active_shared_spare(spa)) {
error = SET_ERROR(EXDEV);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
goto fail;
}
@@ -7333,7 +7398,7 @@ export_spa:
/*
* Take the namespace lock for the actual spa_t removal
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if (new_state != POOL_STATE_UNINITIALIZED) {
if (!hardforce)
spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
@@ -7351,8 +7416,8 @@ export_spa:
/*
* Wake up any waiters in spa_lookup()
*/
- cv_broadcast(&spa_namespace_cv);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_broadcast();
+ spa_namespace_exit(FTAG);
return (0);
fail:
@@ -7363,8 +7428,8 @@ fail:
/*
* Wake up any waiters in spa_lookup()
*/
- cv_broadcast(&spa_namespace_cv);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_broadcast();
+ spa_namespace_exit(FTAG);
return (error);
}
@@ -7574,10 +7639,10 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
*/
(void) spa_vdev_exit(spa, vd, txg, 0);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (0);
}
@@ -7694,7 +7759,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) {
error = (spa_has_checkpoint(spa)) ?
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT;
@@ -8078,7 +8143,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* as spa_vdev_resilver_done() calls this function everything
* should be fine as the resilver will return right away.
*/
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) {
error = (spa_has_checkpoint(spa)) ?
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT;
@@ -8282,28 +8347,28 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
if (unspare) {
spa_t *altspa = NULL;
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
while ((altspa = spa_next(altspa)) != NULL) {
if (altspa->spa_state != POOL_STATE_ACTIVE ||
altspa == spa)
continue;
spa_open_ref(altspa, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
(void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_close(altspa, FTAG);
}
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
/* search the rest of the vdevs for spares to remove */
spa_vdev_resilver_done(spa);
}
/* all done with the spa; OK to release */
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_close(spa, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (error);
}
@@ -8312,7 +8377,7 @@ static int
spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
list_t *vd_list)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
@@ -8396,7 +8461,7 @@ spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
* we can properly assess the vdev state before we commit to
* the initializing operation.
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
for (nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
pair != NULL; pair = nvlist_next_nvpair(nv, pair)) {
@@ -8419,7 +8484,7 @@ spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
/* Sync out the initializing state */
txg_wait_synced(spa->spa_dsl_pool, 0);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
list_destroy(&vd_list);
@@ -8430,7 +8495,7 @@ static int
spa_vdev_trim_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
uint64_t rate, boolean_t partial, boolean_t secure, list_t *vd_list)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
@@ -8517,7 +8582,7 @@ spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, uint64_t rate,
* we can properly assess the vdev state before we commit to
* the TRIM operation.
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
for (nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
pair != NULL; pair = nvlist_next_nvpair(nv, pair)) {
@@ -8540,7 +8605,7 @@ spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, uint64_t rate,
/* Sync out the TRIM state */
txg_wait_synced(spa->spa_dsl_pool, 0);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
list_destroy(&vd_list);
@@ -8568,7 +8633,7 @@ spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config,
txg = spa_vdev_enter(spa);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) {
error = (spa_has_checkpoint(spa)) ?
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT;
@@ -9242,7 +9307,7 @@ spa_async_thread(void *arg)
if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
uint64_t old_space, new_space;
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
old_space = metaslab_class_get_space(spa_normal_class(spa));
old_space += metaslab_class_get_space(spa_special_class(spa));
old_space += metaslab_class_get_space(spa_dedup_class(spa));
@@ -9260,7 +9325,7 @@ spa_async_thread(void *arg)
spa_embedded_log_class(spa));
new_space += metaslab_class_get_space(
spa_special_embedded_log_class(spa));
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
/*
* If the pool grew as a result of the config update,
@@ -9329,49 +9394,49 @@ spa_async_thread(void *arg)
dsl_scan_restart_resilver(dp, 0);
if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
vdev_initialize_restart(spa->spa_root_vdev);
spa_config_exit(spa, SCL_CONFIG, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
if (tasks & SPA_ASYNC_TRIM_RESTART) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
vdev_trim_restart(spa->spa_root_vdev);
spa_config_exit(spa, SCL_CONFIG, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
if (tasks & SPA_ASYNC_AUTOTRIM_RESTART) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
vdev_autotrim_restart(spa);
spa_config_exit(spa, SCL_CONFIG, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
/*
* Kick off L2 cache whole device TRIM.
*/
if (tasks & SPA_ASYNC_L2CACHE_TRIM) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
vdev_trim_l2arc(spa);
spa_config_exit(spa, SCL_CONFIG, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
/*
* Kick off L2 cache rebuilding.
*/
if (tasks & SPA_ASYNC_L2CACHE_REBUILD) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_config_enter(spa, SCL_L2ARC, FTAG, RW_READER);
l2arc_spa_rebuild_start(spa);
spa_config_exit(spa, SCL_L2ARC, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
/*
@@ -9601,7 +9666,8 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
KM_SLEEP));
memset(packed + nvsize, 0, bufsize - nvsize);
- dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
+ dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx,
+ DMU_READ_NO_PREFETCH);
vmem_free(packed, bufsize);
@@ -10522,18 +10588,18 @@ void
spa_sync_allpools(void)
{
spa_t *spa = NULL;
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
while ((spa = spa_next(spa)) != NULL) {
if (spa_state(spa) != POOL_STATE_ACTIVE ||
!spa_writeable(spa) || spa_suspended(spa))
continue;
spa_open_ref(spa, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
txg_wait_synced(spa_get_dsl(spa), 0);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_close(spa, FTAG);
}
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
taskq_t *
@@ -10680,7 +10746,7 @@ spa_evict_all(void)
* Remove all cached state. All pools should be closed now,
* so every spa in the AVL tree should be unreferenced.
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
while ((spa = spa_next(NULL)) != NULL) {
/*
* Stop async tasks. The async thread may need to detach
@@ -10688,9 +10754,9 @@ spa_evict_all(void)
* spa_namespace_lock, so we must drop it here.
*/
spa_open_ref(spa, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
spa_async_suspend(spa);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_close(spa, FTAG);
if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
@@ -10699,7 +10765,7 @@ spa_evict_all(void)
}
spa_remove(spa);
}
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
vdev_t *
@@ -11272,6 +11338,9 @@ ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read,
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write,
spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RW,
"Configure IO queues for write IO");
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_free,
+ spa_taskq_free_param_set, spa_taskq_free_param_get, ZMOD_RW,
+ "Configure IO queues for free IO");
#endif
ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_write_tpq, UINT, ZMOD_RW,
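
A stand-alone sketch of the percent-of-CPUs rounding added in the spa_taskqs_init() hunk above, which lets taskq sizes track CPU online/offline events; plain C, names illustrative, and it assumes count and ncpus are nonzero:

    #include <stdio.h>

    /* Round threads-per-taskq to the nearest percent of ncpus. */
    static unsigned
    threads_to_pct(unsigned threads, unsigned count, unsigned ncpus,
        int *use_pct)
    {
    	unsigned value = (threads * 100 + ncpus * count / 2) /
    	    (ncpus * count);

    	/* Outside 5..100% the percentage form loses precision. */
    	if (value < 5 || value > 100) {
    		*use_pct = 0;	/* fall back to an absolute count */
    		value = (threads + count / 2) / count;
    		return (value > 0 ? value : 1);
    	}
    	*use_pct = 1;		/* caller sets TASKQ_THREADS_CPU_PCT */
    	return (value);
    }

    int
    main(void)
    {
    	int pct;

    	/* e.g. 32 threads over 2 taskqs on 16 CPUs -> 100% each */
    	printf("%u (pct=%d)\n", threads_to_pct(32, 2, 16, &pct), pct);
    	return (0);
    }
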
diff --git a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
index e07756c46748..a42aa62e6599 100644
--- a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
+++ b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c
@@ -427,7 +427,7 @@ spa_checkpoint_discard_thread(void *arg, zthr_t *zthr)
*/
int error = dmu_buf_hold_array_by_bonus(
checkpoint_sm->sm_dbuf, offset, size,
- B_TRUE, FTAG, &numbufs, &dbp);
+ B_TRUE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
if (error != 0) {
zfs_panic_recover("zfs: error %d was returned "
"while prefetching checkpoint space map "
diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c
index f615591e826b..31216e9a7ccc 100644
--- a/sys/contrib/openzfs/module/zfs/spa_config.c
+++ b/sys/contrib/openzfs/module/zfs/spa_config.c
@@ -161,7 +161,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
boolean_t ccw_failure;
int error = 0;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
if (!(spa_mode_global & SPA_MODE_WRITE))
return;
@@ -287,7 +287,7 @@ spa_all_configs(uint64_t *generation, nvlist_t **pools)
if (*generation == spa_config_generation)
return (SET_ERROR(EEXIST));
- int error = mutex_enter_interruptible(&spa_namespace_lock);
+ int error = spa_namespace_enter_interruptible(FTAG);
if (error)
return (SET_ERROR(EINTR));
@@ -302,7 +302,7 @@ spa_all_configs(uint64_t *generation, nvlist_t **pools)
}
}
*generation = spa_config_generation;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (0);
}
@@ -483,7 +483,7 @@ spa_config_update(spa_t *spa, int what)
uint64_t txg;
int c;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
txg = spa_last_synced_txg(spa) + 1;
diff --git a/sys/contrib/openzfs/module/zfs/spa_history.c b/sys/contrib/openzfs/module/zfs/spa_history.c
index 60ab07944d72..b9d0c9656726 100644
--- a/sys/contrib/openzfs/module/zfs/spa_history.c
+++ b/sys/contrib/openzfs/module/zfs/spa_history.c
@@ -169,13 +169,14 @@ spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof);
shpp->sh_eof += len;
- dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx);
+ dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx,
+ DMU_READ_NO_PREFETCH);
len -= firstwrite;
if (len > 0) {
/* write out the rest at the beginning of physical file */
dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len,
- len, (char *)buf + firstwrite, tx);
+ len, (char *)buf + firstwrite, tx, DMU_READ_NO_PREFETCH);
}
return (0);
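
Across this patch dmu_write() gains a trailing flags argument so each caller states its prefetch policy. Assuming the prototype matches these call sites (a dmu_flags_t-style final parameter, inferred from the diff rather than quoted from the headers), a minimal caller looks like:

    /*
     * Assumed new prototype, inferred from the call sites in this diff:
     * void dmu_write(objset_t *os, uint64_t object, uint64_t offset,
     *     uint64_t size, const void *buf, dmu_tx_t *tx, dmu_flags_t flags);
     */
    static void
    append_record(objset_t *os, uint64_t obj, uint64_t off,
        const void *buf, uint64_t len, dmu_tx_t *tx)
    {
    	/* Appending writes rarely benefit from read prefetch. */
    	dmu_write(os, obj, off, len, buf, tx, DMU_READ_NO_PREFETCH);
    }
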
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 0bead6d49666..bf22d2eb68e7 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -28,7 +28,7 @@
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
- * Copyright (c) 2023, 2024, Klara Inc.
+ * Copyright (c) 2023, 2024, 2025, Klara, Inc.
*/
#include <sys/zfs_context.h>
@@ -237,9 +237,10 @@
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
*/
-avl_tree_t spa_namespace_avl;
-kmutex_t spa_namespace_lock;
-kcondvar_t spa_namespace_cv;
+static avl_tree_t spa_namespace_avl;
+static kmutex_t spa_namespace_lock;
+static kcondvar_t spa_namespace_cv;
+
static const int spa_max_replication_override = SPA_DVAS_PER_BP;
static kmutex_t spa_spare_lock;
@@ -608,6 +609,58 @@ spa_config_held(spa_t *spa, int locks, krw_t rw)
* ==========================================================================
*/
+void
+spa_namespace_enter(const void *tag)
+{
+ (void) tag;
+ ASSERT(!MUTEX_HELD(&spa_namespace_lock));
+ mutex_enter(&spa_namespace_lock);
+}
+
+boolean_t
+spa_namespace_tryenter(const void *tag)
+{
+ (void) tag;
+ ASSERT(!MUTEX_HELD(&spa_namespace_lock));
+ return (mutex_tryenter(&spa_namespace_lock));
+}
+
+int
+spa_namespace_enter_interruptible(const void *tag)
+{
+ (void) tag;
+ ASSERT(!MUTEX_HELD(&spa_namespace_lock));
+ return (mutex_enter_interruptible(&spa_namespace_lock));
+}
+
+void
+spa_namespace_exit(const void *tag)
+{
+ (void) tag;
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ mutex_exit(&spa_namespace_lock);
+}
+
+boolean_t
+spa_namespace_held(void)
+{
+ return (MUTEX_HELD(&spa_namespace_lock));
+}
+
+void
+spa_namespace_wait(void)
+{
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ cv_wait(&spa_namespace_cv, &spa_namespace_lock);
+}
+
+void
+spa_namespace_broadcast(void)
+{
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ cv_broadcast(&spa_namespace_cv);
+}
+
/*
* Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held.
* Returns NULL if no matching spa_t is found.
@@ -620,7 +673,7 @@ spa_lookup(const char *name)
avl_index_t where;
char *cp;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
retry:
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
@@ -645,7 +698,7 @@ retry:
spa->spa_load_thread != curthread) ||
(spa->spa_export_thread != NULL &&
spa->spa_export_thread != curthread)) {
- cv_wait(&spa_namespace_cv, &spa_namespace_lock);
+ spa_namespace_wait();
goto retry;
}
@@ -697,7 +750,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa_t *spa;
spa_config_dirent_t *dp;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
@@ -747,7 +800,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa_config_lock_init(spa);
spa_stats_init(spa);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
avl_add(&spa_namespace_avl, spa);
/*
@@ -837,7 +890,7 @@ spa_remove(spa_t *spa)
{
spa_config_dirent_t *dp;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
ASSERT(spa_state(spa) == POOL_STATE_UNINITIALIZED);
ASSERT3U(zfs_refcount_count(&spa->spa_refcount), ==, 0);
ASSERT0(spa->spa_waiters);
@@ -916,7 +969,7 @@ spa_remove(spa_t *spa)
spa_t *
spa_next(spa_t *prev)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
if (prev)
return (AVL_NEXT(&spa_namespace_avl, prev));
@@ -938,7 +991,7 @@ void
spa_open_ref(spa_t *spa, const void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
- MUTEX_HELD(&spa_namespace_lock) ||
+ spa_namespace_held() ||
spa->spa_load_thread == curthread);
(void) zfs_refcount_add(&spa->spa_refcount, tag);
}
@@ -951,7 +1004,7 @@ void
spa_close(spa_t *spa, const void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref ||
- MUTEX_HELD(&spa_namespace_lock) ||
+ spa_namespace_held() ||
spa->spa_load_thread == curthread ||
spa->spa_export_thread == curthread);
(void) zfs_refcount_remove(&spa->spa_refcount, tag);
@@ -980,7 +1033,7 @@ spa_async_close(spa_t *spa, const void *tag)
boolean_t
spa_refcount_zero(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
return (zfs_refcount_count(&spa->spa_refcount) == spa->spa_minref);
@@ -1227,7 +1280,7 @@ uint64_t
spa_vdev_enter(spa_t *spa)
{
mutex_enter(&spa->spa_vdev_top_lock);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
ASSERT0(spa->spa_export_thread);
@@ -1246,7 +1299,7 @@ uint64_t
spa_vdev_detach_enter(spa_t *spa, uint64_t guid)
{
mutex_enter(&spa->spa_vdev_top_lock);
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
ASSERT0(spa->spa_export_thread);
@@ -1270,7 +1323,7 @@ spa_vdev_detach_enter(spa_t *spa, uint64_t guid)
uint64_t
spa_vdev_config_enter(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
@@ -1285,7 +1338,7 @@ void
spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
const char *tag)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
int config_changed = B_FALSE;
@@ -1374,7 +1427,7 @@ spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
vdev_rebuild_restart(spa);
spa_vdev_config_exit(spa, vd, txg, error, FTAG);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
mutex_exit(&spa->spa_vdev_top_lock);
return (error);
@@ -1452,9 +1505,9 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
* If the config changed, update the config cache.
*/
if (config_changed) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
return (error);
@@ -1501,7 +1554,7 @@ spa_by_guid(uint64_t pool_guid, uint64_t device_guid)
spa_t *spa;
avl_tree_t *t = &spa_namespace_avl;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
if (spa->spa_state == POOL_STATE_UNINITIALIZED)
@@ -1583,7 +1636,7 @@ spa_load_guid_exists(uint64_t guid)
{
avl_tree_t *t = &spa_namespace_avl;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
for (spa_t *spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
if (spa_load_guid(spa) == guid)
@@ -2200,10 +2253,10 @@ spa_set_deadman_ziotime(hrtime_t ns)
spa_t *spa = NULL;
if (spa_mode_global != SPA_MODE_UNINIT) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
while ((spa = spa_next(spa)) != NULL)
spa->spa_deadman_ziotime = ns;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
}
@@ -2213,10 +2266,10 @@ spa_set_deadman_synctime(hrtime_t ns)
spa_t *spa = NULL;
if (spa_mode_global != SPA_MODE_UNINIT) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
while ((spa = spa_next(spa)) != NULL)
spa->spa_deadman_synctime = ns;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
}
@@ -3048,10 +3101,10 @@ param_set_deadman_failmode_common(const char *val)
return (SET_ERROR(EINVAL));
if (spa_mode_global != SPA_MODE_UNINIT) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
while ((spa = spa_next(spa)) != NULL)
spa_set_deadman_failmode(spa, val);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
}
return (0);
@@ -3135,7 +3188,6 @@ EXPORT_SYMBOL(spa_has_slogs);
EXPORT_SYMBOL(spa_is_root);
EXPORT_SYMBOL(spa_writeable);
EXPORT_SYMBOL(spa_mode);
-EXPORT_SYMBOL(spa_namespace_lock);
EXPORT_SYMBOL(spa_trust_config);
EXPORT_SYMBOL(spa_missing_tvds_allowed);
EXPORT_SYMBOL(spa_set_missing_tvds);
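
Since spa_namespace_lock is now static to spa_misc.c, every other consumer goes through the wrappers defined above; a sketch of the resulting calling convention, illustrative but mirroring the converted call sites throughout this diff:

    /* Typical consumer pattern after this change. */
    static int
    example_pool_op(const char *name)
    {
    	spa_t *spa;

    	spa_namespace_enter(FTAG);
    	if ((spa = spa_lookup(name)) == NULL) {
    		spa_namespace_exit(FTAG);
    		return (SET_ERROR(ENOENT));
    	}
    	/* ... operate on spa while the namespace is held ... */
    	spa_namespace_exit(FTAG);
    	return (0);
    }

The tag argument is currently discarded, but keeping FTAG at every call site leaves room for reference-tracking diagnostics later without another tree-wide sweep.
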
diff --git a/sys/contrib/openzfs/module/zfs/space_map.c b/sys/contrib/openzfs/module/zfs/space_map.c
index 5f24963f2291..f20c49ebb6de 100644
--- a/sys/contrib/openzfs/module/zfs/space_map.c
+++ b/sys/contrib/openzfs/module/zfs/space_map.c
@@ -537,7 +537,7 @@ space_map_write_intro_debug(space_map_t *sm, maptype_t maptype, dmu_tx_t *tx)
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_length,
- sizeof (dentry), &dentry, tx);
+ sizeof (dentry), &dentry, tx, DMU_READ_NO_PREFETCH);
sm->sm_phys->smp_length += sizeof (dentry);
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index c8d7280387a2..2a4d1876251f 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -449,32 +449,53 @@ vdev_get_nparity(vdev_t *vd)
}
static int
-vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
+vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
{
- spa_t *spa = vd->vdev_spa;
- objset_t *mos = spa->spa_meta_objset;
- uint64_t objid;
- int err;
if (vd->vdev_root_zap != 0) {
- objid = vd->vdev_root_zap;
+ *objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
- objid = vd->vdev_top_zap;
+ *objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
- objid = vd->vdev_leaf_zap;
+ *objid = vd->vdev_leaf_zap;
} else {
return (EINVAL);
}
+ return (0);
+}
+
+static int
+vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
+{
+ spa_t *spa = vd->vdev_spa;
+ objset_t *mos = spa->spa_meta_objset;
+ uint64_t objid;
+ int err;
+
+ if (vdev_prop_get_objid(vd, &objid) != 0)
+ return (EINVAL);
+
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
sizeof (uint64_t), 1, value);
-
if (err == ENOENT)
*value = vdev_prop_default_numeric(prop);
return (err);
}
+static int
+vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
+{
+ int err;
+ uint64_t ivalue;
+
+ err = vdev_prop_get_int(vd, prop, &ivalue);
+ *bvalue = ivalue != 0;
+
+ return (err);
+}
+
/*
* Get the number of data disks for a top-level vdev.
*/
@@ -737,8 +758,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
*/
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
+
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
+
+ vd->vdev_slow_io_events = vdev_prop_default_numeric(
+ VDEV_PROP_SLOW_IO_EVENTS);
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
@@ -3931,6 +3956,11 @@ vdev_load(vdev_t *vd)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
+ error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
+ &vd->vdev_slow_io_events);
+ if (error && error != ENOENT)
+ vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
+ "failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
&vd->vdev_slow_io_n);
if (error && error != ENOENT)
@@ -5980,15 +6010,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
/*
* Set vdev property values in the vdev props mos object.
*/
- if (vd->vdev_root_zap != 0) {
- objid = vd->vdev_root_zap;
- } else if (vd->vdev_top_zap != 0) {
- objid = vd->vdev_top_zap;
- } else if (vd->vdev_leaf_zap != 0) {
- objid = vd->vdev_leaf_zap;
- } else {
+ if (vdev_prop_get_objid(vd, &objid) != 0)
panic("unexpected vdev type");
- }
mutex_enter(&spa->spa_props_lock);
@@ -6215,6 +6238,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_io_t = intval;
break;
+ case VDEV_PROP_SLOW_IO_EVENTS:
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ vd->vdev_slow_io_events = intval != 0;
+ break;
case VDEV_PROP_SLOW_IO_N:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
@@ -6256,6 +6286,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvpair_t *elem = NULL;
nvlist_t *nvprops = NULL;
uint64_t intval = 0;
+ boolean_t boolval = 0;
char *strval = NULL;
const char *propname = NULL;
vdev_prop_t prop;
@@ -6269,15 +6300,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
- if (vd->vdev_root_zap != 0) {
- objid = vd->vdev_root_zap;
- } else if (vd->vdev_top_zap != 0) {
- objid = vd->vdev_top_zap;
- } else if (vd->vdev_leaf_zap != 0) {
- objid = vd->vdev_leaf_zap;
- } else {
+ if (vdev_prop_get_objid(vd, &objid) != 0)
return (SET_ERROR(EINVAL));
- }
ASSERT(objid != 0);
mutex_enter(&spa->spa_props_lock);
@@ -6622,6 +6646,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
intval, src);
break;
+ case VDEV_PROP_SLOW_IO_EVENTS:
+ err = vdev_prop_get_bool(vd, prop, &boolval);
+ if (err && err != ENOENT)
+ break;
+
+ src = ZPROP_SRC_LOCAL;
+ if (boolval == vdev_prop_default_numeric(prop))
+ src = ZPROP_SRC_DEFAULT;
+
+ vdev_prop_add_list(outnvl, propname, NULL,
+ boolval, src);
+ break;
case VDEV_PROP_CHECKSUM_N:
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
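
A sketch of how a load path might use the new vdev_prop_get_bool() helper, mirroring the vdev_load() hunk above; the helper name comes from the patch, while the wrapper below is illustrative:

    /*
     * Load a boolean vdev property, keeping the default on ENOENT
     * (vdev_prop_get_int() already substituted the default value).
     */
    static void
    load_flag_prop(vdev_t *vd, vdev_prop_t prop, boolean_t *flagp)
    {
    	int error = vdev_prop_get_bool(vd, prop, flagp);

    	if (error != 0 && error != ENOENT)
    		vdev_dbgmsg(vd, "prop %s lookup failed [error=%d]",
    		    vdev_prop_to_name(prop), error);
    }
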
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c b/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c
index c0127829c26c..ab7069f44b37 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c
@@ -147,7 +147,7 @@ vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
old_size = vdev_indirect_births_size_impl(vib);
dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
- &vibe, tx);
+ &vibe, tx, DMU_READ_NO_PREFETCH);
vib->vib_phys->vib_count++;
new_size = vdev_indirect_births_size_impl(vib);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c b/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c
index 1515ddc1baa2..da90a8de016f 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c
@@ -459,13 +459,14 @@ vdev_indirect_mapping_add_entries(vdev_indirect_mapping_t *vim,
dmu_write(vim->vim_objset, vim->vim_object,
vim->vim_phys->vimp_num_entries * sizeof (*mapbuf),
i * sizeof (*mapbuf),
- mapbuf, tx);
+ mapbuf, tx, DMU_READ_NO_PREFETCH);
if (vim->vim_havecounts) {
dmu_write(vim->vim_objset,
vim->vim_phys->vimp_counts_object,
vim->vim_phys->vimp_num_entries *
sizeof (*countbuf),
- i * sizeof (*countbuf), countbuf, tx);
+ i * sizeof (*countbuf), countbuf, tx,
+ DMU_READ_NO_PREFETCH);
}
vim->vim_phys->vimp_num_entries += i;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
index 27188c46e561..d13da1e5a663 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_initialize.c
@@ -685,7 +685,7 @@ vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list)
(void) spa;
vdev_t *vd;
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
while ((vd = list_remove_head(vd_list)) != NULL) {
@@ -728,7 +728,7 @@ vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state,
if (vd_list == NULL) {
vdev_initialize_stop_wait_impl(vd);
} else {
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
vd->vdev_spa->spa_export_thread == curthread);
list_insert_tail(vd_list, vd);
}
@@ -761,7 +761,7 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
spa_t *spa = vd->vdev_spa;
list_t vd_list;
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
list_create(&vd_list, sizeof (vdev_t),
@@ -781,7 +781,7 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
void
vdev_initialize_restart(vdev_t *vd)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
index 0d4fdaa77ba0..7e222eac5edc 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -862,8 +862,8 @@ retry:
}
}
- if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) {
- flags |= ZIO_FLAG_TRYHARD;
+ if (config == NULL && !(flags & ZIO_FLAG_IO_RETRY)) {
+ flags |= ZIO_FLAG_IO_RETRY;
goto retry;
}
@@ -1079,7 +1079,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
size_t buflen;
int error;
uint64_t spare_guid = 0, l2cache_guid = 0;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_TRYHARD;
boolean_t reason_spare = (reason == VDEV_LABEL_SPARE || (reason ==
VDEV_LABEL_REMOVE && vd->vdev_isspare));
boolean_t reason_l2cache = (reason == VDEV_LABEL_L2CACHE || (reason ==
@@ -1223,7 +1224,6 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
/*
* Write everything in parallel.
*/
-retry:
zio = zio_root(spa, NULL, NULL, flags);
for (int l = 0; l < VDEV_LABELS; l++) {
@@ -1248,11 +1248,6 @@ retry:
error = zio_wait(zio);
- if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
- flags |= ZIO_FLAG_TRYHARD;
- goto retry;
- }
-
nvlist_free(label);
abd_free(bootenv);
abd_free(ub_abd);
@@ -1398,7 +1393,8 @@ vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env)
zio_t *zio;
spa_t *spa = vd->vdev_spa;
vdev_boot_envblock_t *bootenv;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_TRYHARD;
int error;
size_t nvsize;
char *nvbuf;
@@ -1466,7 +1462,6 @@ vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env)
return (SET_ERROR(error));
}
-retry:
zio = zio_root(spa, NULL, NULL, flags);
for (int l = 0; l < VDEV_LABELS; l++) {
vdev_label_write(zio, vd, l, abd,
@@ -1475,10 +1470,6 @@ retry:
}
error = zio_wait(zio);
- if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
- flags |= ZIO_FLAG_TRYHARD;
- goto retry;
- }
abd_free(abd);
return (error);
@@ -2056,13 +2047,13 @@ retry:
* Normally, we don't want to try too hard to write every label and
* uberblock. If there is a flaky disk, we don't want the rest of the
* sync process to block while we retry. But if we can't write a
- * single label out, we should retry with ZIO_FLAG_TRYHARD before
+ * single label out, we should retry with ZIO_FLAG_IO_RETRY before
* bailing out and declaring the pool faulted.
*/
if (error != 0) {
- if ((flags & ZIO_FLAG_TRYHARD) != 0)
+ if ((flags & ZIO_FLAG_IO_RETRY) != 0)
return (error);
- flags |= ZIO_FLAG_TRYHARD;
+ flags |= ZIO_FLAG_IO_RETRY;
}
ASSERT(ub->ub_txg <= txg);
@@ -2113,7 +2104,7 @@ retry:
* are committed to stable storage before the uberblock update.
*/
if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0) {
- if ((flags & ZIO_FLAG_TRYHARD) != 0) {
+ if ((flags & ZIO_FLAG_IO_RETRY) != 0) {
zfs_dbgmsg("vdev_label_sync_list() returned error %d "
"for pool '%s' when syncing out the even labels "
"of dirty vdevs", error, spa_name(spa));
@@ -2137,7 +2128,7 @@ retry:
* to the new uberblocks.
*/
if ((error = vdev_uberblock_sync_list(svd, svdcount, ub, flags)) != 0) {
- if ((flags & ZIO_FLAG_TRYHARD) != 0) {
+ if ((flags & ZIO_FLAG_IO_RETRY) != 0) {
zfs_dbgmsg("vdev_uberblock_sync_list() returned error "
"%d for pool '%s'", error, spa_name(spa));
}
@@ -2158,7 +2149,7 @@ retry:
* stable storage before the next transaction group begins.
*/
if ((error = vdev_label_sync_list(spa, 1, txg, flags)) != 0) {
- if ((flags & ZIO_FLAG_TRYHARD) != 0) {
+ if ((flags & ZIO_FLAG_IO_RETRY) != 0) {
zfs_dbgmsg("vdev_label_sync_list() returned error %d "
"for pool '%s' when syncing out the odd labels of "
"dirty vdevs", error, spa_name(spa));
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index 56b8e3b60b22..5fe70ec2b1d5 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -4872,7 +4872,7 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr)
else
vre->vre_offset = RRSS_GET_OFFSET(&spa->spa_ubsync);
- /* Reflow the begining portion using the scratch area */
+ /* Reflow the beginning portion using the scratch area */
if (vre->vre_offset == 0) {
VERIFY0(dsl_sync_task(spa_name(spa),
NULL, raidz_reflow_scratch_sync,
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
index 47b3b9921abe..30be1f851eb3 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -1079,7 +1079,7 @@ vdev_rebuild_restart_impl(vdev_t *vd)
void
vdev_rebuild_restart(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_load_thread == curthread);
vdev_rebuild_restart_impl(spa->spa_root_vdev);
@@ -1094,7 +1094,7 @@ vdev_rebuild_stop_wait(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
if (vd == spa->spa_root_vdev) {
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index abb71543e3ab..81e6ecb68ff1 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -309,12 +309,12 @@ spa_vdev_noalloc(spa_t *spa, uint64_t guid)
uint64_t txg;
int error = 0;
- ASSERT(!MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(!spa_namespace_held());
ASSERT(spa_writeable(spa));
txg = spa_vdev_enter(spa);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
@@ -342,12 +342,12 @@ spa_vdev_alloc(spa_t *spa, uint64_t guid)
uint64_t txg;
int error = 0;
- ASSERT(!MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(!spa_namespace_held());
ASSERT(spa_writeable(spa));
txg = spa_vdev_enter(spa);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
@@ -2085,7 +2085,7 @@ vdev_remove_make_hole_and_free(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
vdev_t *rvd = spa->spa_root_vdev;
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
vdev_free(vd);
@@ -2113,7 +2113,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
ASSERT(vd->vdev_islog);
ASSERT(vd == vd->vdev_top);
ASSERT0P(vd->vdev_log_mg);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
/*
* Stop allocating from this vdev.
@@ -2140,7 +2140,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
* spa_namespace_lock held. Once this completes the device
* should no longer have any blocks allocated on it.
*/
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
if (vd->vdev_stat.vs_alloc != 0)
error = spa_reset_logs(spa);
@@ -2189,7 +2189,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
sysevent_t *ev = spa_event_create(spa, vd, NULL,
ESC_ZFS_VDEV_REMOVE_DEV);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
/* The top ZAP should have been destroyed by vdev_remove_empty. */
@@ -2433,7 +2433,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
uint64_t txg = 0;
uint_t nspares, nl2cache;
int error = 0, error_log;
- boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
+ boolean_t locked = spa_namespace_held();
sysevent_t *ev = NULL;
const char *vd_type = NULL;
char *vd_path = NULL;
@@ -2443,7 +2443,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
if (!locked)
txg = spa_vdev_enter(spa);
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) {
error = (spa_has_checkpoint(spa)) ?
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT;
diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c
index eee18b367909..a97f6650a81c 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_trim.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c
@@ -1045,7 +1045,7 @@ vdev_trim_stop_wait(spa_t *spa, list_t *vd_list)
(void) spa;
vdev_t *vd;
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
while ((vd = list_remove_head(vd_list)) != NULL) {
@@ -1085,7 +1085,7 @@ vdev_trim_stop(vdev_t *vd, vdev_trim_state_t tgt_state, list_t *vd_list)
if (vd_list == NULL) {
vdev_trim_stop_wait_impl(vd);
} else {
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
vd->vdev_spa->spa_export_thread == curthread);
list_insert_tail(vd_list, vd);
}
@@ -1122,7 +1122,7 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
list_t vd_list;
vdev_t *vd_l2cache;
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_export_thread == curthread);
list_create(&vd_list, sizeof (vdev_t),
@@ -1156,7 +1156,7 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state)
void
vdev_trim_restart(vdev_t *vd)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
vd->vdev_spa->spa_load_thread == curthread);
ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
@@ -1582,7 +1582,7 @@ vdev_autotrim_stop_all(spa_t *spa)
void
vdev_autotrim_restart(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+ ASSERT(spa_namespace_held() ||
spa->spa_load_thread == curthread);
if (spa->spa_autotrim)
vdev_autotrim(spa);
@@ -1689,7 +1689,7 @@ vdev_trim_l2arc_thread(void *arg)
void
vdev_trim_l2arc(spa_t *spa)
{
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
/*
* Locate the spa's l2arc devices and kick off TRIM threads.
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
index ea4e3117a8b9..7e9e625a193e 100644
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -625,12 +625,10 @@ zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
ASSERT0(db->db_offset);
objset_t *os = dmu_buf_get_objset(db);
uint64_t obj = db->db_object;
- dmu_object_info_t doi;
*zapp = NULL;
- dmu_object_info_from_dnode(dn, &doi);
- if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
+ if (DMU_OT_BYTESWAP(dn->dn_type) != DMU_BSWAP_ZAP)
return (SET_ERROR(EINVAL));
zap_t *zap = dmu_buf_get_user(db);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c
index 221f24e381dc..4a0d41c24eed 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_fm.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c
@@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
case VDEV_PROP_IO_T:
propval = vd->vdev_io_t;
break;
+ case VDEV_PROP_SLOW_IO_EVENTS:
+ propval = vd->vdev_slow_io_events;
+ break;
case VDEV_PROP_SLOW_IO_N:
propval = vd->vdev_slow_io_n;
break;
@@ -1580,10 +1583,10 @@ zfs_ereport_zvol_post(const char *subclass, const char *name,
nvlist_t *aux;
char *r;
- boolean_t locked = mutex_owned(&spa_namespace_lock);
- if (!locked) mutex_enter(&spa_namespace_lock);
+ boolean_t locked = spa_namespace_held();
+ if (!locked) spa_namespace_enter(FTAG);
spa_t *spa = spa_lookup(name);
- if (!locked) mutex_exit(&spa_namespace_lock);
+ if (!locked) spa_namespace_exit(FTAG);
if (spa == NULL)
return;
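
spa_namespace_held() also supports the conditional-entry idiom used in zfs_ereport_zvol_post() above, for paths that may be reached with or without the lock already held; as a sketch:

    /* Enter the namespace lock only if this thread doesn't hold it. */
    static spa_t *
    lookup_maybe_locked(const char *name)
    {
    	boolean_t locked = spa_namespace_held();
    	spa_t *spa;

    	if (!locked)
    		spa_namespace_enter(FTAG);
    	spa = spa_lookup(name);
    	if (!locked)
    		spa_namespace_exit(FTAG);
    	return (spa);
    }
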
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
index 2af1efe82e62..aa10741ba870 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
@@ -28,8 +28,8 @@
#include <sys/avl.h>
#include <sys/zap.h>
#include <sys/nvpair.h>
-#ifdef _KERNEL
#include <sys/sid.h>
+#ifdef _KERNEL
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#endif
@@ -268,7 +268,7 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
nvlist_free(nvp);
zfsvfs->z_fuid_size = nvsize;
dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
- zfsvfs->z_fuid_size, packed, tx);
+ zfsvfs->z_fuid_size, packed, tx, DMU_READ_NO_PREFETCH);
kmem_free(packed, zfsvfs->z_fuid_size);
VERIFY0(dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db));
dmu_buf_will_dirty(db, tx);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 5ca7c2320c4e..1b2392aeaa85 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -212,6 +212,8 @@
#include <sys/vdev_impl.h>
#include <sys/vdev_initialize.h>
#include <sys/vdev_trim.h>
+#include <sys/brt.h>
+#include <sys/ddt.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
@@ -3122,12 +3124,12 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
if (pair != NULL && strcmp(nvpair_name(pair),
zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
nvlist_next_nvpair(props, pair) == NULL) {
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if ((spa = spa_lookup(zc->zc_name)) != NULL) {
spa_configfile_set(spa, props, B_FALSE);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
}
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
if (spa != NULL) {
nvlist_free(props);
return (0);
@@ -3176,14 +3178,14 @@ zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
* get (such as altroot and cachefile), so attempt to get them
* anyway.
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
if ((spa = spa_lookup(pool)) != NULL) {
error = spa_prop_get(spa, outnvl);
if (error == 0 && props != NULL)
error = spa_prop_get_nvlist(spa, props, n_props,
outnvl);
}
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
} else {
error = spa_prop_get(spa, outnvl);
if (error == 0 && props != NULL)
@@ -4276,13 +4278,11 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
spa_t *spa;
int32_t type;
- /*
- * Currently, only ZPOOL_PREFETCH_DDT is supported
- */
- if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 ||
- type != ZPOOL_PREFETCH_DDT) {
+ if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0)
+ return (EINVAL);
+
+ if (type != ZPOOL_PREFETCH_DDT && type != ZPOOL_PREFETCH_BRT)
return (EINVAL);
- }
error = spa_open(poolname, &spa, FTAG);
if (error != 0)
@@ -4290,10 +4290,17 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
hrtime_t start_time = gethrtime();
- ddt_prefetch_all(spa);
-
- zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name,
- (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
+ if (type == ZPOOL_PREFETCH_DDT) {
+ ddt_prefetch_all(spa);
+ zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms",
+ spa->spa_name,
+ (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
+ } else {
+ brt_prefetch_all(spa);
+ zfs_dbgmsg("pool '%s': loaded brt into ARC in %llu ms",
+ spa->spa_name,
+ (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
+ }
spa_close(spa, FTAG);
@@ -6121,10 +6128,10 @@ zfs_ioc_clear(zfs_cmd_t *zc)
/*
* On zpool clear we also fix up missing slogs
*/
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
spa = spa_lookup(zc->zc_name);
if (spa == NULL) {
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (SET_ERROR(EIO));
}
if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
@@ -6132,7 +6139,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
spa_set_log_state(spa, SPA_LOG_CLEAR);
}
spa->spa_last_open_failed = 0;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
if (zc->zc_cookie & ZPOOL_NO_REWIND) {
error = spa_open(zc->zc_name, &spa, FTAG);
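
With ZPOOL_PREFETCH_BRT accepted alongside ZPOOL_PREFETCH_DDT, user space can warm the block reference table the same way it warms the dedup table. Assuming the libzfs_core wrapper lzc_pool_prefetch() fronts this ioctl (an assumption to verify against your libzfs_core headers), a caller might look like:

    #include <libzfs_core.h>
    #include <stdio.h>

    int
    main(int argc, char **argv)
    {
    	int err;

    	if (argc != 2) {
    		fprintf(stderr, "usage: %s <pool>\n", argv[0]);
    		return (1);
    	}
    	if ((err = libzfs_core_init()) != 0)
    		return (err);
    	/* lzc_pool_prefetch() is assumed here; see the note above. */
    	err = lzc_pool_prefetch(argv[1], ZPOOL_PREFETCH_BRT);
    	libzfs_core_fini();
    	return (err);
    }
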
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index aeea58bedfe4..74373f759cec 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -3318,8 +3318,8 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
} else if (any_failed && candidate > SPA_OLD_GANGBLOCKSIZE &&
spa_feature_is_enabled(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER) &&
!spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER)) {
- dmu_tx_t *tx =
- dmu_tx_create_assigned(spa->spa_dsl_pool, txg + 1);
+ dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool,
+ MAX(txg, spa_syncing_txg(spa) + 1));
dsl_sync_task_nowait(spa->spa_dsl_pool,
zio_update_feature,
(void *)SPA_FEATURE_DYNAMIC_GANG_HEADER, tx);
@@ -5569,9 +5569,12 @@ zio_done(zio_t *zio)
zio->io_vd->vdev_stat.vs_slow_ios++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
- (void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
- zio->io_spa, zio->io_vd, &zio->io_bookmark,
- zio, 0);
+ if (zio->io_vd->vdev_slow_io_events) {
+ (void) zfs_ereport_post(
+ FM_EREPORT_ZFS_DELAY,
+ zio->io_spa, zio->io_vd,
+ &zio->io_bookmark, zio, 0);
+ }
}
}
}
diff --git a/sys/contrib/openzfs/module/zfs/zio_inject.c b/sys/contrib/openzfs/module/zfs/zio_inject.c
index 287577018ed1..c3adfdab54ce 100644
--- a/sys/contrib/openzfs/module/zfs/zio_inject.c
+++ b/sys/contrib/openzfs/module/zfs/zio_inject.c
@@ -1008,9 +1008,9 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
if (zio_pool_handler_exists(name, record->zi_cmd))
return (SET_ERROR(EEXIST));
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
boolean_t has_spa = spa_lookup(name) != NULL;
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
if (record->zi_cmd == ZINJECT_DELAY_IMPORT && has_spa)
return (SET_ERROR(EEXIST));
@@ -1095,7 +1095,7 @@ zio_inject_list_next(int *id, char *name, size_t buflen,
inject_handler_t *handler;
int ret;
- mutex_enter(&spa_namespace_lock);
+ spa_namespace_enter(FTAG);
rw_enter(&inject_lock, RW_READER);
for (handler = list_head(&inject_handlers); handler != NULL;
@@ -1117,7 +1117,7 @@ zio_inject_list_next(int *id, char *name, size_t buflen,
}
rw_exit(&inject_lock);
- mutex_exit(&spa_namespace_lock);
+ spa_namespace_exit(FTAG);
return (ret);
}
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 00f98168d3d8..407758641580 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -547,7 +547,8 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
if (error) {
dmu_tx_abort(tx);
} else {
- dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
+ dmu_write(os, ZVOL_OBJ, offset, length, data, tx,
+ DMU_READ_PREFETCH);
(void) zil_replaying(zv->zv_zilog, tx);
dmu_tx_commit(tx);
}
@@ -1232,7 +1233,7 @@ zvol_first_open(zvol_state_t *zv, boolean_t readonly)
ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
- ASSERT(mutex_owned(&spa_namespace_lock));
+ ASSERT(spa_namespace_held());
boolean_t ro = (readonly || (strchr(zv->zv_name, '@') != NULL));
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os);
@@ -1302,7 +1303,7 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg)
list_t *minors_list = j->list;
const char *name = j->name;
- ASSERT0(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT0(spa_namespace_held());
/* skip the designated dataset */
if (name && strcmp(dsname, name) == 0)
@@ -1402,7 +1403,7 @@ zvol_create_minors_cb(const char *dsname, void *arg)
int error;
list_t *minors_list = arg;
- ASSERT0(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT0(spa_namespace_held());
error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL);
if (error)
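
Two details in the zvol.c hunks are easy to misread. First, dmu_write() gains a flags argument here, with the replay path passing DMU_READ_PREFETCH. Second, the assertions flip sense between call sites: zvol_first_open() uses a plain ASSERT(spa_namespace_held()) because it runs with the namespace lock held, while the minor-creation callbacks use ASSERT0(), i.e. they assert the lock is *not* held, since they run from callback context. A tiny sketch of the ASSERT0 convention, with lock_held as a hypothetical stand-in for spa_namespace_held():

#include <assert.h>

#define	ASSERT0(x)	assert((x) == 0)	/* ZFS: value must be zero */

static int lock_held;	/* hypothetical; 0 means the lock is free */

static void
callback(void)
{
	ASSERT0(lock_held);	/* must NOT hold the namespace lock here */
}

int
main(void)
{
	callback();
	return (0);
}
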
diff --git a/sys/contrib/openzfs/module/zstd/include/aarch64_compat.h b/sys/contrib/openzfs/module/zstd/include/aarch64_compat.h
deleted file mode 100644
index 9500a832b81c..000000000000
--- a/sys/contrib/openzfs/module/zstd/include/aarch64_compat.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: BSD-3-Clause
-/*
- * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 2018-2020, Sebastian Gottschall
- */
-
-#ifdef _KERNEL
-#undef __aarch64__
-#endif
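
The deleted header's entire payload was the #undef at its end: lying to zstd about the target architecture so it could never emit NEON code in kernel builds. That hack hides far more than SIMD and silently breaks any later __aarch64__ check. The two hunks that follow replace it with a targeted switch; the contrast below paraphrases how the bundled zstd gates its NEON usage (the exact guards in zstd may differ):

/* Old: hide the whole architecture from zstd in kernel builds. */
#ifdef _KERNEL
#undef __aarch64__
#endif

/* New: disable only the SIMD intrinsics (see zstd_internal.h below)... */
#ifdef _KERNEL
#define ZSTD_NO_INTRINSICS
#endif

/* ...which zstd consumes roughly like this before touching NEON: */
#if defined(__aarch64__) && !defined(ZSTD_NO_INTRINSICS)
#include <arm_neon.h>	/* NEON intrinsics need FPU state saved/restored */
#endif
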
diff --git a/sys/contrib/openzfs/module/zstd/lib/common/compiler.h b/sys/contrib/openzfs/module/zstd/lib/common/compiler.h
index d0f588e2ec3c..c8d65a201212 100644
--- a/sys/contrib/openzfs/module/zstd/lib/common/compiler.h
+++ b/sys/contrib/openzfs/module/zstd/lib/common/compiler.h
@@ -115,9 +115,6 @@
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
-# elif defined(__aarch64__)
-# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
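
With __aarch64__ no longer undefined in kernel builds, the hand-written prfm inline assembly is dropped rather than kept: aarch64 now falls through to the surviving __builtin_prefetch() branch, which GCC and Clang lower to the same prfm pldl1keep/pldl2keep instructions without inline asm. A sketch of what that branch gives aarch64 (locality 3 targets L1, 2 targets L2):

static inline void
prefetch_l1(const void *p)
{
	__builtin_prefetch(p, 0 /* read */, 3 /* keep in all cache levels */);
}

static inline void
prefetch_l2(const void *p)
{
	__builtin_prefetch(p, 0 /* read */, 2 /* moderate locality: L2 */);
}
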
diff --git a/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h b/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h
index 6b1fc44cf9f6..9650af77bcea 100644
--- a/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h
+++ b/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h
@@ -12,6 +12,15 @@
#ifndef ZSTD_CCOMMON_H_MODULE
#define ZSTD_CCOMMON_H_MODULE
+/*
+ * Disable the aarch64 NEON SIMD intrinsics for kernel builds. Safely
+ * using them in the kernel context requires saving/restoring the FPU
+ * registers, which is not currently done.
+ */
+#ifdef _KERNEL
+#define ZSTD_NO_INTRINSICS
+#endif
+
/* this module contains definitions which must be identical
* across compression, decompression and dictBuilder.
* It also contains a few functions useful to at least 2 of them