diff options
Diffstat (limited to 'sys/contrib/openzfs/module')
53 files changed, 701 insertions, 516 deletions
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index 58a80dc4402c..95313c984178 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -293,10 +293,9 @@ ZSTD_UPSTREAM_OBJS := \ zfs-objs += $(addprefix zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) -# Disable aarch64 neon SIMD instructions for kernel mode $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -I$(zstd_include) $(ZFS_ZSTD_FLAGS) $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : asflags-y += -I$(zstd_include) -$(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w +$(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w $(obj)/zstd/zfs_zstd.o : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h diff --git a/sys/contrib/openzfs/module/Makefile.bsd b/sys/contrib/openzfs/module/Makefile.bsd index 3ba38c43f25b..c20fdc0c483b 100644 --- a/sys/contrib/openzfs/module/Makefile.bsd +++ b/sys/contrib/openzfs/module/Makefile.bsd @@ -521,30 +521,6 @@ CFLAGS.zstd_ldm.c= -U__BMI__ -fno-tree-vectorize ${NO_WBITWISE_INSTEAD_OF_LOGICA CFLAGS.zstd_opt.c= -U__BMI__ -fno-tree-vectorize ${NO_WBITWISE_INSTEAD_OF_LOGICAL} .if ${MACHINE_ARCH} == "aarch64" -__ZFS_ZSTD_AARCH64_FLAGS= -include ${SRCDIR}/zstd/include/aarch64_compat.h -CFLAGS.zstd.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.entropy_common.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.error_private.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.fse_compress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.fse_decompress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.hist.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.huf_compress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.huf_decompress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.pool.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.xxhash.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_common.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_compress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_compress_literals.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_compress_sequences.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_compress_superblock.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_ddict.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_decompress.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_decompress_block.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_double_fast.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_fast.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_lazy.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_ldm.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} -CFLAGS.zstd_opt.c+= ${__ZFS_ZSTD_AARCH64_FLAGS} sha256-armv8.o: sha256-armv8.S ${CC} -c ${CFLAGS:N-mgeneral-regs-only} ${WERROR} ${.IMPSRC} \ diff --git a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c index 806c0b028017..35fe55b2595d 100644 --- a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c +++ b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c @@ -31,7 +31,6 @@ */ -#include <sys/zfs_context.h> #include <sys/crypto/common.h> #include <sys/crypto/impl.h> #include <sys/crypto/sched_impl.h> diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c index 54d4029c5e6f..b92be3710f3c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_uio.c @@ -238,7 +238,7 @@ zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages) 
zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]); if (res != n) - return (SET_ERROR(EFAULT)); + return (EFAULT); ASSERT3U(len, ==, res * PAGE_SIZE); *numpages = res; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c index 26cc7981bfcd..1990ec677d37 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c @@ -76,7 +76,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, return (0); err = dmu_buf_hold_array(os, object, offset, size, - FALSE, FTAG, &numbufs, &dbp); + FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); if (err) return (err); @@ -147,7 +147,8 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, ASSERT3S(last_size, <=, PAGE_SIZE); err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), - IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); + IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp, + DMU_READ_PREFETCH); if (err != 0) return (err); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c index 2d04ccf95fbf..d918b26521a7 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/spa_os.c @@ -193,7 +193,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) */ config = spa_generate_rootconf(name); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if (config != NULL) { pname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); VERIFY0(strcmp(name, pname)); @@ -204,7 +204,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) * e.g., after reboot -r. */ if (spa->spa_state == POOL_STATE_ACTIVE) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); fnvlist_free(config); return (0); } @@ -226,7 +226,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; } else if ((spa = spa_lookup(name)) == NULL) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); fnvlist_free(config); cmn_err(CE_NOTE, "Cannot find the pool label for '%s'", name); @@ -249,7 +249,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); fnvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", name); @@ -259,7 +259,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind) spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); spa_config_exit(spa, SCL_ALL, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); fnvlist_free(config); return (0); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c index 11e93b800a54..9663f05cb354 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_label_os.c @@ -42,7 +42,8 @@ vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size) spa_t *spa = vd->vdev_spa; zio_t *zio; abd_t *pad2; - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_TRYHARD; int error; if (size > VDEV_PAD_SIZE) @@ -59,16 +60,11 @@ vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size) 
abd_copy_from_buf(pad2, buf, size); abd_zero_off(pad2, size, VDEV_PAD_SIZE - size); -retry: zio = zio_root(spa, NULL, NULL, flags); vdev_label_write(zio, vd, 0, pad2, offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, NULL, NULL, flags); error = zio_wait(zio); - if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { - flags |= ZIO_FLAG_TRYHARD; - goto retry; - } abd_free(pad2); return (error); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c index cb5787269db2..c98ccd756405 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c @@ -1262,7 +1262,8 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) if (aclnode->z_ace_count == 0) continue; dmu_write(zfsvfs->z_os, aoid, off, - aclnode->z_size, aclnode->z_acldata, tx); + aclnode->z_size, aclnode->z_acldata, tx, + DMU_READ_NO_PREFETCH); off += aclnode->z_size; } } else { diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c index dcdefae56639..29711fcf5d2c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ioctl_os.c @@ -108,11 +108,11 @@ zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) "command", &command) != 0) return (EINVAL); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa = spa_by_guid(pool_guid, vdev_guid); if (spa != NULL) strcpy(name, spa_name(spa)); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); if (spa == NULL) return (ENOENT); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index f34a2fd37a77..8a9d23d0d554 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -278,7 +278,7 @@ zfs_ioctl_getxattr(vnode_t *vp, zfsxattr_t *fsx) memset(fsx, 0, sizeof (*fsx)); fsx->fsx_xflags = (zp->z_pflags & ZFS_PROJINHERIT) ? 
- ZFS_PROJINHERIT_FL : 0; + FS_PROJINHERIT_FL : 0; fsx->fsx_projid = zp->z_projid; return (0); @@ -290,7 +290,7 @@ zfs_ioctl_setflags(vnode_t *vp, uint32_t ioctl_flags, xvattr_t *xva) uint64_t zfs_flags = VTOZ(vp)->z_pflags; xoptattr_t *xoap; - if (ioctl_flags & ~(ZFS_PROJINHERIT_FL)) + if (ioctl_flags & ~(FS_PROJINHERIT_FL)) return (SET_ERROR(EOPNOTSUPP)); xva_init(xva); @@ -304,7 +304,7 @@ zfs_ioctl_setflags(vnode_t *vp, uint32_t ioctl_flags, xvattr_t *xva) } \ } while (0) - FLAG_CHANGE(ZFS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT, + FLAG_CHANGE(FS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT, xoap->xoa_projinherit); #undef FLAG_CHANGE @@ -4479,7 +4479,8 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, for (i = 0; wlen > 0; woff += tocopy, wlen -= tocopy, i++) { tocopy = MIN(PAGE_SIZE, wlen); va = zfs_map_page(ma[i], &sf); - dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx); + dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx, + DMU_READ_PREFETCH); zfs_unmap_page(sf); } } else { @@ -5757,7 +5758,7 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap) { ulong_t val; int error; -#ifdef _PC_CLONE_BLKSIZE +#if defined(_PC_CLONE_BLKSIZE) || defined(_PC_CASE_INSENSITIVE) zfsvfs_t *zfsvfs; #endif @@ -5821,6 +5822,15 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap) *ap->a_retval = 0; return (0); #endif +#ifdef _PC_CASE_INSENSITIVE + case _PC_CASE_INSENSITIVE: + zfsvfs = (zfsvfs_t *)ap->a_vp->v_mount->mnt_data; + if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) + *ap->a_retval = 1; + else + *ap->a_retval = 0; + return (0); +#endif default: return (vop_stdpathconf(ap)); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index 3ddbfcb97184..dc30f6dd939c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -283,8 +283,8 @@ retry: * Take spa_namespace_lock to prevent lock inversion when * zvols from one pool are opened as vdevs in another. */ - if (!mutex_owned(&spa_namespace_lock)) { - if (!mutex_tryenter(&spa_namespace_lock)) { + if (!spa_namespace_held()) { + if (!spa_namespace_tryenter(FTAG)) { mutex_exit(&zv->zv_state_lock); rw_exit(&zv->zv_suspend_lock); drop_suspend = B_FALSE; @@ -296,7 +296,7 @@ retry: } err = zvol_first_open(zv, !(flag & FWRITE)); if (drop_namespace) - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); if (err) goto out_locked; pp->mediasize = zv->zv_volsize; @@ -963,8 +963,8 @@ retry: * Take spa_namespace_lock to prevent lock inversion when * zvols from one pool are opened as vdevs in another. 
*/ - if (!mutex_owned(&spa_namespace_lock)) { - if (!mutex_tryenter(&spa_namespace_lock)) { + if (!spa_namespace_held()) { + if (!spa_namespace_tryenter(FTAG)) { mutex_exit(&zv->zv_state_lock); rw_exit(&zv->zv_suspend_lock); drop_suspend = B_FALSE; @@ -976,7 +976,7 @@ retry: } err = zvol_first_open(zv, !(flags & FWRITE)); if (drop_namespace) - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); if (err) goto out_locked; } diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c index 092f090d934b..00ff789265c6 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c @@ -32,7 +32,6 @@ #include <sys/taskq.h> #include <sys/kmem.h> #include <sys/tsd.h> -#include <sys/trace_spl.h> #include <sys/time.h> #include <sys/atomic.h> #include <sys/kstat.h> @@ -325,7 +324,6 @@ task_expire_impl(taskq_ent_t *t) } t->tqent_birth = jiffies; - DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t); /* * The priority list must be maintained in strict task id order @@ -713,9 +711,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) t->tqent_taskq = tq; t->tqent_timer.function = NULL; t->tqent_timer.expires = 0; - t->tqent_birth = jiffies; - DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t); ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); @@ -840,9 +836,7 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, t->tqent_func = func; t->tqent_arg = arg; t->tqent_taskq = tq; - t->tqent_birth = jiffies; - DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t); spin_unlock(&t->tqent_lock); @@ -1054,11 +1048,6 @@ taskq_thread(void *args) * A TQENT_FLAG_PREALLOC task may be reused or freed * during the task function call. Store tqent_id and * tqent_flags here. - * - * Also use an on stack taskq_ent_t for tqt_task - * assignment in this case; we want to make sure - * to duplicate all fields, so the values are - * correct when it's accessed via DTRACE_PROBE*. 
*/ tqt->tqt_id = t->tqent_id; tqt->tqt_flags = t->tqent_flags; @@ -1074,13 +1063,10 @@ taskq_thread(void *args) spin_unlock_irqrestore(&tq->tq_lock, flags); TQSTAT_INC(tq, threads_active); - DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t); /* Perform the requested task */ t->tqent_func(t->tqent_arg); - DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t); - TQSTAT_DEC(tq, threads_active); if ((t->tqent_flags & TQENT_LIST_MASK) == TQENT_LIST_PENDING) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c index 934d74a112fd..4c929a4642b1 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c @@ -1447,7 +1447,8 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) if (aclnode->z_ace_count == 0) continue; dmu_write(zfsvfs->z_os, aoid, off, - aclnode->z_size, aclnode->z_acldata, tx); + aclnode->z_size, aclnode->z_acldata, tx, + DMU_READ_NO_PREFETCH); off += aclnode->z_size; } } else { diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index e845ad69ad78..02465adf36d5 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -3892,7 +3892,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, va = kmap(pp); ASSERT3U(pglen, <=, PAGE_SIZE); - dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx); + dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx, + DMU_READ_PREFETCH); kunmap(pp); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c index 02965ac8cbee..f7691c02d163 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c @@ -811,28 +811,44 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice) return (error); } -#define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL) -#define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL) +#define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL) +#define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | FS_PROJINHERIT_FL) + + +static struct { + uint64_t zfs_flag; + uint32_t fs_flag; + uint32_t xflag; +} flags_lookup[] = { + {ZFS_IMMUTABLE, FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE}, + {ZFS_APPENDONLY, FS_APPEND_FL, FS_XFLAG_APPEND}, + {ZFS_NODUMP, FS_NODUMP_FL, FS_XFLAG_NODUMP}, + {ZFS_PROJINHERIT, FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT} +}; static uint32_t __zpl_ioctl_getflags(struct inode *ip) { uint64_t zfs_flags = ITOZ(ip)->z_pflags; uint32_t ioctl_flags = 0; + for (int i = 0; i < ARRAY_SIZE(flags_lookup); i++) + if (zfs_flags & flags_lookup[i].zfs_flag) + ioctl_flags |= flags_lookup[i].fs_flag; - if (zfs_flags & ZFS_IMMUTABLE) - ioctl_flags |= FS_IMMUTABLE_FL; - - if (zfs_flags & ZFS_APPENDONLY) - ioctl_flags |= FS_APPEND_FL; + return (ioctl_flags); +} - if (zfs_flags & ZFS_NODUMP) - ioctl_flags |= FS_NODUMP_FL; +static uint32_t +__zpl_ioctl_getxflags(struct inode *ip) +{ + uint64_t zfs_flags = ITOZ(ip)->z_pflags; + uint32_t ioctl_flags = 0; - if (zfs_flags & ZFS_PROJINHERIT) - ioctl_flags |= ZFS_PROJINHERIT_FL; + for (int i = 0; i < ARRAY_SIZE(flags_lookup); i++) + if (zfs_flags & flags_lookup[i].zfs_flag) + ioctl_flags |= flags_lookup[i].xflag; - return (ioctl_flags & 
ZFS_FL_USER_VISIBLE); + return (ioctl_flags); } /* @@ -846,6 +862,7 @@ zpl_ioctl_getflags(struct file *filp, void __user *arg) int err; flags = __zpl_ioctl_getflags(file_inode(filp)); + flags = flags & ZFS_FL_USER_VISIBLE; err = copy_to_user(arg, &flags, sizeof (flags)); return (err); @@ -869,7 +886,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva) xoptattr_t *xoap; if (ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | - ZFS_PROJINHERIT_FL)) + FS_PROJINHERIT_FL)) return (-EOPNOTSUPP); if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE) @@ -900,7 +917,51 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva) xoap->xoa_appendonly); FLAG_CHANGE(FS_NODUMP_FL, ZFS_NODUMP, XAT_NODUMP, xoap->xoa_nodump); - FLAG_CHANGE(ZFS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT, + FLAG_CHANGE(FS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT, + xoap->xoa_projinherit); + +#undef FLAG_CHANGE + + return (0); +} + +static int +__zpl_ioctl_setxflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva) +{ + uint64_t zfs_flags = ITOZ(ip)->z_pflags; + xoptattr_t *xoap; + + if (ioctl_flags & ~(FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | + FS_XFLAG_NODUMP | FS_XFLAG_PROJINHERIT)) + return (-EOPNOTSUPP); + + if ((fchange(ioctl_flags, zfs_flags, FS_XFLAG_IMMUTABLE, + ZFS_IMMUTABLE) || + fchange(ioctl_flags, zfs_flags, FS_XFLAG_APPEND, ZFS_APPENDONLY)) && + !capable(CAP_LINUX_IMMUTABLE)) + return (-EPERM); + + if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip)) + return (-EACCES); + + xva_init(xva); + xoap = xva_getxoptattr(xva); + +#define FLAG_CHANGE(iflag, zflag, xflag, xfield) do { \ + if (((ioctl_flags & (iflag)) && !(zfs_flags & (zflag))) || \ + ((zfs_flags & (zflag)) && !(ioctl_flags & (iflag)))) { \ + XVA_SET_REQ(xva, (xflag)); \ + (xfield) = ((ioctl_flags & (iflag)) != 0); \ + } \ +} while (0) + + FLAG_CHANGE(FS_XFLAG_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, + xoap->xoa_immutable); + FLAG_CHANGE(FS_XFLAG_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, + xoap->xoa_appendonly); + FLAG_CHANGE(FS_XFLAG_NODUMP, ZFS_NODUMP, XAT_NODUMP, + xoap->xoa_nodump); + FLAG_CHANGE(FS_XFLAG_PROJINHERIT, ZFS_PROJINHERIT, XAT_PROJINHERIT, xoap->xoa_projinherit); #undef FLAG_CHANGE @@ -941,7 +1002,7 @@ zpl_ioctl_getxattr(struct file *filp, void __user *arg) struct inode *ip = file_inode(filp); int err; - fsx.fsx_xflags = __zpl_ioctl_getflags(ip); + fsx.fsx_xflags = __zpl_ioctl_getxflags(ip); fsx.fsx_projid = ITOZ(ip)->z_projid; err = copy_to_user(arg, &fsx, sizeof (fsx)); @@ -965,7 +1026,7 @@ zpl_ioctl_setxattr(struct file *filp, void __user *arg) if (!zpl_is_valid_projid(fsx.fsx_projid)) return (-EINVAL); - err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva); + err = __zpl_ioctl_setxflags(ip, fsx.fsx_xflags, &xva); if (err) return (err); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index fe939150b641..89f9bc555fcf 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -809,8 +809,8 @@ retry: * the kernel so the only option is to return the error for * the caller to handle it. 
*/ - if (!mutex_owned(&spa_namespace_lock)) { - if (!mutex_tryenter(&spa_namespace_lock)) { + if (!spa_namespace_held()) { + if (!spa_namespace_tryenter(FTAG)) { mutex_exit(&zv->zv_state_lock); rw_exit(&zv->zv_suspend_lock); drop_suspend = B_FALSE; @@ -834,7 +834,7 @@ retry: error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag))); if (drop_namespace) - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } if (error == 0) { diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c index 07819ba2be8b..4826237b23e8 100644 --- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c +++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c @@ -481,6 +481,9 @@ vdev_prop_init(void) zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table, sfeatures); + zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events", + B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", + "SLOW_IO_EVENTS", boolean_table, sfeatures); /* hidden properties */ zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING, diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index dbb5e942e2e6..48bf99f1aeb7 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -8548,7 +8548,7 @@ l2arc_dev_get_next(void) * of cache devices (l2arc_dev_mtx). Once a device has been selected, * both locks will be dropped and a spa config lock held instead. */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); mutex_enter(&l2arc_dev_mtx); /* if there are no vdevs, there is nothing to do */ @@ -8591,7 +8591,7 @@ out: */ if (next != NULL) spa_config_enter(next->l2ad_spa, SCL_L2ARC, next, RW_READER); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (next); } @@ -10231,7 +10231,7 @@ l2arc_stop(void) void l2arc_spa_rebuild_start(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); /* * Locate the spa's l2arc devices and kick off rebuild threads. 
@@ -10256,7 +10256,7 @@ l2arc_spa_rebuild_start(spa_t *spa) void l2arc_spa_rebuild_stop(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); for (int i = 0; i < spa->spa_l2cache.sav_count; i++) { diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c index ea9fbd036c6e..afcb2374f824 100644 --- a/sys/contrib/openzfs/module/zfs/bpobj.c +++ b/sys/contrib/openzfs/module/zfs/bpobj.c @@ -752,7 +752,8 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) } dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), - numsubsub * sizeof (subobj), subdb->db_data, tx); + numsubsub * sizeof (subobj), subdb->db_data, tx, + DMU_READ_NO_PREFETCH); dmu_buf_rele(subdb, FTAG); bpo->bpo_phys->bpo_num_subobjs += numsubsub; @@ -777,7 +778,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) dmu_write(bpo->bpo_os, bpo->bpo_object, bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), numbps * sizeof (blkptr_t), - bps->db_data, tx); + bps->db_data, tx, DMU_READ_NO_PREFETCH); dmu_buf_rele(bps, FTAG); bpo->bpo_phys->bpo_num_blkptrs += numbps; @@ -794,7 +795,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), - sizeof (subobj), &subobj, tx); + sizeof (subobj), &subobj, tx, DMU_READ_NO_PREFETCH); bpo->bpo_phys->bpo_num_subobjs++; } diff --git a/sys/contrib/openzfs/module/zfs/bptree.c b/sys/contrib/openzfs/module/zfs/bptree.c index a98bba3eb259..1274278e8e91 100644 --- a/sys/contrib/openzfs/module/zfs/bptree.c +++ b/sys/contrib/openzfs/module/zfs/bptree.c @@ -137,7 +137,8 @@ bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg, bte = kmem_zalloc(sizeof (*bte), KM_SLEEP); bte->be_birth_txg = birth_txg; bte->be_bp = *bp; - dmu_write(os, obj, bt->bt_end * sizeof (*bte), sizeof (*bte), bte, tx); + dmu_write(os, obj, bt->bt_end * sizeof (*bte), sizeof (*bte), bte, tx, + DMU_READ_NO_PREFETCH); kmem_free(bte, sizeof (*bte)); dmu_buf_will_dirty(db, tx); @@ -247,7 +248,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, ZB_DESTROYED_OBJSET); ASSERT0(bte.be_zb.zb_level); dmu_write(os, obj, i * sizeof (bte), - sizeof (bte), &bte, tx); + sizeof (bte), &bte, tx, + DMU_READ_NO_PREFETCH); if (err == EIO || err == ECKSUM || err == ENXIO) { /* @@ -269,7 +271,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, */ bte.be_birth_txg = UINT64_MAX; dmu_write(os, obj, i * sizeof (bte), - sizeof (bte), &bte, tx); + sizeof (bte), &bte, tx, + DMU_READ_NO_PREFETCH); } if (!ioerr) { diff --git a/sys/contrib/openzfs/module/zfs/brt.c b/sys/contrib/openzfs/module/zfs/brt.c index 40664354aa73..08a6bd52ab31 100644 --- a/sys/contrib/openzfs/module/zfs/brt.c +++ b/sys/contrib/openzfs/module/zfs/brt.c @@ -260,8 +260,8 @@ static int brt_zap_prefetch = 1; #define BRT_DEBUG(...) 
do { } while (0) #endif -static int brt_zap_default_bs = 12; -static int brt_zap_default_ibs = 12; +static int brt_zap_default_bs = 13; +static int brt_zap_default_ibs = 13; static kstat_t *brt_ksp; @@ -454,6 +454,7 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx) VERIFY(mos_entries != 0); VERIFY0(dnode_hold(spa->spa_meta_objset, mos_entries, brtvd, &brtvd->bv_mos_entries_dnode)); + dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP); rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER); brtvd->bv_mos_entries = mos_entries; rw_exit(&brtvd->bv_mos_entries_lock); @@ -508,8 +509,8 @@ brt_vdev_realloc(spa_t *spa, brt_vdev_t *brtvd) size = (vdev_get_min_asize(vd) - 1) / spa->spa_brt_rangesize + 1; spa_config_exit(spa, SCL_VDEV, FTAG); - entcount = vmem_zalloc(sizeof (entcount[0]) * size, KM_SLEEP); nblocks = BRT_RANGESIZE_TO_NBLOCKS(size); + entcount = vmem_zalloc(nblocks * BRT_BLOCKSIZE, KM_SLEEP); bitmap = kmem_zalloc(BT_SIZEOFMAP(nblocks), KM_SLEEP); if (!brtvd->bv_initiated) { @@ -530,9 +531,8 @@ brt_vdev_realloc(spa_t *spa, brt_vdev_t *brtvd) memcpy(entcount, brtvd->bv_entcount, sizeof (entcount[0]) * MIN(size, brtvd->bv_size)); - vmem_free(brtvd->bv_entcount, - sizeof (entcount[0]) * brtvd->bv_size); onblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); + vmem_free(brtvd->bv_entcount, onblocks * BRT_BLOCKSIZE); memcpy(bitmap, brtvd->bv_bitmap, MIN(BT_SIZEOFMAP(nblocks), BT_SIZEOFMAP(onblocks))); kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(onblocks)); @@ -581,13 +581,14 @@ brt_vdev_load(spa_t *spa, brt_vdev_t *brtvd) */ error = dmu_read(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0, MIN(brtvd->bv_size, bvphys->bvp_size) * sizeof (uint16_t), - brtvd->bv_entcount, DMU_READ_NO_PREFETCH); + brtvd->bv_entcount, DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO); if (error != 0) return (error); ASSERT(bvphys->bvp_mos_entries != 0); VERIFY0(dnode_hold(spa->spa_meta_objset, bvphys->bvp_mos_entries, brtvd, &brtvd->bv_mos_entries_dnode)); + dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP); rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER); brtvd->bv_mos_entries = bvphys->bvp_mos_entries; rw_exit(&brtvd->bv_mos_entries_lock); @@ -613,9 +614,9 @@ brt_vdev_dealloc(brt_vdev_t *brtvd) ASSERT(brtvd->bv_initiated); ASSERT0(avl_numnodes(&brtvd->bv_tree)); - vmem_free(brtvd->bv_entcount, sizeof (uint16_t) * brtvd->bv_size); - brtvd->bv_entcount = NULL; uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); + vmem_free(brtvd->bv_entcount, nblocks * BRT_BLOCKSIZE); + brtvd->bv_entcount = NULL; kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(nblocks)); brtvd->bv_bitmap = NULL; @@ -807,10 +808,10 @@ brt_vdev_sync(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx) /* * TODO: Walk brtvd->bv_bitmap and write only the dirty blocks. */ - dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0, - brtvd->bv_size * sizeof (brtvd->bv_entcount[0]), - brtvd->bv_entcount, tx); uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); + dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0, + nblocks * BRT_BLOCKSIZE, brtvd->bv_entcount, tx, + DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO); memset(brtvd->bv_bitmap, 0, BT_SIZEOFMAP(nblocks)); brtvd->bv_entcount_dirty = FALSE; } @@ -1510,6 +1511,31 @@ brt_load(spa_t *spa) } void +brt_prefetch_all(spa_t *spa) +{ + /* + * Load all BRT entries for each vdev. This is intended to perform + * a prefetch on all such blocks. For the same reason that brt_prefetch + * (called from brt_pending_add) isn't locked, this is also not locked. 
+ */ + brt_rlock(spa); + for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { + brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; + brt_unlock(spa); + + rw_enter(&brtvd->bv_mos_entries_lock, RW_READER); + if (brtvd->bv_mos_entries != 0) { + (void) zap_prefetch_object(spa->spa_meta_objset, + brtvd->bv_mos_entries); + } + rw_exit(&brtvd->bv_mos_entries_lock); + + brt_rlock(spa); + } + brt_unlock(spa); +} + +void brt_unload(spa_t *spa) { if (spa->spa_brt_rangesize == 0) diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index fccc4c5b5b94..72c597609ade 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -446,7 +446,10 @@ static boolean_t dbuf_include_in_metadata_cache(dmu_buf_impl_t *db) { DB_DNODE_ENTER(db); - dmu_object_type_t type = DB_DNODE(db)->dn_type; + dnode_t *dn = DB_DNODE(db); + dmu_object_type_t type = dn->dn_storage_type; + if (type == DMU_OT_NONE) + type = dn->dn_type; DB_DNODE_EXIT(db); /* Check if this dbuf is one of the types we care about */ diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c index c7a2426f3a77..3d42c51365a8 100644 --- a/sys/contrib/openzfs/module/zfs/ddt_log.c +++ b/sys/contrib/openzfs/module/zfs/ddt_log.c @@ -222,7 +222,7 @@ ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu) VERIFY0(dmu_buf_hold_array_by_dnode(dlu->dlu_dn, offset, length, B_FALSE, FTAG, &dlu->dlu_ndbp, &dlu->dlu_dbp, - DMU_READ_NO_PREFETCH)); + DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO)); dlu->dlu_tx = tx; dlu->dlu_block = dlu->dlu_offset = 0; @@ -298,7 +298,8 @@ ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, ddt_log_update_t *dlu) * we will fill it, and zero it out. */ if (dlu->dlu_offset == 0) { - dmu_buf_will_fill(db, dlu->dlu_tx, B_FALSE); + dmu_buf_will_fill_flags(db, dlu->dlu_tx, B_FALSE, + DMU_UNCACHEDIO); memset(db->db_data, 0, db->db_size); } @@ -597,7 +598,7 @@ ddt_log_load_one(ddt_t *ddt, uint_t n) for (uint64_t offset = 0; offset < hdr.dlh_length; offset += dn->dn_datablksz) { err = dmu_buf_hold_by_dnode(dn, offset, FTAG, &db, - DMU_READ_PREFETCH); + DMU_READ_PREFETCH | DMU_UNCACHEDIO); if (err != 0) { dnode_rele(dn, FTAG); ddt_log_empty(ddt, ddl); diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index a7a5c89bdafb..5690f8afad00 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -635,7 +635,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, int read, const void *tag, int *numbufsp, - dmu_buf_t ***dbpp) + dmu_buf_t ***dbpp, dmu_flags_t flags) { dnode_t *dn; int err; @@ -645,7 +645,7 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, return (err); err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, - numbufsp, dbpp, DMU_READ_PREFETCH); + numbufsp, dbpp, flags); dnode_rele(dn, FTAG); @@ -655,14 +655,14 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, int dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset, uint64_t length, boolean_t read, const void *tag, int *numbufsp, - dmu_buf_t ***dbpp) + dmu_buf_t ***dbpp, dmu_flags_t flags) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; int err; DB_DNODE_ENTER(db); err = dmu_buf_hold_array_by_dnode(DB_DNODE(db), offset, length, read, - tag, numbufsp, dbpp, DMU_READ_PREFETCH); + tag, numbufsp, dbpp, 
flags); DB_DNODE_EXIT(db); return (err); @@ -850,12 +850,15 @@ dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, uint64_t size) return (err); /* - * Chunk the requests (16 indirects worth) so that we can be interrupted + * Chunk the requests (16 indirects worth) so that we can be + * interrupted. Prefetch at least SPA_MAXBLOCKSIZE at a time + * to better utilize pools with smaller block sizes. */ uint64_t chunksize; if (dn->dn_indblkshift) { uint64_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1); chunksize = (nbps * 16) << dn->dn_datablkshift; + chunksize = MAX(chunksize, SPA_MAXBLOCKSIZE); } else { chunksize = dn->dn_datablksz; } @@ -1293,7 +1296,7 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size, void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - const void *buf, dmu_tx_t *tx) + const void *buf, dmu_tx_t *tx, dmu_flags_t flags) { dmu_buf_t **dbp; int numbufs; @@ -1302,8 +1305,8 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, return; VERIFY0(dmu_buf_hold_array(os, object, offset, size, - FALSE, FTAG, &numbufs, &dbp)); - dmu_write_impl(dbp, numbufs, offset, size, buf, tx, DMU_READ_PREFETCH); + FALSE, FTAG, &numbufs, &dbp, flags)); + dmu_write_impl(dbp, numbufs, offset, size, buf, tx, flags); dmu_buf_rele_array(dbp, numbufs, FTAG); } @@ -1346,7 +1349,7 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, return; VERIFY0(dmu_buf_hold_array(os, object, offset, size, - FALSE, FTAG, &numbufs, &dbp)); + FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH)); for (i = 0; i < numbufs; i++) { dmu_buf_t *db = dbp[i]; @@ -1383,7 +1386,7 @@ dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_buf_t **dbp; VERIFY0(dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, - &numbufs, &dbp)); + &numbufs, &dbp, DMU_READ_PREFETCH)); for (i = 0; i < numbufs; i++) dmu_buf_redact(dbp[i], tx); dmu_buf_rele_array(dbp, numbufs, FTAG); @@ -2592,7 +2595,7 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, int error, numbufs; error = dmu_buf_hold_array(os, object, offset, length, FALSE, FTAG, - &numbufs, &dbp); + &numbufs, &dbp, DMU_READ_PREFETCH); if (error != 0) { if (error == ESRCH) { error = SET_ERROR(ENXIO); @@ -2693,7 +2696,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, spa = os->os_spa; VERIFY0(dmu_buf_hold_array(os, object, offset, length, FALSE, FTAG, - &numbufs, &dbp)); + &numbufs, &dbp, DMU_READ_PREFETCH)); ASSERT3U(nbps, ==, numbufs); /* diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c index 5a22ed71a5fe..c087be4c811d 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_redact.c +++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c @@ -544,7 +544,8 @@ redaction_list_update_sync(void *arg, dmu_tx_t *tx) if (index == bufsize) { dmu_write(mos, rl->rl_object, rl->rl_phys->rlp_num_entries * sizeof (*buf), - bufsize * sizeof (*buf), buf, tx); + bufsize * sizeof (*buf), buf, tx, + DMU_READ_NO_PREFETCH); rl->rl_phys->rlp_num_entries += bufsize; index = 0; } @@ -552,7 +553,8 @@ redaction_list_update_sync(void *arg, dmu_tx_t *tx) } if (index > 0) { dmu_write(mos, rl->rl_object, rl->rl_phys->rlp_num_entries * - sizeof (*buf), index * sizeof (*buf), buf, tx); + sizeof (*buf), index * sizeof (*buf), buf, tx, + DMU_READ_NO_PREFETCH); rl->rl_phys->rlp_num_entries += index; } kmem_free(buf, bufsize * sizeof (*buf)); diff --git 
a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index e88d394b5229..e0cc4a7e13e0 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -2496,26 +2496,27 @@ dnode_diduse_space(dnode_t *dn, int64_t delta) } /* - * Scans a block at the indicated "level" looking for a hole or data, - * depending on 'flags'. + * Scans the block at the indicated "level" looking for a hole or data, + * depending on 'flags' starting from array position given by *index. * - * If level > 0, then we are scanning an indirect block looking at its - * pointers. If level == 0, then we are looking at a block of dnodes. + * If lvl > 0, then we are scanning an indirect block looking at its + * pointers. If lvl == 0, then we are looking at a block of dnodes. * * If we don't find what we are looking for in the block, we return ESRCH. - * Otherwise, return with *offset pointing to the beginning (if searching - * forwards) or end (if searching backwards) of the range covered by the - * block pointer we matched on (or dnode). + * Otherwise, return with *index set to the matching array position. * - * The basic search algorithm used below by dnode_next_offset() is to - * use this function to search up the block tree (widen the search) until - * we find something (i.e., we don't return ESRCH) and then search back - * down the tree (narrow the search) until we reach our original search - * level. + * In both cases, *offset is updated to point at the matched BP/dnode or + * the next offset to search (unless at the limit of possible offsets). + * + * The basic search algorithm used below by dnode_next_offset() uses this + * function to perform a block-order tree traversal. We search up the block + * tree (widen the search) until we find something (i.e., we don't return + * ESRCH) and then search back down the tree (narrow the search) until we + * reach our original search level or backtrack up because nothing matches. */ static int -dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, - int lvl, uint64_t blkfill, uint64_t txg) +dnode_next_offset_level(dnode_t *dn, int flags, int lvl, uint64_t blkid, + int *index, uint64_t blkfill, uint64_t txg, uint64_t *offset) { dmu_buf_impl_t *db = NULL; void *data = NULL; @@ -2541,20 +2542,12 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, rrw_enter(&dmu_objset_ds(dn->dn_objset)->ds_bp_rwlock, RW_READER, FTAG); } else { - uint64_t blkid = dbuf_whichblock(dn, lvl, *offset); error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FALSE, FTAG, &db); if (error) { if (error != ENOENT) return (error); if (hole) return (0); - /* - * This can only happen when we are searching up - * the block tree for data. We don't really need to - * adjust the offset, as we will just end up looking - * at the pointer to this block in its parent, and its - * going to be unallocated, so we will skip over it. 
- */ return (SET_ERROR(ESRCH)); } error = dbuf_read(db, NULL, @@ -2582,8 +2575,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, ASSERT(dn->dn_type == DMU_OT_DNODE); ASSERT(!(flags & DNODE_FIND_BACKWARDS)); - for (i = (*offset >> DNODE_SHIFT) & (blkfill - 1); - i < blkfill; i += dnp[i].dn_extra_slots + 1) { + for (i = *index; i < blkfill; i += dnp[i].dn_extra_slots + 1) { if ((dnp[i].dn_type == DMU_OT_NONE) == hole) break; } @@ -2591,11 +2583,11 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, if (i == blkfill) error = SET_ERROR(ESRCH); + *index = i; *offset = (*offset & ~(DNODE_BLOCK_SIZE - 1)) + (i << DNODE_SHIFT); } else { blkptr_t *bp = data; - uint64_t start = *offset; span = (lvl - 1) * epbs + dn->dn_datablkshift; minfill = 0; maxfill = blkfill << ((lvl - 1) * epbs); @@ -2605,40 +2597,27 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, else minfill++; - if (span >= 8 * sizeof (*offset)) { - /* This only happens on the highest indirection level */ - ASSERT3U((lvl - 1), ==, dn->dn_phys->dn_nlevels - 1); - *offset = 0; - } else { - *offset = *offset >> span; - } - - for (i = BF64_GET(*offset, 0, epbs); - i >= 0 && i < epb; i += inc) { + for (i = *index; i >= 0 && i < epb; i += inc) { if (BP_GET_FILL(&bp[i]) >= minfill && BP_GET_FILL(&bp[i]) <= maxfill && (hole || BP_GET_LOGICAL_BIRTH(&bp[i]) > txg)) break; - if (inc > 0 || *offset > 0) - *offset += inc; } - if (span >= 8 * sizeof (*offset)) { - *offset = start; - } else { - *offset = *offset << span; - } - - if (inc < 0) { - /* traversing backwards; position offset at the end */ - if (span < 8 * sizeof (*offset)) - *offset = MIN(*offset + (1ULL << span) - 1, - start); - } else if (*offset < start) { - *offset = start; - } if (i < 0 || i >= epb) error = SET_ERROR(ESRCH); + + *index = i; + if (span < 8 * sizeof (*offset)) { + uint64_t nblk = blkid << epbs; + if (i >= 0 || blkid != 0) + nblk += i; + if ((nblk >> (8 * sizeof (*offset) - span)) == 0) + *offset = (flags & DNODE_FIND_BACKWARDS) ? + /* backwards: position offset at the end */ + MIN(*offset, ((nblk + 1) << span) - 1) : + MAX(*offset, nblk << span); + } } if (db != NULL) { @@ -2656,38 +2635,24 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, } /* - * Adjust *offset to the next (or previous) block byte offset at lvl. - * Returns FALSE if *offset would overflow or underflow. - */ -static boolean_t -dnode_next_block(dnode_t *dn, int flags, uint64_t *offset, int lvl) -{ - int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; - int span = lvl * epbs + dn->dn_datablkshift; - uint64_t blkid, maxblkid; - - if (span >= 8 * sizeof (uint64_t)) - return (B_FALSE); - - blkid = *offset >> span; - maxblkid = 1ULL << (8 * sizeof (*offset) - span); - if (!(flags & DNODE_FIND_BACKWARDS) && blkid + 1 < maxblkid) - *offset = (blkid + 1) << span; - else if ((flags & DNODE_FIND_BACKWARDS) && blkid > 0) - *offset = (blkid << span) - 1; - else - return (B_FALSE); - - return (B_TRUE); -} - -/* * Find the next hole, data, or sparse region at or after *offset. * The value 'blkfill' tells us how many items we expect to find * in an L0 data block; this value is 1 for normal objects, * DNODES_PER_BLOCK for the meta dnode, and some fraction of * DNODES_PER_BLOCK when searching for sparse regions thereof. * + * If minlvl == 0, this searches for dnodes or unallocated dnodes. + * If found, *offset points to the first offset of the matched dnode. + * Backwards search is not allowed for dnodes. 
+ * + * If minlvl > 0, this searches for blocks at the given level. + * If found, *offset points to the first L0 offset of the block + * (or for backwards search, the last offset, inclusive). + * + * If not found, in both cases, *offset is set to the first (or last) + * offset of the unallocated indirect block where the search ended or + * the initial offset if no such block was encountered. + * * Examples: * * dnode_next_offset(dn, flags, offset, 1, 1, 0); @@ -2708,7 +2673,8 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, int minlvl, uint64_t blkfill, uint64_t txg) { - uint64_t matched = *offset; + uint64_t blkid; + int index, epbs; int lvl, maxlvl; int error = 0; @@ -2730,18 +2696,31 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, goto out; } + epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; maxlvl = dn->dn_phys->dn_nlevels; + if (minlvl > 0) { + uint64_t n = dbuf_whichblock(dn, minlvl - 1, *offset); + blkid = n >> epbs; + index = BF64_GET(n, 0, epbs); + } else { + blkid = dbuf_whichblock(dn, 0, *offset); + index = (*offset >> DNODE_SHIFT) & (blkfill - 1); + ASSERT3U(BF64_GET(*offset, 0, DNODE_SHIFT), ==, 0); + } + for (lvl = minlvl; lvl <= maxlvl; ) { error = dnode_next_offset_level(dn, - flags, offset, lvl, blkfill, txg); + flags, lvl, blkid, &index, blkfill, txg, offset); + if (error == 0 && lvl > minlvl) { + /* Continue search at matched block in lvl-1. */ + blkid = (blkid << epbs) + index; + index = 0; --lvl; - matched = *offset; - } else if (error == ESRCH && lvl < maxlvl && - dnode_next_block(dn, flags, &matched, lvl)) { + } else if (error == ESRCH && lvl < maxlvl) { /* - * Continue search at next/prev offset in lvl+1 block. + * Continue search at next/prev index in lvl+1 block. * * Usually we only search upwards at the start of the * search as higher level blocks point at a matching @@ -2752,13 +2731,14 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, * happens if we are still syncing out the tree, and * some BP's at higher levels are not updated yet. * - * We must adjust offset to avoid coming back to the - * same offset and getting stuck looping forever. This - * also deals with the case where offset is already at - * the beginning or end of the object. + * We must adjust index to avoid coming back to the + * same offset and getting stuck looping forever. The + * next loop goes up again if index is -1 or (1<<epbs). */ + index = BF64_GET(blkid, 0, epbs) + + ((flags & DNODE_FIND_BACKWARDS) ? 
-1 : 1); + blkid = blkid >> epbs; ++lvl; - *offset = matched; } else { break; } diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 9f4399af56bd..3f649ffb44e4 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -3966,7 +3966,8 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx) object = space_map_object(msp->ms_sm); dmu_write(spa->spa_meta_objset, msp->ms_group->mg_vd->vdev_ms_array, sizeof (uint64_t) * - msp->ms_id, sizeof (uint64_t), &object, tx); + msp->ms_id, sizeof (uint64_t), &object, tx, + DMU_READ_NO_PREFETCH); } /* @@ -4292,7 +4293,8 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) VERIFY3U(new_object, !=, 0); dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) * - msp->ms_id, sizeof (uint64_t), &new_object, tx); + msp->ms_id, sizeof (uint64_t), &new_object, tx, + DMU_READ_NO_PREFETCH); VERIFY0(space_map_open(&msp->ms_sm, mos, new_object, msp->ms_start, msp->ms_size, vd->vdev_ashift)); @@ -6328,7 +6330,7 @@ metaslab_update_ondisk_flush_data(metaslab_t *ms, dmu_tx_t *tx) } dmu_write(spa_meta_objset(spa), object, entry_offset, entry_size, - &entry, tx); + &entry, tx, DMU_READ_NO_PREFETCH); } void diff --git a/sys/contrib/openzfs/module/zfs/mmp.c b/sys/contrib/openzfs/module/zfs/mmp.c index fd46127b6068..b8ba40ecdc9d 100644 --- a/sys/contrib/openzfs/module/zfs/mmp.c +++ b/sys/contrib/openzfs/module/zfs/mmp.c @@ -729,12 +729,12 @@ mmp_signal_all_threads(void) { spa_t *spa = NULL; - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); while ((spa = spa_next(spa))) { if (spa->spa_state == POOL_STATE_ACTIVE) mmp_signal_thread(spa); } - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } ZFS_MODULE_PARAM_CALL(zfs_multihost, zfs_multihost_, interval, diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index b3bb46da263b..34de3f1d9525 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -141,7 +141,7 @@ typedef enum zti_modes { #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } #define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 } -#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 } +#define ZTI_SCALE(min) { ZTI_MODE_SCALE, (min), 1 } #define ZTI_SYNC { ZTI_MODE_SYNC, 0, 1 } #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } @@ -180,13 +180,13 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ - { ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */ + { ZTI_N(8), ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* READ */ #ifdef illumos - { ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */ + { ZTI_SYNC, ZTI_N(5), ZTI_SCALE(0), ZTI_N(5) }, /* WRITE */ #else - { ZTI_SYNC, ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* WRITE */ + { ZTI_SYNC, ZTI_NULL, ZTI_SCALE(0), ZTI_NULL }, /* WRITE */ #endif - { ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ + { ZTI_SCALE(32), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FLUSH */ { ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */ @@ -1082,7 +1082,7 @@ spa_change_guid(spa_t *spa, const uint64_t *guidp) int error; mutex_enter(&spa->spa_vdev_top_lock); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if (guidp != NULL) { guid = *guidp; @@ -1117,7 +1117,7 @@ spa_change_guid(spa_t *spa, const uint64_t 
*guidp) } out: - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); mutex_exit(&spa->spa_vdev_top_lock); return (error); @@ -1170,7 +1170,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) uint_t value = ztip->zti_value; uint_t count = ztip->zti_count; spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; - uint_t cpus, flags = TASKQ_DYNAMIC; + uint_t cpus, threads, flags = TASKQ_DYNAMIC; switch (mode) { case ZTI_MODE_FIXED: @@ -1183,8 +1183,8 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) * Create one wr_iss taskq for every 'zio_taskq_write_tpq' CPUs, * not to exceed the number of spa allocators, and align to it. */ - cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100); - count = MAX(1, cpus / MAX(1, zio_taskq_write_tpq)); + threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100); + count = MAX(1, threads / MAX(1, zio_taskq_write_tpq)); count = MAX(count, (zio_taskq_batch_pct + 99) / 100); count = MIN(count, spa->spa_alloc_count); while (spa->spa_alloc_count % count != 0 && @@ -1201,14 +1201,14 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) break; case ZTI_MODE_SCALE: - flags |= TASKQ_THREADS_CPU_PCT; /* * We want more taskqs to reduce lock contention, but we want * less for better request ordering and CPU utilization. */ - cpus = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100); + threads = MAX(1, boot_ncpus * zio_taskq_batch_pct / 100); + threads = MAX(threads, value); if (zio_taskq_batch_tpq > 0) { - count = MAX(1, (cpus + zio_taskq_batch_tpq / 2) / + count = MAX(1, (threads + zio_taskq_batch_tpq / 2) / zio_taskq_batch_tpq); } else { /* @@ -1228,13 +1228,23 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) * 128 10 8% 10 100 * 256 14 6% 15 210 */ - count = 1 + cpus / 6; + cpus = MIN(threads, boot_ncpus); + count = 1 + threads / 6; while (count * count > cpus) count--; } - /* Limit each taskq within 100% to not trigger assertion. */ - count = MAX(count, (zio_taskq_batch_pct + 99) / 100); - value = (zio_taskq_batch_pct + count / 2) / count; + + /* + * Try to represent the number of threads per taskq as percent + * of online CPUs to allow scaling with later online/offline. + * Fall back to absolute numbers if can't. + */ + value = (threads * 100 + boot_ncpus * count / 2) / + (boot_ncpus * count); + if (value < 5 || value > 100) + value = MAX(1, (threads + count / 2) / count); + else + flags |= TASKQ_THREADS_CPU_PCT; break; case ZTI_MODE_NULL: @@ -1433,8 +1443,30 @@ spa_taskq_param_set(zio_type_t t, char *cfg) break; } + /* + * SCALE is optionally parameterised by minimum number of + * threads. 
+ */ case ZTI_MODE_SCALE: { - const zio_taskq_info_t zti = ZTI_SCALE; + unsigned long long mint = 0; + if (c != NULL && *c != '\0') { + /* Need a number */ + if (!(isdigit(*c))) + break; + tok = c; + + /* Take digits */ + err = ddi_strtoull(tok, &tok, 10, &mint); + /* Must succeed, and moved forward */ + if (err != 0 || tok == c || *tok != '\0') + break; + + /* Sanity check */ + if (mint >= 16384) + break; + } + + const zio_taskq_info_t zti = ZTI_SCALE(mint); row[q] = zti; break; } @@ -1501,6 +1533,9 @@ spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline) pos += sprintf(&buf[pos], "%s%s,%u,%u", sep, modes[zti->zti_mode], zti->zti_count, zti->zti_value); + else if (zti->zti_mode == ZTI_MODE_SCALE && zti->zti_value > 0) + pos += sprintf(&buf[pos], "%s%s,%u", sep, + modes[zti->zti_mode], zti->zti_value); else pos += sprintf(&buf[pos], "%s%s", sep, modes[zti->zti_mode]); @@ -1520,9 +1555,10 @@ spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp) { char *cfg = kmem_strdup(val); int err = spa_taskq_param_set(ZIO_TYPE_READ, cfg); - kmem_free(cfg, strlen(val)+1); + kmem_strfree(cfg); return (-err); } + static int spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp) { @@ -1534,14 +1570,30 @@ spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp) { char *cfg = kmem_strdup(val); int err = spa_taskq_param_set(ZIO_TYPE_WRITE, cfg); - kmem_free(cfg, strlen(val)+1); + kmem_strfree(cfg); return (-err); } + static int spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp) { return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE)); } + +static int +spa_taskq_free_param_set(const char *val, zfs_kernel_param_t *kp) +{ + char *cfg = kmem_strdup(val); + int err = spa_taskq_param_set(ZIO_TYPE_FREE, cfg); + kmem_strfree(cfg); + return (-err); +} + +static int +spa_taskq_free_param_get(char *buf, zfs_kernel_param_t *kp) +{ + return (spa_taskq_param_get(ZIO_TYPE_FREE, buf, TRUE)); +} #else /* * On FreeBSD load-time parameters can be set up before malloc() is available, @@ -1574,6 +1626,19 @@ spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS) return (err); return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf)); } + +static int +spa_taskq_free_param(ZFS_MODULE_PARAM_ARGS) +{ + char buf[SPA_TASKQ_PARAM_MAX]; + int err; + + (void) spa_taskq_param_get(ZIO_TYPE_FREE, buf, FALSE); + err = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (err || req->newptr == NULL) + return (err); + return (spa_taskq_param_set(ZIO_TYPE_FREE, buf)); +} #endif #endif /* _KERNEL */ @@ -2187,7 +2252,7 @@ spa_should_sync_time_logger_on_unload(spa_t *spa) static void spa_unload(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED); @@ -5260,7 +5325,7 @@ spa_ld_read_checkpoint_txg(spa_t *spa) int error = 0; ASSERT0(spa->spa_checkpoint_txg); - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_load_thread == curthread); error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, @@ -5287,7 +5352,7 @@ spa_ld_mos_init(spa_t *spa, spa_import_type_t type) { int error = 0; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); /* @@ -5363,7 +5428,7 @@ spa_ld_checkpoint_rewind(spa_t *spa) uberblock_t checkpoint; int error = 0; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); error = 
zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, @@ -5510,7 +5575,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) boolean_t update_config_cache = B_FALSE; hrtime_t load_start = gethrtime(); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); spa_load_note(spa, "LOADING"); @@ -5557,7 +5622,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Drop the namespace lock for the rest of the function. */ spa->spa_load_thread = curthread; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); /* * Retrieve the checkpoint txg if the pool has a checkpoint. @@ -5796,9 +5861,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) spa_load_note(spa, "LOADED"); fail: - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa->spa_load_thread = NULL; - cv_broadcast(&spa_namespace_cv); + spa_namespace_broadcast(); return (error); @@ -5960,14 +6025,14 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, * up calling spa_open() again. The real fix is to figure out how to * avoid dsl_dir_open() calling this in the first place. */ - if (MUTEX_NOT_HELD(&spa_namespace_lock)) { - mutex_enter(&spa_namespace_lock); + if (!spa_namespace_held()) { + spa_namespace_enter(FTAG); locked = B_TRUE; } if ((spa = spa_lookup(pool)) == NULL) { if (locked) - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (SET_ERROR(ENOENT)); } @@ -6004,7 +6069,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE); spa_remove(spa); if (locked) - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (SET_ERROR(ENOENT)); } @@ -6024,7 +6089,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, spa_deactivate(spa); spa->spa_last_open_failed = error; if (locked) - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); *spapp = NULL; return (error); } @@ -6048,7 +6113,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, spa->spa_last_open_failed = 0; spa->spa_last_ubsync_txg = 0; spa->spa_load_txg = 0; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } if (firstopen) @@ -6081,13 +6146,13 @@ spa_inject_addref(char *name) { spa_t *spa; - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if ((spa = spa_lookup(name)) == NULL) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (NULL); } spa->spa_inject_ref++; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (spa); } @@ -6095,9 +6160,9 @@ spa_inject_addref(char *name) void spa_inject_delref(spa_t *spa) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa->spa_inject_ref--; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } /* @@ -6341,14 +6406,14 @@ spa_get_stats(const char *name, nvlist_t **config, */ if (altroot) { if (spa == NULL) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa = spa_lookup(name); if (spa) spa_altroot(spa, altroot, buflen); else altroot[0] = '\0'; spa = NULL; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } else { spa_altroot(spa, altroot, buflen); } @@ -6568,9 +6633,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, /* * If this pool already exists, return failure. 
*/ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if (spa_lookup(poolname) != NULL) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (SET_ERROR(EEXIST)); } @@ -6588,7 +6653,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, if (props && (error = spa_prop_validate(spa, props))) { spa_deactivate(spa); spa_remove(spa); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (error); } @@ -6621,14 +6686,14 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, if (error != 0) { spa_deactivate(spa); spa_remove(spa); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (error); } } if (!has_allocclass && zfs_special_devs(nvroot, NULL)) { spa_deactivate(spa); spa_remove(spa); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (ENOTSUP); } @@ -6694,7 +6759,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_unload(spa); spa_deactivate(spa); spa_remove(spa); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (error); } @@ -6847,7 +6912,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_import_os(spa); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (0); } @@ -6872,9 +6937,9 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) /* * If a pool with this name exists, return failure. */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if (spa_lookup(pool) != NULL) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (SET_ERROR(EEXIST)); } @@ -6901,7 +6966,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); zfs_dbgmsg("spa_import: verbatim import of %s", pool); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (0); } @@ -6960,7 +7025,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_unload(spa); spa_deactivate(spa); spa_remove(spa); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (error); } @@ -7028,7 +7093,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); zvol_create_minors(pool); @@ -7060,7 +7125,7 @@ spa_tryimport(nvlist_t *tryconfig) (void) snprintf(name, MAXPATHLEN, "%s-%llx-%s", TRYIMPORT_NAME, (u_longlong_t)(uintptr_t)curthread, poolname); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa = spa_add(name, tryconfig, NULL); spa_activate(spa, SPA_MODE_READ); kmem_free(name, MAXPATHLEN); @@ -7158,7 +7223,7 @@ spa_tryimport(nvlist_t *tryconfig) spa_unload(spa); spa_deactivate(spa); spa_remove(spa); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (config); } @@ -7186,15 +7251,15 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, if (!(spa_mode_global & SPA_MODE_WRITE)) return (SET_ERROR(EROFS)); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if ((spa = spa_lookup(pool)) == NULL) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (SET_ERROR(ENOENT)); } if (spa->spa_is_exporting) { /* the pool is being exported by another thread */ - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS)); } spa->spa_is_exporting = B_TRUE; @@ -7204,18 
+7269,18 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, * and see if we can export. */ spa_open_ref(spa, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); spa_async_suspend(spa); if (spa->spa_zvol_taskq) { zvol_remove_minors(spa, spa_name(spa), B_TRUE); taskq_wait(spa->spa_zvol_taskq); } - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa->spa_export_thread = curthread; spa_close(spa, FTAG); if (spa->spa_state == POOL_STATE_UNINITIALIZED) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); goto export_spa; } @@ -7239,7 +7304,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, goto fail; } - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); /* * At this point we no longer hold the spa_namespace_lock and * there were no references on the spa. Future spa_lookups will @@ -7258,7 +7323,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, if (!force && new_state == POOL_STATE_EXPORTED && spa_has_active_shared_spare(spa)) { error = SET_ERROR(EXDEV); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); goto fail; } @@ -7333,7 +7398,7 @@ export_spa: /* * Take the namespace lock for the actual spa_t removal */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if (new_state != POOL_STATE_UNINITIALIZED) { if (!hardforce) spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE); @@ -7351,8 +7416,8 @@ export_spa: /* * Wake up any waiters in spa_lookup() */ - cv_broadcast(&spa_namespace_cv); - mutex_exit(&spa_namespace_lock); + spa_namespace_broadcast(); + spa_namespace_exit(FTAG); return (0); fail: @@ -7363,8 +7428,8 @@ fail: /* * Wake up any waiters in spa_lookup() */ - cv_broadcast(&spa_namespace_cv); - mutex_exit(&spa_namespace_lock); + spa_namespace_broadcast(); + spa_namespace_exit(FTAG); return (error); } @@ -7574,10 +7639,10 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift) */ (void) spa_vdev_exit(spa, vd, txg, 0); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (0); } @@ -7694,7 +7759,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { error = (spa_has_checkpoint(spa)) ? ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; @@ -8078,7 +8143,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) * as spa_vdev_resilver_done() calls this function everything * should be fine as the resilver will return right away. */ - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { error = (spa_has_checkpoint(spa)) ? 
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; @@ -8282,28 +8347,28 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) if (unspare) { spa_t *altspa = NULL; - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); while ((altspa = spa_next(altspa)) != NULL) { if (altspa->spa_state != POOL_STATE_ACTIVE || altspa == spa) continue; spa_open_ref(altspa, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_close(altspa, FTAG); } - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); /* search the rest of the vdevs for spares to remove */ spa_vdev_resilver_done(spa); } /* all done with the spa; OK to release */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_close(spa, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (error); } @@ -8312,7 +8377,7 @@ static int spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type, list_t *vd_list) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); @@ -8396,7 +8461,7 @@ spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, * we can properly assess the vdev state before we commit to * the initializing operation. */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); for (nvpair_t *pair = nvlist_next_nvpair(nv, NULL); pair != NULL; pair = nvlist_next_nvpair(nv, pair)) { @@ -8419,7 +8484,7 @@ spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, /* Sync out the initializing state */ txg_wait_synced(spa->spa_dsl_pool, 0); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); list_destroy(&vd_list); @@ -8430,7 +8495,7 @@ static int spa_vdev_trim_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type, uint64_t rate, boolean_t partial, boolean_t secure, list_t *vd_list) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); @@ -8517,7 +8582,7 @@ spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, uint64_t rate, * we can properly assess the vdev state before we commit to * the TRIM operation. */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); for (nvpair_t *pair = nvlist_next_nvpair(nv, NULL); pair != NULL; pair = nvlist_next_nvpair(nv, pair)) { @@ -8540,7 +8605,7 @@ spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, uint64_t rate, /* Sync out the TRIM state */ txg_wait_synced(spa->spa_dsl_pool, 0); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); list_destroy(&vd_list); @@ -8568,7 +8633,7 @@ spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config, txg = spa_vdev_enter(spa); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { error = (spa_has_checkpoint(spa)) ? 
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; @@ -9242,7 +9307,7 @@ spa_async_thread(void *arg) if (tasks & SPA_ASYNC_CONFIG_UPDATE) { uint64_t old_space, new_space; - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); old_space = metaslab_class_get_space(spa_normal_class(spa)); old_space += metaslab_class_get_space(spa_special_class(spa)); old_space += metaslab_class_get_space(spa_dedup_class(spa)); @@ -9260,7 +9325,7 @@ spa_async_thread(void *arg) spa_embedded_log_class(spa)); new_space += metaslab_class_get_space( spa_special_embedded_log_class(spa)); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); /* * If the pool grew as a result of the config update, @@ -9329,49 +9394,49 @@ spa_async_thread(void *arg) dsl_scan_restart_resilver(dp, 0); if (tasks & SPA_ASYNC_INITIALIZE_RESTART) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); vdev_initialize_restart(spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } if (tasks & SPA_ASYNC_TRIM_RESTART) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); vdev_trim_restart(spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } if (tasks & SPA_ASYNC_AUTOTRIM_RESTART) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); vdev_autotrim_restart(spa); spa_config_exit(spa, SCL_CONFIG, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } /* * Kick off L2 cache whole device TRIM. */ if (tasks & SPA_ASYNC_L2CACHE_TRIM) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); vdev_trim_l2arc(spa); spa_config_exit(spa, SCL_CONFIG, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } /* * Kick off L2 cache rebuilding. */ if (tasks & SPA_ASYNC_L2CACHE_REBUILD) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_config_enter(spa, SCL_L2ARC, FTAG, RW_READER); l2arc_spa_rebuild_start(spa); spa_config_exit(spa, SCL_L2ARC, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } /* @@ -9601,7 +9666,8 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) KM_SLEEP)); memset(packed + nvsize, 0, bufsize - nvsize); - dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); + dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx, + DMU_READ_NO_PREFETCH); vmem_free(packed, bufsize); @@ -10522,18 +10588,18 @@ void spa_sync_allpools(void) { spa_t *spa = NULL; - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); while ((spa = spa_next(spa)) != NULL) { if (spa_state(spa) != POOL_STATE_ACTIVE || !spa_writeable(spa) || spa_suspended(spa)) continue; spa_open_ref(spa, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); txg_wait_synced(spa_get_dsl(spa), 0); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_close(spa, FTAG); } - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } taskq_t * @@ -10680,7 +10746,7 @@ spa_evict_all(void) * Remove all cached state. All pools should be closed now, * so every spa in the AVL tree should be unreferenced. */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); while ((spa = spa_next(NULL)) != NULL) { /* * Stop async tasks. 
The async thread may need to detach @@ -10688,9 +10754,9 @@ spa_evict_all(void) * spa_namespace_lock, so we must drop it here. */ spa_open_ref(spa, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); spa_async_suspend(spa); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_close(spa, FTAG); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { @@ -10699,7 +10765,7 @@ spa_evict_all(void) } spa_remove(spa); } - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } vdev_t * @@ -11272,6 +11338,9 @@ ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read, ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write, spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RW, "Configure IO queues for write IO"); +ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_free, + spa_taskq_free_param_set, spa_taskq_free_param_get, ZMOD_RW, + "Configure IO queues for free IO"); #endif ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_write_tpq, UINT, ZMOD_RW, diff --git a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c index e07756c46748..a42aa62e6599 100644 --- a/sys/contrib/openzfs/module/zfs/spa_checkpoint.c +++ b/sys/contrib/openzfs/module/zfs/spa_checkpoint.c @@ -427,7 +427,7 @@ spa_checkpoint_discard_thread(void *arg, zthr_t *zthr) */ int error = dmu_buf_hold_array_by_bonus( checkpoint_sm->sm_dbuf, offset, size, - B_TRUE, FTAG, &numbufs, &dbp); + B_TRUE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); if (error != 0) { zfs_panic_recover("zfs: error %d was returned " "while prefetching checkpoint space map " diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c index f615591e826b..31216e9a7ccc 100644 --- a/sys/contrib/openzfs/module/zfs/spa_config.c +++ b/sys/contrib/openzfs/module/zfs/spa_config.c @@ -161,7 +161,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent, boolean_t ccw_failure; int error = 0; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); if (!(spa_mode_global & SPA_MODE_WRITE)) return; @@ -287,7 +287,7 @@ spa_all_configs(uint64_t *generation, nvlist_t **pools) if (*generation == spa_config_generation) return (SET_ERROR(EEXIST)); - int error = mutex_enter_interruptible(&spa_namespace_lock); + int error = spa_namespace_enter_interruptible(FTAG); if (error) return (SET_ERROR(EINTR)); @@ -302,7 +302,7 @@ spa_all_configs(uint64_t *generation, nvlist_t **pools) } } *generation = spa_config_generation; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (0); } @@ -483,7 +483,7 @@ spa_config_update(spa_t *spa, int what) uint64_t txg; int c; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); txg = spa_last_synced_txg(spa) + 1; diff --git a/sys/contrib/openzfs/module/zfs/spa_history.c b/sys/contrib/openzfs/module/zfs/spa_history.c index 60ab07944d72..b9d0c9656726 100644 --- a/sys/contrib/openzfs/module/zfs/spa_history.c +++ b/sys/contrib/openzfs/module/zfs/spa_history.c @@ -169,13 +169,14 @@ spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); shpp->sh_eof += len; - dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); + dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx, + DMU_READ_NO_PREFETCH); len -= firstwrite; if (len > 0) { /* write out the rest at the beginning of 
physical file */ dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, - len, (char *)buf + firstwrite, tx); + len, (char *)buf + firstwrite, tx, DMU_READ_NO_PREFETCH); } return (0); diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 0bead6d49666..bf22d2eb68e7 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -28,7 +28,7 @@ * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. - * Copyright (c) 2023, 2024, Klara Inc. + * Copyright (c) 2023, 2024, 2025, Klara, Inc. */ #include <sys/zfs_context.h> @@ -237,9 +237,10 @@ * locking is, always, based on spa_namespace_lock and spa_config_lock[]. */ -avl_tree_t spa_namespace_avl; -kmutex_t spa_namespace_lock; -kcondvar_t spa_namespace_cv; +static avl_tree_t spa_namespace_avl; +static kmutex_t spa_namespace_lock; +static kcondvar_t spa_namespace_cv; + static const int spa_max_replication_override = SPA_DVAS_PER_BP; static kmutex_t spa_spare_lock; @@ -608,6 +609,58 @@ spa_config_held(spa_t *spa, int locks, krw_t rw) * ========================================================================== */ +void +spa_namespace_enter(const void *tag) +{ + (void) tag; + ASSERT(!MUTEX_HELD(&spa_namespace_lock)); + mutex_enter(&spa_namespace_lock); +} + +boolean_t +spa_namespace_tryenter(const void *tag) +{ + (void) tag; + ASSERT(!MUTEX_HELD(&spa_namespace_lock)); + return (mutex_tryenter(&spa_namespace_lock)); +} + +int +spa_namespace_enter_interruptible(const void *tag) +{ + (void) tag; + ASSERT(!MUTEX_HELD(&spa_namespace_lock)); + return (mutex_enter_interruptible(&spa_namespace_lock)); +} + +void +spa_namespace_exit(const void *tag) +{ + (void) tag; + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + mutex_exit(&spa_namespace_lock); +} + +boolean_t +spa_namespace_held(void) +{ + return (MUTEX_HELD(&spa_namespace_lock)); +} + +void +spa_namespace_wait(void) +{ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + cv_wait(&spa_namespace_cv, &spa_namespace_lock); +} + +void +spa_namespace_broadcast(void) +{ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + cv_broadcast(&spa_namespace_cv); +} + /* * Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held. * Returns NULL if no matching spa_t is found. 
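The spa_namespace_enter()/spa_namespace_exit() wrappers added above become the only way to take the (now static) spa_namespace_lock; the remaining hunks in this commit mechanically convert every direct mutex_enter()/mutex_exit() call site to them. A minimal caller-side sketch of the intended pattern, assuming only the wrapper API above — the helper name is hypothetical, and its body mirrors the converted spa_inject_addref() earlier in this diff:

/*
 * example_spa_lookup_ref() is a hypothetical helper, for illustration
 * only: look up a pool by name and take a reference on it under the
 * namespace lock, using the new wrappers rather than touching
 * spa_namespace_lock directly. The tag is accepted (and currently
 * discarded by the wrappers) so lock-holder tracking can be added
 * later without revisiting call sites.
 */
static spa_t *
example_spa_lookup_ref(const char *name)
{
	spa_t *spa;

	spa_namespace_enter(FTAG);
	if ((spa = spa_lookup(name)) != NULL)
		spa_open_ref(spa, FTAG);
	spa_namespace_exit(FTAG);

	return (spa);
}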
@@ -620,7 +673,7 @@ spa_lookup(const char *name) avl_index_t where; char *cp; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); retry: (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); @@ -645,7 +698,7 @@ retry: spa->spa_load_thread != curthread) || (spa->spa_export_thread != NULL && spa->spa_export_thread != curthread)) { - cv_wait(&spa_namespace_cv, &spa_namespace_lock); + spa_namespace_wait(); goto retry; } @@ -697,7 +750,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa_t *spa; spa_config_dirent_t *dp; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); @@ -747,7 +800,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa_config_lock_init(spa); spa_stats_init(spa); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); avl_add(&spa_namespace_avl, spa); /* @@ -837,7 +890,7 @@ spa_remove(spa_t *spa) { spa_config_dirent_t *dp; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); ASSERT(spa_state(spa) == POOL_STATE_UNINITIALIZED); ASSERT3U(zfs_refcount_count(&spa->spa_refcount), ==, 0); ASSERT0(spa->spa_waiters); @@ -916,7 +969,7 @@ spa_remove(spa_t *spa) spa_t * spa_next(spa_t *prev) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); if (prev) return (AVL_NEXT(&spa_namespace_avl, prev)); @@ -938,7 +991,7 @@ void spa_open_ref(spa_t *spa, const void *tag) { ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref || - MUTEX_HELD(&spa_namespace_lock) || + spa_namespace_held() || spa->spa_load_thread == curthread); (void) zfs_refcount_add(&spa->spa_refcount, tag); } @@ -951,7 +1004,7 @@ void spa_close(spa_t *spa, const void *tag) { ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref || - MUTEX_HELD(&spa_namespace_lock) || + spa_namespace_held() || spa->spa_load_thread == curthread || spa->spa_export_thread == curthread); (void) zfs_refcount_remove(&spa->spa_refcount, tag); @@ -980,7 +1033,7 @@ spa_async_close(spa_t *spa, const void *tag) boolean_t spa_refcount_zero(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); return (zfs_refcount_count(&spa->spa_refcount) == spa->spa_minref); @@ -1227,7 +1280,7 @@ uint64_t spa_vdev_enter(spa_t *spa) { mutex_enter(&spa->spa_vdev_top_lock); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); ASSERT0(spa->spa_export_thread); @@ -1246,7 +1299,7 @@ uint64_t spa_vdev_detach_enter(spa_t *spa, uint64_t guid) { mutex_enter(&spa->spa_vdev_top_lock); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); ASSERT0(spa->spa_export_thread); @@ -1270,7 +1323,7 @@ spa_vdev_detach_enter(spa_t *spa, uint64_t guid) uint64_t spa_vdev_config_enter(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); @@ -1285,7 +1338,7 @@ void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, const char *tag) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); int config_changed = B_FALSE; @@ -1374,7 +1427,7 @@ spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) vdev_rebuild_restart(spa); spa_vdev_config_exit(spa, vd, txg, error, FTAG); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); mutex_exit(&spa->spa_vdev_top_lock); return (error); @@ -1452,9 +1505,9 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int 
error) * If the config changed, update the config cache. */ if (config_changed) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } return (error); @@ -1501,7 +1554,7 @@ spa_by_guid(uint64_t pool_guid, uint64_t device_guid) spa_t *spa; avl_tree_t *t = &spa_namespace_avl; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { if (spa->spa_state == POOL_STATE_UNINITIALIZED) @@ -1583,7 +1636,7 @@ spa_load_guid_exists(uint64_t guid) { avl_tree_t *t = &spa_namespace_avl; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); for (spa_t *spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { if (spa_load_guid(spa) == guid) @@ -2200,10 +2253,10 @@ spa_set_deadman_ziotime(hrtime_t ns) spa_t *spa = NULL; if (spa_mode_global != SPA_MODE_UNINIT) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); while ((spa = spa_next(spa)) != NULL) spa->spa_deadman_ziotime = ns; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } } @@ -2213,10 +2266,10 @@ spa_set_deadman_synctime(hrtime_t ns) spa_t *spa = NULL; if (spa_mode_global != SPA_MODE_UNINIT) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); while ((spa = spa_next(spa)) != NULL) spa->spa_deadman_synctime = ns; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } } @@ -3048,10 +3101,10 @@ param_set_deadman_failmode_common(const char *val) return (SET_ERROR(EINVAL)); if (spa_mode_global != SPA_MODE_UNINIT) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); while ((spa = spa_next(spa)) != NULL) spa_set_deadman_failmode(spa, val); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } return (0); @@ -3135,7 +3188,6 @@ EXPORT_SYMBOL(spa_has_slogs); EXPORT_SYMBOL(spa_is_root); EXPORT_SYMBOL(spa_writeable); EXPORT_SYMBOL(spa_mode); -EXPORT_SYMBOL(spa_namespace_lock); EXPORT_SYMBOL(spa_trust_config); EXPORT_SYMBOL(spa_missing_tvds_allowed); EXPORT_SYMBOL(spa_set_missing_tvds); diff --git a/sys/contrib/openzfs/module/zfs/space_map.c b/sys/contrib/openzfs/module/zfs/space_map.c index 5f24963f2291..f20c49ebb6de 100644 --- a/sys/contrib/openzfs/module/zfs/space_map.c +++ b/sys/contrib/openzfs/module/zfs/space_map.c @@ -537,7 +537,7 @@ space_map_write_intro_debug(space_map_t *sm, maptype_t maptype, dmu_tx_t *tx) SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_length, - sizeof (dentry), &dentry, tx); + sizeof (dentry), &dentry, tx, DMU_READ_NO_PREFETCH); sm->sm_phys->smp_length += sizeof (dentry); } diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index c8d7280387a2..2a4d1876251f 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -449,32 +449,53 @@ vdev_get_nparity(vdev_t *vd) } static int -vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value) +vdev_prop_get_objid(vdev_t *vd, uint64_t *objid) { - spa_t *spa = vd->vdev_spa; - objset_t *mos = spa->spa_meta_objset; - uint64_t objid; - int err; if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; + *objid = vd->vdev_root_zap; } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; + *objid = vd->vdev_top_zap; } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; + *objid = vd->vdev_leaf_zap; } else { return (EINVAL); } + return (0); 
+} + +static int +vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value) +{ + spa_t *spa = vd->vdev_spa; + objset_t *mos = spa->spa_meta_objset; + uint64_t objid; + int err; + + if (vdev_prop_get_objid(vd, &objid) != 0) + return (EINVAL); + err = zap_lookup(mos, objid, vdev_prop_to_name(prop), sizeof (uint64_t), 1, value); - if (err == ENOENT) *value = vdev_prop_default_numeric(prop); return (err); } +static int +vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue) +{ + int err; + uint64_t ivalue; + + err = vdev_prop_get_int(vd, prop, &ivalue); + *bvalue = ivalue != 0; + + return (err); +} + /* * Get the number of data disks for a top-level vdev. */ @@ -737,8 +758,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) */ vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N); vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T); + vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N); vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T); + + vd->vdev_slow_io_events = vdev_prop_default_numeric( + VDEV_PROP_SLOW_IO_EVENTS); vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N); vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T); @@ -3931,6 +3956,11 @@ vdev_load(vdev_t *vd) vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " "failed [error=%d]", (u_longlong_t)zapobj, error); + error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS, + &vd->vdev_slow_io_events); + if (error && error != ENOENT) + vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " + "failed [error=%d]", (u_longlong_t)zapobj, error); error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N, &vd->vdev_slow_io_n); if (error && error != ENOENT) @@ -5980,15 +6010,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx) /* * Set vdev property values in the vdev props mos object. 
*/ - if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; - } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; - } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; - } else { + if (vdev_prop_get_objid(vd, &objid) != 0) panic("unexpected vdev type"); - } mutex_enter(&spa->spa_props_lock); @@ -6215,6 +6238,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) } vd->vdev_io_t = intval; break; + case VDEV_PROP_SLOW_IO_EVENTS: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_slow_io_events = intval != 0; + break; case VDEV_PROP_SLOW_IO_N: if (nvpair_value_uint64(elem, &intval) != 0) { error = EINVAL; @@ -6256,6 +6286,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) nvpair_t *elem = NULL; nvlist_t *nvprops = NULL; uint64_t intval = 0; + boolean_t boolval = 0; char *strval = NULL; const char *propname = NULL; vdev_prop_t prop; @@ -6269,15 +6300,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops); - if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; - } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; - } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; - } else { + if (vdev_prop_get_objid(vd, &objid) != 0) return (SET_ERROR(EINVAL)); - } ASSERT(objid != 0); mutex_enter(&spa->spa_props_lock); @@ -6622,6 +6646,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) intval, src); break; + case VDEV_PROP_SLOW_IO_EVENTS: + err = vdev_prop_get_bool(vd, prop, &boolval); + if (err && err != ENOENT) + break; + + src = ZPROP_SRC_LOCAL; + if (boolval == vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + + vdev_prop_add_list(outnvl, propname, NULL, + boolval, src); + break; case VDEV_PROP_CHECKSUM_N: case VDEV_PROP_CHECKSUM_T: case VDEV_PROP_IO_N: diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c b/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c index c0127829c26c..ab7069f44b37 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c @@ -147,7 +147,7 @@ vdev_indirect_births_add_entry(vdev_indirect_births_t *vib, old_size = vdev_indirect_births_size_impl(vib); dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe), - &vibe, tx); + &vibe, tx, DMU_READ_NO_PREFETCH); vib->vib_phys->vib_count++; new_size = vdev_indirect_births_size_impl(vib); diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c b/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c index 1515ddc1baa2..da90a8de016f 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c +++ b/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c @@ -459,13 +459,14 @@ vdev_indirect_mapping_add_entries(vdev_indirect_mapping_t *vim, dmu_write(vim->vim_objset, vim->vim_object, vim->vim_phys->vimp_num_entries * sizeof (*mapbuf), i * sizeof (*mapbuf), - mapbuf, tx); + mapbuf, tx, DMU_READ_NO_PREFETCH); if (vim->vim_havecounts) { dmu_write(vim->vim_objset, vim->vim_phys->vimp_counts_object, vim->vim_phys->vimp_num_entries * sizeof (*countbuf), - i * sizeof (*countbuf), countbuf, tx); + i * sizeof (*countbuf), countbuf, tx, + DMU_READ_NO_PREFETCH); } vim->vim_phys->vimp_num_entries += i; } diff --git a/sys/contrib/openzfs/module/zfs/vdev_initialize.c b/sys/contrib/openzfs/module/zfs/vdev_initialize.c index 27188c46e561..d13da1e5a663 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_initialize.c +++ 
b/sys/contrib/openzfs/module/zfs/vdev_initialize.c @@ -685,7 +685,7 @@ vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list) (void) spa; vdev_t *vd; - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); while ((vd = list_remove_head(vd_list)) != NULL) { @@ -728,7 +728,7 @@ vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state, if (vd_list == NULL) { vdev_initialize_stop_wait_impl(vd); } else { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || vd->vdev_spa->spa_export_thread == curthread); list_insert_tail(vd_list, vd); } @@ -761,7 +761,7 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state) spa_t *spa = vd->vdev_spa; list_t vd_list; - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); list_create(&vd_list, sizeof (vdev_t), @@ -781,7 +781,7 @@ vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state) void vdev_initialize_restart(vdev_t *vd) { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || vd->vdev_spa->spa_load_thread == curthread); ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER)); diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c index 0d4fdaa77ba0..7e222eac5edc 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/openzfs/module/zfs/vdev_label.c @@ -862,8 +862,8 @@ retry: } } - if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) { - flags |= ZIO_FLAG_TRYHARD; + if (config == NULL && !(flags & ZIO_FLAG_IO_RETRY)) { + flags |= ZIO_FLAG_IO_RETRY; goto retry; } @@ -1079,7 +1079,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) size_t buflen; int error; uint64_t spare_guid = 0, l2cache_guid = 0; - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_TRYHARD; boolean_t reason_spare = (reason == VDEV_LABEL_SPARE || (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)); boolean_t reason_l2cache = (reason == VDEV_LABEL_L2CACHE || (reason == @@ -1223,7 +1224,6 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) /* * Write everything in parallel. */ -retry: zio = zio_root(spa, NULL, NULL, flags); for (int l = 0; l < VDEV_LABELS; l++) { @@ -1248,11 +1248,6 @@ retry: error = zio_wait(zio); - if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { - flags |= ZIO_FLAG_TRYHARD; - goto retry; - } - nvlist_free(label); abd_free(bootenv); abd_free(ub_abd); @@ -1398,7 +1393,8 @@ vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env) zio_t *zio; spa_t *spa = vd->vdev_spa; vdev_boot_envblock_t *bootenv; - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_TRYHARD; int error; size_t nvsize; char *nvbuf; @@ -1466,7 +1462,6 @@ vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env) return (SET_ERROR(error)); } -retry: zio = zio_root(spa, NULL, NULL, flags); for (int l = 0; l < VDEV_LABELS; l++) { vdev_label_write(zio, vd, l, abd, @@ -1475,10 +1470,6 @@ retry: } error = zio_wait(zio); - if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { - flags |= ZIO_FLAG_TRYHARD; - goto retry; - } abd_free(abd); return (error); @@ -2056,13 +2047,13 @@ retry: * Normally, we don't want to try too hard to write every label and * uberblock. If there is a flaky disk, we don't want the rest of the * sync process to block while we retry. 
But if we can't write a - * single label out, we should retry with ZIO_FLAG_TRYHARD before + * single label out, we should retry with ZIO_FLAG_IO_RETRY before * bailing out and declaring the pool faulted. */ if (error != 0) { - if ((flags & ZIO_FLAG_TRYHARD) != 0) + if ((flags & ZIO_FLAG_IO_RETRY) != 0) return (error); - flags |= ZIO_FLAG_TRYHARD; + flags |= ZIO_FLAG_IO_RETRY; } ASSERT(ub->ub_txg <= txg); @@ -2113,7 +2104,7 @@ retry: * are committed to stable storage before the uberblock update. */ if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0) { - if ((flags & ZIO_FLAG_TRYHARD) != 0) { + if ((flags & ZIO_FLAG_IO_RETRY) != 0) { zfs_dbgmsg("vdev_label_sync_list() returned error %d " "for pool '%s' when syncing out the even labels " "of dirty vdevs", error, spa_name(spa)); @@ -2137,7 +2128,7 @@ retry: * to the new uberblocks. */ if ((error = vdev_uberblock_sync_list(svd, svdcount, ub, flags)) != 0) { - if ((flags & ZIO_FLAG_TRYHARD) != 0) { + if ((flags & ZIO_FLAG_IO_RETRY) != 0) { zfs_dbgmsg("vdev_uberblock_sync_list() returned error " "%d for pool '%s'", error, spa_name(spa)); } @@ -2158,7 +2149,7 @@ retry: * stable storage before the next transaction group begins. */ if ((error = vdev_label_sync_list(spa, 1, txg, flags)) != 0) { - if ((flags & ZIO_FLAG_TRYHARD) != 0) { + if ((flags & ZIO_FLAG_IO_RETRY) != 0) { zfs_dbgmsg("vdev_label_sync_list() returned error %d " "for pool '%s' when syncing out the odd labels of " "dirty vdevs", error, spa_name(spa)); diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index 56b8e3b60b22..5fe70ec2b1d5 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -4872,7 +4872,7 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) else vre->vre_offset = RRSS_GET_OFFSET(&spa->spa_ubsync); - /* Reflow the begining portion using the scratch area */ + /* Reflow the beginning portion using the scratch area */ if (vre->vre_offset == 0) { VERIFY0(dsl_sync_task(spa_name(spa), NULL, raidz_reflow_scratch_sync, diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index 47b3b9921abe..30be1f851eb3 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -1079,7 +1079,7 @@ vdev_rebuild_restart_impl(vdev_t *vd) void vdev_rebuild_restart(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_load_thread == curthread); vdev_rebuild_restart_impl(spa->spa_root_vdev); @@ -1094,7 +1094,7 @@ vdev_rebuild_stop_wait(vdev_t *vd) { spa_t *spa = vd->vdev_spa; - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); if (vd == spa->spa_root_vdev) { diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index abb71543e3ab..81e6ecb68ff1 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -309,12 +309,12 @@ spa_vdev_noalloc(spa_t *spa, uint64_t guid) uint64_t txg; int error = 0; - ASSERT(!MUTEX_HELD(&spa_namespace_lock)); + ASSERT(!spa_namespace_held()); ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); vd = spa_lookup_by_guid(spa, guid, B_FALSE); @@ -342,12 +342,12 @@ spa_vdev_alloc(spa_t *spa, uint64_t guid) uint64_t txg; int error = 0; - 
ASSERT(!MUTEX_HELD(&spa_namespace_lock)); + ASSERT(!spa_namespace_held()); ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); vd = spa_lookup_by_guid(spa, guid, B_FALSE); @@ -2085,7 +2085,7 @@ vdev_remove_make_hole_and_free(vdev_t *vd) spa_t *spa = vd->vdev_spa; vdev_t *rvd = spa->spa_root_vdev; - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); vdev_free(vd); @@ -2113,7 +2113,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) ASSERT(vd->vdev_islog); ASSERT(vd == vd->vdev_top); ASSERT0P(vd->vdev_log_mg); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); /* * Stop allocating from this vdev. @@ -2140,7 +2140,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) * spa_namespace_lock held. Once this completes the device * should no longer have any blocks allocated on it. */ - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); if (vd->vdev_stat.vs_alloc != 0) error = spa_reset_logs(spa); @@ -2189,7 +2189,7 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg) sysevent_t *ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_DEV); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* The top ZAP should have been destroyed by vdev_remove_empty. */ @@ -2433,7 +2433,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) uint64_t txg = 0; uint_t nspares, nl2cache; int error = 0, error_log; - boolean_t locked = MUTEX_HELD(&spa_namespace_lock); + boolean_t locked = spa_namespace_held(); sysevent_t *ev = NULL; const char *vd_type = NULL; char *vd_path = NULL; @@ -2443,7 +2443,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) if (!locked) txg = spa_vdev_enter(spa); - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { error = (spa_has_checkpoint(spa)) ? 
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c index eee18b367909..a97f6650a81c 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_trim.c +++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c @@ -1045,7 +1045,7 @@ vdev_trim_stop_wait(spa_t *spa, list_t *vd_list) (void) spa; vdev_t *vd; - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); while ((vd = list_remove_head(vd_list)) != NULL) { @@ -1085,7 +1085,7 @@ vdev_trim_stop(vdev_t *vd, vdev_trim_state_t tgt_state, list_t *vd_list) if (vd_list == NULL) { vdev_trim_stop_wait_impl(vd); } else { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || vd->vdev_spa->spa_export_thread == curthread); list_insert_tail(vd_list, vd); } @@ -1122,7 +1122,7 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state) list_t vd_list; vdev_t *vd_l2cache; - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_export_thread == curthread); list_create(&vd_list, sizeof (vdev_t), @@ -1156,7 +1156,7 @@ vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state) void vdev_trim_restart(vdev_t *vd) { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || vd->vdev_spa->spa_load_thread == curthread); ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER)); @@ -1582,7 +1582,7 @@ vdev_autotrim_stop_all(spa_t *spa) void vdev_autotrim_restart(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock) || + ASSERT(spa_namespace_held() || spa->spa_load_thread == curthread); if (spa->spa_autotrim) vdev_autotrim(spa); @@ -1689,7 +1689,7 @@ vdev_trim_l2arc_thread(void *arg) void vdev_trim_l2arc(spa_t *spa) { - ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); /* * Locate the spa's l2arc devices and kick off TRIM threads. 
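The vdev_initialize, vdev_trim, and vdev_rebuild hunks above all make the same conversion: ASSERT(MUTEX_HELD(&spa_namespace_lock) || ...) becomes ASSERT(spa_namespace_held() || ...), with the pool's load or export thread accepted as an alternative owner. A condensed sketch of that ownership test, factored into a hypothetical predicate purely for illustration (the commit itself keeps the assertions inline; spa_load_thread/spa_export_thread come from sys/spa_impl.h):

/*
 * Hypothetical predicate showing the ownership rule the converted
 * assertions encode: the namespace lock is legitimately dropped while
 * a pool load or export is in flight, and the owning thread is
 * recorded in the spa_t instead, so either condition is evidence of
 * exclusive access.
 */
static boolean_t
example_spa_namespace_owned(spa_t *spa)
{
	return (spa_namespace_held() ||
	    spa->spa_load_thread == curthread ||
	    spa->spa_export_thread == curthread);
}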
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c index ea4e3117a8b9..7e9e625a193e 100644 --- a/sys/contrib/openzfs/module/zfs/zap_micro.c +++ b/sys/contrib/openzfs/module/zfs/zap_micro.c @@ -625,12 +625,10 @@ zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx, ASSERT0(db->db_offset); objset_t *os = dmu_buf_get_objset(db); uint64_t obj = db->db_object; - dmu_object_info_t doi; *zapp = NULL; - dmu_object_info_from_dnode(dn, &doi); - if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP) + if (DMU_OT_BYTESWAP(dn->dn_type) != DMU_BSWAP_ZAP) return (SET_ERROR(EINVAL)); zap_t *zap = dmu_buf_get_user(db); diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c index 221f24e381dc..4a0d41c24eed 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_fm.c +++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c @@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop) case VDEV_PROP_IO_T: propval = vd->vdev_io_t; break; + case VDEV_PROP_SLOW_IO_EVENTS: + propval = vd->vdev_slow_io_events; + break; case VDEV_PROP_SLOW_IO_N: propval = vd->vdev_slow_io_n; break; @@ -1580,10 +1583,10 @@ zfs_ereport_zvol_post(const char *subclass, const char *name, nvlist_t *aux; char *r; - boolean_t locked = mutex_owned(&spa_namespace_lock); - if (!locked) mutex_enter(&spa_namespace_lock); + boolean_t locked = spa_namespace_held(); + if (!locked) spa_namespace_enter(FTAG); spa_t *spa = spa_lookup(name); - if (!locked) mutex_exit(&spa_namespace_lock); + if (!locked) spa_namespace_exit(FTAG); if (spa == NULL) return; diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c index 2af1efe82e62..aa10741ba870 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c +++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c @@ -28,8 +28,8 @@ #include <sys/avl.h> #include <sys/zap.h> #include <sys/nvpair.h> -#ifdef _KERNEL #include <sys/sid.h> +#ifdef _KERNEL #include <sys/zfs_vfsops.h> #include <sys/zfs_znode.h> #endif @@ -268,7 +268,7 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx) nvlist_free(nvp); zfsvfs->z_fuid_size = nvsize; dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0, - zfsvfs->z_fuid_size, packed, tx); + zfsvfs->z_fuid_size, packed, tx, DMU_READ_NO_PREFETCH); kmem_free(packed, zfsvfs->z_fuid_size); VERIFY0(dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c index 5ca7c2320c4e..1b2392aeaa85 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c +++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c @@ -212,6 +212,8 @@ #include <sys/vdev_impl.h> #include <sys/vdev_initialize.h> #include <sys/vdev_trim.h> +#include <sys/brt.h> +#include <sys/ddt.h> #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -3122,12 +3124,12 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc) if (pair != NULL && strcmp(nvpair_name(pair), zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 && nvlist_next_nvpair(props, pair) == NULL) { - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if ((spa = spa_lookup(zc->zc_name)) != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); } - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); if (spa != NULL) { nvlist_free(props); return (0); @@ -3176,14 +3178,14 @@ zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) * get (such as altroot and cachefile), so 
attempt to get them * anyway. */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); if ((spa = spa_lookup(pool)) != NULL) { error = spa_prop_get(spa, outnvl); if (error == 0 && props != NULL) error = spa_prop_get_nvlist(spa, props, n_props, outnvl); } - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); } else { error = spa_prop_get(spa, outnvl); if (error == 0 && props != NULL) @@ -4276,13 +4278,11 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) spa_t *spa; int32_t type; - /* - * Currently, only ZPOOL_PREFETCH_DDT is supported - */ - if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 || - type != ZPOOL_PREFETCH_DDT) { + if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0) + return (EINVAL); + + if (type != ZPOOL_PREFETCH_DDT && type != ZPOOL_PREFETCH_BRT) return (EINVAL); - } error = spa_open(poolname, &spa, FTAG); if (error != 0) @@ -4290,10 +4290,17 @@ zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) hrtime_t start_time = gethrtime(); - ddt_prefetch_all(spa); - - zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name, - (u_longlong_t)NSEC2MSEC(gethrtime() - start_time)); + if (type == ZPOOL_PREFETCH_DDT) { + ddt_prefetch_all(spa); + zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", + spa->spa_name, + (u_longlong_t)NSEC2MSEC(gethrtime() - start_time)); + } else { + brt_prefetch_all(spa); + zfs_dbgmsg("pool '%s': loaded brt into ARC in %llu ms", + spa->spa_name, + (u_longlong_t)NSEC2MSEC(gethrtime() - start_time)); + } spa_close(spa, FTAG); @@ -6121,10 +6128,10 @@ zfs_ioc_clear(zfs_cmd_t *zc) /* * On zpool clear we also fix up missing slogs */ - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); spa = spa_lookup(zc->zc_name); if (spa == NULL) { - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (SET_ERROR(EIO)); } if (spa_get_log_state(spa) == SPA_LOG_MISSING) { @@ -6132,7 +6139,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) spa_set_log_state(spa, SPA_LOG_CLEAR); } spa->spa_last_open_failed = 0; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); if (zc->zc_cookie & ZPOOL_NO_REWIND) { error = spa_open(zc->zc_name, &spa, FTAG); diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index aeea58bedfe4..74373f759cec 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -3318,8 +3318,8 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) } else if (any_failed && candidate > SPA_OLD_GANGBLOCKSIZE && spa_feature_is_enabled(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER) && !spa_feature_is_active(spa, SPA_FEATURE_DYNAMIC_GANG_HEADER)) { - dmu_tx_t *tx = - dmu_tx_create_assigned(spa->spa_dsl_pool, txg + 1); + dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool, + MAX(txg, spa_syncing_txg(spa) + 1)); dsl_sync_task_nowait(spa->spa_dsl_pool, zio_update_feature, (void *)SPA_FEATURE_DYNAMIC_GANG_HEADER, tx); @@ -5569,9 +5569,12 @@ zio_done(zio_t *zio) zio->io_vd->vdev_stat.vs_slow_ios++; mutex_exit(&zio->io_vd->vdev_stat_lock); - (void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY, - zio->io_spa, zio->io_vd, &zio->io_bookmark, - zio, 0); + if (zio->io_vd->vdev_slow_io_events) { + (void) zfs_ereport_post( + FM_EREPORT_ZFS_DELAY, + zio->io_spa, zio->io_vd, + &zio->io_bookmark, zio, 0); + } } } } diff --git a/sys/contrib/openzfs/module/zfs/zio_inject.c b/sys/contrib/openzfs/module/zfs/zio_inject.c index 287577018ed1..c3adfdab54ce 100644 --- 
a/sys/contrib/openzfs/module/zfs/zio_inject.c +++ b/sys/contrib/openzfs/module/zfs/zio_inject.c @@ -1008,9 +1008,9 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) if (zio_pool_handler_exists(name, record->zi_cmd)) return (SET_ERROR(EEXIST)); - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); boolean_t has_spa = spa_lookup(name) != NULL; - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); if (record->zi_cmd == ZINJECT_DELAY_IMPORT && has_spa) return (SET_ERROR(EEXIST)); @@ -1095,7 +1095,7 @@ zio_inject_list_next(int *id, char *name, size_t buflen, inject_handler_t *handler; int ret; - mutex_enter(&spa_namespace_lock); + spa_namespace_enter(FTAG); rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; @@ -1117,7 +1117,7 @@ zio_inject_list_next(int *id, char *name, size_t buflen, } rw_exit(&inject_lock); - mutex_exit(&spa_namespace_lock); + spa_namespace_exit(FTAG); return (ret); } diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 00f98168d3d8..407758641580 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -547,7 +547,8 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap) if (error) { dmu_tx_abort(tx); } else { - dmu_write(os, ZVOL_OBJ, offset, length, data, tx); + dmu_write(os, ZVOL_OBJ, offset, length, data, tx, + DMU_READ_PREFETCH); (void) zil_replaying(zv->zv_zilog, tx); dmu_tx_commit(tx); } @@ -1232,7 +1233,7 @@ zvol_first_open(zvol_state_t *zv, boolean_t readonly) ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); - ASSERT(mutex_owned(&spa_namespace_lock)); + ASSERT(spa_namespace_held()); boolean_t ro = (readonly || (strchr(zv->zv_name, '@') != NULL)); error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os); @@ -1302,7 +1303,7 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg) list_t *minors_list = j->list; const char *name = j->name; - ASSERT0(MUTEX_HELD(&spa_namespace_lock)); + ASSERT0(spa_namespace_held()); /* skip the designated dataset */ if (name && strcmp(dsname, name) == 0) @@ -1402,7 +1403,7 @@ zvol_create_minors_cb(const char *dsname, void *arg) int error; list_t *minors_list = arg; - ASSERT0(MUTEX_HELD(&spa_namespace_lock)); + ASSERT0(spa_namespace_held()); error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL); if (error) diff --git a/sys/contrib/openzfs/module/zstd/include/aarch64_compat.h b/sys/contrib/openzfs/module/zstd/include/aarch64_compat.h deleted file mode 100644 index 9500a832b81c..000000000000 --- a/sys/contrib/openzfs/module/zstd/include/aarch64_compat.h +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause -/* - * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 2018-2020, Sebastian Gottschall - */ - -#ifdef _KERNEL -#undef __aarch64__ -#endif diff --git a/sys/contrib/openzfs/module/zstd/lib/common/compiler.h b/sys/contrib/openzfs/module/zstd/lib/common/compiler.h index d0f588e2ec3c..c8d65a201212 100644 --- a/sys/contrib/openzfs/module/zstd/lib/common/compiler.h +++ b/sys/contrib/openzfs/module/zstd/lib/common/compiler.h @@ -115,9 +115,6 @@ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) -# elif defined(__aarch64__) -# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) -# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) diff --git a/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h b/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h index 6b1fc44cf9f6..9650af77bcea 100644 --- a/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h +++ b/sys/contrib/openzfs/module/zstd/lib/common/zstd_internal.h @@ -12,6 +12,15 @@ #ifndef ZSTD_CCOMMON_H_MODULE #define ZSTD_CCOMMON_H_MODULE +/* + * Disable the aarch64 NEON SIMD intrinsics for kernel builds. Safely + * using them in the kernel context requires saving/restoring the FPU + * registers which is not currently done. + */ +#ifdef _KERNEL +#define ZSTD_NO_INTRINSICS +#endif + /* this module contains definitions which must be identical * across compression, decompression and dictBuilder. * It also contains a few functions useful to at least 2 of them
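The zstd_internal.h hunk above replaces the deleted aarch64_compat.h hack (#undef __aarch64__ in kernel builds) with zstd's ZSTD_NO_INTRINSICS escape hatch. For context, a sketch of the consumer-side guard such a macro drives — COPY16 is a made-up example macro, and the exact guards live in the upstream zstd headers, so treat this as illustrative only:

/*
 * Illustrative guard: with ZSTD_NO_INTRINSICS defined under _KERNEL,
 * the NEON include and intrinsics path are skipped and the scalar
 * fallback is selected, without lying to the compiler about the
 * target architecture the way #undef __aarch64__ did.
 */
#if defined(__aarch64__) && !defined(ZSTD_NO_INTRINSICS)
#include <arm_neon.h>
#define	COPY16(d, s)	vst1q_u8((d), vld1q_u8(s))	/* NEON 16-byte copy */
#else
#include <string.h>
#define	COPY16(d, s)	memcpy((d), (s), 16)		/* portable fallback */
#endif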
