author     Martin Matuska <mm@FreeBSD.org>    2021-07-23 00:50:13 +0000
committer  Martin Matuska <mm@FreeBSD.org>    2021-07-23 00:50:13 +0000
commit     3f9d360c82e0724bfb61346038236bf15c5d4d84 (patch)
tree       f21ca33e877b6bbba3f57f9b32a054f1793b841a /sys/contrib/openzfs/module
parent     95f0da5be1e3456c930f5f9538cbc099c65f2014 (diff)
parent     14b43fbd9c13d802409ed886bb6b66fd528fb209 (diff)
zfs: merge openzfs/zfs@14b43fbd9 (master) into main
Notable upstream pull request merges:
  #12271 Tinker with slop space accounting with dedup
  #12279 Fix ARC ghost states eviction accounting
  #12284 Add Module Parameter Regarding Log Size Limit
  #12300 Introduce dsl_dir_diduse_transfer_space()
  #12314 Optimize allocation throttling
  #12348 Minor ARC optimizations
  #12350 Detect HAVE_LARGE_STACKS at compile time
  #12356 Use SET_ERROR for more errors in FreeBSD vnops
  #12375 FreeBSD: Ignore make_dev_s() errors
  #12378 FreeBSD: Switch from MAXPHYS to maxphys on FreeBSD 13+

Obtained from:  OpenZFS
OpenZFS commit: 14b43fbd9c13d802409ed886bb6b66fd528fb209
Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r--  sys/contrib/openzfs/module/nvpair/nvpair.c               |  64
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c       |   2
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c    |   4
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c |  45
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c      |  31
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c   |   8
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c       |  19
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c                     | 207
-rw-r--r--  sys/contrib/openzfs/module/zfs/ddt.c                     |   2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_redact.c              |   2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_tx.c                  |   7
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dataset.c             |  10
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_dir.c                 | 112
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_pool.c                |  57
-rw-r--r--  sys/contrib/openzfs/module/zfs/metaslab.c                |  20
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c                     |  12
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_misc.c                |  39
-rw-r--r--  sys/contrib/openzfs/module/zfs/zcp_synctask.c            |  15
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_log.c                 |   5
-rw-r--r--  sys/contrib/openzfs/module/zfs/zil.c                     |   5
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c                     |  51
-rw-r--r--  sys/contrib/openzfs/module/zfs/zvol.c                    |  22
22 files changed, 486 insertions, 253 deletions
diff --git a/sys/contrib/openzfs/module/nvpair/nvpair.c b/sys/contrib/openzfs/module/nvpair/nvpair.c
index 990a4482c993..5f427c8cf2e7 100644
--- a/sys/contrib/openzfs/module/nvpair/nvpair.c
+++ b/sys/contrib/openzfs/module/nvpair/nvpair.c
@@ -3214,6 +3214,56 @@ nvs_xdr_nvl_fini(nvstream_t *nvs)
}
/*
+ * xdrproc_t-compatible callbacks for xdr_array()
+ */
+
+#if defined(_KERNEL) && defined(__linux__) /* Linux kernel */
+
+#define NVS_BUILD_XDRPROC_T(type) \
+static bool_t \
+nvs_xdr_nvp_##type(XDR *xdrs, void *ptr) \
+{ \
+ return (xdr_##type(xdrs, ptr)); \
+}
+
+#elif !defined(_KERNEL) && defined(XDR_CONTROL) /* tirpc */
+
+#define NVS_BUILD_XDRPROC_T(type) \
+static bool_t \
+nvs_xdr_nvp_##type(XDR *xdrs, ...) \
+{ \
+ va_list args; \
+ void *ptr; \
+ \
+ va_start(args, xdrs); \
+ ptr = va_arg(args, void *); \
+ va_end(args); \
+ \
+ return (xdr_##type(xdrs, ptr)); \
+}
+
+#else /* FreeBSD, sunrpc */
+
+#define NVS_BUILD_XDRPROC_T(type) \
+static bool_t \
+nvs_xdr_nvp_##type(XDR *xdrs, void *ptr, ...) \
+{ \
+ return (xdr_##type(xdrs, ptr)); \
+}
+
+#endif
+
+/* BEGIN CSTYLED */
+NVS_BUILD_XDRPROC_T(char);
+NVS_BUILD_XDRPROC_T(short);
+NVS_BUILD_XDRPROC_T(u_short);
+NVS_BUILD_XDRPROC_T(int);
+NVS_BUILD_XDRPROC_T(u_int);
+NVS_BUILD_XDRPROC_T(longlong_t);
+NVS_BUILD_XDRPROC_T(u_longlong_t);
+/* END CSTYLED */
+
+/*
* The format of xdr encoded nvpair is:
* encode_size, decode_size, name string, data type, nelem, data
*/
@@ -3335,38 +3385,38 @@ nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
case DATA_TYPE_INT8_ARRAY:
case DATA_TYPE_UINT8_ARRAY:
ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t),
- (xdrproc_t)xdr_char);
+ nvs_xdr_nvp_char);
break;
case DATA_TYPE_INT16_ARRAY:
ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t),
- sizeof (int16_t), (xdrproc_t)xdr_short);
+ sizeof (int16_t), nvs_xdr_nvp_short);
break;
case DATA_TYPE_UINT16_ARRAY:
ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t),
- sizeof (uint16_t), (xdrproc_t)xdr_u_short);
+ sizeof (uint16_t), nvs_xdr_nvp_u_short);
break;
case DATA_TYPE_BOOLEAN_ARRAY:
case DATA_TYPE_INT32_ARRAY:
ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t),
- sizeof (int32_t), (xdrproc_t)xdr_int);
+ sizeof (int32_t), nvs_xdr_nvp_int);
break;
case DATA_TYPE_UINT32_ARRAY:
ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t),
- sizeof (uint32_t), (xdrproc_t)xdr_u_int);
+ sizeof (uint32_t), nvs_xdr_nvp_u_int);
break;
case DATA_TYPE_INT64_ARRAY:
ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t),
- sizeof (int64_t), (xdrproc_t)xdr_longlong_t);
+ sizeof (int64_t), nvs_xdr_nvp_longlong_t);
break;
case DATA_TYPE_UINT64_ARRAY:
ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t),
- sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t);
+ sizeof (uint64_t), nvs_xdr_nvp_u_longlong_t);
break;
case DATA_TYPE_STRING_ARRAY: {
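
Casting xdr_char() and friends to xdrproc_t, as the old code did, invokes them through a pointer type that does not match their definitions; that is undefined behavior in C and trips kernel Control Flow Integrity checking. The NVS_BUILD_XDRPROC_T macro instead generates a thin wrapper per type whose signature matches each RPC implementation's xdrproc_t exactly. For instance, NVS_BUILD_XDRPROC_T(char) under the FreeBSD/sunrpc branch expands to roughly:

static bool_t
nvs_xdr_nvp_char(XDR *xdrs, void *ptr, ...)
{
	/* forward to the real routine; void * converts to char * in C */
	return (xdr_char(xdrs, ptr));
}
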
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
index 05377bb7ed98..3b8b11cff0c2 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -234,8 +234,6 @@ arc_lowmem(void *arg __unused, int howto __unused)
*/
if (curproc == pageproc)
arc_wait_for_eviction(to_free);
- else
- arc_wait_for_eviction(0);
}
void
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
index b1407e4bd61d..6ac37da1c58a 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
@@ -381,7 +381,11 @@ vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
int i, n_bios, j;
size_t bios_size;
+#if __FreeBSD_version > 1300130
maxio = maxphys - (maxphys % cp->provider->sectorsize);
+#else
+ maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
+#endif
n_bios = 0;
/* How many bios are required for all commands ? */
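
FreeBSD 13 turned the compile-time MAXPHYS constant into the runtime-tunable variable maxphys, hence the __FreeBSD_version guard at each use site. A minimal sketch of how such a guard could be centralized (ZFS_MAXPHYS is a hypothetical name, not part of this patch):

/*
 * Hypothetical compat shim: choose the symbol once rather than
 * repeating the #if at every use site.
 */
#if __FreeBSD_version > 1300130
#define	ZFS_MAXPHYS	maxphys		/* runtime tunable, FreeBSD 13+ */
#else
#define	ZFS_MAXPHYS	MAXPHYS		/* compile-time constant, older trees */
#endif
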
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index 46a632b0385c..846b4b60531f 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -5343,7 +5343,7 @@ zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
vp = nd.ni_vp;
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
if (ap->a_size != NULL) {
error = VOP_GETATTR(vp, &va, ap->a_cred);
@@ -5374,15 +5374,17 @@ zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
&nv_value, &nv_size);
- if (error)
- return (error);
+ if (error != 0)
+ return (SET_ERROR(error));
if (ap->a_size != NULL)
*ap->a_size = nv_size;
else if (ap->a_uio != NULL)
error = uiomove(nv_value, nv_size, ap->a_uio);
+ if (error != 0)
+ return (SET_ERROR(error));
- return (error);
+ return (0);
}
/*
@@ -5405,7 +5407,7 @@ zfs_getextattr(struct vop_getextattr_args *ap)
error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
ap->a_cred, ap->a_td, VREAD);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
sizeof (attrname));
@@ -5456,7 +5458,7 @@ zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
vp = nd.ni_vp;
if (error != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- return (error);
+ return (SET_ERROR(error));
}
error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
@@ -5487,7 +5489,9 @@ zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
nvl = zp->z_xattr_cached;
error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
- if (error == 0)
+ if (error != 0)
+ error = SET_ERROR(error);
+ else
error = zfs_sa_set_xattr(zp);
if (error != 0) {
zp->z_xattr_cached = NULL;
@@ -5516,7 +5520,7 @@ zfs_deleteextattr(struct vop_deleteextattr_args *ap)
error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
ap->a_cred, ap->a_td, VWRITE);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
sizeof (attrname));
@@ -5583,7 +5587,7 @@ zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
vp = nd.ni_vp;
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
VATTR_NULL(&va);
va.va_size = 0;
@@ -5617,13 +5621,18 @@ zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
return (SET_ERROR(EFBIG));
error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
if (sa_size > DXATTR_MAX_SA_SIZE)
return (SET_ERROR(EFBIG));
uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
error = uiomove(buf, entry_size, ap->a_uio);
- if (error == 0)
+ if (error != 0) {
+ error = SET_ERROR(error);
+ } else {
error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
+ if (error != 0)
+ error = SET_ERROR(error);
+ }
kmem_free(buf, entry_size);
if (error == 0)
error = zfs_sa_set_xattr(zp);
@@ -5654,7 +5663,7 @@ zfs_setextattr(struct vop_setextattr_args *ap)
error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
ap->a_cred, ap->a_td, VWRITE);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
sizeof (attrname));
@@ -5733,7 +5742,7 @@ zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
vp = nd.ni_vp;
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
@@ -5779,8 +5788,10 @@ zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
char *namep = dp->d_name + plen;
error = uiomove(namep, nlen, ap->a_uio);
}
- if (error != 0)
+ if (error != 0) {
+ error = SET_ERROR(error);
break;
+ }
}
}
} while (!eof && error == 0);
@@ -5825,8 +5836,10 @@ zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
char *namep = __DECONST(char *, name) + plen;
error = uiomove(namep, nlen, ap->a_uio);
}
- if (error != 0)
+ if (error != 0) {
+ error = SET_ERROR(error);
break;
+ }
}
}
@@ -5856,7 +5869,7 @@ zfs_listextattr(struct vop_listextattr_args *ap)
error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
ap->a_cred, ap->a_td, VREAD);
if (error != 0)
- return (error);
+ return (SET_ERROR(error));
error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
sizeof (attrprefix));
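
SET_ERROR() leaves the errno value unchanged; its purpose is to fire a static probe at the point where an error is first generated, so the origin of an errno can be traced with dtrace instead of hunting through every return path. A simplified sketch of the idea (the real definition lives in the ZFS debug headers):

/* Record where the errno originated, then evaluate to it unchanged. */
#define	SET_ERROR(err)	(DTRACE_PROBE1(set__error, int, err), err)
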
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index aecb9f4c7d87..450369192569 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -1241,7 +1241,11 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
args.mda_si_drv2 = zv;
if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, newname)
== 0) {
+#if __FreeBSD_version > 1300130
dev->si_iosize_max = maxphys;
+#else
+ dev->si_iosize_max = MAXPHYS;
+#endif
zsd->zsd_cdev = dev;
}
}
@@ -1277,9 +1281,10 @@ zvol_free(zvol_state_t *zv)
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
struct cdev *dev = zsd->zsd_cdev;
- ASSERT3P(dev->si_drv2, ==, NULL);
-
- destroy_dev(dev);
+ if (dev != NULL) {
+ ASSERT3P(dev->si_drv2, ==, NULL);
+ destroy_dev(dev);
+ }
}
mutex_destroy(&zv->zv_state_lock);
@@ -1374,16 +1379,15 @@ zvol_create_minor_impl(const char *name)
args.mda_gid = GID_OPERATOR;
args.mda_mode = 0640;
args.mda_si_drv2 = zv;
- error = make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name);
- if (error) {
- kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
- mutex_destroy(&zv->zv_state_lock);
- kmem_free(zv, sizeof (*zv));
- dmu_objset_disown(os, B_TRUE, FTAG);
- goto out_doi;
+ if (make_dev_s(&args, &dev, "%s/%s", ZVOL_DRIVER, name)
+ == 0) {
+#if __FreeBSD_version > 1300130
+ dev->si_iosize_max = maxphys;
+#else
+ dev->si_iosize_max = MAXPHYS;
+#endif
+ zsd->zsd_cdev = dev;
}
- dev->si_iosize_max = maxphys;
- zsd->zsd_cdev = dev;
}
(void) strlcpy(zv->zv_name, name, MAXPATHLEN);
rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
@@ -1456,7 +1460,8 @@ zvol_clear_private(zvol_state_t *zv)
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
struct cdev *dev = zsd->zsd_cdev;
- dev->si_drv2 = NULL;
+ if (dev != NULL)
+ dev->si_drv2 = NULL;
}
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index 24c016c5fcf1..e0dc6ed95747 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -367,6 +367,12 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len,
return (error);
}
+static void
+zfs_rele_async_task(void *arg)
+{
+ iput(arg);
+}
+
void
zfs_zrele_async(znode_t *zp)
{
@@ -386,7 +392,7 @@ zfs_zrele_async(znode_t *zp)
*/
if (!atomic_add_unless(&ip->i_count, -1, 1)) {
VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
- (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
+ zfs_rele_async_task, ip, TQ_SLEEP) != TASKQID_INVALID);
}
}
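
Dispatching `(task_func_t *)iput` called iput() through a pointer type it was not declared with, the same undefined-behavior/CFI problem as the xdrproc_t casts above; zfs_rele_async_task() is a trampoline with the exact void (*)(void *) signature. The same pattern recurs below for zil_itxg_clean(), zio_execute()/zio_reexecute(), zcp_synctask_cleanup(), and zvol_free_task(). A self-contained user-space sketch of the pattern (names are illustrative, not from the patch):

#include <stdlib.h>
#include <string.h>

typedef void (task_func_t)(void *);	/* taskq callback type */

static void
object_free(char *obj)			/* real callee keeps its own type */
{
	free(obj);
}

/*
 * Trampoline whose type matches task_func_t exactly, so the
 * dispatcher never calls through a mis-typed pointer.
 */
static void
object_free_task(void *arg)
{
	object_free(arg);
}

static void
dispatch(task_func_t *fn, void *arg)	/* stand-in for taskq_dispatch() */
{
	fn(arg);
}

int
main(void)
{
	dispatch(object_free_task, strdup("obj"));	/* no cast needed */
	return (0);
}
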
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index 524c43dcded4..0319148b983d 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -591,8 +591,8 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
* only used to support mmap(2). There will be an identical copy of the
* data in the ARC which is kept up to date via .write() and .writepage().
*/
-static int
-zpl_readpage(struct file *filp, struct page *pp)
+static inline int
+zpl_readpage_common(struct page *pp)
{
struct inode *ip;
struct page *pl[1];
@@ -620,6 +620,18 @@ zpl_readpage(struct file *filp, struct page *pp)
return (error);
}
+static int
+zpl_readpage(struct file *filp, struct page *pp)
+{
+ return (zpl_readpage_common(pp));
+}
+
+static int
+zpl_readpage_filler(void *data, struct page *pp)
+{
+ return (zpl_readpage_common(pp));
+}
+
/*
* Populate a set of pages with data for the Linux page cache. This
* function will only be called for read ahead and never for demand
@@ -630,8 +642,7 @@ static int
zpl_readpages(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- return (read_cache_pages(mapping, pages,
- (filler_t *)zpl_readpage, filp));
+ return (read_cache_pages(mapping, pages, zpl_readpage_filler, NULL));
}
static int
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 394ca1bfe42d..02663e8e2e5d 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -648,13 +648,6 @@ arc_sums_t arc_sums;
} while (0)
kstat_t *arc_ksp;
-static arc_state_t *arc_anon;
-static arc_state_t *arc_mru_ghost;
-static arc_state_t *arc_mfu_ghost;
-static arc_state_t *arc_l2c_only;
-
-arc_state_t *arc_mru;
-arc_state_t *arc_mfu;
/*
* There are several ARC variables that are critical to export as kstats --
@@ -826,6 +819,12 @@ typedef enum arc_fill_flags {
ARC_FILL_IN_PLACE = 1 << 4 /* fill in place (special case) */
} arc_fill_flags_t;
+typedef enum arc_ovf_level {
+ ARC_OVF_NONE, /* ARC within target size. */
+ ARC_OVF_SOME, /* ARC is slightly overflowed. */
+ ARC_OVF_SEVERE /* ARC is severely overflowed. */
+} arc_ovf_level_t;
+
static kmutex_t l2arc_feed_thr_lock;
static kcondvar_t l2arc_feed_thr_cv;
static uint8_t l2arc_thread_exit;
@@ -2197,7 +2196,6 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
return;
}
- ASSERT(!GHOST_STATE(state));
if (hdr->b_l1hdr.b_pabd != NULL) {
(void) zfs_refcount_add_many(&state->arcs_esize[type],
arc_hdr_size(hdr), hdr);
@@ -2238,7 +2236,6 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
return;
}
- ASSERT(!GHOST_STATE(state));
if (hdr->b_l1hdr.b_pabd != NULL) {
(void) zfs_refcount_remove_many(&state->arcs_esize[type],
arc_hdr_size(hdr), hdr);
@@ -3861,9 +3858,18 @@ arc_buf_destroy(arc_buf_t *buf, void* tag)
* - arc_mru_ghost -> deleted
* - arc_mfu_ghost -> arc_l2c_only
* - arc_mfu_ghost -> deleted
+ *
+ * Return total size of evicted data buffers for eviction progress tracking.
+ * When evicting from ghost states return logical buffer size to make eviction
+ * progress at the same (or at least comparable) rate as from non-ghost states.
+ *
+ * Return *real_evicted for actual ARC size reduction to wake up threads
+ * waiting for it. For non-ghost states it includes size of evicted data
+ * buffers (the headers are not freed there). For ghost states it includes
+ * only the evicted headers size.
*/
static int64_t
-arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
+arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, uint64_t *real_evicted)
{
arc_state_t *evicted_state, *state;
int64_t bytes_evicted = 0;
@@ -3873,6 +3879,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
ASSERT(MUTEX_HELD(hash_lock));
ASSERT(HDR_HAS_L1HDR(hdr));
+ *real_evicted = 0;
state = hdr->b_l1hdr.b_state;
if (GHOST_STATE(state)) {
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
@@ -3909,9 +3916,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
*/
hdr = arc_hdr_realloc(hdr, hdr_full_cache,
hdr_l2only_cache);
+ *real_evicted += HDR_FULL_SIZE - HDR_L2ONLY_SIZE;
} else {
arc_change_state(arc_anon, hdr, hash_lock);
arc_hdr_destroy(hdr);
+ *real_evicted += HDR_FULL_SIZE;
}
return (bytes_evicted);
}
@@ -3935,8 +3944,10 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
ARCSTAT_BUMP(arcstat_mutex_miss);
break;
}
- if (buf->b_data != NULL)
+ if (buf->b_data != NULL) {
bytes_evicted += HDR_GET_LSIZE(hdr);
+ *real_evicted += HDR_GET_LSIZE(hdr);
+ }
mutex_exit(&buf->b_evict_lock);
arc_buf_destroy_impl(buf);
}
@@ -3972,6 +3983,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
arc_cksum_free(hdr);
bytes_evicted += arc_hdr_size(hdr);
+ *real_evicted += arc_hdr_size(hdr);
/*
* If this hdr is being evicted and has a compressed
@@ -4010,23 +4022,21 @@ arc_set_need_free(void)
static uint64_t
arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
- uint64_t spa, int64_t bytes)
+ uint64_t spa, uint64_t bytes)
{
multilist_sublist_t *mls;
- uint64_t bytes_evicted = 0;
+ uint64_t bytes_evicted = 0, real_evicted = 0;
arc_buf_hdr_t *hdr;
kmutex_t *hash_lock;
- int evict_count = 0;
+ int evict_count = zfs_arc_evict_batch_limit;
ASSERT3P(marker, !=, NULL);
- IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
mls = multilist_sublist_lock(ml, idx);
- for (hdr = multilist_sublist_prev(mls, marker); hdr != NULL;
+ for (hdr = multilist_sublist_prev(mls, marker); likely(hdr != NULL);
hdr = multilist_sublist_prev(mls, marker)) {
- if ((bytes != ARC_EVICT_ALL && bytes_evicted >= bytes) ||
- (evict_count >= zfs_arc_evict_batch_limit))
+ if ((evict_count <= 0) || (bytes_evicted >= bytes))
break;
/*
@@ -4074,10 +4084,13 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
ASSERT(!MUTEX_HELD(hash_lock));
if (mutex_tryenter(hash_lock)) {
- uint64_t evicted = arc_evict_hdr(hdr, hash_lock);
+ uint64_t revicted;
+ uint64_t evicted = arc_evict_hdr(hdr, hash_lock,
+ &revicted);
mutex_exit(hash_lock);
bytes_evicted += evicted;
+ real_evicted += revicted;
/*
* If evicted is zero, arc_evict_hdr() must have
@@ -4085,7 +4098,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
* evict_count in this case.
*/
if (evicted != 0)
- evict_count++;
+ evict_count--;
} else {
ARCSTAT_BUMP(arcstat_mutex_miss);
@@ -4107,7 +4120,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
* 1/64th of RAM). See the comments in arc_wait_for_eviction().
*/
mutex_enter(&arc_evict_lock);
- arc_evict_count += bytes_evicted;
+ arc_evict_count += real_evicted;
if (arc_free_memory() > arc_sys_free / 2) {
arc_evict_waiter_t *aw;
@@ -4146,7 +4159,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
* the given arc state; which is used by arc_flush().
*/
static uint64_t
-arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
+arc_evict_state(arc_state_t *state, uint64_t spa, uint64_t bytes,
arc_buf_contents_t type)
{
uint64_t total_evicted = 0;
@@ -4154,8 +4167,6 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
int num_sublists;
arc_buf_hdr_t **markers;
- IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
-
num_sublists = multilist_get_num_sublists(ml);
/*
@@ -4187,7 +4198,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* While we haven't hit our target number of bytes to evict, or
* we're evicting all available buffers.
*/
- while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
+ while (total_evicted < bytes) {
int sublist_idx = multilist_get_random_index(ml);
uint64_t scan_evicted = 0;
@@ -4215,9 +4226,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
uint64_t bytes_remaining;
uint64_t bytes_evicted;
- if (bytes == ARC_EVICT_ALL)
- bytes_remaining = ARC_EVICT_ALL;
- else if (total_evicted < bytes)
+ if (total_evicted < bytes)
bytes_remaining = bytes - total_evicted;
else
break;
@@ -4312,7 +4321,7 @@ static uint64_t
arc_evict_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
arc_buf_contents_t type)
{
- int64_t delta;
+ uint64_t delta;
if (bytes > 0 && zfs_refcount_count(&state->arcs_esize[type]) > 0) {
delta = MIN(zfs_refcount_count(&state->arcs_esize[type]),
@@ -5121,7 +5130,7 @@ arc_adapt(int bytes, arc_state_t *state)
* Check if arc_size has grown past our upper threshold, determined by
* zfs_arc_overflow_shift.
*/
-boolean_t
+static arc_ovf_level_t
arc_is_overflowing(void)
{
/* Always allow at least one block of overflow */
@@ -5137,8 +5146,10 @@ arc_is_overflowing(void)
* in the ARC. In practice, that's in the tens of MB, which is low
* enough to be safe.
*/
- return (aggsum_lower_bound(&arc_sums.arcstat_size) >=
- (int64_t)arc_c + overflow);
+ int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) -
+ arc_c - overflow / 2;
+ return (over < 0 ? ARC_OVF_NONE :
+ over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
}
static abd_t *
@@ -5180,58 +5191,73 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
void
arc_wait_for_eviction(uint64_t amount)
{
- mutex_enter(&arc_evict_lock);
- if (arc_is_overflowing()) {
- arc_evict_needed = B_TRUE;
- zthr_wakeup(arc_evict_zthr);
-
- if (amount != 0) {
- arc_evict_waiter_t aw;
- list_link_init(&aw.aew_node);
- cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
+ switch (arc_is_overflowing()) {
+ case ARC_OVF_NONE:
+ return;
+ case ARC_OVF_SOME:
+ /*
+ * This is a bit racy without taking arc_evict_lock, but the
+ * worst that can happen is we either call zthr_wakeup() extra
+ * time due to race with other thread here, or the set flag
+ * get cleared by arc_evict_cb(), which is unlikely due to
+ * big hysteresis, but also not important since at this level
+ * of overflow the eviction is purely advisory. Same time
+ * taking the global lock here every time without waiting for
+ * the actual eviction creates a significant lock contention.
+ */
+ if (!arc_evict_needed) {
+ arc_evict_needed = B_TRUE;
+ zthr_wakeup(arc_evict_zthr);
+ }
+ return;
+ case ARC_OVF_SEVERE:
+ default:
+ {
+ arc_evict_waiter_t aw;
+ list_link_init(&aw.aew_node);
+ cv_init(&aw.aew_cv, NULL, CV_DEFAULT, NULL);
- uint64_t last_count = 0;
- if (!list_is_empty(&arc_evict_waiters)) {
- arc_evict_waiter_t *last =
- list_tail(&arc_evict_waiters);
- last_count = last->aew_count;
- }
- /*
- * Note, the last waiter's count may be less than
- * arc_evict_count if we are low on memory in which
- * case arc_evict_state_impl() may have deferred
- * wakeups (but still incremented arc_evict_count).
- */
- aw.aew_count =
- MAX(last_count, arc_evict_count) + amount;
+ uint64_t last_count = 0;
+ mutex_enter(&arc_evict_lock);
+ if (!list_is_empty(&arc_evict_waiters)) {
+ arc_evict_waiter_t *last =
+ list_tail(&arc_evict_waiters);
+ last_count = last->aew_count;
+ } else if (!arc_evict_needed) {
+ arc_evict_needed = B_TRUE;
+ zthr_wakeup(arc_evict_zthr);
+ }
+ /*
+ * Note, the last waiter's count may be less than
+ * arc_evict_count if we are low on memory in which
+ * case arc_evict_state_impl() may have deferred
+ * wakeups (but still incremented arc_evict_count).
+ */
+ aw.aew_count = MAX(last_count, arc_evict_count) + amount;
- list_insert_tail(&arc_evict_waiters, &aw);
+ list_insert_tail(&arc_evict_waiters, &aw);
- arc_set_need_free();
+ arc_set_need_free();
- DTRACE_PROBE3(arc__wait__for__eviction,
- uint64_t, amount,
- uint64_t, arc_evict_count,
- uint64_t, aw.aew_count);
+ DTRACE_PROBE3(arc__wait__for__eviction,
+ uint64_t, amount,
+ uint64_t, arc_evict_count,
+ uint64_t, aw.aew_count);
- /*
- * We will be woken up either when arc_evict_count
- * reaches aew_count, or when the ARC is no longer
- * overflowing and eviction completes.
- */
+ /*
+ * We will be woken up either when arc_evict_count reaches
+ * aew_count, or when the ARC is no longer overflowing and
+ * eviction completes.
+ * In case of "false" wakeup, we will still be on the list.
+ */
+ do {
cv_wait(&aw.aew_cv, &arc_evict_lock);
+ } while (list_link_active(&aw.aew_node));
+ mutex_exit(&arc_evict_lock);
- /*
- * In case of "false" wakeup, we will still be on the
- * list.
- */
- if (list_link_active(&aw.aew_node))
- list_remove(&arc_evict_waiters, &aw);
-
- cv_destroy(&aw.aew_cv);
- }
+ cv_destroy(&aw.aew_cv);
+ }
}
- mutex_exit(&arc_evict_lock);
}
/*
@@ -5262,16 +5288,8 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
* requested size to be evicted. This should be more than 100%, to
* ensure that that progress is also made towards getting arc_size
* under arc_c. See the comment above zfs_arc_eviction_pct.
- *
- * We do the overflowing check without holding the arc_evict_lock to
- * reduce lock contention in this hot path. Note that
- * arc_wait_for_eviction() will acquire the lock and check again to
- * ensure we are truly overflowing before blocking.
*/
- if (arc_is_overflowing()) {
- arc_wait_for_eviction(size *
- zfs_arc_eviction_pct / 100);
- }
+ arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100);
VERIFY3U(hdr->b_type, ==, type);
if (type == ARC_BUFC_METADATA) {
@@ -7563,13 +7581,6 @@ arc_tuning_update(boolean_t verbose)
static void
arc_state_init(void)
{
- arc_anon = &ARC_anon;
- arc_mru = &ARC_mru;
- arc_mru_ghost = &ARC_mru_ghost;
- arc_mfu = &ARC_mfu;
- arc_mfu_ghost = &ARC_mfu_ghost;
- arc_l2c_only = &ARC_l2c_only;
-
multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
@@ -7969,6 +7980,18 @@ arc_init(void)
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
zfs_dirty_data_max_max);
}
+
+ if (zfs_wrlog_data_max == 0) {
+
+ /*
+ * dp_wrlog_total is reduced for each txg at the end of
+ * spa_sync(). However, dp_dirty_total is reduced every time
+ * a block is written out. Thus under normal operation,
+ * dp_wrlog_total could grow 2 times as big as
+ * zfs_dirty_data_max.
+ */
+ zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+ }
}
void
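
arc_is_overflowing() now reports a level rather than a boolean, with the old threshold sitting in the middle of the new band: for overflow = MAX(SPA_MAXBLOCKSIZE, arc_c >> zfs_arc_overflow_shift), sizes under arc_c + overflow/2 are ARC_OVF_NONE, sizes under arc_c + 3*overflow/2 are ARC_OVF_SOME (advisory eviction wakeup, no lock taken), and anything above is ARC_OVF_SEVERE, where allocating threads block. A worked sketch with illustrative values (arc_c = 4 GiB, overflow = 16 MiB; real values depend on tunables):

#include <stdint.h>
#include <stdio.h>

typedef enum { ARC_OVF_NONE, ARC_OVF_SOME, ARC_OVF_SEVERE } arc_ovf_level_t;

static arc_ovf_level_t
classify(int64_t size, int64_t arc_c, int64_t overflow)
{
	int64_t over = size - arc_c - overflow / 2;

	return (over < 0 ? ARC_OVF_NONE :
	    over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
}

int
main(void)
{
	const int64_t MiB = 1LL << 20, GiB = 1LL << 30;
	const int64_t arc_c = 4 * GiB, overflow = 16 * MiB;

	/* below arc_c + 8 MiB: nothing to do */
	printf("%d\n", classify(arc_c + 7 * MiB, arc_c, overflow));	/* 0 */
	/* below arc_c + 24 MiB: advisory zthr wakeup only */
	printf("%d\n", classify(arc_c + 20 * MiB, arc_c, overflow));	/* 1 */
	/* beyond: callers block in arc_wait_for_eviction() */
	printf("%d\n", classify(arc_c + 40 * MiB, arc_c, overflow));	/* 2 */
	return (0);
}
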
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
index 7b0b1d896761..479e5a3ad625 100644
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -503,7 +503,7 @@ ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
{
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
- for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
+ for (enum ddt_type type = 0; type < DDT_TYPES && ddt; type++) {
for (enum ddt_class class = 0; class < DDT_CLASSES;
class++) {
ddt_histogram_add(ddh,
diff --git a/sys/contrib/openzfs/module/zfs/dmu_redact.c b/sys/contrib/openzfs/module/zfs/dmu_redact.c
index 62c7d01d4bd2..fdbdf7d6e868 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_redact.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_redact.c
@@ -816,6 +816,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
avl_remove(&end_tree, &redact_nodes[i]);
kmem_free(redact_nodes[i].record,
sizeof (struct redact_record));
+ bqueue_destroy(&thread_args[i].q);
}
avl_destroy(&start_tree);
@@ -1164,6 +1165,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
(void) thread_create(NULL, 0, redact_merge_thread, rmta, 0, curproc,
TS_RUN, minclsyspri);
err = perform_redaction(os, new_rl, rmta);
+ bqueue_destroy(&rmta->q);
kmem_free(rmta, sizeof (struct redact_merge_thread_arg));
out:
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
index 0beb983f992f..5fa516866668 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -53,6 +53,7 @@ dmu_tx_stats_t dmu_tx_stats = {
{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
+ { "dmu_tx_wrlog_over_max", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
};
@@ -885,6 +886,12 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
}
if (!tx->tx_dirty_delayed &&
+ dsl_pool_wrlog_over_max(tx->tx_pool)) {
+ DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
+ return (SET_ERROR(ERESTART));
+ }
+
+ if (!tx->tx_dirty_delayed &&
dsl_pool_need_dirty_delay(tx->tx_pool)) {
tx->tx_wait_dirty = B_TRUE;
DMU_TX_STAT_BUMP(dmu_tx_dirty_delay);
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
index 1c03216ef6d5..f99964511aa6 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -192,9 +192,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
}
mutex_exit(&ds->ds_lock);
- dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
- compressed, uncompressed, tx);
- dsl_dir_transfer_space(ds->ds_dir, used - delta,
+ dsl_dir_diduse_transfer_space(ds->ds_dir, delta,
+ compressed, uncompressed, used,
DD_USED_REFRSRV, DD_USED_HEAD, tx);
}
@@ -291,9 +290,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
delta = parent_delta(ds, -used);
dsl_dataset_phys(ds)->ds_unique_bytes -= used;
mutex_exit(&ds->ds_lock);
- dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
- delta, -compressed, -uncompressed, tx);
- dsl_dir_transfer_space(ds->ds_dir, -used - delta,
+ dsl_dir_diduse_transfer_space(ds->ds_dir,
+ delta, -compressed, -uncompressed, -used,
DD_USED_REFRSRV, DD_USED_HEAD, tx);
} else {
dprintf_bp(bp, "putting on dead list: %s", "");
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
index df2c3d8f0637..84caace4dbab 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -1517,6 +1517,11 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
{
int64_t accounted_delta;
+ ASSERT(dmu_tx_is_syncing(tx));
+ ASSERT(type < DD_USED_NUM);
+
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+
/*
* dsl_dataset_set_refreservation_sync_impl() calls this with
* dd_lock held, so that it can atomically update
@@ -1525,36 +1530,28 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
* consistently.
*/
boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
-
- ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(type < DD_USED_NUM);
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
-
if (needlock)
mutex_enter(&dd->dd_lock);
- accounted_delta =
- parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, used);
- ASSERT(used >= 0 || dsl_dir_phys(dd)->dd_used_bytes >= -used);
- ASSERT(compressed >= 0 ||
- dsl_dir_phys(dd)->dd_compressed_bytes >= -compressed);
+ dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
+ accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
+ ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
+ ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed);
ASSERT(uncompressed >= 0 ||
- dsl_dir_phys(dd)->dd_uncompressed_bytes >= -uncompressed);
- dsl_dir_phys(dd)->dd_used_bytes += used;
- dsl_dir_phys(dd)->dd_uncompressed_bytes += uncompressed;
- dsl_dir_phys(dd)->dd_compressed_bytes += compressed;
-
- if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
- ASSERT(used > 0 ||
- dsl_dir_phys(dd)->dd_used_breakdown[type] >= -used);
- dsl_dir_phys(dd)->dd_used_breakdown[type] += used;
+ ddp->dd_uncompressed_bytes >= -uncompressed);
+ ddp->dd_used_bytes += used;
+ ddp->dd_uncompressed_bytes += uncompressed;
+ ddp->dd_compressed_bytes += compressed;
+
+ if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+ ASSERT(used >= 0 || ddp->dd_used_breakdown[type] >= -used);
+ ddp->dd_used_breakdown[type] += used;
#ifdef ZFS_DEBUG
{
dd_used_t t;
uint64_t u = 0;
for (t = 0; t < DD_USED_NUM; t++)
- u += dsl_dir_phys(dd)->dd_used_breakdown[t];
- ASSERT3U(u, ==, dsl_dir_phys(dd)->dd_used_bytes);
+ u += ddp->dd_used_breakdown[t];
+ ASSERT3U(u, ==, ddp->dd_used_bytes);
}
#endif
}
@@ -1562,11 +1559,9 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
mutex_exit(&dd->dd_lock);
if (dd->dd_parent != NULL) {
- dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
- accounted_delta, compressed, uncompressed, tx);
- dsl_dir_transfer_space(dd->dd_parent,
- used - accounted_delta,
- DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
+ dsl_dir_diduse_transfer_space(dd->dd_parent,
+ accounted_delta, compressed, uncompressed,
+ used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
}
}
@@ -1578,21 +1573,72 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
ASSERT(oldtype < DD_USED_NUM);
ASSERT(newtype < DD_USED_NUM);
+ dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
if (delta == 0 ||
- !(dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN))
+ !(ddp->dd_flags & DD_FLAG_USED_BREAKDOWN))
return;
dmu_buf_will_dirty(dd->dd_dbuf, tx);
mutex_enter(&dd->dd_lock);
ASSERT(delta > 0 ?
- dsl_dir_phys(dd)->dd_used_breakdown[oldtype] >= delta :
- dsl_dir_phys(dd)->dd_used_breakdown[newtype] >= -delta);
- ASSERT(dsl_dir_phys(dd)->dd_used_bytes >= ABS(delta));
- dsl_dir_phys(dd)->dd_used_breakdown[oldtype] -= delta;
- dsl_dir_phys(dd)->dd_used_breakdown[newtype] += delta;
+ ddp->dd_used_breakdown[oldtype] >= delta :
+ ddp->dd_used_breakdown[newtype] >= -delta);
+ ASSERT(ddp->dd_used_bytes >= ABS(delta));
+ ddp->dd_used_breakdown[oldtype] -= delta;
+ ddp->dd_used_breakdown[newtype] += delta;
mutex_exit(&dd->dd_lock);
}
+void
+dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
+ int64_t compressed, int64_t uncompressed, int64_t tonew,
+ dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
+{
+ int64_t accounted_delta;
+
+ ASSERT(dmu_tx_is_syncing(tx));
+ ASSERT(oldtype < DD_USED_NUM);
+ ASSERT(newtype < DD_USED_NUM);
+
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+
+ mutex_enter(&dd->dd_lock);
+ dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
+ accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
+ ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
+ ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed);
+ ASSERT(uncompressed >= 0 ||
+ ddp->dd_uncompressed_bytes >= -uncompressed);
+ ddp->dd_used_bytes += used;
+ ddp->dd_uncompressed_bytes += uncompressed;
+ ddp->dd_compressed_bytes += compressed;
+
+ if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+ ASSERT(tonew - used <= 0 ||
+ ddp->dd_used_breakdown[oldtype] >= tonew - used);
+ ASSERT(tonew >= 0 ||
+ ddp->dd_used_breakdown[newtype] >= -tonew);
+ ddp->dd_used_breakdown[oldtype] -= tonew - used;
+ ddp->dd_used_breakdown[newtype] += tonew;
+#ifdef ZFS_DEBUG
+ {
+ dd_used_t t;
+ uint64_t u = 0;
+ for (t = 0; t < DD_USED_NUM; t++)
+ u += ddp->dd_used_breakdown[t];
+ ASSERT3U(u, ==, ddp->dd_used_bytes);
+ }
+#endif
+ }
+ mutex_exit(&dd->dd_lock);
+
+ if (dd->dd_parent != NULL) {
+ dsl_dir_diduse_transfer_space(dd->dd_parent,
+ accounted_delta, compressed, uncompressed,
+ used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
+ }
+}
+
typedef struct dsl_dir_set_qr_arg {
const char *ddsqra_name;
zprop_source_t ddsqra_source;
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index 72f4b86d772e..1350f1329564 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -105,6 +105,14 @@ int zfs_dirty_data_max_percent = 10;
int zfs_dirty_data_max_max_percent = 25;
/*
+ * zfs_wrlog_data_max, the upper limit of TX_WRITE log data.
+ * Once it is reached, write operation is blocked,
+ * until log data is cleared out after txg sync.
+ * It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY.
+ */
+unsigned long zfs_wrlog_data_max = 0;
+
+/*
* If there's at least this much dirty data (as a percentage of
* zfs_dirty_data_max), push out a txg. This should be less than
* zfs_vdev_async_write_active_min_dirty_percent.
@@ -220,6 +228,11 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
+ aggsum_init(&dp->dp_wrlog_total, 0);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
+ }
+
dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
TASKQ_THREADS_CPU_PCT);
@@ -416,6 +429,14 @@ dsl_pool_close(dsl_pool_t *dp)
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
cv_destroy(&dp->dp_spaceavail_cv);
+
+ ASSERT0(aggsum_value(&dp->dp_wrlog_total));
+ aggsum_fini(&dp->dp_wrlog_total);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
+ aggsum_fini(&dp->dp_wrlog_pertxg[i]);
+ }
+
taskq_destroy(dp->dp_unlinked_drain_taskq);
taskq_destroy(dp->dp_zrele_taskq);
if (dp->dp_blkstats != NULL) {
@@ -592,6 +613,36 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
cv_signal(&dp->dp_spaceavail_cv);
}
+void
+dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
+{
+ ASSERT3S(size, >=, 0);
+
+ aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], size);
+ aggsum_add(&dp->dp_wrlog_total, size);
+
+ /* Choose a value slightly bigger than min dirty sync bytes */
+ uint64_t sync_min =
+ zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100;
+ if (aggsum_compare(&dp->dp_wrlog_pertxg[txg & TXG_MASK], sync_min) > 0)
+ txg_kick(dp, txg);
+}
+
+boolean_t
+dsl_pool_wrlog_over_max(dsl_pool_t *dp)
+{
+ return (aggsum_compare(&dp->dp_wrlog_total, zfs_wrlog_data_max) > 0);
+}
+
+static void
+dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
+{
+ int64_t delta;
+ delta = -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]);
+ aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], delta);
+ aggsum_add(&dp->dp_wrlog_total, delta);
+}
+
#ifdef ZFS_DEBUG
static boolean_t
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
@@ -816,6 +867,9 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
dmu_buf_rele(ds->ds_dbuf, zilog);
}
+
+ dsl_pool_wrlog_clear(dp, txg);
+
ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}
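
Two thresholds govern the new wrlog accounting. Per txg, dsl_pool_wrlog_count() kicks an early sync once that txg's log data passes zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100; pool-wide, dmu_tx_try_assign() returns ERESTART once the total passes zfs_wrlog_data_max, which arc_init() above defaults to 2 * zfs_dirty_data_max. A worked sketch, assuming an illustrative zfs_dirty_data_max of 4 GiB and the default zfs_dirty_data_sync_percent of 20:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t GiB = 1ULL << 30;
	uint64_t zfs_dirty_data_max = 4 * GiB;		/* illustrative */
	uint64_t zfs_dirty_data_sync_percent = 20;	/* default */

	/* per-txg point at which txg_kick() is called */
	uint64_t sync_min =
	    zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100;

	/* pool-wide cap past which writers see ERESTART */
	uint64_t zfs_wrlog_data_max = zfs_dirty_data_max * 2;

	printf("kick txg at %.1f GiB of per-txg wrlog\n",
	    (double)sync_min / GiB);			/* 1.2 GiB */
	printf("throttle writers at %.1f GiB total\n",
	    (double)zfs_wrlog_data_max / GiB);		/* 8.0 GiB */
	return (0);
}
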
@@ -1405,6 +1459,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
"Determines the dirty space limit");
+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+ "The size limit of write-transaction zil log data");
+
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 23f3e2989ae7..93d409ceb433 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -5611,19 +5611,11 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, int allocator,
zio_t *zio, int flags)
{
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
- uint64_t available_slots = 0;
- boolean_t slot_reserved = B_FALSE;
uint64_t max = mca->mca_alloc_max_slots;
ASSERT(mc->mc_alloc_throttle_enabled);
- mutex_enter(&mc->mc_lock);
-
- uint64_t reserved_slots = zfs_refcount_count(&mca->mca_alloc_slots);
- if (reserved_slots < max)
- available_slots = max - reserved_slots;
-
- if (slots <= available_slots || GANG_ALLOCATION(flags) ||
- flags & METASLAB_MUST_RESERVE) {
+ if (GANG_ALLOCATION(flags) || (flags & METASLAB_MUST_RESERVE) ||
+ zfs_refcount_count(&mca->mca_alloc_slots) + slots <= max) {
/*
* We reserve the slots individually so that we can unreserve
* them individually when an I/O completes.
@@ -5631,11 +5623,9 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, int allocator,
for (int d = 0; d < slots; d++)
zfs_refcount_add(&mca->mca_alloc_slots, zio);
zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
- slot_reserved = B_TRUE;
+ return (B_TRUE);
}
-
- mutex_exit(&mc->mc_lock);
- return (slot_reserved);
+ return (B_FALSE);
}
void
@@ -5645,10 +5635,8 @@ metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots,
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
ASSERT(mc->mc_alloc_throttle_enabled);
- mutex_enter(&mc->mc_lock);
for (int d = 0; d < slots; d++)
zfs_refcount_remove(&mca->mca_alloc_slots, zio);
- mutex_exit(&mc->mc_lock);
}
static int
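
zfs_refcount_add()/zfs_refcount_remove() are themselves thread-safe, so mc_lock only made the check-then-reserve sequence atomic. Dropping it means concurrent reservers can race past the `<= max` check together and overshoot the cap by a bounded amount, which is acceptable for an advisory throttle and removes a contended global lock from the allocation path. A toy user-space model of the now lock-free reserve/unreserve (illustrative only):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_long reserved;

static bool
throttle_reserve(long slots, long max, bool must_reserve)
{
	/* racy read-then-add: may overshoot max by a bounded amount */
	if (must_reserve || atomic_load(&reserved) + slots <= max) {
		atomic_fetch_add(&reserved, slots);
		return (true);
	}
	return (false);
}

static void
throttle_unreserve(long slots)
{
	atomic_fetch_sub(&reserved, slots);
}

int
main(void)
{
	(void) throttle_reserve(2, 4, false);	/* reserved = 2 */
	throttle_unreserve(2);			/* reserved = 0 */
	return (0);
}
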
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index f6dce076d136..2a4db7d562b6 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -9197,9 +9197,9 @@ spa_sync(spa_t *spa, uint64_t txg)
spa->spa_sync_pass = 0;
for (int i = 0; i < spa->spa_alloc_count; i++) {
- mutex_enter(&spa->spa_alloc_locks[i]);
- VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
- mutex_exit(&spa->spa_alloc_locks[i]);
+ mutex_enter(&spa->spa_allocs[i].spaa_lock);
+ VERIFY0(avl_numnodes(&spa->spa_allocs[i].spaa_tree));
+ mutex_exit(&spa->spa_allocs[i].spaa_lock);
}
/*
@@ -9309,9 +9309,9 @@ spa_sync(spa_t *spa, uint64_t txg)
dsl_pool_sync_done(dp, txg);
for (int i = 0; i < spa->spa_alloc_count; i++) {
- mutex_enter(&spa->spa_alloc_locks[i]);
- VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
- mutex_exit(&spa->spa_alloc_locks[i]);
+ mutex_enter(&spa->spa_allocs[i].spaa_lock);
+ VERIFY0(avl_numnodes(&spa->spa_allocs[i].spaa_tree));
+ mutex_exit(&spa->spa_allocs[i].spaa_lock);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
index 157dede93cfc..58039f3d103c 100644
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -700,13 +700,12 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_root = spa_strdup(altroot);
spa->spa_alloc_count = spa_allocators;
- spa->spa_alloc_locks = kmem_zalloc(spa->spa_alloc_count *
- sizeof (kmutex_t), KM_SLEEP);
- spa->spa_alloc_trees = kmem_zalloc(spa->spa_alloc_count *
- sizeof (avl_tree_t), KM_SLEEP);
+ spa->spa_allocs = kmem_zalloc(spa->spa_alloc_count *
+ sizeof (spa_alloc_t), KM_SLEEP);
for (int i = 0; i < spa->spa_alloc_count; i++) {
- mutex_init(&spa->spa_alloc_locks[i], NULL, MUTEX_DEFAULT, NULL);
- avl_create(&spa->spa_alloc_trees[i], zio_bookmark_compare,
+ mutex_init(&spa->spa_allocs[i].spaa_lock, NULL, MUTEX_DEFAULT,
+ NULL);
+ avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare,
sizeof (zio_t), offsetof(zio_t, io_alloc_node));
}
avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed,
@@ -799,13 +798,11 @@ spa_remove(spa_t *spa)
}
for (int i = 0; i < spa->spa_alloc_count; i++) {
- avl_destroy(&spa->spa_alloc_trees[i]);
- mutex_destroy(&spa->spa_alloc_locks[i]);
+ avl_destroy(&spa->spa_allocs[i].spaa_tree);
+ mutex_destroy(&spa->spa_allocs[i].spaa_lock);
}
- kmem_free(spa->spa_alloc_locks, spa->spa_alloc_count *
- sizeof (kmutex_t));
- kmem_free(spa->spa_alloc_trees, spa->spa_alloc_count *
- sizeof (avl_tree_t));
+ kmem_free(spa->spa_allocs, spa->spa_alloc_count *
+ sizeof (spa_alloc_t));
avl_destroy(&spa->spa_metaslabs_by_flushed);
avl_destroy(&spa->spa_sm_logs_by_txg);
@@ -1786,8 +1783,22 @@ spa_get_worst_case_asize(spa_t *spa, uint64_t lsize)
uint64_t
spa_get_slop_space(spa_t *spa)
{
- uint64_t space = spa_get_dspace(spa);
- uint64_t slop = MIN(space >> spa_slop_shift, spa_max_slop);
+ uint64_t space = 0;
+ uint64_t slop = 0;
+
+ /*
+ * Make sure spa_dedup_dspace has been set.
+ */
+ if (spa->spa_dedup_dspace == ~0ULL)
+ spa_update_dspace(spa);
+
+ /*
+ * spa_get_dspace() includes the space only logically "used" by
+ * deduplicated data, so since it's not useful to reserve more
+ * space with more deduplicated data, we subtract that out here.
+ */
+ space = spa_get_dspace(spa) - spa->spa_dedup_dspace;
+ slop = MIN(space >> spa_slop_shift, spa_max_slop);
/*
* Subtract the embedded log space, but no more than half the (3.2%)
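
As the new comment notes, spa_get_dspace() counts space only logically "used" by deduplicated data, so basing slop on it would grow the reservation with the dedup ratio rather than with physical capacity; subtracting spa_dedup_dspace bases it on what the pool can actually hold. A worked sketch with illustrative numbers (spa_slop_shift defaults to 5, i.e. about 3.2%):

#include <stdint.h>
#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	const uint64_t GiB = 1ULL << 30, TiB = 1ULL << 40;
	uint64_t spa_slop_shift = 5;
	uint64_t spa_max_slop = 128 * GiB;	/* illustrative cap */

	uint64_t dspace = 3 * TiB;	 /* logical, dedup refs included */
	uint64_t dedup_dspace = 2 * TiB; /* the dedup-only portion */

	/* old: slop from the dedup-inflated number */
	uint64_t old_slop = MIN(dspace >> spa_slop_shift, spa_max_slop);
	/* new: dedup-only space subtracted first */
	uint64_t new_slop =
	    MIN((dspace - dedup_dspace) >> spa_slop_shift, spa_max_slop);

	printf("old slop: %llu GiB\n",
	    (unsigned long long)(old_slop / GiB));	/* 96 */
	printf("new slop: %llu GiB\n",
	    (unsigned long long)(new_slop / GiB));	/* 32 */
	return (0);
}
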
diff --git a/sys/contrib/openzfs/module/zfs/zcp_synctask.c b/sys/contrib/openzfs/module/zfs/zcp_synctask.c
index 4e0fa0d85cbf..c6ade59b9ced 100644
--- a/sys/contrib/openzfs/module/zfs/zcp_synctask.c
+++ b/sys/contrib/openzfs/module/zfs/zcp_synctask.c
@@ -54,6 +54,12 @@ typedef struct zcp_synctask_info {
int blocks_modified;
} zcp_synctask_info_t;
+static void
+zcp_synctask_cleanup(void *arg)
+{
+ fnvlist_free(arg);
+}
+
/*
* Generic synctask interface for channel program syncfuncs.
*
@@ -275,7 +281,7 @@ zcp_synctask_snapshot(lua_State *state, boolean_t sync, nvlist_t *err_details)
fnvlist_add_boolean(ddsa.ddsa_snaps, dsname);
zcp_cleanup_handler_t *zch = zcp_register_cleanup(state,
- (zcp_cleanup_t *)&fnvlist_free, ddsa.ddsa_snaps);
+ zcp_synctask_cleanup, ddsa.ddsa_snaps);
err = zcp_sync_task(state, dsl_dataset_snapshot_check,
dsl_dataset_snapshot_sync, &ddsa, sync, dsname);
@@ -363,7 +369,7 @@ zcp_synctask_inherit_prop(lua_State *state, boolean_t sync,
fnvlist_add_boolean(dpsa->dpsa_props, prop);
zcp_cleanup_handler_t *zch = zcp_register_cleanup(state,
- (zcp_cleanup_t *)&fnvlist_free, dpsa->dpsa_props);
+ zcp_synctask_cleanup, dpsa->dpsa_props);
err = zcp_sync_task(state, zcp_synctask_inherit_prop_check,
zcp_synctask_inherit_prop_sync, &zipa, sync, dsname);
@@ -402,7 +408,7 @@ zcp_synctask_bookmark(lua_State *state, boolean_t sync, nvlist_t *err_details)
fnvlist_add_string(bmarks, new, source);
zcp_cleanup_handler_t *zch = zcp_register_cleanup(state,
- (zcp_cleanup_t *)&fnvlist_free, bmarks);
+ zcp_synctask_cleanup, bmarks);
dsl_bookmark_create_arg_t dbca = {
.dbca_bmarks = bmarks,
@@ -467,8 +473,7 @@ zcp_synctask_wrapper(lua_State *state)
* Make sure err_details is properly freed, even if a fatal error is
* thrown during the synctask.
*/
- zch = zcp_register_cleanup(state,
- (zcp_cleanup_t *)&fnvlist_free, err_details);
+ zch = zcp_register_cleanup(state, zcp_synctask_cleanup, err_details);
zcp_synctask_info_t *info = lua_touserdata(state, lua_upvalueindex(1));
boolean_t sync = lua_toboolean(state, lua_upvalueindex(2));
diff --git a/sys/contrib/openzfs/module/zfs/zfs_log.c b/sys/contrib/openzfs/module/zfs/zfs_log.c
index 30d5c4821ae5..0f330ec933aa 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_log.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_log.c
@@ -541,6 +541,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
+ ssize_t size = resid;
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
@@ -626,6 +627,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
off += len;
resid -= len;
}
+
+ if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+ dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
+ }
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index 78d0711cce4e..d8d39f861c75 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -1822,12 +1822,13 @@ zil_itx_destroy(itx_t *itx)
* so no locks are needed.
*/
static void
-zil_itxg_clean(itxs_t *itxs)
+zil_itxg_clean(void *arg)
{
itx_t *itx;
list_t *list;
avl_tree_t *t;
void *cookie;
+ itxs_t *itxs = arg;
itx_async_node_t *ian;
list = &itxs->i_sync_list;
@@ -2047,7 +2048,7 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
ASSERT3P(zilog->zl_dmu_pool, !=, NULL);
ASSERT3P(zilog->zl_dmu_pool->dp_zil_clean_taskq, !=, NULL);
taskqid_t id = taskq_dispatch(zilog->zl_dmu_pool->dp_zil_clean_taskq,
- (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP);
+ zil_itxg_clean, clean_me, TQ_NOSLEEP);
if (id == TASKQID_INVALID)
zil_itxg_clean(clean_me);
}
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index e33d36dab5f9..76ed4fad4304 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -877,8 +877,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_bookmark = *zb;
if (pio != NULL) {
- if (zio->io_metaslab_class == NULL)
- zio->io_metaslab_class = pio->io_metaslab_class;
+ zio->io_metaslab_class = pio->io_metaslab_class;
if (zio->io_logical == NULL)
zio->io_logical = pio->io_logical;
if (zio->io_child_type == ZIO_CHILD_GANG)
@@ -1891,8 +1890,8 @@ zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
* to dispatch the zio to another taskq at the same time.
*/
ASSERT(taskq_empty_ent(&zio->io_tqent));
- spa_taskq_dispatch_ent(spa, t, q, (task_func_t *)zio_execute, zio,
- flags, &zio->io_tqent);
+ spa_taskq_dispatch_ent(spa, t, q, zio_execute, zio, flags,
+ &zio->io_tqent);
}
static boolean_t
@@ -1923,7 +1922,7 @@ zio_issue_async(zio_t *zio)
}
void
-zio_interrupt(zio_t *zio)
+zio_interrupt(void *zio)
{
zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT, B_FALSE);
}
@@ -1981,8 +1980,8 @@ zio_delay_interrupt(zio_t *zio)
* OpenZFS's timeout_generic().
*/
tid = taskq_dispatch_delay(system_taskq,
- (task_func_t *)zio_interrupt,
- zio, TQ_NOSLEEP, expire_at_tick);
+ zio_interrupt, zio, TQ_NOSLEEP,
+ expire_at_tick);
if (tid == TASKQID_INVALID) {
/*
* Couldn't allocate a task. Just
@@ -2103,7 +2102,7 @@ static zio_pipe_stage_t *zio_pipeline[];
* it is externally visible.
*/
void
-zio_execute(zio_t *zio)
+zio_execute(void *zio)
{
fstrans_cookie_t cookie;
@@ -2292,8 +2291,9 @@ zio_nowait(zio_t *zio)
*/
static void
-zio_reexecute(zio_t *pio)
+zio_reexecute(void *arg)
{
+ zio_t *pio = arg;
zio_t *cio, *cio_next;
ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL);
@@ -3379,9 +3379,9 @@ zio_io_to_allocate(spa_t *spa, int allocator)
{
zio_t *zio;
- ASSERT(MUTEX_HELD(&spa->spa_alloc_locks[allocator]));
+ ASSERT(MUTEX_HELD(&spa->spa_allocs[allocator].spaa_lock));
- zio = avl_first(&spa->spa_alloc_trees[allocator]);
+ zio = avl_first(&spa->spa_allocs[allocator].spaa_tree);
if (zio == NULL)
return (NULL);
@@ -3393,11 +3393,11 @@ zio_io_to_allocate(spa_t *spa, int allocator)
*/
ASSERT3U(zio->io_allocator, ==, allocator);
if (!metaslab_class_throttle_reserve(zio->io_metaslab_class,
- zio->io_prop.zp_copies, zio->io_allocator, zio, 0)) {
+ zio->io_prop.zp_copies, allocator, zio, 0)) {
return (NULL);
}
- avl_remove(&spa->spa_alloc_trees[allocator], zio);
+ avl_remove(&spa->spa_allocs[allocator].spaa_tree, zio);
ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
return (zio);
@@ -3421,8 +3421,8 @@ zio_dva_throttle(zio_t *zio)
return (zio);
}
+ ASSERT(zio->io_type == ZIO_TYPE_WRITE);
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
-
ASSERT3U(zio->io_queued_timestamp, >, 0);
ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
@@ -3434,14 +3434,14 @@ zio_dva_throttle(zio_t *zio)
* into 2^20 block regions, and then hash based on the objset, object,
* level, and region to accomplish both of these goals.
*/
- zio->io_allocator = cityhash4(bm->zb_objset, bm->zb_object,
+ int allocator = (uint_t)cityhash4(bm->zb_objset, bm->zb_object,
bm->zb_level, bm->zb_blkid >> 20) % spa->spa_alloc_count;
- mutex_enter(&spa->spa_alloc_locks[zio->io_allocator]);
- ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+ zio->io_allocator = allocator;
zio->io_metaslab_class = mc;
- avl_add(&spa->spa_alloc_trees[zio->io_allocator], zio);
- nio = zio_io_to_allocate(spa, zio->io_allocator);
- mutex_exit(&spa->spa_alloc_locks[zio->io_allocator]);
+ mutex_enter(&spa->spa_allocs[allocator].spaa_lock);
+ avl_add(&spa->spa_allocs[allocator].spaa_tree, zio);
+ nio = zio_io_to_allocate(spa, allocator);
+ mutex_exit(&spa->spa_allocs[allocator].spaa_lock);
return (nio);
}
@@ -3450,9 +3450,9 @@ zio_allocate_dispatch(spa_t *spa, int allocator)
{
zio_t *zio;
- mutex_enter(&spa->spa_alloc_locks[allocator]);
+ mutex_enter(&spa->spa_allocs[allocator].spaa_lock);
zio = zio_io_to_allocate(spa, allocator);
- mutex_exit(&spa->spa_alloc_locks[allocator]);
+ mutex_exit(&spa->spa_allocs[allocator].spaa_lock);
if (zio == NULL)
return;
@@ -3642,8 +3642,8 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
* some parallelism.
*/
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
- int allocator = cityhash4(0, 0, 0, os->os_dsl_dataset->ds_object) %
- spa->spa_alloc_count;
+ int allocator = (uint_t)cityhash4(0, 0, 0,
+ os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
txg, NULL, flags, &io_alloc_list, NULL, allocator);
*slog = (error == 0);
@@ -4788,8 +4788,7 @@ zio_done(zio_t *zio)
ASSERT(taskq_empty_ent(&zio->io_tqent));
spa_taskq_dispatch_ent(zio->io_spa,
ZIO_TYPE_CLAIM, ZIO_TASKQ_ISSUE,
- (task_func_t *)zio_reexecute, zio, 0,
- &zio->io_tqent);
+ zio_reexecute, zio, 0, &zio->io_tqent);
}
return (NULL);
}
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 23df0e1541a3..b7bc587cf624 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -84,10 +84,8 @@
#include <sys/zfs_rlock.h>
#include <sys/spa_impl.h>
#include <sys/zvol.h>
-
#include <sys/zvol_impl.h>
-
unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
@@ -106,10 +104,8 @@ typedef enum {
typedef struct {
zvol_async_op_t op;
- char pool[MAXNAMELEN];
char name1[MAXNAMELEN];
char name2[MAXNAMELEN];
- zprop_source_t source;
uint64_t value;
} zvol_task_t;
@@ -579,6 +575,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
itx_wr_state_t write_state;
+ uint64_t sz = size;
if (zil_replaying(zilog, tx))
return;
@@ -630,6 +627,10 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
offset += len;
size -= len;
}
+
+ if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+ dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
+ }
}
/*
@@ -1197,6 +1198,12 @@ zvol_create_minor(const char *name)
* Remove minors for specified dataset including children and snapshots.
*/
+static void
+zvol_free_task(void *arg)
+{
+ ops->zv_free(arg);
+}
+
void
zvol_remove_minors_impl(const char *name)
{
@@ -1245,8 +1252,8 @@ zvol_remove_minors_impl(const char *name)
mutex_exit(&zv->zv_state_lock);
/* Try parallel zv_free, if failed do it in place */
- t = taskq_dispatch(system_taskq,
- (task_func_t *)ops->zv_free, zv, TQ_SLEEP);
+ t = taskq_dispatch(system_taskq, zvol_free_task, zv,
+ TQ_SLEEP);
if (t == TASKQID_INVALID)
list_insert_head(&free_list, zv);
} else {
@@ -1435,7 +1442,6 @@ zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
uint64_t value)
{
zvol_task_t *task;
- char *delim;
/* Never allow tasks on hidden names. */
if (name1[0] == '$')
@@ -1444,8 +1450,6 @@ zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
task->op = op;
task->value = value;
- delim = strchr(name1, '/');
- strlcpy(task->pool, name1, delim ? (delim - name1 + 1) : MAXNAMELEN);
strlcpy(task->name1, name1, MAXNAMELEN);
if (name2 != NULL)