aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module
diff options
context:
space:
mode:
author Martin Matuska <mm@FreeBSD.org> 2021-07-12 21:23:38 +0000
committer Martin Matuska <mm@FreeBSD.org> 2021-07-12 21:24:45 +0000
commit 5eb61f6c6549f134a4f3bed4c164345d4f616bad (patch)
tree 0ebfb9869a1ad0e906ea44630777b52d3db2af1f /sys/contrib/openzfs/module
parent dc5a0d6d6d0c4ad88d032762f036876fb64eb0d5 (diff)
parent 07a4c76e9016fad22f1ce2613ab5abc4b2652114 (diff)
download src-5eb61f6c6549f134a4f3bed4c164345d4f616bad.tar.gz
src-5eb61f6c6549f134a4f3bed4c164345d4f616bad.zip
zfs: merge openzfs/zfs@07a4c76e9 (master) into main
Notable upstream pull request merges:
  #12299 file reference counts can get corrupted
  #12320 FreeBSD: Use unmapped I/O for scattered/gang ABD buffers

Obtained from: OpenZFS
OpenZFS commit: 07a4c76e9016fad22f1ce2613ab5abc4b2652114
Diffstat (limited to 'sys/contrib/openzfs/module')
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c 123
-rw-r--r-- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c 19
-rw-r--r-- sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c 28
-rw-r--r-- sys/contrib/openzfs/module/zfs/fm.c 20
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_ioctl.c 71
-rw-r--r-- sys/contrib/openzfs/module/zfs/zfs_onexit.c 23
6 files changed, 185 insertions, 99 deletions
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
index 3853b2b5c900..b1407e4bd61d 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
@@ -29,6 +29,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
+#include <sys/buf.h>
#include <sys/file.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
@@ -36,6 +37,7 @@
#include <sys/vdev_os.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
+#include <vm/vm_page.h>
#include <geom/geom.h>
#include <geom/geom_disk.h>
#include <geom/geom_int.h>
@@ -1059,6 +1061,80 @@ vdev_geom_io_intr(struct bio *bp)
zio_delay_interrupt(zio);
}
+struct vdev_geom_check_unmapped_cb_state {
+ int pages; /* running total of pages spanned by the segments seen so far */
+ uint_t end; /* in-page offset where the last segment ended; 0 = page boundary */
+};
+
+/*
+ * Callback to check the ABD segment size/alignment and count the pages.
+ * GEOM requires data buffer to look virtually contiguous. It means only
+ * the first page of the buffer may not start and only the last may not
+ * end on a page boundary. All other physical pages must be full.
+ *
+ * buf/len describe one segment; priv points to the
+ * vdev_geom_check_unmapped_cb_state accumulated across calls.
+ * Returns 0 when the segment fits the layout described above and 1 when
+ * it does not: either a segment after the first does not start on a page
+ * boundary, or the previously seen segment did not end on one.
+ */
+static int
+vdev_geom_check_unmapped_cb(void *buf, size_t len, void *priv)
+{
+ struct vdev_geom_check_unmapped_cb_state *s = priv;
+ vm_offset_t off = (vm_offset_t)buf & PAGE_MASK; /* offset of buf within its page */
+
+ if (s->pages != 0 && off != 0) /* only the first segment may start mid-page */
+ return (1);
+ if (s->end != 0) /* an earlier segment ended mid-page, so it had to be last */
+ return (1);
+ s->end = (off + len) & PAGE_MASK;
+ s->pages += (off + len + PAGE_MASK) >> PAGE_SHIFT; /* pages touched, rounded up */
+ return (0);
+}
+
+/*
+ * Check whether we can use unmapped I/O for this ZIO on this device to
+ * avoid data copying between scattered and/or gang ABD buffer and linear.
+ *
+ * Returns the number of pages the buffer spans when unmapped I/O can be
+ * used, or 0 when it cannot (the ABD is linear, the provider does not
+ * accept unmapped I/O, or the segment layout check fails).
+ */
+static int
+vdev_geom_check_unmapped(zio_t *zio, struct g_consumer *cp)
+{
+ struct vdev_geom_check_unmapped_cb_state s;
+
+ /* If the buffer is already linear, then nothing to do here. */
+ if (abd_is_linear(zio->io_abd))
+ return (0);
+
+ /*
+ * If unmapped I/O is not supported by the GEOM provider,
+ * then we can't do anything and have to copy the data.
+ */
+ if ((cp->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0)
+ return (0);
+
+ /* Check the buffer chunks sizes/alignments and count pages. */
+ s.pages = s.end = 0;
+ if (abd_iterate_func(zio->io_abd, 0, zio->io_size,
+ vdev_geom_check_unmapped_cb, &s))
+ return (0);
+ return (s.pages);
+}
+
+/*
+ * Callback to translate the ABD segment into array of physical pages.
+ *
+ * buf/len describe one segment and priv is the bio being filled; each call
+ * appends the segment's pages to bp->bio_ma and advances bp->bio_ma_n.
+ * The first segment also records its in-page starting offset in
+ * bp->bio_ma_offset.  Always returns 0.
+ */
+static int
+vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv)
+{
+	struct bio *bp = priv;
+	vm_offset_t addr = (vm_offset_t)buf;
+	vm_offset_t end = addr + len;
+
+	if (bp->bio_ma_n == 0)
+		bp->bio_ma_offset = addr & PAGE_MASK;
+
+	/*
+	 * Step through page-aligned addresses.  Starting the walk from an
+	 * unaligned addr would stop one page short whenever the segment's
+	 * tail spills into a further page (a segment that starts mid-page
+	 * and is PAGE_SIZE long touches two pages), leaving fewer bio_ma
+	 * entries than vdev_geom_check_unmapped_cb() counted.
+	 */
+	addr &= ~(vm_offset_t)PAGE_MASK;
+	do {
+		bp->bio_ma[bp->bio_ma_n++] =
+		    PHYS_TO_VM_PAGE(pmap_kextract(addr));
+		addr += PAGE_SIZE;
+	} while (addr < end);
+	return (0);
+}
+
static void
vdev_geom_io_start(zio_t *zio)
{
@@ -1123,14 +1199,34 @@ sendreq:
zio->io_target_timestamp = zio_handle_io_delay(zio);
bp->bio_offset = zio->io_offset;
bp->bio_length = zio->io_size;
- if (zio->io_type == ZIO_TYPE_READ) {
+ if (zio->io_type == ZIO_TYPE_READ)
bp->bio_cmd = BIO_READ;
- bp->bio_data =
- abd_borrow_buf(zio->io_abd, zio->io_size);
- } else {
+ else
bp->bio_cmd = BIO_WRITE;
- bp->bio_data =
- abd_borrow_buf_copy(zio->io_abd, zio->io_size);
+
+ /*
+ * If possible, represent scattered and/or gang ABD buffer to
+ * GEOM as an array of physical pages. It allows to satisfy
+ * requirement of virtually contiguous buffer without copying.
+ */
+ int pgs = vdev_geom_check_unmapped(zio, cp);
+ if (pgs > 0) {
+ bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs,
+ M_DEVBUF, M_WAITOK);
+ bp->bio_ma_n = 0;
+ bp->bio_ma_offset = 0;
+ abd_iterate_func(zio->io_abd, 0, zio->io_size,
+ vdev_geom_fill_unmap_cb, bp);
+ bp->bio_data = unmapped_buf;
+ bp->bio_flags |= BIO_UNMAPPED;
+ } else {
+ if (zio->io_type == ZIO_TYPE_READ) {
+ bp->bio_data = abd_borrow_buf(zio->io_abd,
+ zio->io_size);
+ } else {
+ bp->bio_data = abd_borrow_buf_copy(zio->io_abd,
+ zio->io_size);
+ }
}
break;
case ZIO_TYPE_TRIM:
@@ -1169,10 +1265,17 @@ vdev_geom_io_done(zio_t *zio)
return;
}
- if (zio->io_type == ZIO_TYPE_READ)
- abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size);
- else
- abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size);
+ if (bp->bio_ma != NULL) {
+ free(bp->bio_ma, M_DEVBUF);
+ } else {
+ if (zio->io_type == ZIO_TYPE_READ) {
+ abd_return_buf_copy(zio->io_abd, bp->bio_data,
+ zio->io_size);
+ } else {
+ abd_return_buf(zio->io_abd, bp->bio_data,
+ zio->io_size);
+ }
+ }
g_destroy_bio(bp);
zio->io_bio = NULL;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
index 908cff6810eb..a3d67aaa11ba 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
@@ -241,28 +241,21 @@ zfs_file_fsync(zfs_file_t *fp, int flags)
return (zfs_vop_fsync(fp->f_vnode));
}
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
{
struct file *fp;
if (fget(curthread, fd, &cap_no_rights, &fp))
- return (SET_ERROR(EBADF));
+ return (NULL);
- *fpp = fp;
- return (0);
+ return (fp);
}
void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
{
- struct file *fp;
-
- /* No CAP_ rights required, as we're only releasing. */
- if (fget(curthread, fd, &cap_no_rights, &fp) == 0) {
- fdrop(fp, curthread);
- fdrop(fp, curthread);
- }
+ fdrop(fp, curthread);
}
loff_t
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
index 35e647375d9d..e12f7c3ced43 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
@@ -407,36 +407,22 @@ zfs_file_unlink(const char *path)
* Get reference to file pointer
*
* fd - input file descriptor
- * fpp - pointer to file pointer
*
- * Returns 0 on success EBADF on failure.
+ * Returns pointer to file struct or NULL
*/
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
{
- zfs_file_t *fp;
-
- fp = fget(fd);
- if (fp == NULL)
- return (EBADF);
-
- *fpp = fp;
-
- return (0);
+ return (fget(fd));
}
/*
* Drop reference to file pointer
*
- * fd - input file descriptor
+ * fp - input file struct pointer
*/
void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
{
- struct file *fp;
-
- if ((fp = fget(fd)) != NULL) {
- fput(fp);
- fput(fp);
- }
+ fput(fp);
}
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
index dff7d8ece4be..b8a1c7c8a5ca 100644
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -278,25 +278,29 @@ zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
return (0);
}
-int
+zfs_file_t *
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
- int error;
+ zfs_file_t *fp = zfs_file_get(fd);
+ if (fp == NULL)
+ return (NULL);
- error = zfsdev_getminor(fd, minorp);
+ int error = zfsdev_getminor(fp, minorp);
if (error == 0)
error = zfs_zevent_minor_to_state(*minorp, ze);
- if (error)
- zfs_zevent_fd_rele(fd);
+ if (error) {
+ zfs_zevent_fd_rele(fp);
+ fp = NULL;
+ }
- return (error);
+ return (fp);
}
void
-zfs_zevent_fd_rele(int fd)
+zfs_zevent_fd_rele(zfs_file_t *fp)
{
- zfs_file_put(fd);
+ zfs_file_put(fp);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 0d5536cf7cb0..96a021acbc95 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -4861,8 +4861,8 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
*errors = fnvlist_alloc();
off = 0;
- if ((error = zfs_file_get(input_fd, &input_fp)))
- return (error);
+ if ((input_fp = zfs_file_get(input_fd)) == NULL)
+ return (SET_ERROR(EBADF));
noff = off = zfs_file_off(input_fp);
error = dmu_recv_begin(tofs, tosnap, begin_record, force,
@@ -5142,7 +5142,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
nvlist_free(inheritprops);
}
out:
- zfs_file_put(input_fd);
+ zfs_file_put(input_fp);
nvlist_free(origrecvd);
nvlist_free(origprops);
@@ -5472,8 +5472,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
zfs_file_t *fp;
dmu_send_outparams_t out = {0};
- if ((error = zfs_file_get(zc->zc_cookie, &fp)))
- return (error);
+ if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
+ return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
out.dso_outfunc = dump_bytes;
@@ -5483,7 +5483,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
zc->zc_fromobj, embedok, large_block_ok, compressok,
rawok, savedok, zc->zc_cookie, &off, &out);
- zfs_file_put(zc->zc_cookie);
+ zfs_file_put(fp);
}
return (error);
}
@@ -6047,25 +6047,24 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
{
char *snap_name;
char *hold_name;
- int error;
minor_t minor;
- error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
- if (error != 0)
- return (error);
+ zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
(u_longlong_t)ddi_get_lbolt64());
hold_name = kmem_asprintf("%%%s", zc->zc_value);
- error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
+ int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
hold_name);
if (error == 0)
(void) strlcpy(zc->zc_value, snap_name,
sizeof (zc->zc_value));
kmem_strfree(snap_name);
kmem_strfree(hold_name);
- zfs_onexit_fd_rele(zc->zc_cleanup_fd);
+ zfs_onexit_fd_rele(fp);
return (error);
}
@@ -6085,13 +6084,13 @@ zfs_ioc_diff(zfs_cmd_t *zc)
offset_t off;
int error;
- if ((error = zfs_file_get(zc->zc_cookie, &fp)))
- return (error);
+ if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
+ return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
- zfs_file_put(zc->zc_cookie);
+ zfs_file_put(fp);
return (error);
}
@@ -6127,6 +6126,7 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
int cleanup_fd = -1;
int error;
minor_t minor = 0;
+ zfs_file_t *fp = NULL;
holds = fnvlist_lookup_nvlist(args, "holds");
@@ -6144,14 +6144,16 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
}
if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
- error = zfs_onexit_fd_hold(cleanup_fd, &minor);
- if (error != 0)
- return (SET_ERROR(error));
+ fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
}
error = dsl_dataset_user_hold(holds, minor, errlist);
- if (minor != 0)
- zfs_onexit_fd_rele(cleanup_fd);
+ if (fp != NULL) {
+ ASSERT3U(minor, !=, 0);
+ zfs_onexit_fd_rele(fp);
+ }
return (SET_ERROR(error));
}
@@ -6214,9 +6216,9 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
uint64_t dropped = 0;
int error;
- error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
- if (error != 0)
- return (error);
+ zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
do {
error = zfs_zevent_next(ze, &event,
@@ -6238,7 +6240,7 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
break;
} while (1);
- zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+ zfs_zevent_fd_rele(fp);
return (error);
}
@@ -6270,12 +6272,12 @@ zfs_ioc_events_seek(zfs_cmd_t *zc)
minor_t minor;
int error;
- error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
- if (error != 0)
- return (error);
+ zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
error = zfs_zevent_seek(ze, zc->zc_guid);
- zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+ zfs_zevent_fd_rele(fp);
return (error);
}
@@ -6459,8 +6461,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
- if ((error = zfs_file_get(fd, &fp)))
- return (error);
+ if ((fp = zfs_file_get(fd)) == NULL)
+ return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
@@ -6472,7 +6474,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
compressok, rawok, savedok, resumeobj, resumeoff,
redactbook, fd, &off, &out);
- zfs_file_put(fd);
+ zfs_file_put(fp);
return (error);
}
@@ -7345,17 +7347,12 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type,
}
int
-zfsdev_getminor(int fd, minor_t *minorp)
+zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
{
zfsdev_state_t *zs, *fpd;
- zfs_file_t *fp;
- int rc;
ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
- if ((rc = zfs_file_get(fd, &fp)))
- return (rc);
-
fpd = zfs_file_private(fp);
if (fpd == NULL)
return (SET_ERROR(EBADF));
diff --git a/sys/contrib/openzfs/module/zfs/zfs_onexit.c b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
index 2a1332e715ee..7c56dd9c97f5 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_onexit.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
@@ -107,30 +107,33 @@ zfs_onexit_destroy(zfs_onexit_t *zo)
* of this function must call zfs_onexit_fd_rele() when they're finished
* using the minor number.
*/
-int
+zfs_file_t *
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
zfs_onexit_t *zo = NULL;
- int error;
- error = zfsdev_getminor(fd, minorp);
+ zfs_file_t *fp = zfs_file_get(fd);
+ if (fp == NULL)
+ return (NULL);
+
+ int error = zfsdev_getminor(fp, minorp);
if (error) {
- zfs_onexit_fd_rele(fd);
- return (error);
+ zfs_onexit_fd_rele(fp);
+ return (NULL);
}
zo = zfsdev_get_state(*minorp, ZST_ONEXIT);
if (zo == NULL) {
- zfs_onexit_fd_rele(fd);
- return (SET_ERROR(EBADF));
+ zfs_onexit_fd_rele(fp);
+ return (NULL);
}
- return (0);
+ return (fp);
}
void
-zfs_onexit_fd_rele(int fd)
+zfs_onexit_fd_rele(zfs_file_t *fp)
{
- zfs_file_put(fd);
+ zfs_file_put(fp);
}
static int