aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorge Wilson <george.wilson@delphix.com>2021-07-11 01:00:37 +0000
committerGitHub <noreply@github.com>2021-07-11 01:00:37 +0000
commit958826be7a3e17f29e1f5e114c76aa2ec3c8a490 (patch)
tree7aa2a70b2f8440273b2976de03fbc91d6829765c
parent03dba7ae318e10184a6d5e8327e3005a3ad17eb0 (diff)
downloadsrc-958826be7a3e17f29e1f5e114c76aa2ec3c8a490.tar.gz
src-958826be7a3e17f29e1f5e114c76aa2ec3c8a490.zip
file reference counts can get corrupted
Callers of zfs_file_get and zfs_file_put can corrupt the reference counts for the file structure resulting in a panic or a soft lockup. When zfs send/recv runs, it will add a reference count to the open file, and begin to send or recv the stream. If the file descriptor is closed, then when dmu_recv_stream() or dmu_send() return we will call zfs_file_put to remove the reference we placed on the file structure. Unfortunately, because zfs_file_put() uses the file descriptor to lookup the file structure, it may end up finding that the file descriptor table no longer contains the file struct, thus leaking the file structure. Or it might end up finding a file descriptor for a different file and blindly updating its reference counts. Other failure modes probably exists. This change reworks the zfs_file_[get|put] interface to not rely on the file descriptor but instead pass the zfs_file_t pointer around. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Mark Maybee <mark.maybee@delphix.com> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Co-authored-by: Allan Jude <allan@klarasystems.com> Signed-off-by: George Wilson <gwilson@delphix.com> External-issue: DLPX-76119 Closes #12299
-rw-r--r--include/sys/fm/util.h5
-rw-r--r--include/sys/zfs_file.h6
-rw-r--r--include/sys/zfs_ioctl.h2
-rw-r--r--include/sys/zfs_onexit.h4
-rw-r--r--lib/libzpool/kernel.c20
-rw-r--r--module/os/freebsd/zfs/zfs_file_os.c19
-rw-r--r--module/os/linux/zfs/zfs_file_os.c28
-rw-r--r--module/zfs/fm.c20
-rw-r--r--module/zfs/zfs_ioctl.c71
-rw-r--r--module/zfs/zfs_onexit.c23
10 files changed, 91 insertions, 107 deletions
diff --git a/include/sys/fm/util.h b/include/sys/fm/util.h
index 56ba8798beb0..5fb6d1d6072b 100644
--- a/include/sys/fm/util.h
+++ b/include/sys/fm/util.h
@@ -31,6 +31,7 @@ extern "C" {
#endif
#include <sys/nvpair.h>
+#include <sys/zfs_file.h>
/*
* Shared user/kernel definitions for class length, error channel name,
@@ -95,8 +96,8 @@ extern void fm_fini(void);
extern void zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector);
extern int zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
extern void zfs_zevent_drain_all(int *);
-extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
-extern void zfs_zevent_fd_rele(int);
+extern zfs_file_t *zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
+extern void zfs_zevent_fd_rele(zfs_file_t *);
extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *, uint64_t *);
extern int zfs_zevent_wait(zfs_zevent_t *);
extern int zfs_zevent_seek(zfs_zevent_t *, uint64_t);
diff --git a/include/sys/zfs_file.h b/include/sys/zfs_file.h
index d117933a6e4c..02cd1a6f041a 100644
--- a/include/sys/zfs_file.h
+++ b/include/sys/zfs_file.h
@@ -22,6 +22,8 @@
#ifndef _SYS_ZFS_FILE_H
#define _SYS_ZFS_FILE_H
+#include <sys/zfs_context.h>
+
#ifndef _KERNEL
typedef struct zfs_file {
int f_fd;
@@ -55,8 +57,8 @@ int zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len);
loff_t zfs_file_off(zfs_file_t *fp);
int zfs_file_unlink(const char *);
-int zfs_file_get(int fd, zfs_file_t **fp);
-void zfs_file_put(int fd);
+zfs_file_t *zfs_file_get(int fd);
+void zfs_file_put(zfs_file_t *fp);
void *zfs_file_private(zfs_file_t *fp);
#endif /* _SYS_ZFS_FILE_H */
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index 41c978a3fff5..4fb15636ecb8 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -566,7 +566,7 @@ typedef struct zfsdev_state {
} zfsdev_state_t;
extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
-extern int zfsdev_getminor(int fd, minor_t *minorp);
+extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);
extern uint_t zfs_fsyncer_key;
extern uint_t zfs_allow_log_key;
diff --git a/include/sys/zfs_onexit.h b/include/sys/zfs_onexit.h
index 0fab23ff849b..fd3030e3ac2d 100644
--- a/include/sys/zfs_onexit.h
+++ b/include/sys/zfs_onexit.h
@@ -51,8 +51,8 @@ extern void zfs_onexit_destroy(zfs_onexit_t *zo);
#endif
-extern int zfs_onexit_fd_hold(int fd, minor_t *minorp);
-extern void zfs_onexit_fd_rele(int fd);
+extern zfs_file_t *zfs_onexit_fd_hold(int fd, minor_t *minorp);
+extern void zfs_onexit_fd_rele(zfs_file_t *);
extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
uint64_t *action_handle);
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index b6d836f414ee..25f58f156bf9 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -928,16 +928,16 @@ kmem_asprintf(const char *fmt, ...)
}
/* ARGSUSED */
-int
+zfs_file_t *
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
*minorp = 0;
- return (0);
+ return (NULL);
}
/* ARGSUSED */
void
-zfs_onexit_fd_rele(int fd)
+zfs_onexit_fd_rele(zfs_file_t *fp)
{
}
@@ -1347,28 +1347,26 @@ zfs_file_unlink(const char *path)
* Get reference to file pointer
*
* fd - input file descriptor
- * fpp - pointer to file pointer
*
- * Returns 0 on success EBADF on failure.
+ * Returns pointer to file struct or NULL.
* Unsupported in user space.
*/
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
{
abort();
- return (EOPNOTSUPP);
+ return (NULL);
}
-
/*
* Drop reference to file pointer
*
- * fd - input file descriptor
+ * fp - pointer to file struct
*
* Unsupported in user space.
*/
void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
{
abort();
}
diff --git a/module/os/freebsd/zfs/zfs_file_os.c b/module/os/freebsd/zfs/zfs_file_os.c
index 908cff6810eb..a3d67aaa11ba 100644
--- a/module/os/freebsd/zfs/zfs_file_os.c
+++ b/module/os/freebsd/zfs/zfs_file_os.c
@@ -241,28 +241,21 @@ zfs_file_fsync(zfs_file_t *fp, int flags)
return (zfs_vop_fsync(fp->f_vnode));
}
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
{
struct file *fp;
if (fget(curthread, fd, &cap_no_rights, &fp))
- return (SET_ERROR(EBADF));
+ return (NULL);
- *fpp = fp;
- return (0);
+ return (fp);
}
void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
{
- struct file *fp;
-
- /* No CAP_ rights required, as we're only releasing. */
- if (fget(curthread, fd, &cap_no_rights, &fp) == 0) {
- fdrop(fp, curthread);
- fdrop(fp, curthread);
- }
+ fdrop(fp, curthread);
}
loff_t
diff --git a/module/os/linux/zfs/zfs_file_os.c b/module/os/linux/zfs/zfs_file_os.c
index 35e647375d9d..e12f7c3ced43 100644
--- a/module/os/linux/zfs/zfs_file_os.c
+++ b/module/os/linux/zfs/zfs_file_os.c
@@ -407,36 +407,22 @@ zfs_file_unlink(const char *path)
* Get reference to file pointer
*
* fd - input file descriptor
- * fpp - pointer to file pointer
*
- * Returns 0 on success EBADF on failure.
+ * Returns pointer to file struct or NULL
*/
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
{
- zfs_file_t *fp;
-
- fp = fget(fd);
- if (fp == NULL)
- return (EBADF);
-
- *fpp = fp;
-
- return (0);
+ return (fget(fd));
}
/*
* Drop reference to file pointer
*
- * fd - input file descriptor
+ * fp - input file struct pointer
*/
void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
{
- struct file *fp;
-
- if ((fp = fget(fd)) != NULL) {
- fput(fp);
- fput(fp);
- }
+ fput(fp);
}
diff --git a/module/zfs/fm.c b/module/zfs/fm.c
index dff7d8ece4be..b8a1c7c8a5ca 100644
--- a/module/zfs/fm.c
+++ b/module/zfs/fm.c
@@ -278,25 +278,29 @@ zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
return (0);
}
-int
+zfs_file_t *
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
- int error;
+ zfs_file_t *fp = zfs_file_get(fd);
+ if (fp == NULL)
+ return (NULL);
- error = zfsdev_getminor(fd, minorp);
+ int error = zfsdev_getminor(fp, minorp);
if (error == 0)
error = zfs_zevent_minor_to_state(*minorp, ze);
- if (error)
- zfs_zevent_fd_rele(fd);
+ if (error) {
+ zfs_zevent_fd_rele(fp);
+ fp = NULL;
+ }
- return (error);
+ return (fp);
}
void
-zfs_zevent_fd_rele(int fd)
+zfs_zevent_fd_rele(zfs_file_t *fp)
{
- zfs_file_put(fd);
+ zfs_file_put(fp);
}
/*
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 0d5536cf7cb0..96a021acbc95 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -4861,8 +4861,8 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
*errors = fnvlist_alloc();
off = 0;
- if ((error = zfs_file_get(input_fd, &input_fp)))
- return (error);
+ if ((input_fp = zfs_file_get(input_fd)) == NULL)
+ return (SET_ERROR(EBADF));
noff = off = zfs_file_off(input_fp);
error = dmu_recv_begin(tofs, tosnap, begin_record, force,
@@ -5142,7 +5142,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
nvlist_free(inheritprops);
}
out:
- zfs_file_put(input_fd);
+ zfs_file_put(input_fp);
nvlist_free(origrecvd);
nvlist_free(origprops);
@@ -5472,8 +5472,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
zfs_file_t *fp;
dmu_send_outparams_t out = {0};
- if ((error = zfs_file_get(zc->zc_cookie, &fp)))
- return (error);
+ if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
+ return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
out.dso_outfunc = dump_bytes;
@@ -5483,7 +5483,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
zc->zc_fromobj, embedok, large_block_ok, compressok,
rawok, savedok, zc->zc_cookie, &off, &out);
- zfs_file_put(zc->zc_cookie);
+ zfs_file_put(fp);
}
return (error);
}
@@ -6047,25 +6047,24 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
{
char *snap_name;
char *hold_name;
- int error;
minor_t minor;
- error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
- if (error != 0)
- return (error);
+ zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
(u_longlong_t)ddi_get_lbolt64());
hold_name = kmem_asprintf("%%%s", zc->zc_value);
- error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
+ int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
hold_name);
if (error == 0)
(void) strlcpy(zc->zc_value, snap_name,
sizeof (zc->zc_value));
kmem_strfree(snap_name);
kmem_strfree(hold_name);
- zfs_onexit_fd_rele(zc->zc_cleanup_fd);
+ zfs_onexit_fd_rele(fp);
return (error);
}
@@ -6085,13 +6084,13 @@ zfs_ioc_diff(zfs_cmd_t *zc)
offset_t off;
int error;
- if ((error = zfs_file_get(zc->zc_cookie, &fp)))
- return (error);
+ if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
+ return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
- zfs_file_put(zc->zc_cookie);
+ zfs_file_put(fp);
return (error);
}
@@ -6127,6 +6126,7 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
int cleanup_fd = -1;
int error;
minor_t minor = 0;
+ zfs_file_t *fp = NULL;
holds = fnvlist_lookup_nvlist(args, "holds");
@@ -6144,14 +6144,16 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
}
if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
- error = zfs_onexit_fd_hold(cleanup_fd, &minor);
- if (error != 0)
- return (SET_ERROR(error));
+ fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
}
error = dsl_dataset_user_hold(holds, minor, errlist);
- if (minor != 0)
- zfs_onexit_fd_rele(cleanup_fd);
+ if (fp != NULL) {
+ ASSERT3U(minor, !=, 0);
+ zfs_onexit_fd_rele(fp);
+ }
return (SET_ERROR(error));
}
@@ -6214,9 +6216,9 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
uint64_t dropped = 0;
int error;
- error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
- if (error != 0)
- return (error);
+ zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
do {
error = zfs_zevent_next(ze, &event,
@@ -6238,7 +6240,7 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
break;
} while (1);
- zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+ zfs_zevent_fd_rele(fp);
return (error);
}
@@ -6270,12 +6272,12 @@ zfs_ioc_events_seek(zfs_cmd_t *zc)
minor_t minor;
int error;
- error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
- if (error != 0)
- return (error);
+ zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+ if (fp == NULL)
+ return (SET_ERROR(EBADF));
error = zfs_zevent_seek(ze, zc->zc_guid);
- zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+ zfs_zevent_fd_rele(fp);
return (error);
}
@@ -6459,8 +6461,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
- if ((error = zfs_file_get(fd, &fp)))
- return (error);
+ if ((fp = zfs_file_get(fd)) == NULL)
+ return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
@@ -6472,7 +6474,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
compressok, rawok, savedok, resumeobj, resumeoff,
redactbook, fd, &off, &out);
- zfs_file_put(fd);
+ zfs_file_put(fp);
return (error);
}
@@ -7345,17 +7347,12 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type,
}
int
-zfsdev_getminor(int fd, minor_t *minorp)
+zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
{
zfsdev_state_t *zs, *fpd;
- zfs_file_t *fp;
- int rc;
ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
- if ((rc = zfs_file_get(fd, &fp)))
- return (rc);
-
fpd = zfs_file_private(fp);
if (fpd == NULL)
return (SET_ERROR(EBADF));
diff --git a/module/zfs/zfs_onexit.c b/module/zfs/zfs_onexit.c
index 2a1332e715ee..7c56dd9c97f5 100644
--- a/module/zfs/zfs_onexit.c
+++ b/module/zfs/zfs_onexit.c
@@ -107,30 +107,33 @@ zfs_onexit_destroy(zfs_onexit_t *zo)
* of this function must call zfs_onexit_fd_rele() when they're finished
* using the minor number.
*/
-int
+zfs_file_t *
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
zfs_onexit_t *zo = NULL;
- int error;
- error = zfsdev_getminor(fd, minorp);
+ zfs_file_t *fp = zfs_file_get(fd);
+ if (fp == NULL)
+ return (NULL);
+
+ int error = zfsdev_getminor(fp, minorp);
if (error) {
- zfs_onexit_fd_rele(fd);
- return (error);
+ zfs_onexit_fd_rele(fp);
+ return (NULL);
}
zo = zfsdev_get_state(*minorp, ZST_ONEXIT);
if (zo == NULL) {
- zfs_onexit_fd_rele(fd);
- return (SET_ERROR(EBADF));
+ zfs_onexit_fd_rele(fp);
+ return (NULL);
}
- return (0);
+ return (fp);
}
void
-zfs_onexit_fd_rele(int fd)
+zfs_onexit_fd_rele(zfs_file_t *fp)
{
- zfs_file_put(fd);
+ zfs_file_put(fp);
}
static int