aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
author: Konstantin Belousov <kib@FreeBSD.org> 2020-09-15 22:06:36 +0000
committer: Konstantin Belousov <kib@FreeBSD.org> 2020-09-15 22:06:36 +0000
commit: 3c484f325e60e961c476dfb6ebd992290667d6dd (patch)
tree: 2d952012e6511531fbcff7fbb1b2ce57aa2791a5 /sys
parent: 888636655ddddb9e8ae3afb998c72fd2c99f63d5 (diff)
download: src-3c484f325e60e961c476dfb6ebd992290667d6dd.tar.gz
download: src-3c484f325e60e961c476dfb6ebd992290667d6dd.zip
Convert page cache read to VOP.
There are several negative side-effects of not calling into the VOP layer at all for page cache reads. The biggest is the missed activation of EVFILT_READ knotes. Also, converting the read to a VOP allows the filesystem to make a more fine-grained decision to refuse a read from the page cache.

Keep the VIRF_PGREAD flag around; it is still useful for nullfs and for asserts.

Reviewed by: markj
Tested by: pho
Discussed with: mjg
Sponsored by: The FreeBSD Foundation
Differential revision: https://reviews.freebsd.org/D26346
Notes
Notes: svn path=/head/; revision=365785
Diffstat (limited to 'sys')
-rw-r--r-- sys/kern/vfs_default.c 8
-rw-r--r-- sys/kern/vfs_subr.c 9
-rw-r--r-- sys/kern/vfs_vnops.c 30
-rw-r--r-- sys/kern/vnode_if.src 11
-rw-r--r-- sys/sys/vnode.h 1
-rw-r--r-- sys/ufs/ufs/ufs_vnops.c 17
6 files changed, 60 insertions, 16 deletions
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index b120e8b6de5b..93b3a288732a 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -90,6 +90,7 @@ static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
+static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap);
static int vop_stdstat(struct vop_stat_args *ap);
/*
@@ -135,6 +136,7 @@ struct vop_vector default_vnodeops = {
.vop_poll = vop_nopoll,
.vop_putpages = vop_stdputpages,
.vop_readlink = VOP_EINVAL,
+ .vop_read_pgcache = vop_stdread_pgcache,
.vop_rename = vop_norename,
.vop_revoke = VOP_PANIC,
.vop_strategy = vop_nostrategy,
@@ -1575,3 +1577,9 @@ vop_stdstat(struct vop_stat_args *a)
out:
return (vop_stat_helper_post(a, error));
}
+
+static int
+vop_stdread_pgcache(struct vop_read_pgcache_args *ap __unused)
+{
+ return (EJUSTRETURN); /* default: decline, so vn_read() falls through to its locked read */
+}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 34f35f3638d0..6956a8f55e89 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -5839,6 +5839,15 @@ vop_read_post(void *ap, int rc)
}
void
+vop_read_pgcache_post(void *ap, int rc)
+{
+ struct vop_read_pgcache_args *a = ap;
+
+ if (!rc) /* notify only when the VOP returned 0 */
+ VFS_KNOTE_UNLOCKED(a->a_vp, NOTE_READ); /* post NOTE_READ on the vnode that was read */
+}
+
+void
vop_readdir_post(void *ap, int rc)
{
struct vop_readdir_args *a = ap;
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 4498dd8f4c58..671ee089d445 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -951,15 +951,6 @@ out_pip:
return (uio->uio_resid == 0 ? 0 : EJUSTRETURN);
}
-static bool
-do_vn_read_from_pgcache(struct vnode *vp, struct uio *uio, struct file *fp)
-{
- return ((vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) == VIRF_PGREAD &&
- !mac_vnode_check_read_enabled() &&
- uio->uio_resid <= ptoa(io_hold_cnt) && uio->uio_offset >= 0 &&
- (fp->f_flag & O_DIRECT) == 0 && vn_io_pgcache_read_enable);
-}
-
/*
* File table vnode read routine.
*/
@@ -976,8 +967,19 @@ vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
uio->uio_td, td));
KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET"));
vp = fp->f_vnode;
- if (do_vn_read_from_pgcache(vp, uio, fp)) {
- error = vn_read_from_obj(vp, uio);
+ ioflag = 0;
+ if (fp->f_flag & FNONBLOCK)
+ ioflag |= IO_NDELAY;
+ if (fp->f_flag & O_DIRECT)
+ ioflag |= IO_DIRECT;
+
+ /*
+ * Try to read from page cache. VIRF_DOOMED check is racy but
+ * allows us to avoid unneeded work outright.
+ */
+ if (vn_io_pgcache_read_enable && !mac_vnode_check_read_enabled() &&
+ (vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) == VIRF_PGREAD) {
+ error = VOP_READ_PGCACHE(vp, uio, ioflag, fp->f_cred);
if (error == 0) {
fp->f_nextoff[UIO_READ] = uio->uio_offset;
return (0);
@@ -985,11 +987,7 @@ vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
if (error != EJUSTRETURN)
return (error);
}
- ioflag = 0;
- if (fp->f_flag & FNONBLOCK)
- ioflag |= IO_NDELAY;
- if (fp->f_flag & O_DIRECT)
- ioflag |= IO_DIRECT;
+
advice = get_advice(fp, uio);
vn_lock(vp, LK_SHARED | LK_RETRY);
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index fde75d91325c..ad678625c00e 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -225,6 +225,17 @@ vop_read {
};
+%% read_pgcache vp - - -
+%! read_pgcache post vop_read_pgcache_post
+
+vop_read_pgcache {
+ IN struct vnode *vp;
+ INOUT struct uio *uio;
+ IN int ioflag;
+ IN struct ucred *cred;
+};
+
+
%% write vp L L L
%! write pre VOP_WRITE_PRE
%! write post VOP_WRITE_POST
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index bdeb5287c1db..942a160dff09 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -852,6 +852,7 @@ void vop_mknod_pre(void *a);
void vop_mknod_post(void *a, int rc);
void vop_open_post(void *a, int rc);
void vop_read_post(void *a, int rc);
+void vop_read_pgcache_post(void *ap, int rc);
void vop_readdir_post(void *a, int rc);
void vop_reclaim_post(void *a, int rc);
void vop_remove_pre(void *a);
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 6d38f509ed39..dd073dc143f6 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -2874,6 +2874,22 @@ ufs_ioctl(struct vop_ioctl_args *ap)
}
}
+static int
+ufs_read_pgcache(struct vop_read_pgcache_args *ap)
+{
+ struct uio *uio;
+ struct vnode *vp;
+
+ uio = ap->a_uio;
+ vp = ap->a_vp;
+ MPASS((vp->v_irflag & VIRF_PGREAD) != 0); /* vn_read() tests VIRF_PGREAD before calling */
+
+ if (uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 ||
+ (ap->a_ioflag & IO_DIRECT) != 0)
+ return (EJUSTRETURN); /* too large, negative offset, or direct I/O: decline */
+ return (vn_read_from_obj(vp, uio)); /* otherwise attempt the read via vn_read_from_obj() */
+}
+
/* Global vfs data structures for ufs. */
struct vop_vector ufs_vnodeops = {
.vop_default = &default_vnodeops,
@@ -2901,6 +2917,7 @@ struct vop_vector ufs_vnodeops = {
.vop_pathconf = ufs_pathconf,
.vop_poll = vop_stdpoll,
.vop_print = ufs_print,
+ .vop_read_pgcache = ufs_read_pgcache,
.vop_readdir = ufs_readdir,
.vop_readlink = ufs_readlink,
.vop_reclaim = ufs_reclaim,