aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMateusz Guzik <mjg@FreeBSD.org>2020-08-07 23:06:40 +0000
committerMateusz Guzik <mjg@FreeBSD.org>2020-08-07 23:06:40 +0000
commit51ea7bea910148ae6cf40c57de0cd3b120d542e3 (patch)
treebf5eda7ce6fe5b88a48f7b4889b50337b4c5667b
parent1e5d733503521375eb5372079366329936365f6f (diff)
downloadsrc-51ea7bea910148ae6cf40c57de0cd3b120d542e3.tar.gz
src-51ea7bea910148ae6cf40c57de0cd3b120d542e3.zip
vfs: add VOP_STAT
The current scheme of calling VOP_GETATTR adds avoidable overhead. An example with tmpfs doing fstat (ops/s): before: 7488958 after: 7913833 Reviewed by: kib (previous version) Differential Revision: https://reviews.freebsd.org/D25910
Notes
Notes: svn path=/head/; revision=364044
-rw-r--r--share/man/man9/Makefile3
-rw-r--r--share/man/man9/VOP_ATTRIB.945
-rw-r--r--sys/compat/linuxkpi/common/src/linux_compat.c2
-rw-r--r--sys/kern/vfs_default.c113
-rw-r--r--sys/kern/vfs_syscalls.c6
-rw-r--r--sys/kern/vfs_vnops.c113
-rw-r--r--sys/kern/vnode_if.src11
-rw-r--r--sys/security/audit/audit_arg.c2
-rw-r--r--sys/sys/vnode.h18
9 files changed, 189 insertions, 124 deletions
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index 01aaed947fa1..33e162d5fae0 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -2308,7 +2308,8 @@ MLINKS+=vm_page_insert.9 vm_page_remove.9
MLINKS+=vm_page_wire.9 vm_page_unwire.9
MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9
MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \
- VOP_ATTRIB.9 VOP_SETATTR.9
+ VOP_ATTRIB.9 VOP_SETATTR.9 \
+ VOP_ATTRIB.9 VOP_STAT.9
MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \
VOP_CREATE.9 VOP_MKNOD.9 \
VOP_CREATE.9 VOP_SYMLINK.9
diff --git a/share/man/man9/VOP_ATTRIB.9 b/share/man/man9/VOP_ATTRIB.9
index e48e4eb9a254..45f1e2f1652a 100644
--- a/share/man/man9/VOP_ATTRIB.9
+++ b/share/man/man9/VOP_ATTRIB.9
@@ -28,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 29, 2008
+.Dd August 8, 2020
.Dt VOP_ATTRIB 9
.Os
.Sh NAME
@@ -42,19 +42,49 @@
.Fn VOP_GETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
.Ft int
.Fn VOP_SETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
+.Ft int
+.Fn VOP_STAT "struct vnode *vp" "struct stat *sb" "struct ucred *active_cred" \
+"struct ucred *file_cred" "struct thread *td"
.Sh DESCRIPTION
These entry points manipulate various attributes of a file or directory,
including file permissions, owner, group, size,
access time and modification time.
.Pp
-The arguments are:
+.Fn VOP_STAT
+returns data in a format suitable for the
+.Xr stat 2
+system call and by default is implemented as a wrapper around
+.Fn VOP_GETATTR .
+Filesystems may want to implement their own variant for performance reasons.
+.Pp
+For
+.Fn VOP_GETATTR
+and
+.Fn VOP_SETATTR
+the arguments are:
.Bl -tag -width cred
.It Fa vp
The vnode of the file.
.It Fa vap
The attributes of the file.
.It Fa cred
-The user credentials of the calling process.
+The user credentials of the calling thread.
+.El
+.Pp
+For
+.Fn VOP_STAT
+the arguments are:
+.Bl -tag -width active_cred
+.It Fa vp
+The vnode of the file.
+.It Fa sb
+The attributes of the file.
+.It Fa active_cred
+The user credentials of the calling thread.
+.It Fa file_cred
+The credentials installed on the file description pointing to the vnode or NOCRED.
+.It Fa td
+The calling thread.
.El
.Pp
Attributes which are not being modified by
@@ -67,8 +97,11 @@ the contents of
.Fa *vap
prior to setting specific values.
.Sh LOCKS
+Both
.Fn VOP_GETATTR
-expects the vnode to be locked on entry and will leave the vnode locked on
+and
+.Fn VOP_STAT
+expect the vnode to be locked on entry and will leave the vnode locked on
return.
The lock type can be either shared or exclusive.
.Pp
@@ -84,6 +117,10 @@ otherwise an appropriate error is returned.
.Fn VOP_SETATTR
returns zero if the attributes were changed successfully, otherwise an
appropriate error is returned.
+.Fn VOP_STAT
+returns 0 if it was able to retrieve the attribute data into
+.Fa *sb ,
+otherwise an appropriate error is returned.
.Sh ERRORS
.Bl -tag -width Er
.It Bq Er EPERM
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index 385dbc89b977..0d5e14b9d027 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -1691,7 +1691,7 @@ linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
vp = filp->f_vnode;
vn_lock(vp, LK_SHARED | LK_RETRY);
- error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
+ error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
VOP_UNLOCK(vp);
return (error);
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index f67bc9bf3ef0..57465506d7d8 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -57,6 +57,9 @@ __FBSDID("$FreeBSD$");
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>
+#include <sys/stat.h>
+#include <security/audit/audit.h>
+#include <sys/priv.h>
#include <security/mac/mac_framework.h>
@@ -87,6 +90,7 @@ static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
+static int vop_stdstat(struct vop_stat_args *ap);
/*
* This vnode table stores what we want to do if the filesystem doesn't
@@ -114,6 +118,7 @@ struct vop_vector default_vnodeops = {
.vop_bmap = vop_stdbmap,
.vop_close = VOP_NULL,
.vop_fsync = VOP_NULL,
+ .vop_stat = vop_stdstat,
.vop_fdatasync = vop_stdfdatasync,
.vop_getpages = vop_stdgetpages,
.vop_getpages_async = vop_stdgetpages_async,
@@ -1461,3 +1466,111 @@ vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
sigallowstop(prev_stops);
return (rc);
}
+
+/*
+ * Default VOP_STAT implementation, built on top of VOP_GETATTR.
+ *
+ * Runs vop_stat_helper_pre() (which also zeroes *a_sb on success), fetches
+ * the vnode attributes with a_active_cred and translates them into the
+ * struct stat pointed to by a_sb.
+ *
+ * Returns 0 on success, the error from the pre helper or VOP_GETATTR,
+ * EBADF when the vnode type has no S_IF* equivalent, or EOVERFLOW when the
+ * file size does not fit in off_t.  Every path past the pre helper exits
+ * through vop_stat_helper_post().
+ */
+static int
+vop_stdstat(struct vop_stat_args *a)
+{
+	struct vattr vattr;
+	struct vattr *vap;
+	struct vnode *vp;
+	struct stat *sb;
+	int error;
+	u_short mode;
+
+	vp = a->a_vp;
+	sb = a->a_sb;
+
+	error = vop_stat_helper_pre(a);
+	if (error != 0)
+		return (error);
+
+	vap = &vattr;
+
+	/*
+	 * Initialize defaults for new and unusual fields, so that file
+	 * systems which don't support these fields don't need to know
+	 * about them.
+	 */
+	vap->va_birthtime.tv_sec = -1;
+	vap->va_birthtime.tv_nsec = 0;
+	vap->va_fsid = VNOVAL;
+	vap->va_rdev = NODEV;
+
+	error = VOP_GETATTR(vp, vap, a->a_active_cred);
+	if (error)
+		goto out;
+
+	/*
+	 * The spare stat fields need no clearing here: *sb was already
+	 * zeroed by vop_stat_helper_pre() and nothing has written to it
+	 * since.
+	 */
+
+	/*
+	 * Copy from vattr table
+	 */
+	if (vap->va_fsid != VNOVAL)
+		sb->st_dev = vap->va_fsid;
+	else
+		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
+	sb->st_ino = vap->va_fileid;
+	mode = vap->va_mode;
+	switch (vap->va_type) {
+	case VREG:
+		mode |= S_IFREG;
+		break;
+	case VDIR:
+		mode |= S_IFDIR;
+		break;
+	case VBLK:
+		mode |= S_IFBLK;
+		break;
+	case VCHR:
+		mode |= S_IFCHR;
+		break;
+	case VLNK:
+		mode |= S_IFLNK;
+		break;
+	case VSOCK:
+		mode |= S_IFSOCK;
+		break;
+	case VFIFO:
+		mode |= S_IFIFO;
+		break;
+	default:
+		/* No stat(2) file type corresponds to this vnode type. */
+		error = EBADF;
+		goto out;
+	}
+	sb->st_mode = mode;
+	sb->st_nlink = vap->va_nlink;
+	sb->st_uid = vap->va_uid;
+	sb->st_gid = vap->va_gid;
+	sb->st_rdev = vap->va_rdev;
+	/* Refuse sizes that cannot be represented in st_size. */
+	if (vap->va_size > OFF_MAX) {
+		error = EOVERFLOW;
+		goto out;
+	}
+	sb->st_size = vap->va_size;
+	sb->st_atim.tv_sec = vap->va_atime.tv_sec;
+	sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
+	sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
+	sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
+	sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
+	sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
+	sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
+	sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
+
+	/*
+	 * According to www.opengroup.org, the meaning of st_blksize is
+	 *   "a filesystem-specific preferred I/O block size for this
+	 *    object.  In some filesystem types, this may vary from file
+	 *    to file"
+	 * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
+	 */
+
+	sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
+	sb->st_flags = vap->va_flags;
+	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
+	/* vop_stat_helper_post() may reset st_gen to 0 after this. */
+	sb->st_gen = vap->va_gen;
+out:
+	return (vop_stat_helper_post(a, error));
+}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 9e9c53d3327c..69a6be798208 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1867,7 +1867,7 @@ restart:
if (vp->v_type == VDIR && oldinum == 0) {
error = EPERM; /* POSIX */
} else if (oldinum != 0 &&
- ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
+ ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
sb.st_ino != oldinum) {
error = EIDRM; /* Identifier removed */
} else if (fp != NULL && fp->f_vnode != vp) {
@@ -2381,7 +2381,7 @@ kern_statat(struct thread *td, int flag, int fd, const char *path,
if ((error = namei(&nd)) != 0)
return (error);
- error = vn_stat(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
+ error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
if (error == 0) {
SDT_PROBE2(vfs, , stat, mode, path, sbp->st_mode);
if (S_ISREG(sbp->st_mode))
@@ -4566,7 +4566,7 @@ kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
vfs_unbusy(mp);
if (error != 0)
return (error);
- error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
+ error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
vput(vp);
return (error);
}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index eb1a7c28ddfe..a0dd9fd2436b 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1455,124 +1455,13 @@ vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred,
int error;
vn_lock(vp, LK_SHARED | LK_RETRY);
- error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
+ error = VOP_STAT(vp, sb, active_cred, fp->f_cred, td);
VOP_UNLOCK(vp);
return (error);
}
/*
- * Stat a vnode; implementation for the stat syscall
- */
-int
-vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
- struct ucred *file_cred, struct thread *td)
-{
- struct vattr vattr;
- struct vattr *vap;
- int error;
- u_short mode;
-
- AUDIT_ARG_VNODE1(vp);
-#ifdef MAC
- error = mac_vnode_check_stat(active_cred, file_cred, vp);
- if (error)
- return (error);
-#endif
-
- vap = &vattr;
-
- /*
- * Initialize defaults for new and unusual fields, so that file
- * systems which don't support these fields don't need to know
- * about them.
- */
- vap->va_birthtime.tv_sec = -1;
- vap->va_birthtime.tv_nsec = 0;
- vap->va_fsid = VNOVAL;
- vap->va_rdev = NODEV;
-
- error = VOP_GETATTR(vp, vap, active_cred);
- if (error)
- return (error);
-
- /*
- * Zero the spare stat fields
- */
- bzero(sb, sizeof *sb);
-
- /*
- * Copy from vattr table
- */
- if (vap->va_fsid != VNOVAL)
- sb->st_dev = vap->va_fsid;
- else
- sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
- sb->st_ino = vap->va_fileid;
- mode = vap->va_mode;
- switch (vap->va_type) {
- case VREG:
- mode |= S_IFREG;
- break;
- case VDIR:
- mode |= S_IFDIR;
- break;
- case VBLK:
- mode |= S_IFBLK;
- break;
- case VCHR:
- mode |= S_IFCHR;
- break;
- case VLNK:
- mode |= S_IFLNK;
- break;
- case VSOCK:
- mode |= S_IFSOCK;
- break;
- case VFIFO:
- mode |= S_IFIFO;
- break;
- default:
- return (EBADF);
- }
- sb->st_mode = mode;
- sb->st_nlink = vap->va_nlink;
- sb->st_uid = vap->va_uid;
- sb->st_gid = vap->va_gid;
- sb->st_rdev = vap->va_rdev;
- if (vap->va_size > OFF_MAX)
- return (EOVERFLOW);
- sb->st_size = vap->va_size;
- sb->st_atim.tv_sec = vap->va_atime.tv_sec;
- sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
- sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
- sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
- sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
- sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
- sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
- sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
-
- /*
- * According to www.opengroup.org, the meaning of st_blksize is
- * "a filesystem-specific preferred I/O block size for this
- * object. In some filesystem types, this may vary from file
- * to file"
- * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
- */
-
- sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
-
- sb->st_flags = vap->va_flags;
- if (priv_check_cred_vfs_generation(td->td_ucred))
- sb->st_gen = 0;
- else
- sb->st_gen = vap->va_gen;
-
- sb->st_blocks = vap->va_bytes / S_BLKSIZE;
- return (0);
-}
-
-/*
* File table vnode ioctl routine.
*/
static int
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index e5a7b389fb30..10bca613606d 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -177,6 +177,17 @@ vop_accessx {
};
+#
+# stat: fill in a struct stat (sb) for vp on behalf of stat(2)-style callers.
+# NOTE(review): the "L L L" columns presumably mean vp is locked on entry
+# and stays locked on both success and error return; confirm against the
+# locking-protocol legend at the top of vnode_if.src.
+#
+%% stat vp L L L
+
+vop_stat {
+ IN struct vnode *vp;
+ OUT struct stat *sb;
+ IN struct ucred *active_cred;
+ IN struct ucred *file_cred;
+ IN struct thread *td;
+};
+
+
%% getattr vp L L L
vop_getattr {
diff --git a/sys/security/audit/audit_arg.c b/sys/security/audit/audit_arg.c
index fc5318750e3e..44b17e36c8ea 100644
--- a/sys/security/audit/audit_arg.c
+++ b/sys/security/audit/audit_arg.c
@@ -854,7 +854,7 @@ audit_arg_upath2_canon(char *upath)
* It is assumed that the caller will hold any vnode locks necessary to
* perform a VOP_GETATTR() on the passed vnode.
*
- * XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but always
+ * XXX: The attr code is very similar to vfs_default.c:vop_stdstat(), but always
* provides access to the generation number as we need that to construct the
* BSM file ID.
*
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 5d9e3496d12e..87c01a962064 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -737,8 +737,6 @@ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base,
struct thread *td);
int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
struct thread *td);
-int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
- struct ucred *file_cred, struct thread *td);
int vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
int vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
int flags);
@@ -893,6 +891,22 @@ void vop_need_inactive_debugpost(void *a, int rc);
void vop_rename_fail(struct vop_rename_args *ap);
+/*
+ * Common prologue for VOP_STAT implementations.
+ *
+ * Records the vnode as an audit argument, runs the MAC stat check with the
+ * active and file credentials, and zeroes *a_sb when the check passes.
+ * Evaluates to 0 on success or to the error from mac_vnode_check_stat().
+ * The argument is captured once in a local so that an expression with side
+ * effects, or one needing parentheses, expands safely.
+ */
+#define	vop_stat_helper_pre(ap)	({					\
+	struct vop_stat_args *_ap = (ap);				\
+	int _error;							\
+	AUDIT_ARG_VNODE1(_ap->a_vp);					\
+	_error = mac_vnode_check_stat(_ap->a_active_cred,		\
+	    _ap->a_file_cred, _ap->a_vp);				\
+	if (__predict_true(_error == 0))				\
+		bzero(_ap->a_sb, sizeof(*_ap->a_sb));			\
+	_error;								\
+})
+
+/*
+ * Common epilogue for VOP_STAT implementations.
+ *
+ * Resets st_gen in *a_sb to 0 when priv_check_cred_vfs_generation() returns
+ * non-zero for a_td's credentials, then evaluates to the error value that
+ * was passed in.  This runs regardless of whether that error is zero.
+ * Both arguments are evaluated exactly once.
+ */
+#define	vop_stat_helper_post(ap, error)	({				\
+	int _error = (error);						\
+	struct vop_stat_args *_ap = (ap);				\
+	if (priv_check_cred_vfs_generation(_ap->a_td->td_ucred))	\
+		_ap->a_sb->st_gen = 0;					\
+	_error;								\
+})
+
#define VOP_WRITE_PRE(ap) \
struct vattr va; \
int error; \