aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mateusz Guzik <mjg@FreeBSD.org> 2020-07-25 10:32:45 +0000
committer Mateusz Guzik <mjg@FreeBSD.org> 2020-07-25 10:32:45 +0000
commit 07d2145a17178e9df663c90cc45d249502f7f768 (patch)
tree d08893cd15aa34820684cca307763205f290ad6d
parent 0379ff6ae30878fcb2fb5de34e8dfe2ba49d0d9d (diff)
download src-07d2145a17178e9df663c90cc45d249502f7f768.tar.gz
src-07d2145a17178e9df663c90cc45d249502f7f768.zip
vfs: add the infrastructure for lockless lookup
Reviewed by: kib
Tested by: pho (in a patchset)
Differential Revision: https://reviews.freebsd.org/D25577
Notes
Notes: svn path=/head/; revision=363518
-rw-r--r-- sys/kern/kern_descrip.c 29
-rw-r--r-- sys/kern/vfs_subr.c 103
-rw-r--r-- sys/kern/vnode_if.src 11
-rw-r--r-- sys/security/mac/mac_framework.h 3
-rw-r--r-- sys/sys/filedesc.h 1
-rw-r--r-- sys/sys/mount.h 1
-rw-r--r-- sys/sys/vnode.h 16
7 files changed, 140 insertions, 24 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index e2b57609c9f1..1f422bbf57c0 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -102,8 +102,8 @@ MALLOC_DECLARE(M_FADVISE);
static __read_mostly uma_zone_t file_zone;
static __read_mostly uma_zone_t filedesc0_zone;
-static __read_mostly uma_zone_t pwd_zone;
-static __read_mostly smr_t pwd_smr;
+__read_mostly uma_zone_t pwd_zone;
+VFS_SMR_DECLARE;
static int closefp(struct filedesc *fdp, int fd, struct file *fp,
struct thread *td, int holdleaders);
@@ -3343,18 +3343,33 @@ pwd_hold(struct thread *td)
fdp = td->td_proc->p_fd;
- smr_enter(pwd_smr);
- pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr);
+ vfs_smr_enter();
+ pwd = vfs_smr_entered_load(&fdp->fd_pwd);
MPASS(pwd != NULL);
if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) {
- smr_exit(pwd_smr);
+ vfs_smr_exit();
return (pwd);
}
- smr_exit(pwd_smr);
+ vfs_smr_exit();
FILEDESC_SLOCK(fdp);
pwd = pwd_hold_filedesc(fdp);
MPASS(pwd != NULL);
FILEDESC_SUNLOCK(fdp);
+ return (pwd);
+}
+
+/*
+ * Return the current process's pwd without acquiring a reference.
+ * The load is vfs_smr_entered_load, so the caller is expected to be
+ * inside a vfs_smr section.
+ */
+struct pwd *
+pwd_get_smr(void)
+{
+ struct pwd *pwd;
+
+ pwd = vfs_smr_entered_load(&curproc->p_fd->fd_pwd);
+ MPASS(pwd != NULL);
return (pwd);
}
@@ -4368,7 +4377,11 @@ filelistinit(void *dummy)
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
- pwd_smr = uma_zone_get_smr(pwd_zone);
+ /*
+ * XXXMJG this is a temporary hack due to boot ordering issues against
+ * the vnode zone.
+ */
+ vfs_smr = uma_zone_get_smr(pwd_zone);
mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
}
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index c5604c422fd6..2eec46774327 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -664,8 +664,8 @@ vntblinit(void *dummy __unused)
vnode_list_reclaim_marker = vn_alloc_marker(NULL);
TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
- vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR);
- vfs_smr = uma_zone_get_smr(vnode_zone);
+ vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
+ uma_zone_set_smr(vnode_zone, vfs_smr);
vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
/*
@@ -2914,6 +2914,22 @@ vget_prep(struct vnode *vp)
return (vs);
}
+void
+vget_abort(struct vnode *vp, enum vgetstate vs)
+{
+
+ switch (vs) {
+ case VGET_USECOUNT:
+ vrele(vp);
+ break;
+ case VGET_HOLDCNT:
+ vdrop(vp);
+ break;
+ default:
+ __assert_unreachable();
+ }
+}
+
int
vget(struct vnode *vp, int flags, struct thread *td)
{
@@ -2925,7 +2941,7 @@ vget(struct vnode *vp, int flags, struct thread *td)
return (vget_finish(vp, flags, vs));
}
-static int __noinline
+static void __noinline
vget_finish_vchr(struct vnode *vp)
{
@@ -2941,7 +2957,7 @@ vget_finish_vchr(struct vnode *vp)
#else
refcount_release(&vp->v_holdcnt);
#endif
- return (0);
+ return;
}
VI_LOCK(vp);
@@ -2953,18 +2969,17 @@ vget_finish_vchr(struct vnode *vp)
refcount_release(&vp->v_holdcnt);
#endif
VI_UNLOCK(vp);
- return (0);
+ return;
}
v_incr_devcount(vp);
refcount_acquire(&vp->v_usecount);
VI_UNLOCK(vp);
- return (0);
}
int
vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
{
- int error, old;
+ int error;
if ((flags & LK_INTERLOCK) != 0)
ASSERT_VI_LOCKED(vp, __func__);
@@ -2976,20 +2991,32 @@ vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
error = vn_lock(vp, flags);
if (__predict_false(error != 0)) {
- if (vs == VGET_USECOUNT)
- vrele(vp);
- else
- vdrop(vp);
+ vget_abort(vp, vs);
CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
vp);
return (error);
}
+ vget_finish_ref(vp, vs);
+ return (0);
+}
+
+void
+vget_finish_ref(struct vnode *vp, enum vgetstate vs)
+{
+ int old;
+
+ VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp);
+ VNPASS(vp->v_holdcnt > 0, vp);
+ VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp);
+
if (vs == VGET_USECOUNT)
- return (0);
+ return;
- if (__predict_false(vp->v_type == VCHR))
- return (vget_finish_vchr(vp));
+ if (__predict_false(vp->v_type == VCHR)) {
+ vget_finish_vchr(vp);
+ return;
+ }
/*
* We hold the vnode. If the usecount is 0 it will be utilized to keep
@@ -3006,7 +3033,6 @@ vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
refcount_release(&vp->v_holdcnt);
#endif
}
- return (0);
}
/*
@@ -4424,6 +4450,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
MNT_KERN_FLAG(MNTK_MARKER);
MNT_KERN_FLAG(MNTK_USES_BCACHE);
+ MNT_KERN_FLAG(MNTK_FPLOOKUP);
MNT_KERN_FLAG(MNTK_NOASYNC);
MNT_KERN_FLAG(MNTK_UNMOUNT);
MNT_KERN_FLAG(MNTK_MWAIT);
@@ -5240,6 +5267,38 @@ out:
}
/*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ *
+ * We never deny as priv_check_cred calls are not yet supported, see vaccess.
+ */
+int
+vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred)
+{
+
+ VFS_SMR_ASSERT_ENTERED();
+
+ /* Check the owner. */
+ if (cred->cr_uid == file_uid) {
+ if (file_mode & S_IXUSR)
+ return (0);
+ return (EAGAIN);
+ }
+
+ /* Otherwise, check the groups (first match) */
+ if (groupmember(file_gid, cred)) {
+ if (file_mode & S_IXGRP)
+ return (0);
+ return (EAGAIN);
+ }
+
+ /* Otherwise, check everyone else. */
+ if (file_mode & S_IXOTH)
+ return (0);
+ return (EAGAIN);
+}
+
+/*
* Common filesystem object access control check routine. Accepts a
* vnode's type, "mode", uid and gid, requested access mode, credentials,
* and optional call-by-reference privused argument allowing vaccess()
@@ -5538,6 +5597,20 @@ vop_rename_pre(void *ap)
#ifdef DEBUG_VFS_LOCKS
void
+vop_fplookup_vexec_pre(void *ap __unused)
+{
+
+ VFS_SMR_ASSERT_ENTERED();
+}
+
+void
+vop_fplookup_vexec_post(void *ap __unused, int rc __unused)
+{
+
+ VFS_SMR_ASSERT_ENTERED();
+}
+
+void
vop_strategy_pre(void *ap)
{
struct vop_strategy_args *a;
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index beac2eef4e21..5c0649fdadaf 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -146,6 +146,17 @@ vop_close {
};
+%% fplookup_vexec vp - - -
+%! fplookup_vexec pre vop_fplookup_vexec_pre
+%! fplookup_vexec post vop_fplookup_vexec_post
+
+vop_fplookup_vexec {
+ IN struct vnode *vp;
+ IN struct ucred *cred;
+ IN struct thread *td;
+};
+
+
%% access vp L L L
vop_access {
diff --git a/sys/security/mac/mac_framework.h b/sys/security/mac/mac_framework.h
index 1ab82dd709d4..e917eeb3c893 100644
--- a/sys/security/mac/mac_framework.h
+++ b/sys/security/mac/mac_framework.h
@@ -422,13 +422,14 @@ int mac_vnode_check_listextattr(struct ucred *cred, struct vnode *vp,
int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp,
struct componentname *cnp);
extern bool mac_vnode_check_lookup_fp_flag;
+#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag)
static inline int
mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp,
struct componentname *cnp)
{
mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup");
- if (__predict_false(mac_vnode_check_lookup_fp_flag))
+ if (mac_vnode_check_lookup_enabled())
return (mac_vnode_check_lookup_impl(cred, dvp, cnp));
return (0);
}
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index 6954b1d23f45..a99b3b0cb047 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -311,6 +311,7 @@ pwd_set(struct filedesc *fdp, struct pwd *newpwd)
smr_serialized_store(&fdp->fd_pwd, newpwd,
(FILEDESC_XLOCK_ASSERT(fdp), true));
}
+struct pwd *pwd_get_smr(void);
#endif /* _KERNEL */
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 6e1517aac4c6..a3bc0518a7ea 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -420,6 +420,7 @@ void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp);
#define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */
#define MNTK_VMSETSIZE_BUG 0x00010000
#define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */
+#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */
#define MNTK_NOASYNC 0x00800000 /* disable async */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 7c0a4f568451..8273842a91f5 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -666,6 +666,8 @@ int vn_path_to_global_path(struct thread *td, struct vnode *vp,
int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
gid_t file_gid, accmode_t accmode, struct ucred *cred,
int *privused);
+int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid,
+ struct ucred *cred);
int vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
struct acl *aclp, accmode_t accmode, struct ucred *cred,
int *privused);
@@ -682,6 +684,8 @@ int vget(struct vnode *vp, int flags, struct thread *td);
enum vgetstate vget_prep_smr(struct vnode *vp);
enum vgetstate vget_prep(struct vnode *vp);
int vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
+void vget_finish_ref(struct vnode *vp, enum vgetstate vs);
+void vget_abort(struct vnode *vp, enum vgetstate vs);
void vgone(struct vnode *vp);
void vhold(struct vnode *);
void vholdl(struct vnode *);
@@ -865,6 +869,8 @@ void vop_symlink_post(void *a, int rc);
int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
#ifdef DEBUG_VFS_LOCKS
+void vop_fplookup_vexec_pre(void *a);
+void vop_fplookup_vexec_post(void *a, int rc);
void vop_strategy_pre(void *a);
void vop_lock_pre(void *a);
void vop_lock_post(void *a, int rc);
@@ -872,6 +878,8 @@ void vop_unlock_pre(void *a);
void vop_need_inactive_pre(void *a);
void vop_need_inactive_post(void *a, int rc);
#else
+#define vop_fplookup_vexec_pre(x) do { } while (0)
+#define vop_fplookup_vexec_post(x, y) do { } while (0)
#define vop_strategy_pre(x) do { } while (0)
#define vop_lock_pre(x) do { } while (0)
#define vop_lock_post(x, y) do { } while (0)
@@ -1025,10 +1033,18 @@ int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp);
#define VFS_SMR() vfs_smr
#define vfs_smr_enter() smr_enter(VFS_SMR())
#define vfs_smr_exit() smr_exit(VFS_SMR())
+#define vfs_smr_entered_load(ptr) smr_entered_load((ptr), VFS_SMR())
#define VFS_SMR_ASSERT_ENTERED() SMR_ASSERT_ENTERED(VFS_SMR())
#define VFS_SMR_ASSERT_NOT_ENTERED() SMR_ASSERT_NOT_ENTERED(VFS_SMR())
#define VFS_SMR_ZONE_SET(zone) uma_zone_set_smr((zone), VFS_SMR())
+#define vn_load_v_data_smr(vp) ({ \
+ struct vnode *_vp = (vp); \
+ \
+ VFS_SMR_ASSERT_ENTERED(); \
+ atomic_load_ptr(&(_vp)->v_data); \
+})
+
#endif /* _KERNEL */
#endif /* !_SYS_VNODE_H_ */