aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mateusz Guzik <mjg@FreeBSD.org> 2020-10-10 03:48:17 +0000
committer Mateusz Guzik <mjg@FreeBSD.org> 2020-10-10 03:48:17 +0000
commit dd28b379cb8d5b02442782a2586d729e3e197818 (patch)
tree 79b478af8961197d978256e98ccfd10974f191d7
parent 49d48f45c8f728956cc2d982bf33ae79cd6fd8e9 (diff)
download src-dd28b379cb8d5b02442782a2586d729e3e197818.tar.gz
src-dd28b379cb8d5b02442782a2586d729e3e197818.zip
vfs: support lockless dirfd lookups
Notes
Notes: svn path=/head/; revision=366597
-rw-r--r-- sys/kern/kern_descrip.c 105
-rw-r--r-- sys/kern/vfs_cache.c 62
-rw-r--r-- sys/sys/file.h 2
3 files changed, 163 insertions, 6 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index c533e714ce52..af7d51057893 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2708,6 +2708,111 @@ get_locked:
return (error);
}
+#ifdef CAPABILITIES
+/*
+ * Translate a directory file descriptor into its vnode without taking any
+ * locks, for use as the starting point of a lookup (CAPABILITIES variant).
+ *
+ * The caller must already be inside the VFS SMR section (asserted below).
+ * This function itself acquires no reference on the file or on the vnode.
+ *
+ * On success returns 0 and sets *vpp to the directory vnode, *fsearch to
+ * whether the file has FSEARCH set in f_flag, and ndp->ni_filecaps to a
+ * copy of the descriptor's capabilities.
+ *
+ * Returns EBADF if fd is outside the current descriptor table and EAGAIN
+ * for every other failure (concurrent modification, empty slot, failed
+ * rights check, not a directory, restricted rights).  The caller visible in
+ * this change, cache_fplookup_dirfd(), treats any non-zero return as a
+ * reason to abort the lockless lookup.  Note that *fsearch may have been
+ * written even when EAGAIN is returned.
+ */
+int
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+{
+ const struct filedescent *fde;
+ const struct fdescenttbl *fdt;
+ struct filedesc *fdp;
+ struct file *fp;
+ struct vnode *vp;
+ const cap_rights_t *haverights;
+ cap_rights_t rights;
+ seqc_t seq;
+
+ VFS_SMR_ASSERT_ENTERED();
+
+ /* Rights to check: whatever the lookup asks for, plus CAP_LOOKUP. */
+ rights = *ndp->ni_rightsneeded;
+ cap_rights_set_one(&rights, CAP_LOOKUP);
+
+ fdp = curproc->p_fd;
+ fdt = fdp->fd_files;
+ /* The unsigned cast makes a negative fd fail the range check too. */
+ if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
+ return (EBADF);
+ /*
+ * Snapshot the sequence counter of this descriptor slot.  It is
+ * compared again after all the reads below to detect a concurrent
+ * change of the slot.
+ */
+ seq = seqc_read_any(fd_seqc(fdt, fd));
+ if (__predict_false(seqc_in_modify(seq)))
+ return (EAGAIN);
+ fde = &fdt->fdt_ofiles[fd];
+ haverights = cap_rights_fde_inline(fde);
+ fp = fde->fde_file;
+ if (__predict_false(fp == NULL))
+ return (EAGAIN);
+ /*
+ * NOTE(review): any non-zero result of the rights check is reported
+ * as EAGAIN rather than as a definitive error; presumably the locked
+ * path is expected to produce the real error code -- confirm.
+ */
+ if (__predict_false(cap_check_inline_transient(haverights, &rights)))
+ return (EAGAIN);
+ *fsearch = ((fp->f_flag & FSEARCH) != 0);
+ vp = fp->f_vnode;
+ if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
+ return (EAGAIN);
+ }
+ /*
+ * NOTE(review): the meaning of the final `false' argument to
+ * filecaps_copy() is not visible in this change -- confirm against
+ * its definition.  A false return is treated as "retry elsewhere".
+ */
+ if (!filecaps_copy(&fde->fde_caps, &ndp->ni_filecaps, false)) {
+ return (EAGAIN);
+ }
+ /*
+ * Use an acquire barrier to force re-reading of fdt so it is
+ * refreshed for verification.
+ */
+ atomic_thread_fence_acq();
+ fdt = fdp->fd_files;
+ if (__predict_false(!seqc_consistent_nomb(fd_seqc(fdt, fd), seq)))
+ return (EAGAIN);
+ /*
+ * If file descriptor doesn't have all rights,
+ * all lookups relative to it must also be
+ * strictly relative.
+ *
+ * Not yet supported by fast path.
+ *
+ * `rights' is no longer needed for the check above and is reused
+ * here as scratch space for the full rights set.  Unless `notyet'
+ * is defined, a restricted descriptor results in EAGAIN.
+ */
+ CAP_ALL(&rights);
+ if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) ||
+ ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
+ ndp->ni_filecaps.fc_nioctls != -1) {
+#ifdef notyet
+ ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
+#else
+ return (EAGAIN);
+#endif
+ }
+ *vpp = vp;
+ return (0);
+}
+#else
+/*
+ * Translate a directory file descriptor into its vnode without taking any
+ * locks, for use as the starting point of a lookup (variant built when
+ * CAPABILITIES is not defined, so no rights are checked).
+ *
+ * The caller must already be inside the VFS SMR section (asserted below).
+ * This function itself acquires no reference on the file or on the vnode.
+ *
+ * On success returns 0, sets *vpp to the directory vnode, *fsearch to
+ * whether the file has FSEARCH set in f_flag, and initializes
+ * ndp->ni_filecaps with filecaps_fill().
+ *
+ * Returns EBADF if fd is outside the current descriptor table and EAGAIN
+ * if the slot is empty, does not refer to a directory vnode, or changed
+ * while it was being inspected.  Note that *fsearch may have been written
+ * even when EAGAIN is returned.
+ */
+int
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+{
+ const struct fdescenttbl *fdt;
+ struct filedesc *fdp;
+ struct file *fp;
+ struct vnode *vp;
+
+ VFS_SMR_ASSERT_ENTERED();
+
+ fdp = curproc->p_fd;
+ fdt = fdp->fd_files;
+ /* The unsigned cast makes a negative fd fail the range check too. */
+ if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
+ return (EBADF);
+ fp = fdt->fdt_ofiles[fd].fde_file;
+ if (__predict_false(fp == NULL))
+ return (EAGAIN);
+ *fsearch = ((fp->f_flag & FSEARCH) != 0);
+ vp = fp->f_vnode;
+ if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
+ return (EAGAIN);
+ }
+ /*
+ * Use an acquire barrier to force re-reading of fdt so it is
+ * refreshed for verification.
+ *
+ * Verification here is a plain pointer comparison: the slot must
+ * still refer to the same struct file that was inspected above.
+ */
+ atomic_thread_fence_acq();
+ fdt = fdp->fd_files;
+ if (__predict_false(fp != fdt->fdt_ofiles[fd].fde_file))
+ return (EAGAIN);
+ /*
+ * NOTE(review): presumably filecaps_fill() marks the capabilities
+ * as unrestricted -- confirm against its definition.
+ */
+ filecaps_fill(&ndp->ni_filecaps);
+ *vpp = vp;
+ return (0);
+}
+#endif
+
int
fget_unlocked_seq(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
struct file **fpp, seqc_t *seqp)
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index ef1b057a23c0..0880ed26b28b 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -3189,6 +3189,7 @@ struct cache_fpl {
int line;
enum cache_fpl_status status:8;
bool in_smr;
+ bool fsearch;
};
static void
@@ -3346,10 +3347,6 @@ cache_can_fplookup(struct cache_fpl *fpl)
cache_fpl_aborted(fpl);
return (false);
}
- if (ndp->ni_dirfd != AT_FDCWD) {
- cache_fpl_aborted(fpl);
- return (false);
- }
if (IN_CAPABILITY_MODE(td)) {
cache_fpl_aborted(fpl);
return (false);
@@ -3365,6 +3362,23 @@ cache_can_fplookup(struct cache_fpl *fpl)
return (true);
}
+/*
+ * Resolve ndp->ni_dirfd into the vnode the lockless lookup should start
+ * from, storing it in *vpp.
+ *
+ * On success returns 0 and records in fpl->fsearch whether the descriptor
+ * was reported as having FSEARCH set; the SMR section is not exited here.
+ *
+ * On any error from fgetvp_lookup_smr() (the specific errno is not
+ * propagated) the SMR section is exited, the lookup is marked as aborted
+ * and the value produced by cache_fpl_aborted() is returned; fpl->fsearch
+ * is left untouched in that case.
+ */
+static int
+cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
+{
+ struct nameidata *ndp;
+ int error;
+ bool fsearch;
+
+ ndp = fpl->ndp;
+ error = fgetvp_lookup_smr(ndp->ni_dirfd, ndp, vpp, &fsearch);
+ if (__predict_false(error != 0)) {
+ /* Leave SMR before reporting the abort to the caller. */
+ cache_fpl_smr_exit(fpl);
+ return (cache_fpl_aborted(fpl));
+ }
+ fpl->fsearch = fsearch;
+ return (0);
+}
+
static bool
cache_fplookup_vnode_supported(struct vnode *vp)
{
@@ -4046,9 +4060,11 @@ cache_fplookup_parse_advance(struct cache_fpl *fpl)
static int __noinline
cache_fplookup_failed_vexec(struct cache_fpl *fpl, int error)
{
+ struct componentname *cnp;
struct vnode *dvp;
seqc_t dvp_seqc;
+ cnp = fpl->cnp;
dvp = fpl->dvp;
dvp_seqc = fpl->dvp_seqc;
@@ -4070,6 +4086,32 @@ cache_fplookup_failed_vexec(struct cache_fpl *fpl, int error)
error = ENOTDIR;
}
+ /*
+ * Hack: handle O_SEARCH.
+ *
+ * Open Group Base Specifications Issue 7, 2018 edition states:
+ * If the access mode of the open file description associated with the
+ * file descriptor is not O_SEARCH, the function shall check whether
+ * directory searches are permitted using the current permissions of
+ * the directory underlying the file descriptor. If the access mode is
+ * O_SEARCH, the function shall not perform the check.
+ *
+ * Regular lookup tests for the NOEXECCHECK flag for every path
+ * component to decide whether to do the permission check. However,
+ * since most lookups never have the flag (and when they do it is only
+ * present for the first path component), lockless lookup only acts on
+ * it if there is a permission problem. Here the flag is represented
+ * with a boolean so that we don't have to clear it on the way out.
+ *
+ * For simplicity this always aborts.
+ * TODO: check if this is the first lookup and ignore the permission
+ * problem. Note the flag has to survive fallback (if it happens to be
+ * performed).
+ */
+ if (fpl->fsearch) {
+ return (cache_fpl_aborted(fpl));
+ }
+
switch (error) {
case EAGAIN:
if (!vn_seqc_consistent(dvp, dvp_seqc)) {
@@ -4308,6 +4350,7 @@ cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
cache_fpl_checkpoint(&fpl, &orig);
cache_fpl_smr_enter_initial(&fpl);
+ fpl.fsearch = false;
pwd = pwd_get_smr();
fpl.pwd = pwd;
ndp->ni_rootdir = pwd->pwd_rdir;
@@ -4318,13 +4361,20 @@ cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
if (cnp->cn_pnbuf[0] == '/') {
cache_fpl_handle_root(ndp, &dvp);
} else {
- MPASS(ndp->ni_dirfd == AT_FDCWD);
- dvp = pwd->pwd_cdir;
+ if (ndp->ni_dirfd == AT_FDCWD) {
+ dvp = pwd->pwd_cdir;
+ } else {
+ error = cache_fplookup_dirfd(&fpl, &dvp);
+ if (__predict_false(error != 0)) {
+ goto out;
+ }
+ }
}
SDT_PROBE4(vfs, namei, lookup, entry, dvp, cnp->cn_pnbuf, cnp->cn_flags, true);
error = cache_fplookup_impl(dvp, &fpl);
+out:
cache_fpl_smr_assert_not_entered(&fpl);
SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status);
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 7c69ee2ae98e..cbf61d0a3dcb 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -52,6 +52,7 @@ struct thread;
struct uio;
struct knote;
struct vnode;
+struct nameidata;
#endif /* _KERNEL */
@@ -279,6 +280,7 @@ int fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp,
struct vnode **vpp);
int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
struct vnode **vpp);
+int fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch);
static __inline __result_use_check bool
fhold(struct file *fp)