about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mateusz Guzik <mjg@FreeBSD.org> 2021-01-23 13:40:48 +0000
committer: Mateusz Guzik <mjg@FreeBSD.org> 2021-02-01 12:37:46 +0000
commit: dcee9964238b12a8e55917f292139f074b1a80b2 (patch)
tree: 03fbaa91c895cbdb4c7f9aa105abbfa106fe87a2
parent: 6bc1f8425560fa3e2ebc6ac3367967a083ccf6e0 (diff)
download: src-dcee9964238b12a8e55917f292139f074b1a80b2.tar.gz
download: src-dcee9964238b12a8e55917f292139f074b1a80b2.zip
cache: add symlink support to lockless lookup
Reviewed by: kib (previous version)
Tested by: pho (previous version)
Differential Revision: https://reviews.freebsd.org/D27488
-rw-r--r-- sys/fs/deadfs/dead_vnops.c | 1
-rw-r--r-- sys/kern/vfs_cache.c | 214
-rw-r--r-- sys/kern/vfs_lookup.c | 11
-rw-r--r-- sys/kern/vfs_subr.c | 13
-rw-r--r-- sys/kern/vnode_if.src | 10
-rw-r--r-- sys/sys/namei.h | 4
-rw-r--r-- sys/sys/param.h | 2
-rw-r--r-- sys/sys/vnode.h | 9
8 files changed, 227 insertions, 37 deletions
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
index 49f852791387..09c3c996ee0e 100644
--- a/sys/fs/deadfs/dead_vnops.c
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -80,6 +80,7 @@ struct vop_vector dead_vnodeops = {
.vop_unset_text = dead_unset_text,
.vop_write = dead_write,
.vop_fplookup_vexec = VOP_EOPNOTSUPP,
+ .vop_fplookup_symlink = VOP_EAGAIN,
};
VFS_VOP_VECTOR_REGISTER(dead_vnodeops);
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index cfa25dc59ee4..770c8ebf061b 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -562,6 +562,36 @@ static uma_zone_t __read_mostly cache_zone_small_ts;
static uma_zone_t __read_mostly cache_zone_large;
static uma_zone_t __read_mostly cache_zone_large_ts;
+/*
+ * Allocate storage for a symlink target string of the given size.
+ *
+ * Sizes below CACHE_ZONE_SMALL_SIZE are served from cache_zone_small,
+ * sizes below CACHE_ZONE_LARGE_SIZE from cache_zone_large; both through
+ * uma_zalloc_smr() with the caller-supplied flags.  Anything larger is
+ * not supported and yields NULL.  The same size must later be handed to
+ * cache_symlink_free() so that the matching zone is picked.
+ */
+char *
+cache_symlink_alloc(size_t size, int flags)
+{
+	uma_zone_t zone;
+
+	if (size < CACHE_ZONE_SMALL_SIZE) {
+		zone = cache_zone_small;
+	} else if (size < CACHE_ZONE_LARGE_SIZE) {
+		zone = cache_zone_large;
+	} else {
+		/* Too big for either zone. */
+		return (NULL);
+	}
+	return (uma_zalloc_smr(zone, flags));
+}
+
+/*
+ * Release a string obtained from cache_symlink_alloc().
+ *
+ * The zone is chosen from size with the same thresholds the allocator
+ * uses, so the caller has to pass the size it allocated with.  A NULL
+ * string or a size neither zone covers is a caller bug (asserted).
+ */
+void
+cache_symlink_free(char *string, size_t size)
+{
+	uma_zone_t zone;
+
+	MPASS(string != NULL);
+
+	if (size < CACHE_ZONE_SMALL_SIZE) {
+		zone = cache_zone_small;
+	} else if (size < CACHE_ZONE_LARGE_SIZE) {
+		zone = cache_zone_large;
+	} else {
+		/* cache_symlink_alloc() never hands out such a buffer. */
+		__assert_unreachable();
+		return;
+	}
+	uma_zfree_smr(zone, string);
+}
+
static struct namecache *
cache_alloc_uma(int len, bool ts)
{
@@ -3584,6 +3614,7 @@ cache_fast_lookup_enabled_recalc(void)
#ifdef MAC
mac_on = mac_vnode_check_lookup_enabled();
+ mac_on |= mac_vnode_check_readlink_enabled();
#else
mac_on = 0;
#endif
@@ -3615,6 +3646,7 @@ SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MP
* need restoring in case fast path lookup fails.
*/
struct nameidata_outer {
+ size_t ni_pathlen; /* ndp->ni_pathlen as saved by cache_fpl_checkpoint_outer() */
int cn_flags;
};
@@ -3656,8 +3688,10 @@ static bool cache_fplookup_is_mp(struct cache_fpl *fpl);
static int cache_fplookup_cross_mount(struct cache_fpl *fpl);
static int cache_fplookup_partial_setup(struct cache_fpl *fpl);
static int cache_fplookup_skip_slashes(struct cache_fpl *fpl);
+static int cache_fplookup_preparse(struct cache_fpl *fpl);
static void cache_fpl_pathlen_dec(struct cache_fpl *fpl);
static void cache_fpl_pathlen_inc(struct cache_fpl *fpl);
+static void cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n);
static void cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n);
static void
@@ -3698,6 +3732,7 @@ static void
cache_fpl_checkpoint_outer(struct cache_fpl *fpl)
{
+ fpl->snd_outer.ni_pathlen = fpl->ndp->ni_pathlen;
fpl->snd_outer.cn_flags = fpl->ndp->ni_cnd.cn_flags;
}
@@ -3731,6 +3766,7 @@ cache_fpl_restore_abort(struct cache_fpl *fpl)
*/
fpl->ndp->ni_resflags = 0;
fpl->ndp->ni_cnd.cn_nameptr = fpl->ndp->ni_cnd.cn_pnbuf;
+ fpl->ndp->ni_pathlen = fpl->snd_outer.ni_pathlen;
}
#ifdef INVARIANTS
@@ -3752,6 +3788,7 @@ cache_fpl_assert_status(struct cache_fpl *fpl)
case CACHE_FPL_STATUS_UNSET:
__assert_unreachable();
break;
+ case CACHE_FPL_STATUS_DESTROYED:
case CACHE_FPL_STATUS_ABORTED:
case CACHE_FPL_STATUS_PARTIAL:
case CACHE_FPL_STATUS_HANDLED:
@@ -3804,6 +3841,11 @@ cache_fpl_aborted_early_impl(struct cache_fpl *fpl, int line)
static int __noinline
cache_fpl_aborted_impl(struct cache_fpl *fpl, int line)
{
+ struct nameidata *ndp;
+ struct componentname *cnp;
+
+ ndp = fpl->ndp;
+ cnp = fpl->cnp;
if (fpl->status != CACHE_FPL_STATUS_UNSET) {
KASSERT(fpl->status == CACHE_FPL_STATUS_PARTIAL,
@@ -3815,6 +3857,14 @@ cache_fpl_aborted_impl(struct cache_fpl *fpl, int line)
if (fpl->in_smr)
cache_fpl_smr_exit(fpl);
cache_fpl_restore_abort(fpl);
+ /*
+ * Resolving symlinks overwrites data passed by the caller.
+ * Let namei know.
+ */
+ if (ndp->ni_loopcnt > 0) {
+ fpl->status = CACHE_FPL_STATUS_DESTROYED;
+ cache_fpl_cleanup_cnp(cnp);
+ }
return (CACHE_FPL_FAILED);
}
@@ -3955,13 +4005,6 @@ cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
return (0);
}
-static bool
-cache_fplookup_vnode_supported(struct vnode *vp)
-{
-
- return (vp->v_type != VLNK);
-}
-
static int __noinline
cache_fplookup_negative_promote(struct cache_fpl *fpl, struct namecache *oncp,
uint32_t hash)
@@ -4242,8 +4285,7 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl)
* Since we expect this to be the terminal vnode it should
* almost never be true.
*/
- if (__predict_false(!cache_fplookup_vnode_supported(tvp) ||
- cache_fplookup_is_mp(fpl))) {
+ if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) {
vput(dvp);
vput(tvp);
return (cache_fpl_aborted(fpl));
@@ -4546,8 +4588,7 @@ cache_fplookup_noentry(struct cache_fpl *fpl)
return (cache_fpl_handled(fpl));
}
- if (__predict_false(!cache_fplookup_vnode_supported(tvp) ||
- cache_fplookup_is_mp(fpl))) {
+ if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) {
vput(dvp);
vput(tvp);
return (cache_fpl_aborted(fpl));
@@ -4688,6 +4729,102 @@ cache_fplookup_neg(struct cache_fpl *fpl, struct namecache *ncp, uint32_t hash)
return (cache_fpl_handled_error(fpl, ENOENT));
}
+/*
+ * Resolve a symlink. Called by filesystem-specific routines.
+ *
+ * Code flow is:
+ * ... -> cache_fplookup_symlink -> VOP_FPLOOKUP_SYMLINK -> cache_symlink_resolve
+ *
+ * Rewrites the lookup path buffer (cnp->cn_pnbuf) in place so that it holds
+ * the link target followed by whatever part of the path was still left to
+ * parse:
+ *
+ * 1. ndp->ni_pathlen is recomputed as the number of bytes from the end of
+ *    the current component (cn_nameptr + cn_namelen) through the nul byte
+ *    at fpl->nulchar.  With INVARIANTS the value is cross-checked against
+ *    fpl->debug.ni_pathlen and a mismatch panics.
+ * 2. If more than the nul byte remains, that many bytes are moved from
+ *    ndp->ni_next to cn_pnbuf + len; otherwise a nul is stored at
+ *    cn_pnbuf[len].
+ * 3. len bytes of string are copied to the start of cn_pnbuf.  The nul
+ *    terminator of the result comes from step 2, not from string.
+ * 4. ni_pathlen, the debug path length, cn_nameptr and fpl->nulchar are
+ *    updated to describe the new buffer contents; cn_nameptr points at the
+ *    start of the buffer again.
+ *
+ * Returns 0 on success, or:
+ *   ENOENT        len is 0
+ *   ENAMETOOLONG  len plus the remaining path exceeds MAXPATHLEN
+ *   ELOOP         ndp->ni_loopcnt had already reached MAXSYMLINKS
+ * ni_loopcnt is incremented by every call that passes the length check,
+ * including the one that returns ELOOP.  The buffer is not modified on any
+ * of the error returns.
+ *
+ * NOTE(review): ni_next presumably points just past the current component
+ * inside cn_pnbuf, which would make the copy in step 2 an overlapping move;
+ * confirm against the parser and that bcopy's overlap handling is what is
+ * being relied upon.
+ */
+int
+cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len)
+{
+ struct nameidata *ndp;
+ struct componentname *cnp;
+
+ ndp = fpl->ndp;
+ cnp = fpl->cnp;
+
+ if (__predict_false(len == 0)) {
+ return (ENOENT);
+ }
+
+ ndp->ni_pathlen = fpl->nulchar - cnp->cn_nameptr - cnp->cn_namelen + 1;
+#ifdef INVARIANTS
+ if (ndp->ni_pathlen != fpl->debug.ni_pathlen) {
+ panic("%s: mismatch (%zu != %zu) nulchar %p nameptr %p [%s] ; full string [%s]\n",
+ __func__, ndp->ni_pathlen, fpl->debug.ni_pathlen, fpl->nulchar,
+ cnp->cn_nameptr, cnp->cn_nameptr, cnp->cn_pnbuf);
+ }
+#endif
+
+ if (__predict_false(len + ndp->ni_pathlen > MAXPATHLEN)) {
+ return (ENAMETOOLONG);
+ }
+
+ if (__predict_false(ndp->ni_loopcnt++ >= MAXSYMLINKS)) {
+ return (ELOOP);
+ }
+
+ if (ndp->ni_pathlen > 1) {
+ bcopy(ndp->ni_next, cnp->cn_pnbuf + len, ndp->ni_pathlen);
+ } else {
+ cnp->cn_pnbuf[len] = '\0';
+ }
+ bcopy(string, cnp->cn_pnbuf, len);
+
+ ndp->ni_pathlen += len;
+ cache_fpl_pathlen_add(fpl, len);
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ fpl->nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1];
+
+ return (0);
+}
+
+/*
+ * Handle a lookup step whose result vnode (fpl->tvp) is a symlink.
+ *
+ * When the link is the last path component and FOLLOW is not requested,
+ * the link itself is the answer and the lookup is finished through
+ * cache_fplookup_final().
+ *
+ * Otherwise VOP_FPLOOKUP_SYMLINK is asked to substitute the link target
+ * into the path buffer.  Its result is mapped as follows:
+ *   0                            continue: re-run cache_fplookup_preparse()
+ *                                on the rewritten path
+ *   EAGAIN                       stop with partial status
+ *   ENOENT, ENAMETOOLONG, ELOOP  leave the SMR section and report the error
+ *                                as the handled result of the lookup
+ *   anything else                abort the lockless lookup
+ *
+ * If the rewritten path starts with '/', the walk restarts from the vnode
+ * returned by cache_fpl_handle_root(); the lookup is aborted when that
+ * vnode's sequence counter shows a modification in progress.
+ */
+static int __noinline
+cache_fplookup_symlink(struct cache_fpl *fpl)
+{
+	struct nameidata *ndp;
+	struct componentname *cnp;
+	struct vnode *tvp;
+	int error;
+
+	ndp = fpl->ndp;
+	cnp = fpl->cnp;
+	tvp = fpl->tvp;
+
+	/* A trailing symlink that must not be followed is the final result. */
+	if (cache_fpl_islastcn(ndp) && (cnp->cn_flags & FOLLOW) == 0) {
+		return (cache_fplookup_final(fpl));
+	}
+
+	error = VOP_FPLOOKUP_SYMLINK(tvp, fpl);
+	if (__predict_false(error != 0)) {
+		switch (error) {
+		case EAGAIN:
+			return (cache_fpl_partial(fpl));
+		case ENOENT:
+		case ENAMETOOLONG:
+		case ELOOP:
+			cache_fpl_smr_exit(fpl);
+			return (cache_fpl_handled_error(fpl, error));
+		default:
+			return (cache_fpl_aborted(fpl));
+		}
+	}
+
+	/* Absolute target: continue from the root instead of fpl->dvp. */
+	if (*(cnp->cn_nameptr) == '/') {
+		fpl->dvp = cache_fpl_handle_root(fpl);
+		fpl->dvp_seqc = vn_seqc_read_any(fpl->dvp);
+		if (seqc_in_modify(fpl->dvp_seqc)) {
+			return (cache_fpl_aborted(fpl));
+		}
+	}
+
+	return (cache_fplookup_preparse(fpl));
+}
+
static int
cache_fplookup_next(struct cache_fpl *fpl)
{
@@ -4743,10 +4880,6 @@ cache_fplookup_next(struct cache_fpl *fpl)
return (cache_fpl_partial(fpl));
}
- if (!cache_fplookup_vnode_supported(tvp)) {
- return (cache_fpl_partial(fpl));
- }
-
counter_u64_add(numposhits, 1);
SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp);
@@ -4931,7 +5064,16 @@ static void
cache_fpl_pathlen_inc(struct cache_fpl *fpl)
{
- fpl->debug.ni_pathlen++;
+ cache_fpl_pathlen_add(fpl, 1);
+}
+
+/*
+ * Add n to the shadow path length kept in fpl->debug and assert that the
+ * result does not exceed PATH_MAX.
+ */
+static void
+cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
+{
+
+	fpl->debug.ni_pathlen += n;
+	/* The counter is printed as unsigned (%zu), matching cache_symlink_resolve(). */
+	KASSERT(fpl->debug.ni_pathlen <= PATH_MAX,
+	    ("%s: pathlen overflow to %zu\n", __func__, fpl->debug.ni_pathlen));
}
static void
@@ -4954,12 +5096,17 @@ cache_fpl_pathlen_inc(struct cache_fpl *fpl)
}
static void
+cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
+{
+}
+
+static void
cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n)
{
}
#endif
-static int
+static int __always_inline
cache_fplookup_preparse(struct cache_fpl *fpl)
{
struct componentname *cnp;
@@ -5238,8 +5385,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
return (cache_fpl_aborted(fpl));
}
- VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
-
error = cache_fplookup_preparse(fpl);
if (__predict_false(cache_fpl_terminated(fpl))) {
return (error);
@@ -5251,8 +5396,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
break;
}
- VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
-
error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred);
if (__predict_false(error != 0)) {
error = cache_fplookup_failed_vexec(fpl, error);
@@ -5266,20 +5409,27 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
- if (cache_fpl_islastcn(ndp)) {
- error = cache_fplookup_final(fpl);
- break;
- }
+ if (fpl->tvp->v_type == VLNK) {
+ error = cache_fplookup_symlink(fpl);
+ if (cache_fpl_terminated(fpl)) {
+ break;
+ }
+ } else {
+ if (cache_fpl_islastcn(ndp)) {
+ error = cache_fplookup_final(fpl);
+ break;
+ }
- if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) {
- error = cache_fpl_aborted(fpl);
- break;
- }
+ if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) {
+ error = cache_fpl_aborted(fpl);
+ break;
+ }
- fpl->dvp = fpl->tvp;
- fpl->dvp_seqc = fpl->tvp_seqc;
+ fpl->dvp = fpl->tvp;
+ fpl->dvp_seqc = fpl->tvp_seqc;
+ cache_fplookup_parse_advance(fpl);
+ }
- cache_fplookup_parse_advance(fpl);
cache_fpl_checkpoint(fpl);
}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index b6529623ecbb..ad65ab11bb1d 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -578,6 +578,7 @@ namei(struct nameidata *ndp)
ndp->ni_startdir->v_type == VBAD);
ndp->ni_lcf = 0;
+ ndp->ni_loopcnt = 0;
ndp->ni_vp = NULL;
error = namei_getpath(ndp);
@@ -611,8 +612,16 @@ namei(struct nameidata *ndp)
TAILQ_INIT(&ndp->ni_cap_tracker);
dp = ndp->ni_startdir;
break;
+ case CACHE_FPL_STATUS_DESTROYED:
+ ndp->ni_loopcnt = 0;
+ error = namei_getpath(ndp);
+ if (__predict_false(error != 0)) {
+ return (error);
+ }
+ /* FALLTHROUGH */
case CACHE_FPL_STATUS_ABORTED:
TAILQ_INIT(&ndp->ni_cap_tracker);
+ MPASS(ndp->ni_lcf == 0);
error = namei_setup(ndp, &dp, &pwd);
if (error != 0) {
namei_cleanup_cnp(cnp);
@@ -621,8 +630,6 @@ namei(struct nameidata *ndp)
break;
}
- ndp->ni_loopcnt = 0;
-
/*
* Locked lookup.
*/
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 8461fd0d49b5..047e4c54f0c5 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -5453,6 +5453,19 @@ vop_fplookup_vexec_debugpost(void *ap __unused, int rc __unused)
}
void
+vop_fplookup_symlink_debugpre(void *ap __unused)
+{
+ /*
+  * Registered in vnode_if.src as the debugpre hook of
+  * vop_fplookup_symlink.  The argument is ignored; the only check
+  * made is VFS_SMR_ASSERT_ENTERED().
+  */
+ VFS_SMR_ASSERT_ENTERED();
+}
+
+void
+vop_fplookup_symlink_debugpost(void *ap __unused, int rc __unused)
+{
+ /*
+  * Registered in vnode_if.src as the debugpost hook of
+  * vop_fplookup_symlink.  Both arguments are ignored; the same
+  * VFS_SMR_ASSERT_ENTERED() check as in the pre hook is repeated
+  * regardless of the VOP's return code.
+  */
+ VFS_SMR_ASSERT_ENTERED();
+}
+void
vop_strategy_debugpre(void *ap)
{
struct vop_strategy_args *a;
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index ed6f3bf9d61b..5d15d4a0c863 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -156,6 +156,16 @@ vop_fplookup_vexec {
};
+%% fplookup_symlink vp - - -
+%! fplookup_symlink debugpre vop_fplookup_symlink_debugpre
+%! fplookup_symlink debugpost vop_fplookup_symlink_debugpost
+
+vop_fplookup_symlink {
+ IN struct vnode *vp;
+ IN struct cache_fpl *fpl;
+};
+
+
%% access vp L L L
vop_access {
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index c045bc7c6bc5..1f9bae9e2753 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -116,8 +116,8 @@ struct nameidata {
#ifdef _KERNEL
-enum cache_fpl_status { CACHE_FPL_STATUS_ABORTED, CACHE_FPL_STATUS_PARTIAL,
- CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET };
+enum cache_fpl_status { CACHE_FPL_STATUS_DESTROYED, CACHE_FPL_STATUS_ABORTED,
+ CACHE_FPL_STATUS_PARTIAL, CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET };
int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
struct pwd **pwdp);
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 97ec07ed3a4d..afa5d9a2e3a8 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -60,7 +60,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1300137 /* Master, propagated to newvers */
+#define __FreeBSD_version 1300138 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 0eadfec02313..ffd0bdad940d 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -66,6 +66,7 @@ enum vgetstate { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT };
*/
struct namecache;
+struct cache_fpl;
struct vpollinfo {
struct mtx vpi_lock; /* lock to protect below */
@@ -646,6 +647,10 @@ void cache_purge(struct vnode *vp);
void cache_purge_vgone(struct vnode *vp);
void cache_purge_negative(struct vnode *vp);
void cache_purgevfs(struct mount *mp);
+char *cache_symlink_alloc(size_t size, int flags);
+void cache_symlink_free(char *string, size_t size);
+int cache_symlink_resolve(struct cache_fpl *fpl, const char *string,
+ size_t len);
void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp);
@@ -900,6 +905,8 @@ int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
#ifdef DEBUG_VFS_LOCKS
void vop_fplookup_vexec_debugpre(void *a);
void vop_fplookup_vexec_debugpost(void *a, int rc);
+void vop_fplookup_symlink_debugpre(void *a);
+void vop_fplookup_symlink_debugpost(void *a, int rc);
void vop_strategy_debugpre(void *a);
void vop_lock_debugpre(void *a);
void vop_lock_debugpost(void *a, int rc);
@@ -910,6 +917,8 @@ void vop_mkdir_debugpost(void *a, int rc);
#else
#define vop_fplookup_vexec_debugpre(x) do { } while (0)
#define vop_fplookup_vexec_debugpost(x, y) do { } while (0)
+#define vop_fplookup_symlink_debugpre(x) do { } while (0)
+#define vop_fplookup_symlink_debugpost(x, y) do { } while (0)
#define vop_strategy_debugpre(x) do { } while (0)
#define vop_lock_debugpre(x) do { } while (0)
#define vop_lock_debugpost(x, y) do { } while (0)