author     Konstantin Belousov <kib@FreeBSD.org>	2017-05-15 10:02:45 +0000
committer  Konstantin Belousov <kib@FreeBSD.org>	2017-05-15 10:02:45 +0000
commit     391aba32e6d308a8fd0ac12ee49495119c7a307c (patch)
tree       1a415e230ff24ae84ba0bbdef2d6e686908329c6 /sys/kern/vfs_subr.c
parent     344add7203f449d15ae291e4ff9f2d0e2466f814 (diff)
mnt_vnode_next_active: use conventional lock order when trylock fails.
Previously, when the VI_TRYLOCK failed, we would spin under the mutex that
protects the vnode active list until we either succeeded or noticed that we
had hogged the CPU.  Since we were violating the lock order, this would
guarantee that we would become a hog under any deadlock condition (e.g. a
race with vdrop(9) on the same vnode).  In the presence of many concurrent
threads in sync(2) or vdrop etc., the victim could hang for a long time.

Now, avoid spinning by dropping and reacquiring the locks in the
conventional lock order when the trylock fails.  This requires a dance
with the vnode hold count.

Submitted by:	Tom Rix <trix@juniper.net>
Tested by:	pho
Differential revision:	https://reviews.freebsd.org/D10692
Notes:
    svn path=/head/; revision=318285
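The fix follows the usual recipe for a failed try-lock taken against the lock
order: release the lock you already hold, take both locks in the conventional
order, then revalidate whatever the dropped lock was protecting before
continuing.  Below is a minimal userland sketch of that shape, using pthread
mutexes and hypothetical names; it is not the kernel code in the diff further
down, which additionally must keep the vnode alive across the unlocked window
via the hold count.

#include <pthread.h>
#include <stdbool.h>

/* "list_lock" stands in for mnt_listmtx; obj->lock for the vnode interlock. */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

struct obj {
	pthread_mutex_t lock;	/* conventional order: obj->lock before list_lock */
	bool on_list;		/* protected by list_lock */
};

/*
 * Called with list_lock held and obj->lock not held.  Returns with
 * list_lock held; obj->lock is held only on success.  Assumes obj stays
 * allocated across the unlocked window (the kernel code guarantees that
 * with a hold count instead).
 */
static bool
lock_obj_in_order(struct obj *obj)
{
	if (pthread_mutex_trylock(&obj->lock) == 0)
		return (true);			/* fast path, no order violation */

	/* Slow path: drop the list lock, then take both in the right order. */
	pthread_mutex_unlock(&list_lock);
	pthread_mutex_lock(&obj->lock);
	pthread_mutex_lock(&list_lock);

	/* Revalidate: obj may have been removed while nothing was locked. */
	if (!obj->on_list) {
		pthread_mutex_unlock(&obj->lock);
		return (false);			/* caller restarts its iteration */
	}
	return (true);
}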
Diffstat (limited to 'sys/kern/vfs_subr.c')
-rw-r--r--	sys/kern/vfs_subr.c	98
1 file changed, 87 insertions, 11 deletions
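The hold-count dance in the new mnt_vnode_next_active_relock() below leans on
two helpers, vfs_refcount_acquire_if_not_zero() and
vfs_refcount_release_if_not_last(), which conditionally bump or drop a
reference count without ever blocking on another lock.  The following is a
rough sketch of their semantics only, written with C11 atomics and our own
names; the kernel's implementation differs.

#include <stdatomic.h>
#include <stdbool.h>

static bool
refcnt_acquire_if_not_zero(atomic_uint *cnt)
{
	unsigned int old = atomic_load(cnt);

	for (;;) {
		if (old == 0)
			return (false);	/* no holds left; object is going away */
		/* On failure the CAS reloads 'old', so simply retry. */
		if (atomic_compare_exchange_weak(cnt, &old, old + 1))
			return (true);
	}
}

static bool
refcnt_release_if_not_last(atomic_uint *cnt)
{
	unsigned int old = atomic_load(cnt);

	for (;;) {
		if (old == 1)
			return (false);	/* would be the last reference */
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return (true);
	}
}

In the diff, the acquire variant takes a hold on the vnode while only the
mount vnode list lock is held, avoiding the interlock and hence the LOR; once
the interlock has been obtained, the release variant drops that temporary hold
again, and vdropl() handles the case where it would have been the last
reference.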
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 9edce97fc009..2ca95a8bbb34 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -5362,6 +5362,84 @@ mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
 	*mvp = NULL;
 }
 
+/*
+ * Relock the mp mount vnode list lock with the vp vnode interlock in the
+ * conventional lock order during mnt_vnode_next_active iteration.
+ *
+ * On entry, the mount vnode list lock is held and the vnode interlock is not.
+ * The list lock is dropped and reacquired.  On success, both locks are held.
+ * On failure, the mount vnode list lock is held but the vnode interlock is
+ * not, and the procedure may have yielded.
+ */
+static bool
+mnt_vnode_next_active_relock(struct vnode *mvp, struct mount *mp,
+    struct vnode *vp)
+{
+	const struct vnode *tmp;
+	bool held, ret;
+
+	VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER &&
+	    TAILQ_NEXT(mvp, v_actfreelist) != NULL, mvp,
+	    ("%s: bad marker", __func__));
+	VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp,
+	    ("%s: inappropriate vnode", __func__));
+	ASSERT_VI_UNLOCKED(vp, __func__);
+	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
+
+	ret = false;
+
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, mvp, v_actfreelist);
+	TAILQ_INSERT_BEFORE(vp, mvp, v_actfreelist);
+
+	/*
+	 * Use a hold to prevent vp from disappearing while the mount vnode
+	 * list lock is dropped and reacquired.  Normally a hold would be
+	 * acquired with vhold(), but that might try to acquire the vnode
+	 * interlock, which would be a LOR with the mount vnode list lock.
+	 */
+	held = vfs_refcount_acquire_if_not_zero(&vp->v_holdcnt);
+	mtx_unlock(&mp->mnt_listmtx);
+	if (!held)
+		goto abort;
+	VI_LOCK(vp);
+	if (!vfs_refcount_release_if_not_last(&vp->v_holdcnt)) {
+		vdropl(vp);
+		goto abort;
+	}
+	mtx_lock(&mp->mnt_listmtx);
+
+	/*
+	 * Determine whether the vnode is still the next one after the marker,
+	 * excepting any other markers.  If the vnode has not been doomed by
+	 * vgone() then the hold should have ensured that it remained on the
+	 * active list.  If it has been doomed but is still on the active list,
+	 * don't abort, but rather skip over it (avoid spinning on doomed
+	 * vnodes).
+	 */
+	tmp = mvp;
+	do {
+		tmp = TAILQ_NEXT(tmp, v_actfreelist);
+	} while (tmp != NULL && tmp->v_type == VMARKER);
+	if (tmp != vp) {
+		mtx_unlock(&mp->mnt_listmtx);
+		VI_UNLOCK(vp);
+		goto abort;
+	}
+
+	ret = true;
+	goto out;
+abort:
+	maybe_yield();
+	mtx_lock(&mp->mnt_listmtx);
+out:
+	if (ret)
+		ASSERT_VI_LOCKED(vp, __func__);
+	else
+		ASSERT_VI_UNLOCKED(vp, __func__);
+	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
+	return (ret);
+}
+
 static struct vnode *
 mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
 {
@@ -5371,22 +5449,19 @@ mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 restart:
 	vp = TAILQ_NEXT(*mvp, v_actfreelist);
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
 	while (vp != NULL) {
 		if (vp->v_type == VMARKER) {
 			vp = TAILQ_NEXT(vp, v_actfreelist);
 			continue;
 		}
-		if (!VI_TRYLOCK(vp)) {
-			if (mp_ncpus == 1 || should_yield()) {
-				TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
-				mtx_unlock(&mp->mnt_listmtx);
-				pause("vnacti", 1);
-				mtx_lock(&mp->mnt_listmtx);
-				goto restart;
-			}
-			continue;
-		}
+		/*
+		 * Try-lock because this is the wrong lock order.  If that does
+		 * not succeed, drop the mount vnode list lock and try to
+		 * reacquire it and the vnode interlock in the right order.
+		 */
+		if (!VI_TRYLOCK(vp) &&
+		    !mnt_vnode_next_active_relock(*mvp, mp, vp))
+			goto restart;
 		KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
 		KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
 		    ("alien vnode on the active list %p %p", vp, mp));
@@ -5396,6 +5471,7 @@ restart:
 		VI_UNLOCK(vp);
 		vp = nvp;
 	}
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
 	/* Check if we are done */
 	if (vp == NULL) {