Diffstat (limited to 'sys/fs/nullfs')
-rw-r--r--  sys/fs/nullfs/null.h         10
-rw-r--r--  sys/fs/nullfs/null_subr.c    94
-rw-r--r--  sys/fs/nullfs/null_vnops.c  186
3 files changed, 181 insertions, 109 deletions
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 0a93878c859f..aa7a689bec34 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,6 +37,9 @@
 #define	NULLM_CACHE	0x0001
 
+#include <sys/ck.h>
+#include <vm/uma.h>
+
 struct null_mount {
 	struct mount *nullm_vfs;
 	struct vnode *nullm_lowerrootvp;	/* Ref to lower root vnode */
@@ -50,7 +53,7 @@ struct null_mount {
  * A cache of vnode references
  */
 struct null_node {
-	LIST_ENTRY(null_node) null_hash;	/* Hash list */
+	CK_LIST_ENTRY(null_node) null_hash;	/* Hash list */
 	struct vnode *null_lowervp;	/* VREFed once */
 	struct vnode *null_vnode;	/* Back pointer */
 	u_int null_flags;
@@ -61,6 +64,7 @@ struct null_node {
 
 #define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
 #define	VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define	VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp))
 #define	NULLTOV(xp) ((xp)->null_vnode)
 
 int nullfs_init(struct vfsconf *vfsp);
@@ -79,9 +83,7 @@ struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
 
 extern struct vop_vector null_vnodeops;
 
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
+extern uma_zone_t null_node_zone;
 
 #ifdef NULLFS_DEBUG
 #define NULLFSDEBUG(format, args...) printf(format ,## args)
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 7dcc83880bb9..146d3bbdaedd 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -41,9 +41,14 @@
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
+#include <sys/smr.h>
 
 #include <fs/nullfs/null.h>
 
+#include <vm/uma.h>
+
+VFS_SMR_DECLARE;
+
 /*
  * Null layer cache:
  * Each cache entry holds a reference to the lower vnode
@@ -54,12 +59,12 @@
 
 #define	NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
 
-static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
 static struct rwlock null_hash_lock;
 static u_long null_hash_mask;
 
 static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
+uma_zone_t __read_mostly null_node_zone;
 
 static void null_hashins(struct mount *, struct null_node *);
@@ -73,6 +78,10 @@ nullfs_init(struct vfsconf *vfsp)
 	null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
 	    &null_hash_mask);
 	rw_init(&null_hash_lock, "nullhs");
+	null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
+	    NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
+	VFS_SMR_ZONE_SET(null_node_zone);
+
 	return (0);
 }
 
@@ -80,6 +89,7 @@ int
 nullfs_uninit(struct vfsconf *vfsp)
 {
 
+	uma_zdestroy(null_node_zone);
 	rw_destroy(&null_hash_lock);
 	hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
 	return (0);
@@ -96,7 +106,7 @@ null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 	struct null_node *a;
 	struct vnode *vp;
 
-	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+	ASSERT_VOP_LOCKED(lowervp, __func__);
 	rw_assert(&null_hash_lock, RA_LOCKED);
 
 	/*
@@ -106,37 +116,57 @@ null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 	 * reference count (but NOT the lower vnode's VREF counter).
 	 */
 	hd = NULL_NHASH(lowervp);
-	LIST_FOREACH(a, hd, null_hash) {
-		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
-			/*
-			 * Since we have the lower node locked the nullfs
-			 * node can not be in the process of recycling. If
-			 * it had been recycled before we grabed the lower
-			 * lock it would not have been found on the hash.
-			 */
-			vp = NULLTOV(a);
-			vref(vp);
-			return (vp);
-		}
+	CK_LIST_FOREACH(a, hd, null_hash) {
+		if (a->null_lowervp != lowervp)
+			continue;
+		/*
+		 * Since we have the lower node locked the nullfs
+		 * node can not be in the process of recycling. If
+		 * it had been recycled before we grabed the lower
+		 * lock it would not have been found on the hash.
+		 */
+		vp = NULLTOV(a);
+		VNPASS(!VN_IS_DOOMED(vp), vp);
+		if (vp->v_mount != mp)
+			continue;
+		vref(vp);
+		return (vp);
 	}
-	return (NULLVP);
+	return (NULL);
 }
 
 struct vnode *
 null_hashget(struct mount *mp, struct vnode *lowervp)
 {
 	struct null_node_hashhead *hd;
+	struct null_node *a;
 	struct vnode *vp;
+	enum vgetstate vs;
 
-	hd = NULL_NHASH(lowervp);
-	if (LIST_EMPTY(hd))
-		return (NULLVP);
-
-	rw_rlock(&null_hash_lock);
-	vp = null_hashget_locked(mp, lowervp);
-	rw_runlock(&null_hash_lock);
+	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+	rw_assert(&null_hash_lock, RA_UNLOCKED);
 
-	return (vp);
+	vfs_smr_enter();
+	hd = NULL_NHASH(lowervp);
+	CK_LIST_FOREACH(a, hd, null_hash) {
+		if (a->null_lowervp != lowervp)
+			continue;
+		/*
+		 * See null_hashget_locked as to why the nullfs vnode can't be
+		 * doomed here.
+		 */
+		vp = NULLTOV(a);
+		VNPASS(!VN_IS_DOOMED(vp), vp);
+		if (vp->v_mount != mp)
+			continue;
+		vs = vget_prep_smr(vp);
+		vfs_smr_exit();
+		VNPASS(vs != VGET_NONE, vp);
+		vget_finish_ref(vp, vs);
+		return (vp);
+	}
+	vfs_smr_exit();
+	return (NULL);
 }
 
 static void
@@ -151,7 +181,7 @@ null_hashins(struct mount *mp, struct null_node *xp)
 
 	hd = NULL_NHASH(xp->null_lowervp);
 #ifdef INVARIANTS
-	LIST_FOREACH(oxp, hd, null_hash) {
+	CK_LIST_FOREACH(oxp, hd, null_hash) {
 		if (oxp->null_lowervp == xp->null_lowervp &&
 		    NULLTOV(oxp)->v_mount == mp) {
 			VNASSERT(0, NULLTOV(oxp),
@@ -159,7 +189,7 @@
 		}
 	}
 #endif
-	LIST_INSERT_HEAD(hd, xp, null_hash);
+	CK_LIST_INSERT_HEAD(hd, xp, null_hash);
 }
 
 static void
@@ -174,7 +204,7 @@ null_destroy_proto(struct vnode *vp, void *xp)
 	VI_UNLOCK(vp);
 	vgone(vp);
 	vput(vp);
-	free(xp, M_NULLFSNODE);
+	uma_zfree_smr(null_node_zone, xp);
 }
 
 /*
@@ -208,12 +238,12 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 	 * Note that duplicate can only appear in hash if the lowervp is
 	 * locked LK_SHARED.
 	 */
-	xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
+	xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
 
 	error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
 	if (error) {
 		vput(lowervp);
-		free(xp, M_NULLFSNODE);
+		uma_zfree_smr(null_node_zone, xp);
 		return (error);
 	}
 
@@ -261,8 +291,8 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 		return (error);
 	}
 
-	null_hashins(mp, xp);
 	vn_set_state(vp, VSTATE_CONSTRUCTED);
+	null_hashins(mp, xp);
 	rw_wunlock(&null_hash_lock);
 	*vpp = vp;
 
@@ -277,7 +307,7 @@ null_hashrem(struct null_node *xp)
 {
 
 	rw_wlock(&null_hash_lock);
-	LIST_REMOVE(xp, null_hash);
+	CK_LIST_REMOVE(xp, null_hash);
 	rw_wunlock(&null_hash_lock);
 }
 
@@ -298,7 +328,7 @@ null_checkvp(struct vnode *vp, char *fil, int lno)
 		panic("null_checkvp");
 	}
 #endif
-	if (a->null_lowervp == NULLVP) {
+	if (a->null_lowervp == NULL) {
 		/* Should never happen */
 		panic("null_checkvp %p", vp);
 	}
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 74c1a8f3acb6..375b6aa27531 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -174,6 +174,8 @@
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/smr.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
@@ -185,6 +187,8 @@
 #include <vm/vm_object.h>
 #include <vm/vnode_pager.h>
 
+VFS_SMR_DECLARE;
+
 static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
 SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
 	&null_bug_bypass, 0, "");
@@ -273,9 +277,9 @@ null_bypass(struct vop_generic_args *ap)
 		 * are of our type. Check for and don't map any
 		 * that aren't. (We must always map first vp or vclean fails.)
 		 */
-		if (i != 0 && (*this_vp_p == NULLVP ||
-		    (*this_vp_p)->v_op != &null_vnodeops)) {
-			old_vps[i] = NULLVP;
+		if (i != 0 && (*this_vp_p == NULL ||
+		    (*this_vp_p)->v_op != &null_vnodeops)) {
+			old_vps[i] = NULL;
 		} else {
 			old_vps[i] = *this_vp_p;
 			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
@@ -306,7 +310,7 @@ null_bypass(struct vop_generic_args *ap)
 	 * with the modified argument structure.
 	 */
 	if (vps_p[0] != NULL && *vps_p[0] != NULL) {
-		error = VCALL(ap);
+		error = ap->a_desc->vdesc_call(ap);
 	} else {
 		printf("null_bypass: no map for %s\n", descp->vdesc_name);
 		error = EINVAL;
@@ -336,7 +340,7 @@ null_bypass(struct vop_generic_args *ap)
 			 * must move lock ownership from lower to
 			 * upper (reclaimed) vnode.
 			 */
-			if (lvp != NULLVP) {
+			if (lvp != NULL) {
 				null_copy_inotify(old_vps[i], lvp,
 				    VIRF_INOTIFY);
 				null_copy_inotify(old_vps[i], lvp,
@@ -494,7 +498,7 @@ null_lookup(struct vop_lookup_args *ap)
 	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
 		if (ldvp == lvp) {
 			*ap->a_vpp = dvp;
-			VREF(dvp);
+			vref(dvp);
 			vrele(lvp);
 		} else {
 			error = null_nodeget(mp, lvp, &vp);
@@ -665,7 +669,7 @@ null_remove(struct vop_remove_args *ap)
 	vp = ap->a_vp;
 	if (vrefcnt(vp) > 1) {
 		lvp = NULLVPTOLOWERVP(vp);
-		VREF(lvp);
+		vref(lvp);
 		vreleit = 1;
 	} else
 		vreleit = 0;
@@ -768,83 +772,110 @@ null_rmdir(struct vop_rmdir_args *ap)
 }
 
 /*
- * We need to process our own vnode lock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
+ * We need to process our own vnode lock and then clear the interlock flag as
+ * it applies only to our vnode, not the vnodes below us on the stack.
+ *
+ * We have to hold the vnode here to solve a potential reclaim race. If we're
+ * forcibly vgone'd while we still have refs, a thread could be sleeping inside
+ * the lowervp's vop_lock routine. When we vgone we will drop our last ref to
+ * the lowervp, which would allow it to be reclaimed. The lowervp could then
+ * be recycled, in which case it is not legal to be sleeping in its VOP. We
+ * prevent it from being recycled by holding the vnode here.
  */
+static struct vnode *
+null_lock_prep_with_smr(struct vop_lock1_args *ap)
+{
+	struct null_node *nn;
+	struct vnode *lvp;
+
+	vfs_smr_enter();
+
+	lvp = NULL;
+
+	nn = VTONULL_SMR(ap->a_vp);
+	if (__predict_true(nn != NULL)) {
+		lvp = nn->null_lowervp;
+		if (lvp != NULL && !vhold_smr(lvp))
+			lvp = NULL;
+	}
+
+	vfs_smr_exit();
+	return (lvp);
+}
+
+static struct vnode *
+null_lock_prep_with_interlock(struct vop_lock1_args *ap)
+{
+	struct null_node *nn;
+	struct vnode *lvp;
+
+	ASSERT_VI_LOCKED(ap->a_vp, __func__);
+
+	ap->a_flags &= ~LK_INTERLOCK;
+
+	lvp = NULL;
+
+	nn = VTONULL(ap->a_vp);
+	if (__predict_true(nn != NULL)) {
+		lvp = nn->null_lowervp;
+		if (lvp != NULL)
+			vholdnz(lvp);
+	}
+	VI_UNLOCK(ap->a_vp);
+	return (lvp);
+}
+
 static int
 null_lock(struct vop_lock1_args *ap)
 {
-	struct vnode *vp = ap->a_vp;
-	int flags;
-	struct null_node *nn;
 	struct vnode *lvp;
-	int error;
+	int error, flags;
 
-	if ((ap->a_flags & LK_INTERLOCK) == 0)
-		VI_LOCK(vp);
-	else
-		ap->a_flags &= ~LK_INTERLOCK;
-	flags = ap->a_flags;
-	nn = VTONULL(vp);
+	if (__predict_true((ap->a_flags & LK_INTERLOCK) == 0)) {
+		lvp = null_lock_prep_with_smr(ap);
+		if (__predict_false(lvp == NULL)) {
+			VI_LOCK(ap->a_vp);
+			lvp = null_lock_prep_with_interlock(ap);
+		}
+	} else {
+		lvp = null_lock_prep_with_interlock(ap);
+	}
+
+	ASSERT_VI_UNLOCKED(ap->a_vp, __func__);
+
+	if (__predict_false(lvp == NULL))
+		return (vop_stdlock(ap));
+
+	VNPASS(lvp->v_holdcnt > 0, lvp);
+	error = VOP_LOCK(lvp, ap->a_flags);
+
 	/*
-	 * If we're still active we must ask the lower layer to
-	 * lock as ffs has special lock considerations in its
-	 * vop lock.
+	 * We might have slept to get the lock and someone might have
+	 * clean our vnode already, switching vnode lock from one in
+	 * lowervp to v_lock in our own vnode structure. Handle this
+	 * case by reacquiring correct lock in requested mode.
 	 */
-	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
-		/*
-		 * We have to hold the vnode here to solve a potential
-		 * reclaim race. If we're forcibly vgone'd while we
-		 * still have refs, a thread could be sleeping inside
-		 * the lowervp's vop_lock routine. When we vgone we will
-		 * drop our last ref to the lowervp, which would allow it
-		 * to be reclaimed. The lowervp could then be recycled,
-		 * in which case it is not legal to be sleeping in its VOP.
-		 * We prevent it from being recycled by holding the vnode
-		 * here.
-		 */
-		vholdnz(lvp);
-		VI_UNLOCK(vp);
-		error = VOP_LOCK(lvp, flags);
-
-		/*
-		 * We might have slept to get the lock and someone might have
-		 * clean our vnode already, switching vnode lock from one in
-		 * lowervp to v_lock in our own vnode structure. Handle this
-		 * case by reacquiring correct lock in requested mode.
-		 */
-		if (VTONULL(vp) == NULL && error == 0) {
-			ap->a_flags &= ~LK_TYPE_MASK;
-			switch (flags & LK_TYPE_MASK) {
-			case LK_SHARED:
-				ap->a_flags |= LK_SHARED;
-				break;
-			case LK_UPGRADE:
-			case LK_EXCLUSIVE:
-				ap->a_flags |= LK_EXCLUSIVE;
-				break;
-			default:
-				panic("Unsupported lock request %d\n",
-				    ap->a_flags);
-			}
-			VOP_UNLOCK(lvp);
-			error = vop_stdlock(ap);
+	if (VTONULL(ap->a_vp) == NULL && error == 0) {
+		flags = ap->a_flags;
+		ap->a_flags &= ~LK_TYPE_MASK;
+		switch (flags & LK_TYPE_MASK) {
+		case LK_SHARED:
+			ap->a_flags |= LK_SHARED;
+			break;
+		case LK_UPGRADE:
+		case LK_EXCLUSIVE:
+			ap->a_flags |= LK_EXCLUSIVE;
+			break;
+		default:
+			panic("Unsupported lock request %d\n",
+			    flags);
 		}
-		vdrop(lvp);
-	} else {
-		VI_UNLOCK(vp);
+		VOP_UNLOCK(lvp);
 		error = vop_stdlock(ap);
 	}
-
+	vdrop(lvp);
 	return (error);
 }
 
-/*
- * We need to process our own vnode unlock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
- */
 static int
 null_unlock(struct vop_unlock_args *ap)
 {
@@ -853,11 +884,20 @@ null_unlock(struct vop_unlock_args *ap)
 	struct vnode *lvp;
 	int error;
 
+	/*
+	 * Contrary to null_lock, we don't need to hold the vnode around
+	 * unlock.
+	 *
+	 * We hold the lock, which means we can't be racing against vgone.
+	 *
+	 * At the same time VOP_UNLOCK promises to not touch anything after
+	 * it finishes unlock, just like we don't.
+	 *
+	 * vop_stdunlock for a doomed vnode matches doomed locking in null_lock.
+	 */
 	nn = VTONULL(vp);
 	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
-		vholdnz(lvp);
 		error = VOP_UNLOCK(lvp);
-		vdrop(lvp);
 	} else {
 		error = vop_stdunlock(ap);
 	}
@@ -961,7 +1001,7 @@ null_reclaim(struct vop_reclaim_args *ap)
 		vunref(lowervp);
 	else
 		vput(lowervp);
-	free(xp, M_NULLFSNODE);
+	uma_zfree_smr(null_node_zone, xp);
 
 	return (0);
 }
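Note on the pattern (a reader-oriented sketch, not part of the commit): the new null_hashget() walks the CK_LIST bucket under vfs_smr_enter()/vfs_smr_exit() instead of taking null_hash_lock. This is only safe because the node zone is registered with VFS_SMR_ZONE_SET() and freed with uma_zfree_smr(), so a node's memory is not reused until all readers have left their SMR sections. The sketch below condenses that fast path; smr_hash_lookup() is a made-up name, NULL_NHASH/NULLTOV are the macros from the patch, and the VGET_NONE case is handled generically here, whereas the real code can assert it never happens (the caller holds the lower vnode locked, so the nullfs vnode cannot be doomed).

/*
 * Condensed sketch of the SMR-protected lookup used by null_hashget()
 * above; illustrative only, not a drop-in replacement.
 */
static struct vnode *
smr_hash_lookup(struct mount *mp, struct vnode *lowervp)
{
	struct null_node *a;
	struct vnode *vp;
	enum vgetstate vs;

	vfs_smr_enter();		/* enter the SMR read section */
	CK_LIST_FOREACH(a, NULL_NHASH(lowervp), null_hash) {
		if (a->null_lowervp != lowervp)
			continue;
		vp = NULLTOV(a);
		if (vp->v_mount != mp)
			continue;
		/*
		 * Secure a reference while still inside the SMR section so
		 * the vnode cannot be freed from under us, then leave the
		 * section before finishing the reference acquisition.
		 */
		vs = vget_prep_smr(vp);
		vfs_smr_exit();
		if (vs == VGET_NONE)	/* vnode was being freed */
			return (NULL);
		vget_finish_ref(vp, vs);
		return (vp);
	}
	vfs_smr_exit();
	return (NULL);
}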