Diffstat (limited to 'sys/fs/nullfs')
-rw-r--r--  sys/fs/nullfs/null.h         10
-rw-r--r--  sys/fs/nullfs/null_subr.c    94
-rw-r--r--  sys/fs/nullfs/null_vnops.c  186
3 files changed, 181 insertions, 109 deletions
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 0a93878c859f..aa7a689bec34 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,6 +37,9 @@
 #define	NULLM_CACHE	0x0001
 
+#include <sys/ck.h>
+#include <vm/uma.h>
+
 struct null_mount {
 	struct mount *nullm_vfs;
 	struct vnode *nullm_lowerrootvp;	/* Ref to lower root vnode */
@@ -50,7 +53,7 @@ struct null_mount {
  * A cache of vnode references
  */
 struct null_node {
-	LIST_ENTRY(null_node) null_hash;	/* Hash list */
+	CK_LIST_ENTRY(null_node) null_hash;	/* Hash list */
 	struct vnode *null_lowervp;	/* VREFed once */
 	struct vnode *null_vnode;	/* Back pointer */
 	u_int null_flags;
@@ -61,6 +64,7 @@ struct null_node {
 
 #define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
 #define	VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define	VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp))
 #define	NULLTOV(xp) ((xp)->null_vnode)
 
 int nullfs_init(struct vfsconf *vfsp);
@@ -79,9 +83,7 @@ struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
 
 extern struct vop_vector null_vnodeops;
 
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
+extern uma_zone_t null_node_zone;
 
 #ifdef NULLFS_DEBUG
 #define NULLFSDEBUG(format, args...) printf(format ,## args)
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 7dcc83880bb9..146d3bbdaedd 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -41,9 +41,14 @@
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
+#include <sys/smr.h>
 
 #include <fs/nullfs/null.h>
 
+#include <vm/uma.h>
+
+VFS_SMR_DECLARE;
+
 /*
  * Null layer cache:
  * Each cache entry holds a reference to the lower vnode
@@ -54,12 +59,12 @@
 
 #define	NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
 
-static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
 static struct rwlock null_hash_lock;
 static u_long null_hash_mask;
 
 static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
+uma_zone_t __read_mostly null_node_zone;
 
 static void null_hashins(struct mount *, struct null_node *);
@@ -73,6 +78,10 @@ nullfs_init(struct vfsconf *vfsp)
 	null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
 	    &null_hash_mask);
 	rw_init(&null_hash_lock, "nullhs");
+	null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
+	    NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
+	VFS_SMR_ZONE_SET(null_node_zone);
+
 	return (0);
 }
 
@@ -80,6 +89,7 @@ int
 nullfs_uninit(struct vfsconf *vfsp)
 {
 
+	uma_zdestroy(null_node_zone);
 	rw_destroy(&null_hash_lock);
 	hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
 	return (0);
@@ -96,7 +106,7 @@ null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 	struct null_node *a;
 	struct vnode *vp;
 
-	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+	ASSERT_VOP_LOCKED(lowervp, __func__);
 	rw_assert(&null_hash_lock, RA_LOCKED);
 
 	/*
@@ -106,37 +116,57 @@ null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 	 * reference count (but NOT the lower vnode's VREF counter).
 	 */
 	hd = NULL_NHASH(lowervp);
-	LIST_FOREACH(a, hd, null_hash) {
-		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
-			/*
-			 * Since we have the lower node locked the nullfs
-			 * node can not be in the process of recycling. If
-			 * it had been recycled before we grabed the lower
-			 * lock it would not have been found on the hash.
-			 */
-			vp = NULLTOV(a);
-			vref(vp);
-			return (vp);
-		}
+	CK_LIST_FOREACH(a, hd, null_hash) {
+		if (a->null_lowervp != lowervp)
+			continue;
+		/*
+		 * Since we have the lower node locked the nullfs
+		 * node can not be in the process of recycling. If
+		 * it had been recycled before we grabed the lower
+		 * lock it would not have been found on the hash.
+		 */
+		vp = NULLTOV(a);
+		VNPASS(!VN_IS_DOOMED(vp), vp);
+		if (vp->v_mount != mp)
+			continue;
+		vref(vp);
+		return (vp);
 	}
-	return (NULLVP);
+	return (NULL);
 }
 
 struct vnode *
 null_hashget(struct mount *mp, struct vnode *lowervp)
 {
 	struct null_node_hashhead *hd;
+	struct null_node *a;
 	struct vnode *vp;
+	enum vgetstate vs;
 
-	hd = NULL_NHASH(lowervp);
-	if (LIST_EMPTY(hd))
-		return (NULLVP);
-
-	rw_rlock(&null_hash_lock);
-	vp = null_hashget_locked(mp, lowervp);
-	rw_runlock(&null_hash_lock);
+	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+	rw_assert(&null_hash_lock, RA_UNLOCKED);
 
-	return (vp);
+	vfs_smr_enter();
+	hd = NULL_NHASH(lowervp);
+	CK_LIST_FOREACH(a, hd, null_hash) {
+		if (a->null_lowervp != lowervp)
+			continue;
+		/*
+		 * See null_hashget_locked as to why the nullfs vnode can't be
+		 * doomed here.
+		 */
+		vp = NULLTOV(a);
+		VNPASS(!VN_IS_DOOMED(vp), vp);
+		if (vp->v_mount != mp)
+			continue;
+		vs = vget_prep_smr(vp);
+		vfs_smr_exit();
+		VNPASS(vs != VGET_NONE, vp);
+		vget_finish_ref(vp, vs);
+		return (vp);
+	}
+	vfs_smr_exit();
+	return (NULL);
 }
 
 static void
@@ -151,7 +181,7 @@ null_hashins(struct mount *mp, struct null_node *xp)
 
 	hd = NULL_NHASH(xp->null_lowervp);
 #ifdef INVARIANTS
-	LIST_FOREACH(oxp, hd, null_hash) {
+	CK_LIST_FOREACH(oxp, hd, null_hash) {
 		if (oxp->null_lowervp == xp->null_lowervp &&
 		    NULLTOV(oxp)->v_mount == mp) {
 			VNASSERT(0, NULLTOV(oxp),
@@ -159,7 +189,7 @@
 		}
 	}
 #endif
-	LIST_INSERT_HEAD(hd, xp, null_hash);
+	CK_LIST_INSERT_HEAD(hd, xp, null_hash);
 }
 
 static void
@@ -174,7 +204,7 @@ null_destroy_proto(struct vnode *vp, void *xp)
 	VI_UNLOCK(vp);
 	vgone(vp);
 	vput(vp);
-	free(xp, M_NULLFSNODE);
+	uma_zfree_smr(null_node_zone, xp);
 }
 
 /*
@@ -208,12 +238,12 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 	 * Note that duplicate can only appear in hash if the lowervp is
 	 * locked LK_SHARED.
 	 */
-	xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
+	xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
 
 	error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
 	if (error) {
 		vput(lowervp);
-		free(xp, M_NULLFSNODE);
+		uma_zfree_smr(null_node_zone, xp);
 		return (error);
 	}
 
@@ -261,8 +291,8 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 		return (error);
 	}
 
-	null_hashins(mp, xp);
 	vn_set_state(vp, VSTATE_CONSTRUCTED);
+	null_hashins(mp, xp);
 	rw_wunlock(&null_hash_lock);
 	*vpp = vp;
 
@@ -277,7 +307,7 @@ null_hashrem(struct null_node *xp)
 {
 
 	rw_wlock(&null_hash_lock);
-	LIST_REMOVE(xp, null_hash);
+	CK_LIST_REMOVE(xp, null_hash);
 	rw_wunlock(&null_hash_lock);
 }
 
@@ -298,7 +328,7 @@ null_checkvp(struct vnode *vp, char *fil, int lno)
 		panic("null_checkvp");
 	}
 #endif
-	if (a->null_lowervp == NULLVP) {
+	if (a->null_lowervp == NULL) {
 		/* Should never happen */
 		panic("null_checkvp %p", vp);
 	}
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 74c1a8f3acb6..375b6aa27531 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -174,6 +174,8 @@
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/smr.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
@@ -185,6 +187,8 @@
 #include <vm/vm_object.h>
 #include <vm/vnode_pager.h>
 
+VFS_SMR_DECLARE;
+
 static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
 SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
 	&null_bug_bypass, 0, "");
@@ -273,9 +277,9 @@ null_bypass(struct vop_generic_args *ap)
 		 * are of our type. Check for and don't map any
 		 * that aren't. (We must always map first vp or vclean fails.)
 		 */
-		if (i != 0 && (*this_vp_p == NULLVP ||
-		    (*this_vp_p)->v_op != &null_vnodeops)) {
-			old_vps[i] = NULLVP;
+		if (i != 0 && (*this_vp_p == NULL ||
+		    (*this_vp_p)->v_op != &null_vnodeops)) {
+			old_vps[i] = NULL;
 		} else {
 			old_vps[i] = *this_vp_p;
 			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
@@ -306,7 +310,7 @@ null_bypass(struct vop_generic_args *ap)
 	 * with the modified argument structure.
 	 */
 	if (vps_p[0] != NULL && *vps_p[0] != NULL) {
-		error = VCALL(ap);
+		error = ap->a_desc->vdesc_call(ap);
 	} else {
 		printf("null_bypass: no map for %s\n", descp->vdesc_name);
 		error = EINVAL;
@@ -336,7 +340,7 @@ null_bypass(struct vop_generic_args *ap)
 			 * must move lock ownership from lower to
 			 * upper (reclaimed) vnode.
 			 */
-			if (lvp != NULLVP) {
+			if (lvp != NULL) {
 				null_copy_inotify(old_vps[i], lvp,
 				    VIRF_INOTIFY);
 				null_copy_inotify(old_vps[i], lvp,
@@ -494,7 +498,7 @@ null_lookup(struct vop_lookup_args *ap)
 	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
 		if (ldvp == lvp) {
 			*ap->a_vpp = dvp;
-			VREF(dvp);
+			vref(dvp);
 			vrele(lvp);
 		} else {
 			error = null_nodeget(mp, lvp, &vp);
@@ -665,7 +669,7 @@ null_remove(struct vop_remove_args *ap)
 	vp = ap->a_vp;
 	if (vrefcnt(vp) > 1) {
 		lvp = NULLVPTOLOWERVP(vp);
-		VREF(lvp);
+		vref(lvp);
 		vreleit = 1;
 	} else
 		vreleit = 0;
@@ -768,83 +772,110 @@ null_rmdir(struct vop_rmdir_args *ap)
 }
 
 /*
- * We need to process our own vnode lock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
+ * We need to process our own vnode lock and then clear the interlock flag as
+ * it applies only to our vnode, not the vnodes below us on the stack.
+ *
+ * We have to hold the vnode here to solve a potential reclaim race. If we're
+ * forcibly vgone'd while we still have refs, a thread could be sleeping inside
+ * the lowervp's vop_lock routine. When we vgone we will drop our last ref to
+ * the lowervp, which would allow it to be reclaimed. The lowervp could then
+ * be recycled, in which case it is not legal to be sleeping in its VOP. We
+ * prevent it from being recycled by holding the vnode here.
  */
+static struct vnode *
+null_lock_prep_with_smr(struct vop_lock1_args *ap)
+{
+	struct null_node *nn;
+	struct vnode *lvp;
+
+	vfs_smr_enter();
+
+	lvp = NULL;
+
+	nn = VTONULL_SMR(ap->a_vp);
+	if (__predict_true(nn != NULL)) {
+		lvp = nn->null_lowervp;
+		if (lvp != NULL && !vhold_smr(lvp))
+			lvp = NULL;
+	}
+
+	vfs_smr_exit();
+	return (lvp);
+}
+
+static struct vnode *
+null_lock_prep_with_interlock(struct vop_lock1_args *ap)
+{
+	struct null_node *nn;
+	struct vnode *lvp;
+
+	ASSERT_VI_LOCKED(ap->a_vp, __func__);
+
+	ap->a_flags &= ~LK_INTERLOCK;
+
+	lvp = NULL;
+
+	nn = VTONULL(ap->a_vp);
+	if (__predict_true(nn != NULL)) {
+		lvp = nn->null_lowervp;
+		if (lvp != NULL)
+			vholdnz(lvp);
+	}
+	VI_UNLOCK(ap->a_vp);
+	return (lvp);
+}
+
 static int
 null_lock(struct vop_lock1_args *ap)
 {
-	struct vnode *vp = ap->a_vp;
-	int flags;
-	struct null_node *nn;
 	struct vnode *lvp;
-	int error;
+	int error, flags;
 
-	if ((ap->a_flags & LK_INTERLOCK) == 0)
-		VI_LOCK(vp);
-	else
-		ap->a_flags &= ~LK_INTERLOCK;
-	flags = ap->a_flags;
-	nn = VTONULL(vp);
+	if (__predict_true((ap->a_flags & LK_INTERLOCK) == 0)) {
+		lvp = null_lock_prep_with_smr(ap);
+		if (__predict_false(lvp == NULL)) {
+			VI_LOCK(ap->a_vp);
+			lvp = null_lock_prep_with_interlock(ap);
+		}
+	} else {
+		lvp = null_lock_prep_with_interlock(ap);
+	}
+
+	ASSERT_VI_UNLOCKED(ap->a_vp, __func__);
+
+	if (__predict_false(lvp == NULL))
+		return (vop_stdlock(ap));
+
+	VNPASS(lvp->v_holdcnt > 0, lvp);
+	error = VOP_LOCK(lvp, ap->a_flags);
+
 	/*
-	 * If we're still active we must ask the lower layer to
-	 * lock as ffs has special lock considerations in its
-	 * vop lock.
+	 * We might have slept to get the lock and someone might have
+	 * clean our vnode already, switching vnode lock from one in
+	 * lowervp to v_lock in our own vnode structure. Handle this
+	 * case by reacquiring correct lock in requested mode.
 	 */
-	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
-		/*
-		 * We have to hold the vnode here to solve a potential
-		 * reclaim race. If we're forcibly vgone'd while we
-		 * still have refs, a thread could be sleeping inside
-		 * the lowervp's vop_lock routine. When we vgone we will
-		 * drop our last ref to the lowervp, which would allow it
-		 * to be reclaimed. The lowervp could then be recycled,
-		 * in which case it is not legal to be sleeping in its VOP.
-		 * We prevent it from being recycled by holding the vnode
-		 * here.
-		 */
-		vholdnz(lvp);
-		VI_UNLOCK(vp);
-		error = VOP_LOCK(lvp, flags);
-
-		/*
-		 * We might have slept to get the lock and someone might have
-		 * clean our vnode already, switching vnode lock from one in
-		 * lowervp to v_lock in our own vnode structure. Handle this
-		 * case by reacquiring correct lock in requested mode.
-		 */
-		if (VTONULL(vp) == NULL && error == 0) {
-			ap->a_flags &= ~LK_TYPE_MASK;
-			switch (flags & LK_TYPE_MASK) {
-			case LK_SHARED:
-				ap->a_flags |= LK_SHARED;
-				break;
-			case LK_UPGRADE:
-			case LK_EXCLUSIVE:
-				ap->a_flags |= LK_EXCLUSIVE;
-				break;
-			default:
-				panic("Unsupported lock request %d\n",
-				    ap->a_flags);
-			}
-			VOP_UNLOCK(lvp);
-			error = vop_stdlock(ap);
+	if (VTONULL(ap->a_vp) == NULL && error == 0) {
+		flags = ap->a_flags;
+		ap->a_flags &= ~LK_TYPE_MASK;
+		switch (flags & LK_TYPE_MASK) {
+		case LK_SHARED:
+			ap->a_flags |= LK_SHARED;
+			break;
+		case LK_UPGRADE:
+		case LK_EXCLUSIVE:
+			ap->a_flags |= LK_EXCLUSIVE;
+			break;
+		default:
+			panic("Unsupported lock request %d\n",
+			    flags);
 		}
-		vdrop(lvp);
-	} else {
-		VI_UNLOCK(vp);
+		VOP_UNLOCK(lvp);
 		error = vop_stdlock(ap);
 	}
-
+	vdrop(lvp);
 	return (error);
 }
 
-/*
- * We need to process our own vnode unlock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
- */
 static int
 null_unlock(struct vop_unlock_args *ap)
 {
@@ -853,11 +884,20 @@ null_unlock(struct vop_unlock_args *ap)
 	struct vnode *lvp;
 	int error;
 
+	/*
+	 * Contrary to null_lock, we don't need to hold the vnode around
+	 * unlock.
+	 *
+	 * We hold the lock, which means we can't be racing against vgone.
+	 *
+	 * At the same time VOP_UNLOCK promises to not touch anything after
+	 * it finishes unlock, just like we don't.
+	 *
+	 * vop_stdunlock for a doomed vnode matches doomed locking in null_lock.
+	 */
 	nn = VTONULL(vp);
 	if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
-		vholdnz(lvp);
 		error = VOP_UNLOCK(lvp);
-		vdrop(lvp);
 	} else {
 		error = vop_stdunlock(ap);
 	}
@@ -961,7 +1001,7 @@ null_reclaim(struct vop_reclaim_args *ap)
 		vunref(lowervp);
 	else
 		vput(lowervp);
-	free(xp, M_NULLFSNODE);
+	uma_zfree_smr(null_node_zone, xp);
 
 	return (0);
 }
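Note on the pattern (a reader-oriented sketch, not part of the commit): the new null_hashget() walks the CK_LIST bucket under vfs_smr_enter()/vfs_smr_exit() instead of taking null_hash_lock. This is only safe because the node zone is registered with VFS_SMR_ZONE_SET() and freed with uma_zfree_smr(), so a node's memory is not reused until all readers have left their SMR sections. The sketch below condenses that fast path; smr_hash_lookup() is a made-up name, NULL_NHASH/NULLTOV are the macros from the patch, and the VGET_NONE case is handled generically here, whereas the real code can assert it never happens (the caller holds the lower vnode locked, so the nullfs vnode cannot be doomed).

/*
 * Condensed sketch of the SMR-protected lookup used by null_hashget()
 * above; illustrative only, not a drop-in replacement.
 */
static struct vnode *
smr_hash_lookup(struct mount *mp, struct vnode *lowervp)
{
	struct null_node *a;
	struct vnode *vp;
	enum vgetstate vs;

	vfs_smr_enter();		/* enter the SMR read section */
	CK_LIST_FOREACH(a, NULL_NHASH(lowervp), null_hash) {
		if (a->null_lowervp != lowervp)
			continue;
		vp = NULLTOV(a);
		if (vp->v_mount != mp)
			continue;
		/*
		 * Secure a reference while still inside the SMR section so
		 * the vnode cannot be freed from under us, then leave the
		 * section before finishing the reference acquisition.
		 */
		vs = vget_prep_smr(vp);
		vfs_smr_exit();
		if (vs == VGET_NONE)	/* vnode was being freed */
			return (NULL);
		vget_finish_ref(vp, vs);
		return (vp);
	}
	vfs_smr_exit();
	return (NULL);
}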