aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMateusz Guzik <mjg@FreeBSD.org>2020-07-25 10:38:05 +0000
committerMateusz Guzik <mjg@FreeBSD.org>2020-07-25 10:38:05 +0000
commit9d5a594f0b7f5939834be10a8652837c6933344c (patch)
treeb185307a889913d9b63b071aad474b26869ff5f0
parentc42b77e6941903af76b58f96a1ecb86c6ffac36d (diff)
downloadsrc-9d5a594f0b7f5939834be10a8652837c6933344c.tar.gz
src-9d5a594f0b7f5939834be10a8652837c6933344c.zip
ufs: add support for lockless lookup
ACLs are not supported, meaning their presence will force the use of the old lookup. Reviewed by: kib Tested by: pho (in a patchset) Differential Revision: https://reviews.freebsd.org/D25579
Notes
Notes: svn path=/head/; revision=363520
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c22
-rw-r--r--sys/ufs/ffs/ffs_vnops.c8
-rw-r--r--sys/ufs/ufs/inode.h10
-rw-r--r--sys/ufs/ufs/ufs_acl.c13
-rw-r--r--sys/ufs/ufs/ufs_vnops.c101
5 files changed, 132 insertions, 22 deletions
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 696be51ae6a0..8c69212d82e6 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <ddb/ddb.h>
static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
+VFS_SMR_DECLARE;
static int ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void ffs_oldfscompat_read(struct fs *, struct ufsmount *,
@@ -393,6 +394,7 @@ ffs_mount(struct mount *mp)
uma_ufs2 = uma_zcreate("FFS2 dinode",
sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
+ VFS_SMR_ZONE_SET(uma_inode);
}
vfs_deleteopt(mp->mnt_optnew, "groupquota");
@@ -455,6 +457,7 @@ ffs_mount(struct mount *mp)
}
MNT_ILOCK(mp);
+ mp->mnt_kern_flag &= ~MNTK_FPLOOKUP;
mp->mnt_flag |= mntorflags;
MNT_IUNLOCK(mp);
/*
@@ -795,6 +798,17 @@ ffs_mount(struct mount *mp)
}
}
}
+
+ MNT_ILOCK(mp);
+ /*
+ * This is racy versus lookup, see ufs_fplookup_vexec for details.
+ */
+ if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) != 0)
+ panic("MNTK_FPLOOKUP set on mount %p when it should not be", mp);
+ if ((mp->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) == 0)
+ mp->mnt_kern_flag |= MNTK_FPLOOKUP;
+ MNT_IUNLOCK(mp);
+
vfs_mountedfrom(mp, fspec);
return (0);
}
@@ -1968,14 +1982,14 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
ump = VFSTOUFS(mp);
fs = ump->um_fs;
- ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
+ ip = uma_zalloc_smr(uma_inode, M_WAITOK | M_ZERO);
/* Allocate a new vnode/inode. */
error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
&ffs_vnodeops1 : &ffs_vnodeops2, &vp);
if (error) {
*vpp = NULL;
- uma_zfree(uma_inode, ip);
+ uma_zfree_smr(uma_inode, ip);
return (error);
}
/*
@@ -2004,7 +2018,7 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
vp->v_vflag |= VV_FORCEINSMQ;
error = insmntque(vp, mp);
if (error != 0) {
- uma_zfree(uma_inode, ip);
+ uma_zfree_smr(uma_inode, ip);
*vpp = NULL;
return (error);
}
@@ -2327,7 +2341,7 @@ ffs_ifree(struct ufsmount *ump, struct inode *ip)
uma_zfree(uma_ufs1, ip->i_din1);
else if (ip->i_din2 != NULL)
uma_zfree(uma_ufs2, ip->i_din2);
- uma_zfree(uma_inode, ip);
+ uma_zfree_smr(uma_inode, ip);
}
static int dobkgrdwrite = 1;
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 000ded6cbbaa..c363f4bbb094 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -905,8 +905,10 @@ ffs_write(ap)
if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
ap->a_cred) {
if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
- ip->i_mode &= ~(ISUID | ISGID);
+ vn_seqc_write_begin(vp);
+ UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
DIP_SET(ip, i_mode, ip->i_mode);
+ vn_seqc_write_end(vp);
}
}
if (error) {
@@ -1152,8 +1154,10 @@ ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
*/
if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
- ip->i_mode &= ~(ISUID | ISGID);
+ vn_seqc_write_begin(vp);
+ UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
dp->di_mode = ip->i_mode;
+ vn_seqc_write_end(vp);
}
}
if (error) {
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index defa888b17fe..15f60caae1f7 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -43,6 +43,7 @@
#include <sys/lock.h>
#include <sys/queue.h>
#include <ufs/ufs/dinode.h>
+#include <sys/seqc.h>
/*
* This must agree with the definition in <ufs/ufs/dir.h>.
@@ -149,6 +150,14 @@ struct inode {
#define UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE \
(UFS_INODE_FLAG_LAZY_MASK & ~(IN_LAZYMOD | IN_LAZYACCESS))
+#define UFS_INODE_SET_MODE(ip, mode) do { \
+ struct inode *_ip = (ip); \
+ int _mode = (mode); \
+ \
+ ASSERT_VOP_IN_SEQC(ITOV(_ip)); \
+ atomic_store_short(&(_ip)->i_mode, _mode); \
+} while (0)
+
#define UFS_INODE_SET_FLAG(ip, flags) do { \
struct inode *_ip = (ip); \
struct vnode *_vp = ITOV(_ip); \
@@ -229,6 +238,7 @@ struct indir {
/* Convert between inode pointers and vnode pointers. */
#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define VTOI_SMR(vp) ((struct inode *)vn_load_v_data_smr(vp))
#define ITOV(ip) ((ip)->i_vnode)
/* Determine if soft dependencies are being done */
diff --git a/sys/ufs/ufs/ufs_acl.c b/sys/ufs/ufs/ufs_acl.c
index cb077387ae9c..68e8ef91534e 100644
--- a/sys/ufs/ufs/ufs_acl.c
+++ b/sys/ufs/ufs/ufs_acl.c
@@ -139,9 +139,11 @@ ufs_sync_acl_from_inode(struct inode *ip, struct acl *acl)
void
ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip)
{
+ int newmode;
- ip->i_mode &= ACL_PRESERVE_MASK;
- ip->i_mode |= acl_posix1e_acl_to_mode(acl);
+ newmode = ip->i_mode & ACL_PRESERVE_MASK;
+ newmode |= acl_posix1e_acl_to_mode(acl);
+ UFS_INODE_SET_MODE(ip, newmode);
DIP_SET(ip, i_mode, ip->i_mode);
}
@@ -381,7 +383,7 @@ int
ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td)
{
int error;
- mode_t mode;
+ mode_t mode, newmode;
struct inode *ip = VTOI(vp);
KASSERT(acl_nfs4_check(aclp, vp->v_type == VDIR) == 0,
@@ -418,8 +420,9 @@ ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td)
acl_nfs4_sync_mode_from_acl(&mode, aclp);
- ip->i_mode &= ACL_PRESERVE_MASK;
- ip->i_mode |= mode;
+ newmode = ip->i_mode & ACL_PRESERVE_MASK;
+ newmode |= mode;
+ UFS_INODE_SET_MODE(ip, newmode);
DIP_SET(ip, i_mode, ip->i_mode);
UFS_INODE_SET_FLAG(ip, IN_CHANGE);
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 2e6aa283fc80..70dd7dbe86c1 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lockf.h>
#include <sys/conf.h>
#include <sys/acl.h>
+#include <sys/smr.h>
#include <security/mac/mac_framework.h>
@@ -96,10 +97,12 @@ FEATURE(suiddir,
"Give all new files in directory the same ownership as the directory");
#endif
+VFS_SMR_DECLARE;
#include <ufs/ffs/ffs_extern.h>
static vop_accessx_t ufs_accessx;
+static vop_fplookup_vexec_t ufs_fplookup_vexec;
static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
static vop_close_t ufs_close;
@@ -422,6 +425,46 @@ ufs_accessx(ap)
return (error);
}
+/*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ */
+static int
+ufs_fplookup_vexec(ap)
+ struct vop_fplookup_vexec_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ struct thread *a_td;
+ } */ *ap;
+{
+ struct vnode *vp;
+ struct inode *ip;
+ struct ucred *cred;
+ mode_t all_x, mode;
+
+ vp = ap->a_vp;
+ ip = VTOI_SMR(vp);
+ if (__predict_false(ip == NULL))
+ return (EAGAIN);
+
+ /*
+ * XXX ACL race
+ *
+ * ACLs are not supported and UFS clears/sets this flag on mount and
+ * remount. However, we may still be racing with seeing them and there
+ * is no provision to make sure they were accounted for. This matches
+ * the behavior of the locked case, since the lookup there is also
+ * racy: mount takes no measures to block anyone from progressing.
+ */
+ all_x = S_IXUSR | S_IXGRP | S_IXOTH;
+ mode = atomic_load_short(&ip->i_mode);
+ if (__predict_true((mode & all_x) == all_x))
+ return (0);
+
+ cred = ap->a_cred;
+ return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred));
+}
+
/* ARGSUSED */
static int
ufs_getattr(ap)
@@ -711,7 +754,7 @@ ufs_chmod(vp, mode, cred, td)
struct thread *td;
{
struct inode *ip = VTOI(vp);
- int error;
+ int newmode, error;
/*
* To modify the permissions on a file, must possess VADMIN
@@ -744,8 +787,9 @@ ufs_chmod(vp, mode, cred, td)
return (error);
}
- ip->i_mode &= ~ALLPERMS;
- ip->i_mode |= (mode & ALLPERMS);
+ newmode = ip->i_mode & ~ALLPERMS;
+ newmode |= (mode & ALLPERMS);
+ UFS_INODE_SET_MODE(ip, newmode);
DIP_SET(ip, i_mode, ip->i_mode);
UFS_INODE_SET_FLAG(ip, IN_CHANGE);
#ifdef UFS_ACL
@@ -869,7 +913,7 @@ good:
UFS_INODE_SET_FLAG(ip, IN_CHANGE);
if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
- ip->i_mode &= ~(ISUID | ISGID);
+ UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
DIP_SET(ip, i_mode, ip->i_mode);
}
}
@@ -1111,6 +1155,9 @@ ufs_rename(ap)
int error = 0;
struct mount *mp;
ino_t ino;
+ bool want_seqc_end;
+
+ want_seqc_end = false;
#ifdef INVARIANTS
if ((tcnp->cn_flags & HASBUF) == 0 ||
@@ -1315,6 +1362,13 @@ relock:
tdp->i_effnlink == 0)
panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
+ if (tvp != NULL)
+ vn_seqc_write_begin(tvp);
+ vn_seqc_write_begin(tdvp);
+ vn_seqc_write_begin(fvp);
+ vn_seqc_write_begin(fdvp);
+ want_seqc_end = true;
+
/*
* 1) Bump link count while we're moving stuff
* around. If we crash somewhere before
@@ -1513,6 +1567,14 @@ relock:
cache_purge_negative(tdvp);
unlockout:
+ if (want_seqc_end) {
+ if (tvp != NULL)
+ vn_seqc_write_end(tvp);
+ vn_seqc_write_end(tdvp);
+ vn_seqc_write_end(fvp);
+ vn_seqc_write_end(fdvp);
+ }
+
vput(fdvp);
vput(fvp);
if (tvp)
@@ -1556,6 +1618,14 @@ bad:
goto unlockout;
releout:
+ if (want_seqc_end) {
+ if (tvp != NULL)
+ vn_seqc_write_end(tvp);
+ vn_seqc_write_end(tdvp);
+ vn_seqc_write_end(fvp);
+ vn_seqc_write_end(fdvp);
+ }
+
vrele(fdvp);
vrele(fvp);
vrele(tdvp);
@@ -1590,7 +1660,7 @@ ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
*/
if (acl->acl_cnt != 0) {
dmode = acl_posix1e_newfilemode(dmode, acl);
- ip->i_mode = dmode;
+ UFS_INODE_SET_MODE(ip, dmode);
DIP_SET(ip, i_mode, dmode);
*dacl = *acl;
ufs_sync_acl_from_inode(ip, acl);
@@ -1602,7 +1672,7 @@ ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
/*
* Just use the mode as-is.
*/
- ip->i_mode = dmode;
+ UFS_INODE_SET_MODE(ip, dmode);
DIP_SET(ip, i_mode, dmode);
error = 0;
goto out;
@@ -1673,7 +1743,7 @@ ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
* the it's not defined case.
*/
mode = acl_posix1e_newfilemode(mode, acl);
- ip->i_mode = mode;
+ UFS_INODE_SET_MODE(ip, mode);
DIP_SET(ip, i_mode, mode);
ufs_sync_acl_from_inode(ip, acl);
break;
@@ -1684,7 +1754,7 @@ ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
/*
* Just use the mode as-is.
*/
- ip->i_mode = mode;
+ UFS_INODE_SET_MODE(ip, mode);
DIP_SET(ip, i_mode, mode);
error = 0;
goto out;
@@ -1796,6 +1866,7 @@ ufs_mkdir(ap)
error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
if (error)
goto out;
+ vn_seqc_write_begin(tvp);
ip = VTOI(tvp);
ip->i_gid = dp->i_gid;
DIP_SET(ip, i_gid, dp->i_gid);
@@ -1846,6 +1917,7 @@ ufs_mkdir(ap)
if (DOINGSOFTDEP(tvp))
softdep_revert_link(dp, ip);
UFS_VFREE(tvp, ip->i_number, dmode);
+ vn_seqc_write_end(tvp);
vgone(tvp);
vput(tvp);
return (error);
@@ -1861,6 +1933,7 @@ ufs_mkdir(ap)
if (DOINGSOFTDEP(tvp))
softdep_revert_link(dp, ip);
UFS_VFREE(tvp, ip->i_number, dmode);
+ vn_seqc_write_end(tvp);
vgone(tvp);
vput(tvp);
return (error);
@@ -1868,7 +1941,7 @@ ufs_mkdir(ap)
#endif
#endif /* !SUIDDIR */
UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
- ip->i_mode = dmode;
+ UFS_INODE_SET_MODE(ip, dmode);
DIP_SET(ip, i_mode, dmode);
tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */
ip->i_effnlink = 2;
@@ -1974,6 +2047,7 @@ ufs_mkdir(ap)
bad:
if (error == 0) {
*ap->a_vpp = tvp;
+ vn_seqc_write_end(tvp);
} else {
dp->i_effnlink--;
dp->i_nlink--;
@@ -1989,6 +2063,7 @@ bad:
UFS_INODE_SET_FLAG(ip, IN_CHANGE);
if (DOINGSOFTDEP(tvp))
softdep_revert_mkdir(dp, ip);
+ vn_seqc_write_end(tvp);
vgone(tvp);
vput(tvp);
}
@@ -2637,8 +2712,9 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
}
#endif
#endif /* !SUIDDIR */
+ vn_seqc_write_begin(tvp); /* Mostly to cover asserts */
UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
- ip->i_mode = mode;
+ UFS_INODE_SET_MODE(ip, mode);
DIP_SET(ip, i_mode, mode);
tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */
ip->i_effnlink = 1;
@@ -2648,7 +2724,7 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
softdep_setup_create(VTOI(dvp), ip);
if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) {
- ip->i_mode &= ~ISGID;
+ UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID);
DIP_SET(ip, i_mode, ip->i_mode);
}
@@ -2688,6 +2764,7 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
if (error)
goto bad;
+ vn_seqc_write_end(tvp);
*vpp = tvp;
return (0);
@@ -2702,6 +2779,7 @@ bad:
UFS_INODE_SET_FLAG(ip, IN_CHANGE);
if (DOINGSOFTDEP(tvp))
softdep_revert_create(VTOI(dvp), ip);
+ vn_seqc_write_end(tvp);
vgone(tvp);
vput(tvp);
return (error);
@@ -2740,6 +2818,7 @@ struct vop_vector ufs_vnodeops = {
.vop_write = VOP_PANIC,
.vop_accessx = ufs_accessx,
.vop_bmap = ufs_bmap,
+ .vop_fplookup_vexec = ufs_fplookup_vexec,
.vop_cachedlookup = ufs_lookup,
.vop_close = ufs_close,
.vop_create = ufs_create,