aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/compat/svr4/svr4_stream.c4
-rw-r--r--sys/dev/streams/streams.c7
-rw-r--r--sys/fs/devfs/devfs_vnops.c5
-rw-r--r--sys/fs/fifofs/fifo_vnops.c5
-rw-r--r--sys/kern/kern_descrip.c172
-rw-r--r--sys/kern/kern_event.c34
-rw-r--r--sys/kern/sys_generic.c12
-rw-r--r--sys/kern/sys_pipe.c14
-rw-r--r--sys/kern/uipc_mqueue.c23
-rw-r--r--sys/kern/uipc_syscalls.c33
-rw-r--r--sys/kern/uipc_usrreq.c412
-rw-r--r--sys/kern/vfs_syscalls.c34
-rw-r--r--sys/kern/vfs_vnops.c14
-rw-r--r--sys/netgraph/ng_socket.c2
-rw-r--r--sys/opencrypto/cryptodev.c7
-rw-r--r--sys/sys/file.h91
-rw-r--r--sys/sys/unpcb.h9
17 files changed, 343 insertions, 535 deletions
diff --git a/sys/compat/svr4/svr4_stream.c b/sys/compat/svr4/svr4_stream.c
index 3a2e2750c377..ddf082b5b204 100644
--- a/sys/compat/svr4/svr4_stream.c
+++ b/sys/compat/svr4/svr4_stream.c
@@ -1481,8 +1481,6 @@ svr4_do_putmsg(td, uap, fp)
uap->dat, uap->flags);
#endif /* DEBUG_SVR4 */
- FILE_LOCK_ASSERT(fp, MA_NOTOWNED);
-
if (uap->ctl != NULL) {
if ((error = copyin(uap->ctl, &ctl, sizeof(ctl))) != 0) {
#ifdef DEBUG_SVR4
@@ -1656,8 +1654,6 @@ svr4_do_getmsg(td, uap, fp)
error = 0;
afp = NULL;
- FILE_LOCK_ASSERT(fp, MA_NOTOWNED);
-
memset(&sc, 0, sizeof(sc));
#ifdef DEBUG_SVR4
diff --git a/sys/dev/streams/streams.c b/sys/dev/streams/streams.c
index dc67fc53811c..55df9e5913eb 100644
--- a/sys/dev/streams/streams.c
+++ b/sys/dev/streams/streams.c
@@ -251,12 +251,7 @@ streamsopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
return error;
}
- FILE_LOCK(fp);
- fp->f_data = so;
- fp->f_flag = FREAD|FWRITE;
- fp->f_ops = &svr4_netops;
- fp->f_type = DTYPE_SOCKET;
- FILE_UNLOCK(fp);
+ finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &svr4_netops);
/*
* Allocate a stream structure and attach it to this socket.
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index ba0131814478..269f2c3a72a1 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -800,12 +800,9 @@ devfs_open(struct vop_open_args *ap)
if(fp == NULL)
return (error);
#endif
- FILE_LOCK(fp);
KASSERT(fp->f_ops == &badfileops,
("Could not vnode bypass device on fdops %p", fp->f_ops));
- fp->f_data = dev;
- fp->f_ops = &devfs_ops_f;
- FILE_UNLOCK(fp);
+ finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
return (error);
}
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 2d3254f968dd..78718dbfed60 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -294,11 +294,8 @@ fail1:
}
mtx_unlock(&fifo_mtx);
KASSERT(fp != NULL, ("can't fifo/vnode bypass"));
- FILE_LOCK(fp);
KASSERT(fp->f_ops == &badfileops, ("not badfileops in fifo_open"));
- fp->f_data = fip;
- fp->f_ops = &fifo_ops_f;
- FILE_UNLOCK(fp);
+ finit(fp, fp->f_flag, DTYPE_FIFO, fip, &fifo_ops_f);
return (0);
}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 274522f14c21..070fac7516df 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -95,7 +95,6 @@ static int do_dup(struct thread *td, enum dup_type type, int old, int new,
static int fd_first_free(struct filedesc *, int, int);
static int fd_last_used(struct filedesc *, int, int);
static void fdgrowtable(struct filedesc *, int);
-static int fdrop_locked(struct file *fp, struct thread *td);
static void fdunused(struct filedesc *fdp, int fd);
static void fdused(struct filedesc *fdp, int fd);
@@ -137,9 +136,7 @@ struct filedesc0 {
/*
* Descriptor management.
*/
-struct filelist filehead; /* head of list of open files */
-int openfiles; /* actual number of open files */
-struct sx filelist_lock; /* sx to protect filelist */
+volatile int openfiles; /* actual number of open files */
struct mtx sigio_lock; /* mtx to protect pointers to sigio */
void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
@@ -428,9 +425,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = EBADF;
break;
}
- FILE_LOCK(fp);
td->td_retval[0] = OFLAGS(fp->f_flag);
- FILE_UNLOCK(fp);
FILEDESC_SUNLOCK(fdp);
break;
@@ -441,12 +436,13 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = EBADF;
break;
}
- FILE_LOCK(fp);
- fhold_locked(fp);
- fp->f_flag &= ~FCNTLFLAGS;
- fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
- FILE_UNLOCK(fp);
+ fhold(fp);
FILEDESC_SUNLOCK(fdp);
+ do {
+ tmp = flg = fp->f_flag;
+ tmp &= ~FCNTLFLAGS;
+ tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
+ } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
tmp = fp->f_flag & FNONBLOCK;
error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
if (error) {
@@ -459,9 +455,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
fdrop(fp, td);
break;
}
- FILE_LOCK(fp);
- fp->f_flag &= ~FNONBLOCK;
- FILE_UNLOCK(fp);
+ atomic_clear_int(&fp->f_flag, FNONBLOCK);
tmp = 0;
(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
fdrop(fp, td);
@@ -1359,15 +1353,13 @@ int
falloc(struct thread *td, struct file **resultfp, int *resultfd)
{
struct proc *p = td->td_proc;
- struct file *fp, *fq;
+ struct file *fp;
int error, i;
int maxuserfiles = maxfiles - (maxfiles / 20);
static struct timeval lastfail;
static int curfail;
fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
- sx_xlock(&filelist_lock);
-
if ((openfiles >= maxuserfiles &&
priv_check(td, PRIV_MAXFILES) != 0) ||
openfiles >= maxfiles) {
@@ -1375,18 +1367,16 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd)
printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
td->td_ucred->cr_ruid);
}
- sx_xunlock(&filelist_lock);
uma_zfree(file_zone, fp);
return (ENFILE);
}
- openfiles++;
+ atomic_add_int(&openfiles, 1);
/*
* If the process has file descriptor zero open, add the new file
* descriptor to the list of open files at that point, otherwise
* put it at the front of the list of open files.
*/
- fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep);
fp->f_count = 1;
if (resultfp)
fp->f_count++;
@@ -1395,12 +1385,6 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd)
fp->f_data = NULL;
fp->f_vnode = NULL;
FILEDESC_XLOCK(p->p_fd);
- if ((fq = p->p_fd->fd_ofiles[0])) {
- LIST_INSERT_AFTER(fq, fp, f_list);
- } else {
- LIST_INSERT_HEAD(&filehead, fp, f_list);
- }
- sx_xunlock(&filelist_lock);
if ((error = fdalloc(td, 0, &i))) {
FILEDESC_XUNLOCK(p->p_fd);
fdrop(fp, td);
@@ -1962,6 +1946,23 @@ closef(struct file *fp, struct thread *td)
}
/*
+ * Initialize the file pointer with the specified properties.
+ *
+ * The ops are set with release semantics to be certain that the flags, type,
+ * and data are visible when ops is. This is to prevent ops methods from being
+ * called with bad data.
+ *
+ * fp: file to initialize.
+ * flag: value stored in f_flag.
+ * type: value stored in f_type.
+ * data: value stored in f_data.
+ * ops: operations vector stored in f_ops, after the three fields above.
+ */
+void
+finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
+{
+ fp->f_data = data;
+ fp->f_flag = flag;
+ fp->f_type = type;
+ /* Must remain the final store: it publishes the fields written above. */
+ atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
+}
+
+
+/*
* Extract the file pointer associated with the specified descriptor for the
* current user process.
*
@@ -2135,54 +2136,20 @@ fputsock(struct socket *so)
sorele(so);
}
-int
-fdrop(struct file *fp, struct thread *td)
-{
-
- FILE_LOCK(fp);
- return (fdrop_locked(fp, td));
-}
-
/*
- * Drop reference on struct file passed in, may call closef if the
- * reference hits zero.
- * Expects struct file locked, and will unlock it.
+ * Handle the last reference to a file being closed.
*/
-static int
-fdrop_locked(struct file *fp, struct thread *td)
+int
+_fdrop(struct file *fp, struct thread *td)
{
int error;
- FILE_LOCK_ASSERT(fp, MA_OWNED);
-
- if (--fp->f_count > 0) {
- FILE_UNLOCK(fp);
- return (0);
- }
-
- /*
- * We might have just dropped the last reference to a file
- * object that is for a UNIX domain socket whose message
- * buffers are being examined in unp_gc(). If that is the
- * case, FWAIT will be set in f_gcflag and we need to wait for
- * unp_gc() to finish its scan.
- */
- while (fp->f_gcflag & FWAIT)
- msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0);
-
- /* We have the last ref so we can proceed without the file lock. */
- FILE_UNLOCK(fp);
- if (fp->f_count < 0)
- panic("fdrop: count < 0");
+ error = 0;
+ if (fp->f_count != 0)
+ panic("fdrop: count %d", fp->f_count);
if (fp->f_ops != &badfileops)
error = fo_close(fp, td);
- else
- error = 0;
-
- sx_xlock(&filelist_lock);
- LIST_REMOVE(fp, f_list);
- openfiles--;
- sx_xunlock(&filelist_lock);
+ atomic_subtract_int(&openfiles, 1);
crfree(fp->f_cred);
uma_zfree(file_zone, fp);
@@ -2225,9 +2192,7 @@ flock(struct thread *td, struct flock_args *uap)
lf.l_len = 0;
if (uap->how & LOCK_UN) {
lf.l_type = F_UNLCK;
- FILE_LOCK(fp);
- fp->f_flag &= ~FHASLOCK;
- FILE_UNLOCK(fp);
+ atomic_clear_int(&fp->f_flag, FHASLOCK);
error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
goto done2;
}
@@ -2239,9 +2204,7 @@ flock(struct thread *td, struct flock_args *uap)
error = EBADF;
goto done2;
}
- FILE_LOCK(fp);
- fp->f_flag |= FHASLOCK;
- FILE_UNLOCK(fp);
+ atomic_set_int(&fp->f_flag, FHASLOCK);
error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
(uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
@@ -2286,9 +2249,7 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode,
* Check that the mode the file is being opened for is a
* subset of the mode of the existing descriptor.
*/
- FILE_LOCK(wfp);
if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
- FILE_UNLOCK(wfp);
FILEDESC_XUNLOCK(fdp);
return (EACCES);
}
@@ -2297,8 +2258,7 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode,
fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
if (fp == NULL)
fdused(fdp, indx);
- fhold_locked(wfp);
- FILE_UNLOCK(wfp);
+ fhold(wfp);
FILEDESC_XUNLOCK(fdp);
if (fp != NULL)
/*
@@ -2419,29 +2379,23 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
struct proc *p;
int error, n;
- /*
- * Note: because the number of file descriptors is calculated
- * in different ways for sizing vs returning the data,
- * there is information leakage from the first loop. However,
- * it is of a similar order of magnitude to the leakage from
- * global system statistics such as kern.openfiles.
- */
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
if (req->oldptr == NULL) {
- n = 16; /* A slight overestimate. */
- sx_slock(&filelist_lock);
- LIST_FOREACH(fp, &filehead, f_list) {
- /*
- * We should grab the lock, but this is an
- * estimate, so does it really matter?
- */
- /* mtx_lock(fp->f_mtxp); */
- n += fp->f_count;
- /* mtx_unlock(f->f_mtxp); */
+ n = 0;
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ fdp = fdhold(p);
+ if (fdp == NULL)
+ continue;
+ /* overestimates sparse tables. */
+ n += fdp->fd_lastfile;
+ fddrop(fdp);
}
- sx_sunlock(&filelist_lock);
+ sx_sunlock(&allproc_lock);
return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
}
error = 0;
@@ -2472,7 +2426,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
xf.xf_vnode = fp->f_vnode;
xf.xf_type = fp->f_type;
xf.xf_count = fp->f_count;
- xf.xf_msgcount = fp->f_msgcount;
+ xf.xf_msgcount = 0;
xf.xf_offset = fp->f_offset;
xf.xf_flag = fp->f_flag;
error = SYSCTL_OUT(req, &xf, sizeof(xf));
@@ -2523,7 +2477,6 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
continue;
bzero(kif, sizeof(*kif));
kif->kf_structsize = sizeof(*kif);
- FILE_LOCK(fp);
vp = NULL;
so = NULL;
kif->kf_fd = i;
@@ -2531,7 +2484,6 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
case DTYPE_VNODE:
kif->kf_type = KF_TYPE_VNODE;
vp = fp->f_vnode;
- vref(vp);
break;
case DTYPE_SOCKET:
@@ -2583,8 +2535,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
if (fp->f_flag & FHASLOCK)
kif->kf_flags |= KF_FLAG_HASLOCK;
kif->kf_offset = fp->f_offset;
- FILE_UNLOCK(fp);
if (vp != NULL) {
+ vref(vp);
switch (vp->v_type) {
case VNON:
kif->kf_vnode_type = KF_VTYPE_VNON;
@@ -2736,7 +2688,7 @@ db_print_file(struct file *fp, int header)
p = file_to_first_proc(fp);
db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
- fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode,
+ 0, fp->f_count, 0, fp->f_vnode,
p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
}
@@ -2754,13 +2706,24 @@ DB_SHOW_COMMAND(file, db_show_file)
DB_SHOW_COMMAND(files, db_show_files)
{
+ struct filedesc *fdp;
struct file *fp;
+ struct proc *p;
int header;
+ int n;
header = 1;
- LIST_FOREACH(fp, &filehead, f_list) {
- db_print_file(fp, header);
- header = 0;
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ if ((fdp = p->p_fd) == NULL)
+ continue;
+ for (n = 0; n < fdp->fd_nfiles; ++n) {
+ if ((fp = fdp->fd_ofiles[n]) == NULL)
+ continue;
+ db_print_file(fp, header);
+ header = 0;
+ }
}
}
#endif
@@ -2772,7 +2735,7 @@ SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
&maxfiles, 0, "Maximum number of files");
SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
- &openfiles, 0, "System-wide number of open files");
+ __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
/* ARGSUSED*/
static void
@@ -2781,7 +2744,6 @@ filelistinit(void *dummy)
file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
- sx_init(&filelist_lock, "filelist lock");
mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
}
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 4d75822dbff2..b5d01d07621c 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -531,12 +531,7 @@ kqueue(struct thread *td, struct kqueue_args *uap)
SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
FILEDESC_XUNLOCK(fdp);
- FILE_LOCK(fp);
- fp->f_flag = FREAD | FWRITE;
- fp->f_type = DTYPE_KQUEUE;
- fp->f_data = kq;
- fp->f_ops = &kqueueops;
- FILE_UNLOCK(fp);
+ finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
fdrop(fp, td);
td->td_retval[0] = fd;
@@ -990,24 +985,17 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp)
error = 0;
- FILE_LOCK(fp);
- do {
- kq = fp->f_data;
- if (fp->f_type != DTYPE_KQUEUE || kq == NULL) {
- error = EBADF;
- break;
- }
- *kqp = kq;
- KQ_LOCK(kq);
- if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
- KQ_UNLOCK(kq);
- error = EBADF;
- break;
- }
- kq->kq_refcnt++;
+ kq = fp->f_data;
+ if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
+ return (EBADF);
+ *kqp = kq;
+ KQ_LOCK(kq);
+ if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
KQ_UNLOCK(kq);
- } while (0);
- FILE_UNLOCK(fp);
+ return (EBADF);
+ }
+ kq->kq_refcnt++;
+ KQ_UNLOCK(kq);
return error;
}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 0800c08073e6..9c800f37d342 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -646,21 +646,17 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
FILEDESC_XUNLOCK(fdp);
goto out;
case FIONBIO:
- FILE_LOCK(fp);
if ((tmp = *(int *)data))
- fp->f_flag |= FNONBLOCK;
+ atomic_set_int(&fp->f_flag, FNONBLOCK);
else
- fp->f_flag &= ~FNONBLOCK;
- FILE_UNLOCK(fp);
+ atomic_clear_int(&fp->f_flag, FNONBLOCK);
data = (void *)&tmp;
break;
case FIOASYNC:
- FILE_LOCK(fp);
if ((tmp = *(int *)data))
- fp->f_flag |= FASYNC;
+ atomic_set_int(&fp->f_flag, FASYNC);
else
- fp->f_flag &= ~FASYNC;
- FILE_UNLOCK(fp);
+ atomic_clear_int(&fp->f_flag, FASYNC);
data = (void *)&tmp;
break;
}
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 262ef0c1fec8..27ecf80896da 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -363,12 +363,7 @@ pipe(td, uap)
* to avoid races against processes which manage to dup() the read
* side while we are blocked trying to allocate the write side.
*/
- FILE_LOCK(rf);
- rf->f_flag = FREAD | FWRITE;
- rf->f_type = DTYPE_PIPE;
- rf->f_data = rpipe;
- rf->f_ops = &pipeops;
- FILE_UNLOCK(rf);
+ finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops);
error = falloc(td, &wf, &fd);
if (error) {
fdclose(fdp, rf, td->td_retval[0], td);
@@ -378,12 +373,7 @@ pipe(td, uap)
return (error);
}
/* An extra reference on `wf' has been held for us by falloc(). */
- FILE_LOCK(wf);
- wf->f_flag = FREAD | FWRITE;
- wf->f_type = DTYPE_PIPE;
- wf->f_data = wpipe;
- wf->f_ops = &pipeops;
- FILE_UNLOCK(wf);
+ finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops);
fdrop(wf, td);
td->td_retval[1] = fd;
fdrop(rf, td);
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
index 1c5cadbc3311..8fe34bcba0a9 100644
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -1999,12 +1999,8 @@ kmq_open(struct thread *td, struct kmq_open_args *uap)
mqnode_addref(pn);
sx_xunlock(&mqfs_data.mi_lock);
- FILE_LOCK(fp);
- fp->f_flag = (flags & (FREAD | FWRITE | O_NONBLOCK));
- fp->f_type = DTYPE_MQUEUE;
- fp->f_data = pn;
- fp->f_ops = &mqueueops;
- FILE_UNLOCK(fp);
+ finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
+ &mqueueops);
FILEDESC_XLOCK(fdp);
if (fdp->fd_ofiles[fd] == fp)
@@ -2097,6 +2093,7 @@ kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
struct mqueue *mq;
struct file *fp;
struct mq_attr attr, oattr;
+ u_int oflag, flag;
int error;
if (uap->attr) {
@@ -2112,13 +2109,15 @@ kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
oattr.mq_maxmsg = mq->mq_maxmsg;
oattr.mq_msgsize = mq->mq_msgsize;
oattr.mq_curmsgs = mq->mq_curmsgs;
- FILE_LOCK(fp);
- oattr.mq_flags = (O_NONBLOCK & fp->f_flag);
if (uap->attr) {
- fp->f_flag &= ~O_NONBLOCK;
- fp->f_flag |= (attr.mq_flags & O_NONBLOCK);
- }
- FILE_UNLOCK(fp);
+ do {
+ oflag = flag = fp->f_flag;
+ flag &= ~O_NONBLOCK;
+ flag |= (attr.mq_flags & O_NONBLOCK);
+ } while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
+ } else
+ oflag = fp->f_flag;
+ oattr.mq_flags = (O_NONBLOCK & oflag);
fdrop(fp, td);
if (uap->oattr)
error = copyout(&oattr, uap->oattr, sizeof(oattr));
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index faf7f24a66cc..616afa0e5d65 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -180,12 +180,7 @@ socket(td, uap)
if (error) {
fdclose(fdp, fp, fd, td);
} else {
- FILE_LOCK(fp);
- fp->f_data = so; /* already has ref count */
- fp->f_flag = FREAD|FWRITE;
- fp->f_type = DTYPE_SOCKET;
- fp->f_ops = &socketops;
- FILE_UNLOCK(fp);
+ finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
td->td_retval[0] = fd;
}
fdrop(fp, td);
@@ -423,12 +418,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
if (pgid != 0)
fsetown(pgid, &so->so_sigio);
- FILE_LOCK(nfp);
- nfp->f_data = so; /* nfp has ref count from falloc */
- nfp->f_flag = fflag;
- nfp->f_type = DTYPE_SOCKET;
- nfp->f_ops = &socketops;
- FILE_UNLOCK(nfp);
+ finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
/* Sync socket nonblocking/async state with file flags */
tmp = fflag & FNONBLOCK;
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
@@ -640,16 +630,8 @@ socketpair(td, uap)
if (error)
goto free4;
}
- FILE_LOCK(fp1);
- fp1->f_flag = FREAD|FWRITE;
- fp1->f_type = DTYPE_SOCKET;
- fp1->f_ops = &socketops;
- FILE_UNLOCK(fp1);
- FILE_LOCK(fp2);
- fp2->f_flag = FREAD|FWRITE;
- fp2->f_type = DTYPE_SOCKET;
- fp2->f_ops = &socketops;
- FILE_UNLOCK(fp2);
+ finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
+ finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
so1 = so2 = NULL;
error = copyout(sv, uap->rsv, 2 * sizeof (int));
if (error)
@@ -2270,12 +2252,7 @@ sctp_peeloff(td, uap)
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
ACCEPT_UNLOCK();
- FILE_LOCK(nfp);
- nfp->f_data = so;
- nfp->f_flag = fflag;
- nfp->f_type = DTYPE_SOCKET;
- nfp->f_ops = &socketops;
- FILE_UNLOCK(nfp);
+ finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
if (error)
goto noconnection;
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 9fea71b4b626..1d6cc464da0f 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -233,10 +233,11 @@ static void unp_shutdown(struct unpcb *);
static void unp_drop(struct unpcb *, int);
static void unp_gc(__unused void *, int);
static void unp_scan(struct mbuf *, void (*)(struct file *));
-static void unp_mark(struct file *);
static void unp_discard(struct file *);
static void unp_freerights(struct file **, int);
static int unp_internalize(struct mbuf **, struct thread *);
+static void unp_internalize_fp(struct file *);
+static void unp_externalize_fp(struct file *);
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
/*
@@ -586,9 +587,9 @@ uipc_detach(struct socket *so)
unp_drop(ref, ECONNRESET);
UNP_PCB_UNLOCK(ref);
}
+ local_unp_rights = unp_rights;
UNP_GLOBAL_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
- local_unp_rights = unp_rights;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
unp->unp_refcount--;
@@ -1600,10 +1601,7 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
panic("unp_externalize fdalloc failed");
fp = *rp++;
td->td_proc->p_fd->fd_ofiles[f] = fp;
- FILE_LOCK(fp);
- fp->f_msgcount--;
- FILE_UNLOCK(fp);
- unp_rights--;
+ unp_externalize_fp(fp);
*fdp++ = f;
}
FILEDESC_XUNLOCK(td->td_proc->p_fd);
@@ -1765,11 +1763,8 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
for (i = 0; i < oldfds; i++) {
fp = fdescp->fd_ofiles[*fdp++];
*rp++ = fp;
- FILE_LOCK(fp);
- fp->f_count++;
- fp->f_msgcount++;
- FILE_UNLOCK(fp);
- unp_rights++;
+ fhold(fp);
+ unp_internalize_fp(fp);
}
FILEDESC_SUNLOCK(fdescp);
break;
@@ -1860,230 +1855,198 @@ unp_addsockcred(struct thread *td, struct mbuf *control)
return (m);
}
+/*
+ * Map a struct file to the unpcb behind it.
+ *
+ * Returns NULL unless fp is a DTYPE_SOCKET file whose f_data socket is
+ * non-NULL and belongs to localdomain; otherwise returns sotounpcb() of
+ * that socket.
+ */
+static struct unpcb *
+fptounp(struct file *fp)
+{
+	struct socket *sock;
+
+	if (fp->f_type == DTYPE_SOCKET) {
+		sock = fp->f_data;
+		if (sock != NULL && sock->so_proto->pr_domain == &localdomain)
+			return sotounpcb(sock);
+	}
+	return (NULL);
+}
+
+/*
+ * Release a file reference: undo the message accounting for fp through
+ * unp_externalize_fp(), then close the file with closef() using a NULL
+ * thread. The closef() return value is deliberately ignored.
+ */
+static void
+unp_discard(struct file *fp)
+{
+
+ unp_externalize_fp(fp);
+ (void) closef(fp, (struct thread *)NULL);
+}
+
+/*
+ * Account for a file reference being placed into a message; called from
+ * unp_internalize() right after fhold(fp).
+ *
+ * If fp maps to an unpcb (see fptounp()), record fp in unp_file and bump
+ * that pcb's unp_msgcount. unp_rights is bumped for every file, socket or
+ * not. All updates are made with the UNP global write lock held.
+ */
+static void
+unp_internalize_fp(struct file *fp)
+{
+ struct unpcb *unp;
+
+ UNP_GLOBAL_WLOCK();
+ if ((unp = fptounp(fp)) != NULL) {
+ unp->unp_file = fp;
+ unp->unp_msgcount++;
+ }
+ unp_rights++;
+ UNP_GLOBAL_WUNLOCK();
+}
+
+/*
+ * Reverse the accounting done by unp_internalize_fp(); called from
+ * unp_externalize() and unp_discard().
+ *
+ * If fp maps to an unpcb (see fptounp()), drop that pcb's unp_msgcount.
+ * unp_rights is dropped for every file. unp_file is left untouched. All
+ * updates are made with the UNP global write lock held.
+ */
+static void
+unp_externalize_fp(struct file *fp)
+{
+ struct unpcb *unp;
+
+ UNP_GLOBAL_WLOCK();
+ if ((unp = fptounp(fp)) != NULL)
+ unp->unp_msgcount--;
+ unp_rights--;
+ UNP_GLOBAL_WUNLOCK();
+}
+
/*
 * unp_marked counts sockets newly marked reachable in the current unp_gc()
 * pass and unp_unreachable counts those flagged UNPGC_DEAD. Presumably only
 * the gc task touches them, so no explicit synchronization is used.
 */
-static int unp_defer;
+static int unp_marked;
+static int unp_unreachable;
-static int unp_taskcount;
-SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
+/*
+ * unp_scan() callback: mark the unpcb behind fp as reachable.
+ *
+ * Files that do not map to an unpcb (fptounp() returns NULL) are ignored.
+ * A pcb that already carries UNPGC_REF is left alone. Otherwise UNPGC_DEAD
+ * is cleared, UNPGC_REF is set and unp_marked is bumped, which makes
+ * unp_gc() run another marking pass.
+ */
+static void
+unp_accessable(struct file *fp)
+{
+	struct unpcb *unp;
+
+	/*
+	 * Test the lookup result, not fp: fp has already been dereferenced
+	 * by fptounp(), whereas unp is NULL for any file that is not a
+	 * local-domain socket and must not be dereferenced below.
+	 */
+	if ((unp = fptounp(fp)) == NULL)
+		return;
+	if (unp->unp_gcflag & UNPGC_REF)
+		return;
+	unp->unp_gcflag &= ~UNPGC_DEAD;
+	unp->unp_gcflag |= UNPGC_REF;
+	unp_marked++;
+}
+
+/*
+ * Process one unpcb during a marking pass of unp_gc().
+ *
+ * A pcb already flagged UNPGC_SCANNED is skipped. A pcb that has not been
+ * marked UNPGC_REF and whose file reference count is non-zero and equal to
+ * its unp_msgcount is flagged UNPGC_DEAD and counted in unp_unreachable.
+ * Otherwise every file that unp_scan() finds in the socket's receive
+ * buffer, and in the receive buffers of the sockets on its so_comp queue,
+ * is handed to unp_accessable(), and the pcb is flagged UNPGC_SCANNED.
+ */
+static void
+unp_gc_process(struct unpcb *unp)
+{
+	struct socket *soa;
+	struct socket *so;
+	struct file *fp;
+
+	/* Already processed. */
+	if (unp->unp_gcflag & UNPGC_SCANNED)
+		return;
+	fp = unp->unp_file;
+	/*
+	 * Check for a socket potentially in a cycle. It must be in a
+	 * queue as indicated by msgcount, and this must equal the file
+	 * reference count. Note that when msgcount is 0 the file is NULL.
+	 *
+	 * A socket that unp_accessable() has already marked UNPGC_REF is
+	 * reachable through another socket's queue; it must not be flagged
+	 * dead again on a later pass, so fall through and scan it instead.
+	 */
+	if ((unp->unp_gcflag & UNPGC_REF) == 0 &&
+	    unp->unp_msgcount != 0 && fp != NULL && fp->f_count != 0 &&
+	    fp->f_count == unp->unp_msgcount) {
+		unp->unp_gcflag |= UNPGC_DEAD;
+		unp_unreachable++;
+		return;
+	}
+	/*
+	 * Mark all sockets we reference with RIGHTS.
+	 */
+	so = unp->unp_socket;
+	SOCKBUF_LOCK(&so->so_rcv);
+	unp_scan(so->so_rcv.sb_mb, unp_accessable);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	/*
+	 * Mark all sockets in our accept queue.
+	 */
+	ACCEPT_LOCK();
+	TAILQ_FOREACH(soa, &so->so_comp, so_list) {
+		SOCKBUF_LOCK(&soa->so_rcv);
+		unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+		SOCKBUF_UNLOCK(&soa->so_rcv);
+	}
+	ACCEPT_UNLOCK();
+	unp->unp_gcflag |= UNPGC_SCANNED;
+}
static int unp_recycled;
SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
+static int unp_taskcount;
+SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
+
static void
unp_gc(__unused void *arg, int pending)
{
- struct file *fp, *nextfp;
- struct socket *so;
- struct file **extra_ref, **fpp;
- int nunref, i;
- int nfiles_snap;
- int nfiles_slack = 20;
+ struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL };
+ struct unp_head **head;
+ struct file **unref;
+ struct unpcb *unp;
+ int i;
unp_taskcount++;
- unp_defer = 0;
+ UNP_GLOBAL_RLOCK();
+ /*
+ * First clear all gc flags from previous runs. UNPGC_SCANNED has to be
+ * cleared as well: unp_gc_process() skips any pcb that carries it, so a
+ * flag left over from an earlier run would keep that socket's queues
+ * from ever being scanned again.
+ */
+ for (head = heads; *head != NULL; head++)
+ LIST_FOREACH(unp, *head, unp_link)
+ unp->unp_gcflag &=
+ ~(UNPGC_REF | UNPGC_DEAD | UNPGC_SCANNED);
/*
- * Before going through all this, set all FDs to be NOT deferred and
- * NOT externally accessible.
+ * Scan marking all reachable sockets with UNPGC_REF. Once a socket
+ * is reachable all of the sockets it references are reachable.
+ * Stop the scan once we do a complete loop without discovering
+ * a new reachable socket.
*/
- sx_slock(&filelist_lock);
- LIST_FOREACH(fp, &filehead, f_list)
- fp->f_gcflag &= ~(FMARK|FDEFER);
do {
- KASSERT(unp_defer >= 0, ("unp_gc: unp_defer %d", unp_defer));
- LIST_FOREACH(fp, &filehead, f_list) {
- FILE_LOCK(fp);
- /*
- * If the file is not open, skip it -- could be a
- * file in the process of being opened, or in the
- * process of being closed. If the file is
- * "closing", it may have been marked for deferred
- * consideration. Clear the flag now if so.
- */
- if (fp->f_count == 0) {
- if (fp->f_gcflag & FDEFER)
- unp_defer--;
- fp->f_gcflag &= ~(FMARK|FDEFER);
- FILE_UNLOCK(fp);
- continue;
- }
- /*
- * If we already marked it as 'defer' in a
- * previous pass, then try to process it this
- * time and un-mark it.
- */
- if (fp->f_gcflag & FDEFER) {
- fp->f_gcflag &= ~FDEFER;
- unp_defer--;
- } else {
- /*
- * If it's not deferred, then check if it's
- * already marked.. if so skip it
- */
- if (fp->f_gcflag & FMARK) {
- FILE_UNLOCK(fp);
- continue;
- }
- /*
- * If all references are from messages in
- * transit, then skip it. it's not externally
- * accessible.
- */
- if (fp->f_count == fp->f_msgcount) {
- FILE_UNLOCK(fp);
- continue;
- }
- /*
- * If it got this far then it must be
- * externally accessible.
- */
- fp->f_gcflag |= FMARK;
- }
- /*
- * Either it was deferred, or it is externally
- * accessible and not already marked so. Now check
- * if it is possibly one of OUR sockets.
- */
- if (fp->f_type != DTYPE_SOCKET ||
- (so = fp->f_data) == NULL) {
- FILE_UNLOCK(fp);
- continue;
- }
- if (so->so_proto->pr_domain != &localdomain ||
- (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
- FILE_UNLOCK(fp);
- continue;
+ unp_unreachable = 0;
+ unp_marked = 0;
+ for (head = heads; *head != NULL; head++)
+ LIST_FOREACH(unp, *head, unp_link)
+ unp_gc_process(unp);
+ } while (unp_marked);
+ UNP_GLOBAL_RUNLOCK();
+ if (unp_unreachable == 0)
+ return;
+ /*
+ * Allocate space for a local list of dead unpcbs.
+ */
+ unref = malloc(unp_unreachable * sizeof(struct file *),
+ M_TEMP, M_WAITOK);
+ /*
+ * Iterate looking for sockets which have been specifically marked
+ * as unreachable and store them locally.
+ */
+ UNP_GLOBAL_RLOCK();
+ for (i = 0, head = heads; *head != NULL; head++)
+ LIST_FOREACH(unp, *head, unp_link)
+ if (unp->unp_gcflag & UNPGC_DEAD) {
+ unref[i++] = unp->unp_file;
+ KASSERT(unp->unp_file != NULL,
+ ("unp_gc: Invalid unpcb."));
+ KASSERT(i <= unp_unreachable,
+ ("unp_gc: incorrect unreachable count."));
}
-
- /*
- * Tell any other threads that do a subsequent
- * fdrop() that we are scanning the message
- * buffers.
- */
- fp->f_gcflag |= FWAIT;
- FILE_UNLOCK(fp);
-
- /*
- * So, Ok, it's one of our sockets and it IS
- * externally accessible (or was deferred). Now we
- * look to see if we hold any file descriptors in its
- * message buffers. Follow those links and mark them
- * as accessible too.
- */
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_mark);
- SOCKBUF_UNLOCK(&so->so_rcv);
-
- /*
- * Wake up any threads waiting in fdrop().
- */
- FILE_LOCK(fp);
- fp->f_gcflag &= ~FWAIT;
- wakeup(&fp->f_gcflag);
- FILE_UNLOCK(fp);
- }
- } while (unp_defer);
- sx_sunlock(&filelist_lock);
+ UNP_GLOBAL_RUNLOCK();
/*
- * XXXRW: The following comments need updating for a post-SMPng and
- * deferred unp_gc() world, but are still generally accurate.
- *
- * We grab an extra reference to each of the file table entries that
- * are not otherwise accessible and then free the rights that are
- * stored in messages on them.
- *
- * The bug in the orginal code is a little tricky, so I'll describe
- * what's wrong with it here.
- *
- * It is incorrect to simply unp_discard each entry for f_msgcount
- * times -- consider the case of sockets A and B that contain
- * references to each other. On a last close of some other socket,
- * we trigger a gc since the number of outstanding rights (unp_rights)
- * is non-zero. If during the sweep phase the gc code unp_discards,
- * we end up doing a (full) closef on the descriptor. A closef on A
- * results in the following chain. Closef calls soo_close, which
- * calls soclose. Soclose calls first (through the switch
- * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
- * returns because the previous instance had set unp_gcing, and we
- * return all the way back to soclose, which marks the socket with
- * SS_NOFDREF, and then calls sofree. Sofree calls sorflush to free
- * up the rights that are queued in messages on the socket A, i.e.,
- * the reference on B. The sorflush calls via the dom_dispose switch
- * unp_dispose, which unp_scans with unp_discard. This second
- * instance of unp_discard just calls closef on B.
- *
- * Well, a similar chain occurs on B, resulting in a sorflush on B,
- * which results in another closef on A. Unfortunately, A is already
- * being closed, and the descriptor has already been marked with
- * SS_NOFDREF, and soclose panics at this point.
- *
- * Here, we first take an extra reference to each inaccessible
- * descriptor. Then, we call sorflush ourself, since we know it is a
- * Unix domain socket anyhow. After we destroy all the rights
- * carried in messages, we do a last closef to get rid of our extra
- * reference. This is the last close, and the unp_detach etc will
- * shut down the socket.
- *
- * 91/09/19, bsy@cs.cmu.edu
+ * All further operation is now done on a local list. We first ref
+ * all sockets to avoid closing them until all are flushed.
*/
-again:
- nfiles_snap = openfiles + nfiles_slack; /* some slack */
- extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP,
- M_WAITOK);
- sx_slock(&filelist_lock);
- if (nfiles_snap < openfiles) {
- sx_sunlock(&filelist_lock);
- free(extra_ref, M_TEMP);
- nfiles_slack += 20;
- goto again;
- }
- for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
- fp != NULL; fp = nextfp) {
- nextfp = LIST_NEXT(fp, f_list);
- FILE_LOCK(fp);
- /*
- * If it's not open, skip it
- */
- if (fp->f_count == 0) {
- FILE_UNLOCK(fp);
- continue;
- }
- /*
- * If all refs are from msgs, and it's not marked accessible
- * then it must be referenced from some unreachable cycle of
- * (shut-down) FDs, so include it in our list of FDs to
- * remove.
- */
- if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
- *fpp++ = fp;
- nunref++;
- fp->f_count++;
- }
- FILE_UNLOCK(fp);
- }
- sx_sunlock(&filelist_lock);
+ for (i = 0; i < unp_unreachable; i++)
+ fhold(unref[i]);
/*
- * For each FD on our hit list, do the following two things:
+ * Now flush all sockets, free'ing rights. This will free the
+ * struct files associated with these sockets but leave each socket
+ * with one remaining ref.
*/
- for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
- struct file *tfp = *fpp;
- FILE_LOCK(tfp);
- if (tfp->f_type == DTYPE_SOCKET &&
- tfp->f_data != NULL) {
- FILE_UNLOCK(tfp);
- sorflush(tfp->f_data);
- } else {
- FILE_UNLOCK(tfp);
- }
- }
- for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
- closef(*fpp, (struct thread *) NULL);
- unp_recycled++;
- }
- free(extra_ref, M_TEMP);
+ for (i = 0; i < unp_unreachable; i++)
+ sorflush(unref[i]->f_data);
+ /*
+ * And finally release the sockets so they can be reclaimed.
+ */
+ for (i = 0; i < unp_unreachable; i++)
+ fdrop(unref[i], NULL);
+ unp_recycled += unp_unreachable;
+ free(unref, M_TEMP);
}
void
@@ -2143,31 +2106,6 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *))
}
}
-static void
-unp_mark(struct file *fp)
-{
-
- /* XXXRW: Should probably assert file list lock here. */
-
- if (fp->f_gcflag & FMARK)
- return;
- unp_defer++;
- fp->f_gcflag |= (FMARK|FDEFER);
-}
-
-static void
-unp_discard(struct file *fp)
-{
-
- UNP_GLOBAL_WLOCK();
- FILE_LOCK(fp);
- fp->f_msgcount--;
- unp_rights--;
- FILE_UNLOCK(fp);
- UNP_GLOBAL_WUNLOCK();
- (void) closef(fp, (struct thread *)NULL);
-}
-
#ifdef DDB
static void
db_print_indent(int indent)
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 20d722efb646..0e42ea393f3f 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1022,6 +1022,8 @@ kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
return (error);
/* An extra reference on `nfp' has been held for us by falloc(). */
fp = nfp;
+ /* Set the flags early so the finit in devfs can pick them up. */
+ fp->f_flag = flags & FMASK;
cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
td->td_dupfd = -1; /* XXX check for fdopen */
@@ -1067,16 +1069,16 @@ kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
- FILE_LOCK(fp);
- fp->f_vnode = vp;
- if (fp->f_data == NULL)
- fp->f_data = vp;
- fp->f_flag = flags & FMASK;
- fp->f_seqcount = 1;
- fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
- if (fp->f_ops == &badfileops)
- fp->f_ops = &vnops;
- FILE_UNLOCK(fp);
+ fp->f_vnode = vp; /* XXX Does devfs need this? */
+ /*
+ * If the file wasn't claimed by devfs bind it to the normal
+ * vnode operations here.
+ */
+ if (fp->f_ops == &badfileops) {
+ KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
+ fp->f_seqcount = 1;
+ finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
+ }
VOP_UNLOCK(vp, 0, td);
if (flags & (O_EXLOCK | O_SHLOCK)) {
@@ -1093,7 +1095,7 @@ kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
type)) != 0)
goto bad;
- fp->f_flag |= FHASLOCK;
+ atomic_set_int(&fp->f_flag, FHASLOCK);
}
if (flags & O_TRUNC) {
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
@@ -4179,14 +4181,8 @@ fhopen(td, uap)
}
/* An extra reference on `nfp' has been held for us by falloc(). */
fp = nfp;
-
- FILE_LOCK(nfp);
nfp->f_vnode = vp;
- nfp->f_data = vp;
- nfp->f_flag = fmode & FMASK;
- nfp->f_type = DTYPE_VNODE;
- nfp->f_ops = &vnops;
- FILE_UNLOCK(nfp);
+ finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
if (fmode & (O_EXLOCK | O_SHLOCK)) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
@@ -4215,7 +4211,7 @@ fhopen(td, uap)
goto out;
}
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- fp->f_flag |= FHASLOCK;
+ atomic_set_int(&fp->f_flag, FHASLOCK);
}
VOP_UNLOCK(vp, 0, td);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 50835747e78f..c7df6adef59f 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -488,10 +488,12 @@ vn_read(fp, uio, active_cred, flags, td)
{
struct vnode *vp;
int error, ioflag;
+ struct mtx *mtxp;
int vfslocked;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
+ mtxp = NULL;
vp = fp->f_vnode;
ioflag = 0;
if (fp->f_flag & FNONBLOCK)
@@ -505,13 +507,15 @@ vn_read(fp, uio, active_cred, flags, td)
* It is now protected by the FOFFSET_LOCKED flag.
*/
if ((flags & FOF_OFFSET) == 0) {
- FILE_LOCK(fp);
+ mtxp = mtx_pool_find(mtxpool_sleep, fp);
+ mtx_lock(mtxp);
while(fp->f_vnread_flags & FOFFSET_LOCKED) {
fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
- msleep(&fp->f_vnread_flags,fp->f_mtxp,PUSER -1,"vnread offlock",0);
+ msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
+ "vnread offlock", 0);
}
fp->f_vnread_flags |= FOFFSET_LOCKED;
- FILE_UNLOCK(fp);
+ mtx_unlock(mtxp);
vn_lock(vp, LK_SHARED | LK_RETRY, td);
uio->uio_offset = fp->f_offset;
} else
@@ -526,11 +530,11 @@ vn_read(fp, uio, active_cred, flags, td)
error = VOP_READ(vp, uio, ioflag, fp->f_cred);
if ((flags & FOF_OFFSET) == 0) {
fp->f_offset = uio->uio_offset;
- FILE_LOCK(fp);
+ mtx_lock(mtxp);
if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
wakeup(&fp->f_vnread_flags);
fp->f_vnread_flags = 0;
- FILE_UNLOCK(fp);
+ mtx_unlock(mtxp);
}
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0, td);
diff --git a/sys/netgraph/ng_socket.c b/sys/netgraph/ng_socket.c
index 377182b2a68c..b4447940bdd0 100644
--- a/sys/netgraph/ng_socket.c
+++ b/sys/netgraph/ng_socket.c
@@ -689,7 +689,7 @@ ng_internalize(struct mbuf *control, struct thread *td)
vn = fp->f_data;
if (vn && (vn->v_type == VCHR)) {
/* for a VCHR, actually reference the FILE */
- fp->f_count++;
+ fhold(fp);
/* XXX then what :) */
/* how to pass on to other modules? */
} else {
diff --git a/sys/opencrypto/cryptodev.c b/sys/opencrypto/cryptodev.c
index a940a3cd1913..c9fc6d264fc7 100644
--- a/sys/opencrypto/cryptodev.c
+++ b/sys/opencrypto/cryptodev.c
@@ -840,12 +840,7 @@ cryptoioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread
return (error);
}
/* falloc automatically provides an extra reference to 'f'. */
- FILE_LOCK(f);
- f->f_flag = FREAD | FWRITE;
- f->f_type = DTYPE_CRYPTO;
- f->f_data = fcr;
- f->f_ops = &cryptofops;
- FILE_UNLOCK(f);
+ finit(f, FREAD | FWRITE, DTYPE_CRYPTO, fcr, &cryptofops);
*(u_int32_t *)data = fd;
fdrop(f, td);
break;
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 58501fa83f37..c5f4afbfadae 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -99,49 +99,37 @@ struct fileops {
*
* Below is the list of locks that protects members in struct file.
*
- * (fl) filelist_lock
- * (f) f_mtx in struct file
+ * (f) protected with mtx_lock(mtx_pool_find(mtxpool_sleep, fp))
* none not locked
*/
struct file {
- LIST_ENTRY(file) f_list;/* (fl) list of active files */
- short f_type; /* descriptor type */
- void *f_data; /* file descriptor specific data */
- u_int f_flag; /* see fcntl.h */
- struct mtx *f_mtxp; /* mutex to protect data */
- struct fileops *f_ops; /* File operations */
- struct ucred *f_cred; /* credentials associated with descriptor */
- int f_count; /* (f) reference count */
- struct vnode *f_vnode; /* NULL or applicable vnode */
-
- /* DFLAG_SEEKABLE specific fields */
- off_t f_offset;
- short f_vnread_flags; /*
- * (f) home grown sleep lock for f_offset
- * Used only for shared vnode locking in
- * vnread()
- */
-#define FOFFSET_LOCKED 0x1
-#define FOFFSET_LOCK_WAITING 0x2
- /* DTYPE_SOCKET specific fields */
- short f_gcflag; /* used by thread doing fd garbage collection */
-#define FMARK 0x1 /* mark during gc() */
-#define FDEFER 0x2 /* defer for next gc pass */
-#define FWAIT 0x4 /* gc is scanning message buffers */
- int f_msgcount; /* (f) references from message queue */
-
- /* DTYPE_VNODE specific fields */
- int f_seqcount; /*
- * count of sequential accesses -- cleared
- * by most seek operations.
- */
- off_t f_nextoff; /*
- * offset of next expected read or write
- */
- void *f_label; /* Place-holder for struct label pointer. */
+ void *f_data; /* file descriptor specific data */
+ struct fileops *f_ops; /* File operations */
+ struct ucred *f_cred; /* associated credentials. */
+ struct vnode *f_vnode; /* NULL or applicable vnode */
+ short f_type; /* descriptor type */
+ short f_vnread_flags; /* (f) Sleep lock for f_offset */
+ volatile u_int f_flag; /* see fcntl.h */
+ volatile int f_count; /* reference count */
+ /*
+ * DTYPE_VNODE specific fields.
+ */
+ int f_seqcount; /* Count of sequential accesses. */
+ off_t f_nextoff; /* next expected read/write offset. */
+ /*
+ * DFLAG_SEEKABLE specific fields
+ */
+ off_t f_offset;
+ /*
+ * Mandatory Access Control information.
+ */
+ void *f_label; /* Place-holder for MAC label. */
};
+#define FOFFSET_LOCKED 0x1
+#define FOFFSET_LOCK_WAITING 0x2
+
#endif /* _KERNEL */
/*
@@ -168,20 +156,17 @@ struct xfile {
MALLOC_DECLARE(M_FILE);
#endif
-LIST_HEAD(filelist, file);
-extern struct filelist filehead; /* (fl) head of list of open files */
extern struct fileops vnops;
extern struct fileops badfileops;
extern struct fileops socketops;
extern int maxfiles; /* kernel limit on number of open files */
extern int maxfilesperproc; /* per process limit on number of open files */
-extern int openfiles; /* (fl) actual number of open files */
-extern struct sx filelist_lock; /* sx to protect filelist and openfiles */
+extern volatile int openfiles; /* actual number of open files */
int fget(struct thread *td, int fd, struct file **fpp);
int fget_read(struct thread *td, int fd, struct file **fpp);
int fget_write(struct thread *td, int fd, struct file **fpp);
-int fdrop(struct file *fp, struct thread *td);
+int _fdrop(struct file *fp, struct thread *td);
/*
* The socket operations are used a couple of places.
@@ -196,12 +181,7 @@ fo_kqfilter_t soo_kqfilter;
fo_stat_t soo_stat;
fo_close_t soo_close;
-/* Lock a file. */
-#define FILE_LOCK(f) mtx_lock((f)->f_mtxp)
-#define FILE_UNLOCK(f) mtx_unlock((f)->f_mtxp)
-#define FILE_LOCKED(f) mtx_owned((f)->f_mtxp)
-#define FILE_LOCK_ASSERT(f, type) mtx_assert((f)->f_mtxp, (type))
-
+void finit(struct file *, u_int, short, void *, struct fileops *);
int fgetvp(struct thread *td, int fd, struct vnode **vpp);
int fgetvp_read(struct thread *td, int fd, struct vnode **vpp);
int fgetvp_write(struct thread *td, int fd, struct vnode **vpp);
@@ -209,18 +189,9 @@ int fgetvp_write(struct thread *td, int fd, struct vnode **vpp);
int fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp);
void fputsock(struct socket *sp);
-#define fhold_locked(fp) \
- do { \
- FILE_LOCK_ASSERT(fp, MA_OWNED); \
- (fp)->f_count++; \
- } while (0)
-
-#define fhold(fp) \
- do { \
- FILE_LOCK(fp); \
- (fp)->f_count++; \
- FILE_UNLOCK(fp); \
- } while (0)
+#define fhold(fp) atomic_add_int(&(fp)->f_count, 1)
+#define fdrop(fp, td) \
+ (atomic_fetchadd_int(&(fp)->f_count, -1) <= 1 ? _fdrop((fp), (td)) : 0)
static __inline fo_rdwr_t fo_read;
static __inline fo_rdwr_t fo_write;
diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h
index c7b3a44588ef..4d69f3e5ace5 100644
--- a/sys/sys/unpcb.h
+++ b/sys/sys/unpcb.h
@@ -67,6 +67,7 @@ LIST_HEAD(unp_head, unpcb);
struct unpcb {
LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */
struct socket *unp_socket; /* pointer back to socket */
+ struct file *unp_file; /* back-pointer to file for gc. */
struct vnode *unp_vnode; /* if associated with file */
ino_t unp_ino; /* fake inode number */
struct unpcb *unp_conn; /* control block of connected socket */
@@ -76,9 +77,11 @@ struct unpcb {
int unp_cc; /* copy of rcv.sb_cc */
int unp_mbcnt; /* copy of rcv.sb_mbcnt */
unp_gen_t unp_gencnt; /* generation count of this instance */
- int unp_flags; /* flags */
+ short unp_flags; /* flags */
+ short unp_gcflag; /* Garbage collector flags. */
struct xucred unp_peercred; /* peer credentials, if applicable */
u_int unp_refcount;
+ u_int unp_msgcount; /* references from message queue */
struct mtx unp_mtx; /* mutex */
};
@@ -100,6 +103,10 @@ struct unpcb {
#define UNP_WANTCRED 0x004 /* credentials wanted */
#define UNP_CONNWAIT 0x008 /* connect blocks until accepted */
+#define UNPGC_REF 0x1 /* unpcb has external ref. */
+#define UNPGC_DEAD 0x2 /* unpcb might be dead. */
+#define UNPGC_SCANNED 0x4 /* Has been scanned. */
+
/*
* These flags are used to handle non-atomicity in connect() and bind()
* operations on a socket: in particular, to avoid races between multiple