diff options
Diffstat (limited to 'sys/kern')
58 files changed, 28358 insertions, 0 deletions
diff --git a/sys/kern/Makefile b/sys/kern/Makefile new file mode 100644 index 000000000000..e8e4425fc8a6 --- /dev/null +++ b/sys/kern/Makefile @@ -0,0 +1,50 @@ +# from: @(#)Makefile 7.13 (Berkeley) 5/9/91 +# $Id: Makefile,v 1.2 1993/10/16 15:23:57 rgrimes Exp $ + +# Makefile for kernel tags files, init_systent, etc. + +ARCH= i386 + +all: + @echo "make tags, make links or init_sysent.c only" + +init_sysent.c syscalls.c ../sys/syscall.h: makesyscalls.sh syscalls.master + -mv -f init_sysent.c init_sysent.c.bak + -mv -f syscalls.c syscalls.c.bak + -mv -f ../sys/syscall.h ../sys/syscall.h.bak + sh makesyscalls.sh syscalls.master + +# Kernel tags: +# We need to have links to tags files from the generic directories +# that are relative to the machine type, even via remote mounts; +# therefore we use symlinks to $SYSTAGS, which points at +# ${SYSDIR}/${MACHINE}/tags. + +SYSTAGS=/var/db/sys_tags +SYSDIR=/sys + +# Put the ../sys stuff near the end so that subroutine definitions win when +# there is a struct tag with the same name (eg., vmmeter). The real +# solution would probably be for ctags to generate "struct vmmeter" tags. 
+ +COMM= ../net/*.[ch] ../netimp/*.[ch] ../netinet/*.[ch] ../netns/*.[ch] \ + ../netiso/*.[ch] ../netccitt/*.[ch] \ + ../kern/*.c ../ufs/*.[ch] ../nfs/*.[ch] ../vm/*.[ch] ../sys/*.h + +# Directories in which to place tags links (other than machine-dependent) +DGEN= kern sys net netimp netinet netns netccitt nfs ufs vm + +tags: FRC + -for i in ${ARCH}; do \ + cd ../$$i && make ${MFLAGS} tags; done + +links: ${SYSTAGS} + -for i in ${DGEN}; do \ + cd $$i && rm -f tags; ln -s ${SYSTAGS} tags; done + -for i in ${ARCH}; do \ + cd ../$$i && make ${MFLAGS} links; done + +${SYSTAGS}: + ln -s ${SYSDIR}/${MACHINE}/tags ${SYSTAGS} + +FRC: diff --git a/sys/kern/dead_vnops.c b/sys/kern/dead_vnops.c new file mode 100644 index 000000000000..df66b12f7d13 --- /dev/null +++ b/sys/kern/dead_vnops.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)dead_vnops.c 7.13 (Berkeley) 4/15/91 + * $Id: dead_vnops.c,v 1.2 1993/10/16 15:23:59 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "time.h" +#include "vnode.h" +#include "errno.h" +#include "namei.h" +#include "buf.h" + +/* + * Prototypes for dead operations on vnodes. 
+ */ +int dead_badop(), + dead_ebadf(); +int dead_lookup __P(( + struct vnode *vp, + struct nameidata *ndp, + struct proc *p)); +#define dead_create ((int (*) __P(( \ + struct nameidata *ndp, \ + struct vattr *vap, \ + struct proc *p))) dead_badop) +#define dead_mknod ((int (*) __P(( \ + struct nameidata *ndp, \ + struct vattr *vap, \ + struct ucred *cred, \ + struct proc *p))) dead_badop) +int dead_open __P(( + struct vnode *vp, + int mode, + struct ucred *cred, + struct proc *p)); +#define dead_close ((int (*) __P(( \ + struct vnode *vp, \ + int fflag, \ + struct ucred *cred, \ + struct proc *p))) nullop) +#define dead_access ((int (*) __P(( \ + struct vnode *vp, \ + int mode, \ + struct ucred *cred, \ + struct proc *p))) dead_ebadf) +#define dead_getattr ((int (*) __P(( \ + struct vnode *vp, \ + struct vattr *vap, \ + struct ucred *cred, \ + struct proc *p))) dead_ebadf) +#define dead_setattr ((int (*) __P(( \ + struct vnode *vp, \ + struct vattr *vap, \ + struct ucred *cred, \ + struct proc *p))) dead_ebadf) +int dead_read __P(( + struct vnode *vp, + struct uio *uio, + int ioflag, + struct ucred *cred)); +int dead_write __P(( + struct vnode *vp, + struct uio *uio, + int ioflag, + struct ucred *cred)); +int dead_ioctl __P(( + struct vnode *vp, + int command, + caddr_t data, + int fflag, + struct ucred *cred, + struct proc *p)); +int dead_select __P(( + struct vnode *vp, + int which, + int fflags, + struct ucred *cred, + struct proc *p)); +#define dead_mmap ((int (*) __P(( \ + struct vnode *vp, \ + int fflags, \ + struct ucred *cred, \ + struct proc *p))) dead_badop) +#define dead_fsync ((int (*) __P(( \ + struct vnode *vp, \ + int fflags, \ + struct ucred *cred, \ + int waitfor, \ + struct proc *p))) nullop) +#define dead_seek ((int (*) __P(( \ + struct vnode *vp, \ + off_t oldoff, \ + off_t newoff, \ + struct ucred *cred))) nullop) +#define dead_remove ((int (*) __P(( \ + struct nameidata *ndp, \ + struct proc *p))) dead_badop) +#define dead_link ((int (*) 
__P(( \ + struct vnode *vp, \ + struct nameidata *ndp, \ + struct proc *p))) dead_badop) +#define dead_rename ((int (*) __P(( \ + struct nameidata *fndp, \ + struct nameidata *tdnp, \ + struct proc *p))) dead_badop) +#define dead_mkdir ((int (*) __P(( \ + struct nameidata *ndp, \ + struct vattr *vap, \ + struct proc *p))) dead_badop) +#define dead_rmdir ((int (*) __P(( \ + struct nameidata *ndp, \ + struct proc *p))) dead_badop) +#define dead_symlink ((int (*) __P(( \ + struct nameidata *ndp, \ + struct vattr *vap, \ + char *target, \ + struct proc *p))) dead_badop) +#define dead_readdir ((int (*) __P(( \ + struct vnode *vp, \ + struct uio *uio, \ + struct ucred *cred, \ + int *eofflagp))) dead_ebadf) +#define dead_readlink ((int (*) __P(( \ + struct vnode *vp, \ + struct uio *uio, \ + struct ucred *cred))) dead_ebadf) +#define dead_abortop ((int (*) __P(( \ + struct nameidata *ndp))) dead_badop) +#define dead_inactive ((int (*) __P(( \ + struct vnode *vp, \ + struct proc *p))) nullop) +#define dead_reclaim ((int (*) __P(( \ + struct vnode *vp))) nullop) +int dead_lock __P(( + struct vnode *vp)); +#define dead_unlock ((int (*) __P(( \ + struct vnode *vp))) nullop) +int dead_bmap __P(( + struct vnode *vp, + daddr_t bn, + struct vnode **vpp, + daddr_t *bnp)); +int dead_strategy __P(( + struct buf *bp)); +int dead_print __P(( + struct vnode *vp)); +#define dead_islocked ((int (*) __P(( \ + struct vnode *vp))) nullop) +#define dead_advlock ((int (*) __P(( \ + struct vnode *vp, \ + caddr_t id, \ + int op, \ + struct flock *fl, \ + int flags))) dead_ebadf) + +struct vnodeops dead_vnodeops = { + dead_lookup, /* lookup */ + dead_create, /* create */ + dead_mknod, /* mknod */ + dead_open, /* open */ + dead_close, /* close */ + dead_access, /* access */ + dead_getattr, /* getattr */ + dead_setattr, /* setattr */ + dead_read, /* read */ + dead_write, /* write */ + dead_ioctl, /* ioctl */ + dead_select, /* select */ + dead_mmap, /* mmap */ + dead_fsync, /* fsync */ + 
dead_seek, /* seek */ + dead_remove, /* remove */ + dead_link, /* link */ + dead_rename, /* rename */ + dead_mkdir, /* mkdir */ + dead_rmdir, /* rmdir */ + dead_symlink, /* symlink */ + dead_readdir, /* readdir */ + dead_readlink, /* readlink */ + dead_abortop, /* abortop */ + dead_inactive, /* inactive */ + dead_reclaim, /* reclaim */ + dead_lock, /* lock */ + dead_unlock, /* unlock */ + dead_bmap, /* bmap */ + dead_strategy, /* strategy */ + dead_print, /* print */ + dead_islocked, /* islocked */ + dead_advlock, /* advlock */ +}; + +/* + * Trivial lookup routine that always fails. + */ +/* ARGSUSED */ +dead_lookup(vp, ndp, p) + struct vnode *vp; + struct nameidata *ndp; + struct proc *p; +{ + + ndp->ni_dvp = vp; + ndp->ni_vp = NULL; + return (ENOTDIR); +} + +/* + * Open always fails as if device did not exist. + */ +/* ARGSUSED */ +dead_open(vp, mode, cred, p) + struct vnode *vp; + int mode; + struct ucred *cred; + struct proc *p; +{ + + return (ENXIO); +} + +/* + * Vnode op for read + */ +/* ARGSUSED */ +dead_read(vp, uio, ioflag, cred) + struct vnode *vp; + struct uio *uio; + int ioflag; + struct ucred *cred; +{ + + if (chkvnlock(vp)) + panic("dead_read: lock"); + /* + * Return EOF for character devices, EIO for others + */ + if (vp->v_type != VCHR) + return (EIO); + return (0); +} + +/* + * Vnode op for write + */ +/* ARGSUSED */ +dead_write(vp, uio, ioflag, cred) + register struct vnode *vp; + struct uio *uio; + int ioflag; + struct ucred *cred; +{ + + if (chkvnlock(vp)) + panic("dead_write: lock"); + return (EIO); +} + +/* + * Device ioctl operation. 
+ */ +/* ARGSUSED */ +dead_ioctl(vp, com, data, fflag, cred, p) + struct vnode *vp; + register int com; + caddr_t data; + int fflag; + struct ucred *cred; + struct proc *p; +{ + + if (!chkvnlock(vp)) + return (EBADF); + return (VOP_IOCTL(vp, com, data, fflag, cred, p)); +} + +/* ARGSUSED */ +dead_select(vp, which, fflags, cred, p) + struct vnode *vp; + int which, fflags; + struct ucred *cred; + struct proc *p; +{ + + /* + * Let the user find out that the descriptor is gone. + */ + return (1); +} + +/* + * Just call the device strategy routine + */ +dead_strategy(bp) + register struct buf *bp; +{ + + if (bp->b_vp == NULL || !chkvnlock(bp->b_vp)) { + bp->b_flags |= B_ERROR; + biodone(bp); + return (EIO); + } + return (VOP_STRATEGY(bp)); +} + +/* + * Wait until the vnode has finished changing state. + */ +dead_lock(vp) + struct vnode *vp; +{ + + if (!chkvnlock(vp)) + return (0); + return (VOP_LOCK(vp)); +} + +/* + * Wait until the vnode has finished changing state. + */ +dead_bmap(vp, bn, vpp, bnp) + struct vnode *vp; + daddr_t bn; + struct vnode **vpp; + daddr_t *bnp; +{ + + if (!chkvnlock(vp)) + return (EIO); + return (VOP_BMAP(vp, bn, vpp, bnp)); +} + +/* + * Print out the contents of a dead vnode. + */ +/* ARGSUSED */ +dead_print(vp) + struct vnode *vp; +{ + + printf("tag VT_NON, dead vnode\n"); +} + +/* + * Empty vnode failed operation + */ +dead_ebadf() +{ + + return (EBADF); +} + +/* + * Empty vnode bad operation + */ +dead_badop() +{ + + panic("dead_badop called"); + /* NOTREACHED */ +} + +/* + * Empty vnode null operation + */ +dead_nullop() +{ + + return (0); +} + +/* + * We have to wait during times when the vnode is + * in a state of change. 
+ */ +chkvnlock(vp) + register struct vnode *vp; +{ + int locked = 0; + + while (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + sleep((caddr_t)vp, PINOD); + locked = 1; + } + return (locked); +} diff --git a/sys/kern/fifo_vnops.c b/sys/kern/fifo_vnops.c new file mode 100644 index 000000000000..d51400a2872b --- /dev/null +++ b/sys/kern/fifo_vnops.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)fifo_vnops.c 7.7 (Berkeley) 4/15/91 + * $Id: fifo_vnops.c,v 1.4 1993/10/16 15:24:02 rgrimes Exp $ + */ + +#ifdef FIFO +#include "param.h" +#include "time.h" +#include "namei.h" +#include "vnode.h" +#include "socket.h" +#include "socketvar.h" +#include "stat.h" +#include "systm.h" +#include "ioctl.h" +#include "file.h" +#include "fifo.h" +#include "errno.h" +#include "malloc.h" + +/* + * This structure is associated with the FIFO vnode and stores + * the state associated with the FIFO. 
+ */ +struct fifoinfo { + struct socket *fi_readsock; + struct socket *fi_writesock; + long fi_readers; + long fi_writers; +}; + +struct vnodeops fifo_vnodeops = { + fifo_lookup, /* lookup */ + fifo_create, /* create */ + fifo_mknod, /* mknod */ + fifo_open, /* open */ + fifo_close, /* close */ + fifo_access, /* access */ + fifo_getattr, /* getattr */ + fifo_setattr, /* setattr */ + fifo_read, /* read */ + fifo_write, /* write */ + fifo_ioctl, /* ioctl */ + fifo_select, /* select */ + fifo_mmap, /* mmap */ + fifo_fsync, /* fsync */ + fifo_seek, /* seek */ + fifo_remove, /* remove */ + fifo_link, /* link */ + fifo_rename, /* rename */ + fifo_mkdir, /* mkdir */ + fifo_rmdir, /* rmdir */ + fifo_symlink, /* symlink */ + fifo_readdir, /* readdir */ + fifo_readlink, /* readlink */ + fifo_abortop, /* abortop */ + fifo_inactive, /* inactive */ + fifo_reclaim, /* reclaim */ + fifo_lock, /* lock */ + fifo_unlock, /* unlock */ + fifo_bmap, /* bmap */ + fifo_strategy, /* strategy */ + fifo_print, /* print */ + fifo_islocked, /* islocked */ + fifo_advlock, /* advlock */ +}; + +/* + * Trivial lookup routine that always fails. + */ +/* ARGSUSED */ +fifo_lookup(vp, ndp, p) + struct vnode *vp; + struct nameidata *ndp; + struct proc *p; +{ + + ndp->ni_dvp = vp; + ndp->ni_vp = NULL; + return (ENOTDIR); +} + +/* + * Open called to set up a new instance of a fifo or + * to find an active instance of a fifo. 
+ */ +/* ARGSUSED */ +fifo_open(vp, mode, cred, p) + register struct vnode *vp; + int mode; + struct ucred *cred; + struct proc *p; +{ + register struct fifoinfo *fip; + struct socket *rso, *wso; + int error; + static char openstr[] = "fifo"; + + if ((mode & (FREAD|FWRITE)) == (FREAD|FWRITE)) + return (EINVAL); + if ((fip = vp->v_fifoinfo) == NULL) { + MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK); + vp->v_fifoinfo = fip; + fip->fi_readers = fip->fi_writers = 0; + if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) { + free(fip, M_VNODE); + vp->v_fifoinfo = NULL; + return (error); + } + fip->fi_readsock = rso; + if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) { + (void)soclose(rso); + free(fip, M_VNODE); + vp->v_fifoinfo = NULL; + return (error); + } + fip->fi_writesock = wso; + if (error = unp_connect2(wso, rso)) { + (void)soclose(wso); + (void)soclose(rso); + free(fip, M_VNODE); + vp->v_fifoinfo = NULL; + return (error); + } + wso->so_state |= SS_CANTRCVMORE; + rso->so_state |= SS_CANTSENDMORE; + } + error = 0; + if (mode & FREAD) { + fip->fi_readers++; + if (fip->fi_readers == 1) { + fip->fi_writesock->so_state &= ~SS_CANTSENDMORE; + if (fip->fi_writers > 0) + wakeup((caddr_t)&fip->fi_writers); + } + if (mode & O_NONBLOCK) + return (0); + while (fip->fi_writers == 0) { + VOP_UNLOCK(vp); + error = tsleep((caddr_t)&fip->fi_readers, PSOCK | PCATCH, + openstr, 0); + VOP_LOCK(vp); + if (error) + break; + } + } else { + fip->fi_writers++; + if (fip->fi_readers == 0 && (mode & O_NONBLOCK)) { + error = ENXIO; + } else { + if (fip->fi_writers == 1) { + fip->fi_readsock->so_state &= ~SS_CANTRCVMORE; + if (fip->fi_readers > 0) + wakeup((caddr_t)&fip->fi_readers); + } + while (fip->fi_readers == 0) { + VOP_UNLOCK(vp); + error = tsleep((caddr_t)&fip->fi_writers, + PSOCK | PCATCH, openstr, 0); + VOP_LOCK(vp); + if (error) + break; + } + } + } + if (error) + fifo_close(vp, mode, cred, p); + return (error); +} + +/* + * Vnode op for read + */ +/* 
ARGSUSED */ +fifo_read(vp, uio, ioflag, cred) + struct vnode *vp; + register struct uio *uio; + int ioflag; + struct ucred *cred; +{ + register struct socket *rso = vp->v_fifoinfo->fi_readsock; + int error, startresid; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("fifo_read mode"); +#endif + if (uio->uio_resid == 0) + return (0); + if (ioflag & IO_NDELAY) + rso->so_state |= SS_NBIO; + startresid = uio->uio_resid; + VOP_UNLOCK(vp); + error = soreceive(rso, (struct mbuf **)0, uio, (int *)0, + (struct mbuf **)0, (struct mbuf **)0); + VOP_LOCK(vp); + /* + * Clear EOF indication after first such return. + */ + if (uio->uio_resid == startresid) + rso->so_state &= ~SS_CANTRCVMORE; + if (ioflag & IO_NDELAY) + rso->so_state &= ~SS_NBIO; + return (error); +} + +/* + * Vnode op for write + */ +/* ARGSUSED */ +fifo_write(vp, uio, ioflag, cred) + struct vnode *vp; + register struct uio *uio; + int ioflag; + struct ucred *cred; +{ + struct socket *wso = vp->v_fifoinfo->fi_writesock; + int error; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("fifo_write mode"); +#endif + if (ioflag & IO_NDELAY) + wso->so_state |= SS_NBIO; + VOP_UNLOCK(vp); + error = sosend(wso, (struct mbuf *)0, uio, 0, (struct mbuf *)0, 0); + VOP_LOCK(vp); + if (ioflag & IO_NDELAY) + wso->so_state &= ~SS_NBIO; + return (error); +} + +/* + * Device ioctl operation. 
+ */ +/* ARGSUSED */ +fifo_ioctl(vp, com, data, fflag, cred, p) + struct vnode *vp; + int com; + caddr_t data; + int fflag; + struct ucred *cred; + struct proc *p; +{ + struct file filetmp; + int error; + + if (com == FIONBIO) + return (0); + if (fflag & FREAD) + filetmp.f_data = (caddr_t)vp->v_fifoinfo->fi_readsock; + else + filetmp.f_data = (caddr_t)vp->v_fifoinfo->fi_writesock; + return (soo_ioctl(&filetmp, com, data, p)); +} + +/* ARGSUSED */ +fifo_select(vp, which, fflag, cred, p) + struct vnode *vp; + int which, fflag; + struct ucred *cred; + struct proc *p; +{ + struct file filetmp; + int error; + + if (fflag & FREAD) + filetmp.f_data = (caddr_t)vp->v_fifoinfo->fi_readsock; + else + filetmp.f_data = (caddr_t)vp->v_fifoinfo->fi_writesock; + return (soo_select(&filetmp, which, p)); +} + +/* + * This is a noop, simply returning what one has been given. + */ +fifo_bmap(vp, bn, vpp, bnp) + struct vnode *vp; + daddr_t bn; + struct vnode **vpp; + daddr_t *bnp; +{ + + if (vpp != NULL) + *vpp = vp; + if (bnp != NULL) + *bnp = bn; + return (0); +} + +/* + * At the moment we do not do any locking. + */ +/* ARGSUSED */ +fifo_lock(vp) + struct vnode *vp; +{ + + return (0); +} + +/* ARGSUSED */ +fifo_unlock(vp) + struct vnode *vp; +{ + + return (0); +} + +/* + * Device close routine + */ +/* ARGSUSED */ +fifo_close(vp, fflag, cred, p) + register struct vnode *vp; + int fflag; + struct ucred *cred; + struct proc *p; +{ + register struct fifoinfo *fip = vp->v_fifoinfo; + int error1, error2; + + if (fflag & FWRITE) { + fip->fi_writers--; + if (fip->fi_writers == 0) + socantrcvmore(fip->fi_readsock); + } else { + fip->fi_readers--; + if (fip->fi_readers == 0) + socantsendmore(fip->fi_writesock); + } + if (vp->v_usecount > 1) + return (0); + error1 = soclose(fip->fi_readsock); + error2 = soclose(fip->fi_writesock); + FREE(fip, M_VNODE); + vp->v_fifoinfo = NULL; + if (error1) + return (error1); + return (error2); +} + +/* + * Print out the contents of a fifo vnode. 
+ */ +fifo_print(vp) + struct vnode *vp; +{ + + printf("tag VT_NON"); + fifo_printinfo(vp); + printf("\n"); +} + +/* + * Print out internal contents of a fifo vnode. + */ +fifo_printinfo(vp) + struct vnode *vp; +{ + register struct fifoinfo *fip = vp->v_fifoinfo; + + printf(", fifo with %d readers and %d writers", + fip->fi_readers, fip->fi_writers); +} + +/* + * Fifo failed operation + */ +fifo_ebadf() +{ + + return (EBADF); +} + +/* + * Fifo advisory byte-level locks. + */ +/* ARGSUSED */ +fifo_advlock(vp, id, op, fl, flags) + struct vnode *vp; + caddr_t id; + int op; + struct flock *fl; + int flags; +{ + + return (EOPNOTSUPP); +} + +/* + * Fifo bad operation + */ +fifo_badop() +{ + + panic("fifo_badop called"); + /* NOTREACHED */ +} +#endif /*FIFO*/ diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c new file mode 100644 index 000000000000..8200ee199c70 --- /dev/null +++ b/sys/kern/init_main.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)init_main.c 7.41 (Berkeley) 5/15/91 + * $Id: init_main.c,v 1.7 1993/10/08 10:50:42 rgrimes Exp $ + */ + +#include "param.h" +#include "filedesc.h" +#include "kernel.h" +#include "mount.h" +#include "proc.h" +#include "resourcevar.h" +#include "signalvar.h" +#include "systm.h" +#include "vnode.h" +#include "conf.h" +#include "buf.h" +#include "malloc.h" +#include "protosw.h" +#include "reboot.h" +#include "user.h" +#include "utsname.h" + +#include "ufs/quota.h" + +#include "machine/cpu.h" + +#include "vm/vm.h" + +char copyright[] = +"Copyright (c) 1989,1990,1991,1992 William F. Jolitz. All rights reserved.\n\ +Copyright (c) 1982,1986,1989,1991 The Regents of the University\n\ +of California. All rights reserved.\n\n"; + +/* For uname() */ +struct utsname utsname; + +/* + * Components of process 0; + * never freed. 
+ */ +struct session session0; +struct pgrp pgrp0; +struct proc proc0; +struct pcred cred0; +struct filedesc0 filedesc0; +struct plimit limit0; +struct vmspace vmspace0; +struct proc *curproc = &proc0; +struct proc *initproc, *pageproc; + +int cmask = CMASK; +extern struct user *proc0paddr; +extern int (*mountroot)(); + +struct vnode *rootvp, *swapdev_vp; +int boothowto; + +#if __GNUC__ >= 2 +__main() {} +#endif + +/* + * System startup; initialize the world, create process 0, + * mount root filesystem, and fork to create init and pagedaemon. + * Most of the hard work is done in the lower-level initialization + * routines including startup(), which does memory initialization + * and autoconfiguration. + */ +main() +{ + register int i; + register struct proc *p; + register struct filedesc0 *fdp; + int s, rval[2]; + char *cp; + + /* + * Initialize curproc before any possible traps/probes + * to simplify trap processing. + */ + p = &proc0; + curproc = p; + /* + * Attempt to find console and initialize + * in case of early panic or other messages. + */ + startrtclock(); + consinit(); + + printf("%s", copyright); + + vm_mem_init(); + kmeminit(); + cpu_startup(); + + /* + * set up system process 0 (swapper) + */ + p = &proc0; + curproc = p; + + allproc = p; + p->p_prev = &allproc; + p->p_pgrp = &pgrp0; + pgrphash[0] = &pgrp0; + pgrp0.pg_mem = p; + pgrp0.pg_session = &session0; + session0.s_count = 1; + session0.s_leader = p; + + p->p_flag = SLOAD|SSYS; + p->p_stat = SRUN; + p->p_nice = NZERO; + bcopy("swapper", p->p_comm, sizeof ("swapper")); + + /* + * Setup credentials + */ + cred0.p_refcnt = 1; + p->p_cred = &cred0; + p->p_ucred = crget(); + p->p_ucred->cr_ngroups = 1; /* group 0 */ + + /* + * Create the file descriptor table for process 0. 
+ */ + fdp = &filedesc0; + p->p_fd = &fdp->fd_fd; + fdp->fd_fd.fd_refcnt = 1; + fdp->fd_fd.fd_cmask = cmask; + fdp->fd_fd.fd_ofiles = fdp->fd_dfiles; + fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags; + fdp->fd_fd.fd_nfiles = NDFILE; + + /* + * Set initial limits + */ + p->p_limit = &limit0; + for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++) + limit0.pl_rlimit[i].rlim_cur = + limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; + limit0.pl_rlimit[RLIMIT_OFILE].rlim_cur = NOFILE; + limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC; + limit0.p_refcnt = 1; + + /* + * Allocate a prototype map so we have something to fork + */ + p->p_vmspace = &vmspace0; + vmspace0.vm_refcnt = 1; + pmap_pinit(&vmspace0.vm_pmap); + vm_map_init(&p->p_vmspace->vm_map, round_page(VM_MIN_ADDRESS), + trunc_page(VM_MAX_ADDRESS), TRUE); + vmspace0.vm_map.pmap = &vmspace0.vm_pmap; + p->p_addr = proc0paddr; /* XXX */ + + /* + * We continue to place resource usage info + * and signal actions in the user struct so they're pageable. + */ + p->p_stats = &p->p_addr->u_stats; + p->p_sigacts = &p->p_addr->u_sigacts; + + rqinit(); + + /* + * configure virtual memory system, + * set vm rlimits + */ + vm_init_limits(p); + + /* + * Initialize the file systems. + * + * Get vnodes for swapdev and rootdev. + */ + vfsinit(); + if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) + panic("can't setup bdevvp's"); + +#if defined(vax) +#include "kg.h" +#if NKG > 0 + startkgclock(); +#endif +#endif + + /* + * Initialize tables, protocols, and set up well-known inodes. + */ + mbinit(); +#ifdef SYSVSHM + shminit(); +#endif +#include "sl.h" +#if NSL > 0 + slattach(); /* XXX */ +#endif +#include "ppp.h" +#if NPPP > 0 + pppattach(); /* XXX */ +#endif +#include "loop.h" +#if NLOOP > 0 + loattach(); /* XXX */ +#endif + /* + * Block reception of incoming packets + * until protocols have been initialized. 
+ */ + s = splimp(); + ifinit(); + domaininit(); + splx(s); + +#ifdef GPROF + kmstartup(); +#endif + + /* kick off timeout driven events by calling first time */ + roundrobin(); + schedcpu(); + enablertclock(); /* enable realtime clock interrupts */ + + /* + * Set up the root file system and vnode. + */ + if ((*mountroot)()) + panic("cannot mount root"); + /* + * Get vnode for '/'. + * Setup rootdir and fdp->fd_fd.fd_cdir to point to it. + */ + if (VFS_ROOT(rootfs, &rootdir)) + panic("cannot find root vnode"); + fdp->fd_fd.fd_cdir = rootdir; + VREF(fdp->fd_fd.fd_cdir); + VOP_UNLOCK(rootdir); + fdp->fd_fd.fd_rdir = NULL; + swapinit(); + + /* + * Now can look at time, having had a chance + * to verify the time from the file system. + */ + boottime = p->p_stats->p_start = time; + + /* + * Setup version number for uname syscall + * XXX probably should go elsewhere. + */ + bzero(utsname.sysname, sizeof(utsname.sysname)); + for (cp = version, i= 0; + *cp && *cp != ' ' && i <= sizeof(utsname.sysname); + ) + utsname.sysname[i++] = *cp++; + bzero(utsname.release, sizeof(utsname.release)); + for (cp++, i= 0; *cp && *cp != ' ' && i <= sizeof(utsname.release);) + utsname.release[i++] = *cp++; + bzero(utsname.version, sizeof(utsname.version)); + for (; *cp != '('; cp++); + for (cp++, i= 0; *cp && *cp != ')' && i <= sizeof(utsname.version);) + utsname.version[i++] = *cp++; + for (; *cp != '#'; cp++); + if(i <= sizeof(utsname.version)) + utsname.version[i++] = '#'; + for (cp++; *cp && *cp != ':' && i <= sizeof(utsname.version);) + utsname.version[i++] = *cp++; + strncpy(utsname.machine, MACHINE, sizeof(utsname.machine)); + utsname.machine[sizeof(utsname.machine)-1] = '\0'; + + /* + * make init process + */ + siginit(p); + if (fork(p, (void *) NULL, rval)) + panic("fork init"); + if (rval[1]) { + static char initflags[] = "-sf"; + char *ip = initflags + 1; + vm_offset_t addr = 0; + extern int icode[]; /* user init code */ + extern int szicode; /* size of icode */ + + /* + * Now in 
process 1. Set init flags into icode, + * get a minimal address space, copy out "icode", + * and return to it to do an exec of init. + */ + p = curproc; + initproc = p; + if (boothowto&RB_SINGLE) + *ip++ = 's'; +#ifdef notyet + if (boothowto&RB_FASTBOOT) + *ip++ = 'f'; +#endif + *ip++ = '\0'; + + if (vm_allocate(&p->p_vmspace->vm_map, &addr, + round_page(szicode + sizeof(initflags)), FALSE) != 0 || + addr != 0) + panic("init: couldn't allocate at zero"); + + /* need just enough stack to exec from */ + addr = trunc_page(USRSTACK - MAXSSIZ); + if (vm_allocate(&p->p_vmspace->vm_map, &addr, + MAXSSIZ, FALSE) != KERN_SUCCESS) + panic("vm_allocate init stack"); + p->p_vmspace->vm_maxsaddr = (caddr_t)addr; + p->p_vmspace->vm_ssize = 1; + (void) copyout((caddr_t)icode, (caddr_t)0, (unsigned)szicode); + (void) copyout(initflags, (caddr_t)szicode, sizeof(initflags)); + return; /* returns to icode */ + } + + /* + * Start up pageout daemon (process 2). + */ + if (fork(p, (void *) NULL, rval)) + panic("fork pager"); + if (rval[1]) { + /* + * Now in process 2. + */ + p = curproc; + pageproc = p; + p->p_flag |= SLOAD|SSYS; /* XXX */ + bcopy("pagedaemon", curproc->p_comm, sizeof ("pagedaemon")); + vm_pageout(); + /*NOTREACHED*/ + } + + /* + * enter scheduling loop + */ + sched(); +} diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c new file mode 100644 index 000000000000..55e428c00fa4 --- /dev/null +++ b/sys/kern/init_sysent.c @@ -0,0 +1,441 @@ +/* + * System call switch table. + * + * DO NOT EDIT-- this file is automatically generated. 
+ * created from $Id: syscalls.master,v 1.5 1993/10/24 06:19:58 paul Exp $ + */ + +#include "param.h" +#include "systm.h" + +int nosys(); + +int rexit(); +int fork(); +int read(); +int write(); +int open(); +int close(); +int wait4(); +int link(); +int unlink(); +int chdir(); +int fchdir(); +int mknod(); +int chmod(); +int chown(); +int obreak(); +int getfsstat(); +int lseek(); +int getpid(); +int mount(); +int unmount(); +int setuid(); +int getuid(); +int geteuid(); +int ptrace(); +int recvmsg(); +int sendmsg(); +int recvfrom(); +int accept(); +int getpeername(); +int getsockname(); +int saccess(); +int chflags(); +int fchflags(); +int sync(); +int kill(); +int stat(); +int getppid(); +int lstat(); +int dup(); +int pipe(); +int getegid(); +int profil(); +#ifdef KTRACE +int ktrace(); +#else +#endif +int sigaction(); +int getgid(); +int sigprocmask(); +int getlogin(); +int setlogin(); +int sysacct(); +int sigpending(); +#ifdef notyet +int sigaltstack(); +#else +#endif +int ioctl(); +int reboot(); +int revoke(); +int symlink(); +int readlink(); +int execve(); +int umask(); +int chroot(); +int fstat(); +int getkerninfo(); +int getpagesize(); +int msync(); +int vfork(); +int sbrk(); +int sstk(); +int smmap(); +int ovadvise(); +int munmap(); +int mprotect(); +int madvise(); +int mincore(); +int getgroups(); +int setgroups(); +int getpgrp(); +int setpgid(); +int setitimer(); +int swapon(); +int getitimer(); +int gethostname(); +int sethostname(); +int getdtablesize(); +int dup2(); +int fcntl(); +int select(); +int fsync(); +int setpriority(); +int socket(); +int connect(); +int getpriority(); +int sigreturn(); +int bind(); +int setsockopt(); +int listen(); +int sigsuspend(); +int sigstack(); +#ifdef TRACE +int vtrace(); +#else +#endif +int gettimeofday(); +int getrusage(); +int getsockopt(); +#ifdef vax +int resuba(); +#else +#endif +int readv(); +int writev(); +int settimeofday(); +int fchown(); +int fchmod(); +int rename(); +int truncate(); +int ftruncate(); +int 
flock(); +int mkfifo(); +int sendto(); +int shutdown(); +int socketpair(); +int mkdir(); +int rmdir(); +int utimes(); +int adjtime(); +int gethostid(); +int sethostid(); +int getrlimit(); +int setrlimit(); +int setsid(); +int quotactl(); +#ifdef NFS +int nfssvc(); +#else +#endif +int getdirentries(); +int statfs(); +int fstatfs(); +#ifdef NFS +int async_daemon(); +int getfh(); +#else +#endif +int getdomainname(); +int setdomainname(); +int uname(); +#ifdef SYSVSHM +int shmsys(); +#else +#endif +int setgid(); +int setegid(); +int seteuid(); + +#ifdef COMPAT_43 +#define compat(n, name) n, __CONCAT(o,name) + +int ocreat(); +#ifdef KTRACE +#else +#endif +#ifdef notyet +#else +#endif +int owait(); +int oaccept(); +int osend(); +int orecv(); +int osigvec(); +int osigblock(); +int osigsetmask(); +int orecvmsg(); +int osendmsg(); +#ifdef TRACE +#else +#endif +#ifdef vax +#else +#endif +int orecvfrom(); +int osetreuid(); +int osetregid(); +int ogetpeername(); +int okillpg(); +int oquota(); +int ogetsockname(); +#ifdef NFS +#else +#endif +#ifdef NFS +#else +#endif +#ifdef SYSVSHM +#else +#endif + +#else /* COMPAT_43 */ +#define compat(n, name) 0, nosys +#endif /* COMPAT_43 */ + +struct sysent sysent[] = { + 0, nosys, /* 0 = indir or out-of-range */ + 1, rexit, /* 1 = exit */ + 0, fork, /* 2 = fork */ + 3, read, /* 3 = read */ + 3, write, /* 4 = write */ + 3, open, /* 5 = open */ + 1, close, /* 6 = close */ + 4, wait4, /* 7 = wait4 */ + compat(2,creat), /* 8 = old creat */ + 2, link, /* 9 = link */ + 1, unlink, /* 10 = unlink */ + 0, nosys, /* 11 = obsolete execv */ + 1, chdir, /* 12 = chdir */ + 1, fchdir, /* 13 = fchdir */ + 3, mknod, /* 14 = mknod */ + 2, chmod, /* 15 = chmod */ + 3, chown, /* 16 = chown */ + 1, obreak, /* 17 = break */ + 3, getfsstat, /* 18 = getfsstat */ + 3, lseek, /* 19 = lseek */ + 0, getpid, /* 20 = getpid */ + 4, mount, /* 21 = mount */ + 2, unmount, /* 22 = unmount */ + 1, setuid, /* 23 = setuid */ + 0, getuid, /* 24 = getuid */ + 0, geteuid, /* 25 
= geteuid */ + 4, ptrace, /* 26 = ptrace */ + 3, recvmsg, /* 27 = recvmsg */ + 3, sendmsg, /* 28 = sendmsg */ + 6, recvfrom, /* 29 = recvfrom */ + 3, accept, /* 30 = accept */ + 3, getpeername, /* 31 = getpeername */ + 3, getsockname, /* 32 = getsockname */ + 2, saccess, /* 33 = access */ + 2, chflags, /* 34 = chflags */ + 2, fchflags, /* 35 = fchflags */ + 0, sync, /* 36 = sync */ + 2, kill, /* 37 = kill */ + 2, stat, /* 38 = stat */ + 0, getppid, /* 39 = getppid */ + 2, lstat, /* 40 = lstat */ + 2, dup, /* 41 = dup */ + 0, pipe, /* 42 = pipe */ + 0, getegid, /* 43 = getegid */ + 4, profil, /* 44 = profil */ +#ifdef KTRACE + 4, ktrace, /* 45 = ktrace */ +#else + 0, nosys, /* 45 = ktrace */ +#endif + 3, sigaction, /* 46 = sigaction */ + 0, getgid, /* 47 = getgid */ + 2, sigprocmask, /* 48 = sigprocmask */ + 2, getlogin, /* 49 = getlogin */ + 1, setlogin, /* 50 = setlogin */ + 1, sysacct, /* 51 = acct */ + 0, sigpending, /* 52 = sigpending */ +#ifdef notyet + 3, sigaltstack, /* 53 = sigaltstack */ +#else + 0, nosys, /* 53 = sigaltstack */ +#endif + 3, ioctl, /* 54 = ioctl */ + 1, reboot, /* 55 = reboot */ + 1, revoke, /* 56 = revoke */ + 2, symlink, /* 57 = symlink */ + 3, readlink, /* 58 = readlink */ + 3, execve, /* 59 = execve */ + 1, umask, /* 60 = umask */ + 1, chroot, /* 61 = chroot */ + 2, fstat, /* 62 = fstat */ + 4, getkerninfo, /* 63 = getkerninfo */ + 0, getpagesize, /* 64 = getpagesize */ + 2, msync, /* 65 = msync */ + 0, vfork, /* 66 = vfork */ + 0, nosys, /* 67 = obsolete vread */ + 0, nosys, /* 68 = obsolete vwrite */ + 1, sbrk, /* 69 = sbrk */ + 1, sstk, /* 70 = sstk */ + 6, smmap, /* 71 = mmap */ + 1, ovadvise, /* 72 = vadvise */ + 2, munmap, /* 73 = munmap */ + 3, mprotect, /* 74 = mprotect */ + 3, madvise, /* 75 = madvise */ + 0, nosys, /* 76 = obsolete vhangup */ + 0, nosys, /* 77 = obsolete vlimit */ + 3, mincore, /* 78 = mincore */ + 2, getgroups, /* 79 = getgroups */ + 2, setgroups, /* 80 = setgroups */ + 0, getpgrp, /* 81 = getpgrp */ + 2, 
setpgid, /* 82 = setpgid */ + 3, setitimer, /* 83 = setitimer */ + compat(0,wait), /* 84 = old wait */ + 1, swapon, /* 85 = swapon */ + 2, getitimer, /* 86 = getitimer */ + 2, gethostname, /* 87 = gethostname */ + 2, sethostname, /* 88 = sethostname */ + 0, getdtablesize, /* 89 = getdtablesize */ + 2, dup2, /* 90 = dup2 */ + 0, nosys, /* 91 = getdopt */ + 3, fcntl, /* 92 = fcntl */ + 5, select, /* 93 = select */ + 0, nosys, /* 94 = setdopt */ + 1, fsync, /* 95 = fsync */ + 3, setpriority, /* 96 = setpriority */ + 3, socket, /* 97 = socket */ + 3, connect, /* 98 = connect */ + compat(3,accept), /* 99 = old accept */ + 2, getpriority, /* 100 = getpriority */ + compat(4,send), /* 101 = old send */ + compat(4,recv), /* 102 = old recv */ + 1, sigreturn, /* 103 = sigreturn */ + 3, bind, /* 104 = bind */ + 5, setsockopt, /* 105 = setsockopt */ + 2, listen, /* 106 = listen */ + 0, nosys, /* 107 = obsolete vtimes */ + compat(3,sigvec), /* 108 = old sigvec */ + compat(1,sigblock), /* 109 = old sigblock */ + compat(1,sigsetmask), /* 110 = old sigsetmask */ + 1, sigsuspend, /* 111 = sigsuspend */ + 2, sigstack, /* 112 = sigstack */ + compat(3,recvmsg), /* 113 = old recvmsg */ + compat(3,sendmsg), /* 114 = old sendmsg */ +#ifdef TRACE + 2, vtrace, /* 115 = vtrace */ +#else + 0, nosys, /* 115 = obsolete vtrace */ +#endif + 2, gettimeofday, /* 116 = gettimeofday */ + 2, getrusage, /* 117 = getrusage */ + 5, getsockopt, /* 118 = getsockopt */ +#ifdef vax + 1, resuba, /* 119 = resuba */ +#else + 0, nosys, /* 119 = nosys */ +#endif + 3, readv, /* 120 = readv */ + 3, writev, /* 121 = writev */ + 2, settimeofday, /* 122 = settimeofday */ + 3, fchown, /* 123 = fchown */ + 2, fchmod, /* 124 = fchmod */ + compat(6,recvfrom), /* 125 = old recvfrom */ + compat(2,setreuid), /* 126 = old setreuid */ + compat(2,setregid), /* 127 = old setregid */ + 2, rename, /* 128 = rename */ + 2, truncate, /* 129 = truncate */ + 2, ftruncate, /* 130 = ftruncate */ + 2, flock, /* 131 = flock */ + 2, mkfifo, 
/* 132 = mkfifo */ + 6, sendto, /* 133 = sendto */ + 2, shutdown, /* 134 = shutdown */ + 5, socketpair, /* 135 = socketpair */ + 2, mkdir, /* 136 = mkdir */ + 1, rmdir, /* 137 = rmdir */ + 2, utimes, /* 138 = utimes */ + 0, nosys, /* 139 = obsolete 4.2 sigreturn */ + 2, adjtime, /* 140 = adjtime */ + compat(3,getpeername), /* 141 = old getpeername */ + 0, gethostid, /* 142 = gethostid */ + 1, sethostid, /* 143 = sethostid */ + 2, getrlimit, /* 144 = getrlimit */ + 2, setrlimit, /* 145 = setrlimit */ + compat(2,killpg), /* 146 = old killpg */ + 0, setsid, /* 147 = setsid */ + 4, quotactl, /* 148 = quotactl */ + compat(4,quota), /* 149 = old quota */ + compat(3,getsockname), /* 150 = old getsockname */ + 0, nosys, /* 151 = nosys */ + 0, nosys, /* 152 = nosys */ + 0, nosys, /* 153 = nosys */ + 0, nosys, /* 154 = nosys */ +#ifdef NFS + 5, nfssvc, /* 155 = nfssvc */ +#else + 0, nosys, /* 155 = nosys */ +#endif + 4, getdirentries, /* 156 = getdirentries */ + 2, statfs, /* 157 = statfs */ + 2, fstatfs, /* 158 = fstatfs */ + 0, nosys, /* 159 = nosys */ +#ifdef NFS + 0, async_daemon, /* 160 = async_daemon */ + 2, getfh, /* 161 = getfh */ +#else + 0, nosys, /* 160 = nosys */ + 0, nosys, /* 161 = nosys */ +#endif + 2, getdomainname, /* 162 = getdomainname */ + 2, setdomainname, /* 163 = setdomainname */ + 1, uname, /* 164 = uname */ + 0, nosys, /* 165 = nosys */ + 0, nosys, /* 166 = nosys */ + 0, nosys, /* 167 = nosys */ + 0, nosys, /* 168 = nosys */ + 0, nosys, /* 169 = nosys */ + 0, nosys, /* 170 = nosys */ +#ifdef SYSVSHM + 4, shmsys, /* 171 = shmsys */ +#else + 0, nosys, /* 171 = nosys */ +#endif + 0, nosys, /* 172 = nosys */ + 0, nosys, /* 173 = nosys */ + 0, nosys, /* 174 = nosys */ + 0, nosys, /* 175 = nosys */ + 0, nosys, /* 176 = nosys */ + 0, nosys, /* 177 = nosys */ + 0, nosys, /* 178 = nosys */ + 0, nosys, /* 179 = nosys */ + 0, nosys, /* 180 = nosys */ + 1, setgid, /* 181 = setgid */ + 1, setegid, /* 182 = setegid */ + 1, seteuid, /* 183 = seteuid */ + 0, nosys, 
/* 184 = nosys */ + 0, nosys, /* 185 = nosys */ + 0, nosys, /* 186 = nosys */ + 0, nosys, /* 187 = nosys */ + 0, nosys, /* 188 = nosys */ + 0, nosys, /* 189 = nosys */ + 0, nosys, /* 190 = nosys */ +}; + +int nsysent = sizeof(sysent) / sizeof(sysent[0]); diff --git a/sys/kern/kern__physio.c b/sys/kern/kern__physio.c new file mode 100644 index 000000000000..1e08acd734d1 --- /dev/null +++ b/sys/kern/kern__physio.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This software is a component of "386BSD" developed by + William F. Jolitz, TeleMuse. + * 4. Neither the name of the developer nor the name "386BSD" + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ + * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS + * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT. + * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT + * NOT MAKE USE THIS WORK. + * + * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED + * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN + * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES + * (BEGINNING JANUARY 1991 "DR. 
DOBBS JOURNAL", USA AND BEGINNING + * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND + * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE + * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS + * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992. + * + * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: kern__physio.c,v 1.2 1993/10/16 15:24:06 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "buf.h" +#include "conf.h" +#include "proc.h" +#include "malloc.h" +#include "vnode.h" +#include "vm/vm.h" +#include "specdev.h" + +static physio(int (*)(), int, int, int, caddr_t, int *, struct proc *); + +/* + * Driver interface to do "raw" I/O in the address space of a + * user process directly for read and write operations.. 
+ */ + +rawread(dev, uio) + dev_t dev; struct uio *uio; +{ + return (uioapply(physio, cdevsw[major(dev)].d_strategy, dev, uio)); +} + +rawwrite(dev, uio) + dev_t dev; struct uio *uio; +{ + return (uioapply(physio, cdevsw[major(dev)].d_strategy, dev, uio)); +} + +static physio(strat, dev, off, rw, base, len, p) + int (*strat)(); + dev_t dev; + int rw, off; + caddr_t base; + int *len; + struct proc *p; +{ + register struct buf *bp; + int amttodo = *len, error, amtdone; + vm_prot_t ftype; + static zero; + caddr_t adr; + + rw = rw == UIO_READ ? B_READ : 0; + + /* create and build a buffer header for a transfer */ + bp = (struct buf *)malloc(sizeof(*bp), M_TEMP, M_NOWAIT); + bzero((char *)bp, sizeof(*bp)); /* 09 Sep 92*/ + bp->b_flags = B_BUSY | B_PHYS | rw; + bp->b_proc = p; + bp->b_dev = dev; + bp->b_error = 0; + bp->b_blkno = off/DEV_BSIZE; + amtdone = 0; + + /* iteratively do I/O on as large a chunk as possible */ + do { + bp->b_flags &= ~B_DONE; + bp->b_un.b_addr = base; + /* XXX limit */ + bp->b_bcount = min (256*1024, amttodo); + + /* first, check if accessible */ + if (rw == B_READ && !useracc(base, bp->b_bcount, B_WRITE)) { + free(bp, M_TEMP); + return (EFAULT); + } + if (rw == B_WRITE && !useracc(base, bp->b_bcount, B_READ)) { + free(bp, M_TEMP); + return (EFAULT); + } + + /* update referenced and dirty bits, handle copy objects */ + if (rw == B_READ) + ftype = VM_PROT_READ | VM_PROT_WRITE; + else + ftype = VM_PROT_READ; +/* 09 Sep 92*/ for (adr = (caddr_t)trunc_page(base); adr < base + bp->b_bcount; + adr += NBPG) { + vm_fault(&curproc->p_vmspace->vm_map, + adr, ftype, FALSE); + *(int *) adr += zero; + } + + /* lock in core */ + vslock (base, bp->b_bcount); + + /* perform transfer */ + physstrat(bp, strat, PRIBIO); + + /* unlock */ + vsunlock (base, bp->b_bcount, 0); + amtdone = bp->b_bcount - bp->b_resid; + amttodo -= amtdone; + base += amtdone; + bp->b_blkno += amtdone/DEV_BSIZE; + } while (amttodo && (bp->b_flags & B_ERROR) == 0 && amtdone > 0); + + error 
= bp->b_error; + free(bp, M_TEMP); + *len = amttodo; + return (error); +} diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c new file mode 100644 index 000000000000..05cb3bfb4451 --- /dev/null +++ b/sys/kern/kern_acct.c @@ -0,0 +1,282 @@ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_acct.c 7.18 (Berkeley) 5/11/91 + * $Id: kern_acct.c,v 1.5 1993/10/19 05:46:05 davidg Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "namei.h" +#include "resourcevar.h" +#include "proc.h" +#include "ioctl.h" +#include "termios.h" +#include "tty.h" +#include "vnode.h" +#include "mount.h" +#include "kernel.h" +#include "file.h" +#include "acct.h" +#include "syslog.h" + +#include "vm/vm.h" +#include "vm/vm_param.h" + +/* + * Values associated with enabling and disabling accounting + */ +int acctsuspend = 2; /* stop accounting when < 2% free space left */ +int acctresume = 4; /* resume when free space risen to > 4% */ +struct timeval chk = { 15, 0 };/* frequency to check space for accounting */ +struct vnode *acctp = NULL; /* file to which to do accounting */ +struct vnode *savacctp = NULL; /* file to which to do accounting when space */ + +/* + * Enable or disable process accounting. + * + * If a non-null filename is given, that file is used to store accounting + * records on process exit. If a null filename is given process accounting + * is suspended. If accounting is enabled, the system checks the amount + * of freespace on the filesystem at timeval intervals. If the amount of + * freespace is below acctsuspend percent, accounting is suspended. If + * accounting has been suspended, and freespace rises above acctresume, + * accounting is resumed. 
+ */ + +/* Mark Tinguely (tinguely@plains.NoDak.edu) 8/10/93 */ + +struct sysacct_args { + char *fname; +}; + +/* ARGSUSED */ +sysacct(p, uap, retval) + struct proc *p; + struct sysacct_args *uap; + int *retval; +{ + + register struct nameidata *ndp; + struct nameidata nd; + struct vattr attr; + int rv, acctwatch(); + + if (p->p_ucred->cr_uid != 0) + return(EPERM); /* must be root */ + + /* + * Step 1. turn off accounting (if on). exit if fname is nil + */ + + rv = 0; /* just in case nothing is open */ + if (acctp != NULL) { + rv = vn_close(acctp, FWRITE, p->p_ucred, p); + untimeout(acctwatch, (caddr_t) &chk); /* turn off disk check */ + acctp = NULL; + } + else if (savacctp != NULL ) { + rv = vn_close(savacctp, FWRITE, p->p_ucred, p); + untimeout(acctwatch, (caddr_t) &chk); /* turn off disk check */ + savacctp = NULL; + } + + if (uap->fname == NULL) /* accounting stopping complete */ + return(rv); + + /* + * Step 2. open accounting filename for writing. + */ + + nd.ni_segflg = UIO_USERSPACE; + nd.ni_dirp = uap->fname; + + /* is it there? */ + if (rv = vn_open(&nd, p, FWRITE, 0)) + return (rv); + + /* Step 2. Check the attributes on accounting file */ + rv = VOP_GETATTR(nd.ni_vp, &attr, p->p_ucred, p); + if (rv) + goto acct_fail; + + /* is filesystem writable, do I have permission to write and is + * a regular file? + */ + if (nd.ni_vp->v_mount->mnt_flag & MNT_RDONLY) { + rv = EROFS; /* to be consistant with man page */ + goto acct_fail; + } + + if ((VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p)) || + (attr.va_type != VREG)) { + rv = EACCES; /* permission denied error */ + goto acct_fail; + } + + /* Step 3. Save the accounting file vnode, schedule freespace watch. 
*/ + + acctp = nd.ni_vp; + savacctp = NULL; + acctwatch(&chk); /* look for full system */ + VOP_UNLOCK(acctp); + return(0); /* end successfully */ + +acct_fail: + + vn_close(nd.ni_vp, FWRITE, p->p_ucred, p); + return(rv); +} + +/* + * Periodically check the file system to see if accounting + * should be turned on or off. + */ +acctwatch(resettime) + struct timeval *resettime; +{ + struct statfs sb; + + if (savacctp) { + (void)VFS_STATFS(savacctp->v_mount, &sb, (struct proc *)0); + if (sb.f_bavail > acctresume * sb.f_blocks / 100) { + acctp = savacctp; + savacctp = NULL; + log(LOG_NOTICE, "Accounting resumed\n"); + return; + } + } + if (acctp == NULL) + return; + (void)VFS_STATFS(acctp->v_mount, &sb, (struct proc *)0); + if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) { + savacctp = acctp; + acctp = NULL; + log(LOG_NOTICE, "Accounting suspended\n"); + } + timeout(acctwatch, (caddr_t)resettime, hzto(resettime)); +} + +/* + * This routine calculates an accounting record for a process and, + * if accounting is enabled, writes it to the accounting file. + */ + +/* Mark Tinguely (tinguely@plains.NoDak.edu) 8/10/93 */ + +acct(p) + register struct proc *p; +{ + + struct acct acct; + struct rusage *r; + int rv; + long i; + u_int cnt; + char *c; + comp_t int2comp(); + + + if (acctp == NULL) /* accounting not turned on */ + return; + + /* Step 1. Get command name (remove path if necessary) */ + + strncpy(acct.ac_comm, p->p_comm, sizeof(acct.ac_comm)); + + /* Step 2. 
Get rest of information */ + + acct.ac_utime = int2comp((unsigned) p->p_utime.tv_sec * 1000000 + p->p_utime.tv_usec); + acct.ac_stime = int2comp((unsigned) p->p_stime.tv_sec * 1000000 + p->p_stime.tv_usec); + acct.ac_btime = p->p_stats->p_start.tv_sec; + /* elapse time = current - start */ + i = (time.tv_sec - p->p_stats->p_start.tv_sec) * 1000000 + + (time.tv_usec - p->p_stats->p_start.tv_usec); + acct.ac_etime = int2comp((unsigned) i); + + acct.ac_uid = p->p_cred->p_ruid; + acct.ac_gid = p->p_cred->p_rgid; + + r = &p->p_stats->p_ru; + if (i = (p->p_utime.tv_sec + p->p_stime.tv_sec) * hz + + (p->p_utime.tv_usec + p->p_stime.tv_usec) / tick) + acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / i; + else + acct.ac_mem = 0; + acct.ac_io = int2comp((unsigned) (r->ru_inblock + r->ru_oublock) * 1000000); + + if ((p->p_flag & SCTTY) && p->p_pgrp->pg_session->s_ttyp) + acct.ac_tty = p->p_pgrp->pg_session->s_ttyp->t_dev; + else + acct.ac_tty = NODEV; + acct.ac_flag = p->p_acflag; + + /* Step 3. Write record to file */ + + + rv = vn_rdwr(UIO_WRITE, acctp, (caddr_t) &acct, sizeof (acct), + (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, p->p_ucred, (int *) NULL, + p); +} + +/* int2comp converts from ticks in a microsecond to ticks in 1/AHZ second + * + * comp_t is a psuedo-floating point number with 13 bits of + * mantissa and 3 bits of base 8 exponent and has resolution + * of 1/AHZ seconds. + * + * notice I already converted the incoming values into microseconds + * I need to convert back into AHZ ticks. 
+ */ + +/* Mark Tinguely (tinguely@plains.NoDak.edu) 8/10/93 */ + + +#define RES 13 +#define EXP 3 +#define MAXFRACT 1<<RES + +comp_t +int2comp(mantissa) +unsigned int mantissa; +{ + comp_t exp=0; + + mantissa = mantissa * AHZ / 1000000; /* convert back to AHZ ticks */ + while (mantissa > MAXFRACT) { + mantissa >>= EXP; /* base 8 exponent */ + exp++; + } + exp <<= RES; /* move the exponent */ + exp += mantissa; /* add on the manissa */ + return (exp); +} diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c new file mode 100644 index 000000000000..eca7041a3e35 --- /dev/null +++ b/sys/kern/kern_clock.c @@ -0,0 +1,475 @@ +/*- + * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_clock.c 7.16 (Berkeley) 5/9/91 + * $Id: kern_clock.c,v 1.6 1993/10/25 02:02:51 davidg Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "dkstat.h" +#include "callout.h" +#include "kernel.h" +#include "proc.h" +#include "resourcevar.h" + +#include "machine/cpu.h" + +#include "resource.h" +#include "vm/vm.h" + +#ifdef GPROF +#include "gprof.h" +#endif + +/* + * Clock handling routines. + * + * This code is written to operate with two timers which run + * independently of each other. The main clock, running at hz + * times per second, is used to do scheduling and timeout calculations. + * The second timer does resource utilization estimation statistically + * based on the state of the machine phz times a second. Both functions + * can be performed by a single clock (ie hz == phz), however the + * statistics will be much more prone to errors. Ideally a machine + * would have separate clocks measuring time spent in user state, system + * state, interrupt state, and idle state. These clocks would allow a non- + * approximate measure of resource utilization. + */ + +/* + * TODO: + * time of day, system/user timing, timeouts, profiling on separate timers + * allocate more timeout table slots when table overflows. + */ + +/* + * Bump a timeval by a small number of usec's. 
+ */ +#define BUMPTIME(t, usec) { \ + register struct timeval *tp = (t); \ + \ + tp->tv_usec += (usec); \ + if (tp->tv_usec >= 1000000) { \ + tp->tv_usec -= 1000000; \ + tp->tv_sec++; \ + } \ +} + +/* + * The hz hardware interval timer. + * We update the events relating to real time. + * If this timer is also being used to gather statistics, + * we run through the statistics gathering routine as well. + */ +hardclock(frame) + clockframe frame; +{ + register struct callout *p1; + register struct proc *p = curproc; + register struct pstats *pstats; + register struct rusage *ru; + register struct vmspace *vm; + register int s; + int needsoft = 0; + extern int tickdelta; + extern long timedelta; + + /* + * Update real-time timeout queue. + * At front of queue are some number of events which are ``due''. + * The time to these is <= 0 and if negative represents the + * number of ticks which have passed since it was supposed to happen. + * The rest of the q elements (times > 0) are events yet to happen, + * where the time for each is given as a delta from the previous. + * Decrementing just the first of these serves to decrement the time + * to all events. + */ + p1 = calltodo.c_next; + while (p1) { + if (--p1->c_time > 0) + break; + needsoft = 1; + if (p1->c_time == 0) + break; + p1 = p1->c_next; + } + + /* + * Curproc (now in p) is null if no process is running. + * We assume that curproc is set in user mode! + */ + if (p) + pstats = p->p_stats; + /* + * Charge the time out based on the mode the cpu is in. + * Here again we fudge for the lack of proper interval timers + * assuming that the current state has been around at least + * one tick. + */ + if (CLKF_USERMODE(&frame)) { + if (pstats->p_prof.pr_scale) + needsoft = 1; + /* + * CPU was in user state. Increment + * user time counter, and process process-virtual time + * interval timer. 
+ */ + BUMPTIME(&p->p_utime, tick); + if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) + psignal(p, SIGVTALRM); + } else { + /* + * CPU was in system state. + */ + if (p) + BUMPTIME(&p->p_stime, tick); + } + + /* bump the resource usage of integral space use */ + if (p && pstats && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) { + ru->ru_ixrss += vm->vm_tsize * NBPG / 1024; + ru->ru_idrss += vm->vm_dsize * NBPG / 1024; + ru->ru_isrss += vm->vm_ssize * NBPG / 1024; + if ((vm->vm_pmap.pm_stats.resident_count * NBPG / 1024) > + ru->ru_maxrss) { + ru->ru_maxrss = + vm->vm_pmap.pm_stats.resident_count * NBPG / 1024; + } + } + + /* + * If the cpu is currently scheduled to a process, then + * charge it with resource utilization for a tick, updating + * statistics which run in (user+system) virtual time, + * such as the cpu time limit and profiling timers. + * This assumes that the current process has been running + * the entire last tick. + */ + if (p) { + if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) > + p->p_rlimit[RLIMIT_CPU].rlim_cur) { + psignal(p, SIGXCPU); + if (p->p_rlimit[RLIMIT_CPU].rlim_cur < + p->p_rlimit[RLIMIT_CPU].rlim_max) + p->p_rlimit[RLIMIT_CPU].rlim_cur += 5; + } + if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) + psignal(p, SIGPROF); + + /* + * We adjust the priority of the current process. + * The priority of a process gets worse as it accumulates + * CPU time. The cpu usage estimator (p_cpu) is increased here + * and the formula for computing priorities (in kern_synch.c) + * will compute a different value each time the p_cpu increases + * by 4. The cpu usage estimator ramps up quite quickly when + * the process is running (linearly), and decays away + * exponentially, * at a rate which is proportionally slower + * when the system is busy. 
The basic principle is that the
+ */ + (void) splsoftclock(); + softclock(frame); + } else +#endif + setsoftclock(); + } +} + +int dk_ndrive = DK_NDRIVE; +/* + * Gather statistics on resource utilization. + * + * We make a gross assumption: that the system has been in the + * state it is in (user state, kernel state, interrupt state, + * or idle state) for the entire last time interval, and + * update statistics accordingly. + */ +gatherstats(framep) + clockframe *framep; +{ + register int cpstate, s; + + /* + * Determine what state the cpu is in. + */ + if (CLKF_USERMODE(framep)) { + /* + * CPU was in user state. + */ + if (curproc->p_nice > NZERO) + cpstate = CP_NICE; + else + cpstate = CP_USER; + } else { + /* + * CPU was in system state. If profiling kernel + * increment a counter. If no process is running + * then this is a system tick if we were running + * at a non-zero IPL (in a driver). If a process is running, + * then we charge it with system time even if we were + * at a non-zero IPL, since the system often runs + * this way during processing of system calls. + * This is approximate, but the lack of true interval + * timers makes doing anything else difficult. + */ + cpstate = CP_SYS; + if (curproc == NULL && CLKF_BASEPRI(framep)) + cpstate = CP_IDLE; +#ifdef GPROF + s = (u_long) CLKF_PC(framep) - (u_long) s_lowpc; + if (profiling < 2 && s < s_textsize) + kcount[s / (HISTFRACTION * sizeof (*kcount))]++; +#endif + } + /* + * We maintain statistics shown by user-level statistics + * programs: the amount of time in each cpu state, and + * the amount of time each of DK_NDRIVE ``drives'' is busy. + */ + cp_time[cpstate]++; + for (s = 0; s < DK_NDRIVE; s++) + if (dk_busy&(1<<s)) + dk_time[s]++; +} + +/* + * Software priority level clock interrupt. + * Run periodic events from timeout queue. 
+ */
+/*ARGSUSED*/
+softclock(frame)
+	clockframe frame;	/* XXX passed by value; see note in hardclock above */
+{
+
+	/*
+	 * Pop expired entries off the head of calltodo.  The c_time
+	 * fields are differential (each entry stores ticks relative to
+	 * its predecessor), so an entry is due when its c_time <= 0.
+	 * The list is only manipulated at splhigh(); the handler itself
+	 * runs with the priority dropped again.
+	 */
+	for (;;) {
+		register struct callout *p1;
+		register caddr_t arg;
+		register int (*func)();
+		register int a, s;
+
+		s = splhigh();
+		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
+			splx(s);
+			break;
+		}
+		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
+		calltodo.c_next = p1->c_next;
+		p1->c_next = callfree;
+		callfree = p1;
+		splx(s);
+		(*func)(arg, a);
+	}
+
+	/*
+	 * If no process to work with, we're finished.
+	 */
+	if (curproc == 0) return;
+
+	/*
+	 * If trapped user-mode and profiling, give it
+	 * a profiling tick.
+	 */
+	if (CLKF_USERMODE(&frame)) {
+		register struct proc *p = curproc;
+
+		if (p->p_stats->p_prof.pr_scale)
+			profile_tick(p, &frame);
+		/*
+		 * Check to see if process has accumulated
+		 * more than 10 minutes of user time.  If so
+		 * reduce priority to give others a chance.
+		 */
+		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
+		    p->p_utime.tv_sec > 10 * 60) {
+			p->p_nice = NZERO + 4;
+			setpri(p);
+			p->p_pri = p->p_usrpri;
+		}
+	}
+}
+
+/*
+ * Arrange that (*func)(arg) is called in t/hz seconds.
+ */
+timeout(func, arg, t)
+	int (*func)();
+	caddr_t arg;
+	register int t;
+{
+	register struct callout *p1, *p2, *pnew;
+	register int s = splhigh();
+
+	/* clamp non-positive delays to one tick */
+	if (t <= 0)
+		t = 1;
+	pnew = callfree;
+	if (pnew == NULL)
+		panic("timeout table overflow");
+	callfree = pnew->c_next;
+	pnew->c_arg = arg;
+	pnew->c_func = func;
+	/*
+	 * Walk the differentially-timed list, consuming ticks from t as
+	 * we pass each earlier entry, and splice the new entry in; the
+	 * successor's c_time is debited so the list's running sums stay
+	 * correct.
+	 */
+	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
+		if (p2->c_time > 0)
+			t -= p2->c_time;
+	p1->c_next = pnew;
+	pnew->c_next = p2;
+	pnew->c_time = t;
+	if (p2)
+		p2->c_time -= t;
+	splx(s);
+}
+
+/*
+ * untimeout is called to remove a function timeout call
+ * from the callout structure.
+ */
+untimeout(func, arg)
+	int (*func)();
+	caddr_t arg;
+{
+	register struct callout *p1, *p2;
+	register int s;
+
+	/*
+	 * Remove the first entry matching (func, arg).  Its remaining
+	 * differential time is credited to its successor so later
+	 * timeouts still fire on schedule.  Quietly does nothing if no
+	 * entry matches.
+	 */
+	s = splhigh();
+	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
+		if (p2->c_func == func && p2->c_arg == arg) {
+			if (p2->c_next && p2->c_time > 0)
+				p2->c_next->c_time += p2->c_time;
+			p1->c_next = p2->c_next;
+			p2->c_next = callfree;
+			callfree = p2;
+			break;
+		}
+	}
+	splx(s);
+}
+
+/*
+ * Compute number of hz until specified time.
+ * Used to compute third argument to timeout() from an
+ * absolute time.
+ */
+hzto(tv)
+	struct timeval *tv;
+{
+	register long ticks;
+	register long sec;
+	int s = splhigh();
+
+	/*
+	 * If number of milliseconds will fit in 32 bit arithmetic,
+	 * then compute number of milliseconds to time and scale to
+	 * ticks.  Otherwise just compute number of hz in time, rounding
+	 * times greater than representable to maximum value.
+	 *
+	 * Delta times less than 25 days can be computed ``exactly''.
+	 * Maximum value for any timeout in 10ms ticks is 250 days.
+	 *
+	 * NOTE(review): a tv already in the past yields a zero or
+	 * negative result here; timeout() clamps t <= 0 to one tick.
+	 * The millisecond path also assumes tick >= 1000 (i.e. hz of
+	 * 1000 or less), otherwise tick/1000 divides by zero.
+	 */
+	sec = tv->tv_sec - time.tv_sec;
+	if (sec <= 0x7fffffff / 1000 - 1000)
+		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+	else if (sec <= 0x7fffffff / hz)
+		ticks = sec * hz;
+	else
+		ticks = 0x7fffffff;
+	splx(s);
+	return (ticks);
+}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
new file mode 100644
index 000000000000..f8987ae8611a
--- /dev/null
+++ b/sys/kern/kern_descrip.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)kern_descrip.c	7.28 (Berkeley) 6/25/91
+ * $Id: kern_descrip.c,v 1.4 1993/10/16 15:24:11 rgrimes Exp $
+ */
+
+/*
+ * NOTE(review): old-style unqualified includes; the kernel build is
+ * presumed to supply the search path (these appear to correspond to the
+ * <sys/...> headers of later trees -- confirm against the build rules).
+ */
+#include "param.h"
+#include "systm.h"
+#include "filedesc.h"
+#include "kernel.h"
+#include "vnode.h"
+#include "proc.h"
+#include "file.h"
+#include "socket.h"
+#include "socketvar.h"
+#include "stat.h"
+#include "ioctl.h"
+#include "fcntl.h"
+#include "malloc.h"
+#include "syslog.h"
+#include "resourcevar.h"
+
+/*
+ * Descriptor management.
+ */
+struct file *filehead;	/* head of list of open files */
+int nfiles;		/* actual number of open files */
+extern int maxfdescs;	/* maximum number of file descriptors to a process */
+
+/*
+ * System calls on descriptors.
+ */
+/*
+ * getdtablesize() system call: report the process's soft open-file limit.
+ */
+/* ARGSUSED */
+getdtablesize(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	*retval = p->p_rlimit[RLIMIT_OFILE].rlim_cur;
+	return (0);
+}
+
+/*
+ * Duplicate a file descriptor.
+ */
+
+struct dup_args {
+	int	i;
+};
+
+/* ARGSUSED */
+dup(p, uap, retval)
+	struct proc *p;
+	struct dup_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	int fd, error;
+
+	/*
+	 * XXX Compatibility
+	 *
+	 * NOTE(review): an fd with any bit above 077 set is treated as the
+	 * old dup2 encoding; dup_args is then punned as dup2_args, so
+	 * dup2() reads its second argument from the slot following i in
+	 * the syscall argument area -- fragile, but apparently intended.
+	 */
+	if (uap->i &~ 077) { uap->i &= 077; return (dup2(p, uap, retval)); }
+
+	if ((unsigned)uap->i >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->i]) == NULL)
+		return (EBADF);
+	if (error = fdalloc(p, 0, &fd))
+		return (error);
+	/* share the open file; the new slot never inherits close-on-exec */
+	fdp->fd_ofiles[fd] = fp;
+	fdp->fd_ofileflags[fd] = fdp->fd_ofileflags[uap->i] &~ UF_EXCLOSE;
+	fp->f_count++;
+	if (fd > fdp->fd_lastfile)
+		fdp->fd_lastfile = fd;
+	*retval = fd;
+	return (0);
+}
+
+/*
+ * Duplicate a file descriptor to a particular value.
+ */
+
+struct dup2_args {
+	u_int	from;
+	u_int	to;
+};
+
+/* ARGSUSED */
+dup2(p, uap, retval)
+	struct proc *p;
+	struct dup2_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register u_int old = uap->from, new = uap->to;
+	int i, error;
+
+	if (old >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[old]) == NULL ||
+	    new >= p->p_rlimit[RLIMIT_OFILE].rlim_cur ||
+	    new >= maxfdescs)
+		return (EBADF);
+	*retval = new;
+	if (old == new)
+		return (0);
+	if (new >= fdp->fd_nfiles) {
+		/* grow the table so slot `new' exists, then sanity check */
+		if (error = fdalloc(p, new, &i))
+			return (error);
+		if (new != i)
+			panic("dup2: fdalloc");
+	} else if (fdp->fd_ofiles[new]) {
+		if (fdp->fd_ofileflags[new] & UF_MAPPED)
+			(void) munmapfd(p, new);
+		/*
+		 * dup2() must succeed even if the close has an error.
+ */
+		(void) closef(fdp->fd_ofiles[new], p);
+	}
+	/* install the shared open file; close-on-exec is not inherited */
+	fdp->fd_ofiles[new] = fp;
+	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+	fp->f_count++;
+	if (new > fdp->fd_lastfile)
+		fdp->fd_lastfile = new;
+	return (0);
+}
+
+/*
+ * The file control system call.
+ */
+
+struct fcntl_args {
+	int	fd;
+	int	cmd;
+	int	arg;
+};
+
+/* ARGSUSED */
+fcntl(p, uap, retval)
+	struct proc *p;
+	register struct fcntl_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register char *pop;	/* per-descriptor flag byte for uap->fd */
+	struct vnode *vp;
+	int i, tmp, error, flg = F_POSIX;
+	struct flock fl;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	pop = &fdp->fd_ofileflags[uap->fd];
+	switch(uap->cmd) {
+	case F_DUPFD:
+		if ((unsigned)uap->arg >= p->p_rlimit[RLIMIT_OFILE].rlim_cur ||
+		    ((unsigned)uap->arg >= maxfdescs))
+			return (EINVAL);
+		/* like dup(), but the new fd is the lowest one >= arg */
+		if (error = fdalloc(p, uap->arg, &i))
+			return (error);
+		fdp->fd_ofiles[i] = fp;
+		fdp->fd_ofileflags[i] = *pop &~ UF_EXCLOSE;
+		fp->f_count++;
+		if (i > fdp->fd_lastfile)
+			fdp->fd_lastfile = i;
+		*retval = i;
+		return (0);
+
+	case F_GETFD:
+		/* bit 0 of the flag byte is the close-on-exec flag */
+		*retval = *pop & 1;
+		return (0);
+
+	case F_SETFD:
+		*pop = (*pop &~ 1) | (uap->arg & 1);
+		return (0);
+
+	case F_GETFL:
+		*retval = OFLAGS(fp->f_flag);
+		return (0);
+
+	case F_SETFL:
+		/*
+		 * Set the new flags first, then push FNONBLOCK/FASYNC down
+		 * to the object via ioctl; on FIOASYNC failure FNONBLOCK
+		 * is backed out again before returning the error.
+		 */
+		fp->f_flag &= ~FCNTLFLAGS;
+		fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
+		tmp = fp->f_flag & FNONBLOCK;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		if (error)
+			return (error);
+		tmp = fp->f_flag & FASYNC;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+		if (!error)
+			return (0);
+		fp->f_flag &= ~FNONBLOCK;
+		tmp = 0;
+		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		return (error);
+
+	case F_GETOWN:
+		if (fp->f_type == DTYPE_SOCKET) {
+			*retval = ((struct socket *)fp->f_data)->so_pgid;
+			return (0);
+		}
+		/*
+		 * Non-sockets: ask the object via TIOCGPGRP; the result is
+		 * negated, apparently to match the F_GETOWN convention of
+		 * negative values naming process groups -- verify.
+		 */
+		error = (*fp->f_ops->fo_ioctl)
+			(fp, (int)TIOCGPGRP,
(caddr_t)retval, p);
+		*retval = -*retval;
+		return (error);
+
+	case F_SETOWN:
+		if (fp->f_type == DTYPE_SOCKET) {
+			((struct socket *)fp->f_data)->so_pgid = uap->arg;
+			return (0);
+		}
+		/* negative arg names a process group; positive names a pid,
+		 * which is translated to its process group before TIOCSPGRP */
+		if (uap->arg <= 0) {
+			uap->arg = -uap->arg;
+		} else {
+			struct proc *p1 = pfind(uap->arg);
+			if (p1 == 0)
+				return (ESRCH);
+			uap->arg = p1->p_pgrp->pg_id;
+		}
+		return ((*fp->f_ops->fo_ioctl)
+			(fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
+
+	case F_SETLKW:
+		flg |= F_WAIT;
+		/* Fall into F_SETLK */
+
+	case F_SETLK:
+		if (fp->f_type != DTYPE_VNODE)
+			return (EBADF);
+		vp = (struct vnode *)fp->f_data;
+		/* Copy in the lock structure */
+		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+		if (error)
+			return (error);
+		if (fl.l_whence == SEEK_CUR)
+			fl.l_start += fp->f_offset;
+		switch (fl.l_type) {
+
+		case F_RDLCK:
+			if ((fp->f_flag & FREAD) == 0)
+				return (EBADF);
+			/* SADVLCK marks the process as holding POSIX locks,
+			 * so closef() knows to release them on any close */
+			p->p_flag |= SADVLCK;
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+		case F_WRLCK:
+			if ((fp->f_flag & FWRITE) == 0)
+				return (EBADF);
+			p->p_flag |= SADVLCK;
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+		case F_UNLCK:
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
+				F_POSIX));
+
+		default:
+			return (EINVAL);
+		}
+
+	case F_GETLK:
+		if (fp->f_type != DTYPE_VNODE)
+			return (EBADF);
+		vp = (struct vnode *)fp->f_data;
+		/* Copy in the lock structure */
+		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+		if (error)
+			return (error);
+		if (fl.l_whence == SEEK_CUR)
+			fl.l_start += fp->f_offset;
+		if (error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX))
+			return (error);
+		/* hand the (possibly rewritten) lock description back out */
+		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
+
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Close a file descriptor.
+ */
+/* ARGSUSED */
+struct close_args {
+	int	fd;
+};
+
+close(p, uap, retval)
+	struct proc *p;
+	struct close_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register int fd = uap->fd;
+	register u_char *pf;
+
+	if ((unsigned)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+	pf = (u_char *)&fdp->fd_ofileflags[fd];
+	if (*pf & UF_MAPPED)
+		(void) munmapfd(p, fd);
+	/* vacate the slot, trim fd_lastfile, remember the lowest free slot */
+	fdp->fd_ofiles[fd] = NULL;
+	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+		fdp->fd_lastfile--;
+	if (fd < fdp->fd_freefile)
+		fdp->fd_freefile = fd;
+	*pf = 0;
+	/* drop the reference; closef() returns the fo_close error, if any */
+	return (closef(fp, p));
+}
+
+/*
+ * Return status information about a file descriptor.
+ */
+
+struct fstat_args {
+	int	fd;
+	struct stat *sb;
+};
+
+/* ARGSUSED */
+fstat(p, uap, retval)
+	struct proc *p;
+	register struct fstat_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct stat ub;
+	int error;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	switch (fp->f_type) {
+
+	case DTYPE_VNODE:
+		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &ub);
+		break;
+
+	default:
+		/* only vnodes and sockets exist; anything else is corruption */
+		panic("fstat");
+		/*NOTREACHED*/
+	}
+	if (error == 0)
+		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
+	return (error);
+}
+
+/*
+ * Allocate a file descriptor for the process.
+ */
+int fdexpand;	/* statistic: number of times any ofile array was grown */
+
+fdalloc(p, want, result)
+	struct proc *p;
+	int want;
+	int *result;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register int i;
+	int lim, last, nfiles;
+	struct file **newofile;
+	char *newofileflags;
+
+	/*
+	 * Search for a free descriptor starting at the higher
+	 * of want or fd_freefile.  If that fails, consider
+	 * expanding the ofile array.
+ */
+	lim = p->p_rlimit[RLIMIT_OFILE].rlim_cur;
+	for (;;) {
+		last = min(fdp->fd_nfiles, lim);
+		if ((i = want) < fdp->fd_freefile)
+			i = fdp->fd_freefile;
+		for (; i < last; i++) {
+			if (fdp->fd_ofiles[i] == NULL) {
+				fdp->fd_ofileflags[i] = 0;
+				if (i > fdp->fd_lastfile)
+					fdp->fd_lastfile = i;
+				if (want <= fdp->fd_freefile)
+					fdp->fd_freefile = i;
+				*result = i;
+				return (0);
+			}
+		}
+
+		/*
+		 * No space in current array.  Expand?
+		 */
+		if (fdp->fd_nfiles >= lim)
+			return (EMFILE);
+		if (fdp->fd_nfiles < NDEXTENT)
+			nfiles = NDEXTENT;
+		else
+			nfiles = 2 * fdp->fd_nfiles;
+		/*
+		 * One OFILESIZE-per-slot allocation holds both arrays:
+		 * the pointer array first, then the flag bytes right after.
+		 */
+		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
+		    M_FILEDESC, M_WAITOK);
+		newofileflags = (char *) &newofile[nfiles];
+		/*
+		 * Copy the existing ofile and ofileflags arrays
+		 * and zero the new portion of each array.
+		 */
+		bcopy(fdp->fd_ofiles, newofile,
+			(i = sizeof(struct file *) * fdp->fd_nfiles));
+		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
+		bcopy(fdp->fd_ofileflags, newofileflags,
+			(i = sizeof(char) * fdp->fd_nfiles));
+		bzero(newofileflags + i, nfiles * sizeof(char) - i);
+		/* the first NDFILE slots live inside filedesc0, not malloc'd */
+		if (fdp->fd_nfiles > NDFILE)
+			FREE(fdp->fd_ofiles, M_FILEDESC);
+		fdp->fd_ofiles = newofile;
+		fdp->fd_ofileflags = newofileflags;
+		fdp->fd_nfiles = nfiles;
+		fdexpand++;
+	}
+}
+
+/*
+ * Check to see whether n user file descriptors
+ * are available to the process p.
+ * Returns 1 if at least n more descriptors can be had, else 0.
+ */
+fdavail(p, n)
+	struct proc *p;
+	register int n;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file **fpp;
+	register int i;
+
+	/* headroom between the table size and the limit counts first... */
+	if ((i = p->p_rlimit[RLIMIT_OFILE].rlim_cur - fdp->fd_nfiles) > 0 &&
+	    (n -= i) <= 0)
+		return (1);
+	/* ...then free slots within the existing table */
+	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++)
+		if (*fpp == NULL && --n <= 0)
+			return (1);
+	return (0);
+}
+
+/*
+ * Create a new open file structure and allocate
+ * a file descriptor for the process that refers to it.
+ */
+falloc(p, resultfp, resultfd)
+	register struct proc *p;
+	struct file **resultfp;
+	int *resultfd;
+{
+	register struct file *fp, *fq, **fpp;
+	int error, i;
+
+	if (error = fdalloc(p, 0, &i))
+		return (error);
+	/*
+	 * NOTE(review): on this ENFILE return the slot reserved by
+	 * fdalloc() above is never installed; fd_lastfile may briefly
+	 * point at a NULL slot until a later close() trims it.
+	 */
+	if (nfiles >= maxfiles) {
+		tablefull("file");
+		return (ENFILE);
+	}
+	/*
+	 * Allocate a new file descriptor.
+	 * If the process has file descriptor zero open, add to the list
+	 * of open files at that point, otherwise put it at the front of
+	 * the list of open files.
+	 */
+	nfiles++;
+	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
+	if (fq = p->p_fd->fd_ofiles[0])
+		fpp = &fq->f_filef;
+	else
+		fpp = &filehead;
+	p->p_fd->fd_ofiles[i] = fp;
+	/* splice fp into the doubly-linked global file list at *fpp */
+	if (fq = *fpp)
+		fq->f_fileb = &fp->f_filef;
+	fp->f_filef = fq;
+	fp->f_fileb = fpp;
+	*fpp = fp;
+	fp->f_count = 1;
+	fp->f_msgcount = 0;
+	fp->f_offset = 0;
+	/* new file charges against the opener's credentials */
+	fp->f_cred = p->p_ucred;
+	crhold(fp->f_cred);
+	if (resultfp)
+		*resultfp = fp;
+	if (resultfd)
+		*resultfd = i;
+	return (0);
+}
+
+/*
+ * Free a file descriptor.
+ */
+ffree(fp)
+	register struct file *fp;
+{
+	register struct file *fq;
+
+	/* unlink fp from the global file list via its back pointer */
+	if (fq = fp->f_filef)
+		fq->f_fileb = fp->f_fileb;
+	*fp->f_fileb = fq;
+	crfree(fp->f_cred);
+#ifdef DIAGNOSTIC
+	fp->f_filef = NULL;
+	fp->f_fileb = NULL;
+	fp->f_count = 0;
+#endif
+	nfiles--;
+	FREE(fp, M_FILE);
+}
+
+/*
+ * Copy a filedesc structure.
+ */
+struct filedesc *
+fdcopy(p)
+	struct proc *p;
+{
+	register struct filedesc *newfdp, *fdp = p->p_fd;
+	register struct file **fpp;
+	register int i;
+
+	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
+	    M_FILEDESC, M_WAITOK);
+	bcopy(fdp, newfdp, sizeof(struct filedesc));
+	/* the copy shares cdir/rdir vnodes; take references on them */
+	VREF(newfdp->fd_cdir);
+	if (newfdp->fd_rdir)
+		VREF(newfdp->fd_rdir);
+	newfdp->fd_refcnt = 1;
+
+	/*
+	 * If the number of open files fits in the internal arrays
+	 * of the open file structure, use them, otherwise allocate
+	 * additional memory for the number of descriptors currently
+	 * in use.
+ */
+	if (newfdp->fd_lastfile < NDFILE) {
+		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
+		newfdp->fd_ofileflags =
+		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
+		i = NDFILE;
+	} else {
+		/*
+		 * Compute the smallest multiple of NDEXTENT needed
+		 * for the file descriptors currently in use,
+		 * allowing the table to shrink.
+		 */
+		i = newfdp->fd_nfiles;
+		while (i > 2 * NDEXTENT && i >= newfdp->fd_lastfile * 2)
+			i /= 2;
+		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
+		    M_FILEDESC, M_WAITOK);
+		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
+	}
+	newfdp->fd_nfiles = i;
+	/*
+	 * NOTE(review): sizeof(struct file **) is written where
+	 * sizeof(struct file *) is meant; the two are the same size in
+	 * practice, so the copy length comes out right anyway.
+	 */
+	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
+	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
+	fpp = newfdp->fd_ofiles;
+	/* bump the share count on every open file; the i-- >= 0 test covers
+	 * descriptors 0 through fd_lastfile inclusive */
+	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
+		if (*fpp != NULL)
+			(*fpp)->f_count++;
+	return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ */
+void
+fdfree(p)
+	struct proc *p;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct file **fpp;
+	char *fdfp;
+	register int i;
+
+	/* shared (e.g. after fork) -- just drop our reference */
+	if (--fdp->fd_refcnt > 0)
+		return;
+	fpp = fdp->fd_ofiles;
+	fdfp = fdp->fd_ofileflags;
+	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
+		if (*fpp != NULL) {
+			if (*fdfp & UF_MAPPED)
+				(void) munmapfd(p, i);
+			(void) closef(*fpp, p);
+		}
+	/* arrays beyond NDFILE slots were malloc'd separately */
+	if (fdp->fd_nfiles > NDFILE)
+		FREE(fdp->fd_ofiles, M_FILEDESC);
+	vrele(fdp->fd_cdir);
+	if (fdp->fd_rdir)
+		vrele(fdp->fd_rdir);
+	FREE(fdp, M_FILEDESC);
+}
+
+/*
+ * Close any files on exec?
+ */
+void
+fdcloseexec(p)
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file **fpp;
+	char *fdfp;
+	register int i;
+
+	/* close every descriptor whose flag byte has UF_EXCLOSE set */
+	fpp = fdp->fd_ofiles;
+	fdfp = fdp->fd_ofileflags;
+	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
+		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
+			if (*fdfp & UF_MAPPED)
+				(void) munmapfd(p, i);
+			(void) closef(*fpp, p);
+			*fpp = NULL;
+			*fdfp = 0;
+			if (i < fdp->fd_freefile)
+				fdp->fd_freefile = i;
+		}
+	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+		fdp->fd_lastfile--;
+}
+
+/*
+ * Internal form of close.
+ * Decrement reference count on file structure.
+ */
+closef(fp, p)
+	register struct file *fp;
+	register struct proc *p;
+{
+	struct vnode *vp;
+	struct flock lf;
+	int error;
+
+	if (fp == NULL)
+		return (0);
+	/*
+	 * POSIX record locking dictates that any close releases ALL
+	 * locks owned by this process.  This is handled by setting
+	 * a flag in the unlock to free ONLY locks obeying POSIX
+	 * semantics, and not to free BSD-style file locks.
+	 */
+	if ((p->p_flag & SADVLCK) && fp->f_type == DTYPE_VNODE) {
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		lf.l_type = F_UNLCK;
+		vp = (struct vnode *)fp->f_data;
+		/* POSIX locks are keyed on the process (id == p) */
+		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
+	}
+	if (--fp->f_count > 0)
+		return (0);
+	if (fp->f_count < 0)
+		panic("closef: count < 0");
+	/* last reference: release any flock()-style lock, keyed on fp */
+	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		lf.l_type = F_UNLCK;
+		vp = (struct vnode *)fp->f_data;
+		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+	}
+	error = (*fp->f_ops->fo_close)(fp, p);
+	ffree(fp);
+	return (error);
+}
+
+/*
+ * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on
+ * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
+ */
+
+struct flock_args {
+	int	fd;
+	int	how;
+};
+
+/* ARGSUSED */
+flock(p, uap, retval)
+	struct proc *p;
+	register struct flock_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct vnode *vp;
+	struct flock lf;
+	int error;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (EOPNOTSUPP);
+	vp = (struct vnode *)fp->f_data;
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	/*
+	 * flock() locks are keyed on the open file (id == fp), not the
+	 * process, so they follow dup()ed and inherited descriptors --
+	 * contrast the F_POSIX unlock in closef() which is keyed on p.
+	 */
+	if (uap->how & LOCK_UN) {
+		lf.l_type = F_UNLCK;
+		fp->f_flag &= ~FHASLOCK;
+		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
+	}
+	if (uap->how & LOCK_EX)
+		lf.l_type = F_WRLCK;
+	else if (uap->how & LOCK_SH)
+		lf.l_type = F_RDLCK;
+	else
+		/* NOTE(review): a bad operation yields EBADF here where
+		 * EINVAL might be expected -- kept as is */
+		return (EBADF);
+	fp->f_flag |= FHASLOCK;
+	if (uap->how & LOCK_NB)
+		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
+	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
+}
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process.  Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ */
+/* ARGSUSED */
+fdopen(dev, mode, type)
+	dev_t dev;
+	int mode, type;
+{
+
+	/*
+	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
+	 * the file descriptor being sought for duplication. The error
+	 * return ensures that the vnode for this device will be released
+	 * by vn_open. Open will detect this special error and take the
+	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
+	 * will simply report the error.
+	 */
+	curproc->p_dupfd = minor(dev);	/* XXX */
+	return (ENODEV);
+}
+
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ */
+dupfdopen(fdp, indx, dfd, mode)
+	register struct filedesc *fdp;
+	register int indx, dfd;
+	int mode;
+{
+	register struct file *wfp;
+	struct file *fp;
+
+	/*
+	 * Reached from the open path after fdopen() above returned
+	 * ENODEV with p_dupfd set to dfd; indx is the descriptor the
+	 * open had already allocated.
+	 *
+	 * If the to-be-dup'd fd number is greater than the allowed number
+	 * of file descriptors, or the fd to be dup'd has already been
+	 * closed, reject.  Note, check for new == old is necessary as
+	 * falloc could allocate an already closed to-be-dup'd descriptor
+	 * as the new descriptor.
+	 */
+	fp = fdp->fd_ofiles[indx];
+	if ((u_int)dfd >= fdp->fd_nfiles ||
+	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
+		return (EBADF);
+
+	/*
+	 * Check that the mode the file is being opened for is a subset
+	 * of the mode of the existing descriptor.
+	 */
+	if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
+		return (EACCES);
+	/* share the existing open file, including its flag byte */
+	fdp->fd_ofiles[indx] = wfp;
+	fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+	wfp->f_count++;
+	if (indx > fdp->fd_lastfile)
+		fdp->fd_lastfile = indx;
+	return (0);
+}
diff --git a/sys/kern/kern_execve.c b/sys/kern/kern_execve.c
new file mode 100644
index 000000000000..003b2f768b07
--- /dev/null
+++ b/sys/kern/kern_execve.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This software is a component of "386BSD" developed by
+ *	William F. Jolitz, TeleMuse.
+ * 4.
Neither the name of the developer nor the name "386BSD" + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ + * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS + * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT. + * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT + * NOT MAKE USE OF THIS WORK. + * + * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED + * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN + * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES + * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING + * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND + * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE + * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS + * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992. + * + * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This procedure implements a minimal program execution facility for + * 386BSD. It interfaces to the BSD kernel as the execve system call. 
+ * Significant limitations and lack of compatiblity with POSIX are + * present with this version, to make its basic operation more clear. + * + * $Id: kern_execve.c,v 1.8 1993/10/25 17:26:01 davidg Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "signalvar.h" +#include "resourcevar.h" +#include "proc.h" +#include "mount.h" +#include "namei.h" +#include "vnode.h" +#include "file.h" +#include "acct.h" +#include "exec.h" +#include "stat.h" +#include "wait.h" +#include "mman.h" +#include "malloc.h" + +#include "vm/vm.h" +#include "vm/vm_param.h" +#include "vm/vm_map.h" +#include "vm/vm_kern.h" + +#include "machine/reg.h" + +extern int dostacklimits; +#define copyinoutstr copyinstr + +/* + * execve() system call. + */ + +struct execve_args { + char *fname; + char **argp; + char **envp; +}; + +/* ARGSUSED */ +execve(p, uap, retval) + struct proc *p; + register struct execve_args *uap; + int *retval; +{ + register struct nameidata *ndp; + struct nameidata nd; + char **argbuf, **argbufp, *stringbuf, *stringbufp; + char **vectp, *ep; + int needsenv, limitonargs, stringlen, addr, size, len, + rv, amt, argc, tsize, dsize, bsize, cnt, file_offset, + virtual_offset; + struct vattr attr; + struct vmspace *vs; + caddr_t newframe; + char shellname[MAXINTERP]; /* 05 Aug 92*/ + char *shellargs; + union { + char ex_shell[MAXINTERP]; /* #! and interpreter name */ + struct exec ex_hdr; + } exdata; + int indir = 0; + + /* + * Step 1. Lookup filename to see if we have something to execute. + */ + ndp = &nd; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + +again: /* 05 Aug 92*/ + ndp->ni_nameiop = LOOKUP | LOCKLEAF | FOLLOW | SAVENAME; + + /* is it there? */ + if (rv = namei(ndp, p)) + return (rv); + + if (ndp->ni_vp->v_writecount) { /* don't exec if file is busy */ + rv = EBUSY; + goto exec_fail; + } + /* does it have any attributes? 
*/ + rv = VOP_GETATTR(ndp->ni_vp, &attr, p->p_ucred, p); + if (rv) + goto exec_fail; + + if (ndp->ni_vp->v_mount->mnt_flag & MNT_NOEXEC) { /* no exec on fs ?*/ + rv = EACCES; + goto exec_fail; + } + + /* is it executable, and a regular file? */ + if ((ndp->ni_vp->v_mount->mnt_flag & MNT_NOEXEC) || /* 29 Jul 92*/ + (VOP_ACCESS(ndp->ni_vp, VEXEC, p->p_ucred, p)) || + ((attr.va_mode & 0111) == 0) || + (attr.va_type != VREG)) { + rv = EACCES; + goto exec_fail; + } + + /* + * Step 2. Does the file contain a format we can + * understand and execute + * + * XXX 05 Aug 92 + * Read in first few bytes of file for segment sizes, magic number: + * ZMAGIC = demand paged RO text + * Also an ASCII line beginning with #! is + * the file name of a ``shell'' and arguments may be prepended + * to the argument list if given here. + */ + exdata.ex_shell[0] = '\0'; /* for zero length files */ + + rv = vn_rdwr(UIO_READ, ndp->ni_vp, (caddr_t)&exdata, sizeof(exdata), + 0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &amt, p); + + /* big enough to hold a header? */ + if (rv) + goto exec_fail; + + if (exdata.ex_hdr.a_text != 0 && (ndp->ni_vp->v_flag & VTEXT) == 0 && + ndp->ni_vp->v_writecount != 0) { + rv = ETXTBSY; + goto exec_fail; + } + +#define SHELLMAGIC 0x2123 /* #! 
*/ + + switch (exdata.ex_hdr.a_magic) { + case ZMAGIC: + virtual_offset = 0; + if (exdata.ex_hdr.a_text) { + file_offset = NBPG; + } else { + /* Bill's "screwball mode" */ + file_offset = 0; + } + break; + case QMAGIC: + virtual_offset = NBPG; + file_offset = 0; + break; + default: + if ((exdata.ex_hdr.a_magic & 0xffff) != SHELLMAGIC) { + /* NetBSD compatibility */ + switch (ntohl(exdata.ex_hdr.a_magic) & 0xffff) { + case ZMAGIC: + case QMAGIC: + virtual_offset = NBPG; + file_offset = 0; + break; + default: + rv = ENOEXEC; + goto exec_fail; + } + } else { + char *cp, *sp; + + if (indir) { + rv = ENOEXEC; + goto exec_fail; + } + for (cp = &exdata.ex_shell[2];; ++cp) { + if (cp >= &exdata.ex_shell[MAXINTERP]) { + rv = ENOEXEC; + goto exec_fail; + } + if (*cp == '\n') { + *cp = '\0'; + break; + } + if (*cp == '\t') + *cp = ' '; + } + cp = &exdata.ex_shell[2]; /* get shell interpreter name */ + while (*cp == ' ') + cp++; + + sp = shellname; + while (*cp && *cp != ' ') + *sp++ = *cp++; + *sp = '\0'; + + /* copy the args in the #! 
line */ + while (*cp == ' ') + cp++; + if (*cp) { + sp++; + shellargs = sp; + while (*cp) + *sp++ = *cp++; + *sp = '\0'; + } else { + shellargs = 0; + } + + indir = 1; /* indicate this is a script file */ + vput(ndp->ni_vp); + FREE(ndp->ni_pnbuf, M_NAMEI); + + ndp->ni_dirp = shellname; /* find shell interpreter */ + ndp->ni_segflg = UIO_SYSSPACE; + goto again; + } + /* NOT REACHED */ + } + + /* sanity check "ain't not such thing as a sanity clause" -groucho */ + rv = ENOMEM; + if (/*exdata.ex_hdr.a_text == 0 || */ exdata.ex_hdr.a_text > MAXTSIZ || + exdata.ex_hdr.a_text % NBPG || exdata.ex_hdr.a_text > attr.va_size) + goto exec_fail; + + if (exdata.ex_hdr.a_data == 0 || exdata.ex_hdr.a_data > DFLDSIZ + || exdata.ex_hdr.a_data > attr.va_size + || exdata.ex_hdr.a_data + exdata.ex_hdr.a_text > attr.va_size) + goto exec_fail; + + if (exdata.ex_hdr.a_bss > MAXDSIZ) + goto exec_fail; + + if (exdata.ex_hdr.a_text + exdata.ex_hdr.a_data + exdata.ex_hdr.a_bss > MAXTSIZ + MAXDSIZ) + goto exec_fail; + + if (exdata.ex_hdr.a_data + exdata.ex_hdr.a_bss > p->p_rlimit[RLIMIT_DATA].rlim_cur) + goto exec_fail; + + if (exdata.ex_hdr.a_entry > exdata.ex_hdr.a_text + exdata.ex_hdr.a_data) + goto exec_fail; + + /* + * Step 3. File and header are valid. Now, dig out the strings + * out of the old process image. + */ + + /* + * We implement a single-pass algorithm that builds a new stack + * frame within the address space of the "old" process image, + * avoiding the second pass entirely. Thus, the new frame is + * in position to be run. This consumes much virtual address space, + * and two pages more of 'real' memory, such are the costs. + * [Also, note the cache wipe that's avoided!] 
+ */ + + /* create anonymous memory region for new stack */ + vs = p->p_vmspace; + if ((unsigned)vs->vm_maxsaddr + MAXSSIZ < USRSTACK) + newframe = (caddr_t) USRSTACK - MAXSSIZ; + else + vs->vm_maxsaddr = newframe = (caddr_t) USRSTACK - 2*MAXSSIZ; + + /* don't do stack limit checking on traps temporarily XXX*/ + dostacklimits = 0; + + rv = vm_allocate(&vs->vm_map, &newframe, MAXSSIZ, FALSE); + if (rv) goto exec_fail; + + /* allocate string buffer and arg buffer */ + argbuf = (char **) (newframe + MAXSSIZ - 3*ARG_MAX); + stringbuf = stringbufp = ((char *)argbuf) + 2*ARG_MAX; + argbufp = argbuf; + + /* first, do args */ + vectp = uap->argp; + needsenv = 1; + limitonargs = ARG_MAX; + cnt = 0; + + /* first, do (shell name if any then) args */ + if (indir) { + ep = shellname; +thrice: + if (ep) { + /* did we outgrow initial argbuf, if so, die */ + if (argbufp >= (char **)stringbuf) { + rv = E2BIG; + goto exec_dealloc; + } + + if (rv = copyoutstr(ep, stringbufp, + (u_int)limitonargs, (u_int *)&stringlen)) { + if (rv == ENAMETOOLONG) + rv = E2BIG; + goto exec_dealloc; + } + suword(argbufp++, (int)stringbufp); + cnt++; + stringbufp += stringlen; + limitonargs -= stringlen; + } + + if (shellargs) { + ep = shellargs; + shellargs = 0; + goto thrice; + } + + if (indir) { + indir = 0; + /* orginal executable is 1st argument with scripts */ + ep = uap->fname; + goto thrice; + } + /* terminate in case no more args to script */ + suword(argbufp, 0); + if (vectp = uap->argp) vectp++; /* manually doing the first + argument with scripts */ + } + +do_env_as_well: + if(vectp == 0) goto dont_bother; + + /* for each envp, copy in string */ + do { + /* did we outgrow initial argbuf, if so, die */ + if (argbufp == (char **)stringbuf) { + rv = E2BIG; + goto exec_dealloc; + } + + /* get an string pointer */ + ep = (char *)fuword(vectp++); + if (ep == (char *)-1) { + rv = EFAULT; + goto exec_dealloc; + } + + /* if not a null pointer, copy string */ + if (ep) { + if (rv = copyinoutstr(ep, 
stringbufp, + (u_int)limitonargs, (u_int *) &stringlen)) { + if (rv == ENAMETOOLONG) + rv = E2BIG; + goto exec_dealloc; + } + suword(argbufp++, (int)stringbufp); + cnt++; + stringbufp += stringlen; + limitonargs -= stringlen; + } else { + suword(argbufp++, 0); + break; + } + } while (limitonargs > 0); + +dont_bother: + if (limitonargs <= 0) { + rv = E2BIG; + goto exec_dealloc; + } + + /* have we done the environment yet ? */ + if (needsenv) { + /* remember the arg count for later */ + argc = cnt; + vectp = uap->envp; + needsenv = 0; + goto do_env_as_well; + } + + /* At this point, one could optionally implement a + * second pass to condense the strings, arguement vectors, + * and stack to fit the fewest pages. + * + * One might selectively do this when copying was cheaper + * than leaving allocated two more pages per process. + */ + + /* stuff arg count on top of "new" stack */ + /* argbuf[-1] = (char *)argc;*/ + suword(argbuf-1,argc); + + /* + * Step 4. Build the new processes image. + * + * At this point, we are committed -- destroy old executable! 
+ */ + + /* blow away all address space, except the stack */ + rv = vm_deallocate(&vs->vm_map, 0, USRSTACK - 2*MAXSSIZ); + if (rv) + goto exec_abort; + + /* destroy "old" stack */ + if ((unsigned)newframe < USRSTACK - MAXSSIZ) { + rv = vm_deallocate(&vs->vm_map, USRSTACK - MAXSSIZ, MAXSSIZ); + if (rv) + goto exec_abort; + } else { + rv = vm_deallocate(&vs->vm_map, USRSTACK - 2*MAXSSIZ, MAXSSIZ); + if (rv) + goto exec_abort; + } + + /* build a new address space */ + + + + /* treat text, data, and bss in terms of integral page size */ + tsize = roundup(exdata.ex_hdr.a_text, NBPG); + dsize = roundup(exdata.ex_hdr.a_data, NBPG); + bsize = roundup(exdata.ex_hdr.a_bss, NBPG); + + addr = virtual_offset; + + /* map text as being read/execute only and demand paged */ + rv = vm_mmap(&vs->vm_map, &addr, tsize, VM_PROT_READ|VM_PROT_EXECUTE, + VM_PROT_DEFAULT, MAP_FILE|MAP_PRIVATE|MAP_FIXED, + (caddr_t)ndp->ni_vp, file_offset); + if (rv) + goto exec_abort; + + addr = virtual_offset + tsize; + + /* map data as being read/write and demand paged */ + rv = vm_mmap(&vs->vm_map, &addr, dsize, + VM_PROT_READ | VM_PROT_WRITE | (tsize ? 0 : VM_PROT_EXECUTE), + VM_PROT_DEFAULT, MAP_FILE|MAP_PRIVATE|MAP_FIXED, + (caddr_t)ndp->ni_vp, file_offset + tsize); + if (rv) + goto exec_abort; + + /* create anonymous memory region for bss */ + addr = virtual_offset + tsize + dsize; + rv = vm_allocate(&vs->vm_map, &addr, bsize, FALSE); + if (rv) + goto exec_abort; + + /* + * Step 5. Prepare process for execution. + */ + + /* touchup process information -- vm system is unfinished! 
*/ + vs->vm_tsize = tsize/NBPG; /* text size (pages) XXX */ + vs->vm_dsize = (dsize+bsize)/NBPG; /* data size (pages) XXX */ + vs->vm_taddr = (caddr_t) virtual_offset; /* virtual address of text */ + vs->vm_daddr = (caddr_t) virtual_offset + tsize; /* virtual address of data */ + vs->vm_maxsaddr = newframe; /* user VA at max stack growth XXX */ + vs->vm_ssize = ((unsigned)vs->vm_maxsaddr + MAXSSIZ + - (unsigned)argbuf)/ NBPG + 1; /* stack size (pages) */ + dostacklimits = 1; /* allow stack limits to be enforced XXX */ + + /* close files on exec, fixup signals */ + fdcloseexec(p); + execsigs(p); + + /* name this process - nameiexec(p, ndp) */ + len = MIN(ndp->ni_namelen,MAXCOMLEN); + bcopy(ndp->ni_ptr, p->p_comm, len); + p->p_comm[len] = 0; + + /* mark as executable, wakeup any process that was vforked and tell + * it that it now has it's own resources back */ + p->p_flag |= SEXEC; + if (p->p_pptr && (p->p_flag & SPPWAIT)) { + p->p_flag &= ~SPPWAIT; + wakeup(p->p_pptr); + } + + /* implement set userid/groupid */ + if ((attr.va_mode&VSUID) && (p->p_flag & STRC) == 0) { + p->p_ucred = crcopy(p->p_ucred); + p->p_cred->p_svuid = p->p_ucred->cr_uid = attr.va_uid; + } + if ((attr.va_mode&VSGID) && (p->p_flag & STRC) == 0) { + p->p_ucred = crcopy(p->p_ucred); + p->p_cred->p_svgid = p->p_ucred->cr_groups[0] = attr.va_gid; + } + + /* setup initial register state */ + p->p_regs[SP] = (unsigned) (argbuf - 1); + setregs(p, exdata.ex_hdr.a_entry); + + ndp->ni_vp->v_flag |= VTEXT; /* mark vnode pure text */ + + vput(ndp->ni_vp); + FREE(ndp->ni_pnbuf, M_NAMEI); + + /* if tracing process, pass control back to debugger so breakpoints + can be set before the program "runs" */ + if (p->p_flag & STRC) + psignal(p, SIGTRAP); + p->p_acflag &= ~AFORK; /* remove fork, but no exec flag */ + + return (0); + +exec_dealloc: + /* remove interim "new" stack frame we were building */ + vm_deallocate(&vs->vm_map, newframe, MAXSSIZ); + +exec_fail: + dostacklimits = 1; + vput(ndp->ni_vp); + 
FREE(ndp->ni_pnbuf, M_NAMEI); + + return(rv); + +exec_abort: + /* sorry, no more process anymore. exit gracefully */ + vm_deallocate(&vs->vm_map, newframe, MAXSSIZ); + vput(ndp->ni_vp); + FREE(ndp->ni_pnbuf, M_NAMEI); + kexit(p, W_EXITCODE(0, SIGABRT)); + + /* NOTREACHED */ + return(0); +} diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c new file mode 100644 index 000000000000..6a742b0e160c --- /dev/null +++ b/sys/kern/kern_exit.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
 IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)kern_exit.c	7.35 (Berkeley) 6/27/91
 *	$Id: kern_exit.c,v 1.9 1993/10/19 01:01:20 nate Exp $
 */

#include "param.h"
#include "systm.h"
#include "ioctl.h"
#include "tty.h"
#include "time.h"
#include "resource.h"
#include "kernel.h"
#include "proc.h"
#include "buf.h"
#include "wait.h"
#include "file.h"
#include "vnode.h"
#include "syslog.h"
#include "malloc.h"
#include "resourcevar.h"

#include "machine/cpu.h"
#ifdef COMPAT_43
#include "machine/reg.h"
#include "machine/psl.h"
#endif

#include "vm/vm.h"
#include "vm/vm_kern.h"

/*
 * Exit system call: pass back caller's arg
 */

/* Argument structure for the exit(2) system call. */
struct rexit_args {
	int	rval;		/* exit status supplied by the caller */
};
/* ARGSUSED */
/*
 * exit(2) entry point.  Encodes the caller's status as a normal
 * (non-signal) wait(2) status word and hands off to kexit(), which
 * tears the process down and never returns; retval is therefore
 * never written.
 */
void
rexit(p, uap, retval)
	struct proc *p;
	struct rexit_args *uap;
	int *retval;
{

	kexit(p, W_EXITCODE(uap->rval, 0));
	/* NOTREACHED */
}

/*
 * Exit: deallocate address space and other resources,
 * change proc state to zombie, and unlink proc from allproc
 * and parent's lists.  Save exit status and rusage for wait().
 * Check for child processes and orphan them.
 */
/*
 * kexit() -- common process-termination path, shared by exit(2),
 * fatal signals and the exec-abort path.  Releases the process's
 * resources, turns it into a zombie for its parent to reap via
 * wait(), reparents its children to init, and finally calls the
 * machine-dependent cpu_exit().  Never returns.
 *
 * p  -- the exiting process (must be curproc)
 * rv -- wait(2)-encoded exit status, saved in p_xstat for wait1()
 */
void
kexit(p, rv)
	register struct proc *p;
	int rv;
{
	register struct proc *q, *nq;
	register struct proc **pp;
	int s;				/* XXX appears unused here */

	acct(p);			/* MT - do process accounting -- must be done before
					   address space is released */

#ifdef PGINPROF
	vmsizmon();
#endif
	/* Allocate the rusage record that wait1() will copy out and free. */
	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
		M_ZOMBIE, M_WAITOK);
	/*
	 * If parent is waiting for us to exit or exec,
	 * SPPWAIT is set; we will wakeup the parent below.
	 */
	p->p_flag &= ~(STRC|SPPWAIT);
	p->p_flag |= SWEXIT;
	/* Block all further signal delivery and cancel any interval timer. */
	p->p_sigignore = ~0;
	p->p_sig = 0;
	untimeout(realitexpire, (caddr_t)p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(p);

	/* The next two chunks should probably be moved to vmspace_exit. */
#ifdef SYSVSHM
	if (p->p_vmspace->vm_shm)
		shmexit(p);
#endif
	/*
	 * Release user portion of address space.
	 * This releases references to vnodes,
	 * which could cause I/O if the file has been unlinked.
	 * Need to do this early enough that we can still sleep.
	 * Can't free the entire vmspace as the kernel stack
	 * may be mapped within that space also.
	 */
	if (p->p_vmspace->vm_refcnt == 1)
		(void) vm_map_remove(&p->p_vmspace->vm_map, VM_MIN_ADDRESS,
		    VM_MAXUSER_ADDRESS);

	if (p->p_pid == 1)
		panic("init died");

	if (SESS_LEADER(p)) {
		register struct session *sp = p->p_session;

		if (sp->s_ttyvp) {
			/*
			 * Controlling process.
			 * Signal foreground pgrp,
			 * drain controlling terminal
			 * and revoke access to controlling terminal.
			 */
			if (sp->s_ttyp->t_session == sp) {
				if (sp->s_ttyp->t_pgrp)
					pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
				(void) ttywait(sp->s_ttyp);
				vgoneall(sp->s_ttyvp);
			}
			vrele(sp->s_ttyvp);
			sp->s_ttyvp = NULL;
			/*
			 * s_ttyp is not zero'd; we use this to indicate
			 * that the session once had a controlling terminal.
			 * (for logging and informational purposes)
			 */
		}
		sp->s_leader = NULL;
	}
	fixjobc(p, p->p_pgrp, 0);
	/* Lift the file-size limit so accounting/core writes can't fail on it. */
	p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
#ifdef KTRACE
	/*
	 * release trace file
	 */
	if (p->p_tracep)
		vrele(p->p_tracep);
#endif

	/* current process does not exist, as far as other parts of the
	 * system (clock) is concerned, since parts of it might not be
	 * there anymore */
	curproc = NULL;

	/* Drop our reference on the resource-limit structure. */
	if (--p->p_limit->p_refcnt == 0) {
		FREE(p->p_limit, M_SUBPROC);
		p->p_limit = (struct plimit *) -1;	/* poison to catch stray use */
	}

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 * (assignments inside the conditions below are intentional
	 * linked-list surgery, original BSD style)
	 */
	if (*p->p_prev = p->p_nxt)
		p->p_nxt->p_prev = p->p_prev;
	if (p->p_nxt = zombproc)
		p->p_nxt->p_prev = &p->p_nxt;
	p->p_prev = &zombproc;
	zombproc = p;
	p->p_stat = SZOMB;
	for (pp = &pidhash[PIDHASH(p->p_pid)]; *pp; pp = &(*pp)->p_hash)
		if (*pp == p) {
			*pp = p->p_hash;
			goto done;
		}
	panic("exit");			/* proc not on its own hash chain */
done:

	if (p->p_cptr)		/* only need this if any child is S_ZOMB */
		wakeup((caddr_t) initproc);
	/*
	 * Reparent all of our children to init, pushing each onto the
	 * front of init's child list.
	 */
	for (q = p->p_cptr; q != NULL; q = nq) {
		nq = q->p_osptr;
		if (nq != NULL)
			nq->p_ysptr = NULL;
		if (initproc->p_cptr)
			initproc->p_cptr->p_ysptr = q;
		q->p_osptr = initproc->p_cptr;
		q->p_ysptr = NULL;
		initproc->p_cptr = q;

		q->p_pptr = initproc;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag&STRC) {
			q->p_flag &= ~STRC;
			psignal(q, SIGKILL);
		}
	}
	p->p_cptr = NULL;

	/*
	 * Save exit status and final rusage info,
	 * adding in child rusage info and self times.
	 */
	p->p_xstat = rv;
	*p->p_ru = p->p_stats->p_ru;
	p->p_ru->ru_stime = p->p_stime;
	p->p_ru->ru_utime = p->p_utime;
	ruadd(p->p_ru, &p->p_stats->p_cru);

	/*
	 * Notify parent that we're gone.
	 */
	psignal(p->p_pptr, SIGCHLD);
	wakeup((caddr_t)p->p_pptr);
#if defined(tahoe)
	/* move this to cpu_exit */
	p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL;
#endif
	/*
	 * Finally, call machine-dependent code to release the remaining
	 * resources including address space, the kernel stack and pcb.
	 * The address space is released by "vmspace_free(p->p_vmspace)";
	 * This is machine-dependent, as we may have to change stacks
	 * or ensure that the current one isn't reallocated before we
	 * finish.  cpu_exit will end with a call to swtch(), finishing
	 * our execution (pun intended).
	 */
	cpu_exit(p);
	/* NOTREACHED */
}

#ifdef COMPAT_43

/* Argument structure shared with wait1(); "compat" selects 4.3BSD
 * semantics (status returned in retval[1] instead of copied out). */
struct owait_args {
	int	pid;
	int	*status;
	int	options;
	struct rusage *rusage;
	int	compat;
};

/*
 * Old 4.3BSD wait(2): wait for any child, no options, status in
 * the second return register.  Delegates to wait1() in compat mode.
 */
owait(p, uap, retval)
	struct proc *p;
	register struct owait_args *uap;
	int *retval;
{

	uap->options = 0;
	uap->rusage = 0;
	uap->pid = WAIT_ANY;
	uap->status = 0;
	uap->compat = 1;
	return (wait1(p, uap, retval));
}

/* Same layout as owait_args/wait1_args; see above. */
struct wait4_args {
	int	pid;
	int	*status;
	int	options;
	struct rusage *rusage;
	int	compat;
};

/*
 * Modern wait4(2): delegates to wait1() with compat off, so status
 * is copied out to the user-supplied pointer.
 */
wait4(p, uap, retval)
	struct proc *p;
	struct wait4_args *uap;
	int *retval;
{

	uap->compat = 0;
	return (wait1(p, uap, retval));
}
#else
/* Without COMPAT_43 there is only one entry point; alias it. */
#define wait1	wait4
#endif

/*
 * Wait: check child processes to see if any have exited,
 * stopped under trace, or (optionally) stopped by a signal.
 * Pass back status and deallocate exited child's proc structure.
 */

struct wait1_args {
	int	pid;		/* pid to wait for; 0 = own pgrp, -1 = any */
	int	*status;	/* user address for status word, or NULL */
	int	options;	/* WNOHANG | WUNTRACED */
	struct	rusage *rusage;	/* user address for child rusage, or NULL */
#ifdef COMPAT_43
	int	compat;		/* 1 = old wait(): status via retval[1] */
#endif
};

/*
 * Common back end for wait4(2) and (COMPAT_43) owait().  Scans q's
 * children for a zombie (reap it: copy out status/rusage, free the
 * proc entry) or a newly-stopped child (report it once, marking
 * SWTED).  With no eligible child: ECHILD; with children but none
 * ready: return 0 under WNOHANG, otherwise sleep on q and rescan.
 * Returns 0 or an errno; child pid goes in retval[0].
 */
wait1(q, uap, retval)
	register struct proc *q;
	register struct wait1_args *uap;
	int retval[];
{
	register int nfound;
	register struct proc *p;
	int status, error;

	if (uap->pid == 0)
		uap->pid = -q->p_pgid;	/* 0 means "my process group" */
#ifdef notyet
	if (uap->options &~ (WUNTRACED|WNOHANG))
		return (EINVAL);
#endif
loop:
	nfound = 0;
	for (p = q->p_cptr; p; p = p->p_osptr) {
		/* negative pid selects a process group */
		if (uap->pid != WAIT_ANY &&
		    p->p_pid != uap->pid && p->p_pgid != -uap->pid)
			continue;
		nfound++;
		if (p->p_stat == SZOMB) {
			retval[0] = p->p_pid;
#ifdef COMPAT_43
			if (uap->compat)
				retval[1] = p->p_xstat;
			else
#endif
			if (uap->status) {
				status = p->p_xstat;	/* convert to int */
				if (error = copyout((caddr_t)&status,
				    (caddr_t)uap->status, sizeof(status)))
					return (error);
			}
			if (uap->rusage && (error = copyout((caddr_t)p->p_ru,
			    (caddr_t)uap->rusage, sizeof (struct rusage))))
				return (error);
			/* Fold child's rusage into our children total, then
			 * release the record kexit() allocated. */
			p->p_xstat = 0;
			ruadd(&q->p_stats->p_cru, p->p_ru);
			FREE(p->p_ru, M_ZOMBIE);
			if (--p->p_cred->p_refcnt == 0) {
				crfree(p->p_cred->pc_ucred);
				FREE(p->p_cred, M_SUBPROC);
				p->p_cred = (struct pcred *) -1;
			}

			/*
			 * Finally finished with old proc entry.
			 * Unlink it from its process group and free it.
			 * NOTE: q is deliberately reused below as a scratch
			 * pointer for sibling/parent unlinking; the original
			 * parent pointer is not needed again on this path.
			 */
			leavepgrp(p);
			if (*p->p_prev = p->p_nxt)	/* off zombproc */
				p->p_nxt->p_prev = p->p_prev;
			if (q = p->p_ysptr)
				q->p_osptr = p->p_osptr;
			if (q = p->p_osptr)
				q->p_ysptr = p->p_ysptr;
			if ((q = p->p_pptr)->p_cptr == p)
				q->p_cptr = p->p_osptr;

			/*
			 * Give machine-dependent layer a chance
			 * to free anything that cpu_exit couldn't
			 * release while still running in process context.
			 */
			cpu_wait(p);
			FREE(p, M_PROC);
			nprocs--;
			return (0);
		}
		/* Report a child stopped by trace or (with WUNTRACED) by a
		 * signal, but only once per stop (SWTED latches it). */
		if (p->p_stat == SSTOP && (p->p_flag & SWTED) == 0 &&
		    (p->p_flag & STRC || uap->options & WUNTRACED)) {
			p->p_flag |= SWTED;
			retval[0] = p->p_pid;
#ifdef COMPAT_43
			if (uap->compat) {
				retval[1] = W_STOPCODE(p->p_xstat);
				error = 0;
			} else
#endif
			if (uap->status) {
				status = W_STOPCODE(p->p_xstat);
				error = copyout((caddr_t)&status,
				    (caddr_t)uap->status, sizeof(status));
			} else
				error = 0;
			return (error);
		}
	}
	if (nfound == 0)
		return (ECHILD);
	if (uap->options & WNOHANG) {
		retval[0] = 0;
		return (0);
	}
	/* Sleep until a child changes state (kexit() wakes us); PCATCH
	 * lets a signal interrupt the wait. */
	if (error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0))
		return (error);
	goto loop;
}
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
new file mode 100644
index 000000000000..a387af7e9577
--- /dev/null
+++ b/sys/kern/kern_fork.c
@@ -0,0 +1,296 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)kern_fork.c	7.29 (Berkeley) 5/15/91
 *	$Id: kern_fork.c,v 1.2 1993/10/16 15:24:17 rgrimes Exp $
 */

#include "param.h"
#include "systm.h"
#include "filedesc.h"
#include "kernel.h"
#include "malloc.h"
#include "proc.h"
#include "resourcevar.h"
#include "vnode.h"
#include "file.h"
#include "acct.h"
#include "ktrace.h"
#include "vm/vm.h"

/* ARGSUSED */
/* fork(2): full fork; parent and child run independently. */
fork(p, uap, retval)
	struct proc *p;
	void *uap;
	int retval[];
{

	return (fork1(p, 0, retval));
}

/* ARGSUSED */
/* vfork(2): child borrows parent's address space; parent sleeps
 * until the child execs or exits (SPPWAIT protocol below). */
vfork(p, uap, retval)
	struct proc *p;
	void *uap;
	int retval[];
{

	return (fork1(p, 1, retval));
}

int	nprocs = 1;		/* process 0 */

/*
 * fork1() -- common fork implementation.
 *
 * p1      -- forking (parent) process
 * isvfork -- nonzero for vfork(2) semantics
 * retval  -- retval[0] = pid, retval[1] = 1 in child / 0 in parent
 *
 * Returns 0 on success (in BOTH parent and child -- vm_fork() returns
 * twice), or EAGAIN when the system-wide or per-user process limit
 * would be exceeded.
 */
fork1(p1, isvfork, retval)
	register struct proc *p1;
	int isvfork, retval[];
{
	register struct proc *p2;
	register int count, uid;
	static int nextpid, pidchecked = 0;

	/* Count this (non-root) user's processes, live and zombie,
	 * for the RLIMIT_NPROC check below. */
	count = 0;
	if ((uid = p1->p_ucred->cr_uid) != 0) {
		for (p2 = allproc; p2; p2 = p2->p_nxt)
			if (p2->p_ucred->cr_uid == uid)
				count++;
		for (p2 = zombproc; p2; p2 = p2->p_nxt)
			if (p2->p_ucred->cr_uid == uid)
				count++;
	}
	/*
	 * Although process entries are dynamically allocated,
	 * we still keep a global limit on the maximum number
	 * we will create.  Don't allow a nonprivileged user
	 * to exceed its current limit or to bring us within one
	 * of the global limit; don't let root exceed the limit.
	 * nprocs is the current number of processes,
	 * maxproc is the limit.
	 */
	if (nprocs >= maxproc || uid == 0 && nprocs >= maxproc + 1) {
		tablefull("proc");
		return (EAGAIN);
	}
	if (count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)
		return (EAGAIN);

	/*
	 * Find an unused process ID.
	 * We remember a range of unused IDs ready to use
	 * (from nextpid+1 through pidchecked-1).
	 */
	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidchecked = 0;
	}
	if (nextpid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than nextpid, so we can avoid checking for a while.
		 */
		p2 = allproc;
again:
		for (; p2 != NULL; p2 = p2->p_nxt) {
			/* pids and pgrp ids share the number space */
			if (p2->p_pid == nextpid ||
			    p2->p_pgrp->pg_id == nextpid) {
				nextpid++;
				if (nextpid >= pidchecked)
					goto retry;
			}
			if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp->pg_id > nextpid &&
			    pidchecked > p2->p_pgrp->pg_id)
				pidchecked = p2->p_pgrp->pg_id;
		}
		if (!doingzomb) {
			doingzomb = 1;
			p2 = zombproc;
			goto again;
		}
	}


	/*
	 * Allocate new proc.
	 * Link onto allproc (this should probably be delayed).
	 */
	MALLOC(p2, struct proc *, sizeof(struct proc), M_PROC, M_WAITOK);
	nprocs++;
	p2->p_nxt = allproc;
	p2->p_nxt->p_prev = &p2->p_nxt;		/* allproc is never NULL */
	p2->p_prev = &allproc;
	allproc = p2;
	p2->p_link = NULL;			/* shouldn't be necessary */
	p2->p_rlink = NULL;			/* shouldn't be necessary */

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
	p2->p_spare[0] = 0;	/* XXX - should be in zero range */
	p2->p_spare[1] = 0;	/* XXX - should be in zero range */
	p2->p_spare[2] = 0;	/* XXX - should be in zero range */
	p2->p_spare[3] = 0;	/* XXX - should be in zero range */

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
	    M_SUBPROC, M_WAITOK);
	bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
	p2->p_cred->p_refcnt = 1;
	crhold(p1->p_ucred);

	p2->p_fd = fdcopy(p1);
	/*
	 * If p_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
	if (p1->p_limit->p_lflags & PL_SHAREMOD)
		p2->p_limit = limcopy(p1->p_limit);
	else {
		p2->p_limit = p1->p_limit;
		p2->p_limit->p_refcnt++;
	}

	/* Inherit controlling-terminal and HP-UX emulation flags. */
	p2->p_flag = SLOAD | (p1->p_flag & SHPUX);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & SCTTY)
		p2->p_flag |= SCTTY;
	if (isvfork)
		p2->p_flag |= SPPWAIT;
	p2->p_stat = SIDL;
	p2->p_pid = nextpid;
	{
		/* enter the new pid in the pid hash table */
		struct proc **hash = &pidhash[PIDHASH(p2->p_pid)];

		p2->p_hash = *hash;
		*hash = p2;
	}
	/* Link into the parent's process group and child/sibling lists. */
	p2->p_pgrpnxt = p1->p_pgrpnxt;
	p1->p_pgrpnxt = p2;
	p2->p_pptr = p1;
	p2->p_osptr = p1->p_cptr;
	if (p1->p_cptr)
		p1->p_cptr->p_ysptr = p2;
	p1->p_cptr = p2;
#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag&KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

#if defined(tahoe)
	p2->p_vmspace->p_ckey = p1->p_vmspace->p_ckey; /* XXX move this */
#endif

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	p1->p_flag |= SKEEP;
	/*
	 * Set return values for child before vm_fork,
	 * so they can be copied to child stack.
	 * We return parent pid, and mark as child in retval[1].
	 * NOTE: the kernel stack may be at a different location in the child
	 * process, and thus addresses of automatic variables (including retval)
	 * may be invalid after vm_fork returns in the child process.
	 */
	retval[0] = p1->p_pid;
	retval[1] = 1;
	if (vm_fork(p1, p2, isvfork)) {
		/*
		 * Child process.  Set start time and get to work.
		 */
		(void) splclock();
		p2->p_stats->p_start = time;
		(void) spl0();
		p2->p_acflag = AFORK;
		return (0);
	}

	/*
	 * Make child runnable and add to run queue.
	 */
	(void) splhigh();
	p2->p_stat = SRUN;
	setrq(p2);
	(void) spl0();

	/*
	 * Now can be swapped.
	 */
	p1->p_flag &= ~SKEEP;

	/*
	 * Preserve synchronization semantics of vfork.
	 * If waiting for child to exec or exit, set SPPWAIT
	 * on child, and sleep on our proc (in case of exit).
	 */
	if (isvfork)
		while (p2->p_flag & SPPWAIT)
			tsleep((caddr_t)p1, PWAIT, "ppwait", 0);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	retval[0] = p2->p_pid;
	retval[1] = 0;
	return (0);
}
diff --git a/sys/kern/kern_kinfo.c b/sys/kern/kern_kinfo.c
new file mode 100644
index 000000000000..e9c97b6d3a0c
--- /dev/null
+++ b/sys/kern/kern_kinfo.c
@@ -0,0 +1,303 @@
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * from: @(#)kern_kinfo.c	7.17 (Berkeley) 6/26/91
 *	$Id: kern_kinfo.c,v 1.3 1993/10/16 15:24:18 rgrimes Exp $
 */

#include "param.h"
#include "proc.h"
#include "kinfo.h"
#include "ioctl.h"
#include "tty.h"
#include "buf.h"
#include "file.h"

#include "vm/vm.h"

#include "kinfo_proc.h"

/* set error and bail out through the lock-release path */
#define snderr(e) { error = (e); goto release;}
extern int kinfo_doproc(), kinfo_rtable(), kinfo_vnode(), kinfo_file();
/* Serializes getkerninfo() callers (sleep lock, see loop below). */
struct kinfo_lock kinfo_lock;

struct getkerninfo_args {
	int	op;		/* KINFO_* type and sub-op, see ki_type()/ki_op() */
	char	*where;		/* user buffer, or NULL for a size probe */
	int	*size;		/* in: buffer size; out: bytes copied */
	int	arg;		/* op-specific selector (pid, uid, dev, ...) */
};

/* ARGSUSED */
/*
 * getkerninfo(2): dispatch to the per-type server function, copying
 * kernel data structures out to the user's buffer.  A NULL where/size
 * is a probe: only the needed size is computed and returned in retval.
 */
getkerninfo(p, uap, retval)
	struct proc *p;
	register struct getkerninfo_args *uap;
	int *retval;
{

	int bufsize;		/* max size of users buffer */
	int needed, locked, (*server)(), error = 0;

	if (error = copyin((caddr_t)uap->size, (caddr_t)&bufsize,
	    sizeof (bufsize)))
		goto done;

	switch (ki_type(uap->op)) {

	case KINFO_PROC:
		server = kinfo_doproc;
		break;

	case KINFO_RT:
		server = kinfo_rtable;
		break;

	case KINFO_VNODE:
		server = kinfo_vnode;
		break;

	case KINFO_FILE:
		server = kinfo_file;
		break;

	default:
		error = EINVAL;
		goto done;
	}
	if (uap->where == NULL || uap->size == NULL) {
		/* size probe only: server fills in needed */
		error = (*server)(uap->op, NULL, NULL, uap->arg, &needed);
		goto done;
	}
	/*
	 * Acquire the kinfo sleep lock.
	 * NOTE(review): kl_locked++ is inside the retry loop, so it is
	 * bumped once per wakeup rather than once per acquisition --
	 * looks like a statistics counter quirk; confirm intent.
	 */
	while (kinfo_lock.kl_lock) {
		kinfo_lock.kl_want++;
		sleep(&kinfo_lock, PRIBIO+1);
		kinfo_lock.kl_want--;
		kinfo_lock.kl_locked++;
	}
	kinfo_lock.kl_lock++;

	if (!useracc(uap->where, bufsize, B_WRITE))
		snderr(EFAULT);
	/* wire the user buffer so the server may copy without faulting */
	if (server != kinfo_vnode)	/* XXX */
		vslock(uap->where, bufsize);
	locked = bufsize;
	error = (*server)(uap->op, uap->where, &bufsize, uap->arg, &needed);
	if (server != kinfo_vnode)	/* XXX */
		vsunlock(uap->where, locked, B_WRITE);
	if (error == 0)
		error = copyout((caddr_t)&bufsize,
		    (caddr_t)uap->size, sizeof (bufsize));
release:
	kinfo_lock.kl_lock--;
	if (kinfo_lock.kl_want)
		wakeup(&kinfo_lock);
done:
	if (!error)
		*retval = needed;	/* bytes needed for the full answer */
	return (error);
}

/*
 * try over estimating by 5 procs
 */
#define KINFO_PROCSLOP	(5 * sizeof (struct kinfo_proc))

/*
 * KINFO_PROC server: walk the allproc and zombproc lists, select
 * processes per the sub-op (by pid, pgrp, tty, uid or real uid),
 * and copy a kinfo_proc (proc + eproc) per match into the user
 * buffer at "where".  With where == NULL only the needed size is
 * computed (padded by KINFO_PROCSLOP to allow for races).
 *
 * Returns 0 or a copyout() errno; *acopysize gets the bytes written,
 * *aneeded the bytes required for everything that matched.
 */
kinfo_doproc(op, where, acopysize, arg, aneeded)
	char *where;
	int *acopysize, *aneeded;
{
	register struct proc *p;
	register struct kinfo_proc *dp = (struct kinfo_proc *)where;
	register needed = 0;
	int buflen;		/* NOTE(review): only set/used when where != NULL */
	int doingzomb;
	struct eproc eproc;
	int error = 0;

	if (where != NULL)
		buflen = *acopysize;

	p = allproc;
	doingzomb = 0;
again:
	for (; p != NULL; p = p->p_nxt) {
		/*
		 * TODO - make more efficient (see notes below).
		 * do by session.
		 */
		switch (ki_op(op)) {

		case KINFO_PROC_PID:
			/* could do this with just a lookup */
			if (p->p_pid != (pid_t)arg)
				continue;
			break;

		case KINFO_PROC_PGRP:
			/* could do this by traversing pgrp */
			if (p->p_pgrp->pg_id != (pid_t)arg)
				continue;
			break;

		case KINFO_PROC_TTY:
			if ((p->p_flag&SCTTY) == 0 ||
			    p->p_session->s_ttyp == NULL ||
			    p->p_session->s_ttyp->t_dev != (dev_t)arg)
				continue;
			break;

		case KINFO_PROC_UID:
			if (p->p_ucred->cr_uid != (uid_t)arg)
				continue;
			break;

		case KINFO_PROC_RUID:
			if (p->p_cred->p_ruid != (uid_t)arg)
				continue;
			break;
		}
		/* any other sub-op (e.g. "all") falls through and matches */
		if (where != NULL && buflen >= sizeof (struct kinfo_proc)) {
			fill_eproc(p, &eproc);
			if (error = copyout((caddr_t)p, &dp->kp_proc,
			    sizeof (struct proc)))
				return (error);
			if (error = copyout((caddr_t)&eproc, &dp->kp_eproc,
			    sizeof (eproc)))
				return (error);
			dp++;
			buflen -= sizeof (struct kinfo_proc);
		}
		needed += sizeof (struct kinfo_proc);
	}
	/* second pass: zombies */
	if (doingzomb == 0) {
		p = zombproc;
		doingzomb++;
		goto again;
	}
	if (where != NULL)
		*acopysize = (caddr_t)dp - where;
	else
		needed += KINFO_PROCSLOP;
	*aneeded = needed;

	return (0);
}

/*
 * Fill in an eproc structure for the specified process.
 */
void
fill_eproc(p, ep)
        register struct proc *p;
        register struct eproc *ep;
{
        register struct tty *tp;

        ep->e_paddr = p;
        ep->e_sess = p->p_pgrp->pg_session;
        /* Snapshot credentials and vm info by value. */
        ep->e_pcred = *p->p_cred;
        ep->e_ucred = *p->p_ucred;
        ep->e_vm = *p->p_vmspace;
        if (p->p_pptr)
                ep->e_ppid = p->p_pptr->p_pid;
        else
                ep->e_ppid = 0;         /* no parent (e.g. swapper) */
        ep->e_pgid = p->p_pgrp->pg_id;
        ep->e_jobc = p->p_pgrp->pg_jobc;
        /* Controlling-terminal info, if the process has one. */
        if ((p->p_flag&SCTTY) &&
             (tp = ep->e_sess->s_ttyp)) {
                ep->e_tdev = tp->t_dev;
                ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
                ep->e_tsess = tp->t_session;
        } else
                ep->e_tdev = NODEV;
                /* NOTE(review): e_tpgid and e_tsess are left unset on this
                 * path -- confirm callers zero the eproc beforehand. */
        ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
        if (SESS_LEADER(p))
                ep->e_flag |= EPROC_SLEADER;
        if (p->p_wmesg)
                strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN);
        ep->e_xsize = ep->e_xrssize = 0;
        ep->e_xccount = ep->e_xswrss = 0;
}

/*
 * Get file structures: copy out the filehead pointer followed by an
 * array of the in-core file structures.
 *
 * where     - user buffer, or NULL to compute required space only
 * acopysize - in: size of the user buffer; out: bytes copied
 * aneeded   - out: bytes needed for the complete result
 *
 * Returns 0 or an errno from copyout().
 */
kinfo_file(op, where, acopysize, arg, aneeded)
        register char *where;
        int *acopysize, *aneeded;
{
        int buflen, needed, error;
        struct file *fp;
        char *start = where;

        if (where == NULL) {
                /*
                 * overestimate by 10 files
                 */
                *aneeded = sizeof (filehead) +
                        (nfiles + 10) * sizeof (struct file);
                return (0);
        }
        buflen = *acopysize;
        needed = 0;

        /*
         * first copyout filehead
         * NOTE(review): the `>' test (rather than `>=') skips the copy
         * when the buffer is exactly the right size -- confirm intended.
         */
        if (buflen > sizeof (filehead)) {
                if (error = copyout((caddr_t)&filehead, where,
                    sizeof (filehead)))
                        return (error);
                buflen -= sizeof (filehead);
                where += sizeof (filehead);
        }
        needed += sizeof (filehead);

        /*
         * followed by an array of file structures
         */
        for (fp = filehead; fp != NULL; fp = fp->f_filef) {
                if (buflen > sizeof (struct file)) {
                        if (error = copyout((caddr_t)fp, where,
                            sizeof (struct file)))
                                return (error);
                        buflen -= sizeof (struct file);
                        where += sizeof (struct file);
                }
                needed += sizeof (struct file);
        }
        *acopysize = where - start;
        *aneeded = needed;

        return (0);
}
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c new file mode
100644 index 000000000000..9b218294e33c --- /dev/null +++ b/sys/kern/kern_ktrace.c @@ -0,0 +1,419 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * from: @(#)kern_ktrace.c 7.15 (Berkeley) 6/21/91
 * $Id: kern_ktrace.c,v 1.4 1993/10/16 15:24:20 rgrimes Exp $
 */

#ifdef KTRACE

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "file.h"
#include "namei.h"
#include "vnode.h"
#include "ktrace.h"
#include "malloc.h"
#include "syslog.h"

/*
 * Allocate and partially fill a ktrace record header of the given
 * type.  Timestamp, pid and command name are set here; the caller
 * fills in ktr_buf/ktr_len and releases the header with FREE().
 */
struct ktr_header *
ktrgetheader(type)
{
        register struct ktr_header *kth;
        struct proc *p = curproc;       /* XXX */

        MALLOC(kth, struct ktr_header *, sizeof (struct ktr_header),
                M_TEMP, M_WAITOK);
        kth->ktr_type = type;
        microtime(&kth->ktr_time);
        kth->ktr_pid = p->p_pid;
        bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
        return (kth);
}

/*
 * Emit a KTR_SYSCALL record to the trace vnode: the syscall code
 * followed by its narg integer arguments, staged in a temporary
 * buffer that is freed after the write.
 */
ktrsyscall(vp, code, narg, args)
        struct vnode *vp;
        int code, narg, args[];
{
        struct ktr_header *kth = ktrgetheader(KTR_SYSCALL);
        struct ktr_syscall *ktp;
        register len = sizeof(struct ktr_syscall) + (narg * sizeof(int));
        int *argp, i;

        MALLOC(ktp, struct ktr_syscall *, len, M_TEMP, M_WAITOK);
        ktp->ktr_code = code;
        ktp->ktr_narg = narg;
        /* Argument vector follows the fixed part of the record. */
        argp = (int *)((char *)ktp + sizeof(struct ktr_syscall));
        for (i = 0; i < narg; i++)
                *argp++ = args[i];
        kth->ktr_buf = (caddr_t)ktp;
        kth->ktr_len = len;
        ktrwrite(vp, kth);
        FREE(ktp, M_TEMP);
        FREE(kth, M_TEMP);
}

/*
 * Emit a KTR_SYSRET record: syscall code, error and return value.
 * The record body lives on the stack; ktrwrite() copies it out.
 */
ktrsysret(vp, code, error, retval)
        struct vnode *vp;
        int code, error, retval;
{
        struct ktr_header *kth = ktrgetheader(KTR_SYSRET);
        struct ktr_sysret ktp;

        ktp.ktr_code = code;
        ktp.ktr_error = error;
        ktp.ktr_retval = retval;                /* what about val2 ?
*/ + + kth->ktr_buf = (caddr_t)&ktp; + kth->ktr_len = sizeof(struct ktr_sysret); + + ktrwrite(vp, kth); + FREE(kth, M_TEMP); +} + +ktrnamei(vp, path) + struct vnode *vp; + char *path; +{ + struct ktr_header *kth = ktrgetheader(KTR_NAMEI); + + kth->ktr_len = strlen(path); + kth->ktr_buf = path; + + ktrwrite(vp, kth); + FREE(kth, M_TEMP); +} + +ktrgenio(vp, fd, rw, iov, len, error) + struct vnode *vp; + int fd; + enum uio_rw rw; + register struct iovec *iov; +{ + struct ktr_header *kth = ktrgetheader(KTR_GENIO); + register struct ktr_genio *ktp; + register caddr_t cp; + register int resid = len, cnt; + + if (error) + return; + MALLOC(ktp, struct ktr_genio *, sizeof(struct ktr_genio) + len, + M_TEMP, M_WAITOK); + ktp->ktr_fd = fd; + ktp->ktr_rw = rw; + cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio)); + while (resid > 0) { + if ((cnt = iov->iov_len) > resid) + cnt = resid; + if (copyin(iov->iov_base, cp, (unsigned)cnt)) + goto done; + cp += cnt; + resid -= cnt; + iov++; + } + kth->ktr_buf = (caddr_t)ktp; + kth->ktr_len = sizeof (struct ktr_genio) + len; + + ktrwrite(vp, kth); +done: + FREE(kth, M_TEMP); + FREE(ktp, M_TEMP); +} + +ktrpsig(vp, sig, action, mask, code) + struct vnode *vp; + sig_t action; +{ + struct ktr_header *kth = ktrgetheader(KTR_PSIG); + struct ktr_psig kp; + + kp.signo = (char)sig; + kp.action = action; + kp.mask = mask; + kp.code = code; + kth->ktr_buf = (caddr_t)&kp; + kth->ktr_len = sizeof (struct ktr_psig); + + ktrwrite(vp, kth); + FREE(kth, M_TEMP); +} + +/* Interface and common routines */ + +/* + * ktrace system call + */ + +struct ktrace_args { + char *fname; + int ops; + int facs; + int pid; +}; + +/* ARGSUSED */ +ktrace(curp, uap, retval) + struct proc *curp; + register struct ktrace_args *uap; + int *retval; +{ + register struct vnode *vp = NULL; + register struct proc *p; + struct pgrp *pg; + int facs = uap->facs & ~KTRFAC_ROOT; + int ops = KTROP(uap->ops); + int descend = uap->ops & KTRFLAG_DESCEND; + int ret = 0; + int error = 
0; + struct nameidata nd; + + if (ops != KTROP_CLEAR) { + /* + * an operation which requires a file argument. + */ + nd.ni_segflg = UIO_USERSPACE; + nd.ni_dirp = uap->fname; + if (error = vn_open(&nd, curp, FREAD|FWRITE, 0)) + return (error); + vp = nd.ni_vp; + VOP_UNLOCK(vp); + if (vp->v_type != VREG) { + (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); + return (EACCES); + } + } + /* + * Clear all uses of the tracefile + */ + if (ops == KTROP_CLEARFILE) { + for (p = allproc; p != NULL; p = p->p_nxt) { + if (p->p_tracep == vp) { + if (ktrcanset(curp, p)) { + p->p_tracep = NULL; + p->p_traceflag = 0; + (void) vn_close(vp, FREAD|FWRITE, + p->p_ucred, p); + } else + error = EPERM; + } + } + goto done; + } + /* + * need something to (un)trace (XXX - why is this here?) + */ + if (!facs) { + error = EINVAL; + goto done; + } + /* + * do it + */ + if (uap->pid < 0) { + /* + * by process group + */ + pg = pgfind(-uap->pid); + if (pg == NULL) { + error = ESRCH; + goto done; + } + for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) + if (descend) + ret |= ktrsetchildren(curp, p, ops, facs, vp); + else + ret |= ktrops(curp, p, ops, facs, vp); + + } else { + /* + * by pid + */ + p = pfind(uap->pid); + if (p == NULL) { + error = ESRCH; + goto done; + } + if (descend) + ret |= ktrsetchildren(curp, p, ops, facs, vp); + else + ret |= ktrops(curp, p, ops, facs, vp); + } + if (!ret) + error = EPERM; +done: + if (vp != NULL) + (void) vn_close(vp, FWRITE, curp->p_ucred, curp); + return (error); +} + +ktrops(curp, p, ops, facs, vp) + struct proc *curp, *p; + struct vnode *vp; +{ + + if (!ktrcanset(curp, p)) + return (0); + if (ops == KTROP_SET) { + if (p->p_tracep != vp) { + /* + * if trace file already in use, relinquish + */ + if (p->p_tracep != NULL) + vrele(p->p_tracep); + VREF(vp); + p->p_tracep = vp; + } + p->p_traceflag |= facs; + if (curp->p_ucred->cr_uid == 0) + p->p_traceflag |= KTRFAC_ROOT; + } else { + /* KTROP_CLEAR */ + if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) 
== 0) { + /* no more tracing */ + p->p_traceflag = 0; + if (p->p_tracep != NULL) { + vrele(p->p_tracep); + p->p_tracep = NULL; + } + } + } + + return (1); +} + +ktrsetchildren(curp, top, ops, facs, vp) + struct proc *curp, *top; + struct vnode *vp; +{ + register struct proc *p; + register int ret = 0; + + p = top; + for (;;) { + ret |= ktrops(curp, p, ops, facs, vp); + /* + * If this process has children, descend to them next, + * otherwise do any siblings, and if done with this level, + * follow back up the tree (but not past top). + */ + if (p->p_cptr) + p = p->p_cptr; + else if (p == top) + return (ret); + else if (p->p_osptr) + p = p->p_osptr; + else for (;;) { + p = p->p_pptr; + if (p == top) + return (ret); + if (p->p_osptr) { + p = p->p_osptr; + break; + } + } + } + /*NOTREACHED*/ +} + +ktrwrite(vp, kth) + struct vnode *vp; + register struct ktr_header *kth; +{ + struct uio auio; + struct iovec aiov[2]; + register struct proc *p = curproc; /* XXX */ + int error; + + if (vp == NULL) + return; + auio.uio_iov = &aiov[0]; + auio.uio_offset = 0; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + aiov[0].iov_base = (caddr_t)kth; + aiov[0].iov_len = sizeof(struct ktr_header); + auio.uio_resid = sizeof(struct ktr_header); + auio.uio_iovcnt = 1; + auio.uio_procp = (struct proc *)0; + if (kth->ktr_len > 0) { + auio.uio_iovcnt++; + aiov[1].iov_base = kth->ktr_buf; + aiov[1].iov_len = kth->ktr_len; + auio.uio_resid += kth->ktr_len; + } + VOP_LOCK(vp); + error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); + VOP_UNLOCK(vp); + if (!error) + return; + /* + * If error encountered, give up tracing on this vnode. + */ + log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", + error); + for (p = allproc; p != NULL; p = p->p_nxt) { + if (p->p_tracep == vp) { + p->p_tracep = NULL; + p->p_traceflag = 0; + vrele(vp); + } + } +} + +/* + * Return true if caller has permission to set the ktracing state + * of target. 
Essentially, the target can't possess any + * more permissions than the caller. KTRFAC_ROOT signifies that + * root previously set the tracing status on the target process, and + * so, only root may further change it. + * + * TODO: check groups. use caller effective gid. + */ +ktrcanset(callp, targetp) + struct proc *callp, *targetp; +{ + register struct pcred *caller = callp->p_cred; + register struct pcred *target = targetp->p_cred; + + if ((caller->pc_ucred->cr_uid == target->p_ruid && + target->p_ruid == target->p_svuid && + caller->p_rgid == target->p_rgid && /* XXX */ + target->p_rgid == target->p_svgid && + (targetp->p_traceflag & KTRFAC_ROOT) == 0) || + caller->pc_ucred->cr_uid == 0) + return (1); + + return (0); +} + +#endif diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c new file mode 100644 index 000000000000..1a27d598ffcd --- /dev/null +++ b/sys/kern/kern_malloc.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 1987, 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_malloc.c 7.25 (Berkeley) 5/8/91 + * $Id: kern_malloc.c,v 1.3 1993/10/18 03:46:54 davidg Exp $ + */ + +#include "param.h" +#include "proc.h" +#include "kernel.h" +#include "malloc.h" +#include "vm/vm.h" +#include "vm/vm_kern.h" + +struct kmembuckets bucket[MINBUCKET + 16]; +struct kmemstats kmemstats[M_LAST]; +struct kmemusage *kmemusage; +char *kmembase, *kmemlimit; +char *memname[] = INITKMEMNAMES; + +/* + * Allocate a block of memory + */ +void * +malloc(size, type, flags) + unsigned long size; + int type, flags; +{ + register struct kmembuckets *kbp; + register struct kmemusage *kup; + long indx, npg, alloc, allocsize; + int s; + caddr_t va, cp, savedlist; +#ifdef KMEMSTATS + register struct kmemstats *ksp = &kmemstats[type]; + + if (((unsigned long)type) > M_LAST) + panic("malloc - bogus type"); +#endif + + indx = BUCKETINDX(size); + kbp = &bucket[indx]; + s = splimp(); +#ifdef KMEMSTATS + while (ksp->ks_memuse >= ksp->ks_limit) { + if (flags & M_NOWAIT) { + splx(s); + return ((void *) NULL); + } + if (ksp->ks_limblocks < 65535) + ksp->ks_limblocks++; + tsleep((caddr_t)ksp, PSWP+2, memname[type], 0); + } +#endif + if (kbp->kb_next == NULL) { + if (size > 
MAXALLOCSAVE) + allocsize = roundup(size, CLBYTES); + else + allocsize = 1 << indx; + npg = clrnd(btoc(allocsize)); + va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg), + !(flags & M_NOWAIT)); + if (va == NULL) { + splx(s); + return ((void *) NULL); + } +#ifdef KMEMSTATS + kbp->kb_total += kbp->kb_elmpercl; +#endif + kup = btokup(va); + kup->ku_indx = indx; + if (allocsize > MAXALLOCSAVE) { + if (npg > 65535) + panic("malloc: allocation too large"); + kup->ku_pagecnt = npg; +#ifdef KMEMSTATS + ksp->ks_memuse += allocsize; +#endif + goto out; + } +#ifdef KMEMSTATS + kup->ku_freecnt = kbp->kb_elmpercl; + kbp->kb_totalfree += kbp->kb_elmpercl; +#endif + /* + * Just in case we blocked while allocating memory, + * and someone else also allocated memory for this + * bucket, don't assume the list is still empty. + */ + savedlist = kbp->kb_next; + kbp->kb_next = va + (npg * NBPG) - allocsize; + for (cp = kbp->kb_next; cp > va; cp -= allocsize) + *(caddr_t *)cp = cp - allocsize; + *(caddr_t *)cp = savedlist; + } + va = kbp->kb_next; + kbp->kb_next = *(caddr_t *)va; +#ifdef KMEMSTATS + kup = btokup(va); + if (kup->ku_indx != indx) + panic("malloc: wrong bucket"); + if (kup->ku_freecnt == 0) + panic("malloc: lost data"); + kup->ku_freecnt--; + kbp->kb_totalfree--; + ksp->ks_memuse += 1 << indx; +out: + kbp->kb_calls++; + ksp->ks_inuse++; + ksp->ks_calls++; + if (ksp->ks_memuse > ksp->ks_maxused) + ksp->ks_maxused = ksp->ks_memuse; +#else +out: +#endif + splx(s); + return ((void *) va); +} + +#ifdef DIAGNOSTIC +long addrmask[] = { 0x00000000, + 0x00000001, 0x00000003, 0x00000007, 0x0000000f, + 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, + 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, + 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, +}; +#endif /* DIAGNOSTIC */ + +/* + * Free a block of memory allocated by malloc. 
+ */ +void +free(addr, type) + void *addr; + int type; +{ + register struct kmembuckets *kbp; + register struct kmemusage *kup; + long alloc, size; + int s; +#ifdef KMEMSTATS + register struct kmemstats *ksp = &kmemstats[type]; +#endif + + kup = btokup(addr); + size = 1 << kup->ku_indx; +#ifdef DIAGNOSTIC + if (size > NBPG * CLSIZE) + alloc = addrmask[BUCKETINDX(NBPG * CLSIZE)]; + else + alloc = addrmask[kup->ku_indx]; + if (((u_long)addr & alloc) != 0) { + printf("free: unaligned addr 0x%x, size %d, type %d, mask %d\n", + addr, size, type, alloc); + panic("free: unaligned addr"); + } +#endif /* DIAGNOSTIC */ + kbp = &bucket[kup->ku_indx]; + s = splimp(); + if (size > MAXALLOCSAVE) { + kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt)); +#ifdef KMEMSTATS + size = kup->ku_pagecnt << PGSHIFT; + ksp->ks_memuse -= size; + kup->ku_indx = 0; + kup->ku_pagecnt = 0; + if (ksp->ks_memuse + size >= ksp->ks_limit && + ksp->ks_memuse < ksp->ks_limit) + wakeup((caddr_t)ksp); + ksp->ks_inuse--; + kbp->kb_total -= 1; +#endif + splx(s); + return; + } +#ifdef KMEMSTATS + kup->ku_freecnt++; + if (kup->ku_freecnt >= kbp->kb_elmpercl) + if (kup->ku_freecnt > kbp->kb_elmpercl) + panic("free: multiple frees"); + else if (kbp->kb_totalfree > kbp->kb_highwat) + kbp->kb_couldfree++; + kbp->kb_totalfree++; + ksp->ks_memuse -= size; + if (ksp->ks_memuse + size >= ksp->ks_limit && + ksp->ks_memuse < ksp->ks_limit) + wakeup((caddr_t)ksp); + ksp->ks_inuse--; +#endif + *(caddr_t *)addr = kbp->kb_next; + kbp->kb_next = addr; + splx(s); +} + +/* + * Initialize the kernel memory allocator + */ +kmeminit() +{ + register long indx; + int npg; + +#if (MAXALLOCSAVE > MINALLOCSIZE * 32768) + ERROR!_kmeminit:_MAXALLOCSAVE_too_big +#endif +#if (MAXALLOCSAVE < CLBYTES-1) + ERROR!_kmeminit:_MAXALLOCSAVE_too_small +#endif + npg = VM_KMEM_SIZE/ NBPG; + kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, + (vm_size_t)(npg * sizeof(struct kmemusage))); + kmem_map = kmem_suballoc(kernel_map, 
(vm_offset_t *)&kmembase, + (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * NBPG), FALSE); +#ifdef KMEMSTATS + for (indx = 0; indx < MINBUCKET + 16; indx++) { + if (1 << indx >= CLBYTES) + bucket[indx].kb_elmpercl = 1; + else + bucket[indx].kb_elmpercl = CLBYTES / (1 << indx); + bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl; + } + for (indx = 0; indx < M_LAST; indx++) + kmemstats[indx].ks_limit = npg * NBPG * 6 / 10; +#endif +} diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c new file mode 100644 index 000000000000..3dc511da238a --- /dev/null +++ b/sys/kern/kern_proc.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_proc.c 7.16 (Berkeley) 6/28/91 + * $Id: kern_proc.c,v 1.2 1993/10/16 15:24:23 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "kernel.h" +#include "proc.h" +#include "buf.h" +#include "acct.h" +#include "wait.h" +#include "file.h" +#include "../ufs/quota.h" +#include "uio.h" +#include "malloc.h" +#include "mbuf.h" +#include "ioctl.h" +#include "tty.h" + +/* + * Is p an inferior of the current process? + */ +inferior(p) + register struct proc *p; +{ + + for (; p != curproc; p = p->p_pptr) + if (p->p_pid == 0) + return (0); + return (1); +} + +/* + * Locate a process by number + */ +struct proc * +pfind(pid) + register pid; +{ + register struct proc *p = pidhash[PIDHASH(pid)]; + + for (; p; p = p->p_hash) + if (p->p_pid == pid) + return (p); + return ((struct proc *)0); +} + +/* + * Locate a process group by number + */ +struct pgrp * +pgfind(pgid) + register pid_t pgid; +{ + register struct pgrp *pgrp = pgrphash[PIDHASH(pgid)]; + + for (; pgrp; pgrp = pgrp->pg_hforw) + if (pgrp->pg_id == pgid) + return (pgrp); + return ((struct pgrp *)0); +} + +/* + * Move p to a new or existing process group (and session) + */ +enterpgrp(p, pgid, mksess) + register struct proc *p; + pid_t pgid; +{ + register struct pgrp *pgrp = pgfind(pgid); + register struct proc **pp; + register struct proc *cp; + int n; + +#ifdef DIAGNOSTIC + if (pgrp && mksess) /* firewalls */ + panic("enterpgrp: setsid into non-empty pgrp"); 
+ if (SESS_LEADER(p)) + panic("enterpgrp: session leader attempted setpgrp"); +#endif + if (pgrp == NULL) { + /* + * new process group + */ +#ifdef DIAGNOSTIC + if (p->p_pid != pgid) + panic("enterpgrp: new pgrp and pid != pgid"); +#endif + MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, + M_WAITOK); + if (mksess) { + register struct session *sess; + + /* + * new session + */ + MALLOC(sess, struct session *, sizeof(struct session), + M_SESSION, M_WAITOK); + sess->s_leader = p; + sess->s_count = 1; + sess->s_ttyvp = NULL; + sess->s_ttyp = NULL; + bcopy(p->p_session->s_login, sess->s_login, + sizeof(sess->s_login)); + p->p_flag &= ~SCTTY; + pgrp->pg_session = sess; +#ifdef DIAGNOSTIC + if (p != curproc) + panic("enterpgrp: mksession and p != curproc"); +#endif + } else { + pgrp->pg_session = p->p_session; + pgrp->pg_session->s_count++; + } + pgrp->pg_id = pgid; + pgrp->pg_hforw = pgrphash[n = PIDHASH(pgid)]; + pgrphash[n] = pgrp; + pgrp->pg_jobc = 0; + pgrp->pg_mem = NULL; + } else if (pgrp == p->p_pgrp) + return; + + /* + * Adjust eligibility of affected pgrps to participate in job control. + * Increment eligibility counts before decrementing, otherwise we + * could reach 0 spuriously during the first call. 
+ */ + fixjobc(p, pgrp, 1); + fixjobc(p, p->p_pgrp, 0); + + /* + * unlink p from old process group + */ + for (pp = &p->p_pgrp->pg_mem; *pp; pp = &(*pp)->p_pgrpnxt) + if (*pp == p) { + *pp = p->p_pgrpnxt; + goto done; + } + panic("enterpgrp: can't find p on old pgrp"); +done: + /* + * delete old if empty + */ + if (p->p_pgrp->pg_mem == 0) + pgdelete(p->p_pgrp); + /* + * link into new one + */ + p->p_pgrp = pgrp; + p->p_pgrpnxt = pgrp->pg_mem; + pgrp->pg_mem = p; +} + +/* + * remove process from process group + */ +leavepgrp(p) + register struct proc *p; +{ + register struct proc **pp = &p->p_pgrp->pg_mem; + + for (; *pp; pp = &(*pp)->p_pgrpnxt) + if (*pp == p) { + *pp = p->p_pgrpnxt; + goto done; + } + panic("leavepgrp: can't find p in pgrp"); +done: + if (!p->p_pgrp->pg_mem) + pgdelete(p->p_pgrp); + p->p_pgrp = 0; +} + +/* + * delete a process group + */ +pgdelete(pgrp) + register struct pgrp *pgrp; +{ + register struct pgrp **pgp = &pgrphash[PIDHASH(pgrp->pg_id)]; + + if (pgrp->pg_session->s_ttyp != NULL && + pgrp->pg_session->s_ttyp->t_pgrp == pgrp) + pgrp->pg_session->s_ttyp->t_pgrp = NULL; + for (; *pgp; pgp = &(*pgp)->pg_hforw) + if (*pgp == pgrp) { + *pgp = pgrp->pg_hforw; + goto done; + } + panic("pgdelete: can't find pgrp on hash chain"); +done: + if (--pgrp->pg_session->s_count == 0) + FREE(pgrp->pg_session, M_SESSION); + FREE(pgrp, M_PGRP); +} + +static orphanpg(); + +/* + * Adjust pgrp jobc counters when specified process changes process group. + * We count the number of processes in each process group that "qualify" + * the group for terminal job control (those with a parent in a different + * process group of the same session). If that count reaches zero, the + * process group becomes orphaned. Check both the specified process' + * process group and that of its children. + * entering == 0 => p is leaving specified group. + * entering == 1 => p is entering specified group. 
 */
fixjobc(p, pgrp, entering)
        register struct proc *p;
        register struct pgrp *pgrp;
        int entering;
{
        register struct pgrp *hispgrp;
        register struct session *mysession = pgrp->pg_session;

        /*
         * Check p's parent to see whether p qualifies its own process
         * group; if so, adjust count for p's process group.
         * (A process qualifies its group when its parent is in a
         * different group of the same session.)
         */
        if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
            hispgrp->pg_session == mysession)
                /* the else below binds to this inner if */
                if (entering)
                        pgrp->pg_jobc++;
                else if (--pgrp->pg_jobc == 0)
                        orphanpg(pgrp);

        /*
         * Check this process' children to see whether they qualify
         * their process groups; if so, adjust counts for children's
         * process groups.  Zombies no longer qualify a group.
         */
        for (p = p->p_cptr; p; p = p->p_osptr)
                if ((hispgrp = p->p_pgrp) != pgrp &&
                    hispgrp->pg_session == mysession &&
                    p->p_stat != SZOMB)
                        /* likewise, else binds to the inner if */
                        if (entering)
                                hispgrp->pg_jobc++;
                        else if (--hispgrp->pg_jobc == 0)
                                orphanpg(hispgrp);
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
+ */ +static +orphanpg(pg) + struct pgrp *pg; +{ + register struct proc *p; + + for (p = pg->pg_mem; p; p = p->p_pgrpnxt) { + if (p->p_stat == SSTOP) { + for (p = pg->pg_mem; p; p = p->p_pgrpnxt) { + psignal(p, SIGHUP); + psignal(p, SIGCONT); + } + return; + } + } +} + +#ifdef debug +/* DEBUG */ +pgrpdump() +{ + register struct pgrp *pgrp; + register struct proc *p; + register i; + + for (i=0; i<PIDHSZ; i++) { + if (pgrphash[i]) { + printf("\tindx %d\n", i); + for (pgrp=pgrphash[i]; pgrp; pgrp=pgrp->pg_hforw) { + printf("\tpgrp %x, pgid %d, sess %x, sesscnt %d, mem %x\n", + pgrp, pgrp->pg_id, pgrp->pg_session, + pgrp->pg_session->s_count, pgrp->pg_mem); + for (p=pgrp->pg_mem; p; p=p->p_pgrpnxt) { + printf("\t\tpid %d addr %x pgrp %x\n", + p->p_pid, p, p->p_pgrp); + } + } + + } + } +} +#endif /* debug */ diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c new file mode 100644 index 000000000000..cc76c7c6f406 --- /dev/null +++ b/sys/kern/kern_prot.c @@ -0,0 +1,611 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1990, 1991 Regents of the University + * of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_prot.c 7.21 (Berkeley) 5/3/91 + * $Id: kern_prot.c,v 1.3 1993/10/16 15:24:24 rgrimes Exp $ + */ + +/* + * System calls related to processes and protection + */ + +#include "param.h" +#include "acct.h" +#include "systm.h" +#include "ucred.h" +#include "proc.h" +#include "timeb.h" +#include "times.h" +#include "malloc.h" + +/* ARGSUSED */ +getpid(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_pid; +#ifdef COMPAT_43 + retval[1] = p->p_pptr->p_pid; +#endif + return (0); +} + +/* ARGSUSED */ +getppid(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_pptr->p_pid; + return (0); +} + +/* Get process group ID; note that POSIX getpgrp takes no parameter */ +getpgrp(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_pgrp->pg_id; + return (0); +} + +/* ARGSUSED */ +getuid(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_cred->p_ruid; +#ifdef COMPAT_43 + 
retval[1] = p->p_ucred->cr_uid; +#endif + return (0); +} + +/* ARGSUSED */ +geteuid(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_ucred->cr_uid; + return (0); +} + +/* ARGSUSED */ +getgid(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_cred->p_rgid; +#ifdef COMPAT_43 + retval[1] = p->p_ucred->cr_groups[0]; +#endif + return (0); +} + +/* + * Get effective group ID. The "egid" is groups[0], and could be obtained + * via getgroups. This syscall exists because it is somewhat painful to do + * correctly in a library function. + */ +/* ARGSUSED */ +getegid(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_ucred->cr_groups[0]; + return (0); +} + +struct getgroups_args { + u_int gidsetsize; + int *gidset; /* XXX not yet POSIX */ +}; + +getgroups(p, uap, retval) + struct proc *p; + register struct getgroups_args *uap; + int *retval; +{ + register struct pcred *pc = p->p_cred; + register gid_t *gp; + register int *lp; + register u_int ngrp; + int groups[NGROUPS]; + int error; + + if ((ngrp = uap->gidsetsize) == 0) { + *retval = pc->pc_ucred->cr_ngroups; + return (0); + } + if (ngrp < pc->pc_ucred->cr_ngroups) + return (EINVAL); + ngrp = pc->pc_ucred->cr_ngroups; + for (gp = pc->pc_ucred->cr_groups, lp = groups; lp < &groups[ngrp]; ) + *lp++ = *gp++; + if (error = copyout((caddr_t)groups, (caddr_t)uap->gidset, + ngrp * sizeof (groups[0]))) + return (error); + *retval = ngrp; + return (0); +} + +/* ARGSUSED */ +setsid(p, uap, retval) + register struct proc *p; + void *uap; + int *retval; +{ + + if (p->p_pgid == p->p_pid || pgfind(p->p_pid)) { + return (EPERM); + } else { + enterpgrp(p, p->p_pid, 1); + *retval = p->p_pid; + return (0); + } +} + +/* + * set process group (setpgid/old setpgrp) + * + * caller does setpgid(targpid, targpgid) + * + * pid must be caller or child of caller (ESRCH) + * if a child + * pid must be in same session (EPERM) + * pid can't have done an 
exec (EACCES)
 *	if pgid != pid
 * 		there must exist some pid in same session having pgid (EPERM)
 *	pid must not be session leader (EPERM)
 */

struct setpgid_args {
	int	pid;	/* target process id */
	int	pgid;	/* target pgrp id */
};

/* ARGSUSED */
setpgid(curp, uap, retval)
	struct proc *curp;
	register struct setpgid_args *uap;
	int *retval;
{
	register struct proc *targp;		/* target process */
	register struct pgrp *pgrp;		/* target pgrp */

	/* pid 0 means the caller itself; otherwise must be a descendant. */
	if (uap->pid != 0 && uap->pid != curp->p_pid) {
		if ((targp = pfind(uap->pid)) == 0 || !inferior(targp))
			return (ESRCH);
		if (targp->p_session != curp->p_session)
			return (EPERM);
		if (targp->p_flag&SEXEC)
			return (EACCES);
	} else
		targp = curp;
	if (SESS_LEADER(targp))
		return (EPERM);
	/* pgid 0 means "same as target pid" (create/join own group). */
	if (uap->pgid == 0)
		uap->pgid = targp->p_pid;
	else if (uap->pgid != targp->p_pid)
		/* joining an existing group: it must be in our session */
		if ((pgrp = pgfind(uap->pgid)) == 0 ||
		    pgrp->pg_session != curp->p_session)
			return (EPERM);
	enterpgrp(targp, uap->pgid, 0);
	return (0);
}

struct setuid_args {
	int	uid;
};

/*
 * Set real, effective and saved uids to the given value.
 * Only the superuser may change to a uid other than the current real uid.
 */
/* ARGSUSED */
setuid(p, uap, retval)
	struct proc *p;
	struct setuid_args *uap;
	int *retval;
{
	register struct pcred *pc = p->p_cred;
	register uid_t uid;
	int error;

	uid = uap->uid;
	if (uid != pc->p_ruid &&
	    (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	/*
	 * Everything's okay, do it.  Copy credentials so other references do
	 * not see our changes.
	 */
	pc->pc_ucred = crcopy(pc->pc_ucred);
	pc->pc_ucred->cr_uid = uid;
	pc->p_ruid = uid;
	pc->p_svuid = uid;
	return (0);
}

struct seteuid_args {
	int	euid;
};

/*
 * Set the effective uid.  Permitted without privilege only when the
 * requested euid equals the real or saved uid.
 */
/* ARGSUSED */
seteuid(p, uap, retval)
	struct proc *p;
	struct seteuid_args *uap;
	int *retval;
{
	register struct pcred *pc = p->p_cred;
	register uid_t euid;
	int error;

	euid = uap->euid;
	if (euid != pc->p_ruid && euid != pc->p_svuid &&
	    (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	/*
	 * Everything's okay, do it.
Copy credentials so other references do + * not see our changes. + */ + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_uid = euid; + return (0); +} + +struct setgid_args { + int gid; +}; + +/* ARGSUSED */ +setgid(p, uap, retval) + struct proc *p; + struct setgid_args *uap; + int *retval; +{ + register struct pcred *pc = p->p_cred; + register gid_t gid; + int error; + + gid = uap->gid; + if (gid != pc->p_rgid && (error = suser(pc->pc_ucred, &p->p_acflag))) + return (error); + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_groups[0] = gid; + pc->p_rgid = gid; + pc->p_svgid = gid; /* ??? */ + return (0); +} + +struct setegid_args { + int egid; +}; + +/* ARGSUSED */ +setegid(p, uap, retval) + struct proc *p; + struct setegid_args *uap; + int *retval; +{ + register struct pcred *pc = p->p_cred; + register gid_t egid; + int error; + + egid = uap->egid; + if (egid != pc->p_rgid && egid != pc->p_svgid && + (error = suser(pc->pc_ucred, &p->p_acflag))) + return (error); + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_groups[0] = egid; + return (0); +} + +#ifdef COMPAT_43 + +struct osetreuid_args { + int ruid; + int euid; +}; + +/* ARGSUSED */ +osetreuid(p, uap, retval) + register struct proc *p; + struct osetreuid_args *uap; + int *retval; +{ + register struct pcred *pc = p->p_cred; + register uid_t ruid, euid; + int error; + + if (uap->ruid == -1) + ruid = pc->p_ruid; + else + ruid = uap->ruid; + /* + * Allow setting real uid to previous effective, for swapping real and + * effective. 
This should be: + * + * if (ruid != pc->p_ruid && + * (error = suser(pc->pc_ucred, &p->p_acflag))) + */ + if (ruid != pc->p_ruid && ruid != pc->pc_ucred->cr_uid /* XXX */ && + (error = suser(pc->pc_ucred, &p->p_acflag))) + return (error); + if (uap->euid == -1) + euid = pc->pc_ucred->cr_uid; + else + euid = uap->euid; + if (euid != pc->pc_ucred->cr_uid && euid != pc->p_ruid && + euid != pc->p_svuid && (error = suser(pc->pc_ucred, &p->p_acflag))) + return (error); + /* + * Everything's okay, do it. Copy credentials so other references do + * not see our changes. + */ + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_uid = euid; + pc->p_ruid = ruid; + return (0); +} + +struct osetregid_args { + int rgid; + int egid; +}; + +/* ARGSUSED */ +osetregid(p, uap, retval) + register struct proc *p; + struct osetregid_args *uap; + int *retval; +{ + register struct pcred *pc = p->p_cred; + register gid_t rgid, egid; + int error; + + if (uap->rgid == -1) + rgid = pc->p_rgid; + else + rgid = uap->rgid; + /* + * Allow setting real gid to previous effective, for swapping real and + * effective. This didn't really work correctly in 4.[23], but is + * preserved so old stuff doesn't fail. 
This should be:
	 *
	 *	if (rgid != pc->p_rgid &&
	 *	    (error = suser(pc->pc_ucred, &p->p_acflag)))
	 */
	if (rgid != pc->p_rgid && rgid != pc->pc_ucred->cr_groups[0] /* XXX */ &&
	    (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	/* egid -1 means "leave the effective gid unchanged" */
	if (uap->egid == -1)
		egid = pc->pc_ucred->cr_groups[0];
	else
		egid = uap->egid;
	if (egid != pc->pc_ucred->cr_groups[0] && egid != pc->p_rgid &&
	    egid != pc->p_svgid && (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	pc->pc_ucred = crcopy(pc->pc_ucred);
	pc->pc_ucred->cr_groups[0] = egid;
	pc->p_rgid = rgid;
	return (0);
}
#endif

struct setgroups_args {
	u_int	gidsetsize;
	int	*gidset;
};

/*
 * Replace the supplementary group list (superuser only).
 * The user passes up to NGROUPS int-sized gids, which are narrowed
 * into the gid_t array of a fresh credential copy.
 */
/* ARGSUSED */
setgroups(p, uap, retval)
	struct proc *p;
	struct setgroups_args *uap;
	int *retval;
{
	register struct pcred *pc = p->p_cred;
	register gid_t *gp;
	register u_int ngrp;
	register int *lp;
	int error, groups[NGROUPS];

	if (error = suser(pc->pc_ucred, &p->p_acflag))
		return (error);
	if ((ngrp = uap->gidsetsize) > NGROUPS)
		return (EINVAL);
	if (error = copyin((caddr_t)uap->gidset, (caddr_t)groups,
	    ngrp * sizeof (groups[0])))
		return (error);
	/* copy-on-write: don't mutate a credential shared with others */
	pc->pc_ucred = crcopy(pc->pc_ucred);
	pc->pc_ucred->cr_ngroups = ngrp;
	/* convert from int's to gid_t's */
	for (gp = pc->pc_ucred->cr_groups, lp = groups; ngrp--; )
		*gp++ = *lp++;
	return (0);
}

/*
 * Check if gid is a member of the group set.
 * Returns 1 if found, 0 otherwise.
 */
groupmember(gid, cred)
	gid_t gid;
	register struct ucred *cred;
{
	register gid_t *gp;
	gid_t *egp;

	egp = &(cred->cr_groups[cred->cr_ngroups]);
	for (gp = cred->cr_groups; gp < egp; gp++)
		if (*gp == gid)
			return (1);
	return (0);
}

/*
 * Test whether the specified credentials imply "super-user"
 * privilege; if so, and we have accounting info, set the flag
 * indicating use of super-powers.
 * Returns 0 or error.
 */
suser(cred, acflag)
	struct ucred *cred;
	u_short *acflag;
{
	if (cred->cr_uid == 0) {
		/* record use of super-user powers for accounting, if asked */
		if (acflag)
			*acflag |= ASU;
		return (0);
	}
	return (EPERM);
}

/*
 * Allocate a zeroed cred structure.
 * The new credential starts with a reference count of 1.
 */
struct ucred *
crget()
{
	register struct ucred *cr;

	MALLOC(cr, struct ucred *, sizeof(*cr), M_CRED, M_WAITOK);
	bzero((caddr_t)cr, sizeof(*cr));
	cr->cr_ref = 1;
	return (cr);
}

/*
 * Free a cred structure.
 * Throws away space when ref count gets to 0.
 */
crfree(cr)
	struct ucred *cr;
{
	/*
	 * NOTE(review): splimp() guards the refcount decrement against
	 * interrupt-level activity; the original author flagged the
	 * choice of level with "???" -- confirm splimp is sufficient.
	 */
	int s = splimp();	/* ??? */

	if (--cr->cr_ref != 0) {
		(void) splx(s);
		return;
	}
	FREE((caddr_t)cr, M_CRED);
	(void) splx(s);
}

/*
 * Copy cred structure to a new one and free the old one.
 * If the caller holds the only reference, the credential is returned
 * unchanged (no copy is needed for copy-on-write semantics).
 */
struct ucred *
crcopy(cr)
	struct ucred *cr;
{
	struct ucred *newcr;

	if (cr->cr_ref == 1)
		return (cr);
	newcr = crget();
	*newcr = *cr;		/* struct copy also clobbers cr_ref... */
	crfree(cr);		/* drop our reference on the shared one */
	newcr->cr_ref = 1;	/* ...so reset it for the private copy */
	return (newcr);
}

/*
 * Dup cred struct to a new held one.
 * Unlike crcopy(), the original keeps its reference.
 */
struct ucred *
crdup(cr)
	struct ucred *cr;
{
	struct ucred *newcr;

	newcr = crget();
	*newcr = *cr;
	newcr->cr_ref = 1;
	return (newcr);
}

/*
 * Get login name, if available.
 */

struct getlogin_args {
	char	*namebuf;
	u_int	namelen;
};

/*
 * Copy the session login name to the user buffer, truncated to the
 * smaller of namelen and the session buffer size.
 */
/* ARGSUSED */
getlogin(p, uap, retval)
	struct proc *p;
	struct getlogin_args *uap;
	int *retval;
{

	if (uap->namelen > sizeof (p->p_pgrp->pg_session->s_login))
		uap->namelen = sizeof (p->p_pgrp->pg_session->s_login);
	return (copyout((caddr_t) p->p_pgrp->pg_session->s_login,
	    (caddr_t) uap->namebuf, uap->namelen));
}

/*
 * Set login name.
+ */ + +struct setlogin_args { + char *namebuf; +}; + +/* ARGSUSED */ +setlogin(p, uap, retval) + struct proc *p; + struct setlogin_args *uap; + int *retval; +{ + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + error = copyinstr((caddr_t) uap->namebuf, + (caddr_t) p->p_pgrp->pg_session->s_login, + sizeof (p->p_pgrp->pg_session->s_login) - 1, (u_int *)0); + if (error == ENAMETOOLONG) + error = EINVAL; + return (error); +} diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c new file mode 100644 index 000000000000..e2f42e706c27 --- /dev/null +++ b/sys/kern/kern_resource.c @@ -0,0 +1,358 @@ +/*- + * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_resource.c 7.13 (Berkeley) 5/9/91 + * $Id: kern_resource.c,v 1.6 1993/10/19 01:02:16 nate Exp $ + */ + +#include "param.h" +#include "resourcevar.h" +#include "malloc.h" +#include "proc.h" + +#include "vm/vm.h" + +/* + * Resource controls and accounting. + */ + +struct getpriority_args { + int which; + int who; +}; + +getpriority(curp, uap, retval) + struct proc *curp; + register struct getpriority_args *uap; + int *retval; +{ + register struct proc *p; + register int low = PRIO_MAX + 1; + + switch (uap->which) { + + case PRIO_PROCESS: + if (uap->who == 0) + p = curp; + else + p = pfind(uap->who); + if (p == 0) + break; + low = p->p_nice; + break; + + case PRIO_PGRP: { + register struct pgrp *pg; + + if (uap->who == 0) + pg = curp->p_pgrp; + else if ((pg = pgfind(uap->who)) == NULL) + break; + for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) { + if (p->p_nice < low) + low = p->p_nice; + } + break; + } + + case PRIO_USER: + if (uap->who == 0) + uap->who = curp->p_ucred->cr_uid; + for (p = allproc; p != NULL; p = p->p_nxt) { + if (p->p_ucred->cr_uid == uap->who && + p->p_nice < low) + low = p->p_nice; + } + break; + + default: + return (EINVAL); + } + if (low == PRIO_MAX + 1) + return (ESRCH); + *retval = low; + return (0); +} + +struct setpriority_args { + int which; + int who; + int prio; +}; + +/* ARGSUSED */ +setpriority(curp, uap, retval) + struct proc *curp; + register struct setpriority_args *uap; + int *retval; 
+{ + register struct proc *p; + int found = 0, error = 0; + + switch (uap->which) { + + case PRIO_PROCESS: + if (uap->who == 0) + p = curp; + else + p = pfind(uap->who); + if (p == 0) + break; + error = donice(curp, p, uap->prio); + found++; + break; + + case PRIO_PGRP: { + register struct pgrp *pg; + + if (uap->who == 0) + pg = curp->p_pgrp; + else if ((pg = pgfind(uap->who)) == NULL) + break; + for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) { + error = donice(curp, p, uap->prio); + found++; + } + break; + } + + case PRIO_USER: + if (uap->who == 0) + uap->who = curp->p_ucred->cr_uid; + for (p = allproc; p != NULL; p = p->p_nxt) + if (p->p_ucred->cr_uid == uap->who) { + error = donice(curp, p, uap->prio); + found++; + } + break; + + default: + return (EINVAL); + } + if (found == 0) + return (ESRCH); + return (0); +} + +donice(curp, chgp, n) + register struct proc *curp, *chgp; + register int n; +{ + register struct pcred *pcred = curp->p_cred; + + if (pcred->pc_ucred->cr_uid && pcred->p_ruid && + pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid && + pcred->p_ruid != chgp->p_ucred->cr_uid) + return (EPERM); + if (n > PRIO_MAX) + n = PRIO_MAX; + if (n < PRIO_MIN) + n = PRIO_MIN; + if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag)) + return (EACCES); + chgp->p_nice = n; + (void) setpri(chgp); + return (0); +} + +struct setrlimit_args { + u_int which; + struct rlimit *lim; +}; + +/* ARGSUSED */ +setrlimit(p, uap, retval) + struct proc *p; + register struct setrlimit_args *uap; + int *retval; +{ + struct rlimit alim; + register struct rlimit *alimp; + extern int maxfdescs; + int error; + + if (uap->which >= RLIM_NLIMITS) + return (EINVAL); + alimp = &p->p_rlimit[uap->which]; + if (error = + copyin((caddr_t)uap->lim, (caddr_t)&alim, sizeof (struct rlimit))) + return (error); + if (alim.rlim_cur > alimp->rlim_max || alim.rlim_max > alimp->rlim_max) + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + if (p->p_limit->p_refcnt > 1 && + 
(p->p_limit->p_lflags & PL_SHAREMOD) == 0) { + p->p_limit->p_refcnt--; + p->p_limit = limcopy(p->p_limit); + } + + switch (uap->which) { + + case RLIMIT_DATA: + if (alim.rlim_cur > MAXDSIZ) + alim.rlim_cur = MAXDSIZ; + if (alim.rlim_max > MAXDSIZ) + alim.rlim_max = MAXDSIZ; + break; + + case RLIMIT_OFILE: + if (alim.rlim_cur > maxfdescs) + alim.rlim_cur = maxfdescs; + if (alim.rlim_max > maxfdescs) + alim.rlim_max = maxfdescs; + break; + case RLIMIT_STACK: + if (alim.rlim_cur > MAXSSIZ) + alim.rlim_cur = MAXSSIZ; + if (alim.rlim_max > MAXSSIZ) + alim.rlim_max = MAXSSIZ; + /* + * Stack is allocated to the max at exec time with only + * "rlim_cur" bytes accessible. If stack limit is going + * up make more accessible, if going down make inaccessible. + */ + if (alim.rlim_cur != alimp->rlim_cur) { + vm_offset_t addr; + vm_size_t size; + vm_prot_t prot; + struct vmspace *vm = p->p_vmspace; + + addr = (unsigned) vm->vm_maxsaddr + MAXSSIZ; + if (alim.rlim_cur > alimp->rlim_cur) { + prot = VM_PROT_ALL; + size = alim.rlim_cur - alimp->rlim_cur; + addr -= alim.rlim_cur; + } else { + prot = VM_PROT_NONE; + size = alimp->rlim_cur - alim.rlim_cur; + addr -= alimp->rlim_cur; + } + addr = trunc_page(addr); + size = round_page(size); + (void) vm_map_protect(&p->p_vmspace->vm_map, + addr, addr+size, prot, FALSE); + } + break; + } + p->p_rlimit[uap->which] = alim; + return (0); +} + +struct getrlimit_args { + u_int which; + struct rlimit *rlp; +}; + +/* ARGSUSED */ +getrlimit(p, uap, retval) + struct proc *p; + register struct getrlimit_args *uap; + int *retval; +{ + + if (uap->which >= RLIM_NLIMITS) + return (EINVAL); + return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp, + sizeof (struct rlimit))); +} + +struct getrusage_args { + int who; + struct rusage *rusage; +}; + +/* ARGSUSED */ +getrusage(p, uap, retval) + register struct proc *p; + register struct getrusage_args *uap; + int *retval; +{ + register struct rusage *rup; + + switch (uap->who) { + + case 
RUSAGE_SELF: { + int s; + + rup = &p->p_stats->p_ru; + s = splclock(); + rup->ru_stime = p->p_stime; + rup->ru_utime = p->p_utime; + splx(s); + break; + } + + case RUSAGE_CHILDREN: + rup = &p->p_stats->p_cru; + break; + + default: + return (EINVAL); + } + return (copyout((caddr_t)rup, (caddr_t)uap->rusage, + sizeof (struct rusage))); +} + +ruadd(ru, ru2) + register struct rusage *ru, *ru2; +{ + register long *ip, *ip2; + register int i; + + timevaladd(&ru->ru_utime, &ru2->ru_utime); + timevaladd(&ru->ru_stime, &ru2->ru_stime); + if (ru->ru_maxrss < ru2->ru_maxrss) + ru->ru_maxrss = ru2->ru_maxrss; + ip = &ru->ru_first; ip2 = &ru2->ru_first; + for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) /* Yuval fix */ + *ip++ += *ip2++; +} + +/* + * Make a copy of the plimit structure. + * We share these structures copy-on-write after fork, + * and copy when a limit is changed. + */ +struct plimit * +limcopy(lim) + struct plimit *lim; +{ + register struct plimit *copy; + + MALLOC(copy, struct plimit *, sizeof(struct plimit), + M_SUBPROC, M_WAITOK); + bcopy(lim->pl_rlimit, copy->pl_rlimit, + sizeof(struct rlimit) * RLIM_NLIMITS); + copy->p_lflags = 0; + copy->p_refcnt = 1; + return (copy); +} diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c new file mode 100644 index 000000000000..e0e40b77809d --- /dev/null +++ b/sys/kern/kern_sig.c @@ -0,0 +1,1142 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_sig.c 7.35 (Berkeley) 6/28/91 + * $Id: kern_sig.c,v 1.5 1993/10/16 15:24:27 rgrimes Exp $ + */ + +#define SIGPROP /* include signal properties table */ +#include "param.h" +#include "signalvar.h" +#include "resourcevar.h" +#include "namei.h" +#include "vnode.h" +#include "mount.h" +#include "filedesc.h" +#include "proc.h" +#include "systm.h" +#include "timeb.h" +#include "times.h" +#include "buf.h" +#include "acct.h" +#include "file.h" +#include "kernel.h" +#include "wait.h" +#include "ktrace.h" + +#include "machine/cpu.h" + +#include "vm/vm.h" +#include "kinfo_proc.h" +#include "user.h" /* for coredump */ + +/* + * Can process p, with pcred pc, send the signal signo to process q? 
+ */ +#define CANSIGNAL(p, pc, q, signo) \ + ((pc)->pc_ucred->cr_uid == 0 || \ + (pc)->p_ruid == (q)->p_cred->p_ruid || \ + (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \ + (pc)->p_ruid == (q)->p_ucred->cr_uid || \ + (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid || \ + ((signo) == SIGCONT && (q)->p_session == (p)->p_session)) + +struct sigaction_args { + int signo; + struct sigaction *nsa; + struct sigaction *osa; +}; + +/* ARGSUSED */ +sigaction(p, uap, retval) + struct proc *p; + register struct sigaction_args *uap; + int *retval; +{ + struct sigaction vec; + register struct sigaction *sa; + register struct sigacts *ps = p->p_sigacts; + register int sig; + int bit, error; + + sig = uap->signo; + if (sig <= 0 || sig >= NSIG || sig == SIGKILL || sig == SIGSTOP) + return (EINVAL); + sa = &vec; + if (uap->osa) { + sa->sa_handler = ps->ps_sigact[sig]; + sa->sa_mask = ps->ps_catchmask[sig]; + bit = sigmask(sig); + sa->sa_flags = 0; + if ((ps->ps_sigonstack & bit) != 0) + sa->sa_flags |= SA_ONSTACK; + if ((ps->ps_sigintr & bit) == 0) + sa->sa_flags |= SA_RESTART; + if (p->p_flag & SNOCLDSTOP) + sa->sa_flags |= SA_NOCLDSTOP; + if (error = copyout((caddr_t)sa, (caddr_t)uap->osa, + sizeof (vec))) + return (error); + } + if (uap->nsa) { + if (error = copyin((caddr_t)uap->nsa, (caddr_t)sa, + sizeof (vec))) + return (error); + setsigvec(p, sig, sa); + } + return (0); +} + +setsigvec(p, sig, sa) + register struct proc *p; + int sig; + register struct sigaction *sa; +{ + register struct sigacts *ps = p->p_sigacts; + register int bit; + + bit = sigmask(sig); + /* + * Change setting atomically. 
+ */ + (void) splhigh(); + ps->ps_sigact[sig] = sa->sa_handler; + ps->ps_catchmask[sig] = sa->sa_mask &~ sigcantmask; + if ((sa->sa_flags & SA_RESTART) == 0) + ps->ps_sigintr |= bit; + else + ps->ps_sigintr &= ~bit; + if (sa->sa_flags & SA_ONSTACK) + ps->ps_sigonstack |= bit; + else + ps->ps_sigonstack &= ~bit; + if (sig == SIGCHLD) { + if (sa->sa_flags & SA_NOCLDSTOP) + p->p_flag |= SNOCLDSTOP; + else + p->p_flag &= ~SNOCLDSTOP; + } + /* + * Set bit in p_sigignore for signals that are set to SIG_IGN, + * and for signals set to SIG_DFL where the default is to ignore. + * However, don't put SIGCONT in p_sigignore, + * as we have to restart the process. + */ + if (sa->sa_handler == SIG_IGN || + (sigprop[sig] & SA_IGNORE && sa->sa_handler == SIG_DFL)) { + p->p_sig &= ~bit; /* never to be seen again */ + if (sig != SIGCONT) + p->p_sigignore |= bit; /* easier in psignal */ + p->p_sigcatch &= ~bit; + } else { + p->p_sigignore &= ~bit; + if (sa->sa_handler == SIG_DFL) + p->p_sigcatch &= ~bit; + else + p->p_sigcatch |= bit; + } + (void) spl0(); +} + +/* + * Initialize signal state for process 0; + * set to ignore signals that are ignored by default. + */ +void +siginit(p) + struct proc *p; +{ + register int i; + + for (i = 0; i < NSIG; i++) + if (sigprop[i] & SA_IGNORE && i != SIGCONT) + p->p_sigignore |= sigmask(i); +} + +/* + * Reset signals for an exec of the specified process. + */ +void +execsigs(p) + register struct proc *p; +{ + register struct sigacts *ps = p->p_sigacts; + register int nc, mask; + + /* + * Reset caught signals. Held signals remain held + * through p_sigmask (unless they were caught, + * and are now ignored by default). + */ + while (p->p_sigcatch) { + nc = ffs((long)p->p_sigcatch); + mask = sigmask(nc); + p->p_sigcatch &= ~mask; + if (sigprop[nc] & SA_IGNORE) { + if (nc != SIGCONT) + p->p_sigignore |= mask; + p->p_sig &= ~mask; + } + ps->ps_sigact[nc] = SIG_DFL; + } + /* + * Reset stack state to the user stack. 
+ * Clear set of signals caught on the signal stack. + */ + ps->ps_onstack = 0; + ps->ps_sigsp = 0; + ps->ps_sigonstack = 0; +} + +/* + * Manipulate signal mask. + * Note that we receive new mask, not pointer, + * and return old mask as return value; + * the library stub does the rest. + */ + +struct sigprocmask_args { + int how; + sigset_t mask; +}; + +sigprocmask(p, uap, retval) + register struct proc *p; + struct sigprocmask_args *uap; + int *retval; +{ + int error = 0; + + *retval = p->p_sigmask; + (void) splhigh(); + + switch (uap->how) { + case SIG_BLOCK: + p->p_sigmask |= uap->mask &~ sigcantmask; + break; + + case SIG_UNBLOCK: + p->p_sigmask &= ~uap->mask; + break; + + case SIG_SETMASK: + p->p_sigmask = uap->mask &~ sigcantmask; + break; + + default: + error = EINVAL; + break; + } + (void) spl0(); + return (error); +} + +/* ARGSUSED */ +sigpending(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + + *retval = p->p_sig; + return (0); +} + +#ifdef COMPAT_43 +/* + * Generalized interface signal handler, 4.3-compatible. 
+ */ + +struct osigvec_args { + int signo; + struct sigvec *nsv; + struct sigvec *osv; +}; + +/* ARGSUSED */ +osigvec(p, uap, retval) + struct proc *p; + register struct osigvec_args *uap; + int *retval; +{ + struct sigvec vec; + register struct sigacts *ps = p->p_sigacts; + register struct sigvec *sv; + register int sig; + int bit, error; + + sig = uap->signo; + if (sig <= 0 || sig >= NSIG || sig == SIGKILL || sig == SIGSTOP) + return (EINVAL); + sv = &vec; + if (uap->osv) { + *(sig_t *)&sv->sv_handler = ps->ps_sigact[sig]; + sv->sv_mask = ps->ps_catchmask[sig]; + bit = sigmask(sig); + sv->sv_flags = 0; + if ((ps->ps_sigonstack & bit) != 0) + sv->sv_flags |= SV_ONSTACK; + if ((ps->ps_sigintr & bit) != 0) + sv->sv_flags |= SV_INTERRUPT; + if (p->p_flag & SNOCLDSTOP) + sv->sv_flags |= SA_NOCLDSTOP; + if (error = copyout((caddr_t)sv, (caddr_t)uap->osv, + sizeof (vec))) + return (error); + } + if (uap->nsv) { + if (error = copyin((caddr_t)uap->nsv, (caddr_t)sv, + sizeof (vec))) + return (error); + sv->sv_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ + setsigvec(p, sig, (struct sigaction *)sv); + } + return (0); +} + +struct osigblock_args { + int mask; +}; + +osigblock(p, uap, retval) + register struct proc *p; + struct osigblock_args *uap; + int *retval; +{ + + (void) splhigh(); + *retval = p->p_sigmask; + p->p_sigmask |= uap->mask &~ sigcantmask; + (void) spl0(); + return (0); +} + +struct osigsetmask_args { + int mask; +}; + +osigsetmask(p, uap, retval) + struct proc *p; + struct osigsetmask_args *uap; + int *retval; +{ + + (void) splhigh(); + *retval = p->p_sigmask; + p->p_sigmask = uap->mask &~ sigcantmask; + (void) spl0(); + return (0); +} +#endif + +/* + * Suspend process until signal, providing mask to be set + * in the meantime. Note nonstandard calling convention: + * libc stub passes mask, not pointer, to save a copyin. 
+ */ + +struct sigsuspend_args { + sigset_t mask; +}; + +/* ARGSUSED */ +sigsuspend(p, uap, retval) + register struct proc *p; + struct sigsuspend_args *uap; + int *retval; +{ + register struct sigacts *ps = p->p_sigacts; + + /* + * When returning from sigpause, we want + * the old mask to be restored after the + * signal handler has finished. Thus, we + * save it here and mark the proc structure + * to indicate this (should be in sigacts). + */ + ps->ps_oldmask = p->p_sigmask; + ps->ps_flags |= SA_OLDMASK; + p->p_sigmask = uap->mask &~ sigcantmask; + (void) tsleep((caddr_t) ps, PPAUSE|PCATCH, "pause", 0); + /* always return EINTR rather than ERESTART... */ + return (EINTR); +} + +struct sigstack_args { + struct sigstack *nss; + struct sigstack *oss; +}; + +/* ARGSUSED */ +sigstack(p, uap, retval) + struct proc *p; + register struct sigstack_args *uap; + int *retval; +{ + struct sigstack ss; + int error = 0; + + if (uap->oss && (error = copyout((caddr_t)&p->p_sigacts->ps_sigstack, + (caddr_t)uap->oss, sizeof (struct sigstack)))) + return (error); + if (uap->nss && (error = copyin((caddr_t)uap->nss, (caddr_t)&ss, + sizeof (ss))) == 0) + p->p_sigacts->ps_sigstack = ss; + return (error); +} + +struct kill_args { + int pid; + int signo; +}; + +/* ARGSUSED */ +kill(cp, uap, retval) + register struct proc *cp; + register struct kill_args *uap; + int *retval; +{ + register struct proc *p; + register struct pcred *pc = cp->p_cred; + + if ((unsigned) uap->signo >= NSIG) + return (EINVAL); + if (uap->pid > 0) { + /* kill single process */ + p = pfind(uap->pid); + if (p == 0) + return (ESRCH); + if (!CANSIGNAL(cp, pc, p, uap->signo)) + return (EPERM); + if (uap->signo) + psignal(p, uap->signo); + return (0); + } + switch (uap->pid) { + case -1: /* broadcast signal */ + return (killpg1(cp, uap->signo, 0, 1)); + case 0: /* signal own process group */ + return (killpg1(cp, uap->signo, 0, 0)); + default: /* negative explicit process group */ + return (killpg1(cp, uap->signo, 
-uap->pid, 0)); + } + /* NOTREACHED */ +} + +#ifdef COMPAT_43 + +struct okillpg_args { + int pgid; + int signo; +}; + +/* ARGSUSED */ +okillpg(p, uap, retval) + struct proc *p; + register struct okillpg_args *uap; + int *retval; +{ + + if ((unsigned) uap->signo >= NSIG) + return (EINVAL); + return (killpg1(p, uap->signo, uap->pgid, 0)); +} +#endif + +/* + * Common code for kill process group/broadcast kill. + * cp is calling process. + */ +killpg1(cp, signo, pgid, all) + register struct proc *cp; + int signo, pgid, all; +{ + register struct proc *p; + register struct pcred *pc = cp->p_cred; + struct pgrp *pgrp; + int nfound = 0; + + if (all) + /* + * broadcast + */ + for (p = allproc; p != NULL; p = p->p_nxt) { + if (p->p_pid <= 1 || p->p_flag&SSYS || + p == cp || !CANSIGNAL(cp, pc, p, signo)) + continue; + nfound++; + if (signo) + psignal(p, signo); + } + else { + if (pgid == 0) + /* + * zero pgid means send to my process group. + */ + pgrp = cp->p_pgrp; + else { + pgrp = pgfind(pgid); + if (pgrp == NULL) + return (ESRCH); + } + for (p = pgrp->pg_mem; p != NULL; p = p->p_pgrpnxt) { + if (p->p_pid <= 1 || p->p_flag&SSYS || + p->p_stat == SZOMB || !CANSIGNAL(cp, pc, p, signo)) + continue; + nfound++; + if (signo) + psignal(p, signo); + } + } + return (nfound ? 0 : ESRCH); +} + +/* + * Send the specified signal to + * all processes with 'pgid' as + * process group. + */ +void +gsignal(pgid, sig) + int pgid, sig; +{ + struct pgrp *pgrp; + + if (pgid && (pgrp = pgfind(pgid))) + pgsignal(pgrp, sig, 0); +} + +/* + * Send sig to every member of a process group. + * If checktty is 1, limit to members which have a controlling + * terminal. + */ +void +pgsignal(pgrp, sig, checkctty) + struct pgrp *pgrp; + int sig, checkctty; +{ + register struct proc *p; + + if (pgrp) + for (p = pgrp->pg_mem; p != NULL; p = p->p_pgrpnxt) + if (checkctty == 0 || p->p_flag&SCTTY) + psignal(p, sig); +} + +/* + * Send a signal caused by a trap to the current process. 
 * If it will be caught immediately, deliver it with correct code.
 * Otherwise, post it normally.
 */
void
trapsignal(p, sig, code)
	struct proc *p;
	register int sig;
	unsigned code;
{
	register struct sigacts *ps = p->p_sigacts;
	int mask;

	mask = sigmask(sig);
	/*
	 * Fast path: the current, untraced process catches this signal
	 * and does not have it blocked, so push it straight to the user
	 * handler without a trip through psignal()/issig().
	 */
	if (p == curproc && (p->p_flag & STRC) == 0 &&
	    (p->p_sigcatch & mask) != 0 && (p->p_sigmask & mask) == 0) {
		p->p_stats->p_ru.ru_nsignals++;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_PSIG))
			ktrpsig(p->p_tracep, sig, ps->ps_sigact[sig],
				p->p_sigmask, code);
#endif
		sendsig(ps->ps_sigact[sig], sig, p->p_sigmask, code);
		/* Block this signal (and the handler's mask) while it runs. */
		p->p_sigmask |= ps->ps_catchmask[sig] | mask;
	} else {
		ps->ps_code = code;	/* XXX for core dump/debugger */
		psignal(p, sig);
	}
}

/*
 * Send the specified signal to the specified process.
 * If the signal has an action, the action is usually performed
 * by the target process rather than the caller; we simply add
 * the signal to the set of pending signals for the process.
 * Exceptions:
 *   o When a stop signal is sent to a sleeping process that takes the
 *     default action, the process is stopped without awakening it.
 *   o SIGCONT restarts stopped processes (or puts them back to sleep)
 *     regardless of the signal action (eg, blocked or ignored).
 * Other ignored signals are discarded immediately.
 */
void
psignal(p, sig)
	register struct proc *p;
	register int sig;
{
	register int s, prop;
	register sig_t action;
	int mask;

	if ((unsigned)sig >= NSIG || sig == 0)
		panic("psignal sig");
	mask = sigmask(sig);
	prop = sigprop[sig];

	/*
	 * If proc is traced, always give parent a chance.
	 */
	if (p->p_flag & STRC)
		action = SIG_DFL;
	else {
		/*
		 * If the signal is being ignored,
		 * then we forget about it immediately.
		 * (Note: we don't set SIGCONT in p_sigignore,
		 * and if it is set to SIG_IGN,
		 * action will be SIG_DFL here.)
		 */
		if (p->p_sigignore & mask)
			return;
		if (p->p_sigmask & mask)
			action = SIG_HOLD;
		else if (p->p_sigcatch & mask)
			action = SIG_CATCH;
		else
			action = SIG_DFL;
	}

	/* A fatal (or traced/caught) KILL/TERM cancels any nice handicap. */
	if (p->p_nice > NZERO && (sig == SIGKILL ||
	    sig == SIGTERM && (p->p_flag&STRC || action != SIG_DFL)))
		p->p_nice = NZERO;

	/* A continue signal cancels pending stop signals, and vice versa. */
	if (prop & SA_CONT)
		p->p_sig &= ~stopsigmask;

	if (prop & SA_STOP) {
		/*
		 * If sending a tty stop signal to a member of an orphaned
		 * process group, discard the signal here if the action
		 * is default; don't stop the process below if sleeping,
		 * and don't clear any pending SIGCONT.
		 */
		if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 &&
		    action == SIG_DFL)
			return;
		p->p_sig &= ~contsigmask;
	}
	p->p_sig |= mask;

	/*
	 * Defer further processing for signals which are held,
	 * except that stopped processes must be continued by SIGCONT.
	 */
	if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP))
		return;
	s = splhigh();
	switch (p->p_stat) {

	case SSLEEP:
		/*
		 * If process is sleeping uninterruptibly
		 * we can't interrupt the sleep... the signal will
		 * be noticed when the process returns through
		 * trap() or syscall().
		 */
		if ((p->p_flag & SSINTR) == 0)
			goto out;
		/*
		 * Process is sleeping and traced... make it runnable
		 * so it can discover the signal in issig() and stop
		 * for the parent.
		 */
		if (p->p_flag&STRC)
			goto run;
		/*
		 * When a sleeping process receives a stop
		 * signal, process immediately if possible.
		 * All other (caught or default) signals
		 * cause the process to run.
		 */
		if (prop & SA_STOP) {
			if (action != SIG_DFL)
				goto runfast;
			/*
			 * If a child holding parent blocked,
			 * stopping could cause deadlock.
			 */
			if (p->p_flag&SPPWAIT)
				goto out;
			p->p_sig &= ~mask;
			p->p_xstat = sig;
			if ((p->p_pptr->p_flag & SNOCLDSTOP) == 0)
				psignal(p->p_pptr, SIGCHLD);
			stop(p);
			goto out;
		} else
			goto runfast;
		/*NOTREACHED*/

	case SSTOP:
		/*
		 * If traced process is already stopped,
		 * then no further action is necessary.
		 */
		if (p->p_flag&STRC)
			goto out;

		/*
		 * Kill signal always sets processes running.
		 */
		if (sig == SIGKILL)
			goto runfast;

		if (prop & SA_CONT) {
			/*
			 * If SIGCONT is default (or ignored), we continue
			 * the process but don't leave the signal in p_sig,
			 * as it has no further action.  If SIGCONT is held,
			 * continue the process and leave the signal in p_sig.
			 * If the process catches SIGCONT, let it handle
			 * the signal itself.  If it isn't waiting on
			 * an event, then it goes back to run state.
			 * Otherwise, process goes back to sleep state.
			 */
			if (action == SIG_DFL)
				p->p_sig &= ~mask;
			if (action == SIG_CATCH)
				goto runfast;
			if (p->p_wchan == 0)
				goto run;
			p->p_stat = SSLEEP;
			goto out;
		}

		if (prop & SA_STOP) {
			/*
			 * Already stopped, don't need to stop again.
			 * (If we did the shell could get confused.)
			 */
			p->p_sig &= ~mask;		/* take it away */
			goto out;
		}

		/*
		 * If process is sleeping interruptibly, then
		 * simulate a wakeup so that when it is continued,
		 * it will be made runnable and can look at the signal.
		 * But don't setrun the process, leave it stopped.
		 */
		if (p->p_wchan && p->p_flag & SSINTR)
			unsleep(p);
		goto out;

	default:
		/*
		 * SRUN, SIDL, SZOMB do nothing with the signal,
		 * other than kicking ourselves if we are running.
		 * It will either never be noticed, or noticed very soon.
		 */
		if (p == curproc)
			signotify(p);
		goto out;
	}
	/*NOTREACHED*/

runfast:
	/*
	 * Raise priority to at least PUSER.
	 */
	if (p->p_pri > PUSER)
		p->p_pri = PUSER;
run:
	setrun(p);
out:
	splx(s);
}

/*
 * If the current process has a signal to process (should be caught
 * or cause termination, should interrupt current syscall),
 * return the signal number.  Stop signals with default action
 * are processed immediately, then cleared; they aren't returned.
 * This is checked after each entry to the system for a syscall
 * or trap (though this can usually be done without actually calling
 * issig by checking the pending signal masks in the CURSIG macro.)
 * The normal call sequence is
 *
 *	while (sig = CURSIG(curproc))
 *		psig(sig);
 */
issig(p)
	register struct proc *p;
{
	register int sig, mask, prop;

	for (;;) {
		mask = p->p_sig &~ p->p_sigmask;
		if (p->p_flag&SPPWAIT)
			mask &= ~stopsigmask;
		if (mask == 0)		/* no signal to send */
			return (0);
		sig = ffs((long)mask);
		mask = sigmask(sig);
		prop = sigprop[sig];
		/*
		 * We should see pending but ignored signals
		 * only if STRC was on when they were posted.
		 */
		if (mask & p->p_sigignore && (p->p_flag&STRC) == 0) {
			p->p_sig &= ~mask;
			continue;
		}
		if (p->p_flag&STRC && (p->p_flag&SPPWAIT) == 0) {
			/*
			 * If traced, always stop, and stay
			 * stopped until released by the parent.
			 */
			p->p_xstat = sig;
			psignal(p->p_pptr, SIGCHLD);
			do {
				stop(p);
				(void) splclock();
				swtch();
				(void) splnone();
			} while (!procxmt(p) && p->p_flag&STRC);

			/*
			 * If the traced bit got turned off,
			 * go back up to the top to rescan signals.
			 * This ensures that p_sig* and ps_sigact
			 * are consistent.
			 */
			if ((p->p_flag&STRC) == 0)
				continue;

			/*
			 * If parent wants us to take the signal,
			 * then it will leave it in p->p_xstat;
			 * otherwise we just look for signals again.
			 */
			p->p_sig &= ~mask;	/* clear the old signal */
			sig = p->p_xstat;
			if (sig == 0)
				continue;

			/*
			 * Put the new signal into p_sig.
			 * If signal is being masked,
			 * look for other signals.
			 */
			mask = sigmask(sig);
			p->p_sig |= mask;
			if (p->p_sigmask & mask)
				continue;
		}

		/*
		 * Decide whether the signal should be returned.
		 * Return the signal's number, or fall through
		 * to clear it from the pending mask.
		 */
		switch ((int)p->p_sigacts->ps_sigact[sig]) {

		case SIG_DFL:
			/*
			 * Don't take default actions on system processes.
			 */
			if (p->p_pid <= 1)
				break;		/* == ignore */
			/*
			 * If there is a pending stop signal to process
			 * with default action, stop here,
			 * then clear the signal.  However,
			 * if process is member of an orphaned
			 * process group, ignore tty stop signals.
			 */
			if (prop & SA_STOP) {
				if (p->p_flag&STRC ||
				    (p->p_pgrp->pg_jobc == 0 &&
				    prop & SA_TTYSTOP))
					break;	/* == ignore */
				p->p_xstat = sig;
				stop(p);
				if ((p->p_pptr->p_flag & SNOCLDSTOP) == 0)
					psignal(p->p_pptr, SIGCHLD);
				(void) splclock();
				swtch();
				(void) splnone();
				break;
			} else if (prop & SA_IGNORE) {
				/*
				 * Except for SIGCONT, shouldn't get here.
				 * Default action is to ignore; drop it.
				 */
				break;		/* == ignore */
			} else
				return (sig);
			/*NOTREACHED*/

		case SIG_IGN:
			/*
			 * Masking above should prevent us ever trying
			 * to take action on an ignored signal other
			 * than SIGCONT, unless process is traced.
			 */
			if ((prop & SA_CONT) == 0 && (p->p_flag&STRC) == 0)
				printf("issig\n");
			break;		/* == ignore */

		default:
			/*
			 * This signal has an action, let
			 * psig process it.
			 */
			return (sig);
		}
		p->p_sig &= ~mask;		/* take the signal! */
	}
	/* NOTREACHED */
}

/*
 * Put the argument process into the stopped
 * state and notify the parent via wakeup.
 * Signals are handled elsewhere.
 * The process must not be on the run queue.
 */
stop(p)
	register struct proc *p;
{

	p->p_stat = SSTOP;
	p->p_flag &= ~SWTED;
	wakeup((caddr_t)p->p_pptr);
}

/*
 * Take the action for the specified signal
 * from the current set of pending signals.
+ */ +void +psig(sig) + register int sig; +{ + register struct proc *p = curproc; + register struct sigacts *ps = p->p_sigacts; + register sig_t action; + int mask, returnmask; + +#ifdef DIAGNOSTIC + if (sig == 0) + panic("psig"); +#endif + mask = sigmask(sig); + p->p_sig &= ~mask; + action = ps->ps_sigact[sig]; +#ifdef KTRACE + if (KTRPOINT(p, KTR_PSIG)) + ktrpsig(p->p_tracep, sig, action, ps->ps_flags & SA_OLDMASK ? + ps->ps_oldmask : p->p_sigmask, 0); +#endif + if (action == SIG_DFL) { + /* + * Default action, where the default is to kill + * the process. (Other cases were ignored above.) + */ + sigexit(p, sig); + /* NOTREACHED */ + } else { + /* + * If we get here, the signal must be caught. + */ +#ifdef DIAGNOSTIC + if (action == SIG_IGN || (p->p_sigmask & mask)) + panic("psig action"); +#endif + /* + * Set the new mask value and also defer further + * occurences of this signal. + * + * Special case: user has done a sigpause. Here the + * current mask is not of interest, but rather the + * mask from before the sigpause is what we want + * restored after the signal processing is completed. + */ + (void) splhigh(); + if (ps->ps_flags & SA_OLDMASK) { + returnmask = ps->ps_oldmask; + ps->ps_flags &= ~SA_OLDMASK; + } else + returnmask = p->p_sigmask; + p->p_sigmask |= ps->ps_catchmask[sig] | mask; + (void) spl0(); + p->p_stats->p_ru.ru_nsignals++; + sendsig(action, sig, returnmask, 0); + } +} + +/* + * Force the current process to exit with the specified + * signal, dumping core if appropriate. We bypass the normal + * tests for masked and caught signals, allowing unrecoverable + * failures to terminate the process without changing signal state. + * Mark the accounting record with the signal termination. + * If dumping core, save the signal number for the debugger. + * Calls exit and does not return. 
+ */ +sigexit(p, sig) + register struct proc *p; + int sig; +{ + + p->p_acflag |= AXSIG; + if (sigprop[sig] & SA_CORE) { + p->p_sigacts->ps_sig = sig; + if (coredump(p) == 0) + sig |= WCOREFLAG; + } + kexit(p, W_EXITCODE(0, sig)); + /* NOTREACHED */ +} + +/* + * Create a core dump. + * The file name is "progname.core". + * Core dumps are not created if: + * the process is setuid, + * we are on a filesystem mounted with MNT_NOCORE, + * a file already exists and is not a core file, + * or was not produced from the same program, + * the link count to the corefile is > 1. + */ +coredump(p) + register struct proc *p; +{ + register struct vnode *vp; + register struct pcred *pcred = p->p_cred; + register struct ucred *cred = pcred->pc_ucred; + register struct vmspace *vm = p->p_vmspace; + struct vattr vattr; + int error, error1, exists; + struct nameidata nd; + char name[MAXCOMLEN+6]; /* progname.core */ + + if (pcred->p_svuid != pcred->p_ruid || + pcred->p_svgid != pcred->p_rgid) + return (EFAULT); + if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >= + p->p_rlimit[RLIMIT_CORE].rlim_cur) + return (EFAULT); + if (p->p_fd->fd_cdir->v_mount->mnt_flag & MNT_NOCORE) + return (EFAULT); + + sprintf(name, "%s.core", p->p_comm); + nd.ni_dirp = name; + nd.ni_segflg = UIO_SYSSPACE; + if ((error = vn_open(&nd, p, FWRITE, 0644)) == 0) + exists = 1; + else + exists = 0; + if (error == ENOENT) + error = vn_open(&nd, p, O_CREAT | FWRITE, 0644); + if (error) + return (error); + vp = nd.ni_vp; + if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred, p) || + vattr.va_nlink != 1) { + error = EFAULT; + goto out; + } + if (exists) { /* if file already exists, look if it's a coredump */ + struct user userbuf; /* XXX */ + error = vn_rdwr(UIO_READ, vp, (caddr_t)&userbuf, sizeof(userbuf), + (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, + (int *)NULL, p); + if (error || (vattr.va_size != ctob(UPAGES + + userbuf.u_kproc.kp_eproc.e_vm.vm_dsize + + userbuf.u_kproc.kp_eproc.e_vm.vm_ssize)) || + 
strcmp(p->p_comm, userbuf.u_kproc.kp_proc.p_comm)) { + error = EFAULT; + goto out; + } + } + VATTR_NULL(&vattr); + vattr.va_size = 0; + VOP_SETATTR(vp, &vattr, cred, p); + p->p_acflag |= ACORE; + bcopy(p, &p->p_addr->u_kproc.kp_proc, sizeof(struct proc)); + fill_eproc(p, &p->p_addr->u_kproc.kp_eproc); +#ifdef HPUXCOMPAT + /* + * BLETCH! If we loaded from an HPUX format binary file + * we have to dump an HPUX style user struct so that the + * HPUX debuggers can grok it. + */ + if (p->p_addr->u_pcb.pcb_flags & PCB_HPUXBIN) + error = hpuxdumpu(vp, cred); + else +#endif + error = vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES), + (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, + p); + if (error == 0) + error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, + (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE, + IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p); + if (error == 0) + error = vn_rdwr(UIO_WRITE, vp, + (caddr_t) trunc_page(vm->vm_maxsaddr + MAXSSIZ + - ctob(vm->vm_ssize)), + round_page(ctob(vm->vm_ssize)), + (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE, + IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p); +out: + VOP_UNLOCK(vp); + error1 = vn_close(vp, FWRITE, cred, p); + if (error == 0) + error = error1; + return (error); +} + +/* + * Nonexistent system call-- signal process (may want to handle it). + * Flag error in case process won't see signal immediately (blocked or ignored). + */ +/* ARGSUSED */ +nosys(p, args, retval) + struct proc *p; + void *args; + int *retval; +{ + + psignal(p, SIGSYS); + return (EINVAL); +} diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c new file mode 100644 index 000000000000..6b9a5aba202d --- /dev/null +++ b/sys/kern/kern_subr.c @@ -0,0 +1,262 @@ +/* + * Copyright (c) 1982, 1986, 1991 Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)kern_subr.c 7.7 (Berkeley) 4/15/91 + * $Id: kern_subr.c,v 1.2 1993/10/16 15:24:30 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "proc.h" + +uiomove(cp, n, uio) + register caddr_t cp; + register int n; + register struct uio *uio; +{ + register struct iovec *iov; + u_int cnt; + int error = 0; + + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) + panic("uiomove: mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("uiomove proc"); +#endif + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + case UIO_USERISPACE: + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) + return (error); + break; + + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy((caddr_t)cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, (caddr_t)cp, cnt); + break; + } + iov->iov_base += cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp += cnt; + n -= cnt; + } + return (error); +} + +uioapply(func, arg1, arg2, uio) + int (*func)() ; + register struct uio *uio; +{ + register struct iovec *iov; + u_int cnt, cnt1; + int error = 0; + + +/*#ifdef DIAGNOSTIC*/ + if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) + panic("uioapply: mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("uioapply proc"); +/*#endif*/ + while (uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + cnt1 = cnt; + error = (*func)(arg1, arg2, uio->uio_offset, uio->uio_rw, + iov->iov_base, &cnt1, uio->uio_procp); + cnt -= cnt1; + iov->iov_base += cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + 
uio->uio_offset += cnt; + if (error || cnt1) + return (error); + } + return (0); +} + +/* + * Give next character to user as result of read. + */ +ureadc(c, uio) + register int c; + register struct uio *uio; +{ + register struct iovec *iov; + +again: + if (uio->uio_iovcnt == 0) + panic("ureadc"); + iov = uio->uio_iov; + if (iov->iov_len <= 0 || uio->uio_resid <= 0) { + uio->uio_iovcnt--; + uio->uio_iov++; + goto again; + } + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (subyte(iov->iov_base, c) < 0) + return (EFAULT); + break; + + case UIO_SYSSPACE: + *iov->iov_base = c; + break; + + case UIO_USERISPACE: + if (suibyte(iov->iov_base, c) < 0) + return (EFAULT); + break; + } + iov->iov_base++; + iov->iov_len--; + uio->uio_resid--; + uio->uio_offset++; + return (0); +} + +strcat(src, append) + register char *src, *append; +{ + + for (; *src; ++src) + ; + while (*src++ = *append++) + ; +} + +strcpy(to, from) + register char *to, *from; +{ + + for (; *to = *from; ++from, ++to) + ; +} + +strncpy(to, from, cnt) + register char *to, *from; + register int cnt; +{ + + for (; cnt && (*to = *from); --cnt, ++from, ++to) + ; + *to = '\0'; +} + + +int +strcmp(s1, s2) + register const char *s1, *s2; +{ + while (*s1 == *s2++) + if (*s1++ == 0) + return (0); + return (*(unsigned char *)s1 - *(unsigned char *)--s2); +} + + + + + + +#ifndef lint /* unused except by ct.c, other oddities XXX */ +/* + * Get next character written in by user from uio. 
+ */ +uwritec(uio) + struct uio *uio; +{ + register struct iovec *iov; + register int c; + + if (uio->uio_resid <= 0) + return (-1); +again: + if (uio->uio_iovcnt <= 0) + panic("uwritec"); + iov = uio->uio_iov; + if (iov->iov_len == 0) { + uio->uio_iov++; + if (--uio->uio_iovcnt == 0) + return (-1); + goto again; + } + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + c = fubyte(iov->iov_base); + break; + + case UIO_SYSSPACE: + c = *(u_char *) iov->iov_base; + break; + + case UIO_USERISPACE: + c = fuibyte(iov->iov_base); + break; + } + if (c < 0) + return (-1); + iov->iov_base++; + iov->iov_len--; + uio->uio_resid--; + uio->uio_offset++; + return (c); +} +#endif /* notdef */ diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c new file mode 100644 index 000000000000..4b888509b915 --- /dev/null +++ b/sys/kern/kern_synch.c @@ -0,0 +1,600 @@ +/*- + * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_synch.c 7.18 (Berkeley) 6/27/91 + * $Id: kern_synch.c,v 1.2 1993/10/16 15:24:32 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "proc.h" +#include "kernel.h" +#include "buf.h" +#include "signalvar.h" +#include "resourcevar.h" + +#include "machine/cpu.h" + +u_char curpri; /* usrpri of curproc */ + +/* + * Force switch among equal priority processes every 100ms. + */ +roundrobin() +{ + + need_resched(); + timeout(roundrobin, (caddr_t)0, hz / 10); +} + +/* + * constants for digital decay and forget + * 90% of (p_cpu) usage in 5*loadav time + * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) + * Note that, as ps(1) mentions, this can let percentages + * total over 100% (I've seen 137.9% for 3 processes). + * + * Note that hardclock updates p_cpu and p_cpticks independently. + * + * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds. 
+ * That is, the system wants to compute a value of decay such + * that the following for loop: + * for (i = 0; i < (5 * loadavg); i++) + * p_cpu *= decay; + * will compute + * p_cpu *= 0.1; + * for all values of loadavg: + * + * Mathematically this loop can be expressed by saying: + * decay ** (5 * loadavg) ~= .1 + * + * The system computes decay as: + * decay = (2 * loadavg) / (2 * loadavg + 1) + * + * We wish to prove that the system's computation of decay + * will always fulfill the equation: + * decay ** (5 * loadavg) ~= .1 + * + * If we compute b as: + * b = 2 * loadavg + * then + * decay = b / (b + 1) + * + * We now need to prove two things: + * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) + * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) + * + * Facts: + * For x close to zero, exp(x) =~ 1 + x, since + * exp(x) = 0! + x**1/1! + x**2/2! + ... . + * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. + * For x close to zero, ln(1+x) =~ x, since + * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 + * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). + * ln(.1) =~ -2.30 + * + * Proof of (1): + * Solve (factor)**(power) =~ .1 given power (5*loadav): + * solving for factor, + * ln(factor) =~ (-2.30/5*loadav), or + * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = + * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED + * + * Proof of (2): + * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): + * solving for power, + * power*ln(b/(b+1)) =~ -2.30, or + * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. 
QED + * + * Actual power values for the implemented algorithm are as follows: + * loadav: 1 2 3 4 + * power: 5.68 10.32 14.94 19.55 + */ + +/* calculations for digital decay to forget 90% of usage in 5*loadav sec */ +#define loadfactor(loadav) (2 * (loadav)) +#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) + +/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ +fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ + +/* + * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the + * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below + * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). + * + * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: + * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). + * + * If you dont want to bother with the faster/more-accurate formula, you + * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate + * (more general) method of calculating the %age of CPU used by a process. + */ +#define CCPU_SHIFT 11 + +/* + * Recompute process priorities, once a second + */ +schedcpu() +{ + register fixpt_t loadfac = loadfactor(averunnable[0]); + register struct proc *p; + register int s; + register unsigned int newcpu; + + wakeup((caddr_t)&lbolt); + for (p = allproc; p != NULL; p = p->p_nxt) { + /* + * Increment time in/out of memory and sleep time + * (if sleeping). We ignore overflow; with 16-bit int's + * (remember them?) overflow takes 45 days. + */ + p->p_time++; + if (p->p_stat == SSLEEP || p->p_stat == SSTOP) + p->p_slptime++; + p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; + /* + * If the process has slept the entire second, + * stop recalculating its priority until it wakes up. + */ + if (p->p_slptime > 1) + continue; + /* + * p_pctcpu is only for ps. + */ +#if (FSHIFT >= CCPU_SHIFT) + p->p_pctcpu += (hz == 100)? 
+ ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): + 100 * (((fixpt_t) p->p_cpticks) + << (FSHIFT - CCPU_SHIFT)) / hz; +#else + p->p_pctcpu += ((FSCALE - ccpu) * + (p->p_cpticks * FSCALE / hz)) >> FSHIFT; +#endif + p->p_cpticks = 0; + newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice; + p->p_cpu = min(newcpu, UCHAR_MAX); + setpri(p); + s = splhigh(); /* prevent state changes */ + if (p->p_pri >= PUSER) { +#define PPQ (128 / NQS) /* priorities per queue */ + if ((p != curproc) && + p->p_stat == SRUN && + (p->p_flag & (SLOAD|SWEXIT)) == SLOAD && + (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) { + remrq(p); + p->p_pri = p->p_usrpri; + setrq(p); + } else + p->p_pri = p->p_usrpri; + } + splx(s); + } + vmmeter(); + if (bclnlist != NULL) + wakeup((caddr_t)pageproc); + timeout(schedcpu, (caddr_t)0, hz); +} + +/* + * Recalculate the priority of a process after it has slept for a while. + * For all load averages >= 1 and max p_cpu of 255, sleeping for at least + * six times the loadfactor will decay p_cpu to zero. + */ +updatepri(p) + register struct proc *p; +{ + register unsigned int newcpu = p->p_cpu; + register fixpt_t loadfac = loadfactor(averunnable[0]); + + if (p->p_slptime > 5 * loadfac) + p->p_cpu = 0; + else { + p->p_slptime--; /* the first time was done in schedcpu */ + while (newcpu && --p->p_slptime) + newcpu = (int) decay_cpu(loadfac, newcpu); + p->p_cpu = min(newcpu, UCHAR_MAX); + } + setpri(p); +} + +#define SQSIZE 0100 /* Must be power of 2 */ +#define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) +struct slpque { + struct proc *sq_head; + struct proc **sq_tailp; +} slpque[SQSIZE]; + +/* + * During autoconfiguration or after a panic, a sleep will simply + * lower the priority briefly to allow interrupts, then return. + * The priority to be used (safepri) is machine-dependent, thus this + * value is initialized and maintained in the machine-dependent layers. 
+ * This priority will typically be 0, or the lowest priority + * that is safe for use on the interrupt stack; it can be made + * higher to block network software interrupts after panics. + */ +int safepri; + +/* + * General sleep call. + * Suspends current process until a wakeup is made on chan. + * The process will then be made runnable with priority pri. + * Sleeps at most timo/hz seconds (0 means no timeout). + * If pri includes PCATCH flag, signals are checked + * before and after sleeping, else signals are not checked. + * Returns 0 if awakened, EWOULDBLOCK if the timeout expires. + * If PCATCH is set and a signal needs to be delivered, + * ERESTART is returned if the current system call should be restarted + * if possible, and EINTR is returned if the system call should + * be interrupted by the signal (return EINTR). + */ +tsleep(chan, pri, wmesg, timo) + caddr_t chan; + int pri; + char *wmesg; + int timo; +{ + register struct proc *p = curproc; + register struct slpque *qp; + register s; + int sig, catch = pri & PCATCH; + extern int cold; + int endtsleep(); + + s = splhigh(); + if (cold || panicstr) { + /* + * After a panic, or during autoconfiguration, + * just give interrupts a chance, then just return; + * don't run any other procs or panic below, + * in case this is the idle process and already asleep. + */ + splx(safepri); + splx(s); + return (0); + } +#ifdef DIAGNOSTIC + if (chan == 0 || p->p_stat != SRUN || p->p_rlink) + panic("tsleep"); +#endif + p->p_wchan = chan; + p->p_wmesg = wmesg; + p->p_slptime = 0; + p->p_pri = pri & PRIMASK; + qp = &slpque[HASH(chan)]; + if (qp->sq_head == 0) + qp->sq_head = p; + else + *qp->sq_tailp = p; + *(qp->sq_tailp = &p->p_link) = 0; + if (timo) + timeout(endtsleep, (caddr_t)p, timo); + /* + * We put ourselves on the sleep queue and start our timeout + * before calling CURSIG, as we could stop there, and a wakeup + * or a SIGCONT (or both) could occur while we were stopped. 
+ * A SIGCONT would cause us to be marked as SSLEEP + * without resuming us, thus we must be ready for sleep + * when CURSIG is called. If the wakeup happens while we're + * stopped, p->p_wchan will be 0 upon return from CURSIG. + */ + if (catch) { + p->p_flag |= SSINTR; + if (sig = CURSIG(p)) { + if (p->p_wchan) + unsleep(p); + p->p_stat = SRUN; + goto resume; + } + if (p->p_wchan == 0) { + catch = 0; + goto resume; + } + } + p->p_stat = SSLEEP; + p->p_stats->p_ru.ru_nvcsw++; + swtch(); +#include "ddb.h" +#ifdef NDDB + /* handy breakpoint location after process "wakes" */ + asm(".globl bpendtsleep ; bpendtsleep:"); +#endif +resume: + curpri = p->p_usrpri; + splx(s); + p->p_flag &= ~SSINTR; + if (p->p_flag & STIMO) { + p->p_flag &= ~STIMO; + if (catch == 0 || sig == 0) + return (EWOULDBLOCK); + } else if (timo) + untimeout(endtsleep, (caddr_t)p); + if (catch && (sig != 0 || (sig = CURSIG(p)))) { + if (p->p_sigacts->ps_sigintr & sigmask(sig)) + return (EINTR); + return (ERESTART); + } + return (0); +} + +/* + * Implement timeout for tsleep. + * If process hasn't been awakened (wchan non-zero), + * set timeout flag and undo the sleep. If proc + * is stopped, just unsleep so it will remain stopped. + */ +endtsleep(p) + register struct proc *p; +{ + int s = splhigh(); + + if (p->p_wchan) { + if (p->p_stat == SSLEEP) + setrun(p); + else + unsleep(p); + p->p_flag |= STIMO; + } + splx(s); +} + +/* + * Short-term, non-interruptable sleep. 
+ */ +sleep(chan, pri) + caddr_t chan; + int pri; +{ + register struct proc *p = curproc; + register struct slpque *qp; + register s; + extern int cold; + +#ifdef DIAGNOSTIC + if (pri > PZERO) { + printf("sleep called with pri %d > PZERO, wchan: %x\n", + pri, chan); + panic("old sleep"); + } +#endif + s = splhigh(); + if (cold || panicstr) { + /* + * After a panic, or during autoconfiguration, + * just give interrupts a chance, then just return; + * don't run any other procs or panic below, + * in case this is the idle process and already asleep. + */ + splx(safepri); + splx(s); + return; + } +#ifdef DIAGNOSTIC + if (chan==0 || p->p_stat != SRUN || p->p_rlink) + panic("sleep"); +#endif + p->p_wchan = chan; + p->p_wmesg = NULL; + p->p_slptime = 0; + p->p_pri = pri; + qp = &slpque[HASH(chan)]; + if (qp->sq_head == 0) + qp->sq_head = p; + else + *qp->sq_tailp = p; + *(qp->sq_tailp = &p->p_link) = 0; + p->p_stat = SSLEEP; + p->p_stats->p_ru.ru_nvcsw++; + swtch(); +#ifdef NDDB + /* handy breakpoint location after process "wakes" */ + asm(".globl bpendsleep ; bpendsleep:"); +#endif + curpri = p->p_usrpri; + splx(s); +} + +/* + * Remove a process from its wait queue + */ +unsleep(p) + register struct proc *p; +{ + register struct slpque *qp; + register struct proc **hp; + int s; + + s = splhigh(); + if (p->p_wchan) { + hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head; + while (*hp != p) + hp = &(*hp)->p_link; + *hp = p->p_link; + if (qp->sq_tailp == &p->p_link) + qp->sq_tailp = hp; + p->p_wchan = 0; + } + splx(s); +} + +/* + * Wakeup on "chan"; set all processes + * sleeping on chan to run state. 
+ */ +wakeup(chan) + register caddr_t chan; +{ + register struct slpque *qp; + register struct proc *p, **q; + int s; + + s = splhigh(); + qp = &slpque[HASH(chan)]; +restart: + for (q = &qp->sq_head; p = *q; ) { +#ifdef DIAGNOSTIC + if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP) + panic("wakeup"); +#endif + if (p->p_wchan == chan) { + p->p_wchan = 0; + *q = p->p_link; + if (qp->sq_tailp == &p->p_link) + qp->sq_tailp = q; + if (p->p_stat == SSLEEP) { + /* OPTIMIZED INLINE EXPANSION OF setrun(p) */ + if (p->p_slptime > 1) + updatepri(p); + p->p_slptime = 0; + p->p_stat = SRUN; + if (p->p_flag & SLOAD) + setrq(p); + /* + * Since curpri is a usrpri, + * p->p_pri is always better than curpri. + */ + if ((p->p_flag&SLOAD) == 0) + wakeup((caddr_t)&proc0); + else + need_resched(); + /* END INLINE EXPANSION */ + goto restart; + } + } else + q = &p->p_link; + } + splx(s); +} + +/* + * Initialize the (doubly-linked) run queues + * to be empty. + */ +rqinit() +{ + register int i; + + for (i = 0; i < NQS; i++) + qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; +} + +/* + * Change process state to be runnable, + * placing it on the run queue if it is in memory, + * and awakening the swapper if it isn't in memory. + */ +setrun(p) + register struct proc *p; +{ + register int s; + + s = splhigh(); + switch (p->p_stat) { + + case 0: + case SWAIT: + case SRUN: + case SZOMB: + default: + panic("setrun"); + + case SSTOP: + case SSLEEP: + unsleep(p); /* e.g. when sending signals */ + break; + + case SIDL: + break; + } + p->p_stat = SRUN; + if (p->p_flag & SLOAD) + setrq(p); + splx(s); + if (p->p_slptime > 1) + updatepri(p); + p->p_slptime = 0; + if ((p->p_flag&SLOAD) == 0) + wakeup((caddr_t)&proc0); + else if (p->p_pri < curpri) + need_resched(); +} + +/* + * Compute priority of process when running in user mode. + * Arrange to reschedule if the resulting priority + * is better than that of the current process. 
+ */ +setpri(p) + register struct proc *p; +{ + register unsigned int newpri; + + newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice; + newpri = min(newpri, MAXPRI); + p->p_usrpri = newpri; + if (newpri < curpri) + need_resched(); +} + +#ifdef NDDB +#define DDBFUNC(s) ddb_##s +DDBFUNC(ps) () { + int np; + struct proc *ap, *p, *pp; + np = nprocs; + p = ap = allproc; + printf(" pid proc addr uid ppid pgrp flag stat comm wchan\n"); + while (--np >= 0) { + pp = p->p_pptr; + if (pp == 0) + pp = p; + if (p->p_stat) { + printf("%5d %06x %06x %3d %5d %5d %06x %d %s ", + p->p_pid, ap, p->p_addr, p->p_cred->p_ruid, pp->p_pid, + p->p_pgrp->pg_id, p->p_flag, p->p_stat, + p->p_comm); + if (p->p_wchan) { + if (p->p_wmesg) + printf("%s ", p->p_wmesg); + printf("%x", p->p_wchan); + } + printf("\n"); + } + ap = p->p_nxt; + if (ap == 0 && np > 0) + ap = zombproc; + p = ap; + } +} +#endif diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c new file mode 100644 index 000000000000..f78786c3668a --- /dev/null +++ b/sys/kern/kern_time.c @@ -0,0 +1,389 @@ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)kern_time.c 7.15 (Berkeley) 3/17/91 + * $Id: kern_time.c,v 1.3 1993/10/16 15:24:33 rgrimes Exp $ + */ + +#include "param.h" +#include "resourcevar.h" +#include "kernel.h" +#include "proc.h" + +#include "machine/cpu.h" + +/* + * Time of day and interval timer support. + * + * These routines provide the kernel entry points to get and set + * the time-of-day and per-process interval timers. Subroutines + * here provide support for adding and subtracting timeval structures + * and decrementing interval timers, optionally reloading the interval + * timers when they expire. 
+ */ + +struct gettimeofday_args { + struct timeval *tp; + struct timezone *tzp; +}; + +/* ARGSUSED */ +gettimeofday(p, uap, retval) + struct proc *p; + register struct gettimeofday_args *uap; + int *retval; +{ + struct timeval atv; + int error = 0; + + if (uap->tp) { + microtime(&atv); + if (error = copyout((caddr_t)&atv, (caddr_t)uap->tp, + sizeof (atv))) + return (error); + } + if (uap->tzp) + error = copyout((caddr_t)&tz, (caddr_t)uap->tzp, + sizeof (tz)); + return (error); +} + +struct settimeofday_args { + struct timeval *tv; + struct timezone *tzp; +}; + +/* ARGSUSED */ +settimeofday(p, uap, retval) + struct proc *p; + struct settimeofday_args *uap; + int *retval; +{ + struct timeval atv; + struct timezone atz; + int error, s; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + if (uap->tv) { + if (error = copyin((caddr_t)uap->tv, (caddr_t)&atv, + sizeof (struct timeval))) + return (error); + /* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */ + boottime.tv_sec += atv.tv_sec - time.tv_sec; + s = splhigh(); time = atv; splx(s); + resettodr(); + } + if (uap->tzp && (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, + sizeof (atz))) == 0) + tz = atz; + return (error); +} + +extern int tickadj; /* "standard" clock skew, us./tick */ +int tickdelta; /* current clock skew, us. per tick */ +long timedelta; /* unapplied time correction, us. */ +long bigadj = 1000000; /* use 10x skew above bigadj us. 
*/ + +struct adjtime_args { + struct timeval *delta; + struct timeval *olddelta; +}; + +/* ARGSUSED */ +adjtime(p, uap, retval) + struct proc *p; + register struct adjtime_args *uap; + int *retval; +{ + struct timeval atv, oatv; + register long ndelta; + int s, error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + if (error = + copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof (struct timeval))) + return (error); + ndelta = atv.tv_sec * 1000000 + atv.tv_usec; + if (timedelta == 0) + if (ndelta > bigadj) + tickdelta = 10 * tickadj; + else + tickdelta = tickadj; + if (ndelta % tickdelta) + ndelta = ndelta / tickadj * tickadj; + + s = splclock(); + if (uap->olddelta) { + oatv.tv_sec = timedelta / 1000000; + oatv.tv_usec = timedelta % 1000000; + } + timedelta = ndelta; + splx(s); + + if (uap->olddelta) + (void) copyout((caddr_t)&oatv, (caddr_t)uap->olddelta, + sizeof (struct timeval)); + return (0); +} + +/* + * Get value of an interval timer. The process virtual and + * profiling virtual time timers are kept in the p_stats area, since + * they can be swapped out. These are kept internally in the + * way they are specified externally: in time until they expire. + * + * The real time interval timer is kept in the process table slot + * for the process, and its value (it_value) is kept as an + * absolute time rather than as a delta, so that it is easy to keep + * periodic real-time signals from drifting. + * + * Virtual time timers are processed in the hardclock() routine of + * kern_clock.c. The real time timer is processed by a timeout + * routine, called from the softclock() routine. Since a callout + * may be delayed in real time due to interrupt processing in the system, + * it is possible for the real time timeout routine (realitexpire, given below), + * to be delayed in real time past when it is supposed to occur. It + * does not suffice, therefore, to reload the real timer .it_value from the + * real time timers .it_interval. 
Rather, we compute the next time in + * absolute time the timer should go off. + */ + +struct getitimer_args { + u_int which; + struct itimerval *itv; +}; + +/* ARGSUSED */ +getitimer(p, uap, retval) + struct proc *p; + register struct getitimer_args *uap; + int *retval; +{ + struct itimerval aitv; + int s; + + if (uap->which > ITIMER_PROF) + return (EINVAL); + s = splclock(); + if (uap->which == ITIMER_REAL) { + /* + * Convert from absoulte to relative time in .it_value + * part of real time timer. If time for real time timer + * has passed return 0, else return difference between + * current time and time for the timer to go off. + */ + aitv = p->p_realtimer; + if (timerisset(&aitv.it_value)) + if (timercmp(&aitv.it_value, &time, <)) + timerclear(&aitv.it_value); + else + timevalsub(&aitv.it_value, &time); + } else + aitv = p->p_stats->p_timer[uap->which]; + splx(s); + return (copyout((caddr_t)&aitv, (caddr_t)uap->itv, + sizeof (struct itimerval))); +} + +struct setitimer_args { + u_int which; + struct itimerval *itv, *oitv; +}; + +/* ARGSUSED */ +setitimer(p, uap, retval) + struct proc *p; + register struct setitimer_args *uap; + int *retval; +{ + struct itimerval aitv; + register struct itimerval *itvp; + int s, error; + + if (uap->which > ITIMER_PROF) + return (EINVAL); + itvp = uap->itv; + if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv, + sizeof(struct itimerval)))) + return (error); + if ((uap->itv = uap->oitv) && (error = getitimer(p, uap, retval))) + return (error); + if (itvp == 0) + return (0); + if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval)) + return (EINVAL); + s = splclock(); + if (uap->which == ITIMER_REAL) { + untimeout(realitexpire, (caddr_t)p); + if (timerisset(&aitv.it_value)) { + timevaladd(&aitv.it_value, &time); + timeout(realitexpire, (caddr_t)p, hzto(&aitv.it_value)); + } + p->p_realtimer = aitv; + } else + p->p_stats->p_timer[uap->which] = aitv; + splx(s); + return (0); +} + +/* + * Real interval timer expired: + 
* send process whose timer expired an alarm signal. + * If time is not set up to reload, then just return. + * Else compute next time timer should go off which is > current time. + * This is where delay in processing this timeout causes multiple + * SIGALRM calls to be compressed into one. + */ +realitexpire(p) + register struct proc *p; +{ + int s; + + psignal(p, SIGALRM); + if (!timerisset(&p->p_realtimer.it_interval)) { + timerclear(&p->p_realtimer.it_value); + return; + } + for (;;) { + s = splclock(); + timevaladd(&p->p_realtimer.it_value, + &p->p_realtimer.it_interval); + if (timercmp(&p->p_realtimer.it_value, &time, >)) { + timeout(realitexpire, (caddr_t)p, + hzto(&p->p_realtimer.it_value)); + splx(s); + return; + } + splx(s); + } +} + +/* + * Check that a proposed value to load into the .it_value or + * .it_interval part of an interval timer is acceptable, and + * fix it to have at least minimal value (i.e. if it is less + * than the resolution of the clock, round it up.) + */ +itimerfix(tv) + struct timeval *tv; +{ + + if (tv->tv_sec < 0 || tv->tv_sec > 100000000 || + tv->tv_usec < 0 || tv->tv_usec >= 1000000) + return (EINVAL); + if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick) + tv->tv_usec = tick; + return (0); +} + +/* + * Decrement an interval timer by a specified number + * of microseconds, which must be less than a second, + * i.e. < 1000000. If the timer expires, then reload + * it. In this case, carry over (usec - old value) to + * reducint the value reloaded into the timer so that + * the timer does not drift. This routine assumes + * that it is called in a context where the timers + * on which it is operating cannot change in value. 
+ */ +itimerdecr(itp, usec) + register struct itimerval *itp; + int usec; +{ + + if (itp->it_value.tv_usec < usec) { + if (itp->it_value.tv_sec == 0) { + /* expired, and already in next interval */ + usec -= itp->it_value.tv_usec; + goto expire; + } + itp->it_value.tv_usec += 1000000; + itp->it_value.tv_sec--; + } + itp->it_value.tv_usec -= usec; + usec = 0; + if (timerisset(&itp->it_value)) + return (1); + /* expired, exactly at end of interval */ +expire: + if (timerisset(&itp->it_interval)) { + itp->it_value = itp->it_interval; + itp->it_value.tv_usec -= usec; + if (itp->it_value.tv_usec < 0) { + itp->it_value.tv_usec += 1000000; + itp->it_value.tv_sec--; + } + } else + itp->it_value.tv_usec = 0; /* sec is already 0 */ + return (0); +} + +/* + * Add and subtract routines for timevals. + * N.B.: subtract routine doesn't deal with + * results which are before the beginning, + * it just gets very confused in this case. + * Caveat emptor. + */ +timevaladd(t1, t2) + struct timeval *t1, *t2; +{ + + t1->tv_sec += t2->tv_sec; + t1->tv_usec += t2->tv_usec; + timevalfix(t1); +} + +timevalsub(t1, t2) + struct timeval *t1, *t2; +{ + + t1->tv_sec -= t2->tv_sec; + t1->tv_usec -= t2->tv_usec; + timevalfix(t1); +} + +timevalfix(t1) + struct timeval *t1; +{ + + if (t1->tv_usec < 0) { + t1->tv_sec--; + t1->tv_usec += 1000000; + } + if (t1->tv_usec >= 1000000) { + t1->tv_sec++; + t1->tv_usec -= 1000000; + } +} diff --git a/sys/kern/kern_xxx.c b/sys/kern/kern_xxx.c new file mode 100644 index 000000000000..d904a8a6af26 --- /dev/null +++ b/sys/kern/kern_xxx.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)kern_xxx.c 7.17 (Berkeley) 4/20/91 + * $Id: kern_xxx.c,v 1.5 1993/10/24 06:19:56 paul Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "kernel.h" +#include "proc.h" +#include "reboot.h" +#include "utsname.h" + +/* ARGSUSED */ +gethostid(p, uap, retval) + struct proc *p; + void *uap; + long *retval; +{ + + *retval = hostid; + return (0); +} + +struct sethostid_args { + long hostid; +}; + +/* ARGSUSED */ +sethostid(p, uap, retval) + struct proc *p; + struct sethostid_args *uap; + int *retval; +{ + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + hostid = uap->hostid; + return (0); +} + +struct gethostname_args { + char *hostname; + u_int len; +}; + +/* ARGSUSED */ +gethostname(p, uap, retval) + struct proc *p; + struct gethostname_args *uap; + int *retval; +{ + + if (uap->len > hostnamelen + 1) + uap->len = hostnamelen + 1; + return (copyout((caddr_t)hostname, (caddr_t)uap->hostname, uap->len)); +} + +struct sethostname_args { + char *hostname; + u_int len; +}; + +/* ARGSUSED */ +sethostname(p, uap, retval) + struct proc *p; + register struct sethostname_args *uap; + int *retval; +{ + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + if (uap->len > sizeof (hostname) - 1) + return (EINVAL); + hostnamelen = uap->len; + error = copyin((caddr_t)uap->hostname, hostname, uap->len); + hostname[hostnamelen] = 0; + return (error); +} + +struct getdomainname_args { + char *domainname; + u_int len; +}; + +/* ARGSUSED */ +int +getdomainname(p, uap, retval) + struct proc *p; + struct getdomainname_args *uap; + int *retval; +{ + if (uap->len > domainnamelen + 1) + uap->len = domainnamelen + 1; + return (copyout((caddr_t)domainname, (caddr_t)uap->domainname, uap->len)); +} + +struct setdomainname_args { + char *domainname; + u_int len; +}; + +/* ARGSUSED */ +int +setdomainname(p, uap, retval) + struct proc *p; + struct setdomainname_args *uap; + int *retval; +{ + int error; + + if (error = 
suser(p->p_ucred, &p->p_acflag)) + return (error); + if (uap->len > sizeof (domainname) - 1) + return EINVAL; + domainnamelen = uap->len; + error = copyin((caddr_t)uap->domainname, domainname, uap->len); + domainname[domainnamelen] = 0; + return (error); +} + +struct uname_args { + struct utsname *name; +}; + +/* ARGSUSED */ +int +uname(p, uap, retval) + struct proc *p; + struct uname_args *uap; + int *retval; +{ + bcopy(hostname, utsname.nodename, sizeof(utsname.nodename)); + utsname.nodename[sizeof(utsname.nodename)-1] = '\0'; + return (copyout((caddr_t)&utsname, (caddr_t)uap->name, + sizeof(struct utsname))); +} + +struct reboot_args { + int opt; +}; + +/* ARGSUSED */ +reboot(p, uap, retval) + struct proc *p; + struct reboot_args *uap; + int *retval; +{ + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + boot(uap->opt); + return (0); +} + +#ifdef COMPAT_43 +oquota() +{ + + return (ENOSYS); +} +#endif diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh new file mode 100644 index 000000000000..709065445b1a --- /dev/null +++ b/sys/kern/makesyscalls.sh @@ -0,0 +1,172 @@ +#! 
/bin/sh - +# from: @(#)makesyscalls.sh 7.6 (Berkeley) 4/20/91 +# $Id: makesyscalls.sh,v 1.2 1993/10/16 15:24:36 rgrimes Exp $ + +set -e + +# name of compat option: +compat=COMPAT_43 + +# output files: +sysnames="syscalls.c" +syshdr="../sys/syscall.h" +syssw="init_sysent.c" + +# tmp files: +sysdcl="sysent.dcl" +syscompat="sysent.compat" +sysent="sysent.switch" + +trap "rm $sysdcl $syscompat $sysent" 0 + +case $# in + 0) echo "Usage: $0 input-file" 1>&2 + exit 1 + ;; +esac + +awk < $1 " + BEGIN { + sysdcl = \"$sysdcl\" + syscompat = \"$syscompat\" + sysent = \"$sysent\" + sysnames = \"$sysnames\" + syshdr = \"$syshdr\" + compat = \"$compat\" + infile = \"$1\" + "' + + printf "/*\n * System call switch table.\n *\n" > sysdcl + printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysdcl + + printf "\n#ifdef %s\n", compat > syscompat + printf "#define compat(n, name) n, __CONCAT(o,name)\n\n" > syscompat + + printf "/*\n * System call names.\n *\n" > sysnames + printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames + + printf "/*\n * System call numbers.\n *\n" > syshdr + printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr + } + NR == 1 { + printf " * created from%s\n */\n\n", $0 > sysdcl + printf "#include \"param.h\"\n" > sysdcl + printf "#include \"systm.h\"\n\n" > sysdcl + printf "int\tnosys();\n\n" > sysdcl + + printf "struct sysent sysent[] = {\n" > sysent + + printf " * created from%s\n */\n\n", $0 > sysnames + printf "char *syscallnames[] = {\n" > sysnames + + printf " * created from%s\n */\n\n", $0 > syshdr + next + } + NF == 0 || $1 ~ /^;/ { + next + } + $1 ~ /^#[ ]*if/ { + print > sysent + print > sysdcl + print > syscompat + print > sysnames + savesyscall = syscall + next + } + $1 ~ /^#[ ]*else/ { + print > sysent + print > sysdcl + print > syscompat + print > sysnames + syscall = savesyscall + next + } + $1 ~ /^#/ { + print > sysent + print > sysdcl + print > syscompat + print > sysnames + next + } 
+ syscall != $1 { + printf "%s: line %d: syscall number out of sync at %d\n", \ + infile, NR, syscall + printf "line is:\n" + print + exit 1 + } + { comment = $4 + for (i = 5; i <= NF; i++) + comment = comment " " $i + if (NF < 5) + $5 = $4 + } + $2 == "STD" { + printf("int\t%s();\n", $4) > sysdcl + printf("\t%d, %s,\t\t\t/* %d = %s */\n", \ + $3, $4, syscall, $5) > sysent + printf("\t\"%s\",\t\t\t/* %d = %s */\n", \ + $5, syscall, $5) > sysnames + printf("#define\tSYS_%s\t%d\n", \ + $5, syscall) > syshdr + syscall++ + next + } + $2 == "COMPAT" { + printf("int\to%s();\n", $4) > syscompat + printf("\tcompat(%d,%s),\t\t/* %d = old %s */\n", \ + $3, $4, syscall, $5) > sysent + printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \ + $5, syscall, $5) > sysnames + printf("\t\t\t\t/* %d is old %s */\n", \ + syscall, comment) > syshdr + syscall++ + next + } + $2 == "LIBCOMPAT" { + printf("int\to%s();\n", $4) > syscompat + printf("\tcompat(%d,%s),\t\t/* %d = old %s */\n", \ + $3, $4, syscall, $5) > sysent + printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \ + $5, syscall, $5) > sysnames + printf("#define\tSYS_%s\t%d\t/* compatibility; still used by libc */\n", \ + $5, syscall) > syshdr + syscall++ + next + } + $2 == "OBSOL" { + printf("\t0, nosys,\t\t\t/* %d = obsolete %s */\n", \ + syscall, comment) > sysent + printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", \ + $4, syscall, comment) > sysnames + printf("\t\t\t\t/* %d is obsolete %s */\n", \ + syscall, comment) > syshdr + syscall++ + next + } + $2 == "UNIMPL" { + printf("\t0, nosys,\t\t\t/* %d = %s */\n", \ + syscall, comment) > sysent + printf("\t\"#%d\",\t\t\t/* %d = %s */\n", \ + syscall, syscall, comment) > sysnames + syscall++ + next + } + { + printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $2 + exit 1 + } + END { + printf("\n#else /* %s */\n", compat) > syscompat + printf("#define compat(n, name) 0, nosys\n") > syscompat + printf("#endif /* %s */\n\n", compat) > syscompat + + printf("};\n\n") > sysent + 
printf("int\tnsysent = sizeof(sysent) / sizeof(sysent[0]);\n") > sysent + + printf("};\n") > sysnames + } ' + +cat $sysdcl $syscompat $sysent >$syssw + +chmod 444 $sysnames $syshdr $syssw diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c new file mode 100644 index 000000000000..d60ea0218fc1 --- /dev/null +++ b/sys/kern/spec_vnops.c @@ -0,0 +1,522 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)spec_vnops.c 7.37 (Berkeley) 5/30/91 + * $Id: spec_vnops.c,v 1.2 1993/10/16 15:24:37 rgrimes Exp $ + */ + +#include "param.h" +#include "proc.h" +#include "systm.h" +#include "kernel.h" +#include "conf.h" +#include "buf.h" +#include "mount.h" +#include "namei.h" +#include "vnode.h" +#include "specdev.h" +#include "stat.h" +#include "errno.h" +#include "ioctl.h" +#include "file.h" +#include "dkbad.h" /* XXX */ +#include "disklabel.h" + +/* symbolic sleep message strings for devices */ +char devopn[] = "devopn"; +char devio[] = "devio"; +char devwait[] = "devwait"; +char devin[] = "devin"; +char devout[] = "devout"; +char devioc[] = "devioc"; +char devcls[] = "devcls"; + +struct vnodeops spec_vnodeops = { + spec_lookup, /* lookup */ + spec_create, /* create */ + spec_mknod, /* mknod */ + spec_open, /* open */ + spec_close, /* close */ + spec_access, /* access */ + spec_getattr, /* getattr */ + spec_setattr, /* setattr */ + spec_read, /* read */ + spec_write, /* write */ + spec_ioctl, /* ioctl */ + spec_select, /* select */ + spec_mmap, /* mmap */ + spec_fsync, /* fsync */ + spec_seek, /* seek */ + spec_remove, /* remove */ + spec_link, /* link */ + spec_rename, /* rename */ + spec_mkdir, /* mkdir */ + spec_rmdir, /* rmdir */ + spec_symlink, /* symlink */ + spec_readdir, /* readdir */ + spec_readlink, /* readlink */ + spec_abortop, /* abortop */ + spec_inactive, /* inactive */ + spec_reclaim, /* reclaim */ + spec_lock, /* lock 
*/ + spec_unlock, /* unlock */ + spec_bmap, /* bmap */ + spec_strategy, /* strategy */ + spec_print, /* print */ + spec_islocked, /* islocked */ + spec_advlock, /* advlock */ +}; + +/* + * Trivial lookup routine that always fails. + */ +spec_lookup(vp, ndp, p) + struct vnode *vp; + struct nameidata *ndp; + struct proc *p; +{ + + ndp->ni_dvp = vp; + ndp->ni_vp = NULL; + return (ENOTDIR); +} + +/* + * Open a special file: Don't allow open if fs is mounted -nodev, + * and don't allow opens of block devices that are currently mounted. + * Otherwise, call device driver open function. + */ +/* ARGSUSED */ +spec_open(vp, mode, cred, p) + register struct vnode *vp; + int mode; + struct ucred *cred; + struct proc *p; +{ + dev_t dev = (dev_t)vp->v_rdev; + register int maj = major(dev); + int error; + + if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) + return (ENXIO); + + switch (vp->v_type) { + + case VCHR: + if ((u_int)maj >= nchrdev) + return (ENXIO); + VOP_UNLOCK(vp); + error = (*cdevsw[maj].d_open)(dev, mode, S_IFCHR, p); + VOP_LOCK(vp); + return (error); + + case VBLK: + if ((u_int)maj >= nblkdev) + return (ENXIO); + if (error = mountedon(vp)) + return (error); + return ((*bdevsw[maj].d_open)(dev, mode, S_IFBLK, p)); + } + return (0); +} + +/* + * Vnode op for read + */ +/* ARGSUSED */ +spec_read(vp, uio, ioflag, cred) + register struct vnode *vp; + register struct uio *uio; + int ioflag; + struct ucred *cred; +{ + struct proc *p = uio->uio_procp; + struct buf *bp; + daddr_t bn; + long bsize, bscale; + struct partinfo dpart; + register int n, on; + int error = 0; + extern int mem_no; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("spec_read mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("spec_read proc"); +#endif + if (uio->uio_resid == 0) + return (0); + + switch (vp->v_type) { + + case VCHR: + /* + * Negative offsets allowed only for /dev/kmem + */ + if (uio->uio_offset < 0 && major(vp->v_rdev) != mem_no) + 
return (EINVAL); + VOP_UNLOCK(vp); + error = (*cdevsw[major(vp->v_rdev)].d_read) + (vp->v_rdev, uio, ioflag); + VOP_LOCK(vp); + return (error); + + case VBLK: + if (uio->uio_offset < 0) + return (EINVAL); + bsize = BLKDEV_IOSIZE; + if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, + (caddr_t)&dpart, FREAD, p) == 0) { + if (dpart.part->p_fstype == FS_BSDFFS && + dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) + bsize = dpart.part->p_frag * + dpart.part->p_fsize; + } + bscale = bsize / DEV_BSIZE; + do { + bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1); + on = uio->uio_offset % bsize; + n = MIN((unsigned)(bsize - on), uio->uio_resid); + if (vp->v_lastr + bscale == bn) + error = breada(vp, bn, (int)bsize, bn + bscale, + (int)bsize, NOCRED, &bp); + else + error = bread(vp, bn, (int)bsize, NOCRED, &bp); + vp->v_lastr = bn; + n = MIN(n, bsize - bp->b_resid); + if (error) { + brelse(bp); + return (error); + } + error = uiomove(bp->b_un.b_addr + on, n, uio); +#ifdef OMIT /* 20 Aug 92*/ + if (n + on == bsize) + bp->b_flags |= B_AGE; +#endif /* OMIT*/ + brelse(bp); + } while (error == 0 && uio->uio_resid > 0 && n != 0); + return (error); + + default: + panic("spec_read type"); + } + /* NOTREACHED */ +} + +/* + * Vnode op for write + */ +/* ARGSUSED */ +spec_write(vp, uio, ioflag, cred) + register struct vnode *vp; + register struct uio *uio; + int ioflag; + struct ucred *cred; +{ + struct proc *p = uio->uio_procp; + struct buf *bp; + daddr_t bn; + int bsize, blkmask; + struct partinfo dpart; + register int n, on; + int error = 0; + extern int mem_no; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("spec_write mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("spec_write proc"); +#endif + + switch (vp->v_type) { + + case VCHR: + /* + * Negative offsets allowed only for /dev/kmem + */ + if (uio->uio_offset < 0 && major(vp->v_rdev) != mem_no) + return (EINVAL); + VOP_UNLOCK(vp); + error = 
(*cdevsw[major(vp->v_rdev)].d_write) + (vp->v_rdev, uio, ioflag); + VOP_LOCK(vp); + return (error); + + case VBLK: + if (uio->uio_resid == 0) + return (0); + if (uio->uio_offset < 0) + return (EINVAL); + bsize = BLKDEV_IOSIZE; + if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART, + (caddr_t)&dpart, FREAD, p) == 0) { + if (dpart.part->p_fstype == FS_BSDFFS && + dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) + bsize = dpart.part->p_frag * + dpart.part->p_fsize; + } + blkmask = (bsize / DEV_BSIZE) - 1; + do { + bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask; + on = uio->uio_offset % bsize; + n = MIN((unsigned)(bsize - on), uio->uio_resid); + if (n == bsize) + bp = getblk(vp, bn, bsize); + else + error = bread(vp, bn, bsize, NOCRED, &bp); + n = MIN(n, bsize - bp->b_resid); + if (error) { + brelse(bp); + return (error); + } + error = uiomove(bp->b_un.b_addr + on, n, uio); + if (n + on == bsize) { + bp->b_flags |= B_AGE; + bawrite(bp); + } else + bdwrite(bp); + } while (error == 0 && uio->uio_resid > 0 && n != 0); + return (error); + + default: + panic("spec_write type"); + } + /* NOTREACHED */ +} + +/* + * Device ioctl operation. 
 */
/*
 * Device ioctl operation: pass the request straight through to the
 * character or block device switch entry for this special vnode.
 */
/* ARGSUSED */
spec_ioctl(vp, com, data, fflag, cred, p)
	struct vnode *vp;	/* special vnode being operated on */
	int com;		/* ioctl command */
	caddr_t data;		/* in/out argument buffer */
	int fflag;		/* open file flags (FREAD/FWRITE) */
	struct ucred *cred;	/* unused; present to match vnode-op signature */
	struct proc *p;
{
	dev_t dev = vp->v_rdev;

	switch (vp->v_type) {

	case VCHR:
		return ((*cdevsw[major(dev)].d_ioctl)(dev, com, data,
		    fflag, p));

	case VBLK:
		/*
		 * Special in-kernel query: command 0 with B_TAPE as the
		 * "data" value asks whether this block device is a tape.
		 * Returns 0 for tape, 1 otherwise -- NOT an errno.
		 */
		if (com == 0 && (int)data == B_TAPE)
			if (bdevsw[major(dev)].d_flags & B_TAPE)
				return (0);
			else
				return (1);
		return ((*bdevsw[major(dev)].d_ioctl)(dev, com, data,
		    fflag, p));

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

/*
 * Select on a special file.  Only character devices have a d_select
 * entry; all other types are reported as always ready (XXX).
 */
/* ARGSUSED */
spec_select(vp, which, fflags, cred, p)
	struct vnode *vp;
	int which, fflags;	/* which: FREAD/FWRITE-style select sense */
	struct ucred *cred;
	struct proc *p;
{
	register dev_t dev;

	switch (vp->v_type) {

	default:
		return (1);		/* XXX */

	case VCHR:
		dev = vp->v_rdev;
		return (*cdevsw[major(dev)].d_select)(dev, which, p);
	}
}

/*
 * Just call the device strategy routine
 */
spec_strategy(bp)
	register struct buf *bp;
{

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
 * (For a device there is no logical-to-physical block translation,
 * so the vnode and block number map to themselves.)
 */
spec_bmap(vp, bn, vpp, bnp)
	struct vnode *vp;
	daddr_t bn;
	struct vnode **vpp;	/* out: device vnode, if wanted */
	daddr_t *bnp;		/* out: physical block number, if wanted */
{

	if (vpp != NULL)
		*vpp = vp;
	if (bnp != NULL)
		*bnp = bn;
	return (0);
}

/*
 * At the moment we do not do any locking.
 */
/* ARGSUSED */
spec_lock(vp)
	struct vnode *vp;
{

	return (0);
}

/* ARGSUSED */
spec_unlock(vp)
	struct vnode *vp;
{

	return (0);
}

/*
 * Device close routine.  Decides whether this is really the last
 * close (or a forced close via vnode revocation) before calling the
 * driver's d_close entry.
 */
/* ARGSUSED */
spec_close(vp, flag, cred, p)
	register struct vnode *vp;
	int flag;
	struct ucred *cred;
	struct proc *p;
{
	dev_t dev = vp->v_rdev;
	int (*devclose) __P((dev_t, int, int, struct proc *));
	int mode;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		vflushbuf(vp, 0);
		if (vinvalbuf(vp, 1))
			return (0);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, flag, mode, p));
}

/*
 * Print out the contents of a special device vnode.
 */
spec_print(vp)
	struct vnode *vp;
{

	printf("tag VT_NON, dev %d, %d\n", major(vp->v_rdev),
	    minor(vp->v_rdev));
}

/*
 * Special device advisory byte-level locks.
 * Not supported on devices.
 */
/* ARGSUSED */
spec_advlock(vp, id, op, fl, flags)
	struct vnode *vp;
	caddr_t id;
	int op;
	struct flock *fl;
	int flags;
{

	return (EOPNOTSUPP);
}

/*
 * Special device failed operation
 */
spec_ebadf()
{

	return (EBADF);
}

/*
 * Special device bad operation: placeholder for vnode operations that
 * must never be reached on a special vnode.
 */
spec_badop()
{

	panic("spec_badop called");
	/* NOTREACHED */
}
diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c
new file mode 100644
index 000000000000..bc054fcd56dc
--- /dev/null
+++ b/sys/kern/subr_log.c
@@ -0,0 +1,229 @@
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)subr_log.c 7.11 (Berkeley) 3/17/91 + * $Id: subr_log.c,v 1.4 1993/10/16 15:24:39 rgrimes Exp $ + */ + +/* + * Error log buffer for kernel printf's. 
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "vnode.h"
#include "ioctl.h"
#include "msgbuf.h"
#include "file.h"

/* Priority at which a reader sleeps waiting for log data (signal-catchable). */
#define LOG_RDPRI	(PZERO + 1)

#define LOG_ASYNC	0x04	/* deliver SIGIO to sc_pgid on new data */
#define LOG_RDWAIT	0x08	/* a reader is asleep in logread() */

/* Soft state for the single /dev/klog instance. */
struct logsoftc {
	int	sc_state;		/* see above for possibilities */
	pid_t	sc_sel;			/* pid of process waiting on select call 16 Jun 93 */
	int	sc_pgid;		/* process/group for async I/O */
} logsoftc;

int	log_open;			/* also used in log() */

/*
 * Open the kernel log device; only one opener at a time is allowed.
 * Initializes the message buffer if it has not been set up yet.
 */
/*ARGSUSED*/
logopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	register struct msgbuf *mbp = msgbufp;

	if (log_open)
		return (EBUSY);
	log_open = 1;
	logsoftc.sc_pgid = p->p_pid;		/* signal process only */
	/*
	 * Potential race here with putchar() but since putchar should be
	 * called by autoconf, msg_magic should be initialized by the time
	 * we get here.
	 */
	if (mbp->msg_magic != MSG_MAGIC) {
		register int i;

		mbp->msg_magic = MSG_MAGIC;
		mbp->msg_bufx = mbp->msg_bufr = 0;
		for (i=0; i < MSG_BSIZE; i++)
			mbp->msg_bufc[i] = 0;
	}
	return (0);
}

/*
 * Close the log device: clear all soft state so a later open starts clean.
 */
/*ARGSUSED*/
logclose(dev, flag)
	dev_t dev;
{
	log_open = 0;
	logsoftc.sc_state = 0;
	logsoftc.sc_sel = 0;			/* 16 Jun 93 */
}

/*
 * Read from the kernel message ring buffer.  Blocks (interruptibly)
 * until data is available unless IO_NDELAY is set.  msg_bufr is the
 * read cursor, msg_bufx the write cursor; equal cursors mean empty.
 */
/*ARGSUSED*/
logread(dev, uio, flag)
	dev_t dev;
	struct uio *uio;
	int flag;
{
	register struct msgbuf *mbp = msgbufp;
	register long l;
	register int s;
	int error = 0;

	/* Cursor comparison must be atomic w.r.t. putchar() at interrupt level. */
	s = splhigh();
	while (mbp->msg_bufr == mbp->msg_bufx) {
		if (flag & IO_NDELAY) {
			splx(s);
			return (EWOULDBLOCK);
		}
		logsoftc.sc_state |= LOG_RDWAIT;
		if (error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH,
		    "klog", 0)) {
			splx(s);
			return (error);
		}
	}
	splx(s);
	logsoftc.sc_state &= ~LOG_RDWAIT;

	while (uio->uio_resid > 0) {
		l = mbp->msg_bufx - mbp->msg_bufr;
		if (l < 0)
			/* Writer has wrapped: read up to end of buffer first. */
			l = MSG_BSIZE - mbp->msg_bufr;
		l = MIN(l, uio->uio_resid);
		if (l == 0)
			break;
		error = uiomove((caddr_t)&mbp->msg_bufc[mbp->msg_bufr],
		    (int)l, uio);
		if (error)
			break;

		/* Advance read cursor, wrapping at MSG_BSIZE. */
		mbp->msg_bufr += l;
		if (mbp->msg_bufr < 0 || mbp->msg_bufr >= MSG_BSIZE)
			mbp->msg_bufr = 0;
	}
	return (error);
}

/*
 * Select for read on the log device: ready when the ring buffer is
 * non-empty, otherwise record the selecting pid for logwakeup().
 */
/*ARGSUSED*/
logselect(dev, rw, p)
	dev_t dev;
	int rw;
	struct proc *p;
{
	int s = splhigh();

	switch (rw) {

	case FREAD:
		if (msgbufp->msg_bufr != msgbufp->msg_bufx) {
			splx(s);
			return (1);
		}
		logsoftc.sc_sel = p->p_pid;	/* 16 Jun 93 */
		break;
	}
	splx(s);
	return (0);
}

/*
 * Called (e.g. from log()) when new data has been appended to the
 * message buffer: wake selectors, send SIGIO if async mode is on,
 * and wake any sleeping reader.
 */
logwakeup()
{
	struct proc *p;

	if (!log_open)
		return;
	if (logsoftc.sc_sel) {			/* 16 Jun 93 */
		selwakeup(logsoftc.sc_sel, 0);
		logsoftc.sc_sel = 0;
	}
	if (logsoftc.sc_state & LOG_ASYNC) {
		/* Negative pgid means signal the whole process group. */
		if (logsoftc.sc_pgid < 0)
			gsignal(-logsoftc.sc_pgid, SIGIO);
		else if (p = pfind(logsoftc.sc_pgid))
			psignal(p, SIGIO);
	}
	if (logsoftc.sc_state & LOG_RDWAIT) {
		wakeup((caddr_t)msgbufp);
		logsoftc.sc_state &= ~LOG_RDWAIT;
	}
}

/*
 * Ioctl handler for the log device: byte count, non-blocking and
 * async toggles, and async-signal process group get/set.
 */
/*ARGSUSED*/
logioctl(dev, com, data, flag)
	caddr_t data;
{
	long l;
	int s;

	switch (com) {

	/* return number of characters immediately available */
	case FIONREAD:
		s = splhigh();
		l = msgbufp->msg_bufx - msgbufp->msg_bufr;
		splx(s);
		if (l < 0)
			/* write cursor wrapped; account for full ring size */
			l += MSG_BSIZE;
		*(off_t *)data = l;
		break;

	case FIONBIO:
		/* Non-blocking is handled via IO_NDELAY in logread(); no state. */
		break;

	case FIOASYNC:
		if (*(int *)data)
			logsoftc.sc_state |= LOG_ASYNC;
		else
			logsoftc.sc_state &= ~LOG_ASYNC;
		break;

	case TIOCSPGRP:
		logsoftc.sc_pgid = *(int *)data;
		break;

	case TIOCGPGRP:
		*(int *)data = logsoftc.sc_pgid;
		break;

	default:
		return (-1);
	}
	return (0);
}
diff --git a/sys/kern/subr_mcount.c b/sys/kern/subr_mcount.c
new file mode 100644
index 000000000000..7d57dc190c21
--- /dev/null
+++ b/sys/kern/subr_mcount.c
@@ -0,0 +1,287 @@
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)subr_mcount.c 7.10 (Berkeley) 5/7/91 + * $Id: subr_mcount.c,v 1.3 1993/10/16 15:24:41 rgrimes Exp $ + */ + +#ifdef GPROF +#include "gprof.h" +#include "param.h" +#include "systm.h" +#include "malloc.h" + +/* + * Froms is actually a bunch of unsigned shorts indexing tos + */ +int profiling = 3; +u_short *froms; +struct tostruct *tos = 0; +long tolimit = 0; +char *s_lowpc = (char *)KERNBASE; +extern char etext; +char *s_highpc = &etext; +u_long s_textsize = 0; +int ssiz; +u_short *sbuf; +u_short *kcount; + +kmstartup() +{ + u_long fromssize, tossize; + + /* + * Round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. + */ + s_lowpc = (char *) + ROUNDDOWN((unsigned)s_lowpc, HISTFRACTION*sizeof (HISTCOUNTER)); + s_highpc = (char *) + ROUNDUP((unsigned)s_highpc, HISTFRACTION*sizeof (HISTCOUNTER)); + s_textsize = s_highpc - s_lowpc; + printf("Profiling kernel, s_textsize=%d [%x..%x]\n", + s_textsize, s_lowpc, s_highpc); + ssiz = (s_textsize / HISTFRACTION) + sizeof (struct phdr); + sbuf = (u_short *)malloc(ssiz, M_GPROF, M_WAITOK); + if (sbuf == 0) { + printf("No space for monitor buffer(s)\n"); + return; + } + bzero(sbuf, ssiz); + fromssize = s_textsize / HASHFRACTION; + froms = (u_short *)malloc(fromssize, M_GPROF, M_NOWAIT); + if (froms == 0) { + printf("No space for monitor buffer(s)\n"); + free(sbuf, M_GPROF); + sbuf = 0; + return; + } + bzero(froms, fromssize); + tolimit = s_textsize * ARCDENSITY / 100; + if (tolimit < MINARCS) + tolimit = MINARCS; + else if (tolimit > (0xffff - 1)) + tolimit = 0xffff - 1; + tossize = tolimit * sizeof (struct tostruct); + tos = (struct tostruct *)malloc(tossize, M_GPROF, M_WAITOK); + if (tos == 0) { + printf("No space for monitor buffer(s)\n"); + free(sbuf, M_GPROF), sbuf = 0; + free(froms, M_GPROF), froms = 0; + return; + } + bzero(tos, tossize); + tos[0].link = 0; + ((struct phdr *)sbuf)->lpc = s_lowpc; + ((struct phdr *)sbuf)->hpc = 
s_highpc; + ((struct phdr *)sbuf)->ncnt = ssiz; + kcount = (u_short *)(((int)sbuf) + sizeof (struct phdr)); +} + +mcount() +{ + register char *selfpc; /* r11 => r5 */ + register u_short *frompcindex; /* r10 => r4 */ + register struct tostruct *top; /* r9 => r3 */ + register struct tostruct *prevtop; /* r8 => r2 */ + register long toindex; /* r7 => r1 */ + static int s; + + /* + * Check that we are profiling. + */ + if (profiling) + goto out; + /* + * Find the return address for mcount, + * and the return address for mcount's caller. + */ +#ifdef lint + selfpc = (char *)0; + frompcindex = 0; +#else + ; /* avoid label botch */ +#ifdef __GNUC__ +#if defined(vax) + Fix Me!! +#endif +#if defined(tahoe) + Fix Me!! +#endif +#if defined(hp300) + /* + * selfpc = pc pushed by mcount jsr, + * frompcindex = pc pushed by jsr into self. + * In GCC the caller's stack frame has already been built so we + * have to chase a6 to find caller's raddr. This assumes that all + * routines we are profiling were built with GCC and that all + * profiled routines use link/unlk. + */ + asm("movl a6@(4),%0" : "=r" (selfpc)); + asm("movl a6@(0)@(4),%0" : "=r" (frompcindex)); +#endif +#if defined(i386) + /* + * selfpc = pc pushed by mcount call + */ + asm("movl 4(%%ebp),%0" : "=r" (selfpc)); + /* + * frompcindex = pc pushed by jsr into self. + * in GCC, the caller's stack frame has already been built, so we + * have to chase the base pointer to find caller's raddr. + */ + asm("movl (%%ebp),%0" : "=r" (frompcindex)); + frompcindex = ((unsigned short **)frompcindex)[1]; +#endif /* i386 */ +#else +#if defined(vax) + asm(" movl (sp), r11"); /* selfpc = ... (jsb frame) */ + asm(" movl 16(fp), r10"); /* frompcindex = (calls frame) */ +#endif +#if defined(i386) + Fix Me!! +#endif /* i386 */ +#if defined(tahoe) + asm(" movl -8(fp),r12"); /* selfpc = callf frame */ + asm(" movl (fp),r11"); + asm(" movl -8(r11),r11"); /* frompcindex = 1 callf frame back */ +#endif +#if defined(hp300) + Fix Me!! 
+#endif +#endif /* not __GNUC__ */ +#endif /* not lint */ + /* + * Insure that we cannot be recursively invoked. + * this requires that splhigh() and splx() below + * do NOT call mcount! + */ +#if defined(hp300) + asm("movw sr,%0" : "=g" (s)); + asm("movw #0x2700,sr"); +#else + s = splhigh(); +#endif + /* + * Check that frompcindex is a reasonable pc value. + * For example: signal catchers get called from the stack, + * not from text space. too bad. + */ + frompcindex = (u_short *)((u_long)frompcindex - (u_long)s_lowpc); + if ((u_long)frompcindex > s_textsize) + goto done; + frompcindex = + &froms[((long)frompcindex) / (HASHFRACTION * sizeof (*froms))]; + toindex = *frompcindex; + if (toindex == 0) { + /* + * First time traversing this arc + */ + toindex = ++tos[0].link; + if (toindex >= tolimit) + goto overflow; + *frompcindex = toindex; + top = &tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + top->link = 0; + goto done; + } + top = &tos[toindex]; + if (top->selfpc == selfpc) { + /* + * Arc at front of chain; usual case. + */ + top->count++; + goto done; + } + /* + * Have to go looking down chain for it. + * Top points to what we are looking at, + * prevtop points to previous top. + * We know it is not at the head of the chain. + */ + for (; /* goto done */; ) { + if (top->link == 0) { + /* + * Top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * So we allocate a new tostruct + * and link it to the head of the chain. + */ + toindex = ++tos[0].link; + if (toindex >= tolimit) + goto overflow; + top = &tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + top->link = *frompcindex; + *frompcindex = toindex; + goto done; + } + /* + * Otherwise, check the next arc on the chain. + */ + prevtop = top; + top = &tos[top->link]; + if (top->selfpc == selfpc) { + /* + * There it is, increment its count and + * move it to the head of the chain. 
+ */ + top->count++; + toindex = prevtop->link; + prevtop->link = top->link; + top->link = *frompcindex; + *frompcindex = toindex; + goto done; + } + + } +done: +#if defined(hp300) + asm("movw %0,sr" : : "g" (s)); +#else + splx(s); +#endif + /* and fall through */ +out: +#if defined(vax) + asm(" rsb"); +#endif + return; +overflow: + profiling = 3; + printf("mcount: tos overflow\n"); + goto out; +} +#endif diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c new file mode 100644 index 000000000000..66c9e13f0c22 --- /dev/null +++ b/sys/kern/subr_prf.c @@ -0,0 +1,615 @@ +/*- + * Copyright (c) 1986, 1988, 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)subr_prf.c 7.30 (Berkeley) 6/29/91 + * $Id: subr_prf.c,v 1.4.2.1 1993/11/14 18:13:07 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "buf.h" +#include "conf.h" +#include "reboot.h" +#include "msgbuf.h" +#include "proc.h" +#include "ioctl.h" +#include "vnode.h" +#include "file.h" +#include "tty.h" +#include "tprintf.h" +#include "syslog.h" +#include "malloc.h" + +/* + * Note that stdarg.h and the ANSI style va_start macro is used for both + * ANSI and traditional C compilers. 
+ */ +#include <machine/stdarg.h> + +#ifdef KADB +#include "machine/kdbparam.h" +#endif + +#define TOCONS 0x01 +#define TOTTY 0x02 +#define TOLOG 0x04 + +struct tty *constty; /* pointer to console "window" tty */ + +#if defined(KADB) +extern cngetc(); /* standard console getc */ +#endif +#ifdef KADB +int (*v_getc)() = cngetc; /* "" getc from virtual console */ +extern cnpoll(); +int (*v_poll)() = cnpoll; /* kdb hook to enable input polling */ +#endif +extern cnputc(); /* standard console putc */ +int (*v_putc)() = cnputc; /* routine to putc on virtual console */ + +static void logpri __P((int level)); +static void putchar __P((int ch, int flags, struct tty *tp)); +static char *ksprintn __P((u_long num, int base, int *len)); +void kprintf __P((const char *fmt, int flags, struct tty *tp, va_list)); +volatile void boot(int bootopt); + +/* + * Variable panicstr contains argument to first call to panic; used + * as flag to indicate that the kernel has already called panic. + */ +const char *panicstr; + +/* + * Message buffer + */ +struct msgbuf *msgbufp; +int msgbufmapped; + +/* + * Panic is called on unresolvable fatal errors. It prints "panic: mesg", + * and then reboots. If we are called twice, then we avoid trying to sync + * the disks as this often leads to recursive panics. + */ +#ifdef __STDC__ +volatile void +panic(const char *msg) +#else +void +panic(msg) + char *msg; +#endif +{ + int bootopt = RB_AUTOBOOT | RB_DUMP; + + if (panicstr) + bootopt |= RB_NOSYNC; + else + panicstr = msg; + printf("panic: %s\n", msg); +#ifdef KGDB + kgdb_panic(); +#endif +#ifdef KADB + if (boothowto & RB_KDB) { + int s; + + s = splnet(); /* below kdb pri */ + setsoftkdb(); + splx(s); + } +#endif +#include "ddb.h" +#if NDDB > 0 + Debugger (); +#endif + boot(bootopt); +} + +/* + * Warn that a system table is full. + */ +void +tablefull(tab) + char *tab; +{ + + log(LOG_ERR, "%s: table is full\n", tab); +} + +/* + * Uprintf prints to the controlling terminal for the current process. 
+ * It may block if the tty queue is overfull. No message is printed if + * the queue does not clear in a reasonable time. + */ +void +#ifdef __STDC__ +uprintf(const char *fmt, ...) +#else +uprintf(fmt /*, va_alist */) + char *fmt; +#endif +{ + register struct proc *p = curproc; + va_list ap; + + if (p->p_flag & SCTTY && p->p_session->s_ttyvp) { + va_start(ap, fmt); + kprintf(fmt, TOTTY, p->p_session->s_ttyp, ap); + va_end(ap); + } +} + +tpr_t +tprintf_open(p) + register struct proc *p; +{ + + if (p->p_flag & SCTTY && p->p_session->s_ttyvp) { + SESSHOLD(p->p_session); + return ((tpr_t) p->p_session); + } + return ((tpr_t) NULL); +} + +void +tprintf_close(sess) + tpr_t sess; +{ + + if (sess) + SESSRELE((struct session *) sess); +} + +/* + * tprintf prints on the controlling terminal associated + * with the given session. + */ +void +#ifdef __STDC__ +tprintf(tpr_t tpr, const char *fmt, ...) +#else +tprintf(tpr, fmt /*, va_alist */) + tpr_t tpr; + char *fmt; +#endif +{ + register struct session *sess = (struct session *)tpr; + struct tty *tp = NULL; + int flags = TOLOG; + va_list ap; + + logpri(LOG_INFO); + if (sess && sess->s_ttyvp && ttycheckoutq(sess->s_ttyp, 0)) { + flags |= TOTTY; + tp = sess->s_ttyp; + } + va_start(ap, fmt); + kprintf(fmt, flags, tp, ap); + va_end(ap); + logwakeup(); +} + +/* + * Ttyprintf displays a message on a tty; it should be used only by + * the tty driver, or anything that knows the underlying tty will not + * be revoke(2)'d away. Other callers should use tprintf. + */ +void +#ifdef __STDC__ +ttyprintf(struct tty *tp, const char *fmt, ...) +#else +ttyprintf(tp, fmt /*, va_alist */) + struct tty *tp; + char *fmt; +#endif +{ + va_list ap; + + va_start(ap, fmt); + kprintf(fmt, TOTTY, tp, ap); + va_end(ap); +} + +extern int log_open; + +/* + * Log writes to the log buffer, and guarantees not to sleep (so can be + * called by interrupt routines). If there is no process reading the + * log yet, it writes to the console also. 
+ */ +void +#ifdef __STDC__ +log(int level, const char *fmt, ...) +#else +log(level, fmt /*, va_alist */) + int level; + char *fmt; +#endif +{ + register int s; + va_list ap; + + s = splhigh(); + logpri(level); + va_start(ap, fmt); + kprintf(fmt, TOLOG, NULL, ap); + splx(s); + va_end(ap); + if (!log_open) { + va_start(ap, fmt); + kprintf(fmt, TOCONS, NULL, ap); + va_end(ap); + } + logwakeup(); +} + +static void +logpri(level) + int level; +{ + register int ch; + register char *p; + + putchar('<', TOLOG, NULL); + for (p = ksprintn((u_long)level, 10, NULL); ch = *p--;) + putchar(ch, TOLOG, NULL); + putchar('>', TOLOG, NULL); +} + +int +#ifdef __STDC__ +addlog(const char *fmt, ...) +#else +addlog(fmt /*, va_alist */) + char *fmt; +#endif +{ + register int s; + va_list ap; + + s = splhigh(); + va_start(ap, fmt); + kprintf(fmt, TOLOG, NULL, ap); + splx(s); + va_end(ap); + if (!log_open) { + va_start(ap, fmt); + kprintf(fmt, TOCONS, NULL, ap); + va_end(ap); + } + logwakeup(); + return (0); +} + +int consintr = 1; /* ok to handle console interrupts? */ + +int +#ifdef __STDC__ +printf(const char *fmt, ...) +#else +printf(fmt /*, va_alist */) + char *fmt; +#endif +{ + va_list ap; + register int savintr; + + savintr = consintr; /* disable interrupts */ + consintr = 0; + va_start(ap, fmt); + kprintf(fmt, TOCONS | TOLOG, NULL, ap); + va_end(ap); + if (!panicstr) + logwakeup(); + consintr = savintr; /* reenable interrupts */ + + return 0; /* for compatibility with libc's printf() */ +} + +/* + * Scaled down version of printf(3). + * + * Two additional formats: + * + * The format %b is supported to decode error registers. + * Its usage is: + * + * printf("reg=%b\n", regval, "<base><arg>*"); + * + * where <base> is the output base expressed as a control character, e.g. + * \10 gives octal; \20 gives hex. Each arg is a sequence of characters, + * the first of which gives the bit number to be inspected (origin 1), and + * the next characters (up to a control character, i.e. 
a character <= 32), + * give the name of the register. Thus: + * + * printf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n"); + * + * would produce output: + * + * reg=3<BITTWO,BITONE> + * + * The format %r is supposed to pass an additional format string and argument + * list recursively. + * Its usage is: + * + * fn(otherstuff, char *fmt, ...) + * { + * va_list ap; + * va_start(ap, fmt); + * printf("prefix: %r, other stuff\n", fmt, ap); + * va_end(ap); + * + * Space or zero padding and a field width are supported for the numeric + * formats only. + */ +void +kprintf(fmt, flags, tp, ap) + register const char *fmt; + int flags; + struct tty *tp; + va_list ap; +{ + register char *p, *p2; + register int ch, n; + u_long ul; + int base, lflag, tmp, width; + char padc; + + for (;;) { + padc = ' '; + width = 0; + while ((ch = *(u_char *)fmt++) != '%') { + if (ch == '\0') + return; + putchar(ch, flags, tp); + } + lflag = 0; +reswitch: switch (ch = *(u_char *)fmt++) { + case '0': + padc = '0'; + goto reswitch; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + for (width = 0;; ++fmt) { + width = width * 10 + ch - '0'; + ch = *fmt; + if (ch < '0' || ch > '9') + break; + } + goto reswitch; + case 'l': + lflag = 1; + goto reswitch; + case 'b': + ul = va_arg(ap, int); + p = va_arg(ap, char *); + for (p2 = ksprintn(ul, *p++, NULL); ch = *p2--;) + putchar(ch, flags, tp); + + if (!ul) + break; + + for (tmp = 0; n = *p++;) { + if (ul & (1 << (n - 1))) { + putchar(tmp ? ',' : '<', flags, tp); + for (; (n = *p) > ' '; ++p) + putchar(n, flags, tp); + tmp = 1; + } else + for (; *p > ' '; ++p); + } + if (tmp) + putchar('>', flags, tp); + break; + case 'c': + putchar(va_arg(ap, int), flags, tp); + break; + case 'r': + p = va_arg(ap, char *); + kprintf(p, flags, tp, va_arg(ap, va_list)); + break; + case 's': + p = va_arg(ap, char *); + while (ch = *p++) + putchar(ch, flags, tp); + break; + case 'd': + ul = lflag ? 
va_arg(ap, long) : va_arg(ap, int); + if ((long)ul < 0) { + putchar('-', flags, tp); + ul = -(long)ul; + } + base = 10; + goto number; + case 'o': + ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int); + base = 8; + goto number; + case 'u': + ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int); + base = 10; + goto number; + case 'x': + ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int); + base = 16; +number: p = ksprintn(ul, base, &tmp); + if (width && (width -= tmp) > 0) + while (width--) + putchar(padc, flags, tp); + while (ch = *p--) + putchar(ch, flags, tp); + break; + default: + putchar('%', flags, tp); + if (lflag) + putchar('l', flags, tp); + /* FALLTHROUGH */ + case '%': + putchar(ch, flags, tp); + } + } +} + +/* + * Print a character on console or users terminal. If destination is + * the console then the last MSGBUFS characters are saved in msgbuf for + * inspection later. + */ +static void +putchar(c, flags, tp) + register int c; + int flags; + struct tty *tp; +{ + register struct msgbuf *mbp; + + if (panicstr) + constty = NULL; + if ((flags & TOCONS) && tp == NULL && constty) { + tp = constty; + flags |= TOTTY; + } + if ((flags & TOTTY) && tp && tputchar(c, tp) < 0 && + (flags & TOCONS) && tp == constty) + constty = NULL; + if ((flags & TOLOG) && + c != '\0' && c != '\r' && c != 0177 && msgbufmapped) { + mbp = msgbufp; + if (mbp->msg_magic != MSG_MAGIC) { + bzero((caddr_t)mbp, sizeof(*mbp)); + mbp->msg_magic = MSG_MAGIC; + } + mbp->msg_bufc[mbp->msg_bufx++] = c; + if (mbp->msg_bufx < 0 || mbp->msg_bufx >= MSG_BSIZE) + mbp->msg_bufx = 0; + } + if ((flags & TOCONS) && constty == NULL && c != '\0') + (*v_putc)(c); +} + +/* + * Scaled down version of sprintf(3). + */ +#ifdef __STDC__ +int +sprintf(char *buf, const char *cfmt, ...) 
+#else +int +sprintf(buf, cfmt /*, va_alist */) + char *buf, *cfmt; +#endif +{ + register const char *fmt = cfmt; + register char *p, *bp; + register int ch, base; + u_long ul; + int lflag; + va_list ap; + + va_start(ap, cfmt); + for (bp = buf; ; ) { + while ((ch = *(u_char *)fmt++) != '%') + if ((*bp++ = ch) == '\0') + return ((bp - buf) - 1); + + lflag = 0; +reswitch: switch (ch = *(u_char *)fmt++) { + case 'l': + lflag = 1; + goto reswitch; + case 'c': + *bp++ = va_arg(ap, int); + break; + case 's': + p = va_arg(ap, char *); + while (*bp++ = *p++) + ; + --bp; + break; + case 'd': + ul = lflag ? va_arg(ap, long) : va_arg(ap, int); + if ((long)ul < 0) { + *bp++ = '-'; + ul = -(long)ul; + } + base = 10; + goto number; + break; + case 'o': + ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int); + base = 8; + goto number; + break; + case 'u': + ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int); + base = 10; + goto number; + break; + case 'x': + ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int); + base = 16; +number: for (p = ksprintn(ul, base, NULL); ch = *p--;) + *bp++ = ch; + break; + default: + *bp++ = '%'; + if (lflag) + *bp++ = 'l'; + /* FALLTHROUGH */ + case '%': + *bp++ = ch; + } + } + va_end(ap); +} + +/* + * Put a number (base <= 16) in a buffer in reverse order; return an + * optional length and a pointer to the NULL terminated (preceded?) + * buffer. + */ +static char * +ksprintn(ul, base, lenp) + register u_long ul; + register int base, *lenp; +{ /* A long in base 8, plus NULL. */ + static char buf[sizeof(long) * NBBY / 3 + 2]; + register char *p; + + p = buf; + do { + *++p = "0123456789abcdef"[ul % base]; + } while (ul /= base); + if (lenp) + *lenp = p - buf; + return (p); +} diff --git a/sys/kern/subr_rlist.c b/sys/kern/subr_rlist.c new file mode 100644 index 000000000000..8185be26e30a --- /dev/null +++ b/sys/kern/subr_rlist.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 1992 William F. Jolitz, TeleMuse + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This software is a component of "386BSD" developed by + William F. Jolitz, TeleMuse. + * 4. Neither the name of the developer nor the name "386BSD" + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ + * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS + * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT. + * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT + * NOT MAKE USE THIS WORK. + * + * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED + * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN + * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES + * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING + * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND + * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE + * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS + * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992. + * + * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE DEVELOPER BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: subr_rlist.c,v 1.2 1993/10/16 15:24:44 rgrimes Exp $ + */ + +#include "sys/param.h" +#include "sys/cdefs.h" +#include "sys/malloc.h" +#include "rlist.h" + +/* + * Resource lists. + */ + +/* + * Add space to a resource list. Used to either + * initialize a list or return free space to it. + */ +rlist_free (rlp, start, end) +register struct rlist **rlp; unsigned start, end; { + struct rlist *head; + + head = *rlp; + +loop: + /* if nothing here, insert (tail of list) */ + if (*rlp == 0) { + *rlp = (struct rlist *)malloc(sizeof(**rlp), M_TEMP, M_NOWAIT); + (*rlp)->rl_start = start; + (*rlp)->rl_end = end; + (*rlp)->rl_next = 0; + return; + } + + /* if new region overlaps something currently present, panic */ + if (start >= (*rlp)->rl_start && start <= (*rlp)->rl_end) { + printf("Frag %d:%d, ent %d:%d ", start, end, + (*rlp)->rl_start, (*rlp)->rl_end); + panic("overlapping front rlist_free: freed twice?"); + } + if (end >= (*rlp)->rl_start && end <= (*rlp)->rl_end) { + printf("Frag %d:%d, ent %d:%d ", start, end, + (*rlp)->rl_start, (*rlp)->rl_end); + panic("overlapping tail rlist_free: freed twice?"); + } + + /* are we adjacent to this element? (in front) */ + if (end+1 == (*rlp)->rl_start) { + /* coalesce */ + (*rlp)->rl_start = start; + goto scan; + } + + /* are we before this element? 
*/ + if (end < (*rlp)->rl_start) { + register struct rlist *nlp; + + nlp = (struct rlist *)malloc(sizeof(*nlp), M_TEMP, M_NOWAIT); + nlp->rl_start = start; + nlp->rl_end = end; + nlp->rl_next = *rlp; + *rlp = nlp; + return; + } + + /* are we adjacent to this element? (at tail) */ + if ((*rlp)->rl_end + 1 == start) { + /* coalesce */ + (*rlp)->rl_end = end; + goto scan; + } + + /* are we after this element */ + if (start > (*rlp)->rl_end) { + rlp = &((*rlp)->rl_next); + goto loop; + } else + panic("rlist_free: can't happen"); + +scan: + /* can we coalesce list now that we've filled a void? */ + { + register struct rlist *lp, *lpn; + + for (lp = head; lp->rl_next ;) { + lpn = lp->rl_next; + + /* coalesce ? */ + if (lp->rl_end + 1 == lpn->rl_start) { + lp->rl_end = lpn->rl_end; + lp->rl_next = lpn->rl_next; + free(lpn, M_TEMP); + } else + lp = lp->rl_next; + } + } +} + +/* + * Obtain a region of desired size from a resource list. + * If nothing available of that size, return 0. Otherwise, + * return a value of 1 and set resource start location with + * "*loc". (Note: loc can be zero if we don't wish the value) + */ +int rlist_alloc (rlp, size, loc) +struct rlist **rlp; unsigned size, *loc; { + register struct rlist *lp; + + + /* walk list, allocating first thing that's big enough (first fit) */ + for (; *rlp; rlp = &((*rlp)->rl_next)) + if(size <= (*rlp)->rl_end - (*rlp)->rl_start + 1) { + + /* hand it to the caller */ + if (loc) *loc = (*rlp)->rl_start; + (*rlp)->rl_start += size; + + /* did we eat this element entirely? */ + if ((*rlp)->rl_start > (*rlp)->rl_end) { + lp = (*rlp)->rl_next; + free (*rlp, M_TEMP); + *rlp = lp; + } + + return (1); + } + + /* nothing in list that's big enough */ + return (0); +} + +/* + * Finished with this resource list, reclaim all space and + * mark it as being empty. 
+ */
+rlist_destroy (rlp)
+struct rlist **rlp; {
+	struct rlist *lp, *nlp;
+
+	lp = *rlp;
+	*rlp = 0;
+	/* Free every element; grab the successor before the node is freed. */
+	for (; lp; lp = nlp) {
+		nlp = lp->rl_next;
+		free (lp, M_TEMP);
+	}
+}
diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c
new file mode 100644
index 000000000000..e6457a2cd237
--- /dev/null
+++ b/sys/kern/subr_xxx.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 1982, 1986, 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)subr_xxx.c 7.10 (Berkeley) 4/20/91 + * $Id: subr_xxx.c,v 1.2 1993/10/16 15:24:45 rgrimes Exp $ + */ + +/* + * Miscellaneous trivial functions, including many + * that are often inline-expanded or done in assembler. + */ +#include "param.h" +#include "systm.h" +#include "machine/cpu.h" + +/* + * Unsupported device function (e.g. writing to read-only device). + */ +enodev() +{ + + return (ENODEV); +} + +/* + * Unconfigured device function; driver not configured. + */ +enxio() +{ + + return (ENXIO); +} + +/* + * Unsupported ioctl function. + */ +enoioctl() +{ + + return (ENOTTY); +} + +/* + * Unsupported system function. + * This is used for an otherwise-reasonable operation + * that is not supported by the current system binary. + */ +enosys() +{ + + return (ENOSYS); +} + +/* + * Return error for operation not supported + * on a specific object or file type. + */ +eopnotsupp() +{ + + return (EOPNOTSUPP); +} + +/* + * Generic null operation, always returns success. + */ +nullop() +{ + + return (0); +} + +/* + * Definitions of various trivial functions; + * usually expanded inline rather than being defined here. + */ +#ifdef NEED_MINMAX +imin(a, b) + int a, b; +{ + + return (a < b ? a : b); +} + +imax(a, b) + int a, b; +{ + + return (a > b ? a : b); +} + +unsigned int +min(a, b) + unsigned int a, b; +{ + + return (a < b ? a : b); +} + +unsigned int +max(a, b) + unsigned int a, b; +{ + + return (a > b ? 
a : b); +} + +long +lmin(a, b) + long a, b; +{ + + return (a < b ? a : b); +} + +long +lmax(a, b) + long a, b; +{ + + return (a > b ? a : b); +} + +unsigned long +ulmin(a, b) + unsigned long a, b; +{ + + return (a < b ? a : b); +} + +unsigned long +ulmax(a, b) + unsigned long a, b; +{ + + return (a > b ? a : b); +} +#endif /* NEED_MINMAX */ + +#ifdef NEED_FFS +ffs(mask) + register long mask; +{ + register int bit; + + if (!mask) + return(0); + for (bit = 1;; ++bit) { + if (mask&0x01) + return(bit); + mask >>= 1; + } +} +#endif /* NEED_FFS */ + +#ifdef NEED_BCMP +bcmp(v1, v2, len) + void *v1, *v2; + register unsigned len; +{ + register u_char *s1 = v1, *s2 = v2; + + while (len--) + if (*s1++ != *s2++) + return (1); + return (0); +} +#endif /* NEED_BCMP */ + +#ifdef NEED_STRLEN +size_t +strlen(s1) + register const char *s1; +{ + register size_t len; + + for (len = 0; *s1++ != '\0'; len++) + ; + return (len); +} +#endif /* NEED_STRLEN */ diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c new file mode 100644 index 000000000000..2a4222496a0f --- /dev/null +++ b/sys/kern/sys_generic.c @@ -0,0 +1,659 @@ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)sys_generic.c 7.30 (Berkeley) 5/30/91 + * $Id: sys_generic.c,v 1.4 1993/10/16 15:24:47 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "filedesc.h" +#include "ioctl.h" +#include "file.h" +#include "socketvar.h" +#include "proc.h" +#include "uio.h" +#include "kernel.h" +#include "stat.h" +#include "malloc.h" +#ifdef KTRACE +#include "ktrace.h" +#endif + +struct read_args { + int fdes; + char *cbuf; + unsigned count; +}; + +/* + * Read system call. 
+ */
+/* ARGSUSED */
+read(p, uap, retval)
+	struct proc *p;
+	register struct read_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	struct uio auio;
+	struct iovec aiov;
+	long cnt, error = 0;
+#ifdef KTRACE
+	struct iovec ktriov;
+#endif
+
+	/* Descriptor must be in range, open, and opened for reading. */
+	if (((unsigned)uap->fdes) >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fdes]) == NULL ||
+	    (fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	/* Describe the caller's buffer with a single-element uio. */
+	aiov.iov_base = (caddr_t)uap->cbuf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->count;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+#ifdef KTRACE
+	/*
+	 * if tracing, save a copy of iovec
+	 */
+	if (KTRPOINT(p, KTR_GENIO))
+		ktriov = aiov;
+#endif
+	cnt = uap->count;
+	/*
+	 * If the transfer was interrupted (or would block) after some
+	 * data has already been moved, suppress the error and report
+	 * the partial byte count instead.
+	 */
+	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
+		if (auio.uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+	cnt -= auio.uio_resid;	/* bytes actually transferred */
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_GENIO) && error == 0)
+		ktrgenio(p->p_tracep, uap->fdes, UIO_READ, &ktriov, cnt, error);
+#endif
+	*retval = cnt;
+	return (error);
+}
+
+/*
+ * Scatter read system call. 
+ */ + +struct readv_args { + int fdes; + struct iovec *iovp; + unsigned iovcnt; +}; + +/* ARGSUSED */ +readv(p, uap, retval) + struct proc *p; + register struct readv_args *uap; + int *retval; +{ + register struct file *fp; + register struct filedesc *fdp = p->p_fd; + struct uio auio; + register struct iovec *iov; + struct iovec *saveiov; + struct iovec aiov[UIO_SMALLIOV]; + long i, cnt, error = 0; + unsigned iovlen; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if (((unsigned)uap->fdes) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[uap->fdes]) == NULL || + (fp->f_flag & FREAD) == 0) + return (EBADF); + /* note: can't use iovlen until iovcnt is validated */ + iovlen = uap->iovcnt * sizeof (struct iovec); + if (uap->iovcnt > UIO_SMALLIOV) { + if (uap->iovcnt > UIO_MAXIOV) + return (EINVAL); + MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); + saveiov = iov; + } else + iov = aiov; + auio.uio_iov = iov; + auio.uio_iovcnt = uap->iovcnt; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)) + goto done; + auio.uio_resid = 0; + for (i = 0; i < uap->iovcnt; i++) { + if (iov->iov_len < 0) { + error = EINVAL; + goto done; + } + auio.uio_resid += iov->iov_len; + if (auio.uio_resid < 0) { + error = EINVAL; + goto done; + } + iov++; + } +#ifdef KTRACE + /* + * if tracing, save a copy of iovec + */ + if (KTRPOINT(p, KTR_GENIO)) { + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + cnt = auio.uio_resid; + if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)) + if (auio.uio_resid != cnt && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + cnt -= auio.uio_resid; +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, uap->fdes, UIO_READ, ktriov, + cnt, error); + FREE(ktriov, M_TEMP); + } +#endif + *retval = cnt; +done: + if (uap->iovcnt > 
UIO_SMALLIOV) + FREE(saveiov, M_IOV); + return (error); +} + +/* + * Write system call + */ + +struct write_args { + int fdes; + char *cbuf; + unsigned count; +}; + +write(p, uap, retval) + struct proc *p; + register struct write_args *uap; + int *retval; +{ + register struct file *fp; + register struct filedesc *fdp = p->p_fd; + struct uio auio; + struct iovec aiov; + long cnt, error = 0; +#ifdef KTRACE + struct iovec ktriov; +#endif + + if (((unsigned)uap->fdes) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[uap->fdes]) == NULL || + (fp->f_flag & FWRITE) == 0) + return (EBADF); + aiov.iov_base = (caddr_t)uap->cbuf; + aiov.iov_len = uap->count; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = uap->count; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; +#ifdef KTRACE + /* + * if tracing, save a copy of iovec + */ + if (KTRPOINT(p, KTR_GENIO)) + ktriov = aiov; +#endif + cnt = uap->count; + if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) { + if (auio.uio_resid != cnt && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + if (error == EPIPE) + psignal(p, SIGPIPE); + } + cnt -= auio.uio_resid; +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO) && error == 0) + ktrgenio(p->p_tracep, uap->fdes, UIO_WRITE, + &ktriov, cnt, error); +#endif + *retval = cnt; + return (error); +} + +/* + * Gather write system call + */ + +struct writev_args { + int fdes; + struct iovec *iovp; + unsigned iovcnt; +}; + +writev(p, uap, retval) + struct proc *p; + register struct writev_args *uap; + int *retval; +{ + register struct file *fp; + register struct filedesc *fdp = p->p_fd; + struct uio auio; + register struct iovec *iov; + struct iovec *saveiov; + struct iovec aiov[UIO_SMALLIOV]; + long i, cnt, error = 0; + unsigned iovlen; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if (((unsigned)uap->fdes) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[uap->fdes]) == NULL || + (fp->f_flag & FWRITE) == 0) + 
return (EBADF); + /* note: can't use iovlen until iovcnt is validated */ + iovlen = uap->iovcnt * sizeof (struct iovec); + if (uap->iovcnt > UIO_SMALLIOV) { + if (uap->iovcnt > UIO_MAXIOV) + return (EINVAL); + MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); + saveiov = iov; + } else + iov = aiov; + auio.uio_iov = iov; + auio.uio_iovcnt = uap->iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)) + goto done; + auio.uio_resid = 0; + for (i = 0; i < uap->iovcnt; i++) { + if (iov->iov_len < 0) { + error = EINVAL; + goto done; + } + auio.uio_resid += iov->iov_len; + if (auio.uio_resid < 0) { + error = EINVAL; + goto done; + } + iov++; + } +#ifdef KTRACE + /* + * if tracing, save a copy of iovec + */ + if (KTRPOINT(p, KTR_GENIO)) { + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + cnt = auio.uio_resid; + if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) { + if (auio.uio_resid != cnt && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + if (error == EPIPE) + psignal(p, SIGPIPE); + } + cnt -= auio.uio_resid; +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, uap->fdes, UIO_WRITE, + ktriov, cnt, error); + FREE(ktriov, M_TEMP); + } +#endif + *retval = cnt; +done: + if (uap->iovcnt > UIO_SMALLIOV) + FREE(saveiov, M_IOV); + return (error); +} + +/* + * Ioctl system call + */ + +struct ioctl_args { + int fdes; + int cmd; + caddr_t cmarg; +}; + +/* ARGSUSED */ +ioctl(p, uap, retval) + struct proc *p; + register struct ioctl_args *uap; + int *retval; +{ + register struct file *fp; + register struct filedesc *fdp = p->p_fd; + register int com, error; + register u_int size; + caddr_t memp = 0; +#define STK_PARAMS 128 + char stkbuf[STK_PARAMS]; + caddr_t data = stkbuf; + int tmp; + + if ((unsigned)uap->fdes >= fdp->fd_nfiles || + 
(fp = fdp->fd_ofiles[uap->fdes]) == NULL) + return (EBADF); + if ((fp->f_flag & (FREAD|FWRITE)) == 0) + return (EBADF); + com = uap->cmd; + + if (com == FIOCLEX) { + fdp->fd_ofileflags[uap->fdes] |= UF_EXCLOSE; + return (0); + } + if (com == FIONCLEX) { + fdp->fd_ofileflags[uap->fdes] &= ~UF_EXCLOSE; + return (0); + } + + /* + * Interpret high order word to find + * amount of data to be copied to/from the + * user's address space. + */ + size = IOCPARM_LEN(com); + if (size > IOCPARM_MAX) + return (ENOTTY); + if (size > sizeof (stkbuf)) { + memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); + data = memp; + } + if (com&IOC_IN) { + if (size) { + error = copyin(uap->cmarg, data, (u_int)size); + if (error) { + if (memp) + free(memp, M_IOCTLOPS); + return (error); + } + } else + *(caddr_t *)data = uap->cmarg; + } else if ((com&IOC_OUT) && size) + /* + * Zero the buffer so the user always + * gets back something deterministic. + */ + bzero(data, size); + else if (com&IOC_VOID) + *(caddr_t *)data = uap->cmarg; + + switch (com) { + + case FIONBIO: + if (tmp = *(int *)data) + fp->f_flag |= FNONBLOCK; + else + fp->f_flag &= ~FNONBLOCK; + error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); + break; + + case FIOASYNC: + if (tmp = *(int *)data) + fp->f_flag |= FASYNC; + else + fp->f_flag &= ~FASYNC; + error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); + break; + + case FIOSETOWN: + tmp = *(int *)data; + if (fp->f_type == DTYPE_SOCKET) { + ((struct socket *)fp->f_data)->so_pgid = tmp; + error = 0; + break; + } + if (tmp <= 0) { + tmp = -tmp; + } else { + struct proc *p1 = pfind(tmp); + if (p1 == 0) { + error = ESRCH; + break; + } + tmp = p1->p_pgrp->pg_id; + } + error = (*fp->f_ops->fo_ioctl) + (fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); + break; + + case FIOGETOWN: + if (fp->f_type == DTYPE_SOCKET) { + error = 0; + *(int *)data = ((struct socket *)fp->f_data)->so_pgid; + break; + } + error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p); + 
*(int *)data = -*(int *)data; + break; + + default: + error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); + /* + * Copy any data to user, size was + * already set and checked above. + */ + if (error == 0 && (com&IOC_OUT) && size) + error = copyout(data, uap->cmarg, (u_int)size); + break; + } + if (memp) + free(memp, M_IOCTLOPS); + return (error); +} + +int selwait, nselcoll; + +/* + * Select system call. + */ + +struct select_args { + u_int nd; + fd_set *in, *ou, *ex; + struct timeval *tv; +}; + +select(p, uap, retval) + register struct proc *p; + register struct select_args *uap; + int *retval; +{ + fd_set ibits[3], obits[3]; + struct timeval atv; + int s, ncoll, error = 0, timo; + u_int ni; + + bzero((caddr_t)ibits, sizeof(ibits)); + bzero((caddr_t)obits, sizeof(obits)); + if (uap->nd > p->p_fd->fd_nfiles) + uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ + ni = howmany(uap->nd, NFDBITS); + +#define getbits(name, x) \ + if (uap->name) { \ + error = copyin((caddr_t)uap->name, (caddr_t)&ibits[x], \ + (unsigned)(ni * sizeof(fd_mask))); \ + if (error) \ + goto done; \ + } + getbits(in, 0); + getbits(ou, 1); + getbits(ex, 2); +#undef getbits + + if (uap->tv) { + error = copyin((caddr_t)uap->tv, (caddr_t)&atv, + sizeof (atv)); + if (error) + goto done; + if (itimerfix(&atv)) { + error = EINVAL; + goto done; + } + s = splhigh(); timevaladd(&atv, &time); splx(s); + timo = hzto(&atv); + } else + timo = 0; +retry: + ncoll = nselcoll; + p->p_flag |= SSEL; + error = selscan(p, ibits, obits, uap->nd, retval); + if (error || *retval) + goto done; + s = splhigh(); + /* this should be timercmp(&time, &atv, >=) */ + if (uap->tv && (time.tv_sec > atv.tv_sec || + time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec)) { + splx(s); + goto done; + } + if ((p->p_flag & SSEL) == 0 || nselcoll != ncoll) { + splx(s); + goto retry; + } + p->p_flag &= ~SSEL; + error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); + splx(s); + if (error == 0) + goto retry; +done: 
+ p->p_flag &= ~SSEL; + /* select is not restarted after signals... */ + if (error == ERESTART) + error = EINTR; + if (error == EWOULDBLOCK) + error = 0; +#define putbits(name, x) \ + if (uap->name) { \ + int error2 = copyout((caddr_t)&obits[x], (caddr_t)uap->name, \ + (unsigned)(ni * sizeof(fd_mask))); \ + if (error2) \ + error = error2; \ + } + if (error == 0) { + putbits(in, 0); + putbits(ou, 1); + putbits(ex, 2); +#undef putbits + } + return (error); +} + +int +selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int *retval) +{ + register struct filedesc *fdp = p->p_fd; + register int which, i, j; + register fd_mask bits; + int flag; + struct file *fp; + int error = 0, n = 0; + + for (which = 0; which < 3; which++) { + switch (which) { + + case 0: + flag = FREAD; break; + + case 1: + flag = FWRITE; break; + + case 2: + flag = 0; break; + } + for (i = 0; i < nfd; i += NFDBITS) { + bits = ibits[which].fds_bits[i/NFDBITS]; + while ((j = ffs(bits)) && i + --j < nfd) { + bits &= ~(1 << j); + fp = fdp->fd_ofiles[i + j]; + if (fp == NULL) { + error = EBADF; + break; + } + if ((*fp->f_ops->fo_select)(fp, flag, p)) { + FD_SET(i + j, &obits[which]); + n++; + } + } + } + } + *retval = n; + return (error); +} + +/*ARGSUSED*/ +int +seltrue(dev_t dev, int which, struct proc *p) +{ + + return (1); +} + +void +selwakeup(pid_t pid, int coll) +{ + register struct proc *p; + + if (coll) { + nselcoll++; + wakeup((caddr_t)&selwait); + } + if (pid && (p = pfind(pid))) { + int s = splhigh(); + if (p->p_wchan == (caddr_t)&selwait) { + if (p->p_stat == SSLEEP) + setrun(p); + else + unsleep(p); + } else if (p->p_flag & SSEL) + p->p_flag &= ~SSEL; + splx(s); + } +} diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c new file mode 100644 index 000000000000..a0717fdc4e63 --- /dev/null +++ b/sys/kern/sys_process.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)sys_process.c 7.22 (Berkeley) 5/11/91 + * $Id: sys_process.c,v 1.5 1993/10/16 15:24:48 rgrimes Exp $ + */ + +#include <stddef.h> + +#define IPCREG +#include "param.h" +#include "proc.h" +#include "vnode.h" +#include "buf.h" +#include "ptrace.h" + +#include "machine/eflags.h" +#include "machine/reg.h" +#include "machine/psl.h" +#include "vm/vm.h" +#include "vm/vm_page.h" + +#include "user.h" + +/* + * NOTES. + * + * The following ptrace calls have been defined in addition to + * the standard ones found in original <sys/ptrace.h>: + * + * PT_ATTACH - attach to running process + * PT_DETACH - detach from running process + * PT_SYSCALL - trace system calls + * PT_GETREG - get register file + * PT_SETREG - set register file + * PT_BREAD_[IDU] - block read from process (not yet implemented) + * PT_BWRITE_[IDU] - block write " " + * PT_INHERIT - make forked processes inherit trace flags + * + */ + +/* Define to prevent extraneous clutter in source */ +#ifndef SSTRC +#define SSTRC 0 +#endif +#ifndef SFTRC +#define SFTRC 0 +#endif + +/* + * `ipcreg' defined in <machine/reg.h> + * Should we define a structure with all regs? + */ +int sipcreg[NIPCREG] = + { 0,0,sEDI,sESI,sEBP,sEBX,sEDX,sECX,sEAX,sEIP,sCS,sEFLAGS,sESP,sSS }; + +struct { + int flag; +#define IPC_BUSY 1 +#define IPC_WANT 2 +#define IPC_DONE 4 + int req; /* copy of ptrace request */ + int *addr; /* copy of ptrace address */ + int data; /* copy of ptrace data */ + int error; /* errno from `procxmt' */ + int regs[NIPCREG]; /* PT_[GS]ETREG */ + caddr_t buf; /* PT_BREAD/WRITE */ + int buflen; /* " */ +} ipc; + +/* + * Process debugging system call. + */ + +struct ptrace_args { + int req; + int pid; + int *addr; + int data; +}; + +ptrace(curp, uap, retval) + struct proc *curp; + register struct ptrace_args *uap; + int *retval; +{ + struct proc *p; + int s, error = 0; + + *retval = 0; + if (uap->req == PT_TRACE_ME) { + curp->p_flag |= STRC; + /*p->p_tptr = p->p_pptr; * What shall we do here ? 
*/ + return 0; + } + if ((p = pfind(uap->pid)) == NULL) { + return ESRCH; + } + +#ifdef notyet + if (uap->req != PT_ATTACH && ( + (p->p_flag & STRC) == 0 || + (p->p_tptr && curp != p->p_tptr) || + (!p->p_tptr && curp != p->p_pptr))) + + return ESRCH; +#endif + + +#ifdef PT_ATTACH + switch (uap->req) { + case PT_ATTACH: + if (curp->p_ucred->cr_uid != 0 && ( + curp->p_ucred->cr_uid != p->p_ucred->cr_uid || + curp->p_ucred->cr_uid != p->p_cred->p_svuid)) + return EACCES; + + p->p_tptr = curp; + p->p_flag |= STRC; + psignal(p, SIGTRAP); + return 0; + + case PT_DETACH: + if ((unsigned)uap->data >= NSIG) + return EINVAL; + p->p_flag &= ~(STRC|SSTRC|SFTRC); + p->p_tptr = NULL; + psignal(p->p_pptr, SIGCHLD); + wakeup((caddr_t)p->p_pptr); + s = splhigh(); + if (p->p_stat == SSTOP) { + p->p_xstat = uap->data; + setrun(p); + } else if (uap->data) { + psignal(p, uap->data); + } + splx(s); + return 0; + +#ifdef PT_INHERIT + case PT_INHERIT: + if ((p->p_flag & STRC) == 0) + return ESRCH; + p->p_flag |= SFTRC; + return 0; +#endif + + default: + break; + } +#endif + + /* Other ptrace calls require target process to be in stopped state */ + if ((p->p_flag & STRC) == 0 || p->p_stat != SSTOP) { + return ESRCH; + } + + /* Acquire the ipc structure */ + while (ipc.flag & IPC_BUSY) { + ipc.flag |= IPC_WANT; + error = tsleep((caddr_t)&ipc, PWAIT|PCATCH, "ipc", 0); + if (error) + goto out; + } + + /* Got it, fill it */ + ipc.flag = IPC_BUSY; + ipc.error = 0; + ipc.req = uap->req; + ipc.addr = uap->addr; + ipc.data = uap->data; + +#ifdef PT_GETREGS + switch (uap->req) { + case PT_SETREGS: + error = copyin((char *)ipc.addr, (char *)ipc.regs, sizeof(ipc.regs)); + if (error) + goto out; + break; + +#ifdef notyet /* requires change in number of args to ptrace syscall */ + case PT_BWRITE_I: + case PT_BWRITE_D: + ipc.buflen = uap->data; + ipc.buf = kmem_alloc_wait(kernelmap, uap->data); + error = copyin((char *)ipc.addr, (char *)ipc.buf, ipc.buflen); + if (error) { + kmem_free_wakeup(kernelmap, 
ipc.buf, ipc.buflen); + goto out; + } +#endif + default: + break; + } +#endif + + setrun(p); + while ((ipc.flag & IPC_DONE) == 0) { + error = tsleep((caddr_t)&ipc, PWAIT|PCATCH, "ipc", 0); + if (error) + goto out; + } + + *retval = ipc.data; + if (error = ipc.error) + goto out; + +#ifdef PT_GETREGS + switch (uap->req) { + case PT_GETREGS: + error = copyout((char *)ipc.regs, (char *)ipc.addr, sizeof(ipc.regs)); + break; + + case PT_BREAD_I: + case PT_BREAD_D: + /* Not yet */ + default: + break; + } +#endif + +out: + /* Release ipc structure */ + ipc.flag &= ~IPC_BUSY; + if (ipc.flag & IPC_WANT) { + ipc.flag &= ~IPC_WANT; + wakeup((caddr_t)&ipc); + } + return error; +} + +procxmt(p) + register struct proc *p; +{ + int i, *xreg, rv = 0; +#ifdef i386 + int new_eflags, old_cs, old_ds, old_es, old_ss, old_eflags; + int *regs; +#endif + + /* Are we still being traced? */ + if ((p->p_flag & STRC) == 0) + return 1; + + p->p_addr->u_kproc.kp_proc = *p; + fill_eproc(p, &p->p_addr->u_kproc.kp_eproc); + + switch (ipc.req) { + case PT_READ_I: + case PT_READ_D: + if (!useracc(ipc.addr, sizeof(ipc.data), B_READ)) { + ipc.error = EFAULT; + break; + } + ipc.error = copyin((char *)ipc.addr, (char *)&ipc.data, sizeof(ipc.data)); + break; + + case PT_READ_U: + if ((u_int)ipc.addr > UPAGES * NBPG - sizeof(int)) { + ipc.error = EFAULT; + break; + } + ipc.data = *(int *)((u_int)p->p_addr + (u_int)ipc.addr); + break; + + case PT_WRITE_I: + case PT_WRITE_D: { /* 04 Sep 92*/ + vm_prot_t prot; /* current protection of region */ + int cow; /* ensure copy-on-write happens */ + + if (cow = (useracc(ipc.addr, sizeof(ipc.data), B_WRITE) == 0)) { + vm_offset_t addr = (vm_offset_t)ipc.addr; + vm_size_t size; + vm_prot_t max_prot; + vm_inherit_t inh; + boolean_t shared; + vm_object_t object; + vm_offset_t objoff; + + /* + * XXX - the useracc check is stronger than the vm + * checks because the user page tables are in the map. + * Anyway, most of this can be removed now that COW + * works. 
+ */ + if (!useracc(ipc.addr, sizeof(ipc.data), B_READ) || + vm_region(&p->p_vmspace->vm_map, &addr, &size, + &prot, &max_prot, &inh, &shared, + &object, &objoff) != KERN_SUCCESS || + vm_protect(&p->p_vmspace->vm_map, ipc.addr, + sizeof(ipc.data), FALSE, + prot|VM_PROT_WRITE) != KERN_SUCCESS || + vm_fault(&p->p_vmspace->vm_map,trunc_page(ipc.addr), + VM_PROT_WRITE, FALSE) != KERN_SUCCESS) { + + ipc.error = EFAULT; + break; + } + } + ipc.error = copyout((char *)&ipc.data, + (char *)ipc.addr, sizeof(ipc.data)); + if (cow) + if (vm_protect(&p->p_vmspace->vm_map, ipc.addr, + sizeof(ipc.data), FALSE, + prot) != KERN_SUCCESS) + printf("ptrace: oops\n"); + break; + } + + case PT_WRITE_U: +#ifdef i386 + regs = p->p_regs; + /* + * XXX - privileged kernel state is scattered all over the + * user area. Only allow write access to areas known to + * be safe. + */ +#define GO_IF_SAFE(min, size) \ + if ((u_int)ipc.addr >= (min) \ + && (u_int)ipc.addr <= (min) + (size) - sizeof(int)) \ + goto pt_write_u + /* + * Allow writing entire FPU state. + */ + GO_IF_SAFE(offsetof(struct user, u_pcb) + + offsetof(struct pcb, pcb_savefpu), + sizeof(struct save87)); + /* + * Allow writing ordinary registers. Changes to segment + * registers and to some bits in %eflags will be silently + * ignored. Such changes ought to be an error. + */ +/* + * XXX - there is no define for the base of the user area except USRSTACK. + * XXX - USRSTACK is not the base of the user stack. It is the base of the + * user area. + */ +#define USER_OFF(va) ((u_int)(va) - USRSTACK) + GO_IF_SAFE(USER_OFF(regs), + (curpcb->pcb_flags & FM_TRAP ? 
tSS + 1 : sSS + 1) + * sizeof *regs); + ipc.error = EFAULT; + break; +#else + if ((u_int)ipc.addr > UPAGES * NBPG - sizeof(int)) { + ipc.error = EFAULT; + break; + } +#endif + pt_write_u: +#ifdef i386 + if (curpcb->pcb_flags & FM_TRAP) { + old_cs = regs[tCS]; + old_ds = regs[tES]; + old_es = regs[tES]; + old_ss = regs[tSS]; + old_eflags = regs[tEFLAGS]; + } else { + old_cs = regs[sCS]; + old_ss = regs[sSS]; + old_eflags = regs[sEFLAGS]; + } +#endif + *(int *)((u_int)p->p_addr + (u_int)ipc.addr) = ipc.data; +#ifdef i386 + /* + * Don't allow segment registers to change (although they can + * be changed directly to certain values). + * Don't allow privileged bits in %eflags to change. Users + * have privilege to change TF and NT although although they + * usually shouldn't. + * XXX - fix PT_SETREGS. + * XXX - simplify. Maybe copy through a temporary struct. + * Watch out for problems when ipc.addr is not a multiple + * of the register size. + */ +#define EFL_UNPRIVILEGED (EFL_CF | EFL_PF | EFL_AF | EFL_ZF | EFL_SF \ + | EFL_TF | EFL_DF | EFL_OF | EFL_NT) + if (curpcb->pcb_flags & FM_TRAP) { + regs[tCS] = old_cs; + regs[tDS] = old_ds; + regs[tES] = old_es; + regs[tSS] = old_es; + new_eflags = regs[tEFLAGS]; + regs[tEFLAGS] + = (new_eflags & EFL_UNPRIVILEGED) + | (old_eflags & ~EFL_UNPRIVILEGED); + } else { + regs[sCS] = old_cs; + regs[sSS] = old_ss; + new_eflags = regs[sEFLAGS]; + regs[sEFLAGS] + = (new_eflags & EFL_UNPRIVILEGED) + | (old_eflags & ~EFL_UNPRIVILEGED); + } +#endif + break; + + case PT_CONTINUE: + if (ipc.addr != (int *)1) { +#ifdef i386 + p->p_regs[(curpcb->pcb_flags&FM_TRAP)?tEIP:sEIP] = (int)ipc.addr; +#endif + } + p->p_flag &= ~SSTRC; /* Only set by PT_SYSCALL */ + if ((unsigned)ipc.data >= NSIG) { + ipc.error = EINVAL; + } else { + p->p_xstat = ipc.data; + rv = 1; + } + break; + + case PT_KILL: + p->p_flag &= ~SSTRC; /* Only set by PT_SYSCALL */ + rv = 2; + break; + + case PT_STEP: +#ifdef i386 + if (ipc.addr != (int *)1) { + 
p->p_regs[(curpcb->pcb_flags&FM_TRAP)?tEIP:sEIP] = (int)ipc.addr; + } + p->p_regs[(curpcb->pcb_flags&FM_TRAP)?tEFLAGS:sEFLAGS] |= PSL_T; +#endif + p->p_flag &= ~SSTRC; /* Only set by PT_SYSCALL */ + p->p_xstat = 0; + rv = 1; + break; + +#ifdef PT_SYSCALL + case PT_SYSCALL: + if (ipc.addr != (int *)1) { +#ifdef i386 + p->p_regs[(curpcb->pcb_flags&FM_TRAP)?tEIP:sEIP] = (int)ipc.addr; +#endif + } + p->p_flag |= SSTRC; + p->p_xstat = 0; + rv = 1; + break; +#endif +#ifdef PT_GETREGS + case PT_GETREGS: +#ifdef i386 + xreg = (curpcb->pcb_flags&FM_TRAP)?ipcreg:sipcreg; +#endif + + for (i = 0; i < NIPCREG; i++) + ipc.regs[i] = p->p_regs[xreg[i]]; + break; + + case PT_SETREGS: +#ifdef i386 + xreg = (curpcb->pcb_flags&FM_TRAP)?ipcreg:sipcreg; +#endif + + for (i = 0; i < NIPCREG; i++) + p->p_regs[xreg[i]] = ipc.regs[i]; + break; +#endif + +#ifdef PT_DUMP + case PT_DUMP: + /* Should be able to specify core file name */ + ipc.error = coredump(p); + break; +#endif + + default: + ipc.error = EINVAL; + } + ipc.flag |= IPC_DONE; + wakeup((caddr_t)&ipc); + + if (rv == 2) + kexit(p, 0); /*???*/ + + return rv; +} + +/* + * Enable process profiling system call. + */ + +struct profil_args { + short *bufbase; /* base of data buffer */ + unsigned bufsize; /* size of data buffer */ + unsigned pcoffset; /* pc offset (for subtraction) */ + unsigned pcscale; /* scaling factor for offset pc */ +}; + +/* ARGSUSED */ +profil(p, uap, retval) + struct proc *p; + register struct profil_args *uap; + int *retval; +{ + /* from looking at man pages, and include files, looks like + * this just sets up the fields of p->p_stats->p_prof... + * and those fields come straight from the args. + * only thing *we* have to do is check the args for validity... + * + * cgd + */ + + /* check to make sure that the buffer is OK. addupc (in locore) + * checks for faults, but would one be generated, say, writing to + * kernel space? probably not -- it just uses "movl"... 
+ * + * so we've gotta check to make sure that the info set up for + * addupc is set right... it's gotta be writable by the user... + */ + + if (useracc(uap->bufbase,uap->bufsize*sizeof(short),B_WRITE) == 0) + return EFAULT; + + p->p_stats->p_prof.pr_base = uap->bufbase; + p->p_stats->p_prof.pr_size = uap->bufsize; + p->p_stats->p_prof.pr_off = uap->pcoffset; + p->p_stats->p_prof.pr_scale = uap->pcscale; + + return 0; +} diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c new file mode 100644 index 000000000000..247dbc9a2a59 --- /dev/null +++ b/sys/kern/sys_socket.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 1982, 1986, 1990 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)sys_socket.c	7.11 (Berkeley) 4/16/91
 *	$Id: sys_socket.c,v 1.2 1993/10/16 15:24:50 rgrimes Exp $
 */

#include "param.h"
#include "systm.h"
#include "file.h"
#include "mbuf.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"
#include "ioctl.h"
#include "stat.h"

#include "net/if.h"
#include "net/route.h"

/*
 * File-descriptor operation vector for sockets.  soo_stat() is exported
 * separately below and is not part of this table.
 */
struct fileops socketops =
    { soo_read, soo_write, soo_ioctl, soo_select, soo_close };

/*
 * Read from a socket descriptor: hand the transfer straight to
 * soreceive().  No source address, control data, or receive flags are
 * collected here.  cred is unused; it exists to match the fileops
 * read signature.
 */
/* ARGSUSED */
soo_read(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{

	return (soreceive((struct socket *)fp->f_data, (struct mbuf **)0,
		uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0));
}

/*
 * Write to a socket descriptor: hand the transfer straight to sosend()
 * with no destination address, control data, or send flags.  cred is
 * unused; it exists to match the fileops write signature.
 */
/* ARGSUSED */
soo_write(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{

	return (sosend((struct socket *)fp->f_data, (struct mbuf *)0,
		uio, (struct mbuf *)0, (struct mbuf *)0, 0));
}

/*
 * Socket ioctl: service the generic socket-level commands here and
 * route everything else to the interface layer ('i' ioctl group), the
 * routing layer ('r' group), or the socket's own protocol
 * (PRU_CONTROL).
 */
soo_ioctl(fp, cmd, data, p)
	struct file *fp;
	int cmd;
	register caddr_t data;
	struct proc *p;
{
	register struct socket *so = (struct socket *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		/* set/clear non-blocking I/O */
		if (*(int *)data)
			so->so_state |= SS_NBIO;
		else
			so->so_state &= ~SS_NBIO;
		return (0);

	case FIOASYNC:
		/* set/clear async notification on the socket and both buffers */
		if (*(int *)data) {
			so->so_state |= SS_ASYNC;
			so->so_rcv.sb_flags |= SB_ASYNC;
			so->so_snd.sb_flags |= SB_ASYNC;
		} else {
			so->so_state &= ~SS_ASYNC;
			so->so_rcv.sb_flags &= ~SB_ASYNC;
			so->so_snd.sb_flags &= ~SB_ASYNC;
		}
		return (0);

	case FIONREAD:
		/* bytes currently queued in the receive buffer */
		*(int *)data = so->so_rcv.sb_cc;
		return (0);

	case SIOCSPGRP:
		so->so_pgid = *(int *)data;
		return (0);

	case SIOCGPGRP:
		*(int *)data = so->so_pgid;
		return (0);

	case SIOCATMARK:
		*(int *)data = (so->so_state&SS_RCVATMARK) != 0;
		return (0);
	}
	/*
	 * Interface/routing/protocol specific ioctls:
	 * interface and routing ioctls should have a
	 * different entry since a socket's unnecessary
	 */
	if (IOCGROUP(cmd) == 'i')
		return (ifioctl(so, cmd, data, p));
	if (IOCGROUP(cmd) == 'r')
		return (rtioctl(cmd, data, p));
	return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
	    (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)0));
}

/*
 * Select on a socket.  Returns 1 when the requested condition already
 * holds (readable, writeable, or -- for "which" 0 -- exceptional:
 * out-of-band mark pending or reached); otherwise registers the process
 * on the appropriate socket buffer via sbselqueue() and returns 0.
 * Runs at splnet() so the test and the queueing are atomic with respect
 * to protocol input.
 */
soo_select(fp, which, p)
	struct file *fp;
	int which;
	struct proc *p;
{
	register struct socket *so = (struct socket *)fp->f_data;
	register int s = splnet();

	switch (which) {

	case FREAD:
		if (soreadable(so)) {
			splx(s);
			return (1);
		}
		sbselqueue(&so->so_rcv, p);
		break;

	case FWRITE:
		if (sowriteable(so)) {
			splx(s);
			return (1);
		}
		sbselqueue(&so->so_snd, p);
		break;

	case 0:
		if (so->so_oobmark ||
		    (so->so_state & SS_RCVATMARK)) {
			splx(s);
			return (1);
		}
		sbselqueue(&so->so_rcv, p);
		break;
	}
	splx(s);
	return (0);
}

/*
 * Stat a socket: clear the caller's stat buffer, then let the
 * protocol fill in whatever it knows via PRU_SENSE.
 */
soo_stat(so, ub)
	register struct socket *so;
	register struct stat *ub;
{

	bzero((caddr_t)ub, sizeof (*ub));
	return ((*so->so_proto->pr_usrreq)(so, PRU_SENSE,
	    (struct mbuf *)ub, (struct mbuf *)0,
	    (struct mbuf *)0));
}

/*
 * Close a socket descriptor: release the underlying socket (if still
 * present) and detach it from the file structure.  p is unused; it
 * exists to match the fileops close signature.
 */
/* ARGSUSED */
soo_close(fp, p)
	struct file *fp;
	struct proc *p;
{
	int error = 0;

	if (fp->f_data)
		error = soclose((struct socket *)fp->f_data);
	fp->f_data = 0;
	return (error);
}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
new file mode 100644
index 000000000000..4fe1c433549c
--- /dev/null
+++ b/sys/kern/syscalls.c
@@ -0,0 +1,229 @@
+/*
+ * System call names.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from $Id: syscalls.master,v 1.5 1993/10/24 06:19:58 paul Exp $ + */ + +char *syscallnames[] = { + "#0", /* 0 = indir or out-of-range */ + "exit", /* 1 = exit */ + "fork", /* 2 = fork */ + "read", /* 3 = read */ + "write", /* 4 = write */ + "open", /* 5 = open */ + "close", /* 6 = close */ + "wait4", /* 7 = wait4 */ + "old.creat", /* 8 = old creat */ + "link", /* 9 = link */ + "unlink", /* 10 = unlink */ + "obs_execv", /* 11 = obsolete execv */ + "chdir", /* 12 = chdir */ + "fchdir", /* 13 = fchdir */ + "mknod", /* 14 = mknod */ + "chmod", /* 15 = chmod */ + "chown", /* 16 = chown */ + "break", /* 17 = break */ + "getfsstat", /* 18 = getfsstat */ + "lseek", /* 19 = lseek */ + "getpid", /* 20 = getpid */ + "mount", /* 21 = mount */ + "unmount", /* 22 = unmount */ + "setuid", /* 23 = setuid */ + "getuid", /* 24 = getuid */ + "geteuid", /* 25 = geteuid */ + "ptrace", /* 26 = ptrace */ + "recvmsg", /* 27 = recvmsg */ + "sendmsg", /* 28 = sendmsg */ + "recvfrom", /* 29 = recvfrom */ + "accept", /* 30 = accept */ + "getpeername", /* 31 = getpeername */ + "getsockname", /* 32 = getsockname */ + "access", /* 33 = access */ + "chflags", /* 34 = chflags */ + "fchflags", /* 35 = fchflags */ + "sync", /* 36 = sync */ + "kill", /* 37 = kill */ + "stat", /* 38 = stat */ + "getppid", /* 39 = getppid */ + "lstat", /* 40 = lstat */ + "dup", /* 41 = dup */ + "pipe", /* 42 = pipe */ + "getegid", /* 43 = getegid */ + "profil", /* 44 = profil */ +#ifdef KTRACE + "ktrace", /* 45 = ktrace */ +#else + "#45", /* 45 = ktrace */ +#endif + "sigaction", /* 46 = sigaction */ + "getgid", /* 47 = getgid */ + "sigprocmask", /* 48 = sigprocmask */ + "getlogin", /* 49 = getlogin */ + "setlogin", /* 50 = setlogin */ + "acct", /* 51 = acct */ + "sigpending", /* 52 = sigpending */ +#ifdef notyet + "sigaltstack", /* 53 = sigaltstack */ +#else + "#53", /* 53 = sigaltstack */ +#endif + "ioctl", /* 54 = ioctl */ + "reboot", /* 55 = reboot */ + "revoke", /* 56 = revoke */ + "symlink", /* 57 = 
symlink */ + "readlink", /* 58 = readlink */ + "execve", /* 59 = execve */ + "umask", /* 60 = umask */ + "chroot", /* 61 = chroot */ + "fstat", /* 62 = fstat */ + "getkerninfo", /* 63 = getkerninfo */ + "getpagesize", /* 64 = getpagesize */ + "msync", /* 65 = msync */ + "vfork", /* 66 = vfork */ + "obs_vread", /* 67 = obsolete vread */ + "obs_vwrite", /* 68 = obsolete vwrite */ + "sbrk", /* 69 = sbrk */ + "sstk", /* 70 = sstk */ + "mmap", /* 71 = mmap */ + "vadvise", /* 72 = vadvise */ + "munmap", /* 73 = munmap */ + "mprotect", /* 74 = mprotect */ + "madvise", /* 75 = madvise */ + "obs_vhangup", /* 76 = obsolete vhangup */ + "obs_vlimit", /* 77 = obsolete vlimit */ + "mincore", /* 78 = mincore */ + "getgroups", /* 79 = getgroups */ + "setgroups", /* 80 = setgroups */ + "getpgrp", /* 81 = getpgrp */ + "setpgid", /* 82 = setpgid */ + "setitimer", /* 83 = setitimer */ + "old.wait", /* 84 = old wait */ + "swapon", /* 85 = swapon */ + "getitimer", /* 86 = getitimer */ + "gethostname", /* 87 = gethostname */ + "sethostname", /* 88 = sethostname */ + "getdtablesize", /* 89 = getdtablesize */ + "dup2", /* 90 = dup2 */ + "#91", /* 91 = getdopt */ + "fcntl", /* 92 = fcntl */ + "select", /* 93 = select */ + "#94", /* 94 = setdopt */ + "fsync", /* 95 = fsync */ + "setpriority", /* 96 = setpriority */ + "socket", /* 97 = socket */ + "connect", /* 98 = connect */ + "old.accept", /* 99 = old accept */ + "getpriority", /* 100 = getpriority */ + "old.send", /* 101 = old send */ + "old.recv", /* 102 = old recv */ + "sigreturn", /* 103 = sigreturn */ + "bind", /* 104 = bind */ + "setsockopt", /* 105 = setsockopt */ + "listen", /* 106 = listen */ + "obs_vtimes", /* 107 = obsolete vtimes */ + "old.sigvec", /* 108 = old sigvec */ + "old.sigblock", /* 109 = old sigblock */ + "old.sigsetmask", /* 110 = old sigsetmask */ + "sigsuspend", /* 111 = sigsuspend */ + "sigstack", /* 112 = sigstack */ + "old.recvmsg", /* 113 = old recvmsg */ + "old.sendmsg", /* 114 = old sendmsg */ +#ifdef TRACE 
+ "vtrace", /* 115 = vtrace */ +#else + "obs_vtrace", /* 115 = obsolete vtrace */ +#endif + "gettimeofday", /* 116 = gettimeofday */ + "getrusage", /* 117 = getrusage */ + "getsockopt", /* 118 = getsockopt */ +#ifdef vax + "resuba", /* 119 = resuba */ +#else + "#119", /* 119 = nosys */ +#endif + "readv", /* 120 = readv */ + "writev", /* 121 = writev */ + "settimeofday", /* 122 = settimeofday */ + "fchown", /* 123 = fchown */ + "fchmod", /* 124 = fchmod */ + "old.recvfrom", /* 125 = old recvfrom */ + "old.setreuid", /* 126 = old setreuid */ + "old.setregid", /* 127 = old setregid */ + "rename", /* 128 = rename */ + "truncate", /* 129 = truncate */ + "ftruncate", /* 130 = ftruncate */ + "flock", /* 131 = flock */ + "mkfifo", /* 132 = mkfifo */ + "sendto", /* 133 = sendto */ + "shutdown", /* 134 = shutdown */ + "socketpair", /* 135 = socketpair */ + "mkdir", /* 136 = mkdir */ + "rmdir", /* 137 = rmdir */ + "utimes", /* 138 = utimes */ + "obs_4.2", /* 139 = obsolete 4.2 sigreturn */ + "adjtime", /* 140 = adjtime */ + "old.getpeername", /* 141 = old getpeername */ + "gethostid", /* 142 = gethostid */ + "sethostid", /* 143 = sethostid */ + "getrlimit", /* 144 = getrlimit */ + "setrlimit", /* 145 = setrlimit */ + "old.killpg", /* 146 = old killpg */ + "setsid", /* 147 = setsid */ + "quotactl", /* 148 = quotactl */ + "old.quota", /* 149 = old quota */ + "old.getsockname", /* 150 = old getsockname */ + "#151", /* 151 = nosys */ + "#152", /* 152 = nosys */ + "#153", /* 153 = nosys */ + "#154", /* 154 = nosys */ +#ifdef NFS + "nfssvc", /* 155 = nfssvc */ +#else + "#155", /* 155 = nosys */ +#endif + "getdirentries", /* 156 = getdirentries */ + "statfs", /* 157 = statfs */ + "fstatfs", /* 158 = fstatfs */ + "#159", /* 159 = nosys */ +#ifdef NFS + "async_daemon", /* 160 = async_daemon */ + "getfh", /* 161 = getfh */ +#else + "#160", /* 160 = nosys */ + "#161", /* 161 = nosys */ +#endif + "getdomainname", /* 162 = getdomainname */ + "setdomainname", /* 163 = setdomainname */ + 
"uname", /* 164 = uname */ + "#165", /* 165 = nosys */ + "#166", /* 166 = nosys */ + "#167", /* 167 = nosys */ + "#168", /* 168 = nosys */ + "#169", /* 169 = nosys */ + "#170", /* 170 = nosys */ +#ifdef SYSVSHM + "shmsys", /* 171 = shmsys */ +#else + "#171", /* 171 = nosys */ +#endif + "#172", /* 172 = nosys */ + "#173", /* 173 = nosys */ + "#174", /* 174 = nosys */ + "#175", /* 175 = nosys */ + "#176", /* 176 = nosys */ + "#177", /* 177 = nosys */ + "#178", /* 178 = nosys */ + "#179", /* 179 = nosys */ + "#180", /* 180 = nosys */ + "setgid", /* 181 = setgid */ + "setegid", /* 182 = setegid */ + "seteuid", /* 183 = seteuid */ + "#184", /* 184 = nosys */ + "#185", /* 185 = nosys */ + "#186", /* 186 = nosys */ + "#187", /* 187 = nosys */ + "#188", /* 188 = nosys */ + "#189", /* 189 = nosys */ + "#190", /* 190 = nosys */ +}; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master new file mode 100644 index 000000000000..f7470cccbd84 --- /dev/null +++ b/sys/kern/syscalls.master @@ -0,0 +1,255 @@ + $Id: syscalls.master,v 1.5 1993/10/24 06:19:58 paul Exp $ +; from: @(#)syscalls.master 7.26 (Berkeley) 3/25/91 +; System call name/number master file. +; Processed to created init_sysent.c, syscalls.c and syscall.h. + +; Columns: number type nargs name altname/comments +; number system call number, must be in order +; type one of STD, OBSOL, UNIMPL, COMPAT +; nargs number of arguments +; name name of syscall routine +; altname name of system call if different +; for UNIMPL/OBSOL, name continues with comments + +; types: +; STD always included +; COMPAT included on COMPAT #ifdef +; LIBCOMPAT included on COMPAT #ifdef, and placed in syscall.h +; OBSOL obsolete, not included in system, only specifies name +; UNIMPL not implemented, placeholder only + +; #ifdef's, etc. may be included, and are copied to the output files. + +; Reserved/unimplemented system calls in the range 0-150 inclusive +; are reserved for use in future Berkeley releases. 
+; Additional system calls implemented in vendor and other +; redistributions should be placed in the reserved range at the end +; of the current calls. + +0 UNIMPL 0 indir or out-of-range +1 STD 1 rexit exit +2 STD 0 fork +3 STD 3 read +4 STD 3 write +5 STD 3 open +6 STD 1 close +7 STD 4 wait4 +8 COMPAT 2 creat +9 STD 2 link +10 STD 1 unlink +11 OBSOL 2 execv +12 STD 1 chdir +13 STD 1 fchdir +14 STD 3 mknod +15 STD 2 chmod +16 STD 3 chown +17 STD 1 obreak break +18 STD 3 getfsstat +19 STD 3 lseek +20 STD 0 getpid +21 STD 4 mount +22 STD 2 unmount +23 STD 1 setuid +24 STD 0 getuid +25 STD 0 geteuid +26 STD 4 ptrace +27 STD 3 recvmsg +28 STD 3 sendmsg +29 STD 6 recvfrom +30 STD 3 accept +31 STD 3 getpeername +32 STD 3 getsockname +33 STD 2 saccess access +34 STD 2 chflags +35 STD 2 fchflags +36 STD 0 sync +37 STD 2 kill +38 STD 2 stat +39 STD 0 getppid +40 STD 2 lstat +41 STD 2 dup +42 STD 0 pipe +43 STD 0 getegid +44 STD 4 profil +#ifdef KTRACE +45 STD 4 ktrace +#else +45 UNIMPL 0 ktrace +#endif +46 STD 3 sigaction +47 STD 0 getgid +48 STD 2 sigprocmask +49 STD 2 getlogin +50 STD 1 setlogin +51 STD 1 sysacct acct +52 STD 0 sigpending +#ifdef notyet +53 STD 3 sigaltstack +#else +53 UNIMPL 3 sigaltstack +#endif +54 STD 3 ioctl +55 STD 1 reboot +56 STD 1 revoke +57 STD 2 symlink +58 STD 3 readlink +59 STD 3 execve +60 STD 1 umask +61 STD 1 chroot +62 STD 2 fstat +63 STD 4 getkerninfo +64 STD 0 getpagesize +65 STD 2 msync +66 STD 0 vfork +67 OBSOL 0 vread +68 OBSOL 0 vwrite +69 STD 1 sbrk +70 STD 1 sstk +71 STD 6 smmap mmap +72 STD 1 ovadvise vadvise +73 STD 2 munmap +74 STD 3 mprotect +75 STD 3 madvise +76 OBSOL 0 vhangup +77 OBSOL 0 vlimit +78 STD 3 mincore +79 STD 2 getgroups +80 STD 2 setgroups +81 STD 0 getpgrp +82 STD 2 setpgid +83 STD 3 setitimer +84 COMPAT 0 wait +85 STD 1 swapon +86 STD 2 getitimer +87 STD 2 gethostname +88 STD 2 sethostname +89 STD 0 getdtablesize +90 STD 2 dup2 +91 UNIMPL 2 getdopt +92 STD 3 fcntl +93 STD 5 select +94 UNIMPL 2 setdopt +95 
STD 1 fsync +96 STD 3 setpriority +97 STD 3 socket +98 STD 3 connect +99 COMPAT 3 accept +100 STD 2 getpriority +101 COMPAT 4 send +102 COMPAT 4 recv +103 STD 1 sigreturn +104 STD 3 bind +105 STD 5 setsockopt +106 STD 2 listen +107 OBSOL 0 vtimes +108 COMPAT 3 sigvec +109 COMPAT 1 sigblock +110 COMPAT 1 sigsetmask +111 STD 1 sigsuspend +112 STD 2 sigstack +113 COMPAT 3 recvmsg +114 COMPAT 3 sendmsg +#ifdef TRACE +115 STD 2 vtrace +#else +115 OBSOL 2 vtrace +#endif +116 STD 2 gettimeofday +117 STD 2 getrusage +118 STD 5 getsockopt +#ifdef vax +119 STD 1 resuba +#else +119 UNIMPL 0 nosys +#endif +120 STD 3 readv +121 STD 3 writev +122 STD 2 settimeofday +123 STD 3 fchown +124 STD 2 fchmod +125 COMPAT 6 recvfrom +126 LIBCOMPAT 2 setreuid +127 LIBCOMPAT 2 setregid +128 STD 2 rename +129 STD 2 truncate +130 STD 2 ftruncate +131 STD 2 flock +132 STD 2 mkfifo +133 STD 6 sendto +134 STD 2 shutdown +135 STD 5 socketpair +136 STD 2 mkdir +137 STD 1 rmdir +138 STD 2 utimes +139 OBSOL 0 4.2 sigreturn +140 STD 2 adjtime +141 COMPAT 3 getpeername +142 STD 0 gethostid +143 STD 1 sethostid +144 STD 2 getrlimit +145 STD 2 setrlimit +146 COMPAT 2 killpg +147 STD 0 setsid +148 STD 4 quotactl +149 COMPAT 4 quota +150 COMPAT 3 getsockname + +; Syscalls 151-180 inclusive are reserved for vendor-specific +; system calls. (This includes various calls added for compatibity +; with other Unix variants.) +; Some of these calls are now supported by BSD... 
+151 UNIMPL 0 nosys +152 UNIMPL 0 nosys +153 UNIMPL 0 nosys +154 UNIMPL 0 nosys +#ifdef NFS +155 STD 5 nfssvc +#else +155 UNIMPL 0 nosys +#endif +156 STD 4 getdirentries +157 STD 2 statfs +158 STD 2 fstatfs +159 UNIMPL 0 nosys +#ifdef NFS +160 STD 0 async_daemon +161 STD 2 getfh +#else +160 UNIMPL 0 nosys +161 UNIMPL 0 nosys +#endif +162 STD 2 getdomainname +163 STD 2 setdomainname +164 STD 1 uname +165 UNIMPL 0 nosys +166 UNIMPL 0 nosys +167 UNIMPL 0 nosys +168 UNIMPL 0 nosys +169 UNIMPL 0 nosys +170 UNIMPL 0 nosys +#ifdef SYSVSHM +171 STD 4 shmsys +#else +171 UNIMPL 0 nosys +#endif +172 UNIMPL 0 nosys +173 UNIMPL 0 nosys +174 UNIMPL 0 nosys +175 UNIMPL 0 nosys +176 UNIMPL 0 nosys +177 UNIMPL 0 nosys +178 UNIMPL 0 nosys +179 UNIMPL 0 nosys +180 UNIMPL 0 nosys + +; Syscalls 180-199 are used by/reserved for BSD +181 STD 1 setgid +182 STD 1 setegid +183 STD 1 seteuid +184 UNIMPL 0 nosys +185 UNIMPL 0 nosys +186 UNIMPL 0 nosys +187 UNIMPL 0 nosys +188 UNIMPL 0 nosys +189 UNIMPL 0 nosys +190 UNIMPL 0 nosys diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c new file mode 100644 index 000000000000..0e7c3c84db29 --- /dev/null +++ b/sys/kern/sysv_shm.c @@ -0,0 +1,534 @@ +/* + * Copyright (c) 1988 University of Utah. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. Originally from University of Wisconsin. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: Utah $Hdr: uipc_shm.c 1.9 89/08/14$ + * from: @(#)sysv_shm.c 7.15 (Berkeley) 5/13/91 + * $Id: sysv_shm.c,v 1.4 1993/10/16 15:24:52 rgrimes Exp $ + */ + +/* + * System V shared memory routines. + * TEMPORARY, until mmap is in place; + * needed now for HP-UX compatibility and X server (yech!). 
 */

#ifdef SYSVSHM

#include "param.h"
#include "systm.h"
#include "kernel.h"
#include "proc.h"
#include "shm.h"
#include "malloc.h"
#include "mman.h"
#include "vm/vm.h"
#include "vm/vm_kern.h"
#include "vm/vm_inherit.h"
#include "vm/vm_pager.h"

#ifdef HPUXCOMPAT
#include "hp300/hpux/hpux.h"
#endif

int	shmat(), shmctl(), shmdt(), shmget();
/* dispatch table for shmsys(); indexed by its "which" argument */
int	(*shmcalls[])() = { shmat, shmctl, shmdt, shmget };
/* running total of allocated shared memory, in clicks (see btoc usage) */
int	shmtot = 0;

/*
 * Per process internal structure for managing segments.
 * Each process using shm will have an array of ``shmseg'' of these.
 */
struct	shmdesc {
	vm_offset_t	shmd_uva;	/* user VA of attach; 0 => slot free */
	int		shmd_id;	/* shmid attached through this slot */
};

/*
 * Per segment internal structure (shm_handle).
 */
struct	shmhandle {
	vm_offset_t	shmh_kva;	/* kernel VA of segment backing */
	caddr_t		shmh_id;	/* pager id passed to vm_mmap() */
};

vm_map_t shm_map;	/* address space for shared memory segments */

/*
 * Bootstrap initialization: carve a kernel submap big enough for
 * shminfo.shmall pages of shared memory, clamp shmmni to the static
 * table size, and mark every segment slot free (mode 0, seq 0).
 * NOTE(review): shminfo/shmsegs are defined elsewhere (per "shm.h") --
 * assumed set up before shminit() runs.
 */
shminit()
{
	register int i;
	vm_offset_t whocares1, whocares2;

	shm_map = kmem_suballoc(kernel_map, &whocares1, &whocares2,
				shminfo.shmall * NBPG, FALSE);
	if (shminfo.shmmni > SHMMMNI)
		shminfo.shmmni = SHMMMNI;
	for (i = 0; i < shminfo.shmmni; i++) {
		shmsegs[i].shm_perm.mode = 0;
		shmsegs[i].shm_perm.seq = 0;
	}
}

/*
 * Entry point for all SHM calls
 */

struct shmsys_args {
	u_int which;
};

/*
 * Demultiplex shmsys() to shmat/shmctl/shmdt/shmget.  The sub-call's
 * own arguments immediately follow "which" in the argument block,
 * hence the &uap[1] pass-through.
 */
shmsys(p, uap, retval)
	struct proc *p;
	struct shmsys_args *uap;
	int *retval;
{

	if (uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
		return (EINVAL);
	return ((*shmcalls[uap->which])(p, &uap[1], retval));
}

/*
 * Get a shared memory segment
 */

struct shmget_args {
	key_t key;
	int size;
	int shmflg;
};

/*
 * shmget(): look up the segment named by "key", or create a fresh one
 * when IPC_CREAT is set (IPC_PRIVATE always takes the create path).
 * On success *retval is the shmid, encoded as seq * SHMMMNI + slot
 * index so stale ids on a recycled slot are caught by shmvalid().
 */
shmget(p, uap, retval)
	struct proc *p;
	register struct shmget_args *uap;
	int *retval;
{
	register struct shmid_ds *shp;
	register struct ucred *cred = p->p_ucred;
	register int i;
	int error, size, rval = 0;
	register struct shmhandle *shmh;

	/* look up the specified shm_id */
	if (uap->key != IPC_PRIVATE) {
		for (i = 0; i < shminfo.shmmni; i++)
			if ((shmsegs[i].shm_perm.mode & SHM_ALLOC) &&
			    shmsegs[i].shm_perm.key == uap->key) {
				rval = i;
				break;
			}
	} else
		i = shminfo.shmmni;

	/* create a new shared segment if necessary */
	if (i == shminfo.shmmni) {
		if ((uap->shmflg & IPC_CREAT) == 0)
			return (ENOENT);
		if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
			return (EINVAL);
		/* find a free slot */
		for (i = 0; i < shminfo.shmmni; i++)
			if ((shmsegs[i].shm_perm.mode & SHM_ALLOC) == 0) {
				rval = i;
				break;
			}
		if (i == shminfo.shmmni)
			return (ENOSPC);
		size = clrnd(btoc(uap->size));
		if (shmtot + size > shminfo.shmall)
			return (ENOMEM);
		shp = &shmsegs[rval];
		/*
		 * We need to do a couple of things to ensure consistency
		 * in case we sleep in malloc().  We mark segment as
		 * allocated so that other shmgets() will not allocate it.
		 * We mark it as "destroyed" to insure that shmvalid() is
		 * false making most operations fail (XXX).  We set the key,
		 * so that other shmget()s will fail.
		 */
		shp->shm_perm.mode = SHM_ALLOC | SHM_DEST;
		shp->shm_perm.key = uap->key;
		shmh = (struct shmhandle *)
			malloc(sizeof(struct shmhandle), M_SHM, M_WAITOK);
		shmh->shmh_kva = 0;
		shmh->shmh_id = (caddr_t)(0xc0000000|rval);	/* XXX */
		error = vm_mmap(shm_map, &shmh->shmh_kva, ctob(size),
		    VM_PROT_ALL, VM_PROT_DEFAULT, MAP_ANON, shmh->shmh_id, 0);
		if (error) {
			/* back out the reservation made above */
			free((caddr_t)shmh, M_SHM);
			shp->shm_perm.mode = 0;
			return(ENOMEM);
		}
		shp->shm_handle = (void *) shmh;
		shmtot += size;
		shp->shm_perm.cuid = shp->shm_perm.uid = cred->cr_uid;
		shp->shm_perm.cgid = shp->shm_perm.gid = cred->cr_gid;
		shp->shm_perm.mode = SHM_ALLOC | (uap->shmflg&0777);
		shp->shm_segsz = uap->size;
		shp->shm_cpid = p->p_pid;
		shp->shm_lpid = shp->shm_nattch = 0;
		shp->shm_atime = shp->shm_dtime = 0;
		shp->shm_ctime = time.tv_sec;
	} else {
		/* found an existing segment under this key */
		shp = &shmsegs[rval];
		/* XXX: probably not the right thing to do */
		if (shp->shm_perm.mode & SHM_DEST)
			return (EBUSY);
		if (error = ipcaccess(&shp->shm_perm, uap->shmflg&0777, cred))
			return (error);
		if (uap->size && uap->size > shp->shm_segsz)
			return (EINVAL);
		if ((uap->shmflg&IPC_CREAT) && (uap->shmflg&IPC_EXCL))
			return (EEXIST);
	}
	*retval = shp->shm_perm.seq * SHMMMNI + rval;
	return (0);
}

/*
 * Shared memory control
 */

struct shmctl_args {
	int shmid;
	int cmd;
	caddr_t buf;
};

/*
 * shmctl(): IPC_STAT copies the kernel shmid_ds out to the user
 * (NOTE(review): including the raw shm_handle pointer); IPC_SET lets
 * root, the owner, or the creator change uid, gid, and the low 0777
 * mode bits; IPC_RMID marks the segment destroyed and frees it
 * immediately when nothing is attached.  The HP-UX SHM_LOCK/SHM_UNLOCK
 * cases deliberately do nothing beyond permission checks.
 */
/* ARGSUSED */
shmctl(p, uap, retval)
	struct proc *p;
	register struct shmctl_args *uap;
	int *retval;
{
	register struct shmid_ds *shp;
	register struct ucred *cred = p->p_ucred;
	struct shmid_ds sbuf;
	int error;

	if (error = shmvalid(uap->shmid))
		return (error);
	shp = &shmsegs[uap->shmid % SHMMMNI];
	switch (uap->cmd) {
	case IPC_STAT:
		if (error = ipcaccess(&shp->shm_perm, IPC_R, cred))
			return (error);
		return (copyout((caddr_t)shp, uap->buf, sizeof(*shp)));

	case IPC_SET:
		if (cred->cr_uid && cred->cr_uid != shp->shm_perm.uid &&
		    cred->cr_uid != shp->shm_perm.cuid)
			return (EPERM);
		if (error = copyin(uap->buf, (caddr_t)&sbuf, sizeof sbuf))
			return (error);
		shp->shm_perm.uid = sbuf.shm_perm.uid;
		shp->shm_perm.gid = sbuf.shm_perm.gid;
		shp->shm_perm.mode = (shp->shm_perm.mode & ~0777)
			| (sbuf.shm_perm.mode & 0777);
		shp->shm_ctime = time.tv_sec;
		break;

	case IPC_RMID:
		if (cred->cr_uid && cred->cr_uid != shp->shm_perm.uid &&
		    cred->cr_uid != shp->shm_perm.cuid)
			return (EPERM);
		/* set ctime? */
		shp->shm_perm.key = IPC_PRIVATE;
		shp->shm_perm.mode |= SHM_DEST;
		if (shp->shm_nattch <= 0)
			shmfree(shp);
		break;

#ifdef HPUXCOMPAT
	case SHM_LOCK:
	case SHM_UNLOCK:
		/* don't really do anything, but make them think we did */
		if ((p->p_flag & SHPUX) == 0)
			return (EINVAL);
		if (cred->cr_uid && cred->cr_uid != shp->shm_perm.uid &&
		    cred->cr_uid != shp->shm_perm.cuid)
			return (EPERM);
		break;
#endif

	default:
		return (EINVAL);
	}
	return (0);
}

/*
 * Attach to shared memory segment.
 */

struct shmat_args {
	int	shmid;
	caddr_t	shmaddr;
	int	shmflg;
};

/*
 * shmat(): map a shared memory segment into the caller's address
 * space.  Validates the shmid and IPC permissions (read-only attaches
 * need only IPC_R), honors SHM_RND rounding of unaligned addresses,
 * records the attach in the per-process descriptor array, and returns
 * the mapped address through *retval.
 */
shmat(p, uap, retval)
	struct proc *p;
	register struct shmat_args *uap;
	int *retval;
{
	register struct shmid_ds *shp;
	register int size;
	caddr_t uva;
	int error;
	int flags;
	vm_prot_t prot;
	struct shmdesc *shmd;

	/*
	 * Allocate descriptors now (before validity check)
	 * in case malloc() blocks.
	 */
	shmd = (struct shmdesc *)p->p_vmspace->vm_shm;
	size = shminfo.shmseg * sizeof(struct shmdesc);
	if (shmd == NULL) {
		/* first attach by this process: create its descriptor array */
		shmd = (struct shmdesc *)malloc(size, M_SHM, M_WAITOK);
		bzero((caddr_t)shmd, size);
		p->p_vmspace->vm_shm = (caddr_t)shmd;
	}
	if (error = shmvalid(uap->shmid))
		return (error);
	shp = &shmsegs[uap->shmid % SHMMMNI];
	if (shp->shm_handle == NULL)
		panic("shmat NULL handle");
	if (error = ipcaccess(&shp->shm_perm,
	    (uap->shmflg&SHM_RDONLY) ? IPC_R : IPC_R|IPC_W, p->p_ucred))
		return (error);
	uva = uap->shmaddr;
	if (uva && ((int)uva & (SHMLBA-1))) {
		/* unaligned address: round down with SHM_RND, else reject */
		if (uap->shmflg & SHM_RND)
			uva = (caddr_t) ((int)uva & ~(SHMLBA-1));
		else
			return (EINVAL);
	}
	/*
	 * Make sure user doesn't use more than their fair share
	 * (scan for a free descriptor slot; "size" doubles as the index).
	 */
	for (size = 0; size < shminfo.shmseg; size++) {
		if (shmd->shmd_uva == 0)
			break;
		shmd++;
	}
	if (size >= shminfo.shmseg)
		return (EMFILE);
	size = ctob(clrnd(btoc(shp->shm_segsz)));
	prot = VM_PROT_READ;
	if ((uap->shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON|MAP_SHARED;
	if (uva)
		flags |= MAP_FIXED;
	else
		/* default attach address when the caller passed NULL */
		uva = (caddr_t)0x1000000;	/* XXX */
	error = vm_mmap(&p->p_vmspace->vm_map, &uva, (vm_size_t)size, prot, VM_PROT_DEFAULT,
	    flags, ((struct shmhandle *)shp->shm_handle)->shmh_id, 0);
	if (error)
		return(error);
	shmd->shmd_uva = (vm_offset_t)uva;
	shmd->shmd_id = uap->shmid;
	/*
	 * Fill in the remaining fields
	 */
	shp->shm_lpid = p->p_pid;
	shp->shm_atime = time.tv_sec;
	shp->shm_nattch++;
	*retval = (int) uva;
	return (0);
}

/*
 * Detach from shared
memory segment. + */ + +struct shmdt_args { + caddr_t shmaddr; +}; + +/* ARGSUSED */ +shmdt(p, uap, retval) + struct proc *p; + struct shmdt_args *uap; + int *retval; +{ + register struct shmdesc *shmd; + register int i; + + shmd = (struct shmdesc *)p->p_vmspace->vm_shm; + for (i = 0; i < shminfo.shmseg; i++, shmd++) + if (shmd->shmd_uva && + shmd->shmd_uva == (vm_offset_t)uap->shmaddr) + break; + if (i == shminfo.shmseg) + return(EINVAL); + shmufree(p, shmd); + shmsegs[shmd->shmd_id % SHMMMNI].shm_lpid = p->p_pid; +} + +shmfork(p1, p2, isvfork) + struct proc *p1, *p2; + int isvfork; +{ + register struct shmdesc *shmd; + register int size; + + /* + * Copy parents descriptive information + */ + size = shminfo.shmseg * sizeof(struct shmdesc); + shmd = (struct shmdesc *)malloc(size, M_SHM, M_WAITOK); + bcopy((caddr_t)p1->p_vmspace->vm_shm, (caddr_t)shmd, size); + p2->p_vmspace->vm_shm = (caddr_t)shmd; + /* + * Increment reference counts + */ + for (size = 0; size < shminfo.shmseg; size++, shmd++) + if (shmd->shmd_uva) + shmsegs[shmd->shmd_id % SHMMMNI].shm_nattch++; +} + +shmexit(p) + struct proc *p; +{ + register struct shmdesc *shmd; + register int i; + + shmd = (struct shmdesc *)p->p_vmspace->vm_shm; + for (i = 0; i < shminfo.shmseg; i++, shmd++) + if (shmd->shmd_uva) + shmufree(p, shmd); + free((caddr_t)p->p_vmspace->vm_shm, M_SHM); + p->p_vmspace->vm_shm = NULL; +} + +shmvalid(id) + register int id; +{ + register struct shmid_ds *shp; + + if (id < 0 || (id % SHMMMNI) >= shminfo.shmmni) + return(EINVAL); + shp = &shmsegs[id % SHMMMNI]; + if (shp->shm_perm.seq == (id / SHMMMNI) && + (shp->shm_perm.mode & (SHM_ALLOC|SHM_DEST)) == SHM_ALLOC) + return(0); + return(EINVAL); +} + +/* + * Free user resources associated with a shared memory segment + */ +shmufree(p, shmd) + struct proc *p; + struct shmdesc *shmd; +{ + register struct shmid_ds *shp; + + shp = &shmsegs[shmd->shmd_id % SHMMMNI]; + (void) vm_deallocate(&p->p_vmspace->vm_map, shmd->shmd_uva, + 
	    ctob(clrnd(btoc(shp->shm_segsz))));
	/* Clear the per-process descriptor slot (marks it free for reuse). */
	shmd->shmd_id = 0;
	shmd->shmd_uva = 0;
	shp->shm_dtime = time.tv_sec;
	/* Last detach of a segment marked for destruction: tear it down. */
	if (--shp->shm_nattch <= 0 && (shp->shm_perm.mode & SHM_DEST))
		shmfree(shp);
}

/*
 * Deallocate resources associated with a shared memory segment
 */
shmfree(shp)
	register struct shmid_ds *shp;
{

	if (shp->shm_handle == NULL)
		panic("shmfree");
	/*
	 * Lose our lingering object reference by deallocating space
	 * in kernel.  Pager will also be deallocated as a side-effect.
	 */
	vm_deallocate(shm_map,
	    ((struct shmhandle *)shp->shm_handle)->shmh_kva,
	    ctob(clrnd(btoc(shp->shm_segsz))));
	free((caddr_t)shp->shm_handle, M_SHM);
	shp->shm_handle = NULL;
	/* Return the segment's pages to the global shm accounting total. */
	shmtot -= clrnd(btoc(shp->shm_segsz));
	shp->shm_perm.mode = 0;
	/*
	 * Increment the sequence number to ensure that outstanding
	 * shmids for this segment will be invalid in the event that
	 * the segment is reallocated.  Note that shmids must be
	 * positive as decreed by SVID.
	 */
	shp->shm_perm.seq++;
	if ((int)(shp->shm_perm.seq * SHMMMNI) < 0)
		shp->shm_perm.seq = 0;
}

/*
 * XXX This routine would be common to all sysV style IPC
 * (if the others were implemented).
 *
 * ipcaccess -- System V IPC permission check.  `mode' holds the
 * requested bits (IPC_R/IPC_W in the owner position); returns 0 if
 * `cred' is allowed that access, EACCES otherwise.  Root (uid 0)
 * always passes.
 */
ipcaccess(ipc, mode, cred)
	register struct ipc_perm *ipc;
	int mode;
	register struct ucred *cred;
{
	register int m;

	if (cred->cr_uid == 0)
		return(0);
	/*
	 * Access check is based on only one of owner, group, public.
	 * If not owner, then check group.
	 * If not a member of the group, then check public access.
+ */ + mode &= 0700; + m = ipc->mode; + if (cred->cr_uid != ipc->uid && cred->cr_uid != ipc->cuid) { + m <<= 3; + if (!groupmember(ipc->gid, cred) && + !groupmember(ipc->cgid, cred)) + m <<= 3; + } + if ((mode&m) == mode) + return (0); + return (EACCES); +} +#endif /* SYSVSHM */ diff --git a/sys/kern/tty.c b/sys/kern/tty.c new file mode 100644 index 000000000000..faf8b4d4a1a4 --- /dev/null +++ b/sys/kern/tty.c @@ -0,0 +1,1965 @@ +/*- + * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)tty.c 7.44 (Berkeley) 5/28/91 + * $Id: tty.c,v 1.5 1993/10/16 15:24:54 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "ioctl.h" +#define TTYDEFCHARS +#include "tty.h" +#undef TTYDEFCHARS +#include "proc.h" +#include "file.h" +#include "conf.h" +#include "dkstat.h" +#include "uio.h" +#include "kernel.h" +#include "vnode.h" +#include "syslog.h" + +#include "vm/vm.h" + +static int proc_compare __P((struct proc *p1, struct proc *p2)); + +/* symbolic sleep message strings */ +char ttyin[] = "ttyin"; +char ttyout[] = "ttyout"; +char ttopen[] = "ttyopn"; +char ttclos[] = "ttycls"; +char ttybg[] = "ttybg"; +char ttybuf[] = "ttybuf"; + +/* + * Table giving parity for characters and indicating + * character classes to tty driver. The 8th bit + * indicates parity, the 7th bit indicates the character + * is an alphameric or underscore (for ALTWERASE), and the + * low 6 bits indicate delay type. If the low 6 bits are 0 + * then the character needs no special processing on output; + * classes other than 0 might be translated or (not currently) + * require delays. 
+ */ +#define PARITY(c) (partab[c] & 0x80) +#define ISALPHA(c) (partab[(c)&TTY_CHARMASK] & 0x40) +#define CCLASSMASK 0x3f +#define CCLASS(c) (partab[c] & CCLASSMASK) + +#define E 0x00 /* even parity */ +#define O 0x80 /* odd parity */ +#define ALPHA 0x40 /* alpha or underscore */ + +#define NO ORDINARY +#define NA ORDINARY|ALPHA +#define CC CONTROL +#define BS BACKSPACE +#define NL NEWLINE +#define TB TAB +#define VT VTAB +#define CR RETURN + +char partab[] = { + E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* nul - bel */ + O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */ + O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */ + E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */ + O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */ + E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */ + E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */ + O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? */ + O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */ + E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */ + E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */ + O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, O|NA, /* X - _ */ + E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */ + O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */ + O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */ + E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */ + /* + * "meta" chars; should be settable per charset. + * For now, treat all as normal characters. 
 */
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
};
/* The short row-label macros are local to the table above. */
#undef	NO
#undef	NA
#undef	CC
#undef	BS
#undef	NL
#undef	TB
#undef	VT
#undef	CR

extern struct tty *constty;		/* temporary virtual console */

/*
 * Is 'c' a line delimiter ("break" character)?
 * NOTE(review): && binds tighter than ||, so a '\n' always matches and
 * the _POSIX_VDISABLE guard applies only to the VEOF/VEOL/VEOL2 cases.
 */
#define ttbreakc(c) ((c) == '\n' || ((c) == cc[VEOF] || \
	(c) == cc[VEOL] || (c) == cc[VEOL2]) && (c) != _POSIX_VDISABLE)

/*
 * Reset a tty's control characters to the compiled-in defaults
 * (ttydefchars, pulled in via TTYDEFCHARS above).
 */
ttychars(tp)
	struct tty *tp;
{

	bcopy(ttydefchars, tp->t_cc, sizeof(ttydefchars));
}

/*
 * Flush tty after output has drained.
 * Returns 0, or the error that interrupted the wait.
 */
ttywflush(tp)
	struct tty *tp;
{
	int error;

	if ((error = ttywait(tp)) == 0)
		ttyflush(tp, FREAD);
	return (error);
}

/*
 * Wait for output to drain.
 * Sleeps (interruptibly) while the output ring has data or the device
 * is busy, as long as carrier (or CLOCAL) holds and the device has an
 * output-start routine.  Returns 0 or a ttysleep() error.
 */
ttywait(tp)
	register struct tty *tp;
{
	int error = 0, s = spltty();

	while ((RB_LEN(&tp->t_out) || tp->t_state&TS_BUSY) &&
	    (tp->t_state&TS_CARR_ON || tp->t_cflag&CLOCAL) &&
	    tp->t_oproc) {
		(*tp->t_oproc)(tp);	/* kick the device to keep draining */
		tp->t_state |= TS_ASLEEP;
		if (error = ttysleep(tp, (caddr_t)&tp->t_out,
		    TTOPRI | PCATCH, ttyout, 0))
			break;
	}
	splx(s);
	return (error);
}

/*
 * Empty a ring buffer by snapping its head to its tail.
 * Caller must be at spltty(); discards all queued characters.
 */
#define	flushq(qq) { \
	register struct ringb *r = qq; \
	r->rb_hd = r->rb_tl; \
}

/*
 * Flush TTY read and/or write queues,
 * notifying anyone waiting.
+ */ +ttyflush(tp, rw) + register struct tty *tp; +{ + register s; + + s = spltty(); + if (rw & FREAD) { + flushq(&tp->t_can); + flushq(&tp->t_raw); + tp->t_rocount = 0; + tp->t_rocol = 0; + tp->t_state &= ~(TS_LOCAL|TS_TBLOCK); /* XXX - should be TS_RTSBLOCK */ + ttwakeup(tp); + } + if (rw & FWRITE) { + tp->t_state &= ~TS_TTSTOP; + (*cdevsw[major(tp->t_dev)].d_stop)(tp, rw); + flushq(&tp->t_out); + wakeup((caddr_t)&tp->t_out); + if (tp->t_wsel) { + selwakeup(tp->t_wsel, tp->t_state & TS_WCOLL); + tp->t_wsel = 0; + tp->t_state &= ~TS_WCOLL; + } + } + splx(s); +} + +/* + * Send stop character on input overflow. + */ +ttyblock(tp) + register struct tty *tp; +{ + register x; + int rawcc, cancc; + + rawcc = RB_LEN(&tp->t_raw); + cancc = RB_LEN(&tp->t_can); + x = rawcc + cancc; + if (rawcc > TTYHOG) { + ttyflush(tp, FREAD|FWRITE); + } + /* + * Block further input iff: + * Current input > threshold AND input is available to user program + */ + if (x >= TTYHOG/2 && (tp->t_state & TS_TBLOCK) == 0 && + ((tp->t_lflag&ICANON) == 0) || (cancc > 0)) { + if (tp->t_cc[VSTOP] != _POSIX_VDISABLE) { + putc(tp->t_cc[VSTOP], &tp->t_out); + } + tp->t_state |= TS_TBLOCK; /* XXX - should be TS_RTSBLOCK? */ + ttstart(tp); + } +} + +ttstart(tp) + struct tty *tp; +{ + + if (tp->t_oproc) /* kludge for pty */ + (*tp->t_oproc)(tp); +} + +ttrstrt(tp) /* XXX */ + struct tty *tp; +{ + +#ifdef DIAGNOSTIC + if (tp == 0) + panic("ttrstrt"); +#endif + tp->t_state &= ~TS_TIMEOUT; + ttstart(tp); +} + + +/* + * Common code for ioctls on tty devices. + * Called after line-discipline-specific ioctl + * has been called to do discipline-specific functions + * and/or reject any of these ioctl commands. + */ +/*ARGSUSED*/ +ttioctl(tp, com, data, flag) + register struct tty *tp; + caddr_t data; +{ + register struct proc *p = curproc; /* XXX */ + extern int nldisp; + int s, error; + + /* + * If the ioctl involves modification, + * hang if in the background. 
+ */ + switch (com) { + + case TIOCSETD: + case TIOCFLUSH: + /*case TIOCSPGRP:*/ + case TIOCSTI: + case TIOCSWINSZ: + case TIOCSETA: + case TIOCSETAW: + case TIOCSETAF: + case TIOCSTAT: +#ifdef COMPAT_43 + case TIOCSETP: + case TIOCSETN: + case TIOCSETC: + case TIOCSLTC: + case TIOCLBIS: + case TIOCLBIC: + case TIOCLSET: + case OTIOCSETD: +#endif + while (isbackground(curproc, tp) && + p->p_pgrp->pg_jobc && (p->p_flag&SPPWAIT) == 0 && + (p->p_sigignore & sigmask(SIGTTOU)) == 0 && + (p->p_sigmask & sigmask(SIGTTOU)) == 0) { + pgsignal(p->p_pgrp, SIGTTOU, 1); + if (error = ttysleep(tp, (caddr_t)&lbolt, + TTOPRI | PCATCH, ttybg, 0)) + return (error); + } + break; + } + + /* + * Process the ioctl. + */ + switch (com) { + + /* get discipline number */ + case TIOCGETD: + *(int *)data = tp->t_line; + break; + + /* set line discipline */ + case TIOCSETD: { + register int t = *(int *)data; + dev_t dev = tp->t_dev; + + if ((unsigned)t >= nldisp) + return (ENXIO); + if (t != tp->t_line) { + s = spltty(); + (*linesw[tp->t_line].l_close)(tp, flag); + error = (*linesw[t].l_open)(dev, tp); + if (error) { + (void)(*linesw[tp->t_line].l_open)(dev, tp); + splx(s); + return (error); + } + tp->t_line = t; + splx(s); + } + break; + } + + /* prevent more opens on channel */ + case TIOCEXCL: + tp->t_state |= TS_XCLUDE; + break; + + case TIOCNXCL: + tp->t_state &= ~TS_XCLUDE; + break; + +#ifdef COMPAT_43 + /* wkt */ + case TIOCHPCL: + tp->t_cflag |= HUPCL; + break; +#endif + + case TIOCFLUSH: { + register int flags = *(int *)data; + + if (flags == 0) + flags = FREAD|FWRITE; + else + flags &= FREAD|FWRITE; + ttyflush(tp, flags); + break; + } + + case FIOASYNC: + if (*(int *)data) + tp->t_state |= TS_ASYNC; + else + tp->t_state &= ~TS_ASYNC; + break; + + case FIONBIO: + break; /* XXX remove */ + + /* return number of characters immediately available */ + case FIONREAD: + *(off_t *)data = ttnread(tp); + break; + + case TIOCOUTQ: + *(int *)data = RB_LEN(&tp->t_out); + break; + + case 
TIOCSTOP: + s = spltty(); + if ((tp->t_state&TS_TTSTOP) == 0) { + tp->t_state |= TS_TTSTOP; + (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0); + } + splx(s); + break; + + case TIOCSTART: + s = spltty(); + if ((tp->t_state&TS_TTSTOP) || (tp->t_lflag&FLUSHO)) { + tp->t_state &= ~TS_TTSTOP; + tp->t_lflag &= ~FLUSHO; + ttstart(tp); + } + splx(s); + break; + + /* + * Simulate typing of a character at the terminal. + */ + case TIOCSTI: + if (p->p_ucred->cr_uid && (flag & FREAD) == 0) + return (EPERM); + if (p->p_ucred->cr_uid && !isctty(p, tp)) + return (EACCES); + (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp); + break; + + case TIOCGETA: { + struct termios *t = (struct termios *)data; + + bcopy(&tp->t_termios, t, sizeof(struct termios)); + break; + } + + case TIOCSETA: + case TIOCSETAW: + case TIOCSETAF: { + register struct termios *t = (struct termios *)data; + + s = spltty(); + if (com == TIOCSETAW || com == TIOCSETAF) { + if (error = ttywait(tp)) { + splx(s); + return (error); + } + if (com == TIOCSETAF) + ttyflush(tp, FREAD); + } + if ((t->c_cflag&CIGNORE) == 0) { + /* + * set device hardware + */ + if (tp->t_param && (error = (*tp->t_param)(tp, t))) { + splx(s); + return (error); + } else { + if ((tp->t_state&TS_CARR_ON) == 0 && + (tp->t_cflag&CLOCAL) && + (t->c_cflag&CLOCAL) == 0) { + tp->t_state &= ~TS_ISOPEN; + tp->t_state |= TS_WOPEN; + ttwakeup(tp); + } + tp->t_cflag = t->c_cflag; + tp->t_ispeed = t->c_ispeed; + tp->t_ospeed = t->c_ospeed; + } + ttsetwater(tp); + } + if (com != TIOCSETAF) { + if ((t->c_lflag&ICANON) != (tp->t_lflag&ICANON)) + if (t->c_lflag&ICANON) { + tp->t_lflag |= PENDIN; + ttwakeup(tp); + } + else { + catb(&tp->t_raw, &tp->t_can); + catb(&tp->t_can, &tp->t_raw); + } + } + tp->t_iflag = t->c_iflag; + tp->t_oflag = t->c_oflag; + /* + * Make the EXTPROC bit read only. 
+ */ + if (tp->t_lflag&EXTPROC) + t->c_lflag |= EXTPROC; + else + t->c_lflag &= ~EXTPROC; + tp->t_lflag = t->c_lflag; + bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc)); + splx(s); + break; + } + + /* + * Give load average stats if requested (tcsh uses raw mode + * and directly sends the ioctl() to the tty driver) + */ + case TIOCSTAT: + ttyinfo(tp); + break; + + /* + * Set controlling terminal. + * Session ctty vnode pointer set in vnode layer. + */ + case TIOCSCTTY: + if (!SESS_LEADER(p) || + (p->p_session->s_ttyvp || tp->t_session) && + (tp->t_session != p->p_session)) + return (EPERM); + tp->t_session = p->p_session; + tp->t_pgrp = p->p_pgrp; + p->p_session->s_ttyp = tp; + p->p_flag |= SCTTY; + break; + + /* + * Set terminal process group. + */ + case TIOCSPGRP: { + register struct pgrp *pgrp = pgfind(*(int *)data); + + if (!isctty(p, tp)) + return (ENOTTY); + else if (pgrp == NULL || pgrp->pg_session != p->p_session) + return (EPERM); + tp->t_pgrp = pgrp; + break; + } + + case TIOCGPGRP: + if (!isctty(p, tp)) + return (ENOTTY); + *(int *)data = tp->t_pgrp ? 
tp->t_pgrp->pg_id : NO_PID; + break; + + case TIOCSWINSZ: + if (bcmp((caddr_t)&tp->t_winsize, data, + sizeof (struct winsize))) { + tp->t_winsize = *(struct winsize *)data; + pgsignal(tp->t_pgrp, SIGWINCH, 1); + } + break; + + case TIOCGWINSZ: + *(struct winsize *)data = tp->t_winsize; + break; + + case TIOCCONS: + if (*(int *)data) { + if (constty && constty != tp && + (constty->t_state & (TS_CARR_ON|TS_ISOPEN)) == + (TS_CARR_ON|TS_ISOPEN)) + return (EBUSY); +#ifndef UCONSOLE + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); +#endif + constty = tp; + } else if (tp == constty) + constty = NULL; + break; + + case TIOCDRAIN: + if (error = ttywait(tp)) + return (error); + break; + + default: +#ifdef COMPAT_43 + return (ttcompat(tp, com, data, flag)); +#else + return (-1); +#endif + } + return (0); +} + +ttnread(tp) + struct tty *tp; +{ + int nread = 0; + + if (tp->t_lflag & PENDIN) + ttypend(tp); + nread = RB_LEN(&tp->t_can); + if ((tp->t_lflag & ICANON) == 0) + nread += RB_LEN(&tp->t_raw); + return (nread); +} + +ttselect(dev, rw, p) + dev_t dev; + int rw; + struct proc *p; +{ + register struct tty *tp = &cdevsw[major(dev)].d_ttys[minor(dev)]; + int nread; + int s = spltty(); + struct proc *selp; + + switch (rw) { + + case FREAD: + nread = ttnread(tp); + if (nread > 0 || + ((tp->t_cflag&CLOCAL) == 0 && (tp->t_state&TS_CARR_ON) == 0)) + goto win; + if (tp->t_rsel && (selp = pfind(tp->t_rsel)) && selp->p_wchan == (caddr_t)&selwait) + tp->t_state |= TS_RCOLL; + else + tp->t_rsel = p->p_pid; + break; + + case FWRITE: + if (RB_LEN(&tp->t_out) <= tp->t_lowat) + goto win; + if (tp->t_wsel && (selp = pfind(tp->t_wsel)) && selp->p_wchan == (caddr_t)&selwait) + tp->t_state |= TS_WCOLL; + else + tp->t_wsel = p->p_pid; + break; + } + splx(s); + return (0); +win: + splx(s); + return (1); +} + +/* + * Initial open of tty, or (re)entry to standard tty line discipline. 
 */
/*
 * ttyopen -- initial open of a tty: record the device, mark it open,
 * and (on first open) initialize the three ring buffers and clear the
 * window size.  Always returns 0.
 */
ttyopen(dev, tp)
	dev_t dev;
	register struct tty *tp;
{

	tp->t_dev = dev;

	tp->t_state &= ~TS_WOPEN;
	if ((tp->t_state & TS_ISOPEN) == 0) {
		tp->t_state |= TS_ISOPEN;
		initrb(&tp->t_raw);
		initrb(&tp->t_can);
		initrb(&tp->t_out);
		bzero((caddr_t)&tp->t_winsize, sizeof(tp->t_winsize));
	}
	return (0);
}

/*
 * "close" a line discipline
 * With IO_NDELAY the queues are discarded immediately; otherwise
 * output is drained first via ttywflush().
 */
ttylclose(tp, flag)
	struct tty *tp;
	int flag;
{

	if (flag&IO_NDELAY)
		ttyflush(tp, FREAD|FWRITE);
	else
		ttywflush(tp);
}

/*
 * Handle close() on a tty line: flush and set to initial state,
 * bumping generation number so that pending read/write calls
 * can detect recycling of the tty.
 */
ttyclose(tp)
	register struct tty *tp;
{
	if (constty == tp)
		constty = NULL;	/* stop acting as the virtual console */
	ttyflush(tp, FREAD|FWRITE);
	tp->t_session = NULL;
	tp->t_pgrp = NULL;
/*
 * XXX - do we need to send cc[VSTART] or do a ttstart() here in some cases?
 * (TS_TBLOCK and TS_RTSBLOCK are being cleared.)
 */
	tp->t_state = 0;
	tp->t_gen++;
	return (0);
}

/*
 * Handle modem control transition on a tty.
 * Flag indicates new state of carrier.
 * Returns 0 if the line should be turned off, otherwise 1.
 */
ttymodem(tp, flag)
	register struct tty *tp;
{

	if ((tp->t_state&TS_WOPEN) == 0 && (tp->t_lflag&MDMBUF)) {
		/*
		 * MDMBUF: do flow control according to carrier flag
		 */
		if (flag) {
			tp->t_state &= ~TS_TTSTOP;
			ttstart(tp);
		} else if ((tp->t_state&TS_TTSTOP) == 0) {
			tp->t_state |= TS_TTSTOP;
			(*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
		}
	} else if (flag == 0) {
		/*
		 * Lost carrier: drop TS_CARR_ON and, unless CLOCAL is set,
		 * hang up the session (SIGHUP to the leader) and flush.
		 */
		tp->t_state &= ~TS_CARR_ON;
		if (tp->t_state&TS_ISOPEN && (tp->t_cflag&CLOCAL) == 0) {
			if (tp->t_session && tp->t_session->s_leader)
				psignal(tp->t_session->s_leader, SIGHUP);
			ttyflush(tp, FREAD|FWRITE);
			return (0);
		}
	} else {
		/*
		 * Carrier now on.
+ */ + tp->t_state |= TS_CARR_ON; + ttwakeup(tp); + } + return (1); +} + +/* + * Default modem control routine (for other line disciplines). + * Return argument flag, to turn off device on carrier drop. + */ +nullmodem(tp, flag) + register struct tty *tp; + int flag; +{ + + if (flag) + tp->t_state |= TS_CARR_ON; + else { + tp->t_state &= ~TS_CARR_ON; + if ((tp->t_cflag&CLOCAL) == 0) { + if (tp->t_session && tp->t_session->s_leader) + psignal(tp->t_session->s_leader, SIGHUP); + return (0); + } + } + return (1); +} + +/* + * reinput pending characters after state switch + * call at spltty(). + */ +ttypend(tp) + register struct tty *tp; +{ + register c; + char *hd, *tl; + + tp->t_lflag &= ~PENDIN; + tp->t_state |= TS_TYPEN; + hd = tp->t_raw.rb_hd; + tl = tp->t_raw.rb_tl; + flushq(&tp->t_raw); + while (hd != tl) { + ttyinput(*hd, tp); + hd = RB_SUCC(&tp->t_raw, hd); + } + tp->t_state &= ~TS_TYPEN; +} + +/* + * Process input of a single character received on a tty. + */ +ttyinput(c, tp) + register c; + register struct tty *tp; +{ + register int iflag = tp->t_iflag; + register int lflag = tp->t_lflag; + register u_char *cc = tp->t_cc; + int i, err; + + /* + * If input is pending take it first. + */ + if (lflag&PENDIN) + ttypend(tp); + /* + * Gather stats. + */ + tk_nin++; + if (lflag&ICANON) { + tk_cancc++; + tp->t_cancc++; + } else { + tk_rawcc++; + tp->t_rawcc++; + } + /* + * Handle exceptional conditions (break, parity, framing). 
+ */ + if (err = (c&TTY_ERRORMASK)) { + c &= ~TTY_ERRORMASK; + if (err&TTY_FE && !c) { /* break */ + if (iflag&IGNBRK) + goto endcase; + else if (iflag&BRKINT && lflag&ISIG && + (cc[VINTR] != _POSIX_VDISABLE)) + c = cc[VINTR]; + else if (iflag&PARMRK) + goto parmrk; + } else if ((err&TTY_PE && iflag&INPCK) || err&TTY_FE) { + if (iflag&IGNPAR) + goto endcase; + else if (iflag&PARMRK) { +parmrk: + putc(0377|TTY_QUOTE, &tp->t_raw); + putc(0|TTY_QUOTE, &tp->t_raw); + putc(c|TTY_QUOTE, &tp->t_raw); + goto endcase; + } else + c = 0; + } + } + /* + * In tandem mode, check high water mark. + */ + if (iflag&IXOFF) + ttyblock(tp); + if ((tp->t_state&TS_TYPEN) == 0 && (iflag&ISTRIP)) + c &= ~0x80; + if ((tp->t_lflag&EXTPROC) == 0) { + /* + * Check for literal nexting very first + */ + if (tp->t_state&TS_LNCH) { + c |= TTY_QUOTE; + tp->t_state &= ~TS_LNCH; + } + /* + * Scan for special characters. This code + * is really just a big case statement with + * non-constant cases. The bottom of the + * case statement is labeled ``endcase'', so goto + * it after a case match, or similar. + */ + + /* + * Control chars which aren't controlled + * by ICANON, ISIG, or IXON. + */ + if (lflag&IEXTEN) { + if (CCEQ(cc[VLNEXT], c)) { + if (lflag&ECHO) { + if (lflag&ECHOE) + ttyoutstr("^\b", tp); + else + ttyecho(c, tp); + } + tp->t_state |= TS_LNCH; + goto endcase; + } + if (CCEQ(cc[VDISCARD], c)) { + if (lflag&FLUSHO) + tp->t_lflag &= ~FLUSHO; + else { + ttyflush(tp, FWRITE); + ttyecho(c, tp); + if (RB_LEN(&tp->t_raw) + RB_LEN(&tp->t_can)) + ttyretype(tp); + tp->t_lflag |= FLUSHO; + } + goto startoutput; + } + } + /* + * Signals. + */ + if (lflag&ISIG) { + if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) { + if ((lflag&NOFLSH) == 0) + ttyflush(tp, FREAD|FWRITE); + ttyecho(c, tp); + pgsignal(tp->t_pgrp, + CCEQ(cc[VINTR], c) ? 
SIGINT : SIGQUIT, 1); + goto endcase; + } + if (CCEQ(cc[VSUSP], c)) { + if ((lflag&NOFLSH) == 0) + ttyflush(tp, FREAD); + ttyecho(c, tp); + pgsignal(tp->t_pgrp, SIGTSTP, 1); + goto endcase; + } + } + /* + * Handle start/stop characters. + */ + if (iflag&IXON) { + if (CCEQ(cc[VSTOP], c)) { + if ((tp->t_state&TS_TTSTOP) == 0) { + tp->t_state |= TS_TTSTOP; + (*cdevsw[major(tp->t_dev)].d_stop)(tp, + 0); + return; + } + if (!CCEQ(cc[VSTART], c)) + return; + /* + * if VSTART == VSTOP then toggle + */ + goto endcase; + } + if (CCEQ(cc[VSTART], c)) + goto restartoutput; + } + /* + * IGNCR, ICRNL, & INLCR + */ + if (c == '\r') { + if (iflag&IGNCR) + goto endcase; + else if (iflag&ICRNL) + c = '\n'; + } else if (c == '\n' && iflag&INLCR) + c = '\r'; + } + if ((tp->t_lflag&EXTPROC) == 0 && lflag&ICANON) { + /* + * From here on down canonical mode character + * processing takes place. + */ + /* + * erase (^H / ^?) + */ + if (CCEQ(cc[VERASE], c)) { + if (RB_LEN(&tp->t_raw)) + ttyrub(unputc(&tp->t_raw), tp); + goto endcase; + } + /* + * kill (^U) + */ + if (CCEQ(cc[VKILL], c)) { + if (lflag&ECHOKE && RB_LEN(&tp->t_raw) == tp->t_rocount && + (lflag&ECHOPRT) == 0) { + while (RB_LEN(&tp->t_raw)) + ttyrub(unputc(&tp->t_raw), tp); + } else { + ttyecho(c, tp); + if (lflag&ECHOK || lflag&ECHOKE) + ttyecho('\n', tp); + while (getc(&tp->t_raw) > 0) + ; + tp->t_rocount = 0; + } + tp->t_state &= ~TS_LOCAL; + goto endcase; + } + /* + * word erase (^W) + */ + if (CCEQ(cc[VWERASE], c)) { + int ctype; + int alt = lflag&ALTWERASE; + + /* + * erase whitespace + */ + while ((c = unputc(&tp->t_raw)) == ' ' || c == '\t') + ttyrub(c, tp); + if (c == -1) + goto endcase; + /* + * erase last char of word and remember the + * next chars type (for ALTWERASE) + */ + ttyrub(c, tp); + c = unputc(&tp->t_raw); + if (c == -1) + goto endcase; + ctype = ISALPHA(c); + /* + * erase rest of word + */ + do { + ttyrub(c, tp); + c = unputc(&tp->t_raw); + if (c == -1) + goto endcase; + } while (c != ' ' && c != '\t' && 
+ (alt == 0 || ISALPHA(c) == ctype)); + (void) putc(c, &tp->t_raw); + goto endcase; + } + /* + * reprint line (^R) + */ + if (CCEQ(cc[VREPRINT], c)) { + ttyretype(tp); + goto endcase; + } + /* + * ^T - kernel info and generate SIGINFO + */ + if (CCEQ(cc[VSTATUS], c)) { + pgsignal(tp->t_pgrp, SIGINFO, 1); + if ((lflag&NOKERNINFO) == 0) + ttyinfo(tp); + goto endcase; + } + } + /* + * Check for input buffer overflow + */ + if (RB_LEN(&tp->t_raw)+RB_LEN(&tp->t_can) >= TTYHOG) { + if (iflag&IMAXBEL) { + if (RB_LEN(&tp->t_out) < tp->t_hiwat) + (void) ttyoutput(CTRL('g'), tp); + } else + ttyflush(tp, FREAD | FWRITE); + goto endcase; + } + /* + * Put data char in q for user and + * wakeup on seeing a line delimiter. + */ + if (putc(c, &tp->t_raw) >= 0) { + if ((lflag&ICANON) == 0) { + ttwakeup(tp); + ttyecho(c, tp); + goto endcase; + } + if (ttbreakc(c)) { + tp->t_rocount = 0; + catb(&tp->t_raw, &tp->t_can); + ttwakeup(tp); + } else if (tp->t_rocount++ == 0) + tp->t_rocol = tp->t_col; + if (tp->t_state&TS_ERASE) { + /* + * end of prterase \.../ + */ + tp->t_state &= ~TS_ERASE; + (void) ttyoutput('/', tp); + } + i = tp->t_col; + ttyecho(c, tp); + if (CCEQ(cc[VEOF], c) && lflag&ECHO) { + /* + * Place the cursor over the '^' of the ^D. + */ + i = MIN(2, tp->t_col - i); + while (i > 0) { + (void) ttyoutput('\b', tp); + i--; + } + } + } +endcase: + /* + * IXANY means allow any character to restart output. + */ + if ((tp->t_state&TS_TTSTOP) && (iflag&IXANY) == 0 && + cc[VSTART] != cc[VSTOP]) + return; +restartoutput: + tp->t_state &= ~TS_TTSTOP; + tp->t_lflag &= ~FLUSHO; +startoutput: + ttstart(tp); +} + +/* + * Output a single character on a tty, doing output processing + * as needed (expanding tabs, newline processing, etc.). + * Returns < 0 if putc succeeds, otherwise returns char to resend. + * Must be recursive. 
+ */ +ttyoutput(c, tp) + register c; + register struct tty *tp; +{ + register int col; + register long oflag = tp->t_oflag; + + if ((oflag&OPOST) == 0) { + if (tp->t_lflag&FLUSHO) + return (-1); + if (putc(c, &tp->t_out)) + return (c); + tk_nout++; + tp->t_outcc++; + return (-1); + } + c &= TTY_CHARMASK; + /* + * Do tab expansion if OXTABS is set. + * Special case if we have external processing, we don't + * do the tab expansion because we'll probably get it + * wrong. If tab expansion needs to be done, let it + * happen externally. + */ + if (c == '\t' && oflag&OXTABS && (tp->t_lflag&EXTPROC) == 0) { + register int s; + + c = 8 - (tp->t_col&7); + if ((tp->t_lflag&FLUSHO) == 0) { + int i; + + s = spltty(); /* don't interrupt tabs */ +#ifdef was + c -= b_to_q(" ", c, &tp->t_outq); +#else + i = min (c, RB_CONTIGPUT(&tp->t_out)); + bcopy(" ", tp->t_out.rb_tl, i); + tp->t_out.rb_tl = + RB_ROLLOVER(&tp->t_out, tp->t_out.rb_tl+i); + i = min (c-i, RB_CONTIGPUT(&tp->t_out)); + + /* off end and still have space? */ + if (i) { + bcopy(" ", tp->t_out.rb_tl, i); + tp->t_out.rb_tl = + RB_ROLLOVER(&tp->t_out, tp->t_out.rb_tl+i); + } +#endif + tk_nout += c; + tp->t_outcc += c; + splx(s); + } + tp->t_col += c; + return (c ? -1 : '\t'); + } + if (c == CEOT && oflag&ONOEOT) + return (-1); + tk_nout++; + tp->t_outcc++; + /* + * Newline translation: if ONLCR is set, + * translate newline into "\r\n". + */ + if (c == '\n' && (tp->t_oflag&ONLCR) && ttyoutput('\r', tp) >= 0) + return (c); + if ((tp->t_lflag&FLUSHO) == 0 && putc(c, &tp->t_out)) + return (c); + + col = tp->t_col; + switch (CCLASS(c)) { + + case ORDINARY: + col++; + + case CONTROL: + break; + + case BACKSPACE: + if (col > 0) + col--; + break; + + case NEWLINE: + col = 0; + break; + + case TAB: + col = (col + 8) &~ 0x7; + break; + + case RETURN: + col = 0; + } + tp->t_col = col; + return (-1); +} + +/* + * Process a read call on a tty device. 
+ */ +ttread(tp, uio, flag) + register struct tty *tp; + struct uio *uio; +{ + register struct ringb *qp; + register int c; + register long lflag; + register u_char *cc = tp->t_cc; + register struct proc *p = curproc; + int s, first, error = 0; + +loop: + lflag = tp->t_lflag; + s = spltty(); + /* + * take pending input first + */ + if (lflag&PENDIN) + ttypend(tp); + splx(s); + + /* + * Hang process if it's in the background. + */ + if (isbackground(p, tp)) { + if ((p->p_sigignore & sigmask(SIGTTIN)) || + (p->p_sigmask & sigmask(SIGTTIN)) || + p->p_flag&SPPWAIT || p->p_pgrp->pg_jobc == 0) + return (EIO); + pgsignal(p->p_pgrp, SIGTTIN, 1); + if (error = ttysleep(tp, (caddr_t)&lbolt, TTIPRI | PCATCH, + ttybg, 0)) + return (error); + goto loop; + } + + /* + * If canonical, use the canonical queue, + * else use the raw queue. + */ + qp = lflag&ICANON ? &tp->t_can : &tp->t_raw; + + /* + * If there is no input, sleep on rawq + * awaiting hardware receipt and notification. + * If we have data, we don't need to check for carrier. + */ + s = spltty(); + if (RB_LEN(qp) <= 0) { + int carrier; + + carrier = (tp->t_state&TS_CARR_ON) || (tp->t_cflag&CLOCAL); + if (!carrier && tp->t_state&TS_ISOPEN) { + splx(s); + return (0); /* EOF */ + } + if (flag & IO_NDELAY) { + splx(s); + return (EWOULDBLOCK); + } + error = ttysleep(tp, (caddr_t)&tp->t_raw, TTIPRI | PCATCH, + carrier ? ttyin : ttopen, 0); + splx(s); + if (error) + return (error); + goto loop; + } + splx(s); + + /* + * Input present, check for input mapping and processing. + */ + first = 1; + while ((c = getc(qp)) >= 0) { + /* + * delayed suspend (^Y) + */ + if (CCEQ(cc[VDSUSP], c) && lflag&ISIG) { + pgsignal(tp->t_pgrp, SIGTSTP, 1); + if (first) { + if (error = ttysleep(tp, (caddr_t)&lbolt, + TTIPRI | PCATCH, ttybg, 0)) + break; + goto loop; + } + break; + } + /* + * Interpret EOF only in canonical mode. + */ + if (CCEQ(cc[VEOF], c) && lflag&ICANON) + break; + /* + * Give user character. 
+ */ + error = ureadc(c, uio); + if (error) + break; + if (uio->uio_resid == 0) + break; + /* + * In canonical mode check for a "break character" + * marking the end of a "line of input". + */ + if (lflag&ICANON && ttbreakc(c)) + break; + first = 0; + } + /* + * Look to unblock output now that (presumably) + * the input queue has gone down. + */ +#if 0 + if (tp->t_state&TS_TBLOCK && RB_LEN(&tp->t_raw) < TTYHOG/5) { + if (cc[VSTART] != _POSIX_VDISABLE && + putc(cc[VSTART], &tp->t_out) == 0) { + tp->t_state &= ~TS_TBLOCK; + ttstart(tp); + } + } +#else +#define TS_RTSBLOCK TS_TBLOCK /* XXX */ +#define RB_I_LOW_WATER ((RBSZ - 2 * 256) * 7 / 8) /* XXX */ + if (tp->t_state&TS_RTSBLOCK && RB_LEN(&tp->t_raw) <= RB_I_LOW_WATER) { + tp->t_state &= ~TS_RTSBLOCK; + ttstart(tp); + } +#endif + return (error); +} + +/* + * Check the output queue on tp for space for a kernel message + * (from uprintf/tprintf). Allow some space over the normal + * hiwater mark so we don't lose messages due to normal flow + * control, but don't let the tty run amok. + * Sleeps here are not interruptible, but we return prematurely + * if new signals come in. + */ +ttycheckoutq(tp, wait) + register struct tty *tp; + int wait; +{ + int hiwat, s, oldsig; + extern int wakeup(); + + hiwat = tp->t_hiwat; + s = spltty(); + if (curproc) + oldsig = curproc->p_sig; + else + oldsig = 0; + if (RB_LEN(&tp->t_out) > hiwat + 200) + while (RB_LEN(&tp->t_out) > hiwat) { + ttstart(tp); + if (wait == 0 || (curproc && curproc->p_sig != oldsig)) { + splx(s); + return (0); + } + timeout(wakeup, (caddr_t)&tp->t_out, hz); + tp->t_state |= TS_ASLEEP; + sleep((caddr_t)&tp->t_out, PZERO - 1); + } + splx(s); + return (1); +} + +/* + * Process a write call on a tty device. 
+ */ +ttwrite(tp, uio, flag) + register struct tty *tp; + register struct uio *uio; +{ + register char *cp; + register int cc = 0, ce; + register struct proc *p = curproc; + int i, hiwat, cnt, error, s; + char obuf[OBUFSIZ]; + + hiwat = tp->t_hiwat; + cnt = uio->uio_resid; + error = 0; +loop: + s = spltty(); + if ((tp->t_state&TS_CARR_ON) == 0 && (tp->t_cflag&CLOCAL) == 0) { + if (tp->t_state&TS_ISOPEN) { + splx(s); + return (EIO); + } else if (flag & IO_NDELAY) { + splx(s); + error = EWOULDBLOCK; + goto out; + } else { + /* + * sleep awaiting carrier + */ + error = ttysleep(tp, (caddr_t)&tp->t_raw, + TTIPRI | PCATCH,ttopen, 0); + splx(s); + if (error) + goto out; + goto loop; + } + } + splx(s); + /* + * Hang the process if it's in the background. + */ + if (isbackground(p, tp) && + tp->t_lflag&TOSTOP && (p->p_flag&SPPWAIT) == 0 && + (p->p_sigignore & sigmask(SIGTTOU)) == 0 && + (p->p_sigmask & sigmask(SIGTTOU)) == 0 && + p->p_pgrp->pg_jobc) { + pgsignal(p->p_pgrp, SIGTTOU, 1); + if (error = ttysleep(tp, (caddr_t)&lbolt, TTIPRI | PCATCH, + ttybg, 0)) + goto out; + goto loop; + } + /* + * Process the user's data in at most OBUFSIZ + * chunks. Perform any output translation. + * Keep track of high water mark, sleep on overflow + * awaiting device aid in acquiring new space. + */ + while (uio->uio_resid > 0 || cc > 0) { + if (tp->t_lflag&FLUSHO) { + uio->uio_resid = 0; + return (0); + } + if (RB_LEN(&tp->t_out) > hiwat) + goto ovhiwat; + /* + * Grab a hunk of data from the user, + * unless we have some leftover from last time. + */ + if (cc == 0) { + cc = min(uio->uio_resid, OBUFSIZ); + cp = obuf; + error = uiomove(cp, cc, uio); + if (error) { + cc = 0; + break; + } + } + /* + * If nothing fancy need be done, grab those characters we + * can handle without any of ttyoutput's processing and + * just transfer them to the output q. For those chars + * which require special processing (as indicated by the + * bits in partab), call ttyoutput. 
After processing + * a hunk of data, look for FLUSHO so ^O's will take effect + * immediately. + */ + while (cc > 0) { + if ((tp->t_oflag&OPOST) == 0) + ce = cc; + else { + ce = cc - scanc((unsigned)cc, (u_char *)cp, + (u_char *)partab, CCLASSMASK); + /* + * If ce is zero, then we're processing + * a special character through ttyoutput. + */ + if (ce == 0) { + tp->t_rocount = 0; + if (ttyoutput(*cp, tp) >= 0) { + /* no c-lists, wait a bit */ + ttstart(tp); +printf("\nttysleep - no c-lists\n"); /* XXX */ + if (error = ttysleep(tp, + (caddr_t)&lbolt, + TTOPRI | PCATCH, ttybuf, 0)) + break; + goto loop; + } + cp++, cc--; + if ((tp->t_lflag&FLUSHO) || + RB_LEN(&tp->t_out) > hiwat) + goto ovhiwat; + continue; + } + } + /* + * A bunch of normal characters have been found, + * transfer them en masse to the output queue and + * continue processing at the top of the loop. + * If there are any further characters in this + * <= OBUFSIZ chunk, the first should be a character + * requiring special handling by ttyoutput. 
+ */ + tp->t_rocount = 0; +#ifdef was + i = b_to_q(cp, ce, &tp->t_outq); + ce -= i; +#else + i = ce; + ce = min (ce, RB_CONTIGPUT(&tp->t_out)); + bcopy(cp, tp->t_out.rb_tl, ce); + tp->t_out.rb_tl = RB_ROLLOVER(&tp->t_out, + tp->t_out.rb_tl + ce); + i -= ce; + if (i > 0) { + int ii; + + ii = min (i, RB_CONTIGPUT(&tp->t_out)); + bcopy(cp + ce, tp->t_out.rb_tl, ii); + tp->t_out.rb_tl = RB_ROLLOVER(&tp->t_out, + tp->t_out.rb_tl + ii); + i -= ii; + ce += ii; + } +#endif + tp->t_col += ce; + cp += ce, cc -= ce, tk_nout += ce; + tp->t_outcc += ce; + if (i > 0) { + ttstart(tp); + if (RB_CONTIGPUT(&tp->t_out) > 0) + goto loop; /* synchronous/fast */ + /* out of space, wait a bit */ + tp->t_state |= TS_ASLEEP; + if (error = ttysleep(tp, (caddr_t)&tp->t_out, + TTOPRI | PCATCH, ttybuf, 0)) + break; + goto loop; + } + if (tp->t_lflag&FLUSHO || RB_LEN(&tp->t_out) > hiwat) + break; + } + ttstart(tp); + } +out: + /* + * If cc is nonzero, we leave the uio structure inconsistent, + * as the offset and iov pointers have moved forward, + * but it doesn't matter (the call will either return short + * or restart with a new uio). + */ + uio->uio_resid += cc; + return (error); + +ovhiwat: + ttstart(tp); + s = spltty(); + /* + * This can only occur if FLUSHO is set in t_lflag, + * or if ttstart/oproc is synchronous (or very fast). + */ + if (RB_LEN(&tp->t_out) <= hiwat) { + splx(s); + goto loop; + } + if (flag & IO_NDELAY) { + splx(s); + uio->uio_resid += cc; + if (uio->uio_resid == cnt) + return (EWOULDBLOCK); + return (0); + } + tp->t_state |= TS_ASLEEP; + error = ttysleep(tp, (caddr_t)&tp->t_out, TTOPRI | PCATCH, ttyout, 0); + splx(s); + if (error) + goto out; + goto loop; +} + +/* + * Rubout one character from the rawq of tp + * as cleanly as possible. 
 */
/*
 * Erase the echo of one character from the display, as cleanly as the
 * current echo flags allow.  c may carry TTY_QUOTE from the input path
 * to mark a literal-next'ed character.
 */
ttyrub(c, tp)
	register c;
	register struct tty *tp;
{
	char *cp;
	register int savecol;
	int s;

	/* Nothing to undo if we never echoed, or an external editor owns echo. */
	if ((tp->t_lflag&ECHO) == 0 || (tp->t_lflag&EXTPROC))
		return;
	tp->t_lflag &= ~FLUSHO;
	if (tp->t_lflag&ECHOE) {
		if (tp->t_rocount == 0) {
			/*
			 * Screwed by ttwrite; retype
			 */
			ttyretype(tp);
			return;
		}
		/* A quoted tab/newline was echoed as two columns. */
		if (c == ('\t'|TTY_QUOTE) || c == ('\n'|TTY_QUOTE))
			ttyrubo(tp, 2);
		else switch (CCLASS(c &= TTY_CHARMASK)) {

		case ORDINARY:
			ttyrubo(tp, 1);
			break;

		case VTAB:
		case BACKSPACE:
		case CONTROL:
		case RETURN:
		case NEWLINE:
			/* With ECHOCTL these were echoed as ^X, two columns. */
			if (tp->t_lflag&ECHOCTL)
				ttyrubo(tp, 2);
			break;

		case TAB: {
			int c;		/* intentionally shadows outer c */

			if (tp->t_rocount < RB_LEN(&tp->t_raw)) {
				/* Raw queue no longer matches what was echoed. */
				ttyretype(tp);
				return;
			}
			/*
			 * A tab's width depends on the column it started in.
			 * Re-echo the pending raw input with FLUSHO set so no
			 * output is produced (TS_CNTTB keeps ttyecho from
			 * clearing FLUSHO) purely to recompute t_col.
			 */
			s = spltty();
			savecol = tp->t_col;
			tp->t_state |= TS_CNTTB;
			tp->t_lflag |= FLUSHO;
			tp->t_col = tp->t_rocol;
			cp = tp->t_raw.rb_hd;
			for (c = nextc(&cp, &tp->t_raw); c ;
				c = nextc(&cp, &tp->t_raw))
				ttyecho(c, tp);
			tp->t_lflag &= ~FLUSHO;
			tp->t_state &= ~TS_CNTTB;
			splx(s);
			/*
			 * savecol will now be length of the tab
			 */
			savecol -= tp->t_col;
			tp->t_col += savecol;
			if (savecol > 8)
				savecol = 8;		/* overflow screw */
			while (--savecol >= 0)
				(void) ttyoutput('\b', tp);
			break;
		}

		default:
			/* XXX */
			printf("ttyrub: would panic c = %d, val = %d\n",
				c, CCLASS(c));
			/*panic("ttyrub");*/
		}
	} else if (tp->t_lflag&ECHOPRT) {
		/* Hardcopy-style erase: erased chars printed after a '\'. */
		if ((tp->t_state&TS_ERASE) == 0) {
			(void) ttyoutput('\\', tp);
			tp->t_state |= TS_ERASE;
		}
		ttyecho(c, tp);
	} else
		ttyecho(tp->t_cc[VERASE], tp);
	tp->t_rocount--;	/* one fewer echoed-but-unread character */
}

/*
 * Crt back over cnt chars perhaps
 * erasing them: emits "\b \b" once per character.
 */
ttyrubo(tp, cnt)
	register struct tty *tp;
	int cnt;
{

	while (--cnt >= 0)
		ttyoutstr("\b \b", tp);
}

/*
 * Reprint the rawq line.
 * We assume c_cc has already been checked.
 */
ttyretype(tp)
	register struct tty *tp;
{
	char *cp;
	int s, c;

	/* Echo the reprint character itself (e.g. ^R) if one is enabled. */
	if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE)
		ttyecho(tp->t_cc[VREPRINT], tp);
	(void) ttyoutput('\n', tp);

	/*
	 * Walk both input queues without consuming them (nextc advances
	 * the local cursor cp, not the queue head) and re-echo: first the
	 * canonicalized queue, then the still-raw queue.
	 */
	s = spltty();
	cp = tp->t_can.rb_hd;
	for (c = nextc(&cp, &tp->t_can); c ; c = nextc(&cp, &tp->t_can))
		ttyecho(c, tp);
	cp = tp->t_raw.rb_hd;
	for (c = nextc(&cp, &tp->t_raw); c ; c = nextc(&cp, &tp->t_raw))
		ttyecho(c, tp);
	tp->t_state &= ~TS_ERASE;
	splx(s);

	/* Everything in the raw queue has now been echoed afresh. */
	tp->t_rocount = RB_LEN(&tp->t_raw);
	tp->t_rocol = 0;
}

/*
 * Echo a typed character to the terminal.
 * Honors ECHONL (echo '\n' even with ECHO off) and ECHOCTL
 * (control characters echo as ^X, DEL as ^?).
 */
ttyecho(c, tp)
	register c;
	register struct tty *tp;
{
	/* During tab re-counting (TS_CNTTB) leave FLUSHO set; see ttyrub(). */
	if ((tp->t_state & TS_CNTTB) == 0)
		tp->t_lflag &= ~FLUSHO;
	if (tp->t_lflag & EXTPROC)
		return;
	if ((tp->t_lflag & ECHO) == 0) {
		if ((tp->t_lflag & ECHONL) == 0)
			return;
		else if (c != '\n')
			return;
	}
	if (tp->t_lflag & ECHOCTL) {
		/* Controls other than tab/newline, plus DEL (0177), echo as ^X. */
		if ((c & TTY_CHARMASK) <= 037 && c != '\t' && c != '\n' ||
		    c == 0177) {
			(void) ttyoutput('^', tp);
			c &= TTY_CHARMASK;
			if (c == 0177)
				c = '?';
			else
				c += 'A' - 1;
		}
	}
	(void) ttyoutput(c, tp);
}

/*
 * send string cp to tp, one character at a time through ttyoutput.
 */
ttyoutstr(cp, tp)
	register char *cp;
	register struct tty *tp;
{
	register char c;

	while (c = *cp++)
		(void) ttyoutput(c, tp);
}

/*
 * Wake up any readers on a tty: fire and clear a pending select,
 * post SIGIO to the foreground group if async mode is on, and wake
 * sleepers on the raw queue.
 */
ttwakeup(tp)
	register struct tty *tp;
{

	if (tp->t_rsel) {
		selwakeup(tp->t_rsel, tp->t_state&TS_RCOLL);
		tp->t_state &= ~TS_RCOLL;
		tp->t_rsel = 0;
	}
	if (tp->t_state & TS_ASYNC)
		pgsignal(tp->t_pgrp, SIGIO, 1);
	wakeup((caddr_t)&tp->t_raw);
}

/*
 * Look up a code for a specified speed in a conversion table;
 * used by drivers to map software speed values to hardware parameters.
 */
/*
 * Linear scan of a driver-supplied speedtab; returns the sp_code for
 * the matching sp_speed, or -1 if the speed is not in the table
 * (tables are terminated by sp_speed == -1).
 */
ttspeedtab(speed, table)
	/* NOTE(review): `speed' has no declaration, so it defaults to
	 * int under K&R rules -- an explicit `int speed;' would be
	 * clearer.  Return type is likewise implicit int. */
	register struct speedtab *table;
{

	for ( ; table->sp_speed != -1; table++)
		if (table->sp_speed == speed)
			return (table->sp_code);
	return (-1);
}

/*
 * set tty hi and low water marks
 *
 * Try to arrange the dynamics so there's about one second
 * from hi to low water.
 *
 */
ttsetwater(tp)
	struct tty *tp;
{
	register cps = tp->t_ospeed / 10;	/* approx. chars/second (10 bits per char) */
	register x;

#define clamp(x, h, l)	((x)>h ? h : ((x)<l) ? l : (x))
	tp->t_lowat = x = clamp(cps/2, TTMAXLOWAT, TTMINLOWAT);
	x += cps;	/* hiwat is about one second of output above lowat */
	x = clamp(x, TTMAXHIWAT, TTMINHIWAT);
	tp->t_hiwat = roundup(x, CBSIZE);
#undef clamp
}

/*
 * Report on state of foreground process group.
 * Prints the load average and a one-line summary of the "most
 * interesting" process in the foreground group (see proc_compare).
 */
ttyinfo(tp)
	register struct tty *tp;
{
	register struct proc *p, *pick;
	struct timeval utime, stime;
	int tmp;

	/* Give up rather than block if the output queue is too full. */
	if (ttycheckoutq(tp,0) == 0)
		return;

	/* Print load average. */
	tmp = (averunnable[0] * 100 + FSCALE / 2) >> FSHIFT;
	ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100);

	if (tp->t_session == NULL)
		ttyprintf(tp, "not a controlling terminal\n");
	else if (tp->t_pgrp == NULL)
		ttyprintf(tp, "no foreground process group\n");
	else if ((p = tp->t_pgrp->pg_mem) == NULL)
		ttyprintf(tp, "empty foreground process group\n");
	else {
		/* Pick interesting process. */
		for (pick = NULL; p != NULL; p = p->p_pgrpnxt)
			if (proc_compare(pick, p))
				pick = p;

		ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid,
		    pick->p_stat == SRUN ? "running" :
		    pick->p_wmesg ? pick->p_wmesg : "iowait");

		/*
		 * Lock out clock if process is running; get user/system
		 * cpu time.
		 */
		if (curproc == pick)
			tmp = splclock();
		utime = pick->p_utime;
		stime = pick->p_stime;
		if (curproc == pick)
			splx(tmp);

		/* Print user time. */
		ttyprintf(tp, "%d.%02du ",
		    utime.tv_sec, (utime.tv_usec + 5000) / 10000);

		/* Print system time. */
		ttyprintf(tp, "%d.%02ds ",
		    stime.tv_sec, (stime.tv_usec + 5000) / 10000);

#define	pgtok(a)	(((a) * NBPG) / 1024)
		/* Print percentage cpu, resident set size. */
		/* >> binds looser than +, so the whole scaled sum is shifted. */
		tmp = pick->p_pctcpu * 10000 + FSCALE / 2 >> FSHIFT;
		ttyprintf(tp, "%d%% %dk\n",
		   tmp / 100, pgtok(pick->p_vmspace->vm_rssize));
	}
	tp->t_rocount = 0;	/* so pending input will be retyped if BS */
}

/*
 * Returns 1 if p2 is "better" than p1
 *
 * The algorithm for picking the "interesting" process is thus:
 *
 *	1) (Only foreground processes are eligible - implied)
 *	2) Runnable processes are favored over anything
 *	   else.  The runner with the highest cpu
 *	   utilization is picked (p_cpu).  Ties are
 *	   broken by picking the highest pid.
 *	3) Next, the sleeper with the shortest sleep
 *	   time is favored.  With ties, we pick out
 *	   just "short-term" sleepers (SSINTR == 0).
 *	   Further ties are broken by picking the highest
 *	   pid.
 *
 */
#define isrun(p)	(((p)->p_stat == SRUN) || ((p)->p_stat == SIDL))
#define TESTAB(a, b)	((a)<<1 | (b))
#define ONLYA	2
#define ONLYB	1
#define BOTH	3

static int
proc_compare(p1, p2)
	register struct proc *p1, *p2;
{

	/* No incumbent yet: any candidate wins. */
	if (p1 == NULL)
		return (1);
	/*
	 * see if at least one of them is runnable
	 */
	switch (TESTAB(isrun(p1), isrun(p2))) {
	case ONLYA:
		return (0);
	case ONLYB:
		return (1);
	case BOTH:
		/*
		 * tie - favor one with highest recent cpu utilization
		 */
		if (p2->p_cpu > p1->p_cpu)
			return (1);
		if (p1->p_cpu > p2->p_cpu)
			return (0);
		return (p2->p_pid > p1->p_pid); /* tie - return highest pid */
	}
	/*
	 * weed out zombies
	 */
	switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) {
	case ONLYA:
		return (1);
	case ONLYB:
		return (0);
	case BOTH:
		return (p2->p_pid > p1->p_pid); /* tie - return highest pid */
	}
	/*
	 * pick the one with the smallest sleep time
	 */
	if (p2->p_slptime > p1->p_slptime)
		return (0);
	if (p1->p_slptime > p2->p_slptime)
		return (1);
	/*
	 * favor one
sleeping in a non-interruptible sleep + */ + if (p1->p_flag&SSINTR && (p2->p_flag&SSINTR) == 0) + return (1); + if (p2->p_flag&SSINTR && (p1->p_flag&SSINTR) == 0) + return (0); + return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ +} + +/* + * Output char to tty; console putchar style. + */ +tputchar(c, tp) + int c; + struct tty *tp; +{ + register s = spltty(); + + if ((tp->t_state & (TS_CARR_ON|TS_ISOPEN)) == (TS_CARR_ON|TS_ISOPEN)) { + if (c == '\n') + (void) ttyoutput('\r', tp); + (void) ttyoutput(c, tp); + ttstart(tp); + splx(s); + return (0); + } + splx(s); + return (-1); +} + +/* + * Sleep on chan, returning ERESTART if tty changed + * while we napped and returning any errors (e.g. EINTR/ETIMEDOUT) + * reported by tsleep. If the tty is revoked, restarting a pending + * call will redo validation done at the start of the call. + */ +ttysleep(tp, chan, pri, wmesg, timo) + struct tty *tp; + caddr_t chan; + int pri; + char *wmesg; + int timo; +{ + int error; + short gen = tp->t_gen; + + if (error = tsleep(chan, pri, wmesg, timo)) + return (error); + if (tp->t_gen != gen) + return (ERESTART); + return (0); +} diff --git a/sys/kern/tty_compat.c b/sys/kern/tty_compat.c new file mode 100644 index 000000000000..a3b92500059c --- /dev/null +++ b/sys/kern/tty_compat.c @@ -0,0 +1,422 @@ +/*- + * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)tty_compat.c 7.10 (Berkeley) 5/9/91 + * $Id: tty_compat.c,v 1.2 1993/10/16 15:24:57 rgrimes Exp $ + */ + +/* + * mapping routines for old line discipline (yuck) + */ +#ifdef COMPAT_43 + +#include "param.h" +#include "systm.h" +#include "ioctl.h" +#include "tty.h" +#include "termios.h" +#include "proc.h" +#include "file.h" +#include "conf.h" +#include "dkstat.h" +#include "kernel.h" +#include "syslog.h" + +int ttydebug = 0; + +static struct speedtab compatspeeds[] = { +#define MAX_SPEED 17 + 115200, 17, + 57600, 16, + 38400, 15, + 19200, 14, + 9600, 13, + 4800, 12, + 2400, 11, + 1800, 10, + 1200, 9, + 600, 8, + 300, 7, + 200, 6, + 150, 5, + 134, 4, + 110, 3, + 75, 2, + 50, 1, + 0, 0, + -1, -1, +}; +static int compatspcodes[] = { + 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, + 1800, 2400, 4800, 9600, 19200, 38400, 57600, 115200, +}; + +/*ARGSUSED*/ +ttcompat(tp, com, data, flag) + register struct tty *tp; + caddr_t data; +{ + + switch (com) { + case TIOCGETP: { + register struct sgttyb *sg = (struct sgttyb *)data; + register u_char *cc = tp->t_cc; + register speed; + + speed = ttspeedtab(tp->t_ospeed, compatspeeds); + sg->sg_ospeed = (speed == -1) ? MAX_SPEED : speed; + if (tp->t_ispeed == 0) + sg->sg_ispeed = sg->sg_ospeed; + else { + speed = ttspeedtab(tp->t_ispeed, compatspeeds); + sg->sg_ispeed = (speed == -1) ? 
MAX_SPEED : speed; + } + sg->sg_erase = cc[VERASE]; + sg->sg_kill = cc[VKILL]; + sg->sg_flags = tp->t_flags = ttcompatgetflags(tp); + break; + } + + case TIOCSETP: + case TIOCSETN: { + register struct sgttyb *sg = (struct sgttyb *)data; + struct termios term; + int speed; + + term = tp->t_termios; + if ((speed = sg->sg_ispeed) > MAX_SPEED || speed < 0) + term.c_ispeed = speed; + else + term.c_ispeed = compatspcodes[speed]; + if ((speed = sg->sg_ospeed) > MAX_SPEED || speed < 0) + term.c_ospeed = speed; + else + term.c_ospeed = compatspcodes[speed]; + term.c_cc[VERASE] = sg->sg_erase; + term.c_cc[VKILL] = sg->sg_kill; + tp->t_flags = tp->t_flags&0xffff0000 | sg->sg_flags&0xffff; + ttcompatsetflags(tp, &term); + return (ttioctl(tp, com == TIOCSETP ? TIOCSETAF : TIOCSETA, + (caddr_t)&term, flag)); + } + + case TIOCGETC: { + struct tchars *tc = (struct tchars *)data; + register u_char *cc = tp->t_cc; + + tc->t_intrc = cc[VINTR]; + tc->t_quitc = cc[VQUIT]; + tc->t_startc = cc[VSTART]; + tc->t_stopc = cc[VSTOP]; + tc->t_eofc = cc[VEOF]; + tc->t_brkc = cc[VEOL]; + break; + } + case TIOCSETC: { + struct tchars *tc = (struct tchars *)data; + register u_char *cc = tp->t_cc; + + cc[VINTR] = tc->t_intrc; + cc[VQUIT] = tc->t_quitc; + cc[VSTART] = tc->t_startc; + cc[VSTOP] = tc->t_stopc; + cc[VEOF] = tc->t_eofc; + cc[VEOL] = tc->t_brkc; + if (tc->t_brkc == -1) + cc[VEOL2] = _POSIX_VDISABLE; + break; + } + case TIOCSLTC: { + struct ltchars *ltc = (struct ltchars *)data; + register u_char *cc = tp->t_cc; + + cc[VSUSP] = ltc->t_suspc; + cc[VDSUSP] = ltc->t_dsuspc; + cc[VREPRINT] = ltc->t_rprntc; + cc[VDISCARD] = ltc->t_flushc; + cc[VWERASE] = ltc->t_werasc; + cc[VLNEXT] = ltc->t_lnextc; + break; + } + case TIOCGLTC: { + struct ltchars *ltc = (struct ltchars *)data; + register u_char *cc = tp->t_cc; + + ltc->t_suspc = cc[VSUSP]; + ltc->t_dsuspc = cc[VDSUSP]; + ltc->t_rprntc = cc[VREPRINT]; + ltc->t_flushc = cc[VDISCARD]; + ltc->t_werasc = cc[VWERASE]; + ltc->t_lnextc = cc[VLNEXT]; + 
break; + } + case TIOCLBIS: + case TIOCLBIC: + case TIOCLSET: { + struct termios term; + + term = tp->t_termios; + if (com == TIOCLSET) + tp->t_flags = (tp->t_flags&0xffff) | *(int *)data<<16; + else { + tp->t_flags = + (ttcompatgetflags(tp)&0xffff0000)|(tp->t_flags&0xffff); + if (com == TIOCLBIS) + tp->t_flags |= *(int *)data<<16; + else + tp->t_flags &= ~(*(int *)data<<16); + } + ttcompatsetlflags(tp, &term); + return (ttioctl(tp, TIOCSETA, (caddr_t)&term, flag)); + } + case TIOCLGET: + tp->t_flags = + (ttcompatgetflags(tp)&0xffff0000)|(tp->t_flags&0xffff); + *(int *)data = tp->t_flags>>16; + if (ttydebug) + printf("CLGET: returning %x\n", *(int *)data); + break; + + case OTIOCGETD: + *(int *)data = tp->t_line ? tp->t_line : 2; + break; + + case OTIOCSETD: { + int ldisczero = 0; + + return (ttioctl(tp, TIOCSETD, + *(int *)data == 2 ? (caddr_t)&ldisczero : data, flag)); + } + + case OTIOCCONS: + *(int *)data = 1; + return (ttioctl(tp, TIOCCONS, data, flag)); + + default: + return (-1); + } + return (0); +} + +ttcompatgetflags(tp) + register struct tty *tp; +{ + register long iflag = tp->t_iflag; + register long lflag = tp->t_lflag; + register long oflag = tp->t_oflag; + register long cflag = tp->t_cflag; + register flags = 0; + + if (iflag&IXOFF) + flags |= TANDEM; + if (iflag&ICRNL || oflag&ONLCR) + flags |= CRMOD; + if ((cflag&CSIZE) == CS8) { + flags |= PASS8; + if (iflag&ISTRIP) + flags |= ANYP; + } + else if (cflag&PARENB) { + if (iflag&INPCK) { + if (cflag&PARODD) + flags |= ODDP; + else + flags |= EVENP; + } + else + flags |= EVENP | ODDP; + } + + if ((lflag&ICANON) == 0) { + /* fudge */ + if (iflag&(INPCK|ISTRIP|IXON) || lflag&(IEXTEN|ISIG) + || cflag&(CSIZE|PARENB) != CS8) + flags |= CBREAK; + else + flags |= RAW; + } + if (!(flags&RAW) && !(oflag&OPOST) && cflag&(CSIZE|PARENB) == CS8) + flags |= LITOUT; + if (oflag&OXTABS) + flags |= XTABS; + if (lflag&ECHOE) + flags |= CRTERA|CRTBS; + if (lflag&ECHOKE) + flags |= CRTKIL|CRTBS; + if (lflag&ECHOPRT) + 
		flags |= PRTERA;
	if (lflag&ECHOCTL)
		flags |= CTLECH;
	if ((iflag&IXANY) == 0)
		flags |= DECCTQ;
	/* These local-mode bits map one-for-one between the two schemes. */
	flags |= lflag&(ECHO|MDMBUF|TOSTOP|FLUSHO|NOHANG|PENDIN|NOFLSH);
if (ttydebug)
	printf("getflags: %x\n", flags);
	return (flags);
}

/*
 * Apply the old-style sgtty flag bits in tp->t_flags to a termios
 * image: RAW/CBREAK, CRMOD, ECHO, character size/parity and XON/XOFF
 * flow control are translated into c_iflag/c_oflag/c_lflag/c_cflag.
 */
ttcompatsetflags(tp, t)
	register struct tty *tp;
	register struct termios *t;
{
	register flags = tp->t_flags;
	register long iflag	= t->c_iflag;
	register long oflag	= t->c_oflag;
	register long lflag	= t->c_lflag;
	register long cflag	= t->c_cflag;

	if (flags & RAW) {
		/* RAW: no input processing, signals, or canonical editing. */
		iflag &= IXOFF|IXANY;
		lflag &= ~(ECHOCTL|ISIG|ICANON|IEXTEN);
	} else {
		iflag |= BRKINT|IXON|IMAXBEL;
		lflag |= ISIG|IEXTEN|ECHOCTL;	/* XXX was echoctl on ? */
		if (flags & XTABS)
			oflag |= OXTABS;
		else
			oflag &= ~OXTABS;
		if (flags & CBREAK)
			lflag &= ~ICANON;
		else
			lflag |= ICANON;
		if (flags&CRMOD) {
			iflag |= ICRNL;
			oflag |= ONLCR;
		} else {
			iflag &= ~ICRNL;
			oflag &= ~ONLCR;
		}
	}
	if (flags&ECHO)
		lflag |= ECHO;
	else
		lflag &= ~ECHO;

	/* Recompute character size/parity from the RAW/LITOUT/PASS8 bits. */
	cflag &= ~(CSIZE|PARENB);
	if (flags&(RAW|LITOUT|PASS8)) {
		cflag |= CS8;
		if (!(flags&(RAW|PASS8))
		    || (flags&(RAW|PASS8|ANYP)) == (PASS8|ANYP))
			iflag |= ISTRIP;
		else
			iflag &= ~ISTRIP;
		if (flags&(RAW|LITOUT))
			oflag &= ~OPOST;
		else
			oflag |= OPOST;
	} else {
		cflag |= CS7|PARENB;
		iflag |= ISTRIP;
		oflag |= OPOST;
	}
	if ((flags&(EVENP|ODDP)) == EVENP) {
		iflag |= INPCK;
		cflag &= ~PARODD;
	} else if ((flags&(EVENP|ODDP)) == ODDP) {
		iflag |= INPCK;
		cflag |= PARODD;
	} else
		iflag &= ~INPCK;	/* both or neither: ignore parity */
	if (flags&TANDEM)
		iflag |= IXOFF;
	else
		iflag &= ~IXOFF;
	t->c_iflag = iflag;
	t->c_oflag = oflag;
	t->c_lflag = lflag;
	t->c_cflag = cflag;
}

/*
 * Apply the old-style "local mode" bits (high 16 of tp->t_flags) to a
 * termios image.
 */
ttcompatsetlflags(tp, t)
	register struct tty *tp;
	register struct termios *t;
{
	register flags = tp->t_flags;
	register long iflag	= t->c_iflag;
	register long oflag	= t->c_oflag;
	register long lflag	= t->c_lflag;
	register long cflag	= t->c_cflag;

	if (flags&CRTERA)
		lflag |= ECHOE;
else + lflag &= ~ECHOE; + if (flags&CRTKIL) + lflag |= ECHOKE; + else + lflag &= ~ECHOKE; + if (flags&PRTERA) + lflag |= ECHOPRT; + else + lflag &= ~ECHOPRT; + if (flags&CTLECH) + lflag |= ECHOCTL; + else + lflag &= ~ECHOCTL; + if ((flags&DECCTQ) == 0) + lflag |= IXANY; + else + lflag &= ~IXANY; + lflag &= ~(MDMBUF|TOSTOP|FLUSHO|NOHANG|PENDIN|NOFLSH); + lflag |= flags&(MDMBUF|TOSTOP|FLUSHO|NOHANG|PENDIN|NOFLSH); + + cflag &= ~(CSIZE|PARENB); + /* + * The next if-else statement is copied from above so don't bother + * checking it separately. We could avoid fiddlling with the + * character size if the mode is already RAW or if neither the + * LITOUT bit or the PASS8 bit is being changed, but the delta of + * the change is not available here and skipping the RAW case would + * make the code different from above. + */ + if (flags&(RAW|LITOUT|PASS8)) { + cflag |= CS8; + if (!(flags&(RAW|PASS8)) + || (flags&(RAW|PASS8|ANYP)) == (PASS8|ANYP)) + iflag |= ISTRIP; + else + iflag &= ~ISTRIP; + if (flags&(RAW|LITOUT)) + oflag &= ~OPOST; + else + oflag |= OPOST; + } else { + cflag |= CS7|PARENB; + iflag |= ISTRIP; + oflag |= OPOST; + } + t->c_iflag = iflag; + t->c_oflag = oflag; + t->c_lflag = lflag; + t->c_cflag = cflag; +} +#endif /* COMPAT_43 */ diff --git a/sys/kern/tty_conf.c b/sys/kern/tty_conf.c new file mode 100644 index 000000000000..861adfe4b324 --- /dev/null +++ b/sys/kern/tty_conf.c @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * from: @(#)tty_conf.c	7.6 (Berkeley) 5/9/91
 *	$Id: tty_conf.c,v 1.3 1993/10/16 15:24:58 rgrimes Exp $
 */

#include "param.h"
#include "systm.h"
#include "buf.h"
#include "ioctl.h"
#include "tty.h"
#include "conf.h"

int	enodev();
int	nullop();

/* Handlers for the standard termios line discipline (entry 0 below). */
int	ttyopen(),ttylclose(),ttread(),ttwrite(),nullioctl(),ttstart();
int	ttymodem(), nullmodem(), ttyinput();

#include "tb.h"
#if NTB > 0
int	tbopen(),tbclose(),tbread(),tbinput(),tbioctl();
#endif

#include "sl.h"
#if NSL > 0
int	slopen(),slclose(),slinput(),sltioctl(),slstart();
#endif
#include "ppp.h"
#if NPPP > 0
int	pppopen(),pppclose(),pppread(),pppwrite(),pppinput();
int	ppptioctl(),pppstart(),pppselect();
#endif


/*
 * Line discipline switch table: ten function pointers per discipline,
 * indexed by discipline number (trailing comment on each row).
 * NOTE(review): the exact slot order/names are defined by struct
 * linesw in conf.h -- confirm there before reordering.  Disciplines
 * whose drivers are not configured in fall back to rows of enodev.
 */
struct	linesw linesw[] =
{
	ttyopen, ttylclose, ttread, ttwrite, nullioctl,
	ttyinput, enodev, nullop, ttstart, ttymodem,	/* 0- termios */

	enodev, enodev, enodev, enodev, enodev,		/* 1- defunct */
	enodev, enodev, enodev, enodev, enodev,

	enodev, enodev, enodev, enodev, enodev,		/* 2- defunct */
	enodev, enodev, enodev, enodev, enodev,
#if NTB > 0
	tbopen, tbclose, tbread, enodev, tbioctl,
	tbinput, enodev, nullop, ttstart, nullmodem,	/* 3- TABLDISC */
#else
	enodev, enodev, enodev, enodev, enodev,
	enodev, enodev, enodev, enodev, enodev,
#endif
#if NSL > 0
	slopen, slclose, enodev, enodev, sltioctl,
	slinput, enodev, nullop, slstart, nullmodem,	/* 4- SLIPDISC */
#else
	enodev, enodev, enodev, enodev, enodev,
	enodev, enodev, enodev, enodev, enodev,
#endif
#if NPPP > 0
	pppopen, pppclose, pppread, pppwrite, ppptioctl,
	pppinput, enodev, nullop, pppstart, ttymodem,	/* 5- PPPDISC */
#else
	enodev, enodev, enodev, enodev, enodev,
	enodev, enodev, enodev, enodev, enodev,
#endif
};

/* Number of configured disciplines; presumably bounds-checks TIOCSETD
 * in the tty ioctl path -- verify in tty.c. */
int	nldisp = sizeof (linesw) / sizeof (linesw[0]);

/*
 * Do nothing specific version of line
 * discipline specific ioctl command.
+ */ +/*ARGSUSED*/ +nullioctl(tp, cmd, data, flags) + struct tty *tp; + char *data; + int flags; +{ + +#ifdef lint + tp = tp; data = data; flags = flags; +#endif + return (-1); +} diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c new file mode 100644 index 000000000000..b324ca4a1b29 --- /dev/null +++ b/sys/kern/tty_pty.c @@ -0,0 +1,711 @@ +/* + * Copyright (c) 1982, 1986, 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)tty_pty.c 7.21 (Berkeley) 5/30/91 + * $Id: tty_pty.c,v 1.4 1993/10/16 15:25:00 rgrimes Exp $ + */ + +/* + * Pseudo-teletype Driver + * (Actually two drivers, requiring two entries in 'cdevsw') + */ +#include "pty.h" + +#if NPTY > 0 +#include "param.h" +#include "systm.h" +#include "ioctl.h" +#include "tty.h" +#include "conf.h" +#include "file.h" +#include "proc.h" +#include "uio.h" +#include "kernel.h" +#include "vnode.h" + +#if NPTY == 1 +#undef NPTY +#define NPTY 32 /* crude XXX */ +#endif + +#define BUFSIZ 100 /* Chunk size iomoved to/from user */ + +/* + * pts == /dev/tty[pqrs]? + * ptc == /dev/pty[pqrs]? 
+ */ +struct tty pt_tty[NPTY]; +struct pt_ioctl { + int pt_flags; + pid_t pt_selr, pt_selw; + u_char pt_send; + u_char pt_ucntl; +} pt_ioctl[NPTY]; +int npty = NPTY; /* for pstat -t */ + +#define PF_RCOLL 0x01 +#define PF_WCOLL 0x02 +#define PF_PKT 0x08 /* packet mode */ +#define PF_STOPPED 0x10 /* user told stopped */ +#define PF_REMOTE 0x20 /* remote and flow controlled input */ +#define PF_NOSTOP 0x40 +#define PF_UCNTL 0x80 /* user control mode */ + +/*ARGSUSED*/ +ptsopen(dev, flag, devtype, p) + dev_t dev; + struct proc *p; +{ + register struct tty *tp; + int error; + +#ifdef lint + npty = npty; +#endif + if (minor(dev) >= NPTY) + return (ENXIO); + tp = &pt_tty[minor(dev)]; + if ((tp->t_state & TS_ISOPEN) == 0) { + tp->t_state |= TS_WOPEN; + ttychars(tp); /* Set up default chars */ + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_cflag = TTYDEF_CFLAG; + tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; + ttsetwater(tp); /* would be done in xxparam() */ + } else if (tp->t_state&TS_XCLUDE && p->p_ucred->cr_uid != 0) + return (EBUSY); + if (tp->t_oproc) /* Ctrlr still around. 
*/ + tp->t_state |= TS_CARR_ON; + while ((tp->t_state & TS_CARR_ON) == 0) { + tp->t_state |= TS_WOPEN; + if (flag&FNONBLOCK) + break; + if (error = ttysleep(tp, (caddr_t)&tp->t_raw, TTIPRI | PCATCH, + ttopen, 0)) + return (error); + } + error = (*linesw[tp->t_line].l_open)(dev, tp, flag); + ptcwakeup(tp, FREAD|FWRITE); + return (error); +} + +ptsclose(dev, flag, mode, p) + dev_t dev; + int flag, mode; + struct proc *p; +{ + register struct tty *tp; + + tp = &pt_tty[minor(dev)]; + (*linesw[tp->t_line].l_close)(tp, flag); + ttyclose(tp); + ptcwakeup(tp, FREAD|FWRITE); + return(0); +} + +ptsread(dev, uio, flag) + dev_t dev; + struct uio *uio; +{ + struct proc *p = curproc; + register struct tty *tp = &pt_tty[minor(dev)]; + register struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + int error = 0; + +again: + if (pti->pt_flags & PF_REMOTE) { + while (isbackground(p, tp)) { + if ((p->p_sigignore & sigmask(SIGTTIN)) || + (p->p_sigmask & sigmask(SIGTTIN)) || + p->p_pgrp->pg_jobc == 0 || + p->p_flag&SPPWAIT) + return (EIO); + pgsignal(p->p_pgrp, SIGTTIN, 1); + if (error = ttysleep(tp, (caddr_t)&lbolt, + TTIPRI | PCATCH, ttybg, 0)) + return (error); + } + if (RB_LEN(&tp->t_can) == 0) { + if (flag & IO_NDELAY) + return (EWOULDBLOCK); + if (error = ttysleep(tp, (caddr_t)&tp->t_can, + TTIPRI | PCATCH, ttyin, 0)) + return (error); + goto again; + } + while (RB_LEN(&tp->t_can) > 1 && uio->uio_resid > 0) + if (ureadc(getc(&tp->t_can), uio) < 0) { + error = EFAULT; + break; + } + if (RB_LEN(&tp->t_can) == 1) + (void) getc(&tp->t_can); + if (RB_LEN(&tp->t_can)) + return (error); + } else + if (tp->t_oproc) + error = (*linesw[tp->t_line].l_read)(tp, uio, flag); + ptcwakeup(tp, FWRITE); + return (error); +} + +/* + * Write to pseudo-tty. + * Wakeups of controlling tty will happen + * indirectly, when tty driver calls ptsstart. 
+ */ +ptswrite(dev, uio, flag) + dev_t dev; + struct uio *uio; +{ + register struct tty *tp; + + tp = &pt_tty[minor(dev)]; + if (tp->t_oproc == 0) + return (EIO); + return ((*linesw[tp->t_line].l_write)(tp, uio, flag)); +} + +/* + * Start output on pseudo-tty. + * Wake up process selecting or sleeping for input from controlling tty. + */ +ptsstart(tp) + struct tty *tp; +{ + register struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; + + if (tp->t_state & TS_TTSTOP) + return; + if (pti->pt_flags & PF_STOPPED) { + pti->pt_flags &= ~PF_STOPPED; + pti->pt_send = TIOCPKT_START; + } + ptcwakeup(tp, FREAD); +} + +ptcwakeup(tp, flag) + struct tty *tp; +{ + struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; + + if (flag & FREAD) { + if (pti->pt_selr) { + selwakeup(pti->pt_selr, pti->pt_flags & PF_RCOLL); + pti->pt_selr = 0; + pti->pt_flags &= ~PF_RCOLL; + } + wakeup((caddr_t)&tp->t_out.rb_tl); + } + if (flag & FWRITE) { + if (pti->pt_selw) { + selwakeup(pti->pt_selw, pti->pt_flags & PF_WCOLL); + pti->pt_selw = 0; + pti->pt_flags &= ~PF_WCOLL; + } + wakeup((caddr_t)&tp->t_raw.rb_hd); + } +} + +/*ARGSUSED*/ +#ifdef __STDC__ +ptcopen(dev_t dev, int flag, int devtype, struct proc *p) +#else +ptcopen(dev, flag, devtype, p) + dev_t dev; + int flag, devtype; + struct proc *p; +#endif +{ + register struct tty *tp; + struct pt_ioctl *pti; + + if (minor(dev) >= NPTY) + return (ENXIO); + tp = &pt_tty[minor(dev)]; + if (tp->t_oproc) + return (EIO); + tp->t_oproc = ptsstart; + (void)(*linesw[tp->t_line].l_modem)(tp, 1); + tp->t_lflag &= ~EXTPROC; + pti = &pt_ioctl[minor(dev)]; + pti->pt_flags = 0; + pti->pt_send = 0; + pti->pt_ucntl = 0; + return (0); +} + +extern struct tty *constty; /* -hv- 06.Oct.92*/ +ptcclose(dev) + dev_t dev; +{ + register struct tty *tp; + + tp = &pt_tty[minor(dev)]; + (void)(*linesw[tp->t_line].l_modem)(tp, 0); + tp->t_state &= ~TS_CARR_ON; + tp->t_oproc = 0; /* mark closed */ + tp->t_session = 0; + +/* XXX -hv- 6.Oct.92 this prevents the "hanging console bug" 
with X11 */ + if (constty==tp) + constty = 0; + + return (0); +} + +ptcread(dev, uio, flag) + dev_t dev; + struct uio *uio; +{ + register struct tty *tp = &pt_tty[minor(dev)]; + struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + char buf[BUFSIZ]; + int error = 0, cc; + + /* + * We want to block until the slave + * is open, and there's something to read; + * but if we lost the slave or we're NBIO, + * then return the appropriate error instead. + */ + for (;;) { + if (tp->t_state&TS_ISOPEN) { + if (pti->pt_flags&PF_PKT && pti->pt_send) { + error = ureadc((int)pti->pt_send, uio); + if (error) + return (error); + if (pti->pt_send & TIOCPKT_IOCTL) { + cc = MIN(uio->uio_resid, + sizeof(tp->t_termios)); + uiomove(&tp->t_termios, cc, uio); + } + pti->pt_send = 0; + return (0); + } + if (pti->pt_flags&PF_UCNTL && pti->pt_ucntl) { + error = ureadc((int)pti->pt_ucntl, uio); + if (error) + return (error); + pti->pt_ucntl = 0; + return (0); + } + if (RB_LEN(&tp->t_out) && (tp->t_state&TS_TTSTOP) == 0) + break; + } + if ((tp->t_state&TS_CARR_ON) == 0) + return (0); /* EOF */ + if (flag & IO_NDELAY) + return (EWOULDBLOCK); + if (error = tsleep((caddr_t)&tp->t_out.rb_tl, TTIPRI | PCATCH, + ttyin, 0)) + return (error); + } + if (pti->pt_flags & (PF_PKT|PF_UCNTL)) + error = ureadc(0, uio); + while (uio->uio_resid > 0 && error == 0) { +#ifdef was + cc = q_to_b(&tp->t_outq, buf, MIN(uio->uio_resid, BUFSIZ)); +#else + cc = min(MIN(uio->uio_resid, BUFSIZ), RB_CONTIGGET(&tp->t_out)); + if (cc) { + bcopy(tp->t_out.rb_hd, buf, cc); + tp->t_out.rb_hd = + RB_ROLLOVER(&tp->t_out, tp->t_out.rb_hd+cc); + } +#endif + if (cc <= 0) + break; + error = uiomove(buf, cc, uio); + } + if (RB_LEN(&tp->t_out) <= tp->t_lowat) { + if (tp->t_state&TS_ASLEEP) { + tp->t_state &= ~TS_ASLEEP; + wakeup((caddr_t)&tp->t_out); + } + if (tp->t_wsel) { + selwakeup(tp->t_wsel, tp->t_state & TS_WCOLL); + tp->t_wsel = 0; + tp->t_state &= ~TS_WCOLL; + } + } + return (error); +} + +ptsstop(tp, flush) + register struct tty 
*tp; + int flush; +{ + struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; + int flag; + + /* note: FLUSHREAD and FLUSHWRITE already ok */ + if (flush == 0) { + flush = TIOCPKT_STOP; + pti->pt_flags |= PF_STOPPED; + } else + pti->pt_flags &= ~PF_STOPPED; + pti->pt_send |= flush; + /* change of perspective */ + flag = 0; + if (flush & FREAD) + flag |= FWRITE; + if (flush & FWRITE) + flag |= FREAD; + ptcwakeup(tp, flag); +} + +ptcselect(dev, rw, p) + dev_t dev; + int rw; + struct proc *p; +{ + register struct tty *tp = &pt_tty[minor(dev)]; + struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + struct proc *prev; + int s; + + if ((tp->t_state&TS_CARR_ON) == 0) + return (1); + switch (rw) { + + case FREAD: + /* + * Need to block timeouts (ttrstart). + */ + s = spltty(); + if ((tp->t_state&TS_ISOPEN) && + RB_LEN(&tp->t_out) && (tp->t_state&TS_TTSTOP) == 0) { + splx(s); + return (1); + } + splx(s); + /* FALLTHROUGH */ + + case 0: /* exceptional */ + if ((tp->t_state&TS_ISOPEN) && + (pti->pt_flags&PF_PKT && pti->pt_send || + pti->pt_flags&PF_UCNTL && pti->pt_ucntl)) + return (1); + if (pti->pt_selr && (prev = pfind(pti->pt_selr)) && prev->p_wchan == (caddr_t)&selwait) + pti->pt_flags |= PF_RCOLL; + else + pti->pt_selr = p->p_pid; + break; + + + case FWRITE: + if (tp->t_state&TS_ISOPEN) { + if (pti->pt_flags & PF_REMOTE) { + if (RB_LEN(&tp->t_can) == 0) + return (1); + } else { + if (RB_LEN(&tp->t_raw) + RB_LEN(&tp->t_can) < TTYHOG-2) + return (1); + if (RB_LEN(&tp->t_can) == 0 && (tp->t_iflag&ICANON)) + return (1); + } + } + if (pti->pt_selw && (prev = pfind(pti->pt_selw)) && prev->p_wchan == (caddr_t)&selwait) + pti->pt_flags |= PF_WCOLL; + else + pti->pt_selw = p->p_pid; + break; + + } + return (0); +} + +ptcwrite(dev, uio, flag) + dev_t dev; + register struct uio *uio; +{ + register struct tty *tp = &pt_tty[minor(dev)]; + register u_char *cp; + register int cc = 0; + u_char locbuf[BUFSIZ]; + int cnt = 0; + struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + int error = 0; + 
+again: + if ((tp->t_state&TS_ISOPEN) == 0) + goto block; + if (pti->pt_flags & PF_REMOTE) { + if (RB_LEN(&tp->t_can)) + goto block; + while (uio->uio_resid > 0 && RB_LEN(&tp->t_can) < TTYHOG - 1) { + if (cc == 0) { + cc = min(uio->uio_resid, BUFSIZ); + cc = min(cc, RB_CONTIGPUT(&tp->t_can)); + cp = locbuf; + error = uiomove((caddr_t)cp, cc, uio); + if (error) + return (error); + /* check again for safety */ + if ((tp->t_state&TS_ISOPEN) == 0) + return (EIO); + } +#ifdef was + if (cc) + (void) b_to_q((char *)cp, cc, &tp->t_canq); +#else + if (cc) { + bcopy(cp, tp->t_can.rb_tl, cc); + tp->t_can.rb_tl = + RB_ROLLOVER(&tp->t_can, tp->t_can.rb_tl+cc); + } +#endif + cc = 0; + } + (void) putc(0, &tp->t_can); + ttwakeup(tp); + wakeup((caddr_t)&tp->t_can); + return (0); + } + while (uio->uio_resid > 0) { + if (cc == 0) { + cc = min(uio->uio_resid, BUFSIZ); + cp = locbuf; + error = uiomove((caddr_t)cp, cc, uio); + if (error) + return (error); + /* check again for safety */ + if ((tp->t_state&TS_ISOPEN) == 0) + return (EIO); + } + while (cc > 0) { + if ((RB_LEN(&tp->t_raw) + RB_LEN(&tp->t_can)) >= TTYHOG - 2 && + (RB_LEN(&tp->t_can) > 0 || !(tp->t_iflag&ICANON))) { + wakeup((caddr_t)&tp->t_raw); + goto block; + } + (*linesw[tp->t_line].l_rint)(*cp++, tp); + cnt++; + cc--; + } + cc = 0; + } + return (0); +block: + /* + * Come here to wait for slave to open, for space + * in outq, or space in rawq. 
+ */ + if ((tp->t_state&TS_CARR_ON) == 0) + return (EIO); + if (flag & IO_NDELAY) { + /* adjust for data copied in but not written */ + uio->uio_resid += cc; + if (cnt == 0) + return (EWOULDBLOCK); + return (0); + } + if (error = tsleep((caddr_t)&tp->t_raw.rb_hd, TTOPRI | PCATCH, + ttyout, 0)) { + /* adjust for data copied in but not written */ + uio->uio_resid += cc; + return (error); + } + goto again; +} + +/*ARGSUSED*/ +ptyioctl(dev, cmd, data, flag) + caddr_t data; + dev_t dev; +{ + register struct tty *tp = &pt_tty[minor(dev)]; + register struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + register u_char *cc = tp->t_cc; + int stop, error; + extern ttyinput(); + + /* + * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG. + * ttywflush(tp) will hang if there are characters in the outq. + */ + if (cmd == TIOCEXT) { + /* + * When the EXTPROC bit is being toggled, we need + * to send an TIOCPKT_IOCTL if the packet driver + * is turned on. + */ + if (*(int *)data) { + if (pti->pt_flags & PF_PKT) { + pti->pt_send |= TIOCPKT_IOCTL; + ptcwakeup(tp, FREAD); + } + tp->t_lflag |= EXTPROC; + } else { + if ((tp->t_state & EXTPROC) && + (pti->pt_flags & PF_PKT)) { + pti->pt_send |= TIOCPKT_IOCTL; + ptcwakeup(tp, FREAD); + } + tp->t_lflag &= ~EXTPROC; + } + return(0); + } else + if (cdevsw[major(dev)].d_open == ptcopen) + switch (cmd) { + + case TIOCGPGRP: + /* + * We aviod calling ttioctl on the controller since, + * in that case, tp must be the controlling terminal. + */ + *(int *)data = tp->t_pgrp ? 
tp->t_pgrp->pg_id : 0; + return (0); + + case TIOCPKT: + if (*(int *)data) { + if (pti->pt_flags & PF_UCNTL) + return (EINVAL); + pti->pt_flags |= PF_PKT; + } else + pti->pt_flags &= ~PF_PKT; + return (0); + + case TIOCUCNTL: + if (*(int *)data) { + if (pti->pt_flags & PF_PKT) + return (EINVAL); + pti->pt_flags |= PF_UCNTL; + } else + pti->pt_flags &= ~PF_UCNTL; + return (0); + + case TIOCREMOTE: + if (*(int *)data) + pti->pt_flags |= PF_REMOTE; + else + pti->pt_flags &= ~PF_REMOTE; + ttyflush(tp, FREAD|FWRITE); + return (0); + +#ifdef COMPAT_43 + /* wkt */ + case TIOCSETP: + case TIOCSETN: +#endif + case TIOCSETD: + case TIOCSETA: + case TIOCSETAW: + case TIOCSETAF: + while (getc(&tp->t_out) >= 0) + ; + break; + + case TIOCSIG: + if (*(unsigned int *)data >= NSIG) + return(EINVAL); + if ((tp->t_lflag&NOFLSH) == 0) + ttyflush(tp, FREAD|FWRITE); + pgsignal(tp->t_pgrp, *(unsigned int *)data, 1); + if ((*(unsigned int *)data == SIGINFO) && + ((tp->t_lflag&NOKERNINFO) == 0)) + ttyinfo(tp); + return(0); + } + error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag); + if (error >= 0) + return (error); + error = ttioctl(tp, cmd, data, flag); + /* + * Since we use the tty queues internally, + * pty's can't be switched to disciplines which overwrite + * the queues. We can't tell anything about the discipline + * from here... + */ + if (linesw[tp->t_line].l_rint != ttyinput) { + (*linesw[tp->t_line].l_close)(tp, flag); + tp->t_line = TTYDISC; + (void)(*linesw[tp->t_line].l_open)(dev, tp, flag); + error = ENOTTY; + } + if (error < 0) { + if (pti->pt_flags & PF_UCNTL && + (cmd & ~0xff) == UIOCCMD(0)) { + if (cmd & 0xff) { + pti->pt_ucntl = (u_char)cmd; + ptcwakeup(tp, FREAD); + } + return (0); + } + error = ENOTTY; + } + /* + * If external processing and packet mode send ioctl packet. 
+ */ + if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) { + switch(cmd) { + case TIOCSETA: + case TIOCSETAW: + case TIOCSETAF: +#ifdef COMPAT_43 + /* wkt */ + case TIOCSETP: + case TIOCSETN: + case TIOCSETC: + case TIOCSLTC: + case TIOCLBIS: + case TIOCLBIC: + case TIOCLSET: +#endif + pti->pt_send |= TIOCPKT_IOCTL; + default: + break; + } + } + stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s')) + && CCEQ(cc[VSTART], CTRL('q')); + if (pti->pt_flags & PF_NOSTOP) { + if (stop) { + pti->pt_send &= ~TIOCPKT_NOSTOP; + pti->pt_send |= TIOCPKT_DOSTOP; + pti->pt_flags &= ~PF_NOSTOP; + ptcwakeup(tp, FREAD); + } + } else { + if (!stop) { + pti->pt_send &= ~TIOCPKT_DOSTOP; + pti->pt_send |= TIOCPKT_NOSTOP; + pti->pt_flags |= PF_NOSTOP; + ptcwakeup(tp, FREAD); + } + } + return (error); +} +#endif diff --git a/sys/kern/tty_ring.c b/sys/kern/tty_ring.c new file mode 100644 index 000000000000..f4149e576214 --- /dev/null +++ b/sys/kern/tty_ring.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This software is a component of "386BSD" developed by + * William F. Jolitz, TeleMuse. + * 4. Neither the name of the developer nor the name "386BSD" + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
+ * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
+ * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
+ * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
+ * NOT MAKE USE OF THIS WORK.
+ *
+ * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
+ * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
+ * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
+ * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
+ * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
+ * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
+ * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
+ * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: tty_ring.c,v 1.2 1993/10/16 15:25:01 rgrimes Exp $
+ */
+
+#include "param.h"
+#include "systm.h"
+#include "buf.h"
+#include "ioctl.h"
+#include "tty.h"
+
+/*
+ * Ring buffer primitives for the tty subsystem.  The ring (struct
+ * ringb and the RB_* helper macros) is declared in tty.h; rb_hd is
+ * the consumer (head) pointer and rb_tl the producer (tail) pointer.
+ * One slot is always left unused, so rb_hd == rb_tl means "empty"
+ * rather than "full" (see the RB_SUCC test in putc() below).
+ */
+
+/*
+ * XXX - put this in tty.h someday.
+ */
+size_t rb_write __P((struct ringb *to, char *buf, size_t nfrom));
+
+/*
+ * Append character c at the tail of the ring.
+ * Returns 0 on success, -1 if the buffer is full.
+ */
+putc(c, rbp) struct ringb *rbp;
+{
+	char *nxtp;
+
+	/* ring buffer full? */
+	if ( (nxtp = RB_SUCC(rbp, rbp->rb_tl)) == rbp->rb_hd) return (-1);
+
+	/* stuff character */
+	*rbp->rb_tl = c;
+	rbp->rb_tl = nxtp;
+	return(0);
+}
+
+/*
+ * Remove and return the character at the head of the ring,
+ * or -1 if the buffer is empty.
+ */
+getc(rbp) struct ringb *rbp;
+{
+	u_char c;
+
+	/* ring buffer empty? */
+	if (rbp->rb_hd == rbp->rb_tl) return(-1);
+
+	/* fetch character, locate next character */
+	c = *(u_char *) rbp->rb_hd;
+	rbp->rb_hd = RB_SUCC(rbp, rbp->rb_hd);
+	return (c);
+}
+
+/*
+ * Non-destructive scan: return the character at *cpp and advance
+ * *cpp, or 0 once *cpp has caught up with the tail.  The ring
+ * contents are not consumed.
+ */
+nextc(cpp, rbp) struct ringb *rbp; char **cpp; {
+
+	if (*cpp == rbp->rb_tl) return (0);
+	else {	char *cp;
+		cp = *cpp;
+		*cpp = RB_SUCC(rbp, cp);
+		return(*cp);
+	}
+}
+
+/*
+ * Push character c back onto the head of the ring (the next getc()
+ * will return it).  Returns 0 on success, -1 if the buffer is full.
+ */
+ungetc(c, rbp) struct ringb *rbp;
+{
+	char *backp;
+
+	/* ring buffer full? */
+	if ( (backp = RB_PRED(rbp, rbp->rb_hd)) == rbp->rb_tl) return (-1);
+	rbp->rb_hd = backp;
+
+	/* stuff character */
+	*rbp->rb_hd = c;
+	return(0);
+}
+
+/*
+ * Remove and return the most recently putc()'d character (the one
+ * just before the tail), or -1 if the buffer is empty.
+ */
+unputc(rbp) struct ringb *rbp;
+{
+	char *backp;
+	int c;
+
+	/* ring buffer empty? */
+	if (rbp->rb_hd == rbp->rb_tl) return(-1);
+
+	/* backup buffer and dig out previous character */
+	backp = RB_PRED(rbp, rbp->rb_tl);
+	c = *(u_char *)backp;
+	rbp->rb_tl = backp;
+
+	return(c);
+}
+
+/* Peek at the head character without consuming it. */
+#define peekc(rbp) (*(rbp)->rb_hd)
+
+/* Reset a ring buffer to the empty state. */
+initrb(rbp) struct ringb *rbp; {
+	rbp->rb_hd = rbp->rb_tl = rbp->rb_buf;
+}
+
+/*
+ * Example code for contiguous operations:
+ ...
+	nc = RB_CONTIGPUT(&rb);
+	if (nc) {
+		if (nc > 9) nc = 9;
+		bcopy("ABCDEFGHI", rb.rb_tl, nc);
+		rb.rb_tl += nc;
+		rb.rb_tl = RB_ROLLOVER(&rb, rb.rb_tl);
+	}
+ ...
+ ...
+	nc = RB_CONTIGGET(&rb);
+	if (nc) {
+		if (nc > 79) nc = 79;
+		bcopy(rb.rb_hd, stringbuf, nc);
+		rb.rb_hd += nc;
+		rb.rb_hd = RB_ROLLOVER(&rb, rb.rb_hd);
+		stringbuf[nc] = 0;
+		printf("%s|", stringbuf);
+	}
+ ...
+ */
+
+/*
+ * Concatenate ring buffers.
+ */
+/*
+ * Drains `from' into `to' in at most two contiguous bcopy passes
+ * (the part to the right of rb_hd, then the wrapped-around left
+ * part).  NOTE(review): the return value of rb_write() is ignored
+ * and from->rb_hd is advanced unconditionally, so characters are
+ * silently dropped when `to' has insufficient room -- confirm that
+ * callers guarantee enough space before relying on this.
+ */
+catb(from, to)
+	struct ringb *from, *to;
+{
+	size_t nfromleft;
+	size_t nfromright;
+
+	nfromright = RB_CONTIGGET(from);
+	rb_write(to, from->rb_hd, nfromright);
+	from->rb_hd += nfromright;
+	from->rb_hd = RB_ROLLOVER(from, from->rb_hd);
+	nfromleft = RB_CONTIGGET(from);
+	rb_write(to, from->rb_hd, nfromleft);
+	from->rb_hd += nfromleft;
+}
+
+/*
+ * Copy ordinary buffer to ring buffer, return count of what fitted.
+ * NOTE(review): the `fast RB_CONTIGPUT' computation below
+ * (to->rb_hd - to->rb_buf) does not obviously reserve the one empty
+ * slot that putc()/RB_SUCC keep between tail and head; if nfrom can
+ * reach ntoleft exactly, rb_tl would meet rb_hd and the ring would
+ * then read as empty -- verify against the RB_CONTIGPUT macro in
+ * tty.h.
+ */
+size_t rb_write(to, buf, nfrom)
+	struct ringb *to;
+	char *buf;
+	size_t nfrom;
+{
+	char *toleft;
+	size_t ntoleft;
+	size_t ntoright;
+
+	ntoright = RB_CONTIGPUT(to);
+	if (nfrom < ntoright) {
+		bcopy(buf, to->rb_tl, nfrom);
+		to->rb_tl += nfrom;
+		return (nfrom);
+	}
+	bcopy(buf, to->rb_tl, ntoright);
+	nfrom -= ntoright;
+	toleft = to->rb_buf;		/* fast RB_ROLLOVER */
+	ntoleft = to->rb_hd - toleft;	/* fast RB_CONTIGPUT */
+	if (nfrom > ntoleft)
+		nfrom = ntoleft;
+	bcopy(buf + ntoright, toleft, nfrom);
+	to->rb_tl = toleft + nfrom;
+	return (ntoright + nfrom);
+}
diff --git a/sys/kern/tty_tb.c b/sys/kern/tty_tb.c
new file mode 100644
index 000000000000..c3b2f99cc1c5
--- /dev/null
+++ b/sys/kern/tty_tb.c
+/*-
+ * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)tty_tb.c 7.7 (Berkeley) 5/9/91 + * $Id: tty_tb.c,v 1.2 1993/10/16 15:25:03 rgrimes Exp $ + */ + +#include "tb.h" +#if NTB > 0 + +/* + * Line discipline for RS232 tablets; + * supplies binary coordinate data. + */ +#include "param.h" +#include "tablet.h" +#include "tty.h" + +/* + * Tablet configuration table. 
+ */ +struct tbconf { + short tbc_recsize; /* input record size in bytes */ + short tbc_uiosize; /* size of data record returned user */ + int tbc_sync; /* mask for finding sync byte/bit */ + int (*tbc_decode)();/* decoding routine */ + char *tbc_run; /* enter run mode sequence */ + char *tbc_point; /* enter point mode sequence */ + char *tbc_stop; /* stop sequence */ + char *tbc_start; /* start/restart sequence */ + int tbc_flags; +#define TBF_POL 0x1 /* polhemus hack */ +#define TBF_INPROX 0x2 /* tablet has proximity info */ +}; + +static int tbdecode(), gtcodecode(), poldecode(); +static int tblresdecode(), tbhresdecode(); + +struct tbconf tbconf[TBTYPE] = { +{ 0 }, +{ 5, sizeof (struct tbpos), 0200, tbdecode, "6", "4" }, +{ 5, sizeof (struct tbpos), 0200, tbdecode, "\1CN", "\1RT", "\2", "\4" }, +{ 8, sizeof (struct gtcopos), 0200, gtcodecode }, +{17, sizeof (struct polpos), 0200, poldecode, 0, 0, "\21", "\5\22\2\23", + TBF_POL }, +{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CN", "\1PT", "\2", "\4", + TBF_INPROX }, +{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CN", "\1PT", "\2", "\4", + TBF_INPROX }, +{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CL\33", "\1PT\33", 0, 0}, +{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CL\33", "\1PT\33", 0, 0}, +}; + +/* + * Tablet state + */ +struct tb { + int tbflags; /* mode & type bits */ +#define TBMAXREC 17 /* max input record size */ + char cbuf[TBMAXREC]; /* input buffer */ + union { + struct tbpos tbpos; + struct gtcopos gtcopos; + struct polpos polpos; + } rets; /* processed state */ +#define NTBS 16 +} tb[NTBS]; + +/* + * Open as tablet discipline; called on discipline change. 
+ */ +/*ARGSUSED*/ +tbopen(dev, tp) + dev_t dev; + register struct tty *tp; +{ + register struct tb *tbp; + + if (tp->t_line == TABLDISC) + return (ENODEV); + ttywflush(tp); + for (tbp = tb; tbp < &tb[NTBS]; tbp++) + if (tbp->tbflags == 0) + break; + if (tbp >= &tb[NTBS]) + return (EBUSY); + tbp->tbflags = TBTIGER|TBPOINT; /* default */ + tp->t_cp = tbp->cbuf; + tp->t_inbuf = 0; + bzero((caddr_t)&tbp->rets, sizeof (tbp->rets)); + tp->T_LINEP = (caddr_t)tbp; + tp->t_flags = ttcompatgetflags(tp); + tp->t_flags |= LITOUT; + ttcompatsetlflags(tp, &tp->t_termios); + return (0); +} + +/* + * Line discipline change or last device close. + */ +tbclose(tp) + register struct tty *tp; +{ + register int s; + int modebits = TBPOINT|TBSTOP; + + tbioctl(tp, BIOSMODE, &modebits, 0); + s = spltty(); + ((struct tb *)tp->T_LINEP)->tbflags = 0; + tp->t_cp = 0; + tp->t_inbuf = 0; + tp->t_rawq.c_cc = 0; /* clear queues -- paranoid */ + tp->t_canq.c_cc = 0; + tp->t_line = 0; /* paranoid: avoid races */ + splx(s); +} + +/* + * Read from a tablet line. + * Characters have been buffered in a buffer and decoded. + */ +tbread(tp, uio) + register struct tty *tp; + struct uio *uio; +{ + register struct tb *tbp = (struct tb *)tp->T_LINEP; + register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE]; + int ret; + + if ((tp->t_state&TS_CARR_ON) == 0) + return (EIO); + ret = uiomove(&tbp->rets, tc->tbc_uiosize, uio); + if (tc->tbc_flags&TBF_POL) + tbp->rets.polpos.p_key = ' '; + return (ret); +} + +/* + * Low level character input routine. + * Stuff the character in the buffer, and decode + * if all the chars are there. + * + * This routine could be expanded in-line in the receiver + * interrupt routine to make it run as fast as possible. + */ +tbinput(c, tp) + register int c; + register struct tty *tp; +{ + register struct tb *tbp = (struct tb *)tp->T_LINEP; + register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE]; + + if (tc->tbc_recsize == 0 || tc->tbc_decode == 0) /* paranoid? 
*/ + return; + /* + * Locate sync bit/byte or reset input buffer. + */ + if (c&tc->tbc_sync || tp->t_inbuf == tc->tbc_recsize) { + tp->t_cp = tbp->cbuf; + tp->t_inbuf = 0; + } + *tp->t_cp++ = c&0177; + /* + * Call decode routine only if a full record has been collected. + */ + if (++tp->t_inbuf == tc->tbc_recsize) + (*tc->tbc_decode)(tc, tbp->cbuf, &tbp->rets); +} + +/* + * Decode GTCO 8 byte format (high res, tilt, and pressure). + */ +static +gtcodecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct gtcopos *tbpos; +{ + + tbpos->pressure = *cp >> 2; + tbpos->status = (tbpos->pressure > 16) | TBINPROX; /* half way down */ + tbpos->xpos = (*cp++ & 03) << 14; + tbpos->xpos |= *cp++ << 7; + tbpos->xpos |= *cp++; + tbpos->ypos = (*cp++ & 03) << 14; + tbpos->ypos |= *cp++ << 7; + tbpos->ypos |= *cp++; + tbpos->xtilt = *cp++; + tbpos->ytilt = *cp++; + tbpos->scount++; +} + +/* + * Decode old Hitachi 5 byte format (low res). + */ +static +tbdecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct tbpos *tbpos; +{ + register char byte; + + byte = *cp++; + tbpos->status = (byte&0100) ? TBINPROX : 0; + byte &= ~0100; + if (byte > 036) + tbpos->status |= 1 << ((byte-040)/2); + tbpos->xpos = *cp++ << 7; + tbpos->xpos |= *cp++; + if (tbpos->xpos < 256) /* tablet wraps around at 256 */ + tbpos->status &= ~TBINPROX; /* make it out of proximity */ + tbpos->ypos = *cp++ << 7; + tbpos->ypos |= *cp++; + tbpos->scount++; +} + +/* + * Decode new Hitach 5-byte format (low res). 
+ */ +static +tblresdecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct tbpos *tbpos; +{ + + *cp &= ~0100; /* mask sync bit */ + tbpos->status = (*cp++ >> 2) | TBINPROX; + if (tc->tbc_flags&TBF_INPROX && tbpos->status&020) + tbpos->status &= ~(020|TBINPROX); + tbpos->xpos = *cp++; + tbpos->xpos |= *cp++ << 6; + tbpos->ypos = *cp++; + tbpos->ypos |= *cp++ << 6; + tbpos->scount++; +} + +/* + * Decode new Hitach 6-byte format (high res). + */ +static +tbhresdecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct tbpos *tbpos; +{ + char byte; + + byte = *cp++; + tbpos->xpos = (byte & 03) << 14; + tbpos->xpos |= *cp++ << 7; + tbpos->xpos |= *cp++; + tbpos->ypos = *cp++ << 14; + tbpos->ypos |= *cp++ << 7; + tbpos->ypos |= *cp++; + tbpos->status = (byte >> 2) | TBINPROX; + if (tc->tbc_flags&TBF_INPROX && tbpos->status&020) + tbpos->status &= ~(020|TBINPROX); + tbpos->scount++; +} + +/* + * Polhemus decode. + */ +static +poldecode(tc, cp, polpos) + struct tbconf *tc; + register char *cp; + register struct polpos *polpos; +{ + + polpos->p_x = cp[4] | cp[3]<<7 | (cp[9] & 0x03) << 14; + polpos->p_y = cp[6] | cp[5]<<7 | (cp[9] & 0x0c) << 12; + polpos->p_z = cp[8] | cp[7]<<7 | (cp[9] & 0x30) << 10; + polpos->p_azi = cp[11] | cp[10]<<7 | (cp[16] & 0x03) << 14; + polpos->p_pit = cp[13] | cp[12]<<7 | (cp[16] & 0x0c) << 12; + polpos->p_rol = cp[15] | cp[14]<<7 | (cp[16] & 0x30) << 10; + polpos->p_stat = cp[1] | cp[0]<<7; + if (cp[2] != ' ') + polpos->p_key = cp[2]; +} + +/*ARGSUSED*/ +tbioctl(tp, cmd, data, flag) + struct tty *tp; + caddr_t data; +{ + register struct tb *tbp = (struct tb *)tp->T_LINEP; + + switch (cmd) { + + case BIOGMODE: + *(int *)data = tbp->tbflags & TBMODE; + break; + + case BIOSTYPE: + if (tbconf[*(int *)data & TBTYPE].tbc_recsize == 0 || + tbconf[*(int *)data & TBTYPE].tbc_decode == 0) + return (EINVAL); + tbp->tbflags &= ~TBTYPE; + tbp->tbflags |= *(int *)data & TBTYPE; + /* fall thru... 
to set mode bits */ + + case BIOSMODE: { + register struct tbconf *tc; + + tbp->tbflags &= ~TBMODE; + tbp->tbflags |= *(int *)data & TBMODE; + tc = &tbconf[tbp->tbflags & TBTYPE]; + if (tbp->tbflags&TBSTOP) { + if (tc->tbc_stop) + ttyout(tc->tbc_stop, tp); + } else if (tc->tbc_start) + ttyout(tc->tbc_start, tp); + if (tbp->tbflags&TBPOINT) { + if (tc->tbc_point) + ttyout(tc->tbc_point, tp); + } else if (tc->tbc_run) + ttyout(tc->tbc_run, tp); + ttstart(tp); + break; + } + + case BIOGTYPE: + *(int *)data = tbp->tbflags & TBTYPE; + break; + + case TIOCSETD: + case TIOCGETD: + case TIOCGETP: + case TIOCGETC: + return (-1); /* pass thru... */ + + default: + return (ENOTTY); + } + return (0); +} +#endif diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c new file mode 100644 index 000000000000..d00e79092680 --- /dev/null +++ b/sys/kern/tty_tty.c @@ -0,0 +1,136 @@ +/*- + * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)tty_tty.c 7.15 (Berkeley) 5/28/91 + * $Id: tty_tty.c,v 1.2 1993/10/16 15:25:05 rgrimes Exp $ + */ + +/* + * Indirect driver for controlling tty. + */ +#include "param.h" +#include "systm.h" +#include "conf.h" +#include "ioctl.h" +#include "tty.h" +#include "proc.h" +#include "vnode.h" +#include "file.h" + +#define cttyvp(p) ((p)->p_flag&SCTTY ? (p)->p_session->s_ttyvp : NULL) + +/*ARGSUSED*/ +cttyopen(dev, flag, mode, p) + dev_t dev; + int flag, mode; + struct proc *p; +{ + struct vnode *ttyvp = cttyvp(p); + int error; + + if (ttyvp == NULL) + return (ENXIO); + VOP_LOCK(ttyvp); + error = VOP_ACCESS(ttyvp, + (flag&FREAD ? VREAD : 0) | (flag&FWRITE ? 
VWRITE : 0), p->p_ucred, p); + if (!error) + error = VOP_OPEN(ttyvp, flag, NOCRED, p); + VOP_UNLOCK(ttyvp); + return (error); +} + +/*ARGSUSED*/ +cttyread(dev, uio, flag) + dev_t dev; + struct uio *uio; +{ + register struct vnode *ttyvp = cttyvp(uio->uio_procp); + int error; + + if (ttyvp == NULL) + return (EIO); + VOP_LOCK(ttyvp); + error = VOP_READ(ttyvp, uio, flag, NOCRED); + VOP_UNLOCK(ttyvp); + return (error); +} + +/*ARGSUSED*/ +cttywrite(dev, uio, flag) + dev_t dev; + struct uio *uio; +{ + register struct vnode *ttyvp = cttyvp(uio->uio_procp); + int error; + + if (ttyvp == NULL) + return (EIO); + VOP_LOCK(ttyvp); + error = VOP_WRITE(ttyvp, uio, flag, NOCRED); + VOP_UNLOCK(ttyvp); + return (error); +} + +/*ARGSUSED*/ +cttyioctl(dev, cmd, addr, flag, p) + dev_t dev; + int cmd; + caddr_t addr; + int flag; + struct proc *p; +{ + struct vnode *ttyvp = cttyvp(p); + + if (ttyvp == NULL) + return (EIO); + if (cmd == TIOCNOTTY) { + if (!SESS_LEADER(p)) { + p->p_flag &= ~SCTTY; + return (0); + } else + return (EINVAL); + } + return (VOP_IOCTL(ttyvp, cmd, addr, flag, NOCRED, p)); +} + +/*ARGSUSED*/ +cttyselect(dev, flag, p) + dev_t dev; + int flag; + struct proc *p; +{ + struct vnode *ttyvp = cttyvp(p); + + if (ttyvp == NULL) + return (1); /* try operation to get EOF/failure */ + return (VOP_SELECT(ttyvp, flag, FREAD|FWRITE, NOCRED, p)); +} diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c new file mode 100644 index 000000000000..41a132996202 --- /dev/null +++ b/sys/kern/uipc_domain.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 1982, 1986 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * from: @(#)uipc_domain.c	7.9 (Berkeley) 3/4/91
 * $Id: uipc_domain.c,v 1.2 1993/10/16 15:25:06 rgrimes Exp $
 */

#include <sys/cdefs.h>
#include "param.h"
#include "socket.h"
#include "protosw.h"
#include "domain.h"
#include "mbuf.h"
#include "time.h"
#include "kernel.h"

/*
 * Prepend the statically-configured domain "x" onto the global list of
 * protocol domains.  __CONCAT pastes the short name onto "domain"
 * (e.g. inet -> inetdomain) to name the extern struct domain.
 */
#define ADDDOMAIN(x) { \
	extern struct domain __CONCAT(x,domain); \
	__CONCAT(x,domain.dom_next) = domains; \
	domains = &__CONCAT(x,domain); \
}

/*
 * Build the list of configured protocol domains, run each domain's and
 * each protocol's init routine, compute the mbuf header/data size
 * limits, and prime the periodic protocol timers.  Called once at boot.
 */
domaininit()
{
	register struct domain *dp;
	register struct protosw *pr;

#undef unix	/* many compilers predefine "unix"; we need the bare token */
#ifndef lint
	ADDDOMAIN(unix);
	ADDDOMAIN(route);
#ifdef INET
	ADDDOMAIN(inet);
#endif
#ifdef NS
	ADDDOMAIN(ns);
#endif
#ifdef ISO
	ADDDOMAIN(iso);
#endif
#ifdef RMP
	ADDDOMAIN(rmp);
#endif
#ifdef CCITT
	ADDDOMAIN(ccitt);
#endif
#include "imp.h"
#if NIMP > 0
	ADDDOMAIN(imp);
#endif
#endif

	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_init)
			(*dp->dom_init)();
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_init)
				(*pr->pr_init)();
	}

	/* Assume every link layer needs at least 16 bytes of header room. */
	if (max_linkhdr < 16)		/* XXX */
		max_linkhdr = 16;
	max_hdr = max_linkhdr + max_protohdr;
	max_datalen = MHLEN - max_hdr;
	/* Each timer reschedules itself via timeout(); kick both off now. */
	pffasttimo();
	pfslowtimo();
}

/*
 * Return the first protocol in the given family whose socket type
 * matches "type", or 0 if the family or type is not configured.
 */
struct protosw *
pffindtype(family, type)
	int family, type;
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		if (dp->dom_family == family)
			goto found;
	return (0);
found:
	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
		if (pr->pr_type && pr->pr_type == type)
			return (pr);
	return (0);
}

/*
 * Look up a protocol by (family, protocol number, socket type).
 * An exact (protocol, type) match wins; failing that, a SOCK_RAW
 * entry with protocol 0 in the family acts as a wildcard fallback.
 */
struct protosw *
pffindproto(family, protocol, type)
	int family, protocol, type;
{
	register struct domain *dp;
	register struct protosw *pr;
	struct protosw *maybe = 0;

	if (family == 0)
		return (0);
	for (dp = domains; dp; dp = dp->dom_next)
		if (dp->dom_family == family)
			goto found;
	return (0);
found:
	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
			return (pr);

		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
		    pr->pr_protocol == 0 && maybe == (struct protosw *)0)
			maybe = pr;
	}
	return (maybe);
}

/*
 * Broadcast a control-input notification (e.g. a redirect or an
 * interface-down event) to every protocol in every domain.
 */
pfctlinput(cmd, sa)
	int cmd;
	struct sockaddr *sa;
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_ctlinput)
				(*pr->pr_ctlinput)(cmd, sa, (caddr_t) 0);
}

/*
 * Slow protocol timer: run every pr_slowtimo handler, then
 * reschedule ourselves to fire again in hz/2 ticks (500ms).
 */
pfslowtimo()
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_slowtimo)
				(*pr->pr_slowtimo)();
	timeout(pfslowtimo, (caddr_t)0, hz/2);
}

/*
 * Fast protocol timer: run every pr_fasttimo handler, then
 * reschedule ourselves to fire again in hz/5 ticks (200ms).
 */
pffasttimo()
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_fasttimo)
				(*pr->pr_fasttimo)();
	timeout(pffasttimo, (caddr_t)0, hz/5);
}
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
new file mode 100644
index 000000000000..552653436890
--- /dev/null
+++ b/sys/kern/uipc_mbuf.c
@@ -0,0 +1,518 @@
/*
 * Copyright (c) 1982, 1986, 1988, 1991 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)uipc_mbuf.c 7.19 (Berkeley) 4/20/91 + * $Id: uipc_mbuf.c,v 1.3 1993/10/16 15:25:07 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "proc.h" +#include "malloc.h" +#define MBTYPES +#include "mbuf.h" +#include "kernel.h" +#include "syslog.h" +#include "domain.h" +#include "protosw.h" +#include "vm/vm.h" + +extern vm_map_t mb_map; +struct mbuf *mbutl; +char *mclrefcnt; + +mbinit() +{ + int s; + +#if CLBYTES < 4096 +#define NCL_INIT (4096/CLBYTES) +#else +#define NCL_INIT 1 +#endif + s = splimp(); + if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) + goto bad; + splx(s); + return; +bad: + panic("mbinit"); +} + +/* + * Allocate some number of mbuf clusters + * and place on cluster free list. + * Must be called at splimp. 
+ */ +/* ARGSUSED */ +m_clalloc(ncl, how) /* 31 Aug 92*/ + register int ncl; +{ + int npg, mbx; + register caddr_t p; + register int i; + static int logged; + + npg = ncl * CLSIZE; + /* 31 Aug 92*/ + p = (caddr_t)kmem_malloc(mb_map, ctob(npg), !(how&M_DONTWAIT)); + if (p == NULL) { + if (logged == 0) { + logged++; + log(LOG_ERR, "mb_map full\n"); + } + return (0); + } + ncl = ncl * CLBYTES / MCLBYTES; + for (i = 0; i < ncl; i++) { + ((union mcluster *)p)->mcl_next = mclfree; + mclfree = (union mcluster *)p; + p += MCLBYTES; + mbstat.m_clfree++; + } + mbstat.m_clusters += ncl; + return (1); +} + +/* + * When MGET failes, ask protocols to free space when short of memory, + * then re-attempt to allocate an mbuf. + */ +struct mbuf * +m_retry(i, t) + int i, t; +{ + register struct mbuf *m; + + m_reclaim(); +#define m_retry(i, t) (struct mbuf *)0 + MGET(m, i, t); +#undef m_retry + return (m); +} + +/* + * As above; retry an MGETHDR. + */ +struct mbuf * +m_retryhdr(i, t) + int i, t; +{ + register struct mbuf *m; + + m_reclaim(); +#define m_retryhdr(i, t) (struct mbuf *)0 + MGETHDR(m, i, t); +#undef m_retryhdr + return (m); +} + +m_reclaim() +{ + register struct domain *dp; + register struct protosw *pr; + int s = splimp(); + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain) + (*pr->pr_drain)(); + splx(s); + mbstat.m_drain++; +} + +/* + * Space allocation routines. + * These are also available as macros + * for critical paths. 
+ */ +struct mbuf * +m_get(how, type) /* 31 Aug 92*/ + int how, type; +{ + register struct mbuf *m; + + MGET(m, how, type); + return (m); +} + +struct mbuf * +m_gethdr(how, type) /* 31 Aug 92*/ + int how, type; +{ + register struct mbuf *m; + + MGETHDR(m, how, type); + return (m); +} + +struct mbuf * +m_getclr(how, type) /* 31 Aug 92*/ + int how, type; +{ + register struct mbuf *m; + + MGET(m, how, type); + if (m == 0) + return (0); + bzero(mtod(m, caddr_t), MLEN); + return (m); +} + +struct mbuf * +m_free(m) + struct mbuf *m; +{ + register struct mbuf *n; + + MFREE(m, n); + return (n); +} + +m_freem(m) + register struct mbuf *m; +{ + register struct mbuf *n; + + if (m == NULL) + return; + do { + MFREE(m, n); + } while (m = n); +} + +/* + * Mbuffer utility routines. + */ + +/* + * Lesser-used path for M_PREPEND: + * allocate new mbuf to prepend to chain, + * copy junk along. + */ +struct mbuf * +m_prepend(m, len, how) + register struct mbuf *m; + int len, how; +{ + struct mbuf *mn; + + MGET(mn, how, m->m_type); + if (mn == (struct mbuf *)NULL) { + m_freem(m); + return ((struct mbuf *)NULL); + } + if (m->m_flags & M_PKTHDR) { + M_COPY_PKTHDR(mn, m); + m->m_flags &= ~M_PKTHDR; + } + mn->m_next = m; + m = mn; + if (len < MHLEN) + MH_ALIGN(m, len); + m->m_len = len; + return (m); +} + +/* + * Make a copy of an mbuf chain starting "off0" bytes from the beginning, + * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. + * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. 
+ */ +int MCFail; + +struct mbuf * +m_copym(m, off0, len, wait) + register struct mbuf *m; + int off0, wait; + register int len; +{ + register struct mbuf *n, **np; + register int off = off0; + struct mbuf *top; + int copyhdr = 0; + + if (off < 0 || len < 0) + panic("m_copym"); + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + while (off > 0) { + if (m == 0) + panic("m_copym"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + np = ⊤ + top = 0; + while (len > 0) { + if (m == 0) { + if (len != M_COPYALL) + panic("m_copym"); + break; + } + MGET(n, wait, m->m_type); + *np = n; + if (n == 0) + goto nospace; + if (copyhdr) { + M_COPY_PKTHDR(n, m); + if (len == M_COPYALL) + n->m_pkthdr.len -= off0; + else + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = MIN(len, m->m_len - off); + if (m->m_flags & M_EXT) { + n->m_data = m->m_data + off; + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + n->m_ext = m->m_ext; + n->m_flags |= M_EXT; + } else + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), + (unsigned)n->m_len); + if (len != M_COPYALL) + len -= n->m_len; + off = 0; + m = m->m_next; + np = &n->m_next; + } + if (top == 0) + MCFail++; + return (top); +nospace: + m_freem(top); + MCFail++; + return (0); +} + +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the indicated buffer. + */ +m_copydata(m, off, len, cp) + register struct mbuf *m; + register int off; + register int len; + caddr_t cp; +{ + register unsigned count; + + if (off < 0 || len < 0) + panic("m_copydata"); + while (off > 0) { + if (m == 0) + panic("m_copydata"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == 0) + panic("m_copydata"); + count = MIN(m->m_len - off, len); + bcopy(mtod(m, caddr_t) + off, cp, count); + len -= count; + cp += count; + off = 0; + m = m->m_next; + } +} + +/* + * Concatenate mbuf chain n to m. + * Both chains must be of the same type (e.g. 
   MT_DATA).
 * Any m_pkthdr is not updated.
 */
m_cat(m, n)
	register struct mbuf *m, *n;
{
	/* Find the tail of m, then fold n's data into it where possible. */
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

/*
 * Trim req_len bytes of data from an mbuf chain: from the head when
 * req_len is positive, from the tail when negative.  Adjusts any
 * packet-header length to match.  (NOTE: "register count" below is
 * implicit int, pre-ANSI style.)
 */
m_adj(mp, req_len)
	struct mbuf *mp;
{
	register int len = req_len;
	register struct mbuf *m;
	register count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if ((mp = m)->m_flags & M_PKTHDR)
				m->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m = m->m_next)
			m->m_len = 0;
	}
}

/*
 * Rearange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;		/* count of m_pullup failures (statistics) */

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		/* Copy at least len, at most max_protohdr, bounded by space. */
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
diff --git a/sys/kern/uipc_proto.c b/sys/kern/uipc_proto.c
new file mode 100644
index 000000000000..4c25dab81108
--- /dev/null
+++ b/sys/kern/uipc_proto.c
@@
-0,0 +1,73 @@ +/*- + * Copyright (c) 1982, 1986 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)uipc_proto.c 7.6 (Berkeley) 5/9/91 + * $Id: uipc_proto.c,v 1.2 1993/10/16 15:25:09 rgrimes Exp $ + */ + +#include "param.h" +#include "socket.h" +#include "protosw.h" +#include "domain.h" +#include "mbuf.h" + +/* + * Definitions of protocols supported in the UNIX domain. + */ + +int uipc_usrreq(); +int raw_init(),raw_usrreq(),raw_input(),raw_ctlinput(); +extern struct domain unixdomain; /* or at least forward */ + +struct protosw unixsw[] = { +{ SOCK_STREAM, &unixdomain, 0, PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, + 0, 0, 0, 0, + uipc_usrreq, + 0, 0, 0, 0, +}, +{ SOCK_DGRAM, &unixdomain, 0, PR_ATOMIC|PR_ADDR|PR_RIGHTS, + 0, 0, 0, 0, + uipc_usrreq, + 0, 0, 0, 0, +}, +{ 0, 0, 0, 0, + raw_input, 0, raw_ctlinput, 0, + raw_usrreq, + raw_init, 0, 0, 0, +} +}; + +int unp_externalize(), unp_dispose(); + +struct domain unixdomain = + { AF_UNIX, "unix", 0, unp_externalize, unp_dispose, + unixsw, &unixsw[sizeof(unixsw)/sizeof(unixsw[0])] }; diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c new file mode 100644 index 000000000000..3195bcd3d4c2 --- /dev/null +++ b/sys/kern/uipc_socket.c @@ -0,0 +1,1005 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. 
+ * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)uipc_socket.c 7.28 (Berkeley) 5/4/91 + * $Id: uipc_socket.c,v 1.8 1993/10/23 16:23:49 davidg Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "proc.h" +#include "file.h" +#include "malloc.h" +#include "mbuf.h" +#include "domain.h" +#include "kernel.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" +#include "resourcevar.h" + +/* + * Socket operation routines. + * These routines are called by the routines in + * sys_socket.c or from a system process, and + * implement the semantics of socket operations by + * switching out to the protocol specific routines. 
+ */ +/*ARGSUSED*/ +socreate(dom, aso, type, proto) + struct socket **aso; + register int type; + int proto; +{ + struct proc *p = curproc; /* XXX */ + register struct protosw *prp; + register struct socket *so; + register int error; + + if (proto) + prp = pffindproto(dom, proto, type); + else + prp = pffindtype(dom, type); + if (prp == 0 || !prp->pr_usrreq) + return (EPROTONOSUPPORT); + if (prp->pr_type != type) + return (EPROTOTYPE); + MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); + bzero((caddr_t)so, sizeof(*so)); + so->so_type = type; + if (p->p_ucred->cr_uid == 0) + so->so_state = SS_PRIV; + so->so_proto = prp; + error = + (*prp->pr_usrreq)(so, PRU_ATTACH, + (struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0); + if (error) { + so->so_state |= SS_NOFDREF; + sofree(so); + return (error); + } + *aso = so; + return (0); +} + +sobind(so, nam) + struct socket *so; + struct mbuf *nam; +{ + int s = splnet(); + int error; + + error = + (*so->so_proto->pr_usrreq)(so, PRU_BIND, + (struct mbuf *)0, nam, (struct mbuf *)0); + splx(s); + return (error); +} + +solisten(so, backlog) + register struct socket *so; + int backlog; +{ + int s = splnet(), error; + + error = + (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0); + if (error) { + splx(s); + return (error); + } + if (so->so_q == 0) + so->so_options |= SO_ACCEPTCONN; + if (backlog < 0) + backlog = 0; + so->so_qlimit = min(backlog, SOMAXCONN); + splx(s); + return (0); +} + +sofree(so) + register struct socket *so; +{ + + if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) + return; + if (so->so_head) { + if (!soqremque(so, 0) && !soqremque(so, 1)) + panic("sofree dq"); + so->so_head = 0; + } + sbrelease(&so->so_snd); + sorflush(so); + FREE(so, M_SOCKET); +} + +/* + * Close a socket on last file table reference removal. + * Initiate disconnect if connected. + * Free socket when disconnect complete. 
 */
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	/* Abort any pending (q0) and completed (q) incoming connections. */
	if (so->so_options & SO_ACCEPTCONN) {
		while (so->so_q0)
			(void) soabort(so->so_q0);
		while (so->so_q)
			(void) soabort(so->so_q);
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Linger: wait for the disconnect (unless non-blocking). */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED)
				if (error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls, so->so_linger))
					break;
		}
	}
drop:
	if (so->so_pcb) {
		int error2 =
		    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
			(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
		/* Preserve the earlier error if both steps failed. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
soabort(so)
	struct socket *so;
{

	return (
	    (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
}

/*
 * Accept a queued connection: clear SS_NOFDREF (the socket is gaining
 * a file reference) and let the protocol fill in the peer's name.
 */
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
	    (struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * Initiate a connection to the address in "nam".
 */
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * Connect two sockets to each other (socketpair support).
 */
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * Begin disconnecting a connected socket via PRU_DISCONNECT.
 * Fails with ENOTCONN/EALREADY for wrong-state sockets.
 */
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
bad:
	splx(s);
	return (error);
}

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;

	/* Don't allow negative sized sends */
	if (resid < 0)
		return (EINVAL);

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit used while at splnet inside the loop below. */
#define snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if (error = sblock(&so->so_snd))
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/* Control-only sends are allowed while confirming. */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			/* Request can never fit: hard error rather than block. */
			if (atomic && resid > so->so_snd.sb_hiwat ||
			    clen > so->so_snd.sb_hiwat)
				snderr(EMSGSIZE);
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					len = min(min(mlen, resid), space);
				} else {
nopages:
					len = min(min(mlen, resid), space);
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			/* Hand the assembled chain (and control) to the protocol. */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* Ownership of top/control passed to the protocol. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	struct proc *p = curproc;		/* XXX */
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data is fetched from the protocol, not the sockbuf. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0);

restart:
	if (error = sblock(&so->so_rcv))
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	while (m == 0 || so->so_rcv.sb_cc < uio->uio_resid &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				break;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				break;
			else
				goto release;
		}
		/* A record boundary or OOB mark lets us return what we have. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if (so->so_state & SS_NBIO) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	p->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* First mbuf of a PR_ADDR record is the sender's name. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Next come any control (ancillary data) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Passed descriptors must be externalized. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main data-copy loop: one iteration per mbuf (or partial mbuf). */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		/* Never read past the OOB mark in one gulp. */
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed this whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Interrupted: short count, no error (see above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if (m = so->so_rcv.sb_mb)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Leftover data in an atomic record is truncated, not kept. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/*
		 * NOTE(review): this PRU_RCVD call passes four mbuf args
		 * (six total) while the pr_usrreq convention everywhere
		 * else in this file is five; the trailing extra argument
		 * is ignored by K&R callees but looks like a typo —
		 * confirm against the protocol switch definition.
		 */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)flags, (struct mbuf *)0,
			    (struct mbuf *)0);
	}
	/* Nothing transferred and nothing final happened: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down one or both halves of a connection.  "how" is the
 * 0/1/2 shutdown(2) argument; how+1 maps it onto FREAD/FWRITE bits.
 */
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
	return (0);
}

/*
 * Flush the receive buffer: mark the socket unable to receive, detach
 * the buffered data under splimp, and dispose of it (including any
 * passed access rights) outside the critical section.
 */
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;	/* sblock must not fail here */
	(void) sblock(sb);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;			/* detach contents into a local copy */
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/* NOTE: sosetopt continues past the end of this chunk (truncated). */
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && 
so->so_proto->pr_ctloutput) + return ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + error = ENOPROTOOPT; + } else { + switch (optname) { + + case SO_LINGER: + if (m == NULL || m->m_len != sizeof (struct linger)) { + error = EINVAL; + goto bad; + } + so->so_linger = mtod(m, struct linger *)->l_linger; + /* fall thru... */ + + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_DONTROUTE: + case SO_USELOOPBACK: + case SO_BROADCAST: + case SO_REUSEADDR: + case SO_OOBINLINE: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + if (*mtod(m, int *)) + so->so_options |= optname; + else + so->so_options &= ~optname; + break; + + case SO_SNDBUF: + case SO_RCVBUF: + case SO_SNDLOWAT: + case SO_RCVLOWAT: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + switch (optname) { + + case SO_SNDBUF: + case SO_RCVBUF: + if (sbreserve(optname == SO_SNDBUF ? + &so->so_snd : &so->so_rcv, + (u_long) *mtod(m, int *)) == 0) { + error = ENOBUFS; + goto bad; + } + break; + + case SO_SNDLOWAT: + so->so_snd.sb_lowat = *mtod(m, int *); + break; + case SO_RCVLOWAT: + so->so_rcv.sb_lowat = *mtod(m, int *); + break; + } + break; + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + struct timeval *tv; + short val; + + if (m == NULL || m->m_len < sizeof (*tv)) { + error = EINVAL; + goto bad; + } + tv = mtod(m, struct timeval *); + if (tv->tv_sec > SHRT_MAX / hz - hz) { + error = EDOM; + goto bad; + } + val = tv->tv_sec * hz + tv->tv_usec / tick; + + switch (optname) { + + case SO_SNDTIMEO: + so->so_snd.sb_timeo = val; + break; + case SO_RCVTIMEO: + so->so_rcv.sb_timeo = val; + break; + } + break; + } + + default: + error = ENOPROTOOPT; + break; + } + } +bad: + if (m) + (void) m_free(m); + return (error); +} + +sogetopt(so, level, optname, mp) + register struct socket *so; + int level, optname; + struct mbuf **mp; +{ + register struct mbuf *m; + + if (level != SOL_SOCKET) { + if (so->so_proto && so->so_proto->pr_ctloutput) 
{ + return ((*so->so_proto->pr_ctloutput) + (PRCO_GETOPT, so, level, optname, mp)); + } else + return (ENOPROTOOPT); + } else { + m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof (int); + + switch (optname) { + + case SO_LINGER: + m->m_len = sizeof (struct linger); + mtod(m, struct linger *)->l_onoff = + so->so_options & SO_LINGER; + mtod(m, struct linger *)->l_linger = so->so_linger; + break; + + case SO_USELOOPBACK: + case SO_DONTROUTE: + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_REUSEADDR: + case SO_BROADCAST: + case SO_OOBINLINE: + *mtod(m, int *) = so->so_options & optname; + break; + + case SO_TYPE: + *mtod(m, int *) = so->so_type; + break; + + case SO_ERROR: + *mtod(m, int *) = so->so_error; + so->so_error = 0; + break; + + case SO_SNDBUF: + *mtod(m, int *) = so->so_snd.sb_hiwat; + break; + + case SO_RCVBUF: + *mtod(m, int *) = so->so_rcv.sb_hiwat; + break; + + case SO_SNDLOWAT: + *mtod(m, int *) = so->so_snd.sb_lowat; + break; + + case SO_RCVLOWAT: + *mtod(m, int *) = so->so_rcv.sb_lowat; + break; + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + int val = (optname == SO_SNDTIMEO ? 
+ so->so_snd.sb_timeo : so->so_rcv.sb_timeo); + + m->m_len = sizeof(struct timeval); + mtod(m, struct timeval *)->tv_sec = val / hz; + mtod(m, struct timeval *)->tv_usec = + (val % hz) / tick; + break; + } + + default: + (void)m_free(m); + return (ENOPROTOOPT); + } + *mp = m; + return (0); + } +} + +sohasoutofband(so) + register struct socket *so; +{ + struct proc *p; + + if (so->so_pgid < 0) + gsignal(-so->so_pgid, SIGURG); + else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) + psignal(p, SIGURG); + if (so->so_rcv.sb_sel) { + selwakeup(so->so_rcv.sb_sel, so->so_rcv.sb_flags & SB_COLL); + so->so_rcv.sb_sel = 0; + so->so_rcv.sb_flags &= ~SB_COLL; + } +} diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c new file mode 100644 index 000000000000..349096aeee29 --- /dev/null +++ b/sys/kern/uipc_socket2.c @@ -0,0 +1,780 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)uipc_socket2.c 7.17 (Berkeley) 5/4/91 + * $Id: uipc_socket2.c,v 1.2 1993/10/16 15:25:12 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "proc.h" +#include "file.h" +#include "buf.h" +#include "malloc.h" +#include "mbuf.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" + +/* + * Primitive routines for operating on sockets and socket buffers + */ + +/* strings for sleep message: */ +char netio[] = "netio"; +char netcon[] = "netcon"; +char netcls[] = "netcls"; + +u_long sb_max = SB_MAX; /* patchable */ + +/* + * Procedures to manipulate state flags of socket + * and do appropriate wakeups. Normal sequence from the + * active (originating) side is that soisconnecting() is + * called during processing of connect() call, + * resulting in an eventual call to soisconnected() if/when the + * connection is established. When the connection is torn down + * soisdisconnecting() is called during processing of disconnect() call, + * and soisdisconnected() is called when the connection to the peer + * is totally severed. 
The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Mark a connection attempt as started: clear any stale
 * connected/disconnecting state and set SS_ISCONNECTING.
 */
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark a socket fully connected.  For an incoming connection still
 * queued on a listening socket's so_q0, move it to so_q and wake the
 * accept()er; otherwise wake anyone sleeping on the socket itself.
 */
soisconnected(so)
	register struct socket *so;
{
	register struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		/* was on so_q0: promote to the completed-connection queue */
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * Disconnect has begun: no more data may be sent or received;
 * wake up sleepers so they see the state change.
 */
soisdisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * The connection to the peer is completely severed.
 */
soisdisconnected(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
 */
struct socket *
sonewconn1(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	register struct socket *so;
	int soqueue = connstatus ? 1 : 0;

	/* Refuse if the listen backlog (plus 50% slop) is already full. */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
	if (so == NULL)
		return ((struct socket *)0);
	bzero((caddr_t)so, sizeof(*so));
	/* Inherit type, options and buffer sizing from the listener. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
		/* Protocol attach failed: unqueue and discard. */
		(void) soqremque(so, soqueue);
		(void) free((caddr_t)so, M_SOCKET);
		return ((struct socket *)0);
	}
	if (connstatus) {
		/* Already connected/confirming: let accept()ers know. */
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket so at the tail of head's incomplete (q == 0, so_q0)
 * or completed (q != 0, so_q) connection queue.
 */
soqinsque(head, so, q)
	register struct socket *head, *so;
	int q;
{

	register struct socket **prev;
	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_q0 = 0;
		for (prev = &(head->so_q0); *prev; )
			prev = &((*prev)->so_q0);
	} else {
		head->so_qlen++;
		so->so_q = 0;
		for (prev = &(head->so_q); *prev; )
			prev = &((*prev)->so_q);
	}
	*prev = so;
}

/*
 * Remove socket so from its head's queue (q selects so_q0 vs so_q).
 * Returns 1 if found and removed, 0 if so was not on that queue.
 */
soqremque(so, q)
	register struct socket *so;
	int q;
{
	register struct socket *head, *prev, *next;

	head = so->so_head;
	prev = head;
	for (;;) {
		next = q ? prev->so_q : prev->so_q0;
		if (next == so)
			break;
		if (next == 0)
			return (0);
		prev = next;
	}
	if (q == 0) {
		prev->so_q0 = next->so_q0;
		head->so_q0len--;
	} else {
		prev->so_q = next->so_q;
		head->so_qlen--;
	}
	next->so_q0 = next->so_q = 0;
	next->so_head = 0;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

socantsendmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

socantrcvmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Socket select/wakeup routines.
 */

/*
 * Queue a process for a select on a socket buffer.
 * If another process is already selecting on this buffer,
 * just record the collision (SB_COLL) so everyone is woken.
 */
sbselqueue(sb, cp)
	struct sockbuf *sb;
	struct proc *cp;
{
	struct proc *p;

	if (sb->sb_sel && (p = pfind(sb->sb_sel)) && p->p_wchan == (caddr_t)&selwait)
		sb->sb_flags |= SB_COLL;
	else {
		sb->sb_sel = cp->p_pid;
		sb->sb_flags |= SB_SEL;
	}
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * Returns the tsleep() result (0, EINTR, EWOULDBLOCK, ...);
 * the sleep is uninterruptible when SB_NOINTR is set.
 */
sbwait(sb)
	struct sockbuf *sb;
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep((caddr_t)&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
sb_lock(sb)
	register struct sockbuf *sb;
{
	int error;

	/* Sleep until the current holder drops SB_LOCK, then take it. */
	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (error = tsleep((caddr_t)&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    netio, 0))
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p;

	/* Wake any select()er (and collided selectors). */
	if (sb->sb_sel) {
		selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL);
		sb->sb_sel = 0;
		sb->sb_flags &= ~(SB_SEL|SB_COLL);
	}
	/* Wake a sleeper in sbwait(). */
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	/* Async I/O: signal the owning process or process group. */
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2.
If the protocol supports the exchange of ``access rights'' (really + * just additional data associated with the message), and there are + * ``rights'' to be received, then a record containing this data + * should be present (mbuf's must be of type MT_RIGHTS). + * 3. If a name or rights record exists, then it must be followed by + * a data record, perhaps of zero length. + * + * Before using a new socket structure it is first necessary to reserve + * buffer space to the socket, by calling sbreserve(). This should commit + * some of the available buffer space in the system buffer pool for the + * socket (currently, it does nothing but enforce limits). The space + * should be released by calling sbrelease() when the socket is destroyed. + */ + +soreserve(so, sndcc, rcvcc) + register struct socket *so; + u_long sndcc, rcvcc; +{ + + if (sbreserve(&so->so_snd, sndcc) == 0) + goto bad; + if (sbreserve(&so->so_rcv, rcvcc) == 0) + goto bad2; + if (so->so_rcv.sb_lowat == 0) + so->so_rcv.sb_lowat = 1; + if (so->so_snd.sb_lowat == 0) + so->so_snd.sb_lowat = MCLBYTES; + if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + return (0); +bad2: + sbrelease(&so->so_snd); +bad: + return (ENOBUFS); +} + +/* + * Allot mbufs to a sockbuf. + * Attempt to scale mbmax so that mbcnt doesn't become limiting + * if buffering efficiency is near the normal case. + */ +sbreserve(sb, cc) + struct sockbuf *sb; + u_long cc; +{ + + if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES)) + return (0); + sb->sb_hiwat = cc; + sb->sb_mbmax = min(cc * 2, sb_max); + if (sb->sb_lowat > sb->sb_hiwat) + sb->sb_lowat = sb->sb_hiwat; + return (1); +} + +/* + * Free mbufs held by a socket, and reserved mbuf space. + */ +sbrelease(sb) + struct sockbuf *sb; +{ + + sbflush(sb); + sb->sb_hiwat = sb->sb_mbmax = 0; +} + +/* + * Routines to add and remove + * data from an mbuf queue. 
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n;

	if (m == 0)
		return;
	if (n = sb->sb_mb) {
		/* find the last record, then its last mbuf */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				/* record already ended: start a new one */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	}
	sbcompress(sb, m, n);
}

#ifdef SOCKBUF_DEBUG
/*
 * Consistency check: walk the buffer and verify that the byte and
 * mbuf counts agree with the sockbuf bookkeeping (single record only).
 */
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register int len = 0, mbcnt = 0;

	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return;
	if (m = sb->sb_mb)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* Keep M_EOR on the last mbuf of the record being compressed in. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	if (m0 == 0)
		return;
	/* Skip past existing OOB records (and look inside control ones). */
	for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if (m = m->m_next)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
sbappendaddr(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");
	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Record: name mbuf -> control chain -> data chain. */
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	return (1);
}

/*
 * Append control data (with optional data chain m0) to the receive
 * queue as a new record.  Returns 0 if no space in sockbuf.
 */
sbappendcontrol(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		/* Discard empty mbufs unless an EOR would be stranded. */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		/* Coalesce into the tail mbuf when it has internal room. */
		if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
		    (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Restore a pending M_EOR onto the last mbuf kept. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
sbflush(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_flags & SB_LOCK)
		panic("sbflush");
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);
	if (sb->sb_cc || sb->sb_mb)
		panic("sbflush 2");
}

/*
 * Drop data from (the front of) a sockbuf.
 */
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *mn;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* ran off the end of a record: advance to the next */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* partial mbuf: trim from the front */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also discard any now-empty mbufs at the front. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while (m = mn);
	}
}
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
new file mode 100644
index 000000000000..cf82c089a5bc
--- /dev/null
+++ b/sys/kern/uipc_syscalls.c
@@ -0,0 +1,1287 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1990 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)uipc_syscalls.c 7.24 (Berkeley) 6/3/91 + * $Id: uipc_syscalls.c,v 1.4 1993/10/16 15:25:14 rgrimes Exp $ + */ + +#include "param.h" +#include "filedesc.h" +#include "proc.h" +#include "file.h" +#include "buf.h" +#include "malloc.h" +#include "mbuf.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" +#ifdef KTRACE +#include "ktrace.h" +#endif + +/* + * System call interface to the socket abstraction. 
 */

extern struct fileops socketops;

struct socket_args {
	int	domain;
	int	type;
	int	protocol;
};

/*
 * socket() system call: create an unbound socket of the requested
 * domain/type/protocol and return a descriptor for it.
 */
socket(p, uap, retval)
	struct proc *p;
	register struct socket_args *uap;
	int *retval;
{
	struct filedesc *fdp = p->p_fd;
	struct socket *so;
	struct file *fp;
	int fd, error;

	if (error = falloc(p, &fp, &fd))
		return (error);
	fp->f_flag = FREAD|FWRITE;
	fp->f_type = DTYPE_SOCKET;
	fp->f_ops = &socketops;
	if (error = socreate(uap->domain, &so, uap->type, uap->protocol)) {
		/* undo the descriptor allocation on failure */
		fdp->fd_ofiles[fd] = 0;
		ffree(fp);
	} else {
		fp->f_data = (caddr_t)so;
		*retval = fd;
	}
	return (error);
}

struct bind_args {
	int	s;
	caddr_t	name;
	int	namelen;
};

/*
 * bind() system call: assign the user-supplied local address to
 * the socket underlying descriptor s.
 */
/* ARGSUSED */
bind(p, uap, retval)
	struct proc *p;
	register struct bind_args *uap;
	int *retval;
{
	struct file *fp;
	struct mbuf *nam;
	int error;

	if (error = getsock(p->p_fd, uap->s, &fp))
		return (error);
	if (error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME))
		return (error);
	error = sobind((struct socket *)fp->f_data, nam);
	m_freem(nam);
	return (error);
}

struct listen_args {
	int	s;
	int	backlog;
};

/*
 * listen() system call: mark the socket as accepting connections.
 */
/* ARGSUSED */
listen(p, uap, retval)
	struct proc *p;
	register struct listen_args *uap;
	int *retval;
{
	struct file *fp;
	int error;

	if (error = getsock(p->p_fd, uap->s, &fp))
		return (error);
	return (solisten((struct socket *)fp->f_data, uap->backlog));
}

#ifdef COMPAT_43

struct accept_args {
	int	s;
	caddr_t	name;
	int	*anamelen;
	int	compat_43;	/* pseudo-argument, see below */
};

/*
 * accept() and the 4.3BSD-compatible oaccept() are thin wrappers that
 * tag the request and share accept1(); compat_43 is a pseudo-argument
 * carrying only that tag (it selects osockaddr formatting).
 */
accept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	int *retval;
{

	uap->compat_43 = 0;
	return (accept1(p, uap, retval));
}

struct oaccept_args {
	int	s;
	caddr_t	name;
	int	*anamelen;
	int	compat_43;	/* pseudo-argument, see above */
};

oaccept(p, uap, retval)
	struct proc *p;
	struct oaccept_args *uap;
	int *retval;
{

	uap->compat_43 = 1;
	return (accept1(p, uap, retval));
}
#else /* COMPAT_43 */

#define	accept1	accept
#endif

struct accept1_args {
	int	s;
	caddr_t	name;
	int	*anamelen;
#ifdef COMPAT_43
	int	compat_43;
#endif
};

/*
 * Common accept code: wait for a completed connection on the listening
 * socket, take it off so_q, allocate a descriptor for it, and
 * optionally copy the peer's address out to the user.
 */
accept1(p, uap, retval)
	struct proc *p;
	register struct accept1_args *uap;
	int *retval;
{
	struct file *fp;
	struct mbuf *nam;
	int namelen, error, s;
	register struct socket *so;

	if (uap->name && (error = copyin((caddr_t)uap->anamelen,
	    (caddr_t)&namelen, sizeof (namelen))))
		return (error);
	if (error = getsock(p->p_fd, uap->s, &fp))
		return (error);
	s = splnet();
	so = (struct socket *)fp->f_data;
	if ((so->so_options & SO_ACCEPTCONN) == 0) {
		splx(s);
		return (EINVAL);
	}
	if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
		splx(s);
		return (EWOULDBLOCK);
	}
	/* Sleep until a connection completes or the socket errors out. */
	while (so->so_qlen == 0 && so->so_error == 0) {
		if (so->so_state & SS_CANTRCVMORE) {
			so->so_error = ECONNABORTED;
			break;
		}
		if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
		    netcon, 0)) {
			splx(s);
			return (error);
		}
	}
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		splx(s);
		return (error);
	}
	if (error = falloc(p, &fp, retval)) {
		splx(s);
		return (error);
	}
	/* Dequeue the first completed connection; so now names it. */
	{ struct socket *aso = so->so_q;
	  if (soqremque(aso, 1) == 0)
		panic("accept");
	  so = aso;
	}
	fp->f_type = DTYPE_SOCKET;
	fp->f_flag = FREAD|FWRITE;
	fp->f_ops = &socketops;
	fp->f_data = (caddr_t)so;
	nam = m_get(M_WAIT, MT_SONAME);
	(void) soaccept(so, nam);
	if (uap->name) {
#ifdef COMPAT_43
		/* old struct osockaddr: 16-bit family, no sa_len */
		if (uap->compat_43)
			mtod(nam, struct osockaddr *)->sa_family =
			    mtod(nam, struct sockaddr *)->sa_family;
#endif
		if (namelen > nam->m_len)
			namelen = nam->m_len;
		/* SHOULD COPY OUT A CHAIN HERE */
		if ((error = copyout(mtod(nam, caddr_t), (caddr_t)uap->name,
		    (u_int)namelen)) == 0)
			error = copyout((caddr_t)&namelen,
			    (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
	}
	m_freem(nam);
	splx(s);
	return (error);
}

struct connect_args {
	int	s;
	caddr_t	name;
	int	namelen;
};

/* ARGSUSED */
connect(p, uap, retval)
	struct proc
*p; + register struct connect_args *uap; + int *retval; +{ + struct file *fp; + register struct socket *so; + struct mbuf *nam; + int error, s; + + if (error = getsock(p->p_fd, uap->s, &fp)) + return (error); + so = (struct socket *)fp->f_data; + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) + return (EALREADY); + if (error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME)) + return (error); + error = soconnect(so, nam); + if (error) + goto bad; + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + m_freem(nam); + return (EINPROGRESS); + } + s = splnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) + if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + netcon, 0)) + break; + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } + splx(s); +bad: + so->so_state &= ~SS_ISCONNECTING; + m_freem(nam); + if (error == ERESTART) + error = EINTR; + return (error); +} + +struct socketpair_args { + int domain; + int type; + int protocol; + int *rsv; +}; + +socketpair(p, uap, retval) + struct proc *p; + register struct socketpair_args *uap; + int retval[]; +{ + register struct filedesc *fdp = p->p_fd; + struct file *fp1, *fp2; + struct socket *so1, *so2; + int fd, error, sv[2]; + + if (error = socreate(uap->domain, &so1, uap->type, uap->protocol)) + return (error); + if (error = socreate(uap->domain, &so2, uap->type, uap->protocol)) + goto free1; + if (error = falloc(p, &fp1, &fd)) + goto free2; + sv[0] = fd; + fp1->f_flag = FREAD|FWRITE; + fp1->f_type = DTYPE_SOCKET; + fp1->f_ops = &socketops; + fp1->f_data = (caddr_t)so1; + if (error = falloc(p, &fp2, &fd)) + goto free3; + fp2->f_flag = FREAD|FWRITE; + fp2->f_type = DTYPE_SOCKET; + fp2->f_ops = &socketops; + fp2->f_data = (caddr_t)so2; + sv[1] = fd; + if (error = soconnect2(so1, so2)) + goto free4; + if (uap->type == SOCK_DGRAM) { + /* + * Datagram socket connection is asymmetric. 
+ */ + if (error = soconnect2(so2, so1)) + goto free4; + } + error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); + retval[0] = sv[0]; /* XXX ??? */ + retval[1] = sv[1]; /* XXX ??? */ + return (error); +free4: + ffree(fp2); + fdp->fd_ofiles[sv[1]] = 0; +free3: + ffree(fp1); + fdp->fd_ofiles[sv[0]] = 0; +free2: + (void)soclose(so2); +free1: + (void)soclose(so1); + return (error); +} + +struct sendto_args { + int s; + caddr_t buf; + int len; + int flags; + caddr_t to; + int tolen; +}; + +sendto(p, uap, retval) + struct proc *p; + register struct sendto_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov; + int error; + + msg.msg_name = uap->to; + msg.msg_namelen = uap->tolen; + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + msg.msg_control = 0; +#ifdef COMPAT_43 + msg.msg_flags = 0; +#endif + aiov.iov_base = uap->buf; + aiov.iov_len = uap->len; + return (sendit(p, uap->s, &msg, uap->flags, retval)); +} + +#ifdef COMPAT_43 + +struct osend_args { + int s; + caddr_t buf; + int len; + int flags; +}; + +osend(p, uap, retval) + struct proc *p; + register struct osend_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = uap->buf; + aiov.iov_len = uap->len; + msg.msg_control = 0; + msg.msg_flags = 0; + return (sendit(p, uap->s, &msg, uap->flags, retval)); +} + +#define MSG_COMPAT 0x8000 + +struct osendmsg_args { + int s; + caddr_t msg; + int flags; +}; + +osendmsg(p, uap, retval) + struct proc *p; + register struct osendmsg_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *iov; + int error; + + if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); 
+ } else + iov = aiov; + if (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))) + goto done; + msg.msg_flags = MSG_COMPAT; + msg.msg_iov = iov; + error = sendit(p, uap->s, &msg, uap->flags, retval); +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} +#endif + +struct sendmsg_args { + int s; + caddr_t msg; + int flags; +}; + +sendmsg(p, uap, retval) + struct proc *p; + register struct sendmsg_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *iov; + int error; + + if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); + } else + iov = aiov; + if (msg.msg_iovlen && + (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) + goto done; + msg.msg_iov = iov; +#ifdef COMPAT_43 + msg.msg_flags = 0; +#endif + error = sendit(p, uap->s, &msg, uap->flags, retval); +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} + +sendit(p, s, mp, flags, retsize) + register struct proc *p; + int s; + register struct msghdr *mp; + int flags, *retsize; +{ + struct file *fp; + struct uio auio; + register struct iovec *iov; + register int i; + struct mbuf *to, *control; + int len, error; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if (error = getsock(p->p_fd, s, &fp)) + return (error); + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_procp = p; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + if (iov->iov_len < 0) + return (EINVAL); + if ((auio.uio_resid += iov->iov_len) < 0) + return (EINVAL); + } + if (mp->msg_name) 
{ + if (error = sockargs(&to, mp->msg_name, mp->msg_namelen, + MT_SONAME)) + return (error); + } else + to = 0; + if (mp->msg_control) { + if (mp->msg_controllen < sizeof(struct cmsghdr) +#ifdef COMPAT_43 + && mp->msg_flags != MSG_COMPAT +#endif + ) { + error = EINVAL; + goto bad; + } + if (error = sockargs(&control, mp->msg_control, + mp->msg_controllen, MT_CONTROL)) + goto bad; +#ifdef COMPAT_43 + if (mp->msg_flags == MSG_COMPAT) { + register struct cmsghdr *cm; + + M_PREPEND(control, sizeof(*cm), M_WAIT); + if (control == 0) { + error = ENOBUFS; + goto bad; + } else { + cm = mtod(control, struct cmsghdr *); + cm->cmsg_len = control->m_len; + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_RIGHTS; + } + } +#endif + } else + control = 0; +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO)) { + int iovlen = auio.uio_iovcnt * sizeof (struct iovec); + + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + len = auio.uio_resid; + if (error = sosend((struct socket *)fp->f_data, to, &auio, + (struct mbuf *)0, control, flags)) { + if (auio.uio_resid != len && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + if (error == EPIPE) + psignal(p, SIGPIPE); + } + if (error == 0) + *retsize = len - auio.uio_resid; +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, s, UIO_WRITE, + ktriov, *retsize, error); + FREE(ktriov, M_TEMP); + } +#endif +bad: + if (to) + m_freem(to); + return (error); +} + +#ifdef COMPAT_43 + +struct orecvfrom_args { + int s; + caddr_t buf; + int len; + int flags; + caddr_t from; + int *fromlenaddr; +}; + +orecvfrom(p, uap, retval) + struct proc *p; + struct orecvfrom_args *uap; + int *retval; +{ + + uap->flags |= MSG_COMPAT; + return (recvfrom(p, uap, retval)); +} +#endif + +struct recvfrom_args { + int s; + caddr_t buf; + int len; + int flags; + caddr_t from; + int *fromlenaddr; +}; + +recvfrom(p, uap, retval) + struct proc *p; 
+ register struct recvfrom_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov; + int error; + + if (uap->fromlenaddr) { + if (error = copyin((caddr_t)uap->fromlenaddr, + (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen))) + return (error); + } else + msg.msg_namelen = 0; + msg.msg_name = uap->from; + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = uap->buf; + aiov.iov_len = uap->len; + msg.msg_control = 0; + msg.msg_flags = uap->flags; + return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval)); +} + +#ifdef COMPAT_43 + +struct orecv_args { + int s; + caddr_t buf; + int len; + int flags; +}; + +orecv(p, uap, retval) + struct proc *p; + register struct orecv_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = uap->buf; + aiov.iov_len = uap->len; + msg.msg_control = 0; + msg.msg_flags = uap->flags; + return (recvit(p, uap->s, &msg, (caddr_t)0, retval)); +} + +/* + * Old recvmsg. This code takes advantage of the fact that the old msghdr + * overlays the new one, missing only the flags, and with the (old) access + * rights where the control fields are now. 
+ */ + +struct orecvmsg_args { + int s; + struct omsghdr *msg; + int flags; +}; + +orecvmsg(p, uap, retval) + struct proc *p; + register struct orecvmsg_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *iov; + int error; + + if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg, + sizeof (struct omsghdr))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); + } else + iov = aiov; + msg.msg_flags = uap->flags | MSG_COMPAT; + if (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))) + goto done; + msg.msg_iov = iov; + error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval); + + if (msg.msg_controllen && error == 0) + error = copyout((caddr_t)&msg.msg_controllen, + (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} +#endif + +struct recvmsg_args { + int s; + struct msghdr *msg; + int flags; +}; + +recvmsg(p, uap, retval) + struct proc *p; + register struct recvmsg_args *uap; + int *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; + register int error; + + if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); + } else + iov = aiov; +#ifdef COMPAT_43 + msg.msg_flags = uap->flags &~ MSG_COMPAT; +#else + msg.msg_flags = uap->flags; +#endif + uiov = msg.msg_iov; + msg.msg_iov = iov; + if (error = copyin((caddr_t)uiov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))) + goto done; + if ((error = recvit(p, uap->s, &msg, (caddr_t)0, retval)) == 0) { 
+ msg.msg_iov = uiov; + error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); + } +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} + +recvit(p, s, mp, namelenp, retsize) + register struct proc *p; + int s; + register struct msghdr *mp; + caddr_t namelenp; + int *retsize; +{ + struct file *fp; + struct uio auio; + register struct iovec *iov; + register int i; + int len, error; + struct mbuf *from = 0, *control = 0; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if (error = getsock(p->p_fd, s, &fp)) + return (error); + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_READ; + auio.uio_procp = p; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + if (iov->iov_len < 0) + return (EINVAL); + if ((auio.uio_resid += iov->iov_len) < 0) + return (EINVAL); + } +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO)) { + int iovlen = auio.uio_iovcnt * sizeof (struct iovec); + + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + len = auio.uio_resid; + if (error = soreceive((struct socket *)fp->f_data, &from, &auio, + (struct mbuf **)0, mp->msg_control ? 
&control : (struct mbuf **)0, + &mp->msg_flags)) { + if (auio.uio_resid != len && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, s, UIO_READ, + ktriov, len - auio.uio_resid, error); + FREE(ktriov, M_TEMP); + } +#endif + if (error) + goto out; + *retsize = len - auio.uio_resid; + if (mp->msg_name) { + len = mp->msg_namelen; + if (len <= 0 || from == 0) + len = 0; + else { +#ifdef COMPAT_43 + if (mp->msg_flags & MSG_COMPAT) + mtod(from, struct osockaddr *)->sa_family = + mtod(from, struct sockaddr *)->sa_family; +#endif + if (len > from->m_len) + len = from->m_len; + /* else if len < from->m_len ??? */ + if (error = copyout(mtod(from, caddr_t), + (caddr_t)mp->msg_name, (unsigned)len)) + goto out; + } + mp->msg_namelen = len; + if (namelenp && + (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { +#ifdef COMPAT_43 + if (mp->msg_flags & MSG_COMPAT) + error = 0; /* old recvfrom didn't check */ + else +#endif + goto out; + } + } + if (mp->msg_control) { +#ifdef COMPAT_43 + /* + * We assume that old recvmsg calls won't receive access + * rights and other control info, esp. as control info + * is always optional and those options didn't exist in 4.3. + * If we receive rights, trim the cmsghdr; anything else + * is tossed. 
+ */ + if (control && mp->msg_flags & MSG_COMPAT) { + if (mtod(control, struct cmsghdr *)->cmsg_level != + SOL_SOCKET || + mtod(control, struct cmsghdr *)->cmsg_type != + SCM_RIGHTS) { + mp->msg_controllen = 0; + goto out; + } + control->m_len -= sizeof (struct cmsghdr); + control->m_data += sizeof (struct cmsghdr); + } +#endif + len = mp->msg_controllen; + if (len <= 0 || control == 0) + len = 0; + else { + if (len >= control->m_len) + len = control->m_len; + else + mp->msg_flags |= MSG_CTRUNC; + error = copyout((caddr_t)mtod(control, caddr_t), + (caddr_t)mp->msg_control, (unsigned)len); + } + mp->msg_controllen = len; + } +out: + if (from) + m_freem(from); + if (control) + m_freem(control); + return (error); +} + +struct shutdown_args { + int s; + int how; +}; + +/* ARGSUSED */ +shutdown(p, uap, retval) + struct proc *p; + register struct shutdown_args *uap; + int *retval; +{ + struct file *fp; + int error; + + if (error = getsock(p->p_fd, uap->s, &fp)) + return (error); + return (soshutdown((struct socket *)fp->f_data, uap->how)); +} + +struct setsocketopt_args { + int s; + int level; + int name; + caddr_t val; + int valsize; +}; + +/* ARGSUSED */ +setsockopt(p, uap, retval) + struct proc *p; + register struct setsocketopt_args *uap; + int *retval; +{ + struct file *fp; + struct mbuf *m = NULL; + int error; + + if (error = getsock(p->p_fd, uap->s, &fp)) + return (error); + if (uap->valsize > MLEN) + return (EINVAL); + if (uap->val) { + m = m_get(M_WAIT, MT_SOOPTS); + if (m == NULL) + return (ENOBUFS); + if (error = copyin(uap->val, mtod(m, caddr_t), + (u_int)uap->valsize)) { + (void) m_free(m); + return (error); + } + m->m_len = uap->valsize; + } + return (sosetopt((struct socket *)fp->f_data, uap->level, + uap->name, m)); +} + +struct getsockopt_args { + int s; + int level; + int name; + caddr_t val; + int *avalsize; +}; + +/* ARGSUSED */ +getsockopt(p, uap, retval) + struct proc *p; + register struct getsockopt_args *uap; + int *retval; +{ + struct file *fp; + 
struct mbuf *m = NULL; + int valsize, error; + + if (error = getsock(p->p_fd, uap->s, &fp)) + return (error); + if (uap->val) { + if (error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, + sizeof (valsize))) + return (error); + } else + valsize = 0; + if ((error = sogetopt((struct socket *)fp->f_data, uap->level, + uap->name, &m)) == 0 && uap->val && valsize && m != NULL) { + if (valsize > m->m_len) + valsize = m->m_len; + error = copyout(mtod(m, caddr_t), uap->val, (u_int)valsize); + if (error == 0) + error = copyout((caddr_t)&valsize, + (caddr_t)uap->avalsize, sizeof (valsize)); + } + if (m != NULL) + (void) m_free(m); + return (error); +} + +/* ARGSUSED */ +pipe(p, uap, retval) + struct proc *p; + struct args *uap; + int retval[]; +{ + register struct filedesc *fdp = p->p_fd; + struct file *rf, *wf; + struct socket *rso, *wso; + int fd, error; + + if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) + return (error); + if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) + goto free1; + if (error = falloc(p, &rf, &fd)) + goto free2; + retval[0] = fd; + rf->f_flag = FREAD; + rf->f_type = DTYPE_SOCKET; + rf->f_ops = &socketops; + rf->f_data = (caddr_t)rso; + if (error = falloc(p, &wf, &fd)) + goto free3; + wf->f_flag = FWRITE; + wf->f_type = DTYPE_SOCKET; + wf->f_ops = &socketops; + wf->f_data = (caddr_t)wso; + retval[1] = fd; + if (error = unp_connect2(wso, rso)) + goto free4; + return (0); +free4: + ffree(wf); + fdp->fd_ofiles[retval[1]] = 0; +free3: + ffree(rf); + fdp->fd_ofiles[retval[0]] = 0; +free2: + (void)soclose(wso); +free1: + (void)soclose(rso); + return (error); +} + +/* + * Get socket name. 
 */
#ifdef COMPAT_43

/*
 * Argument layout shared with ogetsockname(); compat_43 selects
 * old-style (4.3BSD osockaddr) formatting in getsockname1().
 */
struct getsockname_args {
	int	fdes;
	caddr_t	asa;
	int	*alen;
	int	compat_43;
};

/*
 * getsockname(2): fetch the local address bound to socket 'fdes'.
 * Thin wrapper that requests new-style sockaddr output (compat_43 = 0)
 * and defers all work to getsockname1().
 */
getsockname(p, uap, retval)
	struct proc *p;
	struct getsockname_args *uap;
	int *retval;
{

	uap->compat_43 = 0;
	return (getsockname1(p, uap, retval));
}

struct ogetsockname_args {
	int	fdes;
	caddr_t	asa;
	int	*alen;
	int	compat_43;
};

/*
 * Old (4.3BSD-compatible) getsockname(2): identical to getsockname()
 * except that getsockname1() rewrites the result as an osockaddr.
 */
ogetsockname(p, uap, retval)
	struct proc *p;
	struct ogetsockname_args *uap;
	int *retval;
{

	uap->compat_43 = 1;
	return (getsockname1(p, uap, retval));
}
#else /* COMPAT_43 */

/* Without 4.3BSD compatibility there is only the one entry point. */
#define	getsockname1	getsockname
#endif

struct getsockname1_args {
	int	fdes;
	caddr_t	asa;
	int	*alen;
#ifdef COMPAT_43
	int	compat_43;
#endif
};

/*
 * Common body for getsockname()/ogetsockname().
 *
 * Copies in the caller-supplied buffer length (*alen), asks the
 * protocol for the socket's local address via PRU_SOCKADDR, then
 * copies out at most that many bytes of address followed by the
 * (possibly shortened) length.  Returns 0 or an errno: EBADF/ENOTSOCK
 * from getsock(), ENOBUFS if no mbuf is available, or a
 * copyin/copyout/protocol error.
 */
/* ARGSUSED */
getsockname1(p, uap, retval)
	struct proc *p;
	register struct getsockname1_args *uap;
	int *retval;
{
	struct file *fp;
	register struct socket *so;
	struct mbuf *m;
	int len, error;

	if (error = getsock(p->p_fd, uap->fdes, &fp))
		return (error);
	if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)))
		return (error);
	so = (struct socket *)fp->f_data;
	m = m_getclr(M_WAIT, MT_SONAME);
	if (m == NULL)
		return (ENOBUFS);
	if (error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0))
		goto bad;
	/* Never copy out more than the protocol actually produced. */
	if (len > m->m_len)
		len = m->m_len;
#ifdef COMPAT_43
	/*
	 * Old binaries expect the 4.3BSD osockaddr layout; rewrite the
	 * family field of the result in place before copying it out.
	 */
	if (uap->compat_43)
		mtod(m, struct osockaddr *)->sa_family =
		    mtod(m, struct sockaddr *)->sa_family;
#endif
	error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len);
	if (error == 0)
		error = copyout((caddr_t)&len, (caddr_t)uap->alen,
		    sizeof (len));
bad:
	m_freem(m);
	return (error);
}

/*
 * Get name of peer for connected socket.
+ */ +#ifdef COMPAT_43 + +struct getpeername_args { + int fdes; + caddr_t asa; + int *alen; + int compat_43; +}; + +getpeername(p, uap, retval) + struct proc *p; + struct getpeername_args *uap; + int *retval; +{ + + uap->compat_43 = 0; + return (getpeername1(p, uap, retval)); +} + +struct ogetpeername_args { + int fdes; + caddr_t asa; + int *alen; + int compat_43; +}; + +ogetpeername(p, uap, retval) + struct proc *p; + struct ogetpeername_args *uap; + int *retval; +{ + + uap->compat_43 = 1; + return (getpeername1(p, uap, retval)); +} +#else /* COMPAT_43 */ + +#define getpeername1 getpeername +#endif + +struct getpeername1_args { + int fdes; + caddr_t asa; + int *alen; +#ifdef COMPAT_43 + int compat_43; +#endif +}; + +/* ARGSUSED */ +getpeername1(p, uap, retval) + struct proc *p; + register struct getpeername1_args *uap; + int *retval; +{ + struct file *fp; + register struct socket *so; + struct mbuf *m; + int len, error; + + if (error = getsock(p->p_fd, uap->fdes, &fp)) + return (error); + so = (struct socket *)fp->f_data; + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) + return (ENOTCONN); + m = m_getclr(M_WAIT, MT_SONAME); + if (m == NULL) + return (ENOBUFS); + if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len))) + return (error); + if (error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0)) + goto bad; + if (len > m->m_len) + len = m->m_len; +#ifdef COMPAT_43 + if (uap->compat_43) + mtod(m, struct osockaddr *)->sa_family = + mtod(m, struct sockaddr *)->sa_family; +#endif + if (error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len)) + goto bad; + error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); +bad: + m_freem(m); + return (error); +} + +sockargs(mp, buf, buflen, type) + struct mbuf **mp; + caddr_t buf; + int buflen, type; +{ + register struct mbuf *m; + int error; + + if ((u_int)buflen > MLEN) { +#ifdef COMPAT_43 + if (type == MT_SONAME && (u_int)buflen <= 112) + buflen = MLEN; /* unix domain 
compat. hack */
		else
#endif
			return (EINVAL);
	}
	m = m_get(M_WAIT, type);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = buflen;
	/* Pull the user's sockaddr/control/option bytes into the mbuf. */
	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
	if (error) {
		(void) m_free(m);
		return(error);
	}
	*mp = m;
	if (type == MT_SONAME) {
		register struct sockaddr *sa = mtod(m, struct sockaddr *);

		/*
		 * 4.3BSD binaries stored the family in the byte that is
		 * now sa_len; when compiled with COMPAT_43 on a
		 * non-big-endian machine, migrate it to sa_family.
		 */
#if defined(COMPAT_43) && BYTE_ORDER != BIG_ENDIAN
		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
			sa->sa_family = sa->sa_len;
#endif
		sa->sa_len = buflen;
	}
	return (0);
}

/*
 * Translate descriptor 'fdes' into its struct file, insisting that it
 * refers to a socket.  Returns 0 and sets *fpp on success, EBADF for
 * an out-of-range or closed descriptor, ENOTSOCK otherwise.
 * NOTE(review): no hold is taken on the returned file; callers rely on
 * the descriptor staying in the table while they use it — confirm.
 */
getsock(fdp, fdes, fpp)
	struct filedesc *fdp;
	int fdes;
	struct file **fpp;
{
	register struct file *fp;

	if ((unsigned)fdes >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fdes]) == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_SOCKET)
		return (ENOTSOCK);
	*fpp = fp;
	return (0);
}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
new file mode 100644
index 000000000000..6914ffeac913
--- /dev/null
+++ b/sys/kern/uipc_usrreq.c
@@ -0,0 +1,779 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)uipc_usrreq.c 7.26 (Berkeley) 6/3/91 + * $Id: uipc_usrreq.c,v 1.4 1993/10/23 16:34:45 davidg Exp $ + */ + +#include "param.h" +#include "proc.h" +#include "filedesc.h" +#include "domain.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" +#include "unpcb.h" +#include "un.h" +#include "namei.h" +#include "vnode.h" +#include "file.h" +#include "stat.h" +#include "mbuf.h" + +/* + * Unix communications domain. 
+ * + * TODO: + * SEQPACKET, RDM + * rethink name space problems + * need a proper out-of-band + */ +struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; +ino_t unp_ino; /* prototype for fake inode numbers */ + +/*ARGSUSED*/ +uipc_usrreq(so, req, m, nam, control) + struct socket *so; + int req; + struct mbuf *m, *nam, *control; +{ + struct unpcb *unp = sotounpcb(so); + register struct socket *so2; + register int error = 0; + struct proc *p = curproc; /* XXX */ + + if (req == PRU_CONTROL) + return (EOPNOTSUPP); + if (req != PRU_SEND && control && control->m_len) { + error = EOPNOTSUPP; + goto release; + } + if (unp == 0 && req != PRU_ATTACH) { + error = EINVAL; + goto release; + } + switch (req) { + + case PRU_ATTACH: + if (unp) { + error = EISCONN; + break; + } + error = unp_attach(so); + break; + + case PRU_DETACH: + unp_detach(unp); + break; + + case PRU_BIND: + error = unp_bind(unp, nam, p); + break; + + case PRU_LISTEN: + if (unp->unp_vnode == 0) + error = EINVAL; + break; + + case PRU_CONNECT: + error = unp_connect(so, nam, p); + break; + + case PRU_CONNECT2: + error = unp_connect2(so, (struct socket *)nam); + break; + + case PRU_DISCONNECT: + unp_disconnect(unp); + break; + + case PRU_ACCEPT: + /* + * Pass back name of connected socket, + * if it was bound and we are still connected + * (our peer may have closed already!). 
+ */ + if (unp->unp_conn && unp->unp_conn->unp_addr) { + nam->m_len = unp->unp_conn->unp_addr->m_len; + bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), + mtod(nam, caddr_t), (unsigned)nam->m_len); + } else { + nam->m_len = sizeof(sun_noname); + *(mtod(nam, struct sockaddr *)) = sun_noname; + } + break; + + case PRU_SHUTDOWN: + socantsendmore(so); + unp_shutdown(unp); + break; + + case PRU_RCVD: + switch (so->so_type) { + + case SOCK_DGRAM: + panic("uipc 1"); + /*NOTREACHED*/ + + case SOCK_STREAM: +#define rcv (&so->so_rcv) +#define snd (&so2->so_snd) + if (unp->unp_conn == 0) + break; + so2 = unp->unp_conn->unp_socket; + /* + * Adjust backpressure on sender + * and wakeup any waiting to write. + */ + snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; + unp->unp_mbcnt = rcv->sb_mbcnt; + snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; + unp->unp_cc = rcv->sb_cc; + sowwakeup(so2); +#undef snd +#undef rcv + break; + + default: + panic("uipc 2"); + } + break; + + case PRU_SEND: + if (control && (error = unp_internalize(control, p))) + break; + switch (so->so_type) { + + case SOCK_DGRAM: { + struct sockaddr *from; + + if (nam) { + if (unp->unp_conn) { + error = EISCONN; + break; + } + error = unp_connect(so, nam, p); + if (error) + break; + } else { + if (unp->unp_conn == 0) { + error = ENOTCONN; + break; + } + } + so2 = unp->unp_conn->unp_socket; + if (unp->unp_addr) + from = mtod(unp->unp_addr, struct sockaddr *); + else + from = &sun_noname; + if (sbappendaddr(&so2->so_rcv, from, m, control)) { + sorwakeup(so2); + m = 0; + control = 0; + } else + error = ENOBUFS; + if (nam) + unp_disconnect(unp); + break; + } + + case SOCK_STREAM: +#define rcv (&so2->so_rcv) +#define snd (&so->so_snd) + if (so->so_state & SS_CANTSENDMORE) { + error = EPIPE; + break; + } + if (unp->unp_conn == 0) + panic("uipc 3"); + so2 = unp->unp_conn->unp_socket; + /* + * Send to paired receive port, and then reduce + * send buffer hiwater marks to maintain backpressure. + * Wake up readers. 
+ */ + if (control) { + if (sbappendcontrol(rcv, m, control)) + control = 0; + } else + sbappend(rcv, m); + snd->sb_mbmax -= + rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; + unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; + snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; + unp->unp_conn->unp_cc = rcv->sb_cc; + sorwakeup(so2); + m = 0; +#undef snd +#undef rcv + break; + + default: + panic("uipc 4"); + } + break; + + case PRU_ABORT: + unp_drop(unp, ECONNABORTED); + break; + + case PRU_SENSE: + ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; + if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { + so2 = unp->unp_conn->unp_socket; + ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; + } + ((struct stat *) m)->st_dev = NODEV; + if (unp->unp_ino == 0) + unp->unp_ino = unp_ino++; + ((struct stat *) m)->st_ino = unp->unp_ino; + return (0); + + case PRU_RCVOOB: + return (EOPNOTSUPP); + + case PRU_SENDOOB: + error = EOPNOTSUPP; + break; + + case PRU_SOCKADDR: + if (unp->unp_addr) { + nam->m_len = unp->unp_addr->m_len; + bcopy(mtod(unp->unp_addr, caddr_t), + mtod(nam, caddr_t), (unsigned)nam->m_len); + } else + nam->m_len = 0; + break; + + case PRU_PEERADDR: + if (unp->unp_conn && unp->unp_conn->unp_addr) { + nam->m_len = unp->unp_conn->unp_addr->m_len; + bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), + mtod(nam, caddr_t), (unsigned)nam->m_len); + } else + nam->m_len = 0; + break; + + case PRU_SLOWTIMO: + break; + + default: + panic("piusrreq"); + } +release: + if (control) + m_freem(control); + if (m) + m_freem(m); + return (error); +} + +/* + * Both send and receive buffers are allocated PIPSIZ bytes of buffering + * for stream sockets, although the total for sender and receiver is + * actually only PIPSIZ. + * Datagram sockets really use the sendspace as the maximum datagram size, + * and don't really want to reserve the sendspace. Their recvspace should + * be large enough for at least one max-size datagram plus address. 
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

int	unp_rights;			/* file descriptors in flight */

/*
 * PRU_ATTACH: allocate an unpcb (out of an mbuf) for a new AF_UNIX
 * socket and link it to 'so'.  Reserves default send/receive buffer
 * space on first attach if none is set yet.
 * NOTE(review): 'error' is only assigned in the SOCK_STREAM and
 * SOCK_DGRAM cases; any other so_type would read it uninitialized —
 * presumably unreachable in this domain, but worth confirming.
 */
unp_attach(so)
	struct socket *so;
{
	register struct mbuf *m;
	register struct unpcb *unp;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;
		}
		if (error)
			return (error);
	}
	m = m_getclr(M_DONTWAIT, MT_PCB);
	if (m == NULL)
		return (ENOBUFS);
	unp = mtod(m, struct unpcb *);
	so->so_pcb = (caddr_t)unp;
	unp->unp_socket = so;
	return (0);
}

/*
 * PRU_DETACH: tear down an unpcb.  Releases the bound vnode (if any),
 * disconnects from the peer, drops every socket still referring to us,
 * marks the socket disconnected, and frees the address and pcb mbufs.
 */
unp_detach(unp)
	register struct unpcb *unp;
{

	if (unp->unp_vnode) {
		/* Unhook the filesystem binding made by unp_bind(). */
		unp->unp_vnode->v_socket = 0;
		vrele(unp->unp_vnode);
		unp->unp_vnode = 0;
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	while (unp->unp_refs)
		unp_drop(unp->unp_refs, ECONNRESET);
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = 0;
	m_freem(unp->unp_addr);
	(void) m_free(dtom(unp));
	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
+ */ + sorflush(unp->unp_socket); + unp_gc(); + } +} + +unp_bind(unp, nam, p) + struct unpcb *unp; + struct mbuf *nam; + struct proc *p; +{ + struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); + register struct vnode *vp; + register struct nameidata *ndp; + struct vattr vattr; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_dirp = soun->sun_path; + if (unp->unp_vnode != NULL) + return (EINVAL); + if (nam->m_len == MLEN) { + if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) + return (EINVAL); + } else + *(mtod(nam, caddr_t) + nam->m_len) = 0; +/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ + ndp->ni_nameiop = CREATE | FOLLOW | LOCKPARENT; + ndp->ni_segflg = UIO_SYSSPACE; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp != NULL) { + VOP_ABORTOP(ndp); + if (ndp->ni_dvp == vp) + vrele(ndp->ni_dvp); + else + vput(ndp->ni_dvp); + vrele(vp); + return (EADDRINUSE); + } + VATTR_NULL(&vattr); + vattr.va_type = VSOCK; + vattr.va_mode = 0777; + if (error = VOP_CREATE(ndp, &vattr, p)) + return (error); + vp = ndp->ni_vp; + vp->v_socket = unp->unp_socket; + unp->unp_vnode = vp; + unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL); + VOP_UNLOCK(vp); + return (0); +} + +unp_connect(so, nam, p) + struct socket *so; + struct mbuf *nam; + struct proc *p; +{ + register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); + register struct vnode *vp; + register struct socket *so2, *so3; + register struct nameidata *ndp; + struct unpcb *unp2, *unp3; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_dirp = soun->sun_path; + if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */ + if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) + return (EMSGSIZE); + } else + *(mtod(nam, caddr_t) + nam->m_len) = 0; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_SYSSPACE; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_type != VSOCK) { + error = ENOTSOCK; + goto bad; + 
} + if (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) + goto bad; + so2 = vp->v_socket; + if (so2 == 0) { + error = ECONNREFUSED; + goto bad; + } + if (so->so_type != so2->so_type) { + error = EPROTOTYPE; + goto bad; + } + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + if ((so2->so_options & SO_ACCEPTCONN) == 0 || + (so3 = sonewconn(so2, 0)) == 0) { + error = ECONNREFUSED; + goto bad; + } + unp2 = sotounpcb(so2); + unp3 = sotounpcb(so3); + if (unp2->unp_addr) + unp3->unp_addr = + m_copy(unp2->unp_addr, 0, (int)M_COPYALL); + so2 = so3; + } + error = unp_connect2(so, so2); +bad: + vput(vp); + return (error); +} + +unp_connect2(so, so2) + register struct socket *so; + register struct socket *so2; +{ + register struct unpcb *unp = sotounpcb(so); + register struct unpcb *unp2; + + if (so2->so_type != so->so_type) + return (EPROTOTYPE); + unp2 = sotounpcb(so2); + unp->unp_conn = unp2; + switch (so->so_type) { + + case SOCK_DGRAM: + unp->unp_nextref = unp2->unp_refs; + unp2->unp_refs = unp; + soisconnected(so); + break; + + case SOCK_STREAM: + unp2->unp_conn = unp; + soisconnected(so); + soisconnected(so2); + break; + + default: + panic("unp_connect2"); + } + return (0); +} + +unp_disconnect(unp) + struct unpcb *unp; +{ + register struct unpcb *unp2 = unp->unp_conn; + + if (unp2 == 0) + return; + unp->unp_conn = 0; + switch (unp->unp_socket->so_type) { + + case SOCK_DGRAM: + if (unp2->unp_refs == unp) + unp2->unp_refs = unp->unp_nextref; + else { + unp2 = unp2->unp_refs; + for (;;) { + if (unp2 == 0) + panic("unp_disconnect"); + if (unp2->unp_nextref == unp) + break; + unp2 = unp2->unp_nextref; + } + unp2->unp_nextref = unp->unp_nextref; + } + unp->unp_nextref = 0; + unp->unp_socket->so_state &= ~SS_ISCONNECTED; + break; + + case SOCK_STREAM: + soisdisconnected(unp->unp_socket); + unp2->unp_conn = 0; + soisdisconnected(unp2->unp_socket); + break; + } +} + +#ifdef notdef +unp_abort(unp) + struct unpcb *unp; +{ + + unp_detach(unp); +} +#endif + +unp_shutdown(unp) + 
struct unpcb *unp; +{ + struct socket *so; + + if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && + (so = unp->unp_conn->unp_socket)) + socantrcvmore(so); +} + +unp_drop(unp, errno) + struct unpcb *unp; + int errno; +{ + struct socket *so = unp->unp_socket; + + so->so_error = errno; + unp_disconnect(unp); + if (so->so_head) { + so->so_pcb = (caddr_t) 0; + m_freem(unp->unp_addr); + (void) m_free(dtom(unp)); + sofree(so); + } +} + +#ifdef notdef +unp_drain() +{ + +} +#endif + +unp_externalize(rights) + struct mbuf *rights; +{ + struct proc *p = curproc; /* XXX */ + register int i; + register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); + register struct file **rp = (struct file **)(cm + 1); + register struct file *fp; + int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int); + int f; + + if (!fdavail(p, newfds)) { + for (i = 0; i < newfds; i++) { + fp = *rp; + unp_discard(fp); + *rp++ = 0; + } + return (EMSGSIZE); + } + for (i = 0; i < newfds; i++) { + if (fdalloc(p, 0, &f)) + panic("unp_externalize"); + fp = *rp; + p->p_fd->fd_ofiles[f] = fp; + fp->f_msgcount--; + unp_rights--; + *(int *)rp++ = f; + } + return (0); +} + +unp_internalize(control, p) + struct mbuf *control; + struct proc *p; +{ + struct filedesc *fdp = p->p_fd; + register struct cmsghdr *cm = mtod(control, struct cmsghdr *); + register struct file **rp; + register struct file *fp; + register int i, fd; + int oldfds; + + if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || + cm->cmsg_len != control->m_len) + return (EINVAL); + oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); + rp = (struct file **)(cm + 1); + for (i = 0; i < oldfds; i++) { + fd = *(int *)rp++; + if ((unsigned)fd >= fdp->fd_nfiles || + fdp->fd_ofiles[fd] == NULL) + return (EBADF); + } + rp = (struct file **)(cm + 1); + for (i = 0; i < oldfds; i++) { + fp = fdp->fd_ofiles[*(int *)rp]; + *rp++ = fp; + fp->f_count++; + fp->f_msgcount++; + unp_rights++; + } + return (0); +} + +int unp_defer, 
unp_gcing; +int unp_mark(); +extern struct domain unixdomain; + +unp_gc() +{ + register struct file *fp; + register struct socket *so; + + if (unp_gcing) + return; + unp_gcing = 1; +restart: + unp_defer = 0; + for (fp = filehead; fp; fp = fp->f_filef) + fp->f_flag &= ~(FMARK|FDEFER); + do { + for (fp = filehead; fp; fp = fp->f_filef) { + if (fp->f_count == 0) + continue; + if (fp->f_flag & FDEFER) { + fp->f_flag &= ~FDEFER; + unp_defer--; + } else { + if (fp->f_flag & FMARK) + continue; + if (fp->f_count == fp->f_msgcount) + continue; + fp->f_flag |= FMARK; + } + if (fp->f_type != DTYPE_SOCKET || + (so = (struct socket *)fp->f_data) == 0) + continue; + if (so->so_proto->pr_domain != &unixdomain || + (so->so_proto->pr_flags&PR_RIGHTS) == 0) + continue; +#ifdef notdef + if (so->so_rcv.sb_flags & SB_LOCK) { + /* + * This is problematical; it's not clear + * we need to wait for the sockbuf to be + * unlocked (on a uniprocessor, at least), + * and it's also not clear what to do + * if sbwait returns an error due to receipt + * of a signal. If sbwait does return + * an error, we'll go into an infinite + * loop. Delete all of this for now. 
+ */ + (void) sbwait(&so->so_rcv); + goto restart; + } +#endif + unp_scan(so->so_rcv.sb_mb, unp_mark); + } + } while (unp_defer); + for (fp = filehead; fp; fp = fp->f_filef) { + if (fp->f_count == 0) + continue; + if (fp->f_count == fp->f_msgcount && (fp->f_flag & FMARK) == 0) + while (fp->f_msgcount) + unp_discard(fp); + } + unp_gcing = 0; +} + +unp_dispose(m) + struct mbuf *m; +{ + int unp_discard(); + + if (m) + unp_scan(m, unp_discard); +} + +unp_scan(m0, op) + register struct mbuf *m0; + int (*op)(); +{ + register struct mbuf *m; + register struct file **rp; + register struct cmsghdr *cm; + register int i; + int qfds; + + while (m0) { + for (m = m0; m; m = m->m_next) + if (m->m_type == MT_CONTROL && + m->m_len >= sizeof(*cm)) { + cm = mtod(m, struct cmsghdr *); + if (cm->cmsg_level != SOL_SOCKET || + cm->cmsg_type != SCM_RIGHTS) + continue; + qfds = (cm->cmsg_len - sizeof *cm) + / sizeof (struct file *); + rp = (struct file **)(cm + 1); + for (i = 0; i < qfds; i++) + (*op)(*rp++); + break; /* XXX, but saves time */ + } + m0 = m0->m_act; + } +} + +unp_mark(fp) + struct file *fp; +{ + + if (fp->f_flag & FMARK) + return; + unp_defer++; + fp->f_flag |= (FMARK|FDEFER); +} + +unp_discard(fp) + struct file *fp; +{ + + if (fp->f_msgcount == 0) + return; + fp->f_msgcount--; + unp_rights--; + (void) closef(fp, curproc); +} diff --git a/sys/kern/vfs__bio.c b/sys/kern/vfs__bio.c new file mode 100644 index 000000000000..db8c6ad98e73 --- /dev/null +++ b/sys/kern/vfs__bio.c @@ -0,0 +1,602 @@ +/* + * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This software is a component of "386BSD" developed by + William F. Jolitz, TeleMuse. + * 4. Neither the name of the developer nor the name "386BSD" + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ + * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS + * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT. + * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT + * NOT MAKE USE THIS WORK. + * + * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED + * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN + * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES + * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING + * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND + * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE + * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS + * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992. + * + * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE DEVELOPER BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: vfs__bio.c,v 1.7 1993/10/19 01:06:29 nate Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "proc.h" +#include "vnode.h" +#include "buf.h" +#include "specdev.h" +#include "mount.h" +#include "malloc.h" +#include "vm/vm.h" +#include "resourcevar.h" + +static struct buf *getnewbuf(int); +extern vm_map_t buffer_map; + +/* + * Initialize buffer headers and related structures. + */ +void bufinit() +{ + struct bufhd *bh; + struct buf *bp; + + /* first, make a null hash table */ + for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) { + bh->b_flags = 0; + bh->b_forw = (struct buf *)bh; + bh->b_back = (struct buf *)bh; + } + + /* next, make a null set of free lists */ + for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) { + bp->b_flags = 0; + bp->av_forw = bp; + bp->av_back = bp; + bp->b_forw = bp; + bp->b_back = bp; + } + + /* finally, initialize each buffer header and stick on empty q */ + for(bp = buf; bp < buf + nbuf ; bp++) { + bp->b_flags = B_HEAD | B_INVAL; /* we're just an empty header */ + bp->b_dev = NODEV; + bp->b_vp = 0; + binstailfree(bp, bfreelist + BQ_EMPTY); + binshash(bp, bfreelist + BQ_EMPTY); + } +} + +/* + * Find the block in the buffer pool. + * If the buffer is not present, allocate a new buffer and load + * its contents according to the filesystem fill routine. 
+ */ +int +bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred, + struct buf **bpp) +{ + struct buf *bp; + int rv = 0; + + bp = getblk (vp, blkno, size); + + /* if not found in cache, do some I/O */ + if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) { + if (curproc && curproc->p_stats) /* count block I/O */ + curproc->p_stats->p_ru.ru_inblock++; + bp->b_flags |= B_READ; + bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); + if (cred != NOCRED) crhold(cred); /* 25 Apr 92*/ + bp->b_rcred = cred; + VOP_STRATEGY(bp); + rv = biowait (bp); + } + *bpp = bp; + + return (rv); +} + +/* + * Operates like bread, but also starts I/O on the specified + * read-ahead block. [See page 55 of Bach's Book] + */ +int +breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize, + struct ucred *cred, struct buf **bpp) +{ + struct buf *bp, *rabp; + int rv = 0, needwait = 0; + + bp = getblk (vp, blkno, size); + + /* if not found in cache, do some I/O */ + if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) { + if (curproc && curproc->p_stats) /* count block I/O */ + curproc->p_stats->p_ru.ru_inblock++; + bp->b_flags |= B_READ; + bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); + if (cred != NOCRED) crhold(cred); /* 25 Apr 92*/ + bp->b_rcred = cred; + VOP_STRATEGY(bp); + needwait++; + } + + rabp = getblk (vp, rablkno, rabsize); + + /* if not found in cache, do some I/O (overlapped with first) */ + if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) { + if (curproc && curproc->p_stats) /* count block I/O */ + curproc->p_stats->p_ru.ru_inblock++; + rabp->b_flags |= B_READ | B_ASYNC; + rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); + if (cred != NOCRED) crhold(cred); /* 25 Apr 92*/ + rabp->b_rcred = cred; + VOP_STRATEGY(rabp); + } else + brelse(rabp); + + /* wait for original I/O */ + if (needwait) + rv = biowait (bp); + + *bpp = bp; + return (rv); +} + +/* + * Synchronous write. + * Release buffer on completion. 
+ */ +int +bwrite(register struct buf *bp) +{ + int rv; + + if(bp->b_flags & B_INVAL) { + brelse(bp); + return (0); + } else { + int wasdelayed; + + if(!(bp->b_flags & B_BUSY)) + panic("bwrite: not busy"); + + wasdelayed = bp->b_flags & B_DELWRI; + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI); + if(wasdelayed) + reassignbuf(bp, bp->b_vp); + + if (curproc && curproc->p_stats) /* count block I/O */ + curproc->p_stats->p_ru.ru_oublock++; + bp->b_flags |= B_DIRTY; + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); + rv = biowait(bp); + brelse(bp); + return (rv); + } +} + +/* + * Delayed write. + * + * The buffer is marked dirty, but is not queued for I/O. + * This routine should be used when the buffer is expected + * to be modified again soon, typically a small write that + * partially fills a buffer. + * + * NB: magnetic tapes cannot be delayed; they must be + * written in the order that the writes are requested. + */ +void +bdwrite(register struct buf *bp) +{ + + if(!(bp->b_flags & B_BUSY)) + panic("bdwrite: not busy"); + + if(bp->b_flags & B_INVAL) { + brelse(bp); + return; + } + if(bp->b_flags & B_TAPE) { + bwrite(bp); + return; + } + bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags |= B_DIRTY|B_DELWRI; + reassignbuf(bp, bp->b_vp); + brelse(bp); + return; +} + +/* + * Asynchronous write. + * Start I/O on a buffer, but do not wait for it to complete. + * The buffer is released when the I/O completes. + */ +void +bawrite(register struct buf *bp) +{ + + if(!(bp->b_flags & B_BUSY)) + panic("bawrite: not busy"); + + if(bp->b_flags & B_INVAL) + brelse(bp); + else { + int wasdelayed; + + wasdelayed = bp->b_flags & B_DELWRI; + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); + if(wasdelayed) + reassignbuf(bp, bp->b_vp); + + if (curproc && curproc->p_stats) /* count block I/O */ + curproc->p_stats->p_ru.ru_oublock++; + bp->b_flags |= B_DIRTY | B_ASYNC; + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); + } +} + +/* + * Release a buffer. 
+ * Even if the buffer is dirty, no I/O is started. + */ +void +brelse(register struct buf *bp) +{ + int x; + + /* anyone need a "free" block? */ + x=splbio(); + if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) { + (bfreelist + BQ_AGE) ->b_flags &= ~B_WANTED; + wakeup(bfreelist); + } + /* anyone need this very block? */ + if (bp->b_flags & B_WANTED) { + bp->b_flags &= ~B_WANTED; + wakeup(bp); + } + + if (bp->b_flags & (B_INVAL|B_ERROR)) { + bp->b_flags |= B_INVAL; + bp->b_flags &= ~(B_DELWRI|B_CACHE); + if(bp->b_vp) + brelvp(bp); + } + + /* enqueue */ + /* just an empty buffer head ... */ + /*if(bp->b_flags & B_HEAD) + binsheadfree(bp, bfreelist + BQ_EMPTY)*/ + /* buffers with junk contents */ + /*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE)) + binsheadfree(bp, bfreelist + BQ_AGE) + /* buffers with stale but valid contents */ + else if(bp->b_flags & B_AGE) + binstailfree(bp, bfreelist + BQ_AGE) + /* buffers with valid and quite potentially reuseable contents */ + else + binstailfree(bp, bfreelist + BQ_LRU) + + /* unlock */ + bp->b_flags &= ~B_BUSY; + splx(x); + +} + +int freebufspace; +int allocbufspace; + +/* + * Find a buffer which is available for use. + * If free memory for buffer space and an empty header from the empty list, + * use that. Otherwise, select something from a free list. + * Preference is to AGE list, then LRU list. + */ +static struct buf * +getnewbuf(int sz) +{ + struct buf *bp; + int x; + + x = splbio(); +start: + /* can we constitute a new buffer? 
*/ + if (freebufspace > sz + && bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) { + caddr_t addr; + +/*#define notyet*/ +#ifndef notyet + if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree; +#else /* notyet */ + /* get new memory buffer */ + if (round_page(sz) == sz) + addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz); + else + addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK); + /*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/ + bzero(addr, sz); +#endif /* notyet */ + freebufspace -= sz; + allocbufspace += sz; + + bp = bfreelist[BQ_EMPTY].av_forw; + bp->b_flags = B_BUSY | B_INVAL; + bremfree(bp); + bp->b_un.b_addr = addr; + bp->b_bufsize = sz; /* 20 Aug 92*/ + goto fillin; + } + +tryfree: + if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) { + bp = bfreelist[BQ_AGE].av_forw; + bremfree(bp); + } else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) { + bp = bfreelist[BQ_LRU].av_forw; + bremfree(bp); + } else { + /* wait for a free buffer of any kind */ + (bfreelist + BQ_AGE)->b_flags |= B_WANTED; + sleep(bfreelist, PRIBIO); + splx(x); + return (0); + } + + /* if we are a delayed write, convert to an async write! */ + if (bp->b_flags & B_DELWRI) { + bp->b_flags |= B_BUSY; + bawrite (bp); + goto start; + } + + + if(bp->b_vp) + brelvp(bp); + + /* we are not free, nor do we contain interesting data */ + if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); /* 25 Apr 92*/ + if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); + bp->b_flags = B_BUSY; +fillin: + bremhash(bp); + splx(x); + bp->b_dev = NODEV; + bp->b_vp = NULL; + bp->b_blkno = bp->b_lblkno = 0; + bp->b_iodone = 0; + bp->b_error = 0; + bp->b_resid = 0; + bp->b_wcred = bp->b_rcred = NOCRED; + if (bp->b_bufsize != sz) + allocbuf(bp, sz); + bp->b_bcount = bp->b_bufsize = sz; + bp->b_dirtyoff = bp->b_dirtyend = 0; + return (bp); +} + +/* + * Check to see if a block is currently memory resident. 
+ */ +struct buf * +incore(struct vnode *vp, daddr_t blkno) +{ + struct buf *bh; + struct buf *bp; + + bh = BUFHASH(vp, blkno); + + /* Search hash chain */ + bp = bh->b_forw; + while (bp != (struct buf *) bh) { + /* hit */ + if (bp->b_lblkno == blkno && bp->b_vp == vp + && (bp->b_flags & B_INVAL) == 0) + return (bp); + bp = bp->b_forw; + } + + return(0); +} + +/* + * Get a block of requested size that is associated with + * a given vnode and block offset. If it is found in the + * block cache, mark it as having been found, make it busy + * and return it. Otherwise, return an empty block of the + * correct size. It is up to the caller to insure that the + * cached blocks be of the correct size. + */ +struct buf * +getblk(register struct vnode *vp, daddr_t blkno, int size) +{ + struct buf *bp, *bh; + int x; + + for (;;) { + if (bp = incore(vp, blkno)) { + x = splbio(); + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + sleep (bp, PRIBIO); + splx(x); + continue; + } + bp->b_flags |= B_BUSY | B_CACHE; + bremfree(bp); + if (size > bp->b_bufsize) + panic("now what do we do?"); + /* if (bp->b_bufsize != size) allocbuf(bp, size); */ + } else { + + if((bp = getnewbuf(size)) == 0) continue; + bp->b_blkno = bp->b_lblkno = blkno; + bgetvp(vp, bp); + x = splbio(); + bh = BUFHASH(vp, blkno); + binshash(bp, bh); + bp->b_flags = B_BUSY; + } + splx(x); + return (bp); + } +} + +/* + * Get an empty, disassociated buffer of given size. + */ +struct buf * +geteblk(int size) +{ + struct buf *bp; + int x; + + while ((bp = getnewbuf(size)) == 0) + ; + x = splbio(); + binshash(bp, bfreelist + BQ_AGE); + splx(x); + + return (bp); +} + +/* + * Exchange a buffer's underlying buffer storage for one of different + * size, taking care to maintain contents appropriately. When buffer + * increases in size, caller is responsible for filling out additional + * contents. 
When buffer shrinks in size, data is lost, so caller must + * first return it to backing store before shrinking the buffer, as + * no implied I/O will be done. + * + * Expanded buffer is returned as value. + */ +void +allocbuf(register struct buf *bp, int size) +{ + caddr_t newcontents; + + /* get new memory buffer */ +#ifndef notyet + newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK); +#else /* notyet */ + if (round_page(size) == size) + newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size); + else + newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK); +#endif /* notyet */ + + /* copy the old into the new, up to the maximum that will fit */ + bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size)); + + /* return old contents to free heap */ +#ifndef notyet + free (bp->b_un.b_addr, M_TEMP); +#else /* notyet */ + if (round_page(bp->b_bufsize) == bp->b_bufsize) + kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize); + else + free (bp->b_un.b_addr, M_TEMP); +#endif /* notyet */ + + /* adjust buffer cache's idea of memory allocated to buffer contents */ + freebufspace -= size - bp->b_bufsize; + allocbufspace += size - bp->b_bufsize; + + /* update buffer header */ + bp->b_un.b_addr = newcontents; + bp->b_bcount = bp->b_bufsize = size; +} + +/* + * Patiently await operations to complete on this buffer. + * When they do, extract error value and return it. + * Extract and return any errors associated with the I/O. + * If an invalid block, force it off the lookup hash chains. 
+ */ +int +biowait(register struct buf *bp) +{ + int x; + + x = splbio(); + while ((bp->b_flags & B_DONE) == 0) + sleep((caddr_t)bp, PRIBIO); + if((bp->b_flags & B_ERROR) || bp->b_error) { + if ((bp->b_flags & B_INVAL) == 0) { + bp->b_flags |= B_INVAL; + bremhash(bp); + binshash(bp, bfreelist + BQ_AGE); + } + if (!bp->b_error) + bp->b_error = EIO; + else + bp->b_flags |= B_ERROR; + splx(x); + return (bp->b_error); + } else { + splx(x); + return (0); + } +} + +/* + * Finish up operations on a buffer, calling an optional + * function (if requested), and releasing the buffer if + * marked asynchronous. Then mark this buffer done so that + * others biowait()'ing for it will notice when they are + * woken up from sleep(). + */ +int +biodone(register struct buf *bp) +{ + int x; + + x = splbio(); + if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp); + bp->b_flags &= ~B_CALL; + if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) { + bp->b_flags &= ~B_DIRTY; + vwakeup(bp); + } + if (bp->b_flags & B_ASYNC) + brelse(bp); + bp->b_flags &= ~B_ASYNC; + bp->b_flags |= B_DONE; + wakeup(bp); + splx(x); +} diff --git a/sys/kern/vfs_bio.old.c b/sys/kern/vfs_bio.old.c new file mode 100644 index 000000000000..a791bafcc439 --- /dev/null +++ b/sys/kern/vfs_bio.old.c @@ -0,0 +1,593 @@ +/* + * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This software is a component of "386BSD" developed by + William F. Jolitz, TeleMuse. + * 4. Neither the name of the developer nor the name "386BSD" + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ + * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS + * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT. + * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT + * NOT MAKE USE THIS WORK. + * + * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED + * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN + * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES + * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING + * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND + * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE + * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS + * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992. + * + * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id: vfs_bio.old.c,v 1.2 1993/10/16 15:25:18 rgrimes Exp $ + */ + +#include "param.h" +#include "proc.h" +#include "vnode.h" +#include "buf.h" +#include "specdev.h" +#include "mount.h" +#include "malloc.h" +#ifdef notyet +#include "vm/vm.h" +#include "vm/vm_kern.h" +#endif /* notyet */ +#include "resourcevar.h" + +/* + * Initialize buffer headers and related structures. + */ +void bufinit() +{ + struct bufhd *bh; + struct buf *bp; + + /* first, make a null hash table */ + for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) { + bh->b_flags = 0; + bh->b_forw = (struct buf *)bh; + bh->b_back = (struct buf *)bh; + } + + /* next, make a null set of free lists */ + for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) { + bp->b_flags = 0; + bp->av_forw = bp; + bp->av_back = bp; + bp->b_forw = bp; + bp->b_back = bp; + } + + /* finally, initialize each buffer header and stick on empty q */ + for(bp = buf; bp < buf + nbuf ; bp++) { + bp->b_flags = B_HEAD | B_INVAL; /* we're just an empty header */ + bp->b_dev = NODEV; + bp->b_vp = 0; + binstailfree(bp, bfreelist + BQ_EMPTY); + binshash(bp, bfreelist + BQ_EMPTY); + } +} + +/* + * Find the block in the buffer pool. + * If the buffer is not present, allocate a new buffer and load + * its contents according to the filesystem fill routine. + */ +bread(vp, blkno, size, cred, bpp) + struct vnode *vp; + daddr_t blkno; + int size; + struct ucred *cred; + struct buf **bpp; +{ + struct buf *bp; + int rv = 0; + + bp = getblk (vp, blkno, size); + + /* if not found in cache, do some I/O */ + if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) { + bp->b_flags |= B_READ; + bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); + bp->b_rcred = cred; + VOP_STRATEGY(bp); + rv = biowait (bp); + } + *bpp = bp; + + return (rv); +} + +/* + * Operates like bread, but also starts I/O on the specified + * read-ahead block. 
[See page 55 of Bach's Book] + */ +breada(vp, blkno, size, rablkno, rabsize, cred, bpp) + struct vnode *vp; + daddr_t blkno; int size; + daddr_t rablkno; int rabsize; + struct ucred *cred; + struct buf **bpp; +{ + struct buf *bp, *rabp; + int rv = 0, needwait = 0; + + bp = getblk (vp, blkno, size); + + /* if not found in cache, do some I/O */ + if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) { + bp->b_flags |= B_READ; + bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); + bp->b_rcred = cred; + VOP_STRATEGY(bp); + needwait++; + } + + rabp = getblk (vp, rablkno, rabsize); + + /* if not found in cache, do some I/O (overlapped with first) */ + if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) { + rabp->b_flags |= B_READ | B_ASYNC; + rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); + rabp->b_rcred = cred; + VOP_STRATEGY(rabp); + } else + brelse(rabp); + + /* wait for original I/O */ + if (needwait) + rv = biowait (bp); + + *bpp = bp; + return (rv); +} + +/* + * Synchronous write. + * Release buffer on completion. + */ +bwrite(bp) + register struct buf *bp; +{ + int rv; + + if(bp->b_flags & B_INVAL) { + brelse(bp); + return (0); + } else { + int wasdelayed; + + if(!(bp->b_flags & B_BUSY))panic("bwrite: not busy"); + wasdelayed = bp->b_flags & B_DELWRI; + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI); + if(wasdelayed) reassignbuf(bp, bp->b_vp); + bp->b_flags |= B_DIRTY; + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); + rv = biowait(bp); + brelse(bp); + return (rv); + } +} + +/* + * Delayed write. + * + * The buffer is marked dirty, but is not queued for I/O. + * This routine should be used when the buffer is expected + * to be modified again soon, typically a small write that + * partially fills a buffer. + * + * NB: magnetic tapes cannot be delayed; they must be + * written in the order that the writes are requested. 
+ */ +void bdwrite(bp) + register struct buf *bp; +{ + + if(!(bp->b_flags & B_BUSY))panic("bdwrite: not busy"); + if(bp->b_flags & B_INVAL) { + brelse(bp); + } + if(bp->b_flags & B_TAPE) { + bwrite(bp); + return; + } + bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags |= B_DIRTY|B_DELWRI; + reassignbuf(bp, bp->b_vp); + brelse(bp); + return; +} + +/* + * Asynchronous write. + * Start I/O on a buffer, but do not wait for it to complete. + * The buffer is released when the I/O completes. + */ +bawrite(bp) + register struct buf *bp; +{ + + if(!(bp->b_flags & B_BUSY))panic("bawrite: not busy"); + if(bp->b_flags & B_INVAL) + brelse(bp); + else { + int wasdelayed; + + wasdelayed = bp->b_flags & B_DELWRI; + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); + if(wasdelayed) reassignbuf(bp, bp->b_vp); + + bp->b_flags |= B_DIRTY | B_ASYNC; + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); + } +} + +/* + * Release a buffer. + * Even if the buffer is dirty, no I/O is started. + */ +brelse(bp) + register struct buf *bp; +{ + int x; + + /* anyone need a "free" block? */ + x=splbio(); + if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) { + (bfreelist + BQ_AGE) ->b_flags &= ~B_WANTED; + wakeup(bfreelist); + } + /* anyone need this very block? */ + if (bp->b_flags & B_WANTED) { + bp->b_flags &= ~B_WANTED; + wakeup(bp); + } + + if (bp->b_flags & (B_INVAL|B_ERROR)) { + bp->b_flags |= B_INVAL; + bp->b_flags &= ~(B_DELWRI|B_CACHE); + if(bp->b_vp) + brelvp(bp); + } + + /* enqueue */ + /* just an empty buffer head ... 
*/ + /*if(bp->b_flags & B_HEAD) + binsheadfree(bp, bfreelist + BQ_EMPTY)*/ + /* buffers with junk contents */ + /*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE)) + binsheadfree(bp, bfreelist + BQ_AGE) + /* buffers with stale but valid contents */ + else if(bp->b_flags & B_AGE) + binstailfree(bp, bfreelist + BQ_AGE) + /* buffers with valid and quite potentially reuseable contents */ + else + binstailfree(bp, bfreelist + BQ_LRU) + + /* unlock */ + bp->b_flags &= ~B_BUSY; + splx(x); + + return; +} + +int freebufspace; +int allocbufspace; + +/* + * Find a buffer which is available for use. + * If free memory for buffer space and an empty header from the empty list, + * use that. Otherwise, select something from a free list. + * Preference is to AGE list, then LRU list. + */ +struct buf * +getnewbuf(sz) +{ + struct buf *bp; + int x; + + x = splbio(); +start: + /* can we constitute a new buffer? */ + if (freebufspace > sz + && bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) { + caddr_t addr; + +#ifndef notyet + if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree; +#else /* notyet */ + /* get new memory buffer */ + if (round_page(sz) == sz) + addr = (caddr_t) kmem_alloc(buffer_map, sz); + else + addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK); + /*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/ +#endif /* notyet */ + freebufspace -= sz; + allocbufspace += sz; + + bp = bfreelist[BQ_EMPTY].av_forw; + bp->b_flags = B_BUSY | B_INVAL; + bremfree(bp); +#ifndef notyet + bp->b_un.b_addr = (caddr_t) addr; +#else /* notyet */ + bp->b_un.b_addr = addr; +#endif /* notyet */ + goto fillin; + } + +tryfree: + if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) { + bp = bfreelist[BQ_AGE].av_forw; + bremfree(bp); + } else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) { + bp = bfreelist[BQ_LRU].av_forw; + bremfree(bp); + } else { + /* wait for a free buffer of any kind */ + (bfreelist + BQ_AGE)->b_flags |= 
B_WANTED; + sleep(bfreelist, PRIBIO); + splx(x); + return (0); + } + + /* if we are a delayed write, convert to an async write! */ + if (bp->b_flags & B_DELWRI) { + /*bp->b_flags &= ~B_DELWRI;*/ + bp->b_flags |= B_BUSY; + bawrite (bp); + goto start; + } + + /*if (bp->b_flags & (B_INVAL|B_ERROR) == 0) { + bremhash(bp); + }*/ + + if(bp->b_vp) + brelvp(bp); + + /* we are not free, nor do we contain interesting data */ + bp->b_flags = B_BUSY; +fillin: + bremhash(bp); + splx(x); + bp->b_dev = NODEV; + bp->b_vp = NULL; + bp->b_blkno = bp->b_lblkno = 0; + bp->b_iodone = 0; + bp->b_error = 0; + bp->b_wcred = bp->b_rcred = NOCRED; + if (bp->b_bufsize != sz) allocbuf(bp, sz); + bp->b_bcount = bp->b_bufsize = sz; + bp->b_dirtyoff = bp->b_dirtyend = 0; + return (bp); +} + +/* + * Check to see if a block is currently memory resident. + */ +struct buf *incore(vp, blkno) + struct vnode *vp; + daddr_t blkno; +{ + struct buf *bh; + struct buf *bp; + + bh = BUFHASH(vp, blkno); + + /* Search hash chain */ + bp = bh->b_forw; + while (bp != (struct buf *) bh) { + /* hit */ + if (bp->b_lblkno == blkno && bp->b_vp == vp + && (bp->b_flags & B_INVAL) == 0) + return (bp); + bp = bp->b_forw; + } + + return(0); +} + +/* + * Get a block of requested size that is associated with + * a given vnode and block offset. If it is found in the + * block cache, mark it as having been found, make it busy + * and return it. Otherwise, return an empty block of the + * correct size. It is up to the caller to insure that the + * cached blocks be of the correct size. 
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	struct buf *bp, *bh;
	int x;

	/* loop until we have a B_BUSY buffer for (vp, blkno) */
	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				/* someone else owns it; wait and rescan */
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			/* getnewbuf() returning 0 means it slept; retry */
			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(size)
	int size;
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	/* NOTE(review): hashed onto the BQ_AGE header rather than a real
	 * BUFHASH chain, presumably so it can never match an incore()
	 * lookup -- confirm against buf.h conventions. */
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}

/*
 * Exchange a buffer's underlying buffer storage for one of different
 * size, taking care to maintain contents appropriately. When buffer
 * increases in size, caller is responsible for filling out additional
 * contents. When buffer shrinks in size, data is lost, so caller must
 * first return it to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * Expanded buffer is returned as value.
 */
void
allocbuf(bp, size)
	register struct buf *bp;
	int size;
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}

/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract error value and return it.
 * Extract and return any errors associated with the I/O.
 * If an invalid block, force it off the lookup hash chains.
 */
biowait(bp)
	register struct buf *bp;
{
	int x;

	x = splbio();
	/* biodone() sets B_DONE and wakes us */
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			/* failed I/O: invalidate and park on the AGE queue's
			 * hash header so incore() can no longer find it */
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		/* normalize: make flag and error code agree */
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous.
Then mark this buffer done so that + * others biowait()'ing for it will notice when they are + * woken up from sleep(). + */ +biodone(bp) + register struct buf *bp; +{ + int x; + + x = splbio(); + if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp); + bp->b_flags &= ~B_CALL; + if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) { + bp->b_flags &= ~B_DIRTY; + vwakeup(bp); + } + if (bp->b_flags & B_ASYNC) + brelse(bp); + bp->b_flags &= ~B_ASYNC; + bp->b_flags |= B_DONE; + wakeup(bp); + splx(x); +} diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c new file mode 100644 index 000000000000..e34c14a04ba4 --- /dev/null +++ b/sys/kern/vfs_cache.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vfs_cache.c 7.8 (Berkeley) 2/28/91 + * $Id: vfs_cache.c,v 1.2 1993/10/16 15:25:19 rgrimes Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "time.h" +#include "mount.h" +#include "vnode.h" +#include "namei.h" +#include "errno.h" +#include "malloc.h" + +/* + * Name caching works as follows: + * + * Names found by directory scans are retained in a cache + * for future reference. It is managed LRU, so frequently + * used names will hang around. Cache is indexed by hash value + * obtained from (vp, name) where vp refers to the directory + * containing name. + * + * For simplicity (and economy of storage), names longer than + * a maximum length of NCHNAMLEN are not cached; they occur + * infrequently in any case, and are almost never of interest. + * + * Upon reaching the last segment of a path, if the reference + * is for DELETE, or NOCACHE is set (rewrite), and the + * name is located in the cache, it will be dropped. + */ + +/* + * Structures associated with name cacheing. 
+ */ +union nchash { + union nchash *nch_head[2]; + struct namecache *nch_chain[2]; +} *nchashtbl; +#define nch_forw nch_chain[0] +#define nch_back nch_chain[1] + +u_long nchash; /* size of hash table - 1 */ +long numcache; /* number of cache entries allocated */ +struct namecache *nchhead, **nchtail; /* LRU chain pointers */ +struct nchstats nchstats; /* cache effectiveness statistics */ + +int doingcache = 1; /* 1 => enable the cache */ + +/* + * Look for a the name in the cache. We don't do this + * if the segment name is long, simply so the cache can avoid + * holding long names (which would either waste space, or + * add greatly to the complexity). + * + * Lookup is called with ni_dvp pointing to the directory to search, + * ni_ptr pointing to the name of the entry being sought, ni_namelen + * tells the length of the name, and ni_hash contains a hash of + * the name. If the lookup succeeds, the vnode is returned in ni_vp + * and a status of -1 is returned. If the lookup determines that + * the name does not exist (negative cacheing), a status of ENOENT + * is returned. If the lookup fails, a status of zero is returned. 
 */
cache_lookup(ndp)
	register struct nameidata *ndp;
{
	register struct vnode *dvp;
	register struct namecache *ncp;
	union nchash *nhp;

	if (!doingcache)
		return (0);
	/* long names are never cached; tell lookup() not to enter one */
	if (ndp->ni_namelen > NCHNAMLEN) {
		nchstats.ncs_long++;
		ndp->ni_makeentry = 0;
		return (0);
	}
	dvp = ndp->ni_dvp;
	/* scan the hash chain for (dvp, name); v_id guards stale entries */
	nhp = &nchashtbl[ndp->ni_hash & nchash];
	for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp;
	    ncp = ncp->nc_forw) {
		if (ncp->nc_dvp == dvp &&
		    ncp->nc_dvpid == dvp->v_id &&
		    ncp->nc_nlen == ndp->ni_namelen &&
		    !bcmp(ncp->nc_name, ndp->ni_ptr, (unsigned)ncp->nc_nlen))
			break;
	}
	if (ncp == (struct namecache *)nhp) {
		nchstats.ncs_miss++;
		return (0);
	}
	if (!ndp->ni_makeentry) {
		/* caller wants the entry gone (delete/rename); fall
		 * through to the invalidation code below */
		nchstats.ncs_badhits++;
	} else if (ncp->nc_vp == NULL) {
		/* negative entry: name is known not to exist */
		if ((ndp->ni_nameiop & OPMASK) != CREATE) {
			nchstats.ncs_neghits++;
			/*
			 * Move this slot to end of LRU chain,
			 * if not already there.
			 */
			if (ncp->nc_nxt) {
				/* remove from LRU chain */
				*ncp->nc_prev = ncp->nc_nxt;
				ncp->nc_nxt->nc_prev = ncp->nc_prev;
				/* and replace at end of it */
				ncp->nc_nxt = NULL;
				ncp->nc_prev = nchtail;
				*nchtail = ncp;
				nchtail = &ncp->nc_nxt;
			}
			return (ENOENT);
		}
	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
		/* target vnode was recycled since the entry was made */
		nchstats.ncs_falsehits++;
	} else {
		nchstats.ncs_goodhits++;
		/*
		 * move this slot to end of LRU chain, if not already there
		 */
		if (ncp->nc_nxt) {
			/* remove from LRU chain */
			*ncp->nc_prev = ncp->nc_nxt;
			ncp->nc_nxt->nc_prev = ncp->nc_prev;
			/* and replace at end of it */
			ncp->nc_nxt = NULL;
			ncp->nc_prev = nchtail;
			*nchtail = ncp;
			nchtail = &ncp->nc_nxt;
		}
		ndp->ni_vp = ncp->nc_vp;
		return (-1);
	}

	/*
	 * Last component and we are renaming or deleting,
	 * the cache entry is invalid, or otherwise don't
	 * want cache entry to exist.
	 */
	/* remove from LRU chain */
	*ncp->nc_prev = ncp->nc_nxt;
	if (ncp->nc_nxt)
		ncp->nc_nxt->nc_prev = ncp->nc_prev;
	else
		nchtail = ncp->nc_prev;
	/* remove from hash chain */
	remque(ncp);
	/* insert at head of LRU list (first to grab) */
	ncp->nc_nxt = nchhead;
	ncp->nc_prev = &nchhead;
	nchhead->nc_prev = &ncp->nc_nxt;
	nchhead = ncp;
	/* and make a dummy hash chain */
	ncp->nc_forw = ncp;
	ncp->nc_back = ncp;
	return (0);
}

/*
 * Add an entry to the cache
 */
cache_enter(ndp)
	register struct nameidata *ndp;
{
	register struct namecache *ncp;
	union nchash *nhp;

	if (!doingcache)
		return;
	/*
	 * Free the cache slot at head of lru chain.
	 */
	if (numcache < desiredvnodes) {
		/* still growing: allocate a fresh, zeroed entry */
		ncp = (struct namecache *)
			malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
		bzero((char *)ncp, sizeof *ncp);
		numcache++;
	} else if (ncp = nchhead) {
		/* at capacity: recycle the least-recently-used entry */
		/* remove from lru chain */
		*ncp->nc_prev = ncp->nc_nxt;
		if (ncp->nc_nxt)
			ncp->nc_nxt->nc_prev = ncp->nc_prev;
		else
			nchtail = ncp->nc_prev;
		/* remove from old hash chain */
		remque(ncp);
	} else
		return;
	/* grab the vnode we just found */
	ncp->nc_vp = ndp->ni_vp;
	if (ndp->ni_vp)
		ncp->nc_vpid = ndp->ni_vp->v_id;
	else
		/* negative entry: nc_vp == NULL records nonexistence */
		ncp->nc_vpid = 0;
	/* fill in cache info */
	ncp->nc_dvp = ndp->ni_dvp;
	ncp->nc_dvpid = ndp->ni_dvp->v_id;
	ncp->nc_nlen = ndp->ni_namelen;
	bcopy(ndp->ni_ptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
	/* link at end of lru chain */
	ncp->nc_nxt = NULL;
	ncp->nc_prev = nchtail;
	*nchtail = ncp;
	nchtail = &ncp->nc_nxt;
	/* and insert on hash chain */
	nhp = &nchashtbl[ndp->ni_hash & nchash];
	insque(ncp, nhp);
}

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
nchinit()
{
	register union nchash *nchp;
	long nchashsize;

	nchhead = 0;
	nchtail = &nchhead;
	/* size the hash table from desiredvnodes, rounded to whole pages */
	nchashsize = roundup((desiredvnodes + 1) * sizeof *nchp / 2,
		NBPG * CLSIZE);
	nchashtbl = (union nchash *)malloc((u_long)nchashsize,
		M_CACHE,
M_WAITOK); + for (nchash = 1; nchash <= nchashsize / sizeof *nchp; nchash <<= 1) + /* void */; + nchash = (nchash >> 1) - 1; + for (nchp = &nchashtbl[nchash]; nchp >= nchashtbl; nchp--) { + nchp->nch_head[0] = nchp; + nchp->nch_head[1] = nchp; + } +} + +/* + * Cache flush, a particular vnode; called when a vnode is renamed to + * hide entries that would now be invalid + */ +cache_purge(vp) + struct vnode *vp; +{ + union nchash *nhp; + struct namecache *ncp; + + vp->v_id = ++nextvnodeid; + if (nextvnodeid != 0) + return; + for (nhp = &nchashtbl[nchash]; nhp >= nchashtbl; nhp--) { + for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp; + ncp = ncp->nc_forw) { + ncp->nc_vpid = 0; + ncp->nc_dvpid = 0; + } + } + vp->v_id = ++nextvnodeid; +} + +/* + * Cache flush, a whole filesystem; called when filesys is umounted to + * remove entries that would now be invalid + * + * The line "nxtcp = nchhead" near the end is to avoid potential problems + * if the cache lru chain is modified while we are dumping the + * inode. This makes the algorithm O(n^2), but do you think I care? 
+ */ +cache_purgevfs(mp) + struct mount *mp; +{ + register struct namecache *ncp, *nxtcp; + + for (ncp = nchhead; ncp; ncp = nxtcp) { + nxtcp = ncp->nc_nxt; + if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) + continue; + /* free the resources we had */ + ncp->nc_vp = NULL; + ncp->nc_dvp = NULL; + remque(ncp); /* remove entry from its hash chain */ + ncp->nc_forw = ncp; /* and make a dummy one */ + ncp->nc_back = ncp; + /* delete this entry from LRU chain */ + *ncp->nc_prev = nxtcp; + if (nxtcp) + nxtcp->nc_prev = ncp->nc_prev; + else + nchtail = ncp->nc_prev; + /* cause rescan of list, it may have altered */ + nxtcp = nchhead; + /* put the now-free entry at head of LRU */ + ncp->nc_nxt = nxtcp; + ncp->nc_prev = &nchhead; + nxtcp->nc_prev = &ncp->nc_nxt; + nchhead = ncp; + } +} diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c new file mode 100644 index 000000000000..574efd4b8d73 --- /dev/null +++ b/sys/kern/vfs_conf.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vfs_conf.c 7.3 (Berkeley) 6/28/90 + * $Id: vfs_conf.c,v 1.2 1993/10/16 15:25:21 rgrimes Exp $ + */ + +#include "param.h" +#include "mount.h" + +/* + * This specifies the filesystem used to mount the root. + * This specification should be done by /etc/config. + */ +extern int ufs_mountroot(); +int (*mountroot)() = ufs_mountroot; + +/* + * These define the root filesystem and device. + */ +struct mount *rootfs; +struct vnode *rootdir; + +/* + * Set up the filesystem operations for vnodes. + * The types are defined in mount.h. 
+ */ +extern struct vfsops ufs_vfsops; + +#ifdef NFS +extern struct vfsops nfs_vfsops; +#endif + +#ifdef MFS +extern struct vfsops mfs_vfsops; +#endif + +#ifdef PCFS +extern struct vfsops pcfs_vfsops; +#endif + +#ifdef ISOFS +extern struct vfsops isofs_vfsops; +#endif + +struct vfsops *vfssw[] = { + (struct vfsops *)0, /* 0 = MOUNT_NONE */ + &ufs_vfsops, /* 1 = MOUNT_UFS */ +#ifdef NFS + &nfs_vfsops, /* 2 = MOUNT_NFS */ +#else + (struct vfsops *)0, +#endif +#ifdef MFS + &mfs_vfsops, /* 3 = MOUNT_MFS */ +#else + (struct vfsops *)0, +#endif +#ifdef PCFS + &pcfs_vfsops, /* 4 = MOUNT_MSDOS */ +#else + (struct vfsops *)0, +#endif +#ifdef ISOFS + &isofs_vfsops, /* 5 = MOUNT_ISOFS */ +#else + (struct vfsops *)0, +#endif +}; diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c new file mode 100644 index 000000000000..1202e9de838d --- /dev/null +++ b/sys/kern/vfs_lookup.c @@ -0,0 +1,457 @@ +/* + * Copyright (c) 1982, 1986, 1989 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vfs_lookup.c 7.32 (Berkeley) 5/21/91 + * $Id: vfs_lookup.c,v 1.2 1993/10/16 15:25:23 rgrimes Exp $ + */ + +#include "param.h" +#include "syslimits.h" +#include "time.h" +#include "namei.h" +#include "vnode.h" +#include "mount.h" +#include "errno.h" +#include "malloc.h" +#include "filedesc.h" +#include "proc.h" + +#ifdef KTRACE +#include "ktrace.h" +#endif + +/* + * Convert a pathname into a pointer to a locked inode. + * + * The FOLLOW flag is set when symbolic links are to be followed + * when they occur at the end of the name translation process. + * Symbolic links are always followed for all other pathname + * components other than the last. + * + * The segflg defines whether the name is to be copied from user + * space or kernel space. + * + * Overall outline of namei: + * + * copy in name + * get starting directory + * while (!done && !error) { + * call lookup to search path. 
+ * if symbolic link, massage name in buffer and continue + * } + */ +namei(ndp, p) + register struct nameidata *ndp; + struct proc *p; +{ + register struct filedesc *fdp; /* pointer to file descriptor state */ + register char *cp; /* pointer into pathname argument */ + register struct vnode *dp; /* the directory we are searching */ + struct iovec aiov; /* uio for reading symbolic links */ + struct uio auio; + int error, linklen; + + ndp->ni_cred = p->p_ucred; + fdp = p->p_fd; + + /* + * Get a buffer for the name to be translated, and copy the + * name into the buffer. + */ + if ((ndp->ni_nameiop & HASBUF) == 0) + MALLOC(ndp->ni_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (ndp->ni_segflg == UIO_SYSSPACE) + error = copystr(ndp->ni_dirp, ndp->ni_pnbuf, + MAXPATHLEN, &ndp->ni_pathlen); + else + error = copyinstr(ndp->ni_dirp, ndp->ni_pnbuf, + MAXPATHLEN, &ndp->ni_pathlen); + if (error) { + free(ndp->ni_pnbuf, M_NAMEI); + ndp->ni_vp = NULL; + return (error); + } + ndp->ni_loopcnt = 0; +#ifdef KTRACE + if (KTRPOINT(p, KTR_NAMEI)) + ktrnamei(p->p_tracep, ndp->ni_pnbuf); +#endif + + /* + * Get starting point for the translation. + */ + if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL) + ndp->ni_rootdir = rootdir; + dp = fdp->fd_cdir; + VREF(dp); + for (;;) { + /* + * Check if root directory should replace current directory. + * Done at start of translation and after symbolic link. 
+ */ + ndp->ni_ptr = ndp->ni_pnbuf; + if (*ndp->ni_ptr == '/') { + vrele(dp); + while (*ndp->ni_ptr == '/') { + ndp->ni_ptr++; + ndp->ni_pathlen--; + } + dp = ndp->ni_rootdir; + VREF(dp); + } + ndp->ni_startdir = dp; + if (error = lookup(ndp, p)) { + FREE(ndp->ni_pnbuf, M_NAMEI); + return (error); + } + /* + * Check for symbolic link + */ + if (ndp->ni_more == 0) { + if ((ndp->ni_nameiop & (SAVENAME | SAVESTART)) == 0) + FREE(ndp->ni_pnbuf, M_NAMEI); + else + ndp->ni_nameiop |= HASBUF; + return (0); + } + if ((ndp->ni_nameiop & LOCKPARENT) && ndp->ni_pathlen == 1) + VOP_UNLOCK(ndp->ni_dvp); + if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { + error = ELOOP; + break; + } + if (ndp->ni_pathlen > 1) + MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + else + cp = ndp->ni_pnbuf; + aiov.iov_base = cp; + aiov.iov_len = MAXPATHLEN; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = (struct proc *)0; + auio.uio_resid = MAXPATHLEN; + if (error = VOP_READLINK(ndp->ni_vp, &auio, p->p_ucred)) { + if (ndp->ni_pathlen > 1) + free(cp, M_NAMEI); + break; + } + linklen = MAXPATHLEN - auio.uio_resid; + if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { + if (ndp->ni_pathlen > 1) + free(cp, M_NAMEI); + error = ENAMETOOLONG; + break; + } + if (ndp->ni_pathlen > 1) { + bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); + FREE(ndp->ni_pnbuf, M_NAMEI); + ndp->ni_pnbuf = cp; + } else + ndp->ni_pnbuf[linklen] = '\0'; + ndp->ni_pathlen += linklen; + vput(ndp->ni_vp); + dp = ndp->ni_dvp; + } + FREE(ndp->ni_pnbuf, M_NAMEI); + vrele(ndp->ni_dvp); + vput(ndp->ni_vp); + ndp->ni_vp = NULL; + return (error); +} + +/* + * Search a pathname. + * This is a very central and rather complicated routine. + * + * The pathname is pointed to by ni_ptr and is of length ni_pathlen. + * The starting directory is taken from ni_startdir. The pathname is + * descended until done, or a symbolic link is encountered. 
The variable + * ni_more is clear if the path is completed; it is set to one if a + * symbolic link needing interpretation is encountered. + * + * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on + * whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it, the parent directory is returned + * locked. If flag has WANTPARENT or'ed into it, the parent directory is + * returned unlocked. Otherwise the parent directory is not returned. If + * the target of the pathname exists and LOCKLEAF is or'ed into the flag + * the target is returned locked, otherwise it is returned unlocked. + * When creating or renaming and LOCKPARENT is specified, the target may not + * be ".". When deleting and LOCKPARENT is specified, the target may be ".". + * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent vnode unlocked. + * + * Overall outline of lookup: + * + * dirloop: + * identify next component of name at ndp->ni_ptr + * handle degenerate case where name is null string + * if .. and crossing mount points and on mounted filesys, find parent + * call VOP_LOOKUP routine for next component name + * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set + * component vnode returned in ni_vp (if it exists), locked. 
+ * if result vnode is mounted on and crossing mount points, + * find mounted on vnode + * if more components of name, do next level at dirloop + * return the answer in ni_vp, locked if LOCKLEAF set + * if LOCKPARENT set, return locked parent in ni_dvp + * if WANTPARENT set, return unlocked parent in ni_dvp + */ +lookup(ndp, p) + register struct nameidata *ndp; + struct proc *p; +{ + register char *cp; /* pointer into pathname argument */ + register struct vnode *dp = 0; /* the directory we are searching */ + struct vnode *tdp; /* saved dp */ + struct mount *mp; /* mount table entry */ + int docache; /* == 0 do not cache last component */ + int flag; /* LOOKUP, CREATE, RENAME or DELETE */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int rdonly; /* mounted read-only flag bit(s) */ + int error = 0; + + /* + * Setup: break out flag bits into variables. + */ + flag = ndp->ni_nameiop & OPMASK; + wantparent = ndp->ni_nameiop & (LOCKPARENT|WANTPARENT); + docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE; + if (flag == DELETE || (wantparent && flag != CREATE)) + docache = 0; + rdonly = MNT_RDONLY; + if (ndp->ni_nameiop & REMOTE) + rdonly |= MNT_EXRDONLY; + ndp->ni_dvp = NULL; + ndp->ni_more = 0; + dp = ndp->ni_startdir; + ndp->ni_startdir = NULLVP; + VOP_LOCK(dp); + +dirloop: + /* + * Search a new directory. + * + * The ni_hash value is for use by vfs_cache. + * The last component of the filename is left accessible via + * ndp->ptr for callers that need the name. Callers needing + * the name set the SAVENAME flag. When done, they assume + * responsibility for freeing the pathname buffer. 
+ */ + ndp->ni_hash = 0; + for (cp = ndp->ni_ptr; *cp != 0 && *cp != '/'; cp++) + ndp->ni_hash += (unsigned char)*cp; + ndp->ni_namelen = cp - ndp->ni_ptr; + if (ndp->ni_namelen >= NAME_MAX) { + error = ENAMETOOLONG; + goto bad; + } +#ifdef NAMEI_DIAGNOSTIC + { char c = *cp; + *cp = '\0'; + printf("{%s}: ", ndp->ni_ptr); + *cp = c; } +#endif + ndp->ni_pathlen -= ndp->ni_namelen; + ndp->ni_next = cp; + ndp->ni_makeentry = 1; + if (*cp == '\0' && docache == 0) + ndp->ni_makeentry = 0; + ndp->ni_isdotdot = (ndp->ni_namelen == 2 && + ndp->ni_ptr[1] == '.' && ndp->ni_ptr[0] == '.'); + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. like "/." or ".". + */ + if (ndp->ni_ptr[0] == '\0') { + if (flag != LOOKUP || wantparent) { + error = EISDIR; + goto bad; + } + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto bad; + } + if (!(ndp->ni_nameiop & LOCKLEAF)) + VOP_UNLOCK(dp); + ndp->ni_vp = dp; + if (ndp->ni_nameiop & SAVESTART) + panic("lookup: SAVESTART"); + return (0); + } + + /* + * Handle "..": two special cases. + * 1. If at root directory (e.g. after chroot) + * then ignore it so can't get out. + * 2. If this vnode is the root of a mounted + * filesystem, then replace it with the + * vnode which was mounted on so we take the + * .. in the other file system. + */ + if (ndp->ni_isdotdot) { + for (;;) { +/* 17 Aug 92*/ if ((dp == ndp->ni_rootdir) || (dp == rootdir)) { + ndp->ni_dvp = dp; + ndp->ni_vp = dp; + VREF(dp); + goto nextname; + } + if ((dp->v_flag & VROOT) == 0 || + (ndp->ni_nameiop & NOCROSSMOUNT)) + break; + tdp = dp; + dp = dp->v_mount->mnt_vnodecovered; + vput(tdp); + VREF(dp); + VOP_LOCK(dp); + } + } + + /* + * We now have a segment name to search for, and a directory to search. 
+ */ + if (error = VOP_LOOKUP(dp, ndp, p)) { +#ifdef DIAGNOSTIC + if (ndp->ni_vp != NULL) + panic("leaf should be empty"); +#endif +#ifdef NAMEI_DIAGNOSTIC + printf("not found\n"); +#endif + if (flag == LOOKUP || flag == DELETE || + error != ENOENT || *cp != 0) + goto bad; + /* + * If creating and at end of pathname, then can consider + * allowing file to be created. + */ + if (ndp->ni_dvp->v_mount->mnt_flag & rdonly) { + error = EROFS; + goto bad; + } + /* + * We return with ni_vp NULL to indicate that the entry + * doesn't currently exist, leaving a pointer to the + * (possibly locked) directory inode in ndp->ni_dvp. + */ + if (ndp->ni_nameiop & SAVESTART) { + ndp->ni_startdir = ndp->ni_dvp; + VREF(ndp->ni_startdir); + } + return (0); + } +#ifdef NAMEI_DIAGNOSTIC + printf("found\n"); +#endif + + dp = ndp->ni_vp; + /* + * Check for symbolic link + */ + if ((dp->v_type == VLNK) && + ((ndp->ni_nameiop & FOLLOW) || *ndp->ni_next == '/')) { + ndp->ni_more = 1; + return (0); + } + + /* + * Check to see if the vnode has been mounted on; + * if so find the root of the mounted file system. + */ +mntloop: + while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && + (ndp->ni_nameiop & NOCROSSMOUNT) == 0) { + while(mp->mnt_flag & MNT_MLOCK) { + mp->mnt_flag |= MNT_MWAIT; + sleep((caddr_t)mp, PVFS); + goto mntloop; + } + if (error = VFS_ROOT(dp->v_mountedhere, &tdp)) + goto bad2; + vput(dp); + ndp->ni_vp = dp = tdp; + } + +nextname: + /* + * Not a symbolic link. If more pathname, + * continue at next component, else return. + */ + if (*ndp->ni_next == '/') { + ndp->ni_ptr = ndp->ni_next; + while (*ndp->ni_ptr == '/') { + ndp->ni_ptr++; + ndp->ni_pathlen--; + } + vrele(ndp->ni_dvp); + goto dirloop; + } + /* + * Check for read-only file systems. + */ + if (flag == DELETE || flag == RENAME) { + /* + * Disallow directory write attempts on read-only + * file systems. 
+ */ + if ((dp->v_mount->mnt_flag & rdonly) || + (wantparent && (ndp->ni_dvp->v_mount->mnt_flag & rdonly))) { + error = EROFS; + goto bad2; + } + } + if (ndp->ni_nameiop & SAVESTART) { + ndp->ni_startdir = ndp->ni_dvp; + VREF(ndp->ni_startdir); + } + if (!wantparent) + vrele(ndp->ni_dvp); + if ((ndp->ni_nameiop & LOCKLEAF) == 0) + VOP_UNLOCK(dp); + return (0); + +bad2: + if ((ndp->ni_nameiop & LOCKPARENT) && *ndp->ni_next == '\0') + VOP_UNLOCK(ndp->ni_dvp); + vrele(ndp->ni_dvp); +bad: + vput(dp); + ndp->ni_vp = NULL; + return (error); +} diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c new file mode 100644 index 000000000000..e0d9d1201431 --- /dev/null +++ b/sys/kern/vfs_subr.c @@ -0,0 +1,1186 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vfs_subr.c 7.60 (Berkeley) 6/21/91 + * $Id: vfs_subr.c,v 1.4 1993/10/18 14:22:16 davidg Exp $ + */ + +/* + * External virtual filesystem routines + */ + +#include "param.h" +#include "proc.h" +#include "mount.h" +#include "time.h" +#include "vnode.h" +#include "specdev.h" +#include "namei.h" +#include "ucred.h" +#include "buf.h" +#include "errno.h" +#include "malloc.h" + +/* + * Remove a mount point from the list of mounted filesystems. + * Unmount of the root is illegal. + */ +void +vfs_remove(mp) + register struct mount *mp; +{ + + if (mp == rootfs) + panic("vfs_remove: unmounting root"); + mp->mnt_prev->mnt_next = mp->mnt_next; + mp->mnt_next->mnt_prev = mp->mnt_prev; + mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; + vfs_unlock(mp); +} + +/* + * Lock a filesystem. + * Used to prevent access to it while mounting and unmounting. + */ +vfs_lock(mp) + register struct mount *mp; +{ + + while(mp->mnt_flag & MNT_MLOCK) { + mp->mnt_flag |= MNT_MWAIT; + sleep((caddr_t)mp, PVFS); + } + mp->mnt_flag |= MNT_MLOCK; + return (0); +} + +/* + * Unlock a locked filesystem. + * Panic if filesystem is not locked. 
+ */ +void +vfs_unlock(mp) + register struct mount *mp; +{ + + if ((mp->mnt_flag & MNT_MLOCK) == 0) + panic("vfs_unlock: not locked"); + mp->mnt_flag &= ~MNT_MLOCK; + if (mp->mnt_flag & MNT_MWAIT) { + mp->mnt_flag &= ~MNT_MWAIT; + wakeup((caddr_t)mp); + } +} + +/* + * Mark a mount point as busy. + * Used to synchronize access and to delay unmounting. + */ +vfs_busy(mp) + register struct mount *mp; +{ + + while(mp->mnt_flag & MNT_MPBUSY) { + mp->mnt_flag |= MNT_MPWANT; + sleep((caddr_t)&mp->mnt_flag, PVFS); + } + if (mp->mnt_flag & MNT_UNMOUNT) + return (1); + mp->mnt_flag |= MNT_MPBUSY; + return (0); +} + +/* + * Free a busy filesystem. + * Panic if filesystem is not busy. + */ +vfs_unbusy(mp) + register struct mount *mp; +{ + + if ((mp->mnt_flag & MNT_MPBUSY) == 0) + panic("vfs_unbusy: not busy"); + mp->mnt_flag &= ~MNT_MPBUSY; + if (mp->mnt_flag & MNT_MPWANT) { + mp->mnt_flag &= ~MNT_MPWANT; + wakeup((caddr_t)&mp->mnt_flag); + } +} + +/* + * Lookup a mount point by filesystem identifier. + */ +struct mount * +getvfs(fsid) + fsid_t *fsid; +{ + register struct mount *mp; + + mp = rootfs; + do { + if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + return (mp); + } + mp = mp->mnt_next; + } while (mp != rootfs); + return ((struct mount *)0); +} + +/* + * Set vnode attributes to VNOVAL + */ +void vattr_null(vap) + register struct vattr *vap; +{ + + vap->va_type = VNON; + vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = + vap->va_fsid = vap->va_fileid = vap->va_size = + vap->va_size_rsv = vap->va_blocksize = vap->va_rdev = + vap->va_bytes = vap->va_bytes_rsv = + vap->va_atime.tv_sec = vap->va_atime.tv_usec = + vap->va_mtime.tv_sec = vap->va_mtime.tv_usec = + vap->va_ctime.tv_sec = vap->va_ctime.tv_usec = + vap->va_flags = vap->va_gen = VNOVAL; +} + +/* + * Routines having to do with the management of the vnode table. 
+ */ +struct vnode *vfreeh, **vfreet; +extern struct vnodeops dead_vnodeops, spec_vnodeops; +extern void vclean(); +long numvnodes; +struct vattr va_null; + +/* + * Initialize the vnode structures and initialize each file system type. + */ +vfsinit() +{ + struct vfsops **vfsp; + + /* + * Initialize the vnode name cache + */ + nchinit(); + /* + * Initialize each file system type. + */ + vattr_null(&va_null); + for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) { + if (*vfsp == NULL) + continue; + (*(*vfsp)->vfs_init)(); + } +} + +/* + * Return the next vnode from the free list. + */ +getnewvnode(tag, mp, vops, vpp) + enum vtagtype tag; + struct mount *mp; + struct vnodeops *vops; + struct vnode **vpp; +{ + register struct vnode *vp, *vq; + + if (numvnodes < desiredvnodes) { + vp = (struct vnode *)malloc((u_long)sizeof *vp, + M_VNODE, M_WAITOK); + bzero((char *)vp, sizeof *vp); + numvnodes++; + } else { + if ((vp = vfreeh) == NULL) { + tablefull("vnode"); + *vpp = 0; + return (ENFILE); + } + if (vp->v_usecount) + panic("free vnode isn't"); + if (vq = vp->v_freef) + vq->v_freeb = &vfreeh; + else + vfreet = &vfreeh; + vfreeh = vq; + vp->v_freef = NULL; + vp->v_freeb = NULL; + if (vp->v_type != VBAD) + vgone(vp); + vp->v_flag = 0; + vp->v_lastr = 0; + vp->v_socket = 0; + } + vp->v_type = VNON; + cache_purge(vp); + vp->v_tag = tag; + vp->v_op = vops; + insmntque(vp, mp); + VREF(vp); + *vpp = vp; + return (0); +} + +/* + * Move a vnode from one mount queue to another. + */ +insmntque(vp, mp) + register struct vnode *vp; + register struct mount *mp; +{ + register struct vnode *vq; + + /* + * Delete from old mount point vnode list, if on one. + */ + if (vp->v_mountb) { + if (vq = vp->v_mountf) + vq->v_mountb = vp->v_mountb; + *vp->v_mountb = vq; + } + /* + * Insert into list of vnodes for the new mount point, if available. 
+ */ + vp->v_mount = mp; + if (mp == NULL) { + vp->v_mountf = NULL; + vp->v_mountb = NULL; + return; + } + if (vq = mp->mnt_mounth) + vq->v_mountb = &vp->v_mountf; + vp->v_mountf = vq; + vp->v_mountb = &mp->mnt_mounth; + mp->mnt_mounth = vp; +} + +/* + * Make sure all write-behind blocks associated + * with mount point are flushed out (from sync). + */ +mntflushbuf(mountp, flags) + struct mount *mountp; + int flags; +{ + register struct vnode *vp; + + if ((mountp->mnt_flag & MNT_MPBUSY) == 0) + panic("mntflushbuf: not busy"); +loop: + for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) { + if (VOP_ISLOCKED(vp)) + continue; + if (vget(vp)) + goto loop; + vflushbuf(vp, flags); + vput(vp); + if (vp->v_mount != mountp) + goto loop; + } +} + +/* + * Flush all dirty buffers associated with a vnode. + */ +vflushbuf(vp, flags) + register struct vnode *vp; + int flags; +{ + register struct buf *bp; + struct buf *nbp; + int s; + +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { + nbp = bp->b_blockf; + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("vflushbuf: not dirty"); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below. + * NB: This is really specific to ufs, but is done here + * as it is easier and quicker. + */ + if (bp->b_vp == vp || (flags & B_SYNC) == 0) + (void) bawrite(bp); + else + (void) bwrite(bp); + goto loop; + } + splx(s); + if ((flags & B_SYNC) == 0) + return; + s = splbio(); + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); + } + splx(s); + if (vp->v_dirtyblkhd) { + vprint("vflushbuf: dirty", vp); + goto loop; + } +} + +/* + * Update outstanding I/O count and do wakeup if requested. 
+ */ +vwakeup(bp) + register struct buf *bp; +{ + register struct vnode *vp; + + bp->b_dirtyoff = bp->b_dirtyend = 0; + if (vp = bp->b_vp) { + vp->v_numoutput--; + if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { + if (vp->v_numoutput < 0) + panic("vwakeup: neg numoutput"); + vp->v_flag &= ~VBWAIT; + wakeup((caddr_t)&vp->v_numoutput); + } + } +} + +/* + * Invalidate in core blocks belonging to closed or umounted filesystem + * + * Go through the list of vnodes associated with the file system; + * for each vnode invalidate any buffers that it holds. Normally + * this routine is preceeded by a bflush call, so that on a quiescent + * filesystem there will be no dirty buffers when we are done. Binval + * returns the count of dirty buffers when it is finished. + */ +mntinvalbuf(mountp) + struct mount *mountp; +{ + register struct vnode *vp; + int dirty = 0; + + if ((mountp->mnt_flag & MNT_MPBUSY) == 0) + panic("mntinvalbuf: not busy"); +loop: + for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) { + if (vget(vp)) + goto loop; + dirty += vinvalbuf(vp, 1); + vput(vp); + if (vp->v_mount != mountp) + goto loop; + } + return (dirty); +} + +/* + * Flush out and invalidate all buffers associated with a vnode. + * Called with the underlying object locked. 
+ */ +vinvalbuf(vp, save) + register struct vnode *vp; + int save; +{ + register struct buf *bp; + struct buf *nbp, *blist; + int s, dirty = 0; + + for (;;) { + if (blist = vp->v_dirtyblkhd) + /* void */; + else if (blist = vp->v_cleanblkhd) + /* void */; + else + break; + for (bp = blist; bp; bp = nbp) { + nbp = bp->b_blockf; + s = splbio(); + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + sleep((caddr_t)bp, PRIBIO + 1); + splx(s); + break; + } + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + if (save && (bp->b_flags & B_DELWRI)) { + dirty++; + (void) bwrite(bp); + break; + } + if (bp->b_vp != vp) + reassignbuf(bp, bp->b_vp); + else + bp->b_flags |= B_INVAL; + brelse(bp); + } + } + if (vp->v_dirtyblkhd || vp->v_cleanblkhd) + panic("vinvalbuf: flush failed"); + return (dirty); +} + +/* + * Associate a buffer with a vnode. + */ +bgetvp(vp, bp) + register struct vnode *vp; + register struct buf *bp; +{ + register struct vnode *vq; + register struct buf *bq; + + if (bp->b_vp) + panic("bgetvp: not free"); + VHOLD(vp); + bp->b_vp = vp; + if (vp->v_type == VBLK || vp->v_type == VCHR) + bp->b_dev = vp->v_rdev; + else + bp->b_dev = NODEV; + /* + * Insert onto list for new vnode. + */ + if (bq = vp->v_cleanblkhd) + bq->b_blockb = &bp->b_blockf; + bp->b_blockf = bq; + bp->b_blockb = &vp->v_cleanblkhd; + vp->v_cleanblkhd = bp; +} + +/* + * Disassociate a buffer from a vnode. + */ +brelvp(bp) + register struct buf *bp; +{ + struct buf *bq; + struct vnode *vp; + + if (bp->b_vp == (struct vnode *) 0) + panic("brelvp: NULL"); + /* + * Delete from old vnode list, if on one. + */ + if (bp->b_blockb) { + if (bq = bp->b_blockf) + bq->b_blockb = bp->b_blockb; + *bp->b_blockb = bq; + bp->b_blockf = NULL; + bp->b_blockb = NULL; + } + vp = bp->b_vp; + bp->b_vp = (struct vnode *) 0; + HOLDRELE(vp); +} + +/* + * Reassign a buffer from one vnode to another. + * Used to assign file specific control information + * (indirect blocks) to the vnode to which they belong. 
+ */ +reassignbuf(bp, newvp) + register struct buf *bp; + register struct vnode *newvp; +{ + register struct buf *bq, **listheadp; + + if (newvp == NULL) + panic("reassignbuf: NULL"); + /* + * Delete from old vnode list, if on one. + */ + if (bp->b_blockb) { + if (bq = bp->b_blockf) + bq->b_blockb = bp->b_blockb; + *bp->b_blockb = bq; + } + /* + * If dirty, put on list of dirty buffers; + * otherwise insert onto list of clean buffers. + */ + if (bp->b_flags & B_DELWRI) + listheadp = &newvp->v_dirtyblkhd; + else + listheadp = &newvp->v_cleanblkhd; + if (bq = *listheadp) + bq->b_blockb = &bp->b_blockf; + bp->b_blockf = bq; + bp->b_blockb = listheadp; + *listheadp = bp; +} + +/* + * Create a vnode for a block device. + * Used for root filesystem, argdev, and swap areas. + * Also used for memory file system special devices. + */ +bdevvp(dev, vpp) + dev_t dev; + struct vnode **vpp; +{ + register struct vnode *vp; + struct vnode *nvp; + int error; + + if (dev == NODEV) + return (0); + error = getnewvnode(VT_NON, (struct mount *)0, &spec_vnodeops, &nvp); + if (error) { + *vpp = 0; + return (error); + } + vp = nvp; + vp->v_type = VBLK; + if (nvp = checkalias(vp, dev, (struct mount *)0)) { + vput(vp); + vp = nvp; + } + *vpp = vp; + return (0); +} + +/* + * Check to see if the new vnode represents a special device + * for which we already have a vnode (either because of + * bdevvp() or because of a different vnode representing + * the same block device). If such an alias exists, deallocate + * the existing contents and return the aliased vnode. The + * caller is responsible for filling it with its new contents. 
+ */ +struct vnode * +checkalias(nvp, nvp_rdev, mp) + register struct vnode *nvp; + dev_t nvp_rdev; + struct mount *mp; +{ + register struct vnode *vp; + struct vnode **vpp; + + if (nvp->v_type != VBLK && nvp->v_type != VCHR) + return (NULLVP); + + vpp = &speclisth[SPECHASH(nvp_rdev)]; +loop: + for (vp = *vpp; vp; vp = vp->v_specnext) { + if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) + continue; + /* + * Alias, but not in use, so flush it out. + */ + if (vp->v_usecount == 0) { + vgone(vp); + goto loop; + } + if (vget(vp)) + goto loop; + break; + } + if (vp == NULL || vp->v_tag != VT_NON) { + MALLOC(nvp->v_specinfo, struct specinfo *, + sizeof(struct specinfo), M_VNODE, M_WAITOK); + nvp->v_rdev = nvp_rdev; + nvp->v_hashchain = vpp; + nvp->v_specnext = *vpp; + nvp->v_specflags = 0; + *vpp = nvp; + if (vp != NULL) { + nvp->v_flag |= VALIASED; + vp->v_flag |= VALIASED; + vput(vp); + } + return (NULLVP); + } + VOP_UNLOCK(vp); + vclean(vp, 0); + vp->v_op = nvp->v_op; + vp->v_tag = nvp->v_tag; + nvp->v_type = VNON; + insmntque(vp, mp); + return (vp); +} + +/* + * Grab a particular vnode from the free list, increment its + * reference count and lock it. The vnode lock bit is set the + * vnode is being eliminated in vgone. The process is awakened + * when the transition is completed, and an error returned to + * indicate that the vnode is no longer usable (possibly having + * been changed to a new file system type). 
+ */ +vget(vp) + register struct vnode *vp; +{ + register struct vnode *vq; + + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + sleep((caddr_t)vp, PINOD); + return (1); + } + if (vp->v_usecount == 0) { + if (vq = vp->v_freef) + vq->v_freeb = vp->v_freeb; + else + vfreet = vp->v_freeb; + *vp->v_freeb = vq; + vp->v_freef = NULL; + vp->v_freeb = NULL; + } + VREF(vp); + VOP_LOCK(vp); + return (0); +} + +/* + * Vnode reference, just increment the count + */ +void vref(vp) + struct vnode *vp; +{ + + vp->v_usecount++; +} + +/* + * vput(), just unlock and vrele() + */ +void vput(vp) + register struct vnode *vp; +{ + VOP_UNLOCK(vp); + vrele(vp); +} + +/* + * Vnode release. + * If count drops to zero, call inactive routine and return to freelist. + */ +void vrele(vp) + register struct vnode *vp; +{ + struct proc *p = curproc; /* XXX */ + +#ifdef DIAGNOSTIC + if (vp == NULL) + panic("vrele: null vp"); +#endif + vp->v_usecount--; + if (vp->v_usecount > 0) + return; +#ifdef DIAGNOSTIC + if (vp->v_usecount != 0 || vp->v_writecount != 0) { + vprint("vrele: bad ref count", vp); + panic("vrele: ref cnt"); + } +#endif + if (vfreeh == NULLVP) { + /* + * insert into empty list + */ + vfreeh = vp; + vp->v_freeb = &vfreeh; + } else { + /* + * insert at tail of list + */ + *vfreet = vp; + vp->v_freeb = vfreet; + } + vp->v_freef = NULL; + vfreet = &vp->v_freef; + VOP_INACTIVE(vp, p); +} + +/* + * Page or buffer structure gets a reference. + */ +vhold(vp) + register struct vnode *vp; +{ + + vp->v_holdcnt++; +} + +/* + * Page or buffer structure frees a reference. + */ +holdrele(vp) + register struct vnode *vp; +{ + + if (vp->v_holdcnt <= 0) + panic("holdrele: holdcnt"); + vp->v_holdcnt--; +} + +/* + * Remove any vnodes in the vnode table belonging to mount point mp. + * + * If MNT_NOFORCE is specified, there should not be any active ones, + * return error if any are found (nb: this is a user error, not a + * system error). 
If MNT_FORCE is specified, detach any active vnodes + * that are found. + */ +int busyprt = 0; /* patch to print out busy vnodes */ + +vflush(mp, skipvp, flags) + struct mount *mp; + struct vnode *skipvp; + int flags; +{ + register struct vnode *vp, *nvp; + int busy = 0; + + if ((mp->mnt_flag & MNT_MPBUSY) == 0) + panic("vflush: not busy"); +loop: + for (vp = mp->mnt_mounth; vp; vp = nvp) { + if (vp->v_mount != mp) + goto loop; + nvp = vp->v_mountf; + /* + * Skip over a selected vnode. + */ + if (vp == skipvp) + continue; + /* + * Skip over a vnodes marked VSYSTEM. + */ + if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) + continue; + /* + * With v_usecount == 0, all we need to do is clear + * out the vnode data structures and we are done. + */ + if (vp->v_usecount == 0) { + vgone(vp); + continue; + } + /* + * For block or character devices, revert to an + * anonymous device. For all other files, just kill them. + */ + if (flags & FORCECLOSE) { + if (vp->v_type != VBLK && vp->v_type != VCHR) { + vgone(vp); + } else { + vclean(vp, 0); + vp->v_op = &spec_vnodeops; + insmntque(vp, (struct mount *)0); + } + continue; + } + if (busyprt) + vprint("vflush: busy vnode", vp); + busy++; + } + if (busy) + return (EBUSY); + return (0); +} + +/* + * Disassociate the underlying file system from a vnode. + */ +void vclean(vp, flags) + register struct vnode *vp; + int flags; +{ + struct vnodeops *origops; + int active; + struct proc *p = curproc; /* XXX */ + + /* + * Check to see if the vnode is in use. + * If so we have to reference it before we clean it out + * so that its count cannot fall to zero and generate a + * race against ourselves to recycle it. + */ + if (active = vp->v_usecount) + VREF(vp); + /* + * Prevent the vnode from being recycled or + * brought into use while we clean it out. 
+ */ + if (vp->v_flag & VXLOCK) + panic("vclean: deadlock"); + vp->v_flag |= VXLOCK; + /* + * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the buffer list is being cleaned out. + */ + VOP_LOCK(vp); + if (flags & DOCLOSE) + vinvalbuf(vp, 1); + /* + * Prevent any further operations on the vnode from + * being passed through to the old file system. + */ + origops = vp->v_op; + vp->v_op = &dead_vnodeops; + vp->v_tag = VT_NON; + /* + * If purging an active vnode, it must be unlocked, closed, + * and deactivated before being reclaimed. + */ + (*(origops->vop_unlock))(vp); + if (active) { + if (flags & DOCLOSE) + (*(origops->vop_close))(vp, IO_NDELAY, NOCRED, p); + (*(origops->vop_inactive))(vp, p); + } + /* + * Reclaim the vnode. + */ + if ((*(origops->vop_reclaim))(vp)) + panic("vclean: cannot reclaim"); + if (active) + vrele(vp); + /* + * Done with purge, notify sleepers in vget of the grim news. + */ + vp->v_flag &= ~VXLOCK; + if (vp->v_flag & VXWANT) { + vp->v_flag &= ~VXWANT; + wakeup((caddr_t)vp); + } +} + +/* + * Eliminate all activity associated with the requested vnode + * and with all vnodes aliased to the requested vnode. + */ +void vgoneall(vp) + register struct vnode *vp; +{ + register struct vnode *vq; + + if (vp->v_flag & VALIASED) { + /* + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. + */ + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + sleep((caddr_t)vp, PINOD); + return; + } + /* + * Ensure that vp will not be vgone'd while we + * are eliminating its aliases. 
+ */ + vp->v_flag |= VXLOCK; + while (vp->v_flag & VALIASED) { + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_rdev != vp->v_rdev || + vq->v_type != vp->v_type || vp == vq) + continue; + vgone(vq); + break; + } + } + /* + * Remove the lock so that vgone below will + * really eliminate the vnode after which time + * vgone will awaken any sleepers. + */ + vp->v_flag &= ~VXLOCK; + } + vgone(vp); +} + +/* + * Eliminate all activity associated with a vnode + * in preparation for reuse. + */ +void vgone(vp) + register struct vnode *vp; +{ + register struct vnode *vq; + struct vnode *vx; + long count; + + /* + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. + */ + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + sleep((caddr_t)vp, PINOD); + return; + } + /* + * Clean out the filesystem specific data. + */ + vclean(vp, DOCLOSE); + /* + * Delete from old mount point vnode list, if on one. + */ + if (vp->v_mountb) { + if (vq = vp->v_mountf) + vq->v_mountb = vp->v_mountb; + *vp->v_mountb = vq; + vp->v_mountf = NULL; + vp->v_mountb = NULL; + } + /* + * If special device, remove it from special device alias list. + */ + if (vp->v_type == VBLK || vp->v_type == VCHR) { + if (*vp->v_hashchain == vp) { + *vp->v_hashchain = vp->v_specnext; + } else { + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_specnext != vp) + continue; + vq->v_specnext = vp->v_specnext; + break; + } + if (vq == NULL) + panic("missing bdev"); + } + if (vp->v_flag & VALIASED) { + count = 0; + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_rdev != vp->v_rdev || + vq->v_type != vp->v_type) + continue; + count++; + vx = vq; + } + if (count == 0) + panic("missing alias"); + if (count == 1) + vx->v_flag &= ~VALIASED; + vp->v_flag &= ~VALIASED; + } + FREE(vp->v_specinfo, M_VNODE); + vp->v_specinfo = NULL; + } + /* + * If it is on the freelist, move it to the head of the list. 
+ */ + if (vp->v_freeb) { + if (vq = vp->v_freef) + vq->v_freeb = vp->v_freeb; + else + vfreet = vp->v_freeb; + *vp->v_freeb = vq; + vp->v_freef = vfreeh; + vp->v_freeb = &vfreeh; + vfreeh->v_freeb = &vp->v_freef; + vfreeh = vp; + } + vp->v_type = VBAD; +} + +/* + * Lookup a vnode by device number. + */ +vfinddev(dev, type, vpp) + dev_t dev; + enum vtype type; + struct vnode **vpp; +{ + register struct vnode *vp; + + for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { + if (dev != vp->v_rdev || type != vp->v_type) + continue; + *vpp = vp; + return (0); + } + return (1); +} + +/* + * Calculate the total number of references to a special device. + */ +vcount(vp) + register struct vnode *vp; +{ + register struct vnode *vq; + int count; + + if ((vp->v_flag & VALIASED) == 0) + return (vp->v_usecount); +loop: + for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) + continue; + /* + * Alias, but not in use, so flush it out. + */ + if (vq->v_usecount == 0) { + vgone(vq); + goto loop; + } + count += vq->v_usecount; + } + return (count); +} + +/* + * Print out a description of a vnode. 
+ */ +static char *typename[] = + { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; + +vprint(label, vp) + char *label; + register struct vnode *vp; +{ + char buf[64]; + + if (label != NULL) + printf("%s: ", label); + printf("type %s, usecount %d, writecount %d, refcount %d,", + typename[vp->v_type], vp->v_usecount, vp->v_writecount, + vp->v_holdcnt); + buf[0] = '\0'; + if (vp->v_flag & VROOT) + strcat(buf, "|VROOT"); + if (vp->v_flag & VTEXT) + strcat(buf, "|VTEXT"); + if (vp->v_flag & VSYSTEM) + strcat(buf, "|VSYSTEM"); + if (vp->v_flag & VXLOCK) + strcat(buf, "|VXLOCK"); + if (vp->v_flag & VXWANT) + strcat(buf, "|VXWANT"); + if (vp->v_flag & VBWAIT) + strcat(buf, "|VBWAIT"); + if (vp->v_flag & VALIASED) + strcat(buf, "|VALIASED"); + if (buf[0] != '\0') + printf(" flags (%s)", &buf[1]); + printf("\n\t"); + VOP_PRINT(vp); +} + +#ifdef DEBUG +/* + * List all of the locked vnodes in the system. + * Called when debugging the kernel. + */ +printlockedvnodes() +{ + register struct mount *mp; + register struct vnode *vp; + + printf("Locked vnodes\n"); + mp = rootfs; + do { + for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) + if (VOP_ISLOCKED(vp)) + vprint((char *)0, vp); + mp = mp->mnt_next; + } while (mp != rootfs); +} +#endif + +int kinfo_vdebug = 1; +int kinfo_vgetfailed; +#define KINFO_VNODESLOP 10 +/* + * Dump vnode list (via kinfo). + * Copyout address of vnode followed by vnode. 
+ */ +/* ARGSUSED */ +kinfo_vnode(op, where, acopysize, arg, aneeded) + int op; + char *where; + int *acopysize, arg, *aneeded; +{ + register struct mount *mp = rootfs; + struct mount *omp; + struct vnode *vp; + register char *bp = where, *savebp; + char *ewhere = where + *acopysize; + int error; + +#define VPTRSZ sizeof (struct vnode *) +#define VNODESZ sizeof (struct vnode) + if (where == NULL) { + *aneeded = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); + return (0); + } + + do { + if (vfs_busy(mp)) { + mp = mp->mnt_next; + continue; + } + savebp = bp; +again: + for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { + /* + * Check that the vp is still associated with + * this filesystem. RACE: could have been + * recycled onto the same filesystem. + */ + if (vp->v_mount != mp) { + if (kinfo_vdebug) + printf("kinfo: vp changed\n"); + bp = savebp; + goto again; + } + if ((bp + VPTRSZ + VNODESZ <= ewhere) && + ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || + (error = copyout((caddr_t)vp, bp + VPTRSZ, + VNODESZ)))) + return (error); + bp += VPTRSZ + VNODESZ; + } + omp = mp; + mp = mp->mnt_next; + vfs_unbusy(omp); + } while (mp != rootfs); + + *aneeded = bp - where; + if (bp > ewhere) + *acopysize = ewhere - where; + else + *acopysize = bp - where; + return (0); +} diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c new file mode 100644 index 000000000000..395dc90e63aa --- /dev/null +++ b/sys/kern/vfs_syscalls.c @@ -0,0 +1,1912 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vfs_syscalls.c 7.74 (Berkeley) 6/21/91 + * $Id: vfs_syscalls.c,v 1.5 1993/10/23 16:02:54 davidg Exp $ + */ + +#include "param.h" +#include "systm.h" +#include "namei.h" +#include "filedesc.h" +#include "kernel.h" +#include "file.h" +#include "stat.h" +#include "vnode.h" +#include "mount.h" +#include "proc.h" +#include "uio.h" +#include "malloc.h" + +/* + * Virtual File System System Calls + */ + +/* + * Mount system call. 
+ */ + +struct mount_args { + int type; + char *dir; + int flags; + caddr_t data; +}; + +/* ARGSUSED */ +mount(p, uap, retval) + struct proc *p; + register struct mount_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + register struct mount *mp; + int error, flag; + struct nameidata nd; + + /* + * Must be super user + */ + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + /* + * Get vnode to be covered + */ + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->dir; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (uap->flags & MNT_UPDATE) { + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + mp = vp->v_mount; + /* + * We allow going from read-only to read-write, + * but not from read-write to read-only. + */ + if ((mp->mnt_flag & MNT_RDONLY) == 0 && + (uap->flags & MNT_RDONLY) != 0) { + vput(vp); + return (EOPNOTSUPP); /* Needs translation */ + } + flag = mp->mnt_flag; + mp->mnt_flag |= MNT_UPDATE; + VOP_UNLOCK(vp); + goto update; + } + vinvalbuf(vp, 1); + if (vp->v_usecount != 1) { + vput(vp); + return (EBUSY); + } + if (vp->v_type != VDIR) { + vput(vp); + return (ENOTDIR); + } + if ((unsigned long)uap->type > MOUNT_MAXTYPE || + vfssw[uap->type] == (struct vfsops *)0) { + vput(vp); + return (ENODEV); + } + + /* + * Allocate and initialize the file system. + */ + mp = (struct mount *)malloc((u_long)sizeof(struct mount), + M_MOUNT, M_WAITOK); + mp->mnt_op = vfssw[uap->type]; + mp->mnt_flag = 0; + mp->mnt_exroot = 0; + mp->mnt_mounth = NULLVP; + if (error = vfs_lock(mp)) { + free((caddr_t)mp, M_MOUNT); + vput(vp); + return (error); + } + if (vp->v_mountedhere != (struct mount *)0) { + vfs_unlock(mp); + free((caddr_t)mp, M_MOUNT); + vput(vp); + return (EBUSY); + } + vp->v_mountedhere = mp; + mp->mnt_vnodecovered = vp; +update: + /* + * Set the mount level flags. 
+ */ + if (uap->flags & MNT_RDONLY) + mp->mnt_flag |= MNT_RDONLY; + else + mp->mnt_flag &= ~MNT_RDONLY; + if (uap->flags & MNT_NOSUID) + mp->mnt_flag |= MNT_NOSUID; + else + mp->mnt_flag &= ~MNT_NOSUID; + if (uap->flags & MNT_NOEXEC) + mp->mnt_flag |= MNT_NOEXEC; + else + mp->mnt_flag &= ~MNT_NOEXEC; + if (uap->flags & MNT_NODEV) + mp->mnt_flag |= MNT_NODEV; + else + mp->mnt_flag &= ~MNT_NODEV; + if (uap->flags & MNT_NOCORE) + mp->mnt_flag |= MNT_NOCORE; + else + mp->mnt_flag &= ~MNT_NOCORE; + if (uap->flags & MNT_SYNCHRONOUS) + mp->mnt_flag |= MNT_SYNCHRONOUS; + else + mp->mnt_flag &= ~MNT_SYNCHRONOUS; + /* + * Mount the filesystem. + */ + error = VFS_MOUNT(mp, uap->dir, uap->data, ndp, p); + if (mp->mnt_flag & MNT_UPDATE) { + mp->mnt_flag &= ~MNT_UPDATE; + vrele(vp); + if (error) + mp->mnt_flag = flag; + return (error); + } + /* + * Put the new filesystem on the mount list after root. + */ + mp->mnt_next = rootfs->mnt_next; + mp->mnt_prev = rootfs; + rootfs->mnt_next = mp; + mp->mnt_next->mnt_prev = mp; + cache_purge(vp); + if (!error) { + VOP_UNLOCK(vp); + vfs_unlock(mp); + error = VFS_START(mp, 0, p); + } else { + vfs_remove(mp); + free((caddr_t)mp, M_MOUNT); + vput(vp); + } + return (error); +} + +/* + * Unmount system call. + * + * Note: unmount takes a path to the vnode mounted on as argument, + * not special file (as before). 
+ */ + +struct umount_args { + char *pathp; + int flags; +}; + +/* ARGSUSED */ +unmount(p, uap, retval) + struct proc *p; + register struct umount_args *uap; + int *retval; +{ + register struct vnode *vp; + register struct nameidata *ndp; + struct mount *mp; + int error; + struct nameidata nd; + + /* + * Must be super user + */ + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | LOCKLEAF | FOLLOW; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->pathp; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + /* + * Must be the root of the filesystem + */ + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + mp = vp->v_mount; + vput(vp); + return (dounmount(mp, uap->flags, p)); +} + +/* + * Do an unmount. + */ +dounmount(mp, flags, p) + register struct mount *mp; + int flags; + struct proc *p; +{ + struct vnode *coveredvp; + int error; + + coveredvp = mp->mnt_vnodecovered; + if (vfs_busy(mp)) + return (EBUSY); + mp->mnt_flag |= MNT_UNMOUNT; + if (error = vfs_lock(mp)) + return (error); + + vnode_pager_umount(mp); /* release cached vnodes */ + cache_purgevfs(mp); /* remove cache entries for this file sys */ + if ((error = VFS_SYNC(mp, MNT_WAIT)) == 0 || (flags & MNT_FORCE)) + error = VFS_UNMOUNT(mp, flags, p); + mp->mnt_flag &= ~MNT_UNMOUNT; + vfs_unbusy(mp); + if (error) { + vfs_unlock(mp); + } else { + vrele(coveredvp); + vfs_remove(mp); + free((caddr_t)mp, M_MOUNT); + } + return (error); +} + +/* + * Sync system call. + * Sync each mounted filesystem. + */ +/* ARGSUSED */ +sync(p, uap, retval) + struct proc *p; + void *uap; + int *retval; +{ + register struct mount *mp; + struct mount *omp; + + if ((mp = rootfs) == NULL) + return (0); + do { + /* + * The lock check below is to avoid races with mount + * and unmount. 
+ */ + if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 && + !vfs_busy(mp)) { + VFS_SYNC(mp, MNT_NOWAIT); + omp = mp; + mp = mp->mnt_next; + vfs_unbusy(omp); + } else + mp = mp->mnt_next; + } while (mp != rootfs); + return (0); +} + +/* + * Operate on filesystem quotas. + */ + +struct quotactl_args { + char *path; + int cmd; + int uid; + caddr_t arg; +}; + +/* ARGSUSED */ +quotactl(p, uap, retval) + struct proc *p; + register struct quotactl_args *uap; + int *retval; +{ + register struct mount *mp; + register struct nameidata *ndp; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->path; + if (error = namei(ndp, p)) + return (error); + mp = ndp->ni_vp->v_mount; + vrele(ndp->ni_vp); + return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, p)); +} + +/* + * Get filesystem statistics. + */ + +struct statfs_args { + char *path; + struct statfs *buf; +}; + +/* ARGSUSED */ +statfs(p, uap, retval) + struct proc *p; + register struct statfs_args *uap; + int *retval; +{ + register struct mount *mp; + register struct nameidata *ndp; + register struct statfs *sp; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->path; + if (error = namei(ndp, p)) + return (error); + mp = ndp->ni_vp->v_mount; + sp = &mp->mnt_stat; + vrele(ndp->ni_vp); + if (error = VFS_STATFS(mp, sp, p)) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp))); +} + +/* + * Get filesystem statistics. 
+ */ + +struct fstatfs_args { + int fd; + struct statfs *buf; +}; + +/* ARGSUSED */ +fstatfs(p, uap, retval) + struct proc *p; + register struct fstatfs_args *uap; + int *retval; +{ + struct file *fp; + struct mount *mp; + register struct statfs *sp; + int error; + + if (error = getvnode(p->p_fd, uap->fd, &fp)) + return (error); + mp = ((struct vnode *)fp->f_data)->v_mount; + sp = &mp->mnt_stat; + if (error = VFS_STATFS(mp, sp, p)) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp))); +} + +/* + * Get statistics on all filesystems. + */ + +struct getfsstat_args { + struct statfs *buf; + long bufsize; + int flags; +}; + +getfsstat(p, uap, retval) + struct proc *p; + register struct getfsstat_args *uap; + int *retval; +{ + register struct mount *mp; + register struct statfs *sp; + caddr_t sfsp; + long count, maxcount, error; + + maxcount = uap->bufsize / sizeof(struct statfs); + sfsp = (caddr_t)uap->buf; + mp = rootfs; + count = 0; + do { + if (sfsp && count < maxcount && + ((mp->mnt_flag & MNT_MLOCK) == 0)) { + sp = &mp->mnt_stat; + /* + * If MNT_NOWAIT is specified, do not refresh the + * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + */ + if (((uap->flags & MNT_NOWAIT) == 0 || + (uap->flags & MNT_WAIT)) && + (error = VFS_STATFS(mp, sp, p))) { + mp = mp->mnt_prev; + continue; + } + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp))) + return (error); + sfsp += sizeof(*sp); + } + count++; + mp = mp->mnt_prev; + } while (mp != rootfs); + if (sfsp && count > maxcount) + *retval = maxcount; + else + *retval = count; + return (0); +} + +/* + * Change current working directory to a given file descriptor. 
+ */ + +struct fchdir_args { + int fd; +}; + +/* ARGSUSED */ +fchdir(p, uap, retval) + struct proc *p; + struct fchdir_args *uap; + int *retval; +{ + register struct filedesc *fdp = p->p_fd; + register struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(fdp, uap->fd, &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LOCK(vp); + if (vp->v_type != VDIR) + error = ENOTDIR; + else + error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + VOP_UNLOCK(vp); + if (error) + return (error); + VREF(vp); + vrele(fdp->fd_cdir); + fdp->fd_cdir = vp; + return (0); +} + +/* + * Change current working directory (``.''). + */ + +struct chdir_args { + char *fname; +}; + +/* ARGSUSED */ +chdir(p, uap, retval) + struct proc *p; + struct chdir_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct filedesc *fdp = p->p_fd; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = chdirec(ndp, p)) + return (error); + vrele(fdp->fd_cdir); + fdp->fd_cdir = ndp->ni_vp; + return (0); +} + +/* + * Change notion of root (``/'') directory. + */ + +struct chroot_args { + char *fname; +}; + +/* ARGSUSED */ +chroot(p, uap, retval) + struct proc *p; + struct chroot_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct filedesc *fdp = p->p_fd; + int error; + struct nameidata nd; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = chdirec(ndp, p)) + return (error); + if (fdp->fd_rdir != NULL) + vrele(fdp->fd_rdir); + fdp->fd_rdir = ndp->ni_vp; + return (0); +} + +/* + * Common routine for chroot and chdir. 
 */
/*
 * Look up a directory path and verify VEXEC on it.  On success the
 * vnode is left referenced and unlocked in ndp->ni_vp; on failure the
 * reference is dropped here.
 */
chdirec(ndp, p)
	struct nameidata *ndp;
	struct proc *p;
{
	struct vnode *vp;
	int error;

	if (error = namei(ndp, p))
		return (error);
	vp = ndp->ni_vp;
	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
	VOP_UNLOCK(vp);
	if (error)
		vrele(vp);
	return (error);
}

/*
 * Open system call.
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */

struct open_args {
	char	*fname;		/* user path to open */
	int	mode;		/* open flags (O_*) in user form */
	int	crtmode;	/* creation mode if O_CREAT */
};

open(p, uap, retval)
	struct proc *p;
	register struct open_args *uap;
	int *retval;
{
	struct nameidata *ndp;
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	register struct vnode *vp;
	int fmode, cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;
	extern struct fileops vnops;

	/* Reserve the descriptor slot and file structure up front. */
	if (error = falloc(p, &nfp, &indx))
		return (error);
	fp = nfp;
	fmode = FFLAGS(uap->mode);	/* user O_* flags -> kernel F* flags */
	cmode = ((uap->crtmode &~ fdp->fd_cmask) & 07777) &~ S_ISVTX;
	ndp = &nd;
	ndp->ni_segflg = UIO_USERSPACE;
	ndp->ni_dirp = uap->fname;
	/* Encode the reserved index for fdopen()'s ENODEV back channel. */
	p->p_dupfd = -indx - 1;		/* XXX check for fdopen */
	if (error = vn_open(ndp, p, fmode, cmode)) {
		ffree(fp);
		if (error == ENODEV &&	/* XXX from fdopen */
		    ((short) p->p_dupfd) >= 0 &&
		    (error = dupfdopen(fdp, indx, p->p_dupfd, fmode)) == 0) {
			*retval = indx;
			return (0);
		}
		if (error == ERESTART)
			error = EINTR;
		fdp->fd_ofiles[indx] = NULL;
		return (error);
	}
	vp = ndp->ni_vp;	/* locked vnode from vn_open */
	fp->f_flag = fmode & FMASK;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file advisory lock requested at open time. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
			/* Undo the open completely on lock failure. */
			VOP_UNLOCK(vp);
			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
			ffree(fp);
			fdp->fd_ofiles[indx] = NULL;
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
	VOP_UNLOCK(vp);
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = (caddr_t)vp;
	*retval = indx;
	return (0);
}

#ifdef COMPAT_43
/*
 * Creat system call.
 */

struct ocreat_args {
	char	*fname;		/* user path to create */
	int	fmode;		/* creation mode bits */
};

/*
 * Old creat(2): rewrite the arguments into open(2) form and reuse it.
 * NOTE(review): the cast is to an undeclared "struct args"; it works
 * because struct newargs matches struct open_args member-for-member —
 * confirm against the syscall argument conventions.
 */
ocreat(p, uap, retval)
	struct proc *p;
	register struct ocreat_args *uap;
	int *retval;
{
	struct newargs {
		char *fname;
		int mode;
		int crtmode;
	} openuap;

	openuap.fname = uap->fname;
	openuap.crtmode = uap->fmode;
	openuap.mode = O_WRONLY | O_CREAT | O_TRUNC;
	return (open(p, (struct args *)&openuap, retval));
}
#endif /* COMPAT_43 */

/*
 * Mknod system call.
 */

struct mknod_args {
	char	*fname;		/* user path of the node to create */
	int	fmode;		/* S_IF* type plus permission bits */
	int	dev;		/* device number for VCHR/VBLK */
};

/*
 * mknod(2): super-user only.  Creates a character, block, or (via the
 * S_IFMT flag value) "bad sector" node.
 */
/* ARGSUSED */
mknod(p, uap, retval)
	struct proc *p;
	register struct mknod_args *uap;
	int *retval;
{
	register struct nameidata *ndp;
	register struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	ndp = &nd;
	ndp->ni_nameiop = CREATE | LOCKPARENT;
	ndp->ni_segflg = UIO_USERSPACE;
	ndp->ni_dirp = uap->fname;
	if (error = namei(ndp, p))
		return (error);
	vp = ndp->ni_vp;
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	VATTR_NULL(&vattr);
	switch (uap->fmode & S_IFMT) {

	case S_IFMT:	/* used by badsect to flag bad sectors */
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		vattr.va_type = VBLK;
		break;
	default:
		error = EINVAL;
		goto out;
	}
	vattr.va_mode = (uap->fmode & 07777) &~ p->p_fd->fd_cmask;
	vattr.va_rdev = uap->dev;
out:
	if (!error) {
		error = VOP_MKNOD(ndp, &vattr, p->p_ucred, p);
	} else {
		/* Abort the pending create and release namei's vnodes. */
		VOP_ABORTOP(ndp);
		if (ndp->ni_dvp == vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (vp)
			vrele(vp);
	}
	return (error);
}

/*
 * Mkfifo system call.
+ */ + +struct mkfifo_args { + char *fname; + int fmode; +}; + +/* ARGSUSED */ +mkfifo(p, uap, retval) + struct proc *p; + register struct mkfifo_args *uap; + int *retval; +{ + register struct nameidata *ndp; + struct vattr vattr; + int error; + struct nameidata nd; + +#ifndef FIFO + return (EOPNOTSUPP); +#else + ndp = &nd; + ndp->ni_nameiop = CREATE | LOCKPARENT; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + if (ndp->ni_vp != NULL) { + VOP_ABORTOP(ndp); + if (ndp->ni_dvp == ndp->ni_vp) + vrele(ndp->ni_dvp); + else + vput(ndp->ni_dvp); + vrele(ndp->ni_vp); + return (EEXIST); + } + VATTR_NULL(&vattr); + vattr.va_type = VFIFO; + vattr.va_mode = (uap->fmode & 07777) &~ p->p_fd->fd_cmask; + return (VOP_MKNOD(ndp, &vattr, p->p_ucred, p)); +#endif /* FIFO */ +} + +/* + * Link system call. + */ + +struct link_args { + char *target; + char *linkname; +}; + +/* ARGSUSED */ +link(p, uap, retval) + struct proc *p; + register struct link_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp, *xp; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->target; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_type == VDIR && + (error = suser(p->p_ucred, &p->p_acflag))) + goto out1; + ndp->ni_nameiop = CREATE | LOCKPARENT; + ndp->ni_dirp = (caddr_t)uap->linkname; + if (error = namei(ndp, p)) + goto out1; + xp = ndp->ni_vp; + if (xp != NULL) { + error = EEXIST; + goto out; + } + xp = ndp->ni_dvp; + if (vp->v_mount != xp->v_mount) + error = EXDEV; +out: + if (!error) { + error = VOP_LINK(vp, ndp, p); + } else { + VOP_ABORTOP(ndp); + if (ndp->ni_dvp == ndp->ni_vp) + vrele(ndp->ni_dvp); + else + vput(ndp->ni_dvp); + if (ndp->ni_vp) + vrele(ndp->ni_vp); + } +out1: + vrele(vp); + return (error); +} + +/* + * Make a symbolic link. 
+ */ + +struct symlink_args { + char *target; + char *linkname; +}; + +/* ARGSUSED */ +symlink(p, uap, retval) + struct proc *p; + register struct symlink_args *uap; + int *retval; +{ + register struct nameidata *ndp; + struct vattr vattr; + char *target; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->linkname; + MALLOC(target, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (error = copyinstr(uap->target, target, MAXPATHLEN, (u_int *)0)) + goto out; + ndp->ni_nameiop = CREATE | LOCKPARENT; + if (error = namei(ndp, p)) + goto out; + if (ndp->ni_vp) { + VOP_ABORTOP(ndp); + if (ndp->ni_dvp == ndp->ni_vp) + vrele(ndp->ni_dvp); + else + vput(ndp->ni_dvp); + vrele(ndp->ni_vp); + error = EEXIST; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_mode = 0777 &~ p->p_fd->fd_cmask; + error = VOP_SYMLINK(ndp, &vattr, target, p); +out: + FREE(target, M_NAMEI); + return (error); +} + +/* + * Delete a name from the filesystem. + */ + +struct unlink_args { + char *fname; +}; + +/* ARGSUSED */ +unlink(p, uap, retval) + struct proc *p; + struct unlink_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = DELETE | LOCKPARENT | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_type == VDIR && + (error = suser(p->p_ucred, &p->p_acflag))) + goto out; + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) { + error = EBUSY; + goto out; + } + (void) vnode_pager_uncache(vp); +out: + if (!error) { + error = VOP_REMOVE(ndp, p); + } else { + VOP_ABORTOP(ndp); + if (ndp->ni_dvp == vp) + vrele(ndp->ni_dvp); + else + vput(ndp->ni_dvp); + vput(vp); + } + return (error); +} + +/* + * Seek system call. 
+ */ + +struct lseek_args { + int fdes; + off_t off; + int sbase; +}; + +lseek(p, uap, retval) + struct proc *p; + register struct lseek_args *uap; + off_t *retval; +{ + struct ucred *cred = p->p_ucred; + register struct filedesc *fdp = p->p_fd; + register struct file *fp; + struct vattr vattr; + int error; + + if ((unsigned)uap->fdes >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[uap->fdes]) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) + return (ESPIPE); + switch (uap->sbase) { + + case L_INCR: + fp->f_offset += uap->off; + break; + + case L_XTND: + if (error = VOP_GETATTR((struct vnode *)fp->f_data, + &vattr, cred, p)) + return (error); + fp->f_offset = uap->off + vattr.va_size; + break; + + case L_SET: + fp->f_offset = uap->off; + break; + + default: + return (EINVAL); + } + *retval = fp->f_offset; + return (0); +} + +/* + * Check access permissions. + */ + +struct saccess_args { + char *fname; + int fmode; +}; + +/* ARGSUSED */ +saccess(p, uap, retval) + struct proc *p; + register struct saccess_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct ucred *cred = p->p_ucred; + register struct vnode *vp; + int error, mode, svuid, svgid; + struct nameidata nd; + + ndp = &nd; + svuid = cred->cr_uid; + svgid = cred->cr_groups[0]; + cred->cr_uid = p->p_cred->p_ruid; + cred->cr_groups[0] = p->p_cred->p_rgid; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + goto out1; + vp = ndp->ni_vp; + /* + * fmode == 0 means only check for exist + */ + if (uap->fmode) { + mode = 0; + if (uap->fmode & R_OK) + mode |= VREAD; + if (uap->fmode & W_OK) + mode |= VWRITE; + if (uap->fmode & X_OK) + mode |= VEXEC; + if ((mode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) + error = VOP_ACCESS(vp, mode, cred, p); + } + vput(vp); +out1: + cred->cr_uid = svuid; + cred->cr_groups[0] = svgid; + return (error); +} + +/* + * Stat system call. 
+ * This version follows links. + */ + +struct stat_args { + char *fname; + struct stat *ub; +}; + +/* ARGSUSED */ +stat(p, uap, retval) + struct proc *p; + register struct stat_args *uap; + int *retval; +{ + register struct nameidata *ndp; + struct stat sb; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | LOCKLEAF | FOLLOW; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + error = vn_stat(ndp->ni_vp, &sb, p); + vput(ndp->ni_vp); + if (error) + return (error); + error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); + return (error); +} + +/* + * Lstat system call. + * This version does not follow links. + */ + +struct lstat_args { + char *fname; + struct stat *ub; +}; + +/* ARGSUSED */ +lstat(p, uap, retval) + struct proc *p; + register struct lstat_args *uap; + int *retval; +{ + register struct nameidata *ndp; + struct stat sb; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | LOCKLEAF | NOFOLLOW; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + error = vn_stat(ndp->ni_vp, &sb, p); + vput(ndp->ni_vp); + if (error) + return (error); + error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); + return (error); +} + +/* + * Return target name of a symbolic link. 
+ */ + +struct readlink_args { + char *name; + char *buf; + int count; +}; + +/* ARGSUSED */ +readlink(p, uap, retval) + struct proc *p; + register struct readlink_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + struct iovec aiov; + struct uio auio; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->name; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_type != VLNK) { + error = EINVAL; + goto out; + } + aiov.iov_base = uap->buf; + aiov.iov_len = uap->count; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_resid = uap->count; + error = VOP_READLINK(vp, &auio, p->p_ucred); +out: + vput(vp); + *retval = uap->count - auio.uio_resid; + return (error); +} + +/* + * Change flags of a file given path name. + */ + +struct chflags_args { + char *fname; + int flags; +}; + +/* ARGSUSED */ +chflags(p, uap, retval) + struct proc *p; + register struct chflags_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + error = EROFS; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_flags = uap->flags; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + vput(vp); + return (error); +} + +/* + * Change flags of a file given a file descriptor. 
+ */ + +struct fdchflags_args { + int fd; + int flags; +}; + +/* ARGSUSED */ +fchflags(p, uap, retval) + struct proc *p; + register struct fdchflags_args *uap; + int *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, uap->fd, &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LOCK(vp); + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + error = EROFS; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_flags = uap->flags; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + VOP_UNLOCK(vp); + return (error); +} + +/* + * Change mode of a file given path name. + */ + +struct chmod_args { + char *fname; + int fmode; +}; + +/* ARGSUSED */ +chmod(p, uap, retval) + struct proc *p; + register struct chmod_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + error = EROFS; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_mode = uap->fmode & 07777; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + vput(vp); + return (error); +} + +/* + * Change mode of a file given a file descriptor. 
+ */ + +struct fchmod_args { + int fd; + int fmode; +}; + +/* ARGSUSED */ +fchmod(p, uap, retval) + struct proc *p; + register struct fchmod_args *uap; + int *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, uap->fd, &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LOCK(vp); + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + error = EROFS; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_mode = uap->fmode & 07777; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + VOP_UNLOCK(vp); + return (error); +} + +/* + * Set ownership given a path name. + */ + +struct chown_args { + char *fname; + int uid; + int gid; +}; + +/* ARGSUSED */ +chown(p, uap, retval) + struct proc *p; + register struct chown_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | NOFOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + error = EROFS; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_uid = uap->uid; + vattr.va_gid = uap->gid; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + vput(vp); + return (error); +} + +/* + * Set ownership given a file descriptor. 
+ */ + +struct fchown_args { + int fd; + int uid; + int gid; +}; + +/* ARGSUSED */ +fchown(p, uap, retval) + struct proc *p; + register struct fchown_args *uap; + int *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, uap->fd, &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LOCK(vp); + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + error = EROFS; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_uid = uap->uid; + vattr.va_gid = uap->gid; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + VOP_UNLOCK(vp); + return (error); +} + +/* + * Set the access and modification times of a file. + */ + +struct utimes_args { + char *fname; + struct timeval *tptr; +}; + +/* ARGSUSED */ +utimes(p, uap, retval) + struct proc *p; + register struct utimes_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + struct timeval tv[2]; + struct vattr vattr; + int error; + struct nameidata nd; + + if (error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv))) + return (error); + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + error = EROFS; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_atime = tv[0]; + vattr.va_mtime = tv[1]; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + vput(vp); + return (error); +} + +/* + * Truncate a file given its path name. 
+ */ + +struct truncate_args { + char *fname; + off_t length; +}; + +/* ARGSUSED */ +truncate(p, uap, retval) + struct proc *p; + register struct truncate_args *uap; + int *retval; +{ + register struct nameidata *ndp; + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + ndp = &nd; + ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; + ndp->ni_segflg = UIO_USERSPACE; + ndp->ni_dirp = uap->fname; + if (error = namei(ndp, p)) + return (error); + vp = ndp->ni_vp; + if (vp->v_type == VDIR) { + error = EISDIR; + goto out; + } + if ((error = vn_writechk(vp)) || + (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p))) + goto out; + VATTR_NULL(&vattr); + vattr.va_size = uap->length; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); +out: + vput(vp); + return (error); +} + +/* + * Truncate a file given a file descriptor. + */ + +struct ftruncate_args { + int fd; + off_t length; +}; + +/* ARGSUSED */ +ftruncate(p, uap, retval) + struct proc *p; + register struct ftruncate_args *uap; + int *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, uap->fd, &fp)) + return (error); + if ((fp->f_flag & FWRITE) == 0) + return (EINVAL); + vp = (struct vnode *)fp->f_data; + VOP_LOCK(vp); + if (vp->v_type == VDIR) { + error = EISDIR; + goto out; + } + if (error = vn_writechk(vp)) + goto out; + VATTR_NULL(&vattr); + vattr.va_size = uap->length; + error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); +out: + VOP_UNLOCK(vp); + return (error); +} + +/* + * Synch an open file. 
 */

struct fsync_args {
	int	fd;		/* descriptor to flush */
};

/*
 * fsync(2): push an open file's dirty data to stable storage.
 */
/* ARGSUSED */
fsync(p, uap, retval)
	struct proc *p;
	struct fsync_args *uap;
	int *retval;
{
	register struct vnode *vp;
	struct file *fp;
	int error;

	if (error = getvnode(p->p_fd, uap->fd, &fp))
		return (error);
	vp = (struct vnode *)fp->f_data;
	VOP_LOCK(vp);
	error = VOP_FSYNC(vp, fp->f_flag, fp->f_cred, MNT_WAIT, p);
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * Rename system call.
 *
 * Source and destination must either both be directories, or both
 * not be directories.  If target is a directory, it must be empty.
 */

struct rename_args {
	char	*from;		/* existing name */
	char	*to;		/* new name */
};

/* ARGSUSED */
rename(p, uap, retval)
	struct proc *p;
	register struct rename_args *uap;
	int *retval;
{
	register struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;

	/* SAVESTART keeps the start dir and path buffer for VOP_RENAME. */
	fromnd.ni_nameiop = DELETE | WANTPARENT | SAVESTART;
	fromnd.ni_segflg = UIO_USERSPACE;
	fromnd.ni_dirp = uap->from;
	if (error = namei(&fromnd, p))
		return (error);
	fvp = fromnd.ni_vp;
	tond.ni_nameiop = RENAME | LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
	tond.ni_segflg = UIO_USERSPACE;
	tond.ni_dirp = uap->to;
	if (error = namei(&tond, p)) {
		VOP_ABORTOP(&fromnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;	/* may be NULL if target doesn't exist */
	if (tvp != NULL) {
		/* Directory may only replace directory, file only file. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
		if (fvp->v_mount != tvp->v_mount) {
			error = EXDEV;
			goto out;
		}
	}
	/* Renames cannot cross file systems. */
	if (fvp->v_mount != tdvp->v_mount) {
		error = EXDEV;
		goto out;
	}
	/* Cannot rename a directory into itself. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number with the same name in the same directory),
	 * then there is nothing to do.
	 */
	/* error = -1 is a private "no-op" marker, mapped to 0 at out1. */
	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
	    fromnd.ni_namelen == tond.ni_namelen &&
	    !bcmp(fromnd.ni_ptr, tond.ni_ptr, fromnd.ni_namelen))
		error = -1;
out:
	if (!error) {
		error = VOP_RENAME(&fromnd, &tond, p);
	} else {
		/* Abort both lookups and release everything they held. */
		VOP_ABORTOP(&tond);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VOP_ABORTOP(&fromnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	FREE(tond.ni_pnbuf, M_NAMEI);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	FREE(fromnd.ni_pnbuf, M_NAMEI);
	if (error == -1)
		return (0);
	return (error);
}

/*
 * Mkdir system call.
 */

struct mkdir_args {
	char	*name;		/* user path of the directory to create */
	int	dmode;		/* permission bits */
};

/* ARGSUSED */
mkdir(p, uap, retval)
	struct proc *p;
	register struct mkdir_args *uap;
	int *retval;
{
	register struct nameidata *ndp;
	register struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;

	ndp = &nd;
	ndp->ni_nameiop = CREATE | LOCKPARENT;
	ndp->ni_segflg = UIO_USERSPACE;
	ndp->ni_dirp = uap->name;
	if (error = namei(ndp, p))
		return (error);
	vp = ndp->ni_vp;
	if (vp != NULL) {
		/* Name already exists: abort the create. */
		VOP_ABORTOP(ndp);
		if (ndp->ni_dvp == vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		vrele(vp);
		return (EEXIST);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (uap->dmode & 0777) &~ p->p_fd->fd_cmask;
	error = VOP_MKDIR(ndp, &vattr, p);
	/* VOP_MKDIR returns the new directory locked; release it. */
	if (!error)
		vput(ndp->ni_vp);
	return (error);
}

/*
 * Rmdir system call.
 */

struct rmdir_args {
	char	*name;		/* user path of the directory to remove */
};

/*
 * rmdir(2): remove a directory.  "." and the root of a mounted file
 * system are rejected.
 */
/* ARGSUSED */
rmdir(p, uap, retval)
	struct proc *p;
	struct rmdir_args *uap;
	int *retval;
{
	register struct nameidata *ndp;
	register struct vnode *vp;
	int error;
	struct nameidata nd;

	ndp = &nd;
	ndp->ni_nameiop = DELETE | LOCKPARENT | LOCKLEAF;
	ndp->ni_segflg = UIO_USERSPACE;
	ndp->ni_dirp = uap->name;
	if (error = namei(ndp, p))
		return (error);
	vp = ndp->ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (ndp->ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_flag & VROOT)
		error = EBUSY;
out:
	if (!error) {
		error = VOP_RMDIR(ndp, p);
	} else {
		/* Abort the pending delete and release namei's vnodes. */
		VOP_ABORTOP(ndp);
		if (ndp->ni_dvp == vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		vput(vp);
	}
	return (error);
}

/*
 * Read a block of directory entries in a file system independent format.
 */

struct getdirentries_args {
	int	fd;		/* descriptor open on the directory */
	char	*buf;		/* user buffer for dirent records */
	unsigned count;		/* size of the user buffer */
	long	*basep;		/* out: offset the read started at */
};

getdirentries(p, uap, retval)
	struct proc *p;
	register struct getdirentries_args *uap;
	int *retval;
{
	register struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t off;
	int error, eofflag;

	if (error = getvnode(p->p_fd, uap->fd, &fp))
		return (error);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VDIR)
		return (EINVAL);
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = uap->count;
	VOP_LOCK(vp);
	/* Remember the starting offset to return through basep. */
	auio.uio_offset = off = fp->f_offset;
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
	fp->f_offset = auio.uio_offset;
	VOP_UNLOCK(vp);
	if (error)
		return (error);
	error = copyout((caddr_t)&off, (caddr_t)uap->basep, sizeof(long));
	*retval = uap->count - auio.uio_resid;
	return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 */

struct umask_args {
	int	mask;		/* new creation mask */
};

/*
 * umask(2): the old mask is returned through *retval; the function's
 * own return value is always 0.
 */
mode_t
umask(p, uap, retval)
	struct proc *p;
	struct umask_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;

	*retval = fdp->fd_cmask;
	fdp->fd_cmask = uap->mask & 07777;
	return (0);
}

/*
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
 */

struct revoke_args {
	char	*fname;		/* user path of the device node */
};

/*
 * revoke(2): only meaningful on character/block devices; caller must
 * own the node or be super-user.
 */
/* ARGSUSED */
revoke(p, uap, retval)
	struct proc *p;
	register struct revoke_args *uap;
	int *retval;
{
	register struct nameidata *ndp;
	register struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;

	ndp = &nd;
	ndp->ni_nameiop = LOOKUP | FOLLOW;
	ndp->ni_segflg = UIO_USERSPACE;
	ndp->ni_dirp = uap->fname;
	if (error = namei(ndp, p))
		return (error);
	vp = ndp->ni_vp;
	if (vp->v_type != VCHR && vp->v_type != VBLK) {
		error = EINVAL;
		goto out;
	}
	if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
		goto out;
	if (p->p_ucred->cr_uid != vattr.va_uid &&
	    (error = suser(p->p_ucred, &p->p_acflag)))
		goto out;
	/* Only bother if someone else holds a reference or an alias. */
	if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
		vgoneall(vp);
out:
	vrele(vp);
	return (error);
}

/*
 * Convert a user file descriptor to a kernel file entry.
 */
/*
 * Note the K&R identifier list (fdp, fdes, fpp): the call order is
 * governed by that list, not by the order of the declarations below.
 */
getvnode(fdp, fdes, fpp)
	struct filedesc *fdp;
	struct file **fpp;
	int fdes;
{
	struct file *fp;

	/* Unsigned compare also rejects negative descriptors. */
	if ((unsigned)fdes >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fdes]) == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE)
		return (EINVAL);
	*fpp = fp;
	return (0);
}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
new file mode 100644
index 000000000000..ed2dc6214a45
--- /dev/null
+++ b/sys/kern/vfs_vnops.c
@@ -0,0 +1,432 @@
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * from: @(#)vfs_vnops.c	7.33 (Berkeley) 6/27/91
 *	$Id: vfs_vnops.c,v 1.2 1993/10/16 15:25:29 rgrimes Exp $
 */

#include "param.h"
#include "systm.h"
#include "kernel.h"
#include "file.h"
#include "stat.h"
#include "buf.h"
#include "proc.h"
#include "mount.h"
#include "namei.h"
#include "vnode.h"
#include "ioctl.h"
#include "tty.h"

/* File-table operations vector for descriptors backed by vnodes. */
struct fileops vnops =
	{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 *
 * On success the vnode is returned in ndp->ni_vp, locked, with an extra
 * write count if opened FWRITE.  On failure the vnode is vput() and an
 * errno is returned.
 */
vn_open(ndp, p, fmode, cmode)
	register struct nameidata *ndp;
	struct proc *p;
	int fmode, cmode;
{
	register struct vnode *vp;
	register struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int error;

	if (fmode & O_CREAT) {
		ndp->ni_nameiop = CREATE | LOCKPARENT | LOCKLEAF;
		/* Follow symlinks only for non-exclusive creates. */
		if ((fmode & O_EXCL) == 0)
			ndp->ni_nameiop |= FOLLOW;
		if (error = namei(ndp, p))
			return (error);
		if (ndp->ni_vp == NULL) {
			/* Target does not exist: create a regular file. */
			VATTR_NULL(vap);
			vap->va_type = VREG;
			vap->va_mode = cmode;
			if (error = VOP_CREATE(ndp, vap, p))
				return (error);
			/* A freshly created file is empty; no truncate needed. */
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			/* Target exists: abandon the create, drop the parent. */
			VOP_ABORTOP(ndp);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF;
		if (error = namei(ndp, p))
			return (error);
		vp = ndp->ni_vp;
	}
	/* Sockets are opened through the socket interface, never here. */
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/*
	 * Access checks apply only to pre-existing files; a file we just
	 * created is implicitly accessible to its creator.
	 */
	if ((fmode & O_CREAT) == 0) {
		if (fmode & FREAD) {
			if (error = VOP_ACCESS(vp, VREAD, cred, p))
				goto bad;
		}
		if (fmode & (FWRITE | O_TRUNC)) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			if ((error = vn_writechk(vp)) ||
			    (error = VOP_ACCESS(vp, VWRITE, cred, p)))
				goto bad;
		}
	}
	if (fmode & O_TRUNC) {
		/* Truncate to zero length via a setattr of va_size. */
		VATTR_NULL(vap);
		vap->va_size = 0;
		if (error = VOP_SETATTR(vp, vap, cred, p))
			goto bad;
	}
	if (error = VOP_OPEN(vp, fmode, cred, p))
		goto bad;
	/* Track writers so vn_writechk()/text handling can see them. */
	if (fmode & FWRITE)
		vp->v_writecount++;
	return (0);
bad:
	/* vp is locked and referenced on every path that reaches here. */
	vput(vp);
	return (error);
}

/*
 * Check for write permissions on the specified vnode.
 * The read-only status of the file system is checked.
 * Also, prototype text segments cannot be written.
 */
vn_writechk(vp)
	register struct vnode *vp;
{

	/*
	 * Disallow write attempts on read-only file systems;
	 * unless the file is a socket or a block or character
	 * device resident on the file system.
	 */
	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
		switch (vp->v_type) {
		case VREG: case VDIR: case VLNK:
			return (EROFS);
		}
	}
	/*
	 * If there's shared text associated with
	 * the vnode, try to free it up once. If
	 * we fail, we can't allow writing.
	 */
	if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
		return (ETXTBSY);
	return (0);
}

/*
 * Vnode close call
 *
 * Balances vn_open(): drops the write count taken there and releases
 * the reference after calling the filesystem's close routine.
 */
vn_close(vp, flags, cred, p)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
{
	int error;

	if (flags & FWRITE)
		vp->v_writecount--;
	error = VOP_CLOSE(vp, flags, cred, p);
	vrele(vp);
	return (error);
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
 * [internal interface to file i/o for kernel only]
 */
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
	enum uio_rw rw;
	struct vnode *vp;
	caddr_t base;
	int len;
	off_t offset;
	enum uio_seg segflg;
	int ioflg;
	struct ucred *cred;
	int *aresid;
	struct proc *p;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	/* Caller may already hold the vnode lock (IO_NODELOCKED). */
	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_LOCK(vp);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_procp = p;
	if (rw == UIO_READ)
		error = VOP_READ(vp, &auio, ioflg, cred);
	else
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	/*
	 * If the caller doesn't want the residual count, a short
	 * transfer with no other error is reported as EIO.
	 */
	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp);
	return (error);
}

/*
 * File table vnode read routine.
 *
 * The file offset is loaded and advanced under the vnode lock so that
 * concurrent readers through the same struct file stay consistent.
 */
vn_read(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{
	register struct vnode *vp = (struct vnode *)fp->f_data;
	int count, error;

	VOP_LOCK(vp);
	uio->uio_offset = fp->f_offset;
	count = uio->uio_resid;
	error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
		cred);
	/* Advance by the number of bytes actually transferred. */
	fp->f_offset += count - uio->uio_resid;
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * File table vnode write routine.
 */
vn_write(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{
	register struct vnode *vp = (struct vnode *)fp->f_data;
	int count, error, ioflag = 0;

	/* O_APPEND on a regular file maps to the VOP-level IO_APPEND. */
	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	VOP_LOCK(vp);
	uio->uio_offset = fp->f_offset;
	count = uio->uio_resid;
	error = VOP_WRITE(vp, uio, ioflag, cred);
	/*
	 * For appends, the filesystem decided the final offset; otherwise
	 * advance by the byte count transferred.
	 */
	if (ioflag & IO_APPEND)
		fp->f_offset = uio->uio_offset;
	else
		fp->f_offset += count - uio->uio_resid;
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * File table vnode stat routine.
 *
 * Fetches the vnode attributes and translates them into the
 * filesystem-independent struct stat, deriving st_mode's type bits
 * from the vnode type.
 */
vn_stat(vp, sb, p)
	struct vnode *vp;
	register struct stat *sb;
	struct proc *p;
{
	struct vattr vattr;
	register struct vattr *vap;
	int error;
	u_short mode;

	vap = &vattr;
	error = VOP_GETATTR(vp, vap, p->p_ucred, p);
	if (error)
		return (error);
	/*
	 * Copy from vattr table
	 */
	sb->st_dev = vap->va_fsid;
	sb->st_ino = vap->va_fileid;
	mode = vap->va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		/* VNON/VBAD and friends cannot be described by stat. */
		return (EBADF);
	};	/* (stray ';' is a harmless empty statement) */
	sb->st_mode = mode;
	sb->st_nlink = vap->va_nlink;
	sb->st_uid = vap->va_uid;
	sb->st_gid = vap->va_gid;
	sb->st_rdev = vap->va_rdev;
	sb->st_size = vap->va_size;
	sb->st_atime = vap->va_atime.tv_sec;
	sb->st_spare1 = 0;
	sb->st_mtime = vap->va_mtime.tv_sec;
	sb->st_spare2 = 0;
	sb->st_ctime = vap->va_ctime.tv_sec;
	sb->st_spare3 = 0;
	sb->st_blksize = vap->va_blocksize;
	sb->st_flags = vap->va_flags;
	sb->st_gen = vap->va_gen;
	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
	return (0);
}

/*
 * File table vnode ioctl routine.
 */
vn_ioctl(fp, com, data, p)
	struct file *fp;
	int com;
	caddr_t data;
	struct proc *p;
{
	register struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		/* FIONREAD: bytes remaining between f_offset and EOF. */
		if (com == FIONREAD) {
			if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
				return (error);
			*(off_t *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */

	default:
		return (ENOTTY);

	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
		/*
		 * Acquiring a controlling terminal: record (and reference)
		 * its vnode in the session.
		 */
		if (error == 0 && com == TIOCSCTTY) {
			p->p_session->s_ttyvp = vp;
			VREF(vp);
		}
		return (error);
	}
}

/*
 * File table vnode select routine.
 * Thin wrapper delegating to the filesystem's VOP_SELECT.
 */
vn_select(fp, which, p)
	struct file *fp;
	int which;
	struct proc *p;
{

	return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag,
		fp->f_cred, p));
}

/*
 * File table vnode close routine.
 * Thin wrapper delegating to vn_close() with the file's flags and creds.
 */
vn_closefile(fp, p)
	struct file *fp;
	struct proc *p;
{

	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
		fp->f_cred, p));
}

/*
 * vn_fhtovp() - convert a fh to a vnode ptr (optionally locked)
 * 	- look up fsid in mount list (if not found ret error)
 *	- get vp by calling VFS_FHTOVP() macro
 *	- if lockflag lock it with VOP_LOCK()
 *
 * Returns ESTALE if the fsid is unknown or the filesystem cannot map
 * the file id.  NOTE(review): per the header comment and the unlock
 * below, VFS_FHTOVP() is assumed to hand back *vpp locked.
 */
vn_fhtovp(fhp, lockflag, vpp)
	fhandle_t *fhp;
	int lockflag;
	struct vnode **vpp;
{
	register struct mount *mp;

	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
		return (ESTALE);
	if (VFS_FHTOVP(mp, &fhp->fh_fid, vpp))
		return (ESTALE);
	if (!lockflag)
		VOP_UNLOCK(*vpp);
	return (0);
}
