diff options
-rw-r--r-- | sys/conf/param.c | 9 | ||||
-rw-r--r-- | sys/kern/subr_param.c | 9 | ||||
-rw-r--r-- | sys/kern/uipc_domain.c | 33 | ||||
-rw-r--r-- | sys/kern/uipc_proto.c | 5 | ||||
-rw-r--r-- | sys/kern/uipc_sockbuf.c | 59 | ||||
-rw-r--r-- | sys/kern/uipc_socket.c | 54 | ||||
-rw-r--r-- | sys/kern/uipc_socket2.c | 59 | ||||
-rw-r--r-- | sys/kern/uipc_usrreq.c | 161 | ||||
-rw-r--r-- | sys/netinet/in_pcb.c | 3 | ||||
-rw-r--r-- | sys/netinet/in_pcb.h | 25 | ||||
-rw-r--r-- | sys/netinet/ip_divert.c | 6 | ||||
-rw-r--r-- | sys/netinet/raw_ip.c | 90 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 117 | ||||
-rw-r--r-- | sys/netinet/tcp_timewait.c | 117 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 21 | ||||
-rw-r--r-- | sys/netinet/udp_usrreq.c | 90 | ||||
-rw-r--r-- | sys/netinet/udp_var.h | 6 | ||||
-rw-r--r-- | sys/sys/socketvar.h | 46 | ||||
-rw-r--r-- | sys/sys/un.h | 7 | ||||
-rw-r--r-- | sys/sys/unpcb.h | 43 |
20 files changed, 827 insertions, 133 deletions
diff --git a/sys/conf/param.c b/sys/conf/param.c index b38edcc27541..b78a29273e40 100644 --- a/sys/conf/param.c +++ b/sys/conf/param.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)param.c 8.3 (Berkeley) 8/20/94 - * $Id: param.c,v 1.25 1997/06/14 11:38:46 bde Exp $ + * $Id: param.c,v 1.26 1998/02/27 19:58:29 guido Exp $ */ #include "opt_sysvipc.h" @@ -85,6 +85,13 @@ int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */ #endif int nmbclusters = NMBCLUSTERS; +#if MAXFILES > NMBCLUSTERS +#define MAXSOCKETS MAXFILES +#else +#define MAXSOCKETS NMBCLUSTERS +#endif +int maxsockets = MAXSOCKETS; + /* allocate 1/4th amount of virtual address space for mbufs XXX */ int nmbufs = NMBCLUSTERS * 4; diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c index b38edcc27541..b78a29273e40 100644 --- a/sys/kern/subr_param.c +++ b/sys/kern/subr_param.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)param.c 8.3 (Berkeley) 8/20/94 - * $Id: param.c,v 1.25 1997/06/14 11:38:46 bde Exp $ + * $Id: param.c,v 1.26 1998/02/27 19:58:29 guido Exp $ */ #include "opt_sysvipc.h" @@ -85,6 +85,13 @@ int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */ #endif int nmbclusters = NMBCLUSTERS; +#if MAXFILES > NMBCLUSTERS +#define MAXSOCKETS MAXFILES +#else +#define MAXSOCKETS NMBCLUSTERS +#endif +int maxsockets = MAXSOCKETS; + /* allocate 1/4th amount of virtual address space for mbufs XXX */ int nmbufs = NMBCLUSTERS * 4; diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c index 69d6bf862333..22078b1c3f3d 100644 --- a/sys/kern/uipc_domain.c +++ b/sys/kern/uipc_domain.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93 - * $Id: uipc_domain.c,v 1.17 1997/04/27 20:00:42 wollman Exp $ + * $Id: uipc_domain.c,v 1.18 1997/09/16 11:43:36 bde Exp $ */ #include <sys/param.h> @@ -40,7 +40,9 @@ #include <sys/domain.h> #include <sys/mbuf.h> #include <sys/kernel.h> +#include <sys/socketvar.h> #include <sys/systm.h> +#include <vm/vm_zone.h> /* * System initialization @@ -85,6 +87,21 @@ domaininit(dummy) register struct protosw *pr; /* + * Before we do any setup, make sure to initialize the + * zone allocator we get struct sockets from. The obvious + * maximum number of sockets is `maxfiles', but it is possible + * to have a socket without an open file (e.g., a connection waiting + * to be accept(2)ed). Rather than think up and define a + * better value, we just use nmbclusters, since that's what people + * are told to increase first when the network runs out of memory. + * Perhaps we should have two pools, one of unlimited size + * for use during socreate(), and one ZONE_INTERRUPT pool for + * use in sonewconn(). + */ + socket_zone = zinit("socket", sizeof(struct socket), maxsockets, + ZONE_INTERRUPT, 0); + + /* * NB - local domain is always present. */ ADDDOMAIN(local); @@ -94,26 +111,14 @@ domaininit(dummy) domains = *dpp; } -/* - not in our sources -#ifdef ISDN - ADDDOMAIN(isdn); -#endif -*/ - for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_init) (*dp->dom_init)(); for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++){ -#ifdef PRU_OLDSTYLE - /* See comments in uipc_socket2.c. 
*/ - if (pr->pr_usrreqs == 0 && pr->pr_ousrreq) - pr->pr_usrreqs = &pru_oldstyle; -#else if (pr->pr_usrreqs == 0) panic("domaininit: %ssw[%d] has no usrreqs!", dp->dom_name, (int)(pr - dp->dom_protosw)); -#endif if (pr->pr_init) (*pr->pr_init)(); } @@ -151,7 +156,7 @@ kludge_splx(udata) { int *savesplp = udata; - splx( *savesplp); + splx(*savesplp); } diff --git a/sys/kern/uipc_proto.c b/sys/kern/uipc_proto.c index 6e2ef5de4a50..ec2b9f04ad02 100644 --- a/sys/kern/uipc_proto.c +++ b/sys/kern/uipc_proto.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)uipc_proto.c 8.1 (Berkeley) 6/10/93 - * $Id: uipc_proto.c,v 1.13 1997/08/02 14:31:42 bde Exp $ + * $Id: uipc_proto.c,v 1.14 1998/02/20 13:11:48 bde Exp $ */ #include <sys/param.h> @@ -41,6 +41,7 @@ #include <sys/socket.h> #include <sys/sysctl.h> #include <sys/un.h> +#include <sys/unpcb.h> #include <net/raw_cb.h> @@ -70,7 +71,7 @@ static struct protosw localsw[] = { }; struct domain localdomain = - { AF_LOCAL, "local", 0, unp_externalize, unp_dispose, + { AF_LOCAL, "local", unp_init, unp_externalize, unp_dispose, localsw, &localsw[sizeof(localsw)/sizeof(localsw[0])] }; SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain"); diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 40c8f19a6122..8ad352269662 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -31,11 +31,12 @@ * SUCH DAMAGE. 
* * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 - * $Id: uipc_socket2.c,v 1.32 1998/04/04 13:25:40 phk Exp $ + * $Id: uipc_socket2.c,v 1.33 1998/04/24 04:15:18 dg Exp $ */ #include <sys/param.h> #include <sys/systm.h> +#include <sys/domain.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/malloc.h> @@ -202,10 +203,9 @@ sonewconn(head, connstatus) if (head->so_qlen > 3 * head->so_qlimit / 2) return ((struct socket *)0); - MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT); + so = soalloc(0); if (so == NULL) return ((struct socket *)0); - bzero((caddr_t)so, sizeof(*so)); so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; @@ -218,7 +218,7 @@ sonewconn(head, connstatus) (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { - (void) free((caddr_t)so, M_SOCKET); + sodealloc(so); return ((struct socket *)0); } @@ -890,6 +890,56 @@ dup_sockaddr(sa, canwait) } /* + * Create an external-format (``xsocket'') structure using the information + * in the kernel-format socket structure pointed to by so. This is done + * to reduce the spew of irrelevant information over this interface, + * to isolate user code from changes in the kernel structure, and + * potentially to provide information-hiding if we decide that + * some of this information should be hidden from users. 
+ */ +void +sotoxsocket(struct socket *so, struct xsocket *xso) +{ + xso->xso_len = sizeof *xso; + xso->xso_so = so; + xso->so_type = so->so_type; + xso->so_options = so->so_options; + xso->so_linger = so->so_linger; + xso->so_state = so->so_state; + xso->so_pcb = so->so_pcb; + xso->xso_protocol = so->so_proto->pr_protocol; + xso->xso_family = so->so_proto->pr_domain->dom_family; + xso->so_qlen = so->so_qlen; + xso->so_incqlen = so->so_incqlen; + xso->so_qlimit = so->so_qlimit; + xso->so_timeo = so->so_timeo; + xso->so_error = so->so_error; + xso->so_pgid = so->so_pgid; + xso->so_oobmark = so->so_oobmark; + sbtoxsockbuf(&so->so_snd, &xso->so_snd); + sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); + xso->so_uid = so->so_uid; +} + +/* + * This does the same for sockbufs. Note that the xsockbuf structure, + * since it is always embedded in a socket, does not include a self + * pointer nor a length. We make this entry point public in case + * some other mechanism needs it. + */ +void +sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) +{ + xsb->sb_cc = sb->sb_cc; + xsb->sb_hiwat = sb->sb_hiwat; + xsb->sb_mbcnt = sb->sb_mbcnt; + xsb->sb_mbmax = sb->sb_mbmax; + xsb->sb_lowat = sb->sb_lowat; + xsb->sb_flags = sb->sb_flags; + xsb->sb_timeo = sb->sb_timeo; +} + +/* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB. 
*/ @@ -900,6 +950,7 @@ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, ""); diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 58e309ab78ad..2ca79fc50098 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 - * $Id: uipc_socket.c,v 1.38 1998/03/01 19:39:17 guido Exp $ + * $Id: uipc_socket.c,v 1.39 1998/03/28 10:33:08 bde Exp $ */ #include <sys/param.h> @@ -50,10 +50,13 @@ #include <sys/signalvar.h> #include <sys/sysctl.h> #include <sys/uio.h> +#include <vm/vm_zone.h> #include <machine/limits.h> -MALLOC_DEFINE(M_SOCKET, "socket", "socket structure"); +struct vm_zone *socket_zone; +so_gen_t so_gencnt; /* generation count for sockets */ + MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); @@ -68,7 +71,30 @@ SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, * implement the semantics of socket operations by * switching out to the protocol specific routines. */ -/*ARGSUSED*/ + +/* + * Get a socket structure from our zone, and initialize it. + * We don't implement `waitok' yet (see comments in uipc_domain.c). + * Note that it would probably be better to allocate socket + * and PCB at the same time, but I'm not convinced that all + * the protocols can be easily modified to do this. 
+ */ +struct socket * +soalloc(waitok) + int waitok; +{ + struct socket *so; + + so = zalloci(socket_zone); + if (so) { + /* XXX race condition for reentrant kernel */ + bzero(so, sizeof *so); + so->so_gencnt = ++so_gencnt; + so->so_zone = socket_zone; + } + return so; +} + int socreate(dom, aso, type, proto, p) int dom; @@ -89,12 +115,15 @@ socreate(dom, aso, type, proto, p) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); - MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); - bzero((caddr_t)so, sizeof(*so)); + so = soalloc(p != 0); + if (so == 0) + return (ENOBUFS); + TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; - so->so_uid = p->p_ucred->cr_uid;; + if (p != 0) + so->so_uid = p->p_ucred->cr_uid; so->so_proto = prp; error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); if (error) { @@ -120,14 +149,23 @@ sobind(so, nam, p) return (error); } +void +sodealloc(so) + struct socket *so; +{ + so->so_gencnt = ++so_gencnt; + zfreei(so->so_zone, so); +} + int solisten(so, backlog, p) register struct socket *so; int backlog; struct proc *p; { - int s = splnet(), error; + int s, error; + s = splnet(); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); if (error) { splx(s); @@ -165,7 +203,7 @@ sofree(so) } sbrelease(&so->so_snd); sorflush(so); - FREE(so, M_SOCKET); + sodealloc(so); } /* diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 40c8f19a6122..8ad352269662 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -31,11 +31,12 @@ * SUCH DAMAGE. 
* * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 - * $Id: uipc_socket2.c,v 1.32 1998/04/04 13:25:40 phk Exp $ + * $Id: uipc_socket2.c,v 1.33 1998/04/24 04:15:18 dg Exp $ */ #include <sys/param.h> #include <sys/systm.h> +#include <sys/domain.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/malloc.h> @@ -202,10 +203,9 @@ sonewconn(head, connstatus) if (head->so_qlen > 3 * head->so_qlimit / 2) return ((struct socket *)0); - MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT); + so = soalloc(0); if (so == NULL) return ((struct socket *)0); - bzero((caddr_t)so, sizeof(*so)); so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; @@ -218,7 +218,7 @@ sonewconn(head, connstatus) (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { - (void) free((caddr_t)so, M_SOCKET); + sodealloc(so); return ((struct socket *)0); } @@ -890,6 +890,56 @@ dup_sockaddr(sa, canwait) } /* + * Create an external-format (``xsocket'') structure using the information + * in the kernel-format socket structure pointed to by so. This is done + * to reduce the spew of irrelevant information over this interface, + * to isolate user code from changes in the kernel structure, and + * potentially to provide information-hiding if we decide that + * some of this information should be hidden from users. 
+ */ +void +sotoxsocket(struct socket *so, struct xsocket *xso) +{ + xso->xso_len = sizeof *xso; + xso->xso_so = so; + xso->so_type = so->so_type; + xso->so_options = so->so_options; + xso->so_linger = so->so_linger; + xso->so_state = so->so_state; + xso->so_pcb = so->so_pcb; + xso->xso_protocol = so->so_proto->pr_protocol; + xso->xso_family = so->so_proto->pr_domain->dom_family; + xso->so_qlen = so->so_qlen; + xso->so_incqlen = so->so_incqlen; + xso->so_qlimit = so->so_qlimit; + xso->so_timeo = so->so_timeo; + xso->so_error = so->so_error; + xso->so_pgid = so->so_pgid; + xso->so_oobmark = so->so_oobmark; + sbtoxsockbuf(&so->so_snd, &xso->so_snd); + sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); + xso->so_uid = so->so_uid; +} + +/* + * This does the same for sockbufs. Note that the xsockbuf structure, + * since it is always embedded in a socket, does not include a self + * pointer nor a length. We make this entry point public in case + * some other mechanism needs it. + */ +void +sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) +{ + xsb->sb_cc = sb->sb_cc; + xsb->sb_hiwat = sb->sb_hiwat; + xsb->sb_mbcnt = sb->sb_mbcnt; + xsb->sb_mbmax = sb->sb_mbmax; + xsb->sb_lowat = sb->sb_lowat; + xsb->sb_flags = sb->sb_flags; + xsb->sb_timeo = sb->sb_timeo; +} + +/* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB. 
*/ @@ -900,6 +950,7 @@ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, ""); diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index b225c7c5e91f..3921513b2dd7 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 - * $Id: uipc_usrreq.c,v 1.33 1998/04/17 22:36:50 des Exp $ + * $Id: uipc_usrreq.c,v 1.34 1998/05/07 04:58:21 msmith Exp $ */ #include <sys/param.h> @@ -42,6 +42,7 @@ #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ #include <sys/file.h> #include <sys/filedesc.h> +#include <sys/lock.h> #include <sys/mbuf.h> #include <sys/namei.h> #include <sys/proc.h> @@ -51,8 +52,17 @@ #include <sys/stat.h> #include <sys/sysctl.h> #include <sys/un.h> +#include <sys/unpcb.h> #include <sys/vnode.h> +#include <vm/vm_zone.h> + +struct vm_zone *unp_zone; +static unp_gen_t unp_gencnt; +static u_int unp_count; + +static struct unp_head unp_shead, unp_dhead; + /* * Unix communications domain. 
* @@ -60,6 +70,7 @@ * SEQPACKET, RDM * rethink name space problems * need a proper out-of-band + * lock pushdown */ static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; static ino_t unp_ino; /* prototype for fake inode numbers */ @@ -468,12 +479,17 @@ unp_attach(so) if (error) return (error); } - MALLOC(unp, struct unpcb *, sizeof *unp, M_PCB, M_NOWAIT); + unp = zalloc(unp_zone); if (unp == NULL) return (ENOBUFS); bzero(unp, sizeof *unp); - so->so_pcb = (caddr_t)unp; + unp->unp_gencnt = ++unp_gencnt; + unp_count++; + LIST_INIT(&unp->unp_refs); unp->unp_socket = so; + LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead + : &unp_shead, unp, unp_link); + so->so_pcb = (caddr_t)unp; return (0); } @@ -481,6 +497,9 @@ static void unp_detach(unp) register struct unpcb *unp; { + LIST_REMOVE(unp, unp_link); + unp->unp_gencnt = ++unp_gencnt; + --unp_count; if (unp->unp_vnode) { unp->unp_vnode->v_socket = 0; vrele(unp->unp_vnode); @@ -488,8 +507,8 @@ unp_detach(unp) } if (unp->unp_conn) unp_disconnect(unp); - while (unp->unp_refs) - unp_drop(unp->unp_refs, ECONNRESET); + while (unp->unp_refs.lh_first) + unp_drop(unp->unp_refs.lh_first, ECONNRESET); soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = 0; if (unp_rights) { @@ -505,7 +524,7 @@ unp_detach(unp) } if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); - FREE(unp, M_PCB); + zfree(unp_zone, unp); } static int @@ -637,8 +656,7 @@ unp_connect2(so, so2) switch (so->so_type) { case SOCK_DGRAM: - unp->unp_nextref = unp2->unp_refs; - unp2->unp_refs = unp; + LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); soisconnected(so); break; @@ -666,20 +684,7 @@ unp_disconnect(unp) switch (unp->unp_socket->so_type) { case SOCK_DGRAM: - if (unp2->unp_refs == unp) - unp2->unp_refs = unp->unp_nextref; - else { - unp2 = unp2->unp_refs; - for (;;) { - if (unp2 == 0) - panic("unp_disconnect"); - if (unp2->unp_nextref == unp) - break; - unp2 = unp2->unp_nextref; - } - unp2->unp_nextref = unp->unp_nextref; - } - 
unp->unp_nextref = 0; + LIST_REMOVE(unp, unp_reflink); unp->unp_socket->so_state &= ~SS_ISCONNECTED; break; @@ -701,6 +706,103 @@ unp_abort(unp) } #endif +static int +unp_pcblist SYSCTL_HANDLER_ARGS +{ + int error, i, n, s; + struct unpcb *unp, **unp_list; + unp_gen_t gencnt; + struct xunpgen xug; + struct unp_head *head; + + head = ((long)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); + + /* + * The process of preparing the PCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == 0) { + n = unp_count; + req->oldidx = 2 * (sizeof xug) + + (n + n/8) * sizeof(struct xunpcb); + return 0; + } + + if (req->newptr != 0) + return EPERM; + + /* + * OK, now we're committed to doing something. + */ + gencnt = unp_gencnt; + n = unp_count; + + xug.xug_len = sizeof xug; + xug.xug_count = n; + xug.xug_gen = gencnt; + xug.xug_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xug, sizeof xug); + if (error) + return error; + + unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); + if (unp_list == 0) + return ENOMEM; + + for (unp = head->lh_first, i = 0; unp && i < n; + unp = unp->unp_link.le_next) { + if (unp->unp_gencnt <= gencnt) + unp_list[i++] = unp; + } + n = i; /* in case we lost some during malloc */ + + error = 0; + for (i = 0; i < n; i++) { + unp = unp_list[i]; + if (unp->unp_gencnt <= gencnt) { + struct xunpcb xu; + xu.xu_len = sizeof xu; + xu.xu_unpp = unp; + /* + * XXX - need more locking here to protect against + * connect/disconnect races for SMP. + */ + if (unp->unp_addr) + bcopy(unp->unp_addr, &xu.xu_addr, + unp->unp_addr->sun_len); + if (unp->unp_conn && unp->unp_conn->unp_addr) + bcopy(unp->unp_conn->unp_addr, + &xu.xu_caddr, + unp->unp_conn->unp_addr->sun_len); + bcopy(unp, &xu.xu_unp, sizeof *unp); + sotoxsocket(unp->unp_socket, &xu.xu_socket); + error = SYSCTL_OUT(req, &xu, sizeof xu); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. 
+ * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. + */ + xug.xug_gen = unp_gencnt; + xug.xug_sogen = so_gencnt; + xug.xug_count = unp_count; + error = SYSCTL_OUT(req, &xug, sizeof xug); + } + free(unp_list, M_TEMP); + return error; +} + +SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, + (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", + "List of active local datagram sockets"); +SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, + (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", + "List of active local stream sockets"); + static void unp_shutdown(unp) struct unpcb *unp; @@ -722,10 +824,13 @@ unp_drop(unp, errno) so->so_error = errno; unp_disconnect(unp); if (so->so_head) { + LIST_REMOVE(unp, unp_link); + unp->unp_gencnt = ++unp_gencnt; + unp_count--; so->so_pcb = (caddr_t) 0; if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); - FREE(unp, M_PCB); + zfree(unp_zone, unp); sofree(so); } } @@ -779,6 +884,16 @@ unp_externalize(rights) return (0); } +void +unp_init(void) +{ + unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0); + if (unp_zone == 0) + panic("unp_init"); + LIST_INIT(&unp_dhead); + LIST_INIT(&unp_shead); +} + #ifndef MIN #define MIN(a,b) (((a)<(b))?(a):(b)) #endif diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 15616a7913aa..424ff9ff2787 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 - * $Id: in_pcb.c,v 1.42 1998/03/28 10:33:13 bde Exp $ + * $Id: in_pcb.c,v 1.43 1998/04/19 17:22:30 phk Exp $ */ #include <sys/param.h> @@ -875,6 +875,7 @@ static void in_pcbremlists(inp) struct inpcb *inp; { + inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 467a5b2dd610..aa2d0bec301f 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 - * $Id: in_pcb.h,v 1.24 1998/03/24 18:06:11 wollman Exp $ + * $Id: in_pcb.h,v 1.25 1998/03/28 10:18:22 bde Exp $ */ #ifndef _NETINET_IN_PCB_H_ @@ -48,6 +48,7 @@ */ LIST_HEAD(inpcbhead, inpcb); LIST_HEAD(inpcbporthead, inpcbport); +typedef u_quad_t inp_gen_t; /* * NB: the zone allocator is type-stable EXCEPT FOR THE FIRST TWO LONGS @@ -75,7 +76,7 @@ struct inpcb { struct ip_moptions *inp_moptions; /* IP multicast options */ LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */ struct inpcbport *inp_phd; /* head of this list */ - u_quad_t inp_gencnt; /* generation count of this instance */ + inp_gen_t inp_gencnt; /* generation count of this instance */ }; /* * The range of the generation count, as used in this implementation, @@ -84,6 +85,26 @@ struct inpcb { * unlikely that we simply don't concern ourselves with that possibility. */ +/* + * Interface exported to userland by various protocols which use + * inpcbs. Hack alert -- only define if struct xsocket is in scope. 
+ */ +#ifdef _SYS_SOCKETVAR_H_ +struct xinpcb { + size_t xi_len; /* length of this structure */ + struct inpcb xi_inp; + struct xsocket xi_socket; + u_quad_t xi_alignment_hack; +}; + +struct xinpgen { + size_t xig_len; /* length of this structure */ + u_int xig_count; /* number of PCBs at this time */ + inp_gen_t xig_gen; /* generation count at this time */ + so_gen_t xig_sogen; /* socket generation count at this time */ +}; +#endif /* _SYS_SOCKETVAR_H_ */ + struct inpcbport { LIST_ENTRY(inpcbport) phd_hash; struct inpcbhead phd_pcblist; diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index d37e2bf6547d..ad214c786db5 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -30,13 +30,13 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: ip_divert.c,v 1.21 1998/03/24 18:06:15 wollman Exp $ + * $Id: ip_divert.c,v 1.22 1998/03/28 10:18:23 bde Exp $ */ #include "opt_inet.h" #ifndef INET -#error IPDIVERT requires INET. +#error "IPDIVERT requires INET." #endif #include <sys/param.h> @@ -119,7 +119,7 @@ div_init(void) divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); divcbinfo.ipi_zone = zinit("divcb", sizeof(struct inpcb), - nmbclusters / 4, ZONE_INTERRUPT, 0); + maxsockets, ZONE_INTERRUPT, 0); } /* diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index ecd90c03a8c0..35f4391f52a1 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 - * $Id: raw_ip.c,v 1.52 1998/03/24 18:06:23 wollman Exp $ + * $Id: raw_ip.c,v 1.53 1998/03/28 10:18:24 bde Exp $ */ #include <sys/param.h> @@ -97,7 +97,7 @@ rip_init() ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); ripcbinfo.ipi_zone = zinit("ripcb", sizeof(struct inpcb), - nmbclusters / 4, ZONE_INTERRUPT, 0); + maxsockets, ZONE_INTERRUPT, 0); } static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; @@ -515,6 +515,92 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, return rip_output(m, so, dst); } +static int +rip_pcblist SYSCTL_HANDLER_ARGS +{ + int error, i, n, s; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == 0) { + n = ripcbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xinpcb); + return 0; + } + + if (req->newptr != 0) + return EPERM; + + /* + * OK, now we're committed to doing something. 
+ */ + s = splnet(); + gencnt = ripcbinfo.ipi_gencnt; + n = ripcbinfo.ipi_count; + splx(s); + + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) + return error; + + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) + return ENOMEM; + + s = splnet(); + for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; + inp = inp->inp_list.le_next) { + if (inp->inp_gencnt <= gencnt) + inp_list[i++] = inp; + } + splx(s); + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + if (inp->inp_gencnt <= gencnt) { + struct xinpcb xi; + xi.xi_len = sizeof xi; + /* XXX should avoid extra copy */ + bcopy(inp, &xi.xi_inp, sizeof *inp); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xi.xi_socket); + error = SYSCTL_OUT(req, &xi, sizeof xi); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. + */ + s = splnet(); + xig.xig_gen = ripcbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = ripcbinfo.ipi_count; + splx(s); + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + free(inp_list, M_TEMP); + return error; +} + +SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, + rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); + struct pr_usrreqs rip_usrreqs = { rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 7820b636cf9b..43139cac63b4 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 - * $Id: tcp_subr.c,v 1.43 1998/03/24 18:06:28 wollman Exp $ + * $Id: tcp_subr.c,v 1.44 1998/03/28 10:18:24 bde Exp $ */ #include "opt_compat.h" @@ -85,6 +85,9 @@ static int tcp_do_rfc1644 = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, &tcp_do_rfc1644 , 0, ""); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count, + 0, "Number of active PCBs"); + static void tcp_cleartaocache __P((void)); static void tcp_notify __P((struct inpcb *, int)); @@ -130,22 +133,7 @@ tcp_init() tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask); - /* For the moment, we just worry about putting inpcbs here. */ - /* - * Rationale for a maximum of `nmbclusters': - * 1) It's a convenient value, sized by config, based on - * parameters already known to be tweakable as needed - * for network-intensive systems. - * 2) Under the Old World Order, when pcbs were stored in - * mbufs, it was of course impossible to have more - * pcbs than mbufs. - * 3) The zone allocator doesn't allocate physical memory - * for this many pcbs; it just sizes the virtual - * address space appropriately. Thus, even for very large - * values of nmbclusters, we don't actually take up much - * memory unless required. - */ - tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), nmbclusters, + tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets, ZONE_INTERRUPT, 0); if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); @@ -421,14 +409,10 @@ tcp_close(tp) * way to calculate the pipesize, it will have to do. 
*/ i = tp->snd_ssthresh; -#if 1 if (rt->rt_rmx.rmx_sendpipe != 0) dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); else dosavessthresh = (i < so->so_snd.sb_hiwat / 2); -#else - dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); -#endif if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && i != 0 && rt->rt_rmx.rmx_ssthresh != 0) || dosavessthresh) { @@ -505,6 +489,93 @@ tcp_notify(inp, error) sowwakeup(so); } +static int +tcp_pcblist SYSCTL_HANDLER_ARGS +{ + int error, i, n, s; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == 0) { + n = tcbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xtcpcb); + return 0; + } + + if (req->newptr != 0) + return EPERM; + + /* + * OK, now we're committed to doing something. + */ + s = splnet(); + gencnt = tcbinfo.ipi_gencnt; + n = tcbinfo.ipi_count; + splx(s); + + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) + return error; + + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) + return ENOMEM; + + s = splnet(); + for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n; + inp = inp->inp_list.le_next) { + if (inp->inp_gencnt <= gencnt) + inp_list[i++] = inp; + } + splx(s); + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + if (inp->inp_gencnt <= gencnt) { + struct xtcpcb xt; + xt.xt_len = sizeof xt; + /* XXX should avoid extra copy */ + bcopy(inp, &xt.xt_inp, sizeof *inp); + bcopy(inp->inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xt.xt_socket); + error = SYSCTL_OUT(req, &xt, sizeof xt); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. 
+ * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. + */ + s = splnet(); + xig.xig_gen = tcbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = tcbinfo.ipi_count; + splx(s); + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + free(inp_list, M_TEMP); + return error; +} + +SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, + tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); + void tcp_ctlinput(cmd, sa, vip) int cmd; @@ -517,10 +588,8 @@ tcp_ctlinput(cmd, sa, vip) if (cmd == PRC_QUENCH) notify = tcp_quench; -#if 1 else if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; -#endif else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)) return; @@ -548,7 +617,6 @@ tcp_quench(inp, errno) tp->snd_cwnd = tp->t_maxseg; } -#if 1 /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value in the route. Also nudge TCP to send something, @@ -623,7 +691,6 @@ tcp_mtudisc(inp, errno) tcp_output(tp); } } -#endif /* * Look-up the routing entry to the peer of this inpcb. If no route diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 7820b636cf9b..43139cac63b4 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 - * $Id: tcp_subr.c,v 1.43 1998/03/24 18:06:28 wollman Exp $ + * $Id: tcp_subr.c,v 1.44 1998/03/28 10:18:24 bde Exp $ */ #include "opt_compat.h" @@ -85,6 +85,9 @@ static int tcp_do_rfc1644 = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, &tcp_do_rfc1644 , 0, ""); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count, + 0, "Number of active PCBs"); + static void tcp_cleartaocache __P((void)); static void tcp_notify __P((struct inpcb *, int)); @@ -130,22 +133,7 @@ tcp_init() tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask); - /* For the moment, we just worry about putting inpcbs here. */ - /* - * Rationale for a maximum of `nmbclusters': - * 1) It's a convenient value, sized by config, based on - * parameters already known to be tweakable as needed - * for network-intensive systems. - * 2) Under the Old World Order, when pcbs were stored in - * mbufs, it was of course impossible to have more - * pcbs than mbufs. - * 3) The zone allocator doesn't allocate physical memory - * for this many pcbs; it just sizes the virtual - * address space appropriately. Thus, even for very large - * values of nmbclusters, we don't actually take up much - * memory unless required. - */ - tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), nmbclusters, + tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets, ZONE_INTERRUPT, 0); if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); @@ -421,14 +409,10 @@ tcp_close(tp) * way to calculate the pipesize, it will have to do. 
*/ i = tp->snd_ssthresh; -#if 1 if (rt->rt_rmx.rmx_sendpipe != 0) dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); else dosavessthresh = (i < so->so_snd.sb_hiwat / 2); -#else - dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); -#endif if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && i != 0 && rt->rt_rmx.rmx_ssthresh != 0) || dosavessthresh) { @@ -505,6 +489,93 @@ tcp_notify(inp, error) sowwakeup(so); } +static int +tcp_pcblist SYSCTL_HANDLER_ARGS +{ + int error, i, n, s; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == 0) { + n = tcbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xtcpcb); + return 0; + } + + if (req->newptr != 0) + return EPERM; + + /* + * OK, now we're committed to doing something. + */ + s = splnet(); + gencnt = tcbinfo.ipi_gencnt; + n = tcbinfo.ipi_count; + splx(s); + + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) + return error; + + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) + return ENOMEM; + + s = splnet(); + for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n; + inp = inp->inp_list.le_next) { + if (inp->inp_gencnt <= gencnt) + inp_list[i++] = inp; + } + splx(s); + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + if (inp->inp_gencnt <= gencnt) { + struct xtcpcb xt; + xt.xt_len = sizeof xt; + /* XXX should avoid extra copy */ + bcopy(inp, &xt.xt_inp, sizeof *inp); + bcopy(inp->inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xt.xt_socket); + error = SYSCTL_OUT(req, &xt, sizeof xt); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. 
+ * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. + */ + s = splnet(); + xig.xig_gen = tcbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = tcbinfo.ipi_count; + splx(s); + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + free(inp_list, M_TEMP); + return error; +} + +SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, + tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); + void tcp_ctlinput(cmd, sa, vip) int cmd; @@ -517,10 +588,8 @@ tcp_ctlinput(cmd, sa, vip) if (cmd == PRC_QUENCH) notify = tcp_quench; -#if 1 else if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; -#endif else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)) return; @@ -548,7 +617,6 @@ tcp_quench(inp, errno) tp->snd_cwnd = tp->t_maxseg; } -#if 1 /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value in the route. Also nudge TCP to send something, @@ -623,7 +691,6 @@ tcp_mtudisc(inp, errno) tcp_output(tp); } } -#endif /* * Look-up the routing entry to the peer of this inpcb. If no route diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index e928449d908f..157631b8f831 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 - * $Id: tcp_var.h,v 1.42 1998/04/06 06:52:47 phk Exp $ + * $Id: tcp_var.h,v 1.43 1998/04/24 09:25:39 dg Exp $ */ #ifndef _NETINET_TCP_VAR_H_ @@ -287,6 +287,21 @@ struct tcpstat { }; /* + * TCB structure exported to user-land via sysctl(3). + * Evil hack: declare only if in_pcb.h has been included. + * Not all of our clients do. 
+ */ +#ifdef _NETINET_IN_PCB_H_ +struct xtcpcb { + size_t xt_len; + struct inpcb xt_inp; + struct tcpcb xt_tp; + struct xsocket xt_socket; + u_quad_t xt_alignment_hack; +}; +#endif + +/* * Names for TCP sysctl objects */ #define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */ @@ -299,7 +314,8 @@ struct tcpstat { #define TCPCTL_SENDSPACE 8 /* send buffer space */ #define TCPCTL_RECVSPACE 9 /* receive buffer space */ #define TCPCTL_KEEPINIT 10 /* receive buffer space */ -#define TCPCTL_MAXID 11 +#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ +#define TCPCTL_MAXID 12 #define TCPCTL_NAMES { \ { 0, 0 }, \ @@ -313,6 +329,7 @@ struct tcpstat { { "sendspace", CTLTYPE_INT }, \ { "recvspace", CTLTYPE_INT }, \ { "keepinit", CTLTYPE_INT }, \ + { "pcblist", CTLTYPE_STRUCT }, \ } #ifdef KERNEL diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index bb1e6da3fba5..107fd06df73c 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 - * $Id: udp_usrreq.c,v 1.45 1998/03/24 18:06:34 wollman Exp $ + * $Id: udp_usrreq.c,v 1.46 1998/03/28 10:18:26 bde Exp $ */ #include <sys/param.h> @@ -101,7 +101,7 @@ udp_init() udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask); - udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), nmbclusters, + udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets, ZONE_INTERRUPT, 0); } @@ -363,6 +363,92 @@ udp_ctlinput(cmd, sa, vip) } static int +udp_pcblist SYSCTL_HANDLER_ARGS +{ + int error, i, n, s; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the PCB list is too time-consuming and + * resource-intensive to repeat twice on every request.
+ */ + if (req->oldptr == 0) { + n = udbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xinpcb); + return 0; + } + + if (req->newptr != 0) + return EPERM; + + /* + * OK, now we're committed to doing something. + */ + s = splnet(); + gencnt = udbinfo.ipi_gencnt; + n = udbinfo.ipi_count; + splx(s); + + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) + return error; + + inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) + return ENOMEM; + + s = splnet(); + for (inp = udbinfo.listhead->lh_first, i = 0; inp && i < n; + inp = inp->inp_list.le_next) { + if (inp->inp_gencnt <= gencnt) + inp_list[i++] = inp; + } + splx(s); + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + if (inp->inp_gencnt <= gencnt) { + struct xinpcb xi; + xi.xi_len = sizeof xi; + /* XXX should avoid extra copy */ + bcopy(inp, &xi.xi_inp, sizeof *inp); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xi.xi_socket); + error = SYSCTL_OUT(req, &xi, sizeof xi); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. 
+ */ + s = splnet(); + xig.xig_gen = udbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = udbinfo.ipi_count; + splx(s); + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + free(inp_list, M_TEMP); + return error; +} + +SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, + udp_pcblist, "S,xinpcb", "List of active UDP sockets"); + +static int udp_output(inp, m, addr, control, p) register struct inpcb *inp; register struct mbuf *m; diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index fb21bfd119ad..8aee42687512 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 - * $Id: udp_var.h,v 1.13 1997/08/16 19:15:42 wollman Exp $ + * $Id: udp_var.h,v 1.14 1997/09/07 05:26:51 bde Exp $ */ #ifndef _NETINET_UDP_VAR_H_ @@ -89,7 +89,8 @@ struct udpstat { #define UDPCTL_STATS 2 /* statistics (read-only) */ #define UDPCTL_MAXDGRAM 3 /* max datagram size */ #define UDPCTL_RECVSPACE 4 /* default receive buffer space */ -#define UDPCTL_MAXID 5 +#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */ +#define UDPCTL_MAXID 6 #define UDPCTL_NAMES { \ { 0, 0 }, \ @@ -97,6 +98,7 @@ struct udpstat { { "stats", CTLTYPE_STRUCT }, \ { "maxdgram", CTLTYPE_INT }, \ { "recvspace", CTLTYPE_INT }, \ + { "pcblist", CTLTYPE_STRUCT }, \ } #ifdef KERNEL diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index f0ae22b45614..8eaadcf7391b 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 - * $Id: socketvar.h,v 1.24 1998/02/01 20:08:38 bde Exp $ + * $Id: socketvar.h,v 1.25 1998/03/01 19:39:29 guido Exp $ */ #ifndef _SYS_SOCKETVAR_H_ @@ -46,7 +46,10 @@ * handle on protocol and pointer to protocol * private data and error information. 
*/ +typedef u_quad_t so_gen_t; + struct socket { + struct vm_zone *so_zone; /* zone we were allocated from */ short so_type; /* generic type, see socket.h */ short so_options; /* from socket call, see socket.h */ short so_linger; /* time to linger while closing */ @@ -99,10 +102,10 @@ struct socket { #define SB_NOTIFY (SB_WAIT|SB_SEL|SB_ASYNC) #define SB_NOINTR 0x40 /* operations not interruptible */ - caddr_t so_tpcb; /* Wisc. protocol control block XXX */ void (*so_upcall) __P((struct socket *so, caddr_t arg, int waitf)); caddr_t so_upcallarg; /* Arg for above */ uid_t so_uid; /* who opened the socket */ + so_gen_t so_gencnt; /* generation count */ }; /* @@ -124,6 +127,37 @@ struct socket { #define SS_INCOMP 0x0800 /* unaccepted, incomplete connection */ #define SS_COMP 0x1000 /* unaccepted, complete connection */ +/* + * Externalized form of struct socket used by the sysctl(3) interface. + */ +struct xsocket { + size_t xso_len; /* length of this structure */ + struct socket *xso_so; /* makes a convenient handle sometimes */ + short so_type; + short so_options; + short so_linger; + short so_state; + caddr_t so_pcb; /* another convenient handle */ + int xso_protocol; + int xso_family; + short so_qlen; + short so_incqlen; + short so_qlimit; + short so_timeo; + u_short so_error; + pid_t so_pgid; + u_long so_oobmark; + struct xsockbuf { + u_long sb_cc; + u_long sb_hiwat; + u_long sb_mbcnt; + u_long sb_mbmax; + long sb_lowat; + short sb_flags; + short sb_timeo; + } so_rcv, so_snd; + uid_t so_uid; /* XXX */ +}; /* * Macros for sockets and socket buffering. 
@@ -202,11 +236,13 @@ struct socket { #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PCB); -MALLOC_DECLARE(M_SOCKET); MALLOC_DECLARE(M_SONAME); #endif +extern int maxsockets; extern u_long sb_max; +extern struct vm_zone *socket_zone; +extern so_gen_t so_gencnt; struct file; struct filedesc; @@ -248,10 +284,12 @@ void sbflush __P((struct sockbuf *sb)); void sbinsertoob __P((struct sockbuf *sb, struct mbuf *m0)); void sbrelease __P((struct sockbuf *sb)); int sbreserve __P((struct sockbuf *sb, u_long cc)); +void sbtoxsockbuf __P((struct sockbuf *sb, struct xsockbuf *xsb)); int sbwait __P((struct sockbuf *sb)); int sb_lock __P((struct sockbuf *sb)); int soabort __P((struct socket *so)); int soaccept __P((struct socket *so, struct sockaddr **nam)); +struct socket *soalloc __P((int waitok)); int sobind __P((struct socket *so, struct sockaddr *nam, struct proc *p)); void socantrcvmore __P((struct socket *so)); void socantsendmore __P((struct socket *so)); @@ -260,6 +298,7 @@ int soconnect __P((struct socket *so, struct sockaddr *nam, struct proc *p)); int soconnect2 __P((struct socket *so1, struct socket *so2)); int socreate __P((int dom, struct socket **aso, int type, int proto, struct proc *p)); +void sodealloc __P((struct socket *so)); int sodisconnect __P((struct socket *so)); void sofree __P((struct socket *so)); int sogetopt __P((struct socket *so, int level, int optname, @@ -287,6 +326,7 @@ int sosend __P((struct socket *so, struct sockaddr *addr, struct uio *uio, int sosetopt __P((struct socket *so, int level, int optname, struct mbuf *m0, struct proc *p)); int soshutdown __P((struct socket *so, int how)); +void sotoxsocket __P((struct socket *so, struct xsocket *xso)); void sowakeup __P((struct socket *so, struct sockbuf *sb)); #endif /* KERNEL */ diff --git a/sys/sys/un.h b/sys/sys/un.h index 3d7d7ede9131..d0c0af9e4e04 100644 --- a/sys/sys/un.h +++ b/sys/sys/un.h @@ -31,16 +31,12 @@ * SUCH DAMAGE. 
* * @(#)un.h 8.3 (Berkeley) 2/19/95 - * $Id: un.h,v 1.12 1997/04/27 20:01:29 wollman Exp $ + * $Id: un.h,v 1.13 1997/08/16 19:16:15 wollman Exp $ */ #ifndef _SYS_UN_H_ #define _SYS_UN_H_ -#ifdef KERNEL -#include <sys/unpcb.h> -#endif /* KERNEL */ - /* * Definitions for UNIX IPC domain. */ @@ -59,6 +55,7 @@ int uipc_usrreq __P((struct socket *so, int req, struct mbuf *m, int unp_connect2 __P((struct socket *so, struct socket *so2)); void unp_dispose __P((struct mbuf *m)); int unp_externalize __P((struct mbuf *rights)); +void unp_init __P((void)); extern struct pr_usrreqs uipc_usrreqs; #else /* !KERNEL */ diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index 6cf0855f339d..14191537f059 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -31,12 +31,14 @@ * SUCH DAMAGE. * * @(#)unpcb.h 8.1 (Berkeley) 6/2/93 - * $Id: unpcb.h,v 1.5 1997/02/22 09:46:22 peter Exp $ + * $Id: unpcb.h,v 1.6 1997/08/16 19:16:16 wollman Exp $ */ #ifndef _SYS_UNPCB_H_ #define _SYS_UNPCB_H_ +#include <sys/queue.h> + /* * Protocol control block for an active * instance of a UNIX internal protocol. @@ -62,18 +64,51 @@ * so that changes in the sockbuf may be computed to modify * back pressure on the sender accordingly. 
*/ +typedef u_quad_t unp_gen_t; +LIST_HEAD(unp_head, unpcb); + struct unpcb { + LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */ struct socket *unp_socket; /* pointer back to socket */ struct vnode *unp_vnode; /* if associated with file */ ino_t unp_ino; /* fake inode number */ struct unpcb *unp_conn; /* control block of connected socket */ - struct unpcb *unp_refs; /* referencing socket linked list */ - struct unpcb *unp_nextref; /* link in unp_refs list */ + struct unp_head unp_refs; /* referencing socket linked list */ + LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */ struct sockaddr_un *unp_addr; /* bound address of socket */ int unp_cc; /* copy of rcv.sb_cc */ int unp_mbcnt; /* copy of rcv.sb_mbcnt */ + unp_gen_t unp_gencnt; /* generation count of this instance */ }; #define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) -#endif +/* Hack alert -- this structure depends on <sys/socketvar.h>. */ +#ifdef _SYS_SOCKETVAR_H_ +struct xunpcb { + size_t xu_len; /* length of this structure */ + struct unpcb *xu_unpp; /* to help netstat, fstat */ + struct unpcb xu_unp; /* our information */ + union { + struct sockaddr_un xuu_addr; /* our bound address */ + char xu_dummy1[256]; + } xu_au; +#define xu_addr xu_au.xuu_addr + union { + struct sockaddr_un xuu_caddr; /* their bound address */ + char xu_dummy2[256]; + } xu_cau; +#define xu_caddr xu_cau.xuu_caddr + struct xsocket xu_socket; + u_quad_t xu_alignment_hack; +}; + +struct xunpgen { + size_t xug_len; + u_int xug_count; + unp_gen_t xug_gen; + so_gen_t xug_sogen; +}; +#endif /* _SYS_SOCKETVAR_H_ */ + +#endif /* _SYS_UNPCB_H_ */ |