about summary refs log tree commit diff
path: root/sys
diff options
context:
space:
mode:
author Garrett Wollman <wollman@FreeBSD.org> 1998-05-15 20:11:40 +0000
committer Garrett Wollman <wollman@FreeBSD.org> 1998-05-15 20:11:40 +0000
commit 98271db4d5d00999e63492d6550727afc907e54c (patch)
tree bc0add632f758bedefe2376aa377106fc29c7c20 /sys
parent eb92a3476660337bff6b68d001390e77d47b208b (diff)
download src-98271db4d5d00999e63492d6550727afc907e54c.tar.gz
src-98271db4d5d00999e63492d6550727afc907e54c.zip
Convert socket structures to be type-stable and add a version number.
Define a parameter which indicates the maximum number of sockets in a system, and use this to size the zone allocators used for sockets and for certain PCBs. Convert PF_LOCAL PCB structures to be type-stable and add a version number. Define an external format for information about socket structures and use it in several places. Define a mechanism to get all PF_LOCAL and PF_INET PCB lists through sysctl(3) without blocking network interrupts for an unreasonable length of time. This probably still has some bugs and/or race conditions, but it seems to work well enough on my machines. It is now possible for `netstat' to get almost all of its information via the sysctl(3) interface rather than reading kmem (changes to follow).
Notes
Notes: svn path=/head/; revision=36079
Diffstat (limited to 'sys')
-rw-r--r-- sys/conf/param.c 9
-rw-r--r-- sys/kern/subr_param.c 9
-rw-r--r-- sys/kern/uipc_domain.c 33
-rw-r--r-- sys/kern/uipc_proto.c 5
-rw-r--r-- sys/kern/uipc_sockbuf.c 59
-rw-r--r-- sys/kern/uipc_socket.c 54
-rw-r--r-- sys/kern/uipc_socket2.c 59
-rw-r--r-- sys/kern/uipc_usrreq.c 161
-rw-r--r-- sys/netinet/in_pcb.c 3
-rw-r--r-- sys/netinet/in_pcb.h 25
-rw-r--r-- sys/netinet/ip_divert.c 6
-rw-r--r-- sys/netinet/raw_ip.c 90
-rw-r--r-- sys/netinet/tcp_subr.c 117
-rw-r--r-- sys/netinet/tcp_timewait.c 117
-rw-r--r-- sys/netinet/tcp_var.h 21
-rw-r--r-- sys/netinet/udp_usrreq.c 90
-rw-r--r-- sys/netinet/udp_var.h 6
-rw-r--r-- sys/sys/socketvar.h 46
-rw-r--r-- sys/sys/un.h 7
-rw-r--r-- sys/sys/unpcb.h 43
20 files changed, 827 insertions, 133 deletions
diff --git a/sys/conf/param.c b/sys/conf/param.c
index b38edcc27541..b78a29273e40 100644
--- a/sys/conf/param.c
+++ b/sys/conf/param.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)param.c 8.3 (Berkeley) 8/20/94
- * $Id: param.c,v 1.25 1997/06/14 11:38:46 bde Exp $
+ * $Id: param.c,v 1.26 1998/02/27 19:58:29 guido Exp $
*/
#include "opt_sysvipc.h"
@@ -85,6 +85,13 @@ int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */
#endif
int nmbclusters = NMBCLUSTERS;
+#if MAXFILES > NMBCLUSTERS
+#define MAXSOCKETS MAXFILES
+#else
+#define MAXSOCKETS NMBCLUSTERS
+#endif
+int maxsockets = MAXSOCKETS;
+
/* allocate 1/4th amount of virtual address space for mbufs XXX */
int nmbufs = NMBCLUSTERS * 4;
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index b38edcc27541..b78a29273e40 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)param.c 8.3 (Berkeley) 8/20/94
- * $Id: param.c,v 1.25 1997/06/14 11:38:46 bde Exp $
+ * $Id: param.c,v 1.26 1998/02/27 19:58:29 guido Exp $
*/
#include "opt_sysvipc.h"
@@ -85,6 +85,13 @@ int ncallout = 16 + NPROC + MAXFILES; /* maximum # of timer events */
#endif
int nmbclusters = NMBCLUSTERS;
+#if MAXFILES > NMBCLUSTERS
+#define MAXSOCKETS MAXFILES
+#else
+#define MAXSOCKETS NMBCLUSTERS
+#endif
+int maxsockets = MAXSOCKETS;
+
/* allocate 1/4th amount of virtual address space for mbufs XXX */
int nmbufs = NMBCLUSTERS * 4;
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
index 69d6bf862333..22078b1c3f3d 100644
--- a/sys/kern/uipc_domain.c
+++ b/sys/kern/uipc_domain.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93
- * $Id: uipc_domain.c,v 1.17 1997/04/27 20:00:42 wollman Exp $
+ * $Id: uipc_domain.c,v 1.18 1997/09/16 11:43:36 bde Exp $
*/
#include <sys/param.h>
@@ -40,7 +40,9 @@
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
+#include <sys/socketvar.h>
#include <sys/systm.h>
+#include <vm/vm_zone.h>
/*
* System initialization
@@ -85,6 +87,21 @@ domaininit(dummy)
register struct protosw *pr;
/*
+ * Before we do any setup, make sure to initialize the
+ * zone allocator we get struct sockets from. The obvious
+ * maximum number of sockets is `maxfiles', but it is possible
+ * to have a socket without an open file (e.g., a connection waiting
+ * to be accept(2)ed). Rather than think up and define a
+ * better value, we just use nmbclusters, since that's what people
+ * are told to increase first when the network runs out of memory.
+ * Perhaps we should have two pools, one of unlimited size
+ * for use during socreate(), and one ZONE_INTERRUPT pool for
+ * use in sonewconn().
+ */
+ socket_zone = zinit("socket", sizeof(struct socket), maxsockets,
+ ZONE_INTERRUPT, 0);
+
+ /*
* NB - local domain is always present.
*/
ADDDOMAIN(local);
@@ -94,26 +111,14 @@ domaininit(dummy)
domains = *dpp;
}
-/* - not in our sources
-#ifdef ISDN
- ADDDOMAIN(isdn);
-#endif
-*/
-
for (dp = domains; dp; dp = dp->dom_next) {
if (dp->dom_init)
(*dp->dom_init)();
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++){
-#ifdef PRU_OLDSTYLE
- /* See comments in uipc_socket2.c. */
- if (pr->pr_usrreqs == 0 && pr->pr_ousrreq)
- pr->pr_usrreqs = &pru_oldstyle;
-#else
if (pr->pr_usrreqs == 0)
panic("domaininit: %ssw[%d] has no usrreqs!",
dp->dom_name,
(int)(pr - dp->dom_protosw));
-#endif
if (pr->pr_init)
(*pr->pr_init)();
}
@@ -151,7 +156,7 @@ kludge_splx(udata)
{
int *savesplp = udata;
- splx( *savesplp);
+ splx(*savesplp);
}
diff --git a/sys/kern/uipc_proto.c b/sys/kern/uipc_proto.c
index 6e2ef5de4a50..ec2b9f04ad02 100644
--- a/sys/kern/uipc_proto.c
+++ b/sys/kern/uipc_proto.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)uipc_proto.c 8.1 (Berkeley) 6/10/93
- * $Id: uipc_proto.c,v 1.13 1997/08/02 14:31:42 bde Exp $
+ * $Id: uipc_proto.c,v 1.14 1998/02/20 13:11:48 bde Exp $
*/
#include <sys/param.h>
@@ -41,6 +41,7 @@
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/un.h>
+#include <sys/unpcb.h>
#include <net/raw_cb.h>
@@ -70,7 +71,7 @@ static struct protosw localsw[] = {
};
struct domain localdomain =
- { AF_LOCAL, "local", 0, unp_externalize, unp_dispose,
+ { AF_LOCAL, "local", unp_init, unp_externalize, unp_dispose,
localsw, &localsw[sizeof(localsw)/sizeof(localsw[0])] };
SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 40c8f19a6122..8ad352269662 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -31,11 +31,12 @@
* SUCH DAMAGE.
*
* @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
- * $Id: uipc_socket2.c,v 1.32 1998/04/04 13:25:40 phk Exp $
+ * $Id: uipc_socket2.c,v 1.33 1998/04/24 04:15:18 dg Exp $
*/
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
@@ -202,10 +203,9 @@ sonewconn(head, connstatus)
if (head->so_qlen > 3 * head->so_qlimit / 2)
return ((struct socket *)0);
- MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+ so = soalloc(0);
if (so == NULL)
return ((struct socket *)0);
- bzero((caddr_t)so, sizeof(*so));
so->so_head = head;
so->so_type = head->so_type;
so->so_options = head->so_options &~ SO_ACCEPTCONN;
@@ -218,7 +218,7 @@ sonewconn(head, connstatus)
(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
- (void) free((caddr_t)so, M_SOCKET);
+ sodealloc(so);
return ((struct socket *)0);
}
@@ -890,6 +890,56 @@ dup_sockaddr(sa, canwait)
}
/*
+ * Create an external-format (``xsocket'') structure using the information
+ * in the kernel-format socket structure pointed to by so. This is done
+ * to reduce the spew of irrelevant information over this interface,
+ * to isolate user code from changes in the kernel structure, and
+ * potentially to provide information-hiding if we decide that
+ * some of this information should be hidden from users.
+ */
+void
+sotoxsocket(struct socket *so, struct xsocket *xso)
+{
+ xso->xso_len = sizeof *xso;
+ xso->xso_so = so;
+ xso->so_type = so->so_type;
+ xso->so_options = so->so_options;
+ xso->so_linger = so->so_linger;
+ xso->so_state = so->so_state;
+ xso->so_pcb = so->so_pcb;
+ xso->xso_protocol = so->so_proto->pr_protocol;
+ xso->xso_family = so->so_proto->pr_domain->dom_family;
+ xso->so_qlen = so->so_qlen;
+ xso->so_incqlen = so->so_incqlen;
+ xso->so_qlimit = so->so_qlimit;
+ xso->so_timeo = so->so_timeo;
+ xso->so_error = so->so_error;
+ xso->so_pgid = so->so_pgid;
+ xso->so_oobmark = so->so_oobmark;
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ xso->so_uid = so->so_uid;
+}
+
+/*
+ * This does the same for sockbufs. Note that the xsockbuf structure,
+ * since it is always embedded in a socket, does not include a self
+ * pointer nor a length. We make this entry point public in case
+ * some other mechanism needs it.
+ */
+void
+sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
+{
+ xsb->sb_cc = sb->sb_cc;
+ xsb->sb_hiwat = sb->sb_hiwat;
+ xsb->sb_mbcnt = sb->sb_mbcnt;
+ xsb->sb_mbmax = sb->sb_mbmax;
+ xsb->sb_lowat = sb->sb_lowat;
+ xsb->sb_flags = sb->sb_flags;
+ xsb->sb_timeo = sb->sb_timeo;
+}
+
+/*
* Here is the definition of some of the basic objects in the kern.ipc
* branch of the MIB.
*/
@@ -900,6 +950,7 @@ static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
&sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 58e309ab78ad..2ca79fc50098 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
- * $Id: uipc_socket.c,v 1.38 1998/03/01 19:39:17 guido Exp $
+ * $Id: uipc_socket.c,v 1.39 1998/03/28 10:33:08 bde Exp $
*/
#include <sys/param.h>
@@ -50,10 +50,13 @@
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
+#include <vm/vm_zone.h>
#include <machine/limits.h>
-MALLOC_DEFINE(M_SOCKET, "socket", "socket structure");
+struct vm_zone *socket_zone;
+so_gen_t so_gencnt; /* generation count for sockets */
+
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
@@ -68,7 +71,30 @@ SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
* implement the semantics of socket operations by
* switching out to the protocol specific routines.
*/
-/*ARGSUSED*/
+
+/*
+ * Get a socket structure from our zone, and initialize it.
+ * We don't implement `waitok' yet (see comments in uipc_domain.c).
+ * Note that it would probably be better to allocate socket
+ * and PCB at the same time, but I'm not convinced that all
+ * the protocols can be easily modified to do this.
+ */
+struct socket *
+soalloc(waitok)
+ int waitok;
+{
+ struct socket *so;
+
+ so = zalloci(socket_zone);
+ if (so) {
+ /* XXX race condition for reentrant kernel */
+ bzero(so, sizeof *so);
+ so->so_gencnt = ++so_gencnt;
+ so->so_zone = socket_zone;
+ }
+ return so;
+}
+
int
socreate(dom, aso, type, proto, p)
int dom;
@@ -89,12 +115,15 @@ socreate(dom, aso, type, proto, p)
return (EPROTONOSUPPORT);
if (prp->pr_type != type)
return (EPROTOTYPE);
- MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
- bzero((caddr_t)so, sizeof(*so));
+ so = soalloc(p != 0);
+ if (so == 0)
+ return (ENOBUFS);
+
TAILQ_INIT(&so->so_incomp);
TAILQ_INIT(&so->so_comp);
so->so_type = type;
- so->so_uid = p->p_ucred->cr_uid;;
+ if (p != 0)
+ so->so_uid = p->p_ucred->cr_uid;
so->so_proto = prp;
error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
if (error) {
@@ -120,14 +149,23 @@ sobind(so, nam, p)
return (error);
}
+void
+sodealloc(so)
+ struct socket *so;
+{
+ so->so_gencnt = ++so_gencnt;
+ zfreei(so->so_zone, so);
+}
+
int
solisten(so, backlog, p)
register struct socket *so;
int backlog;
struct proc *p;
{
- int s = splnet(), error;
+ int s, error;
+ s = splnet();
error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
if (error) {
splx(s);
@@ -165,7 +203,7 @@ sofree(so)
}
sbrelease(&so->so_snd);
sorflush(so);
- FREE(so, M_SOCKET);
+ sodealloc(so);
}
/*
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index 40c8f19a6122..8ad352269662 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -31,11 +31,12 @@
* SUCH DAMAGE.
*
* @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
- * $Id: uipc_socket2.c,v 1.32 1998/04/04 13:25:40 phk Exp $
+ * $Id: uipc_socket2.c,v 1.33 1998/04/24 04:15:18 dg Exp $
*/
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
@@ -202,10 +203,9 @@ sonewconn(head, connstatus)
if (head->so_qlen > 3 * head->so_qlimit / 2)
return ((struct socket *)0);
- MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+ so = soalloc(0);
if (so == NULL)
return ((struct socket *)0);
- bzero((caddr_t)so, sizeof(*so));
so->so_head = head;
so->so_type = head->so_type;
so->so_options = head->so_options &~ SO_ACCEPTCONN;
@@ -218,7 +218,7 @@ sonewconn(head, connstatus)
(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
- (void) free((caddr_t)so, M_SOCKET);
+ sodealloc(so);
return ((struct socket *)0);
}
@@ -890,6 +890,56 @@ dup_sockaddr(sa, canwait)
}
/*
+ * Create an external-format (``xsocket'') structure using the information
+ * in the kernel-format socket structure pointed to by so. This is done
+ * to reduce the spew of irrelevant information over this interface,
+ * to isolate user code from changes in the kernel structure, and
+ * potentially to provide information-hiding if we decide that
+ * some of this information should be hidden from users.
+ */
+void
+sotoxsocket(struct socket *so, struct xsocket *xso)
+{
+ xso->xso_len = sizeof *xso;
+ xso->xso_so = so;
+ xso->so_type = so->so_type;
+ xso->so_options = so->so_options;
+ xso->so_linger = so->so_linger;
+ xso->so_state = so->so_state;
+ xso->so_pcb = so->so_pcb;
+ xso->xso_protocol = so->so_proto->pr_protocol;
+ xso->xso_family = so->so_proto->pr_domain->dom_family;
+ xso->so_qlen = so->so_qlen;
+ xso->so_incqlen = so->so_incqlen;
+ xso->so_qlimit = so->so_qlimit;
+ xso->so_timeo = so->so_timeo;
+ xso->so_error = so->so_error;
+ xso->so_pgid = so->so_pgid;
+ xso->so_oobmark = so->so_oobmark;
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ xso->so_uid = so->so_uid;
+}
+
+/*
+ * This does the same for sockbufs. Note that the xsockbuf structure,
+ * since it is always embedded in a socket, does not include a self
+ * pointer nor a length. We make this entry point public in case
+ * some other mechanism needs it.
+ */
+void
+sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
+{
+ xsb->sb_cc = sb->sb_cc;
+ xsb->sb_hiwat = sb->sb_hiwat;
+ xsb->sb_mbcnt = sb->sb_mbcnt;
+ xsb->sb_mbmax = sb->sb_mbmax;
+ xsb->sb_lowat = sb->sb_lowat;
+ xsb->sb_flags = sb->sb_flags;
+ xsb->sb_timeo = sb->sb_timeo;
+}
+
+/*
* Here is the definition of some of the basic objects in the kern.ipc
* branch of the MIB.
*/
@@ -900,6 +950,7 @@ static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
&sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index b225c7c5e91f..3921513b2dd7 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
- * $Id: uipc_usrreq.c,v 1.33 1998/04/17 22:36:50 des Exp $
+ * $Id: uipc_usrreq.c,v 1.34 1998/05/07 04:58:21 msmith Exp $
*/
#include <sys/param.h>
@@ -42,6 +42,7 @@
#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
#include <sys/file.h>
#include <sys/filedesc.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#include <sys/proc.h>
@@ -51,8 +52,17 @@
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/un.h>
+#include <sys/unpcb.h>
#include <sys/vnode.h>
+#include <vm/vm_zone.h>
+
+struct vm_zone *unp_zone;
+static unp_gen_t unp_gencnt;
+static u_int unp_count;
+
+static struct unp_head unp_shead, unp_dhead;
+
/*
* Unix communications domain.
*
@@ -60,6 +70,7 @@
* SEQPACKET, RDM
* rethink name space problems
* need a proper out-of-band
+ * lock pushdown
*/
static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t unp_ino; /* prototype for fake inode numbers */
@@ -468,12 +479,17 @@ unp_attach(so)
if (error)
return (error);
}
- MALLOC(unp, struct unpcb *, sizeof *unp, M_PCB, M_NOWAIT);
+ unp = zalloc(unp_zone);
if (unp == NULL)
return (ENOBUFS);
bzero(unp, sizeof *unp);
- so->so_pcb = (caddr_t)unp;
+ unp->unp_gencnt = ++unp_gencnt;
+ unp_count++;
+ LIST_INIT(&unp->unp_refs);
unp->unp_socket = so;
+ LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
+ : &unp_shead, unp, unp_link);
+ so->so_pcb = (caddr_t)unp;
return (0);
}
@@ -481,6 +497,9 @@ static void
unp_detach(unp)
register struct unpcb *unp;
{
+ LIST_REMOVE(unp, unp_link);
+ unp->unp_gencnt = ++unp_gencnt;
+ --unp_count;
if (unp->unp_vnode) {
unp->unp_vnode->v_socket = 0;
vrele(unp->unp_vnode);
@@ -488,8 +507,8 @@ unp_detach(unp)
}
if (unp->unp_conn)
unp_disconnect(unp);
- while (unp->unp_refs)
- unp_drop(unp->unp_refs, ECONNRESET);
+ while (unp->unp_refs.lh_first)
+ unp_drop(unp->unp_refs.lh_first, ECONNRESET);
soisdisconnected(unp->unp_socket);
unp->unp_socket->so_pcb = 0;
if (unp_rights) {
@@ -505,7 +524,7 @@ unp_detach(unp)
}
if (unp->unp_addr)
FREE(unp->unp_addr, M_SONAME);
- FREE(unp, M_PCB);
+ zfree(unp_zone, unp);
}
static int
@@ -637,8 +656,7 @@ unp_connect2(so, so2)
switch (so->so_type) {
case SOCK_DGRAM:
- unp->unp_nextref = unp2->unp_refs;
- unp2->unp_refs = unp;
+ LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
soisconnected(so);
break;
@@ -666,20 +684,7 @@ unp_disconnect(unp)
switch (unp->unp_socket->so_type) {
case SOCK_DGRAM:
- if (unp2->unp_refs == unp)
- unp2->unp_refs = unp->unp_nextref;
- else {
- unp2 = unp2->unp_refs;
- for (;;) {
- if (unp2 == 0)
- panic("unp_disconnect");
- if (unp2->unp_nextref == unp)
- break;
- unp2 = unp2->unp_nextref;
- }
- unp2->unp_nextref = unp->unp_nextref;
- }
- unp->unp_nextref = 0;
+ LIST_REMOVE(unp, unp_reflink);
unp->unp_socket->so_state &= ~SS_ISCONNECTED;
break;
@@ -701,6 +706,103 @@ unp_abort(unp)
}
#endif
+static int
+unp_pcblist SYSCTL_HANDLER_ARGS
+{
+ int error, i, n, s;
+ struct unpcb *unp, **unp_list;
+ unp_gen_t gencnt;
+ struct xunpgen xug;
+ struct unp_head *head;
+
+ head = ((long)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
+
+ /*
+ * The process of preparing the PCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == 0) {
+ n = unp_count;
+ req->oldidx = 2 * (sizeof xug)
+ + (n + n/8) * sizeof(struct xunpcb);
+ return 0;
+ }
+
+ if (req->newptr != 0)
+ return EPERM;
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ gencnt = unp_gencnt;
+ n = unp_count;
+
+ xug.xug_len = sizeof xug;
+ xug.xug_count = n;
+ xug.xug_gen = gencnt;
+ xug.xug_sogen = so_gencnt;
+ error = SYSCTL_OUT(req, &xug, sizeof xug);
+ if (error)
+ return error;
+
+ unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
+ if (unp_list == 0)
+ return ENOMEM;
+
+ for (unp = head->lh_first, i = 0; unp && i < n;
+ unp = unp->unp_link.le_next) {
+ if (unp->unp_gencnt <= gencnt)
+ unp_list[i++] = unp;
+ }
+ n = i; /* in case we lost some during malloc */
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ unp = unp_list[i];
+ if (unp->unp_gencnt <= gencnt) {
+ struct xunpcb xu;
+ xu.xu_len = sizeof xu;
+ xu.xu_unpp = unp;
+ /*
+ * XXX - need more locking here to protect against
+ * connect/disconnect races for SMP.
+ */
+ if (unp->unp_addr)
+ bcopy(unp->unp_addr, &xu.xu_addr,
+ unp->unp_addr->sun_len);
+ if (unp->unp_conn && unp->unp_conn->unp_addr)
+ bcopy(unp->unp_conn->unp_addr,
+ &xu.xu_caddr,
+ unp->unp_conn->unp_addr->sun_len);
+ bcopy(unp, &xu.xu_unp, sizeof *unp);
+ sotoxsocket(unp->unp_socket, &xu.xu_socket);
+ error = SYSCTL_OUT(req, &xu, sizeof xu);
+ }
+ }
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state.
+ * If the generation differs from what we told
+ * her before, she knows that something happened
+ * while we were processing this request, and it
+ * might be necessary to retry.
+ */
+ xug.xug_gen = unp_gencnt;
+ xug.xug_sogen = so_gencnt;
+ xug.xug_count = unp_count;
+ error = SYSCTL_OUT(req, &xug, sizeof xug);
+ }
+ free(unp_list, M_TEMP);
+ return error;
+}
+
+SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
+ (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
+ "List of active local datagram sockets");
+SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
+ (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
+ "List of active local stream sockets");
+
static void
unp_shutdown(unp)
struct unpcb *unp;
@@ -722,10 +824,13 @@ unp_drop(unp, errno)
so->so_error = errno;
unp_disconnect(unp);
if (so->so_head) {
+ LIST_REMOVE(unp, unp_link);
+ unp->unp_gencnt = ++unp_gencnt;
+ unp_count--;
so->so_pcb = (caddr_t) 0;
if (unp->unp_addr)
FREE(unp->unp_addr, M_SONAME);
- FREE(unp, M_PCB);
+ zfree(unp_zone, unp);
sofree(so);
}
}
@@ -779,6 +884,16 @@ unp_externalize(rights)
return (0);
}
+void
+unp_init(void)
+{
+ unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0);
+ if (unp_zone == 0)
+ panic("unp_init");
+ LIST_INIT(&unp_dhead);
+ LIST_INIT(&unp_shead);
+}
+
#ifndef MIN
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 15616a7913aa..424ff9ff2787 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
- * $Id: in_pcb.c,v 1.42 1998/03/28 10:33:13 bde Exp $
+ * $Id: in_pcb.c,v 1.43 1998/04/19 17:22:30 phk Exp $
*/
#include <sys/param.h>
@@ -875,6 +875,7 @@ static void
in_pcbremlists(inp)
struct inpcb *inp;
{
+ inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
if (inp->inp_lport) {
struct inpcbport *phd = inp->inp_phd;
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 467a5b2dd610..aa2d0bec301f 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
- * $Id: in_pcb.h,v 1.24 1998/03/24 18:06:11 wollman Exp $
+ * $Id: in_pcb.h,v 1.25 1998/03/28 10:18:22 bde Exp $
*/
#ifndef _NETINET_IN_PCB_H_
@@ -48,6 +48,7 @@
*/
LIST_HEAD(inpcbhead, inpcb);
LIST_HEAD(inpcbporthead, inpcbport);
+typedef u_quad_t inp_gen_t;
/*
* NB: the zone allocator is type-stable EXCEPT FOR THE FIRST TWO LONGS
@@ -75,7 +76,7 @@ struct inpcb {
struct ip_moptions *inp_moptions; /* IP multicast options */
LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */
struct inpcbport *inp_phd; /* head of this list */
- u_quad_t inp_gencnt; /* generation count of this instance */
+ inp_gen_t inp_gencnt; /* generation count of this instance */
};
/*
* The range of the generation count, as used in this implementation,
@@ -84,6 +85,26 @@ struct inpcb {
* unlikely that we simply don't concern ourselves with that possibility.
*/
+/*
+ * Interface exported to userland by various protocols which use
+ * inpcbs. Hack alert -- only define if struct xsocket is in scope.
+ */
+#ifdef _SYS_SOCKETVAR_H_
+struct xinpcb {
+ size_t xi_len; /* length of this structure */
+ struct inpcb xi_inp;
+ struct xsocket xi_socket;
+ u_quad_t xi_alignment_hack;
+};
+
+struct xinpgen {
+ size_t xig_len; /* length of this structure */
+ u_int xig_count; /* number of PCBs at this time */
+ inp_gen_t xig_gen; /* generation count at this time */
+ so_gen_t xig_sogen; /* socket generation count at this time */
+};
+#endif /* _SYS_SOCKETVAR_H_ */
+
struct inpcbport {
LIST_ENTRY(inpcbport) phd_hash;
struct inpcbhead phd_pcblist;
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index d37e2bf6547d..ad214c786db5 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -30,13 +30,13 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: ip_divert.c,v 1.21 1998/03/24 18:06:15 wollman Exp $
+ * $Id: ip_divert.c,v 1.22 1998/03/28 10:18:23 bde Exp $
*/
#include "opt_inet.h"
#ifndef INET
-#error IPDIVERT requires INET.
+#error "IPDIVERT requires INET."
#endif
#include <sys/param.h>
@@ -119,7 +119,7 @@ div_init(void)
divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
divcbinfo.ipi_zone = zinit("divcb", sizeof(struct inpcb),
- nmbclusters / 4, ZONE_INTERRUPT, 0);
+ maxsockets, ZONE_INTERRUPT, 0);
}
/*
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index ecd90c03a8c0..35f4391f52a1 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
- * $Id: raw_ip.c,v 1.52 1998/03/24 18:06:23 wollman Exp $
+ * $Id: raw_ip.c,v 1.53 1998/03/28 10:18:24 bde Exp $
*/
#include <sys/param.h>
@@ -97,7 +97,7 @@ rip_init()
ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
ripcbinfo.ipi_zone = zinit("ripcb", sizeof(struct inpcb),
- nmbclusters / 4, ZONE_INTERRUPT, 0);
+ maxsockets, ZONE_INTERRUPT, 0);
}
static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
@@ -515,6 +515,92 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
return rip_output(m, so, dst);
}
+static int
+rip_pcblist SYSCTL_HANDLER_ARGS
+{
+ int error, i, n, s;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the TCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == 0) {
+ n = ripcbinfo.ipi_count;
+ req->oldidx = 2 * (sizeof xig)
+ + (n + n/8) * sizeof(struct xinpcb);
+ return 0;
+ }
+
+ if (req->newptr != 0)
+ return EPERM;
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ s = splnet();
+ gencnt = ripcbinfo.ipi_gencnt;
+ n = ripcbinfo.ipi_count;
+ splx(s);
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n;
+ xig.xig_gen = gencnt;
+ xig.xig_sogen = so_gencnt;
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return error;
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == 0)
+ return ENOMEM;
+
+ s = splnet();
+ for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n;
+ inp = inp->inp_list.le_next) {
+ if (inp->inp_gencnt <= gencnt)
+ inp_list[i++] = inp;
+ }
+ splx(s);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ if (inp->inp_gencnt <= gencnt) {
+ struct xinpcb xi;
+ xi.xi_len = sizeof xi;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xi.xi_inp, sizeof *inp);
+ if (inp->inp_socket)
+ sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ error = SYSCTL_OUT(req, &xi, sizeof xi);
+ }
+ }
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state.
+ * If the generation differs from what we told
+ * her before, she knows that something happened
+ * while we were processing this request, and it
+ * might be necessary to retry.
+ */
+ s = splnet();
+ xig.xig_gen = ripcbinfo.ipi_gencnt;
+ xig.xig_sogen = so_gencnt;
+ xig.xig_count = ripcbinfo.ipi_count;
+ splx(s);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return error;
+}
+
+SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
+ rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
+
struct pr_usrreqs rip_usrreqs = {
rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 7820b636cf9b..43139cac63b4 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
- * $Id: tcp_subr.c,v 1.43 1998/03/24 18:06:28 wollman Exp $
+ * $Id: tcp_subr.c,v 1.44 1998/03/28 10:18:24 bde Exp $
*/
#include "opt_compat.h"
@@ -85,6 +85,9 @@ static int tcp_do_rfc1644 = 1;
SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644,
CTLFLAG_RW, &tcp_do_rfc1644 , 0, "");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count,
+ 0, "Number of active PCBs");
+
static void tcp_cleartaocache __P((void));
static void tcp_notify __P((struct inpcb *, int));
@@ -130,22 +133,7 @@ tcp_init()
tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB,
&tcbinfo.porthashmask);
- /* For the moment, we just worry about putting inpcbs here. */
- /*
- * Rationale for a maximum of `nmbclusters':
- * 1) It's a convenient value, sized by config, based on
- * parameters already known to be tweakable as needed
- * for network-intensive systems.
- * 2) Under the Old World Order, when pcbs were stored in
- * mbufs, it was of course impossible to have more
- * pcbs than mbufs.
- * 3) The zone allocator doesn't allocate physical memory
- * for this many pcbs; it just sizes the virtual
- * address space appropriately. Thus, even for very large
- * values of nmbclusters, we don't actually take up much
- * memory unless required.
- */
- tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), nmbclusters,
+ tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
ZONE_INTERRUPT, 0);
if (max_protohdr < sizeof(struct tcpiphdr))
max_protohdr = sizeof(struct tcpiphdr);
@@ -421,14 +409,10 @@ tcp_close(tp)
* way to calculate the pipesize, it will have to do.
*/
i = tp->snd_ssthresh;
-#if 1
if (rt->rt_rmx.rmx_sendpipe != 0)
dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
else
dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
-#else
- dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
-#endif
if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
|| dosavessthresh) {
@@ -505,6 +489,93 @@ tcp_notify(inp, error)
sowwakeup(so);
}
+static int
+tcp_pcblist SYSCTL_HANDLER_ARGS
+{
+ int error, i, n, s;
+ struct inpcb *inp, **inp_list;
+ inp_gen_t gencnt;
+ struct xinpgen xig;
+
+ /*
+ * The process of preparing the TCB list is too time-consuming and
+ * resource-intensive to repeat twice on every request.
+ */
+ if (req->oldptr == 0) {
+ n = tcbinfo.ipi_count;
+ req->oldidx = 2 * (sizeof xig)
+ + (n + n/8) * sizeof(struct xtcpcb);
+ return 0;
+ }
+
+ if (req->newptr != 0)
+ return EPERM;
+
+ /*
+ * OK, now we're committed to doing something.
+ */
+ s = splnet();
+ gencnt = tcbinfo.ipi_gencnt;
+ n = tcbinfo.ipi_count;
+ splx(s);
+
+ xig.xig_len = sizeof xig;
+ xig.xig_count = n;
+ xig.xig_gen = gencnt;
+ xig.xig_sogen = so_gencnt;
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ if (error)
+ return error;
+
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == 0)
+ return ENOMEM;
+
+ s = splnet();
+ for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n;
+ inp = inp->inp_list.le_next) {
+ if (inp->inp_gencnt <= gencnt)
+ inp_list[i++] = inp;
+ }
+ splx(s);
+ n = i;
+
+ error = 0;
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ if (inp->inp_gencnt <= gencnt) {
+ struct xtcpcb xt;
+ xt.xt_len = sizeof xt;
+ /* XXX should avoid extra copy */
+ bcopy(inp, &xt.xt_inp, sizeof *inp);
+ bcopy(inp->inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
+ if (inp->inp_socket)
+ sotoxsocket(inp->inp_socket, &xt.xt_socket);
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ }
+ }
+ if (!error) {
+ /*
+ * Give the user an updated idea of our state.
+ * If the generation differs from what we told
+ * her before, she knows that something happened
+ * while we were processing this request, and it
+ * might be necessary to retry.
+ */
+ s = splnet();
+ xig.xig_gen = tcbinfo.ipi_gencnt;
+ xig.xig_sogen = so_gencnt;
+ xig.xig_count = tcbinfo.ipi_count;
+ splx(s);
+ error = SYSCTL_OUT(req, &xig, sizeof xig);
+ }
+ free(inp_list, M_TEMP);
+ return error;
+}
+
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+ tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+
void
tcp_ctlinput(cmd, sa, vip)
int cmd;
@@ -517,10 +588,8 @@ tcp_ctlinput(cmd, sa, vip)
if (cmd == PRC_QUENCH)
notify = tcp_quench;
-#if 1
else if (cmd == PRC_MSGSIZE)
notify = tcp_mtudisc;
-#endif
else if (!PRC_IS_REDIRECT(cmd) &&
((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
return;
@@ -548,7 +617,6 @@ tcp_quench(inp, errno)
tp->snd_cwnd = tp->t_maxseg;
}
-#if 1
/*
* When `need fragmentation' ICMP is received, update our idea of the MSS
* based on the new value in the route. Also nudge TCP to send something,
@@ -623,7 +691,6 @@ tcp_mtudisc(inp, errno)
tcp_output(tp);
}
}
-#endif
/*
* Look-up the routing entry to the peer of this inpcb. If no route
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index 7820b636cf9b..43139cac63b4 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
- * $Id: tcp_subr.c,v 1.43 1998/03/24 18:06:28 wollman Exp $
+ * $Id: tcp_subr.c,v 1.44 1998/03/28 10:18:24 bde Exp $
*/
#include "opt_compat.h"
@@ -85,6 +85,9 @@ static int tcp_do_rfc1644 = 1;
SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644,
CTLFLAG_RW, &tcp_do_rfc1644 , 0, "");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count,
+ 0, "Number of active PCBs");
+
static void tcp_cleartaocache __P((void));
static void tcp_notify __P((struct inpcb *, int));
@@ -130,22 +133,7 @@ tcp_init()
tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB,
&tcbinfo.porthashmask);
- /* For the moment, we just worry about putting inpcbs here. */
- /*
- * Rationale for a maximum of `nmbclusters':
- * 1) It's a convenient value, sized by config, based on
- * parameters already known to be tweakable as needed
- * for network-intensive systems.
- * 2) Under the Old World Order, when pcbs were stored in
- * mbufs, it was of course impossible to have more
- * pcbs than mbufs.
- * 3) The zone allocator doesn't allocate physical memory
- * for this many pcbs; it just sizes the virtual
- * address space appropriately. Thus, even for very large
- * values of nmbclusters, we don't actually take up much
- * memory unless required.
- */
- tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), nmbclusters,
+ tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
ZONE_INTERRUPT, 0);
if (max_protohdr < sizeof(struct tcpiphdr))
max_protohdr = sizeof(struct tcpiphdr);
@@ -421,14 +409,10 @@ tcp_close(tp)
* way to calculate the pipesize, it will have to do.
*/
i = tp->snd_ssthresh;
-#if 1
if (rt->rt_rmx.rmx_sendpipe != 0)
dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
else
dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
-#else
- dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
-#endif
if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
|| dosavessthresh) {
@@ -505,6 +489,93 @@ tcp_notify(inp, error)
sowwakeup(so);
}
+/*
+ * Sysctl handler that copies a snapshot of the TCP PCB list out to the
+ * caller.
+ *
+ * The output is a leading struct xinpgen, then one struct xtcpcb for
+ * every PCB whose generation count does not exceed the count sampled at
+ * the start of the request, then a trailing struct xinpgen carrying the
+ * counters as they stand at the end, so the caller can tell whether the
+ * list changed while it was being copied.
+ *
+ * When no old-value buffer is supplied only a size estimate is stored
+ * in req->oldidx.  A request that supplies a new value fails with EPERM.
+ * Returns 0 on success or the first error reported by SYSCTL_OUT.
+ */
+static int
+tcp_pcblist SYSCTL_HANDLER_ARGS
+{
+	int error, i, n, s;
+	struct inpcb *inp, **inp_list;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+
+	/*
+	 * The process of preparing the TCB list is too time-consuming and
+	 * resource-intensive to repeat twice on every request.
+	 * Report room for the two xinpgen records plus the current PCB
+	 * count with one eighth of slack added.
+	 */
+	if (req->oldptr == 0) {
+		n = tcbinfo.ipi_count;
+		req->oldidx = 2 * (sizeof xig)
+			+ (n + n/8) * sizeof(struct xtcpcb);
+		return 0;
+	}
+
+	if (req->newptr != 0)
+		return EPERM;
+
+	/*
+	 * OK, now we're committed to doing something.
+	 */
+	s = splnet();
+	gencnt = tcbinfo.ipi_gencnt;
+	n = tcbinfo.ipi_count;
+	splx(s);
+
+	/*
+	 * Clear the record first so that nothing which is not explicitly
+	 * set below is copied out uninitialized.
+	 */
+	bzero(&xig, sizeof xig);
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error)
+		return error;
+
+	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == 0)
+		return ENOMEM;
+
+	/*
+	 * Collect the PCB pointers with network interrupts blocked; the
+	 * copy-out itself is done afterwards, outside the splnet section.
+	 */
+	s = splnet();
+	for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n;
+	     inp = inp->inp_list.le_next) {
+		if (inp->inp_gencnt <= gencnt)
+			inp_list[i++] = inp;
+	}
+	splx(s);
+	n = i;
+
+	/*
+	 * Stop at the first failed copy-out so that a later success
+	 * cannot overwrite the error.
+	 */
+	error = 0;
+	for (i = 0; i < n && error == 0; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt) {
+			struct xtcpcb xt;
+
+			/*
+			 * Zero the whole record: the socket part is only
+			 * filled in when a socket is attached, and the
+			 * alignment member is never written.
+			 */
+			bzero(&xt, sizeof xt);
+			xt.xt_len = sizeof xt;
+			/* XXX should avoid extra copy */
+			bcopy(inp, &xt.xt_inp, sizeof *inp);
+			/* Never copy from a null protocol control block. */
+			if (inp->inp_ppcb)
+				bcopy(inp->inp_ppcb, &xt.xt_tp,
+				    sizeof xt.xt_tp);
+			if (inp->inp_socket)
+				sotoxsocket(inp->inp_socket, &xt.xt_socket);
+			error = SYSCTL_OUT(req, &xt, sizeof xt);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		s = splnet();
+		xig.xig_gen = tcbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = tcbinfo.ipi_count;
+		splx(s);
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+	free(inp_list, M_TEMP);
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+ tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+
void
tcp_ctlinput(cmd, sa, vip)
int cmd;
@@ -517,10 +588,8 @@ tcp_ctlinput(cmd, sa, vip)
if (cmd == PRC_QUENCH)
notify = tcp_quench;
-#if 1
else if (cmd == PRC_MSGSIZE)
notify = tcp_mtudisc;
-#endif
else if (!PRC_IS_REDIRECT(cmd) &&
((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
return;
@@ -548,7 +617,6 @@ tcp_quench(inp, errno)
tp->snd_cwnd = tp->t_maxseg;
}
-#if 1
/*
* When `need fragmentation' ICMP is received, update our idea of the MSS
* based on the new value in the route. Also nudge TCP to send something,
@@ -623,7 +691,6 @@ tcp_mtudisc(inp, errno)
tcp_output(tp);
}
}
-#endif
/*
* Look-up the routing entry to the peer of this inpcb. If no route
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index e928449d908f..157631b8f831 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $Id: tcp_var.h,v 1.42 1998/04/06 06:52:47 phk Exp $
+ * $Id: tcp_var.h,v 1.43 1998/04/24 09:25:39 dg Exp $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -287,6 +287,21 @@ struct tcpstat {
};
/*
+ * TCB structure exported to user-land via sysctl(3).
+ * Evil hack: declare only if in_pcb.h has been included.
+ * Not all of our clients do.
+ */
+#ifdef _NETINET_IN_PCB_H_
+struct xtcpcb {
+ size_t xt_len; /* length of this structure */
+ struct inpcb xt_inp; /* copy of the inpcb */
+ struct tcpcb xt_tp; /* copy of the tcpcb reached through inp_ppcb */
+ struct xsocket xt_socket; /* externalized socket, filled in only if one is attached */
+ u_quad_t xt_alignment_hack; /* NOTE(review): presumably present only for alignment -- confirm */
+};
+#endif
+
+/*
* Names for TCP sysctl objects
*/
#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
@@ -299,7 +314,8 @@ struct tcpstat {
#define TCPCTL_SENDSPACE 8 /* send buffer space */
#define TCPCTL_RECVSPACE 9 /* receive buffer space */
#define TCPCTL_KEEPINIT 10 /* receive buffer space */
-#define TCPCTL_MAXID 11
+#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */
+#define TCPCTL_MAXID 12
#define TCPCTL_NAMES { \
{ 0, 0 }, \
@@ -313,6 +329,7 @@ struct tcpstat {
{ "sendspace", CTLTYPE_INT }, \
{ "recvspace", CTLTYPE_INT }, \
{ "keepinit", CTLTYPE_INT }, \
+ { "pcblist", CTLTYPE_STRUCT }, \
}
#ifdef KERNEL
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index bb1e6da3fba5..107fd06df73c 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
- * $Id: udp_usrreq.c,v 1.45 1998/03/24 18:06:34 wollman Exp $
+ * $Id: udp_usrreq.c,v 1.46 1998/03/28 10:18:26 bde Exp $
*/
#include <sys/param.h>
@@ -101,7 +101,7 @@ udp_init()
udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
&udbinfo.porthashmask);
- udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), nmbclusters,
+ udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets,
ZONE_INTERRUPT, 0);
}
@@ -363,6 +363,92 @@ udp_ctlinput(cmd, sa, vip)
}
+/*
+ * Sysctl handler that copies a snapshot of the UDP PCB list out to the
+ * caller.
+ *
+ * The output is a leading struct xinpgen, then one struct xinpcb for
+ * every PCB whose generation count does not exceed the count sampled at
+ * the start of the request, then a trailing struct xinpgen carrying the
+ * counters as they stand at the end, so the caller can tell whether the
+ * list changed while it was being copied.
+ *
+ * When no old-value buffer is supplied only a size estimate is stored
+ * in req->oldidx.  A request that supplies a new value fails with EPERM.
+ * Returns 0 on success or the first error reported by SYSCTL_OUT.
+ */
static int
+udp_pcblist SYSCTL_HANDLER_ARGS
+{
+	int error, i, n, s;
+	struct inpcb *inp, **inp_list;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+
+	/*
+	 * The process of preparing the PCB list is too time-consuming and
+	 * resource-intensive to repeat twice on every request.
+	 * Report room for the two xinpgen records plus the current PCB
+	 * count with one eighth of slack added.
+	 */
+	if (req->oldptr == 0) {
+		n = udbinfo.ipi_count;
+		req->oldidx = 2 * (sizeof xig)
+			+ (n + n/8) * sizeof(struct xinpcb);
+		return 0;
+	}
+
+	if (req->newptr != 0)
+		return EPERM;
+
+	/*
+	 * OK, now we're committed to doing something.
+	 */
+	s = splnet();
+	gencnt = udbinfo.ipi_gencnt;
+	n = udbinfo.ipi_count;
+	splx(s);
+
+	/*
+	 * Clear the record first so that nothing which is not explicitly
+	 * set below is copied out uninitialized.
+	 */
+	bzero(&xig, sizeof xig);
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error)
+		return error;
+
+	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == 0)
+		return ENOMEM;
+
+	/*
+	 * Collect the PCB pointers with network interrupts blocked; the
+	 * copy-out itself is done afterwards, outside the splnet section.
+	 */
+	s = splnet();
+	for (inp = udbinfo.listhead->lh_first, i = 0; inp && i < n;
+	     inp = inp->inp_list.le_next) {
+		if (inp->inp_gencnt <= gencnt)
+			inp_list[i++] = inp;
+	}
+	splx(s);
+	n = i;
+
+	/*
+	 * Stop at the first failed copy-out so that a later success
+	 * cannot overwrite the error.
+	 */
+	error = 0;
+	for (i = 0; i < n && error == 0; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt) {
+			struct xinpcb xi;
+
+			/*
+			 * Zero the whole record: the socket part is only
+			 * filled in when a socket is attached.
+			 */
+			bzero(&xi, sizeof xi);
+			xi.xi_len = sizeof xi;
+			/* XXX should avoid extra copy */
+			bcopy(inp, &xi.xi_inp, sizeof *inp);
+			if (inp->inp_socket)
+				sotoxsocket(inp->inp_socket, &xi.xi_socket);
+			error = SYSCTL_OUT(req, &xi, sizeof xi);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		s = splnet();
+		xig.xig_gen = udbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = udbinfo.ipi_count;
+		splx(s);
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+	free(inp_list, M_TEMP);
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+ udp_pcblist, "S,xinpcb", "List of active UDP sockets");
+
+static int
udp_output(inp, m, addr, control, p)
register struct inpcb *inp;
register struct mbuf *m;
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
index fb21bfd119ad..8aee42687512 100644
--- a/sys/netinet/udp_var.h
+++ b/sys/netinet/udp_var.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)udp_var.h 8.1 (Berkeley) 6/10/93
- * $Id: udp_var.h,v 1.13 1997/08/16 19:15:42 wollman Exp $
+ * $Id: udp_var.h,v 1.14 1997/09/07 05:26:51 bde Exp $
*/
#ifndef _NETINET_UDP_VAR_H_
@@ -89,7 +89,8 @@ struct udpstat {
#define UDPCTL_STATS 2 /* statistics (read-only) */
#define UDPCTL_MAXDGRAM 3 /* max datagram size */
#define UDPCTL_RECVSPACE 4 /* default receive buffer space */
-#define UDPCTL_MAXID 5
+#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
+#define UDPCTL_MAXID 6
#define UDPCTL_NAMES { \
{ 0, 0 }, \
@@ -97,6 +98,7 @@ struct udpstat {
{ "stats", CTLTYPE_STRUCT }, \
{ "maxdgram", CTLTYPE_INT }, \
{ "recvspace", CTLTYPE_INT }, \
+ { "pcblist", CTLTYPE_STRUCT }, \
}
#ifdef KERNEL
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index f0ae22b45614..8eaadcf7391b 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)socketvar.h 8.3 (Berkeley) 2/19/95
- * $Id: socketvar.h,v 1.24 1998/02/01 20:08:38 bde Exp $
+ * $Id: socketvar.h,v 1.25 1998/03/01 19:39:29 guido Exp $
*/
#ifndef _SYS_SOCKETVAR_H_
@@ -46,7 +46,10 @@
* handle on protocol and pointer to protocol
* private data and error information.
*/
+typedef u_quad_t so_gen_t;
+
struct socket {
+ struct vm_zone *so_zone; /* zone we were allocated from */
short so_type; /* generic type, see socket.h */
short so_options; /* from socket call, see socket.h */
short so_linger; /* time to linger while closing */
@@ -99,10 +102,10 @@ struct socket {
#define SB_NOTIFY (SB_WAIT|SB_SEL|SB_ASYNC)
#define SB_NOINTR 0x40 /* operations not interruptible */
- caddr_t so_tpcb; /* Wisc. protocol control block XXX */
void (*so_upcall) __P((struct socket *so, caddr_t arg, int waitf));
caddr_t so_upcallarg; /* Arg for above */
uid_t so_uid; /* who opened the socket */
+ so_gen_t so_gencnt; /* generation count */
};
/*
@@ -124,6 +127,37 @@ struct socket {
#define SS_INCOMP 0x0800 /* unaccepted, incomplete connection */
#define SS_COMP 0x1000 /* unaccepted, complete connection */
+/*
+ * Externalized form of struct socket used by the sysctl(3) interface.
+ */
+struct xsocket {
+ size_t xso_len; /* length of this structure */
+ struct socket *xso_so; /* makes a convenient handle sometimes */
+ short so_type; /* as in struct socket: generic type, see socket.h */
+ short so_options; /* as in struct socket: from socket call, see socket.h */
+ short so_linger; /* as in struct socket: time to linger while closing */
+ short so_state;
+ caddr_t so_pcb; /* another convenient handle */
+ int xso_protocol; /* NOTE(review): presumably the protocol number -- confirm in sotoxsocket */
+ int xso_family; /* NOTE(review): presumably the domain family -- confirm in sotoxsocket */
+ short so_qlen;
+ short so_incqlen;
+ short so_qlimit;
+ short so_timeo;
+ u_short so_error;
+ pid_t so_pgid;
+ u_long so_oobmark;
+ struct xsockbuf {
+ u_long sb_cc;
+ u_long sb_hiwat;
+ u_long sb_mbcnt;
+ u_long sb_mbmax;
+ long sb_lowat;
+ short sb_flags;
+ short sb_timeo;
+ } so_rcv, so_snd; /* NOTE(review): presumably the receive and send buffers as converted by sbtoxsockbuf -- confirm */
+ uid_t so_uid; /* XXX */
+};
/*
* Macros for sockets and socket buffering.
@@ -202,11 +236,13 @@ struct socket {
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_PCB);
-MALLOC_DECLARE(M_SOCKET);
MALLOC_DECLARE(M_SONAME);
#endif
+extern int maxsockets;
extern u_long sb_max;
+extern struct vm_zone *socket_zone;
+extern so_gen_t so_gencnt;
struct file;
struct filedesc;
@@ -248,10 +284,12 @@ void sbflush __P((struct sockbuf *sb));
void sbinsertoob __P((struct sockbuf *sb, struct mbuf *m0));
void sbrelease __P((struct sockbuf *sb));
int sbreserve __P((struct sockbuf *sb, u_long cc));
+void sbtoxsockbuf __P((struct sockbuf *sb, struct xsockbuf *xsb));
int sbwait __P((struct sockbuf *sb));
int sb_lock __P((struct sockbuf *sb));
int soabort __P((struct socket *so));
int soaccept __P((struct socket *so, struct sockaddr **nam));
+struct socket *soalloc __P((int waitok));
int sobind __P((struct socket *so, struct sockaddr *nam, struct proc *p));
void socantrcvmore __P((struct socket *so));
void socantsendmore __P((struct socket *so));
@@ -260,6 +298,7 @@ int soconnect __P((struct socket *so, struct sockaddr *nam, struct proc *p));
int soconnect2 __P((struct socket *so1, struct socket *so2));
int socreate __P((int dom, struct socket **aso, int type, int proto,
struct proc *p));
+void sodealloc __P((struct socket *so));
int sodisconnect __P((struct socket *so));
void sofree __P((struct socket *so));
int sogetopt __P((struct socket *so, int level, int optname,
@@ -287,6 +326,7 @@ int sosend __P((struct socket *so, struct sockaddr *addr, struct uio *uio,
int sosetopt __P((struct socket *so, int level, int optname,
struct mbuf *m0, struct proc *p));
int soshutdown __P((struct socket *so, int how));
+void sotoxsocket __P((struct socket *so, struct xsocket *xso));
void sowakeup __P((struct socket *so, struct sockbuf *sb));
#endif /* KERNEL */
diff --git a/sys/sys/un.h b/sys/sys/un.h
index 3d7d7ede9131..d0c0af9e4e04 100644
--- a/sys/sys/un.h
+++ b/sys/sys/un.h
@@ -31,16 +31,12 @@
* SUCH DAMAGE.
*
* @(#)un.h 8.3 (Berkeley) 2/19/95
- * $Id: un.h,v 1.12 1997/04/27 20:01:29 wollman Exp $
+ * $Id: un.h,v 1.13 1997/08/16 19:16:15 wollman Exp $
*/
#ifndef _SYS_UN_H_
#define _SYS_UN_H_
-#ifdef KERNEL
-#include <sys/unpcb.h>
-#endif /* KERNEL */
-
/*
* Definitions for UNIX IPC domain.
*/
@@ -59,6 +55,7 @@ int uipc_usrreq __P((struct socket *so, int req, struct mbuf *m,
int unp_connect2 __P((struct socket *so, struct socket *so2));
void unp_dispose __P((struct mbuf *m));
int unp_externalize __P((struct mbuf *rights));
+void unp_init __P((void));
extern struct pr_usrreqs uipc_usrreqs;
#else /* !KERNEL */
diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h
index 6cf0855f339d..14191537f059 100644
--- a/sys/sys/unpcb.h
+++ b/sys/sys/unpcb.h
@@ -31,12 +31,14 @@
* SUCH DAMAGE.
*
* @(#)unpcb.h 8.1 (Berkeley) 6/2/93
- * $Id: unpcb.h,v 1.5 1997/02/22 09:46:22 peter Exp $
+ * $Id: unpcb.h,v 1.6 1997/08/16 19:16:16 wollman Exp $
*/
#ifndef _SYS_UNPCB_H_
#define _SYS_UNPCB_H_
+#include <sys/queue.h>
+
/*
* Protocol control block for an active
* instance of a UNIX internal protocol.
@@ -62,18 +64,51 @@
* so that changes in the sockbuf may be computed to modify
* back pressure on the sender accordingly.
*/
+typedef u_quad_t unp_gen_t;
+LIST_HEAD(unp_head, unpcb);
+
struct unpcb {
+ LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */
struct socket *unp_socket; /* pointer back to socket */
struct vnode *unp_vnode; /* if associated with file */
ino_t unp_ino; /* fake inode number */
struct unpcb *unp_conn; /* control block of connected socket */
- struct unpcb *unp_refs; /* referencing socket linked list */
- struct unpcb *unp_nextref; /* link in unp_refs list */
+ struct unp_head unp_refs; /* referencing socket linked list */
+ LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
struct sockaddr_un *unp_addr; /* bound address of socket */
int unp_cc; /* copy of rcv.sb_cc */
int unp_mbcnt; /* copy of rcv.sb_mbcnt */
+ unp_gen_t unp_gencnt; /* generation count of this instance */
};
#define sotounpcb(so) ((struct unpcb *)((so)->so_pcb))
-#endif
+/* Hack alert -- this structure depends on <sys/socketvar.h>. */
+#ifdef _SYS_SOCKETVAR_H_
+struct xunpcb {
+ size_t xu_len; /* length of this structure */
+ struct unpcb *xu_unpp; /* to help netstat, fstat */
+ struct unpcb xu_unp; /* our information */
+ union {
+ struct sockaddr_un xuu_addr; /* our bound address */
+ char xu_dummy1[256]; /* keeps this union at least 256 bytes long */
+ } xu_au;
+#define xu_addr xu_au.xuu_addr
+ union {
+ struct sockaddr_un xuu_caddr; /* their bound address */
+ char xu_dummy2[256]; /* keeps this union at least 256 bytes long */
+ } xu_cau;
+#define xu_caddr xu_cau.xuu_caddr
+ struct xsocket xu_socket; /* NOTE(review): presumably the owning socket in externalized form -- confirm */
+ u_quad_t xu_alignment_hack; /* NOTE(review): presumably present only for alignment -- confirm */
+};
+
+struct xunpgen {
+ size_t xug_len; /* NOTE(review): presumably the length of this structure, like xu_len -- confirm */
+ u_int xug_count; /* NOTE(review): presumably a count of PCBs -- confirm against the PF_LOCAL pcblist handler */
+ unp_gen_t xug_gen; /* NOTE(review): presumably the unpcb generation count at the time of the request -- confirm */
+ so_gen_t xug_sogen; /* NOTE(review): presumably a copy of so_gencnt -- confirm */
+};
+#endif /* _SYS_SOCKETVAR_H_ */
+
+#endif /* _SYS_UNPCB_H_ */