aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_usrreq.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netinet/tcp_usrreq.c')
-rw-r--r-- sys/netinet/tcp_usrreq.c 1319
1 files changed, 651 insertions, 668 deletions
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 198852cc8fac..3e705181d5e8 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -33,19 +33,14 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_kern_tls.h"
-#include "opt_tcpdebug.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -82,6 +77,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
+#include <netinet/ip.h>
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet/ip6.h>
@@ -102,9 +98,6 @@ __FBSDID("$FreeBSD$");
#ifdef TCPPCAP
#include <netinet/tcp_pcap.h>
#endif
-#ifdef TCPDEBUG
-#include <netinet/tcp_debug.h>
-#endif
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
@@ -121,39 +114,39 @@ __FBSDID("$FreeBSD$");
* TCP protocol interface to socket abstraction.
*/
#ifdef INET
-static int tcp_connect(struct tcpcb *, struct sockaddr *,
+static int tcp_connect(struct tcpcb *, struct sockaddr_in *,
struct thread *td);
#endif /* INET */
#ifdef INET6
-static int tcp6_connect(struct tcpcb *, struct sockaddr *,
+static int tcp6_connect(struct tcpcb *, struct sockaddr_in6 *,
struct thread *td);
#endif /* INET6 */
static void tcp_disconnect(struct tcpcb *);
static void tcp_usrclosed(struct tcpcb *);
-static void tcp_fill_info(struct tcpcb *, struct tcp_info *);
+static void tcp_fill_info(const struct tcpcb *, struct tcp_info *);
static int tcp_pru_options_support(struct tcpcb *tp, int flags);
-#ifdef TCPDEBUG
-#define TCPDEBUG0 int ostate = 0
-#define TCPDEBUG1() ostate = tp ? tp->t_state : 0
-#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
- tcp_trace(TA_USER, ostate, tp, 0, 0, req)
-#else
-#define TCPDEBUG0
-#define TCPDEBUG1()
-#define TCPDEBUG2(req)
-#endif
+static void
+tcp_bblog_pru(struct tcpcb *tp, uint32_t pru, int error)
+{
+ struct tcp_log_buffer *lgb;
-/*
- * tcp_require_unique port requires a globally-unique source port for each
- * outgoing connection. The default is to require the 4-tuple to be unique.
- */
-VNET_DEFINE(int, tcp_require_unique_port) = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, require_unique_port,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_require_unique_port), 0,
- "Require globally-unique ephemeral port for outgoing connections");
-#define V_tcp_require_unique_port VNET(tcp_require_unique_port)
+ KASSERT(tp != NULL, ("tcp_bblog_pru: tp == NULL"));
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
+ if (tcp_bblogging_on(tp)) {
+ lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_PRU, error,
+ 0, NULL, false, NULL, NULL, 0, NULL);
+ } else {
+ lgb = NULL;
+ }
+ if (lgb != NULL) {
+ if (error >= 0) {
+ lgb->tlb_errno = (uint32_t)error;
+ }
+ lgb->tlb_flex1 = pru;
+ }
+}
/*
* TCP attaches to socket via pru_attach(), reserving space,
@@ -165,17 +158,13 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td)
struct inpcb *inp;
struct tcpcb *tp = NULL;
int error;
- TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
- TCPDEBUG1();
- if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
- if (error)
- goto out;
- }
+ error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
+ if (error)
+ goto out;
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
@@ -183,28 +172,23 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td)
if (error)
goto out;
inp = sotoinpcb(so);
-#ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO) {
- inp->inp_vflag |= INP_IPV6;
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
- inp->inp_vflag |= INP_IPV4;
- inp->in6p_hops = -1; /* use kernel default */
- }
- else
-#endif
- inp->inp_vflag |= INP_IPV4;
tp = tcp_newtcpcb(inp);
if (tp == NULL) {
error = ENOBUFS;
- in_pcbdetach(inp);
in_pcbfree(inp);
goto out;
}
tp->t_state = TCPS_CLOSED;
+ /* Can we inherit anything from the listener? */
+ if ((so->so_listen != NULL) &&
+ (so->so_listen->so_pcb != NULL) &&
+ (tp->t_fb->tfb_inherit != NULL)) {
+ (*tp->t_fb->tfb_inherit)(tp, sotoinpcb(so->so_listen));
+ }
+ tcp_bblog_pru(tp, PRU_ATTACH, error);
INP_WUNLOCK(inp);
TCPSTATES_INC(TCPS_CLOSED);
out:
- TCPDEBUG2(PRU_ATTACH);
TCP_PROBE2(debug__user, tp, PRU_ATTACH);
return (error);
}
@@ -229,66 +213,12 @@ tcp_usr_detach(struct socket *so)
tp = intotcpcb(inp);
- if (inp->inp_flags & INP_TIMEWAIT) {
- /*
- * There are two cases to handle: one in which the time wait
- * state is being discarded (INP_DROPPED), and one in which
- * this connection will remain in timewait. In the former,
- * it is time to discard all state (except tcptw, which has
- * already been discarded by the timewait close code, which
- * should be further up the call stack somewhere). In the
- * latter case, we detach from the socket, but leave the pcb
- * present until timewait ends.
- *
- * XXXRW: Would it be cleaner to free the tcptw here?
- *
- * Astute question indeed, from twtcp perspective there are
- * four cases to consider:
- *
- * #1 tcp_usr_detach is called at tcptw creation time by
- * tcp_twstart, then do not discard the newly created tcptw
- * and leave inpcb present until timewait ends
- * #2 tcp_usr_detach is called at tcptw creation time by
- * tcp_twstart, but connection is local and tw will be
- * discarded immediately
- * #3 tcp_usr_detach is called at timewait end (or reuse) by
- * tcp_twclose, then the tcptw has already been discarded
- * (or reused) and inpcb is freed here
- * #4 tcp_usr_detach is called() after timewait ends (or reuse)
- * (e.g. by soclose), then tcptw has already been discarded
- * (or reused) and inpcb is freed here
- *
- * In all three cases the tcptw should not be freed here.
- */
- if (inp->inp_flags & INP_DROPPED) {
- KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
- "INP_DROPPED && tp != NULL"));
- in_pcbdetach(inp);
- in_pcbfree(inp);
- } else {
- in_pcbdetach(inp);
- INP_WUNLOCK(inp);
- }
- } else {
- /*
- * If the connection is not in timewait, we consider two
- * two conditions: one in which no further processing is
- * necessary (dropped || embryonic), and one in which TCP is
- * not yet done, but no longer requires the socket, so the
- * pcb will persist for the time being.
- *
- * XXXRW: Does the second case still occur?
- */
- if (inp->inp_flags & INP_DROPPED ||
- tp->t_state < TCPS_SYN_SENT) {
- tcp_discardcb(tp);
- in_pcbdetach(inp);
- in_pcbfree(inp);
- } else {
- in_pcbdetach(inp);
- INP_WUNLOCK(inp);
- }
- }
+ KASSERT(inp->inp_flags & INP_DROPPED ||
+ tp->t_state < TCPS_SYN_SENT,
+ ("%s: inp %p not dropped or embryonic", __func__, inp));
+
+ tcp_discardcb(tp);
+ in_pcbfree(inp);
}
#ifdef INET
@@ -300,9 +230,18 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
struct sockaddr_in *sinp;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (EINVAL);
+ }
+ tp = intotcpcb(inp);
+
sinp = (struct sockaddr_in *)nam;
if (nam->sa_family != AF_INET) {
/*
@@ -310,35 +249,29 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
*/
if (nam->sa_family != AF_UNSPEC ||
nam->sa_len < offsetof(struct sockaddr_in, sin_zero) ||
- sinp->sin_addr.s_addr != INADDR_ANY)
- return (EAFNOSUPPORT);
+ sinp->sin_addr.s_addr != INADDR_ANY) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
nam->sa_family = AF_INET;
}
- if (nam->sa_len != sizeof(*sinp))
- return (EINVAL);
-
+ if (nam->sa_len != sizeof(*sinp)) {
+ error = EINVAL;
+ goto out;
+ }
/*
* Must check for multicast addresses and disallow binding
* to them.
*/
- if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
- return (EAFNOSUPPORT);
-
- TCPDEBUG0;
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
- INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
+ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
goto out;
}
- tp = intotcpcb(inp);
- TCPDEBUG1();
INP_HASH_WLOCK(&V_tcbinfo);
- error = in_pcbbind(inp, nam, td->td_ucred);
+ error = in_pcbbind(inp, sinp, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
out:
- TCPDEBUG2(PRU_BIND);
+ tcp_bblog_pru(tp, PRU_BIND, error);
TCP_PROBE2(debug__user, tp, PRU_BIND);
INP_WUNLOCK(inp);
@@ -352,34 +285,39 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
struct sockaddr_in6 *sin6;
u_char vflagsav;
- sin6 = (struct sockaddr_in6 *)nam;
- if (nam->sa_family != AF_INET6)
- return (EAFNOSUPPORT);
- if (nam->sa_len != sizeof(*sin6))
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
return (EINVAL);
+ }
+ tp = intotcpcb(inp);
+
+ vflagsav = inp->inp_vflag;
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (nam->sa_family != AF_INET6) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if (nam->sa_len != sizeof(*sin6)) {
+ error = EINVAL;
+ goto out;
+ }
/*
* Must check for multicast addresses and disallow binding
* to them.
*/
- if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
- return (EAFNOSUPPORT);
-
- TCPDEBUG0;
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
- INP_WLOCK(inp);
- vflagsav = inp->inp_vflag;
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
+ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
+ error = EAFNOSUPPORT;
goto out;
}
- tp = intotcpcb(inp);
- TCPDEBUG1();
+
INP_HASH_WLOCK(&V_tcbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
@@ -398,19 +336,18 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
- error = in_pcbbind(inp, (struct sockaddr *)&sin,
- td->td_ucred);
+ error = in_pcbbind(inp, &sin, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
goto out;
}
}
#endif
- error = in6_pcbbind(inp, nam, td->td_ucred);
+ error = in6_pcbbind(inp, sin6, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
out:
if (error != 0)
inp->inp_vflag = vflagsav;
- TCPDEBUG2(PRU_BIND);
+ tcp_bblog_pru(tp, PRU_BIND, error);
TCP_PROBE2(debug__user, tp, PRU_BIND);
INP_WUNLOCK(inp);
return (error);
@@ -426,18 +363,17 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
- TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
- goto out;
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (EINVAL);
}
tp = intotcpcb(inp);
- TCPDEBUG1();
+
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error != 0) {
@@ -461,11 +397,11 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
- if (IS_FASTOPEN(tp->t_flags))
+ if (tp->t_flags & TF_FASTOPEN)
tp->t_tfo_pending = tcp_fastopen_alloc_counter();
out:
- TCPDEBUG2(PRU_LISTEN);
+ tcp_bblog_pru(tp, PRU_LISTEN, error);
TCP_PROBE2(debug__user, tp, PRU_LISTEN);
INP_WUNLOCK(inp);
return (error);
@@ -478,20 +414,20 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
u_char vflagsav;
- TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
- goto out;
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (EINVAL);
}
- vflagsav = inp->inp_vflag;
tp = intotcpcb(inp);
- TCPDEBUG1();
+
+ vflagsav = inp->inp_vflag;
+
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error != 0) {
@@ -518,14 +454,14 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
- if (IS_FASTOPEN(tp->t_flags))
+ if (tp->t_flags & TF_FASTOPEN)
tp->t_tfo_pending = tcp_fastopen_alloc_counter();
if (error != 0)
inp->inp_vflag = vflagsav;
out:
- TCPDEBUG2(PRU_LISTEN);
+ tcp_bblog_pru(tp, PRU_LISTEN, error);
TCP_PROBE2(debug__user, tp, PRU_LISTEN);
INP_WUNLOCK(inp);
return (error);
@@ -546,45 +482,46 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
struct epoch_tracker et;
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
struct sockaddr_in *sinp;
- sinp = (struct sockaddr_in *)nam;
- if (nam->sa_family != AF_INET)
- return (EAFNOSUPPORT);
- if (nam->sa_len != sizeof (*sinp))
- return (EINVAL);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNREFUSED);
+ }
+ tp = intotcpcb(inp);
+ sinp = (struct sockaddr_in *)nam;
+ if (nam->sa_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if (nam->sa_len != sizeof (*sinp)) {
+ error = EINVAL;
+ goto out;
+ }
/*
* Must disallow TCP ``connections'' to multicast addresses.
*/
- if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
- return (EAFNOSUPPORT);
- if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST)
- return (EACCES);
- if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
- return (error);
-
- TCPDEBUG0;
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
- INP_WLOCK(inp);
- if (inp->inp_flags & INP_TIMEWAIT) {
- error = EADDRINUSE;
+ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
goto out;
}
- if (inp->inp_flags & INP_DROPPED) {
- error = ECONNREFUSED;
+ if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
+ error = EACCES;
goto out;
}
+ if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
+ goto out;
if (SOLISTENING(so)) {
error = EOPNOTSUPP;
goto out;
}
- tp = intotcpcb(inp);
- TCPDEBUG1();
NET_EPOCH_ENTER(et);
- if ((error = tcp_connect(tp, nam, td)) != 0)
+ if ((error = tcp_connect(tp, sinp, td)) != 0)
goto out_in_epoch;
#ifdef TCP_OFFLOAD
if (registered_toedevs > 0 &&
@@ -593,11 +530,13 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out_in_epoch;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tp->t_fb->tfb_tcp_output(tp);
+ error = tcp_output(tp);
+ KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
+ ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
out_in_epoch:
NET_EPOCH_EXIT(et);
out:
- TCPDEBUG2(PRU_CONNECT);
+ tcp_bblog_pru(tp, PRU_CONNECT, error);
TCP_PROBE2(debug__user, tp, PRU_CONNECT);
INP_WUNLOCK(inp);
return (error);
@@ -611,44 +550,43 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
struct epoch_tracker et;
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
struct sockaddr_in6 *sin6;
u_int8_t incflagsav;
u_char vflagsav;
- TCPDEBUG0;
-
- sin6 = (struct sockaddr_in6 *)nam;
- if (nam->sa_family != AF_INET6)
- return (EAFNOSUPPORT);
- if (nam->sa_len != sizeof (*sin6))
- return (EINVAL);
-
- /*
- * Must disallow TCP ``connections'' to multicast addresses.
- */
- if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
- return (EAFNOSUPPORT);
-
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNREFUSED);
+ }
+ tp = intotcpcb(inp);
+
vflagsav = inp->inp_vflag;
incflagsav = inp->inp_inc.inc_flags;
- if (inp->inp_flags & INP_TIMEWAIT) {
- error = EADDRINUSE;
+
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (nam->sa_family != AF_INET6) {
+ error = EAFNOSUPPORT;
goto out;
}
- if (inp->inp_flags & INP_DROPPED) {
- error = ECONNREFUSED;
+ if (nam->sa_len != sizeof (*sin6)) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Must disallow TCP ``connections'' to multicast addresses.
+ */
+ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
+ error = EAFNOSUPPORT;
goto out;
}
if (SOLISTENING(so)) {
error = EINVAL;
goto out;
}
- tp = intotcpcb(inp);
- TCPDEBUG1();
#ifdef INET
/*
* XXXRW: Some confusion: V4/V6 flags relate to binding, and
@@ -682,7 +620,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
NET_EPOCH_ENTER(et);
- if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
+ if ((error = tcp_connect(tp, &sin, td)) != 0)
goto out_in_epoch;
#ifdef TCP_OFFLOAD
if (registered_toedevs > 0 &&
@@ -690,7 +628,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
(error = tcp_offload_connect(so, nam)) == 0)
goto out_in_epoch;
#endif
- error = tp->t_fb->tfb_tcp_output(tp);
+ error = tcp_output(tp);
goto out_in_epoch;
} else {
if ((inp->inp_vflag & INP_IPV6) == 0) {
@@ -704,22 +642,22 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
inp->inp_inc.inc_flags |= INC_ISIPV6;
- if ((error = tcp6_connect(tp, nam, td)) != 0)
- goto out;
+ NET_EPOCH_ENTER(et);
+ if ((error = tcp6_connect(tp, sin6, td)) != 0)
+ goto out_in_epoch;
#ifdef TCP_OFFLOAD
if (registered_toedevs > 0 &&
(so->so_options & SO_NO_OFFLOAD) == 0 &&
(error = tcp_offload_connect(so, nam)) == 0)
- goto out;
+ goto out_in_epoch;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- NET_EPOCH_ENTER(et);
- error = tp->t_fb->tfb_tcp_output(tp);
-#ifdef INET
+ error = tcp_output(tp);
out_in_epoch:
-#endif
NET_EPOCH_EXIT(et);
out:
+ KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
+ ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
/*
* If the implicit bind in the connect call fails, restore
* the flags we modified.
@@ -729,7 +667,7 @@ out:
inp->inp_inc.inc_flags = incflagsav;
}
- TCPDEBUG2(PRU_CONNECT);
+ tcp_bblog_pru(tp, PRU_CONNECT, error);
TCP_PROBE2(debug__user, tp, PRU_CONNECT);
INP_WUNLOCK(inp);
return (error);
@@ -755,22 +693,22 @@ tcp_usr_disconnect(struct socket *so)
struct epoch_tracker et;
int error = 0;
- TCPDEBUG0;
NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & INP_TIMEWAIT)
- goto out;
if (inp->inp_flags & INP_DROPPED) {
- error = ECONNRESET;
- goto out;
+ INP_WUNLOCK(inp);
+ NET_EPOCH_EXIT(et);
+ return (ECONNRESET);
}
tp = intotcpcb(inp);
- TCPDEBUG1();
+
+ if (tp->t_state == TCPS_TIME_WAIT)
+ goto out;
tcp_disconnect(tp);
out:
- TCPDEBUG2(PRU_DISCONNECT);
+ tcp_bblog_pru(tp, PRU_DISCONNECT, error);
TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
INP_WUNLOCK(inp);
NET_EPOCH_EXIT(et);
@@ -783,100 +721,83 @@ out:
* just return the address of the peer, storing through addr.
*/
static int
-tcp_usr_accept(struct socket *so, struct sockaddr **nam)
+tcp_usr_accept(struct socket *so, struct sockaddr *sa)
{
+ struct inpcb *inp;
+ struct tcpcb *tp;
int error = 0;
- struct inpcb *inp = NULL;
- struct tcpcb *tp = NULL;
- struct in_addr addr;
- in_port_t port = 0;
- TCPDEBUG0;
-
- if (so->so_state & SS_ISDISCONNECTED)
- return (ECONNABORTED);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = ECONNABORTED;
- goto out;
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNABORTED);
}
tp = intotcpcb(inp);
- TCPDEBUG1();
-
- /*
- * We inline in_getpeeraddr and COMMON_END here, so that we can
- * copy the data of interest and defer the malloc until after we
- * release the lock.
- */
- port = inp->inp_fport;
- addr = inp->inp_faddr;
-out:
- TCPDEBUG2(PRU_ACCEPT);
+ if (so->so_state & SS_ISDISCONNECTED)
+ error = ECONNABORTED;
+ else
+ *(struct sockaddr_in *)sa = (struct sockaddr_in ){
+ .sin_family = AF_INET,
+ .sin_len = sizeof(struct sockaddr_in),
+ .sin_port = inp->inp_fport,
+ .sin_addr = inp->inp_faddr,
+ };
+ tcp_bblog_pru(tp, PRU_ACCEPT, error);
TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
- if (error == 0)
- *nam = in_sockaddr(port, &addr);
- return error;
+
+ return (error);
}
#endif /* INET */
#ifdef INET6
static int
-tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
+tcp6_usr_accept(struct socket *so, struct sockaddr *sa)
{
- struct inpcb *inp = NULL;
+ struct inpcb *inp;
+ struct tcpcb *tp;
int error = 0;
- struct tcpcb *tp = NULL;
- struct in_addr addr;
- struct in6_addr addr6;
- struct epoch_tracker et;
- in_port_t port = 0;
- int v4 = 0;
- TCPDEBUG0;
-
- if (so->so_state & SS_ISDISCONNECTED)
- return (ECONNABORTED);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
- NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = ECONNABORTED;
- goto out;
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNABORTED);
}
tp = intotcpcb(inp);
- TCPDEBUG1();
- /*
- * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
- * copy the data of interest and defer the malloc until after we
- * release the lock.
- */
- if (inp->inp_vflag & INP_IPV4) {
- v4 = 1;
- port = inp->inp_fport;
- addr = inp->inp_faddr;
+ if (so->so_state & SS_ISDISCONNECTED) {
+ error = ECONNABORTED;
} else {
- port = inp->inp_fport;
- addr6 = inp->in6p_faddr;
+ if (inp->inp_vflag & INP_IPV4) {
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_len = sizeof(struct sockaddr_in),
+ .sin_port = inp->inp_fport,
+ .sin_addr = inp->inp_faddr,
+ };
+ in6_sin_2_v4mapsin6(&sin, (struct sockaddr_in6 *)sa);
+ } else {
+ *(struct sockaddr_in6 *)sa = (struct sockaddr_in6 ){
+ .sin6_family = AF_INET6,
+ .sin6_len = sizeof(struct sockaddr_in6),
+ .sin6_port = inp->inp_fport,
+ .sin6_addr = inp->in6p_faddr,
+ };
+ /* XXX: should catch errors */
+ (void)sa6_recoverscope((struct sockaddr_in6 *)sa);
+ }
}
-out:
- TCPDEBUG2(PRU_ACCEPT);
+ tcp_bblog_pru(tp, PRU_ACCEPT, error);
TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
- NET_EPOCH_EXIT(et);
- if (error == 0) {
- if (v4)
- *nam = in6_v4mapsin6_sockaddr(port, &addr);
- else
- *nam = in6_sockaddr(port, &addr6);
- }
- return error;
+
+ return (error);
}
#endif /* INET6 */
@@ -884,34 +805,56 @@ out:
* Mark the connection as being incapable of further output.
*/
static int
-tcp_usr_shutdown(struct socket *so)
+tcp_usr_shutdown(struct socket *so, enum shutdown_how how)
{
- int error = 0;
- struct inpcb *inp;
- struct tcpcb *tp = NULL;
struct epoch_tracker et;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+ int error = 0;
- TCPDEBUG0;
- NET_EPOCH_ENTER(et);
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("inp == NULL"));
- INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = ECONNRESET;
- goto out;
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ if (how != SHUT_WR) {
+ so->so_error = ECONNABORTED;
+ solisten_wakeup(so); /* unlocks so */
+ } else
+ SOCK_UNLOCK(so);
+ return (ENOTCONN);
+ } else if ((so->so_state &
+ (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
+ SOCK_UNLOCK(so);
+ return (ENOTCONN);
}
- tp = intotcpcb(inp);
- TCPDEBUG1();
- socantsendmore(so);
- tcp_usrclosed(tp);
- if (!(inp->inp_flags & INP_DROPPED))
- error = tp->t_fb->tfb_tcp_output(tp);
+ SOCK_UNLOCK(so);
-out:
- TCPDEBUG2(PRU_SHUTDOWN);
- TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
- INP_WUNLOCK(inp);
- NET_EPOCH_EXIT(et);
+ switch (how) {
+ case SHUT_RD:
+ sorflush(so);
+ break;
+ case SHUT_RDWR:
+ sorflush(so);
+ /* FALLTHROUGH */
+ case SHUT_WR:
+ /*
+ * XXXGL: mimicking old soshutdown() here. But shouldn't we
+ * return ECONNRESET for SHUT_RD as well?
+ */
+ INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
+ }
+
+ socantsendmore(so);
+ NET_EPOCH_ENTER(et);
+ tcp_usrclosed(tp);
+ error = tcp_output_nodrop(tp);
+ tcp_bblog_pru(tp, PRU_SHUTDOWN, error);
+ TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
+ error = tcp_unlock_or_drop(tp, error);
+ NET_EPOCH_EXIT(et);
+ }
+ wakeup(&so->so_timeo);
return (error);
}
@@ -924,19 +867,19 @@ tcp_usr_rcvd(struct socket *so, int flags)
{
struct epoch_tracker et;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
- int error = 0;
+ struct tcpcb *tp;
+ int outrv = 0, error = 0;
- TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = ECONNRESET;
- goto out;
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
}
tp = intotcpcb(inp);
- TCPDEBUG1();
+
+ NET_EPOCH_ENTER(et);
/*
* For passively-created TFO connections, don't attempt a window
* update while still in SYN_RECEIVED as this may trigger an early
@@ -944,21 +887,19 @@ tcp_usr_rcvd(struct socket *so, int flags)
* application response data, or failing that, when the DELACK timer
* expires.
*/
- if (IS_FASTOPEN(tp->t_flags) &&
- (tp->t_state == TCPS_SYN_RECEIVED))
+ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED))
goto out;
- NET_EPOCH_ENTER(et);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
tcp_offload_rcvd(tp);
else
#endif
- tp->t_fb->tfb_tcp_output(tp);
- NET_EPOCH_EXIT(et);
+ outrv = tcp_output_nodrop(tp);
out:
- TCPDEBUG2(PRU_RCVD);
+ tcp_bblog_pru(tp, PRU_RCVD, error);
TCP_PROBE2(debug__user, tp, PRU_RCVD);
- INP_WUNLOCK(inp);
+ (void) tcp_unlock_or_drop(tp, outrv);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -976,7 +917,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
struct epoch_tracker et;
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
#ifdef INET
#ifdef INET6
struct sockaddr_in sin;
@@ -984,46 +925,43 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr_in *sinp;
#endif
#ifdef INET6
+ struct sockaddr_in6 *sin6;
int isipv6;
#endif
u_int8_t incflagsav;
u_char vflagsav;
bool restoreflags;
- TCPDEBUG0;
- /*
- * We require the pcbinfo "read lock" if we will close the socket
- * as part of this call.
- */
- NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ if (m != NULL && (flags & PRUS_NOTREADY) == 0)
+ m_freem(m);
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
+ }
+ tp = intotcpcb(inp);
+
vflagsav = inp->inp_vflag;
incflagsav = inp->inp_inc.inc_flags;
restoreflags = false;
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- if (control)
- m_freem(control);
- error = ECONNRESET;
- goto out;
- }
+
+ NET_EPOCH_ENTER(et);
if (control != NULL) {
/* TCP doesn't do control messages (rights, creds, etc) */
- if (control->m_len) {
+ if (control->m_len > 0) {
m_freem(control);
error = EINVAL;
goto out;
}
m_freem(control); /* empty control, just free it */
- control = NULL;
}
- tp = intotcpcb(inp);
+
if ((flags & PRUS_OOB) != 0 &&
(error = tcp_pru_options_support(tp, PRUS_OOB)) != 0)
goto out;
- TCPDEBUG1();
if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
if (tp->t_state == TCPS_LISTEN) {
error = EINVAL;
@@ -1059,9 +997,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
#endif /* INET */
#ifdef INET6
case AF_INET6:
- {
- struct sockaddr_in6 *sin6;
-
sin6 = (struct sockaddr_in6 *)nam;
if (sin6->sin6_len != sizeof(*sin6)) {
error = EINVAL;
@@ -1116,7 +1051,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
isipv6 = 1;
}
break;
- }
#endif /* INET6 */
default:
error = EAFNOSUPPORT;
@@ -1124,6 +1058,8 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
}
}
if (!(flags & PRUS_OOB)) {
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
sbappendstream(&so->so_snd, m, flags);
m = NULL;
if (nam && tp->t_state < TCPS_SYN_SENT) {
@@ -1137,14 +1073,13 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
*/
#ifdef INET6
if (isipv6)
- error = tcp6_connect(tp, nam, td);
+ error = tcp6_connect(tp, sin6, td);
#endif /* INET6 */
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET
- error = tcp_connect(tp,
- (struct sockaddr *)sinp, td);
+ error = tcp_connect(tp, sinp, td);
#endif
/*
* The bind operation in tcp_connect succeeded. We
@@ -1159,7 +1094,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
sbflush(&so->so_snd);
goto out;
}
- if (IS_FASTOPEN(tp->t_flags))
+ if (tp->t_flags & TF_FASTOPEN)
tcp_fastopen_connect(tp);
else {
tp->snd_wnd = TTCP_CLIENT_SND_WND;
@@ -1188,7 +1123,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
!(flags & PRUS_NOTREADY)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tp->t_fb->tfb_tcp_output(tp);
+ error = tcp_output_nodrop(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -1210,6 +1145,8 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
sbappendstream_locked(&so->so_snd, m, flags);
SOCKBUF_UNLOCK(&so->so_snd);
m = NULL;
@@ -1223,18 +1160,17 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
/*
* Not going to contemplate SYN|URG
*/
- if (IS_FASTOPEN(tp->t_flags))
+ if (tp->t_flags & TF_FASTOPEN)
tp->t_flags &= ~TF_FASTOPEN;
#ifdef INET6
if (isipv6)
- error = tcp6_connect(tp, nam, td);
+ error = tcp6_connect(tp, sin6, td);
#endif /* INET6 */
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET
- error = tcp_connect(tp,
- (struct sockaddr *)sinp, td);
+ error = tcp_connect(tp, sinp, td);
#endif
/*
* The bind operation in tcp_connect succeeded. We
@@ -1255,7 +1191,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
if ((flags & PRUS_NOTREADY) == 0) {
tp->t_flags |= TF_FORCEDATA;
- error = tp->t_fb->tfb_tcp_output(tp);
+ error = tcp_output_nodrop(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
}
@@ -1281,11 +1217,11 @@ out:
inp->inp_vflag = vflagsav;
inp->inp_inc.inc_flags = incflagsav;
}
- TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
- ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
+ tcp_bblog_pru(tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
+ ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), error);
TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
- INP_WUNLOCK(inp);
+ error = tcp_unlock_or_drop(tp, error);
NET_EPOCH_EXIT(et);
return (error);
}
@@ -1300,7 +1236,7 @@ tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
inp = sotoinpcb(so);
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ if (inp->inp_flags & INP_DROPPED) {
INP_WUNLOCK(inp);
mb_free_notready(m, count);
return (ECONNRESET);
@@ -1310,12 +1246,13 @@ tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
SOCKBUF_LOCK(&so->so_snd);
error = sbready(&so->so_snd, m, count);
SOCKBUF_UNLOCK(&so->so_snd);
- if (error == 0) {
- NET_EPOCH_ENTER(et);
- error = tp->t_fb->tfb_tcp_output(tp);
- NET_EPOCH_EXIT(et);
+ if (error) {
+ INP_WUNLOCK(inp);
+ return (error);
}
- INP_WUNLOCK(inp);
+ NET_EPOCH_ENTER(et);
+ error = tcp_output_unlock(tp);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1327,9 +1264,8 @@ static void
tcp_usr_abort(struct socket *so)
{
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
struct epoch_tracker et;
- TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
@@ -1342,20 +1278,16 @@ tcp_usr_abort(struct socket *so)
/*
* If we still have full TCP state, and we're not dropped, drop.
*/
- if (!(inp->inp_flags & INP_TIMEWAIT) &&
- !(inp->inp_flags & INP_DROPPED)) {
+ if (!(inp->inp_flags & INP_DROPPED)) {
tp = intotcpcb(inp);
- TCPDEBUG1();
tp = tcp_drop(tp, ECONNABORTED);
if (tp == NULL)
goto dropped;
- TCPDEBUG2(PRU_ABORT);
+ tcp_bblog_pru(tp, PRU_ABORT, 0);
TCP_PROBE2(debug__user, tp, PRU_ABORT);
}
if (!(inp->inp_flags & INP_DROPPED)) {
- SOCK_LOCK(so);
- so->so_state |= SS_PROTOREF;
- SOCK_UNLOCK(so);
+ soref(so);
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
@@ -1370,9 +1302,8 @@ static void
tcp_usr_close(struct socket *so)
{
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
struct epoch_tracker et;
- TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
@@ -1383,21 +1314,20 @@ tcp_usr_close(struct socket *so)
("tcp_usr_close: inp_socket == NULL"));
/*
- * If we still have full TCP state, and we're not dropped, initiate
+ * If we are still connected and we're not dropped, initiate
* a disconnect.
*/
- if (!(inp->inp_flags & INP_TIMEWAIT) &&
- !(inp->inp_flags & INP_DROPPED)) {
+ if (!(inp->inp_flags & INP_DROPPED)) {
tp = intotcpcb(inp);
- TCPDEBUG1();
- tcp_disconnect(tp);
- TCPDEBUG2(PRU_CLOSE);
- TCP_PROBE2(debug__user, tp, PRU_CLOSE);
+ if (tp->t_state != TCPS_TIME_WAIT) {
+ tp->t_flags |= TF_CLOSED;
+ tcp_disconnect(tp);
+ tcp_bblog_pru(tp, PRU_CLOSE, 0);
+ TCP_PROBE2(debug__user, tp, PRU_CLOSE);
+ }
}
if (!(inp->inp_flags & INP_DROPPED)) {
- SOCK_LOCK(so);
- so->so_state |= SS_PROTOREF;
- SOCK_UNLOCK(so);
+ soref(so);
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
@@ -1430,22 +1360,21 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp = NULL;
+ struct tcpcb *tp;
- TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = ECONNRESET;
- goto out;
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
}
tp = intotcpcb(inp);
+
error = tcp_pru_options_support(tp, PRUS_OOB);
if (error) {
goto out;
}
- TCPDEBUG1();
if ((so->so_oobmark == 0 &&
(so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
so->so_options & SO_OOBINLINE ||
@@ -1463,115 +1392,95 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
out:
- TCPDEBUG2(PRU_RCVOOB);
+ tcp_bblog_pru(tp, PRU_RCVOOB, error);
TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
INP_WUNLOCK(inp);
return (error);
}
#ifdef INET
-struct pr_usrreqs tcp_usrreqs = {
- .pru_abort = tcp_usr_abort,
- .pru_accept = tcp_usr_accept,
- .pru_attach = tcp_usr_attach,
- .pru_bind = tcp_usr_bind,
- .pru_connect = tcp_usr_connect,
- .pru_control = in_control,
- .pru_detach = tcp_usr_detach,
- .pru_disconnect = tcp_usr_disconnect,
- .pru_listen = tcp_usr_listen,
- .pru_peeraddr = in_getpeeraddr,
- .pru_rcvd = tcp_usr_rcvd,
- .pru_rcvoob = tcp_usr_rcvoob,
- .pru_send = tcp_usr_send,
- .pru_ready = tcp_usr_ready,
- .pru_shutdown = tcp_usr_shutdown,
- .pru_sockaddr = in_getsockaddr,
- .pru_sosetlabel = in_pcbsosetlabel,
- .pru_close = tcp_usr_close,
+struct protosw tcp_protosw = {
+ .pr_type = SOCK_STREAM,
+ .pr_protocol = IPPROTO_TCP,
+ .pr_flags = PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD |
+ PR_CAPATTACH,
+ .pr_ctloutput = tcp_ctloutput,
+ .pr_abort = tcp_usr_abort,
+ .pr_accept = tcp_usr_accept,
+ .pr_attach = tcp_usr_attach,
+ .pr_bind = tcp_usr_bind,
+ .pr_connect = tcp_usr_connect,
+ .pr_control = in_control,
+ .pr_detach = tcp_usr_detach,
+ .pr_disconnect = tcp_usr_disconnect,
+ .pr_listen = tcp_usr_listen,
+ .pr_peeraddr = in_getpeeraddr,
+ .pr_rcvd = tcp_usr_rcvd,
+ .pr_rcvoob = tcp_usr_rcvoob,
+ .pr_send = tcp_usr_send,
+ .pr_ready = tcp_usr_ready,
+ .pr_shutdown = tcp_usr_shutdown,
+ .pr_sockaddr = in_getsockaddr,
+ .pr_sosetlabel = in_pcbsosetlabel,
+ .pr_close = tcp_usr_close,
};
#endif /* INET */
#ifdef INET6
-struct pr_usrreqs tcp6_usrreqs = {
- .pru_abort = tcp_usr_abort,
- .pru_accept = tcp6_usr_accept,
- .pru_attach = tcp_usr_attach,
- .pru_bind = tcp6_usr_bind,
- .pru_connect = tcp6_usr_connect,
- .pru_control = in6_control,
- .pru_detach = tcp_usr_detach,
- .pru_disconnect = tcp_usr_disconnect,
- .pru_listen = tcp6_usr_listen,
- .pru_peeraddr = in6_mapped_peeraddr,
- .pru_rcvd = tcp_usr_rcvd,
- .pru_rcvoob = tcp_usr_rcvoob,
- .pru_send = tcp_usr_send,
- .pru_ready = tcp_usr_ready,
- .pru_shutdown = tcp_usr_shutdown,
- .pru_sockaddr = in6_mapped_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel,
- .pru_close = tcp_usr_close,
+struct protosw tcp6_protosw = {
+ .pr_type = SOCK_STREAM,
+ .pr_protocol = IPPROTO_TCP,
+ .pr_flags = PR_CONNREQUIRED | PR_IMPLOPCL |PR_WANTRCVD |
+ PR_CAPATTACH,
+ .pr_ctloutput = tcp_ctloutput,
+ .pr_abort = tcp_usr_abort,
+ .pr_accept = tcp6_usr_accept,
+ .pr_attach = tcp_usr_attach,
+ .pr_bind = tcp6_usr_bind,
+ .pr_connect = tcp6_usr_connect,
+ .pr_control = in6_control,
+ .pr_detach = tcp_usr_detach,
+ .pr_disconnect = tcp_usr_disconnect,
+ .pr_listen = tcp6_usr_listen,
+ .pr_peeraddr = in6_mapped_peeraddr,
+ .pr_rcvd = tcp_usr_rcvd,
+ .pr_rcvoob = tcp_usr_rcvoob,
+ .pr_send = tcp_usr_send,
+ .pr_ready = tcp_usr_ready,
+ .pr_shutdown = tcp_usr_shutdown,
+ .pr_sockaddr = in6_mapped_sockaddr,
+ .pr_sosetlabel = in_pcbsosetlabel,
+ .pr_close = tcp_usr_close,
};
#endif /* INET6 */
#ifdef INET
/*
* Common subroutine to open a TCP connection to remote host specified
- * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
- * port number if needed. Call in_pcbconnect_setup to do the routing and
- * to choose a local host address (interface). If there is an existing
- * incarnation of the same connection in TIME-WAIT state and if the remote
- * host was sending CC options and if the connection duration was < MSL, then
- * truncate the previous TIME-WAIT state and proceed.
+ * by struct sockaddr_in. Call in_pcbconnect() to choose local host address
+ * and assign a local port number and install the inpcb into the hash.
* Initialize connection parameters and enter SYN-SENT state.
*/
static int
-tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
+tcp_connect(struct tcpcb *tp, struct sockaddr_in *sin, struct thread *td)
{
- struct inpcb *inp = tp->t_inpcb, *oinp;
- struct socket *so = inp->inp_socket;
- struct in_addr laddr;
- u_short lport;
+ struct inpcb *inp = tptoinpcb(tp);
+ struct socket *so = tptosocket(tp);
int error;
NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK(&V_tcbinfo);
- if (V_tcp_require_unique_port && inp->inp_lport == 0) {
- error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
- if (error)
- goto out;
- }
+ if (__predict_false((so->so_state &
+ (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING |
+ SS_ISDISCONNECTED)) != 0))
+ return (EISCONN);
- /*
- * Cannot simply call in_pcbconnect, because there might be an
- * earlier incarnation of this same connection still in
- * TIME_WAIT state, creating an ADDRINUSE error.
- */
- laddr = inp->inp_laddr;
- lport = inp->inp_lport;
- error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
- &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
- if (error && oinp == NULL)
- goto out;
- if (oinp) {
- error = EADDRINUSE;
- goto out;
- }
- /* Handle initial bind if it hadn't been done in advance. */
- if (inp->inp_lport == 0) {
- inp->inp_lport = lport;
- if (in_pcbinshash(inp) != 0) {
- inp->inp_lport = 0;
- error = EAGAIN;
- goto out;
- }
- }
- inp->inp_laddr = laddr;
- in_pcbrehash(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
+ error = in_pcbconnect(inp, sin, td->td_ucred, true);
INP_HASH_WUNLOCK(&V_tcbinfo);
+ if (error != 0)
+ return (error);
/*
* Compute window scaling to request:
@@ -1590,40 +1499,37 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
tcp_sendseqinit(tp);
- return 0;
-
-out:
- INP_HASH_WUNLOCK(&V_tcbinfo);
- return (error);
+ return (0);
}
#endif /* INET */
#ifdef INET6
static int
-tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
+tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td)
{
- struct inpcb *inp = tp->t_inpcb;
+ struct inpcb *inp = tptoinpcb(tp);
+ struct socket *so = tptosocket(tp);
int error;
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK(&V_tcbinfo);
- if (V_tcp_require_unique_port && inp->inp_lport == 0) {
- error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
- if (error)
- goto out;
- }
- error = in6_pcbconnect(inp, nam, td->td_ucred);
- if (error != 0)
- goto out;
+ if (__predict_false((so->so_state &
+ (SS_ISCONNECTING | SS_ISCONNECTED)) != 0))
+ return (EISCONN);
+
+ INP_HASH_WLOCK(&V_tcbinfo);
+ error = in6_pcbconnect(inp, sin6, td->td_ucred, true);
INP_HASH_WUNLOCK(&V_tcbinfo);
+ if (error != 0)
+ return (error);
/* Compute window scaling to request. */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
(TCP_MAXWIN << tp->request_r_scale) < sb_max)
tp->request_r_scale++;
- soisconnecting(inp->inp_socket);
+ soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tcp_state_change(tp, TCPS_SYN_SENT);
tp->iss = tcp_new_isn(&inp->inp_inc);
@@ -1631,11 +1537,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
tcp_sendseqinit(tp);
- return 0;
-
-out:
- INP_HASH_WUNLOCK(&V_tcbinfo);
- return error;
+ return (0);
}
#endif /* INET6 */
@@ -1646,11 +1548,11 @@ out:
* constants -- for example, the numeric values for tcpi_state will differ
* from Linux.
*/
-static void
-tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
+void
+tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti)
{
- INP_WLOCK_ASSERT(tp->t_inpcb);
+ INP_LOCK_ASSERT(tptoinpcb(tp));
bzero(ti, sizeof(*ti));
ti->tcpi_state = tp->t_state;
@@ -1663,8 +1565,20 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
- if (tp->t_flags2 & TF2_ECN_PERMIT)
- ti->tcpi_options |= TCPI_OPT_ECN;
+ switch (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
+ case TF2_ECN_PERMIT:
+ ti->tcpi_options |= TCPI_OPT_ECN;
+ break;
+ case TF2_ACE_PERMIT:
+ /* FALLTHROUGH */
+ case TF2_ECN_PERMIT | TF2_ACE_PERMIT:
+ ti->tcpi_options |= TCPI_OPT_ACE;
+ break;
+ default:
+ break;
+ }
+ if (tp->t_flags & TF_FASTOPEN)
+ ti->tcpi_options |= TCPI_OPT_TFO;
ti->tcpi_rto = tp->t_rxtcur * tick;
ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
@@ -1687,12 +1601,31 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
ti->tcpi_snd_zerowin = tp->t_sndzerowin;
+ ti->tcpi_snd_una = tp->snd_una;
+ ti->tcpi_snd_max = tp->snd_max;
+ ti->tcpi_rcv_numsacks = tp->rcv_numsacks;
+ ti->tcpi_rcv_adv = tp->rcv_adv;
+ ti->tcpi_dupacks = tp->t_dupacks;
+ ti->tcpi_rttmin = tp->t_rttlow;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
ti->tcpi_options |= TCPI_OPT_TOE;
tcp_offload_tcp_info(tp, ti);
}
#endif
+ /*
+ * AccECN related counters.
+ */
+ if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
+ (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
+ /*
+ * Internal counter starts at 5 for AccECN
+ * but 0 for RFC3168 ECN.
+ */
+ ti->tcpi_delivered_ce = tp->t_scep - 5;
+ else
+ ti->tcpi_delivered_ce = tp->t_scep;
+ ti->tcpi_received_ce = tp->t_rcep;
}
/*
@@ -1703,7 +1636,7 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
*/
#define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do { \
INP_WLOCK(inp); \
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \
+ if (inp->inp_flags & INP_DROPPED) { \
INP_WUNLOCK(inp); \
cleanup; \
return (ECONNRESET); \
@@ -1712,24 +1645,30 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
} while(0)
#define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
-static int
+int
tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
{
- struct tcpcb *tp = intotcpcb(inp);
+ struct socket *so = inp->inp_socket;
+ struct tcpcb *tp = intotcpcb(inp);
int error = 0;
MPASS(sopt->sopt_dir == SOPT_SET);
+ INP_WLOCK_ASSERT(inp);
+ KASSERT((inp->inp_flags & INP_DROPPED) == 0,
+ ("inp_flags == %x", inp->inp_flags));
+ KASSERT(so != NULL, ("inp_socket == NULL"));
if (sopt->sopt_level != IPPROTO_TCP) {
+ INP_WUNLOCK(inp);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO)
- error = ip6_ctloutput(inp->inp_socket, sopt);
+ error = ip6_ctloutput(so, sopt);
#endif
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET
- error = ip_ctloutput(inp->inp_socket, sopt);
+ error = ip_ctloutput(so, sopt);
#endif
/*
* When an IP-level socket option affects TCP, pass control
@@ -1757,6 +1696,8 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
case IPPROTO_IP:
switch (sopt->sopt_name) {
case IP_TOS:
+ inp->inp_ip_tos &= ~IPTOS_ECN_MASK;
+ break;
case IP_TTL:
/* Notify tcp stacks that care (e.g. RACK). */
break;
@@ -1768,6 +1709,11 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
default:
return (error);
}
+ INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
+ }
} else if (sopt->sopt_name == TCP_FUNCTION_BLK) {
/*
* Protect the TCP option TCP_FUNCTION_BLK so
@@ -1775,16 +1721,14 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
*/
struct tcp_function_set fsn;
struct tcp_function_block *blk;
+ void *ptr = NULL;
+ INP_WUNLOCK(inp);
error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn);
if (error)
return (error);
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(inp);
- return (ECONNRESET);
- }
tp = intotcpcb(inp);
blk = find_and_ref_tcp_functions(&fsn);
@@ -1825,10 +1769,33 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
return (ENOENT);
}
/*
- * Release the old refcnt, the
- * lookup acquired a ref on the
- * new one already.
+ * Ensure the new stack takes ownership with a
+ * clean slate on peak rate threshold.
*/
+ if (tp->t_fb->tfb_tcp_timer_stop_all != NULL)
+ tp->t_fb->tfb_tcp_timer_stop_all(tp);
+ if (blk->tfb_tcp_fb_init) {
+ error = (*blk->tfb_tcp_fb_init)(tp, &ptr);
+ if (error) {
+ /*
+ * Release the ref count the lookup
+ * acquired.
+ */
+ refcount_release(&blk->tfb_refcnt);
+ /*
+ * Now there is a chance that the
+ * init() function mucked with some
+ * things before it failed, such as
+ * hpts or inp_flags2 or timer granularity.
+ * It should not have, but let's give the old
+ * stack a chance to reset to a known good state.
+ */
+ if (tp->t_fb->tfb_switch_failed) {
+ (*tp->t_fb->tfb_switch_failed)(tp);
+ }
+ goto err_out;
+ }
+ }
if (tp->t_fb->tfb_tcp_fb_fini) {
struct epoch_tracker et;
/*
@@ -1839,27 +1806,17 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
NET_EPOCH_EXIT(et);
}
-#ifdef TCPHPTS
- /* Assure that we are not on any hpts */
- tcp_hpts_remove(tp->t_inpcb, HPTS_REMOVE_ALL);
-#endif
- if (blk->tfb_tcp_fb_init) {
- error = (*blk->tfb_tcp_fb_init)(tp);
- if (error) {
- refcount_release(&blk->tfb_refcnt);
- if (tp->t_fb->tfb_tcp_fb_init) {
- if((*tp->t_fb->tfb_tcp_fb_init)(tp) != 0) {
- /* Fall back failed, drop the connection */
- INP_WUNLOCK(inp);
- soabort(inp->inp_socket);
- return(error);
- }
- }
- goto err_out;
- }
- }
+ /*
+ * Release the old refcnt, the
+ * lookup acquired a ref on the
+ * new one already.
+ */
refcount_release(&tp->t_fb->tfb_refcnt);
+ /*
+ * Set in the new stack.
+ */
tp->t_fb = blk;
+ tp->t_fb_ptr = ptr;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
tcp_offload_ctloutput(tp, sopt->sopt_dir,
@@ -1869,46 +1826,40 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
err_out:
INP_WUNLOCK(inp);
return (error);
- }
- INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(inp);
- return (ECONNRESET);
}
- tp = intotcpcb(inp);
- /* Pass in the INP locked, caller must unlock it. */
- return (tp->t_fb->tfb_tcp_ctloutput(inp->inp_socket, sopt, inp, tp));
+ /* Pass in the INP locked, callee must unlock it. */
+ return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
}
static int
tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt)
{
- int error = 0;
- struct tcpcb *tp;
+ struct socket *so = inp->inp_socket;
+ struct tcpcb *tp = intotcpcb(inp);
+ int error = 0;
MPASS(sopt->sopt_dir == SOPT_GET);
+ INP_WLOCK_ASSERT(inp);
+ KASSERT((inp->inp_flags & INP_DROPPED) == 0,
+ ("inp_flags == %x", inp->inp_flags));
+ KASSERT(so != NULL, ("inp_socket == NULL"));
if (sopt->sopt_level != IPPROTO_TCP) {
+ INP_WUNLOCK(inp);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO)
- error = ip6_ctloutput(inp->inp_socket, sopt);
+ error = ip6_ctloutput(so, sopt);
#endif /* INET6 */
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET
- error = ip_ctloutput(inp->inp_socket, sopt);
+ error = ip_ctloutput(so, sopt);
#endif
return (error);
}
- INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(inp);
- return (ECONNRESET);
- }
- tp = intotcpcb(inp);
if (((sopt->sopt_name == TCP_FUNCTION_BLK) ||
(sopt->sopt_name == TCP_FUNCTION_ALIAS))) {
struct tcp_function_set fsn;
@@ -1928,20 +1879,23 @@ tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt)
return (error);
}
- /* Pass in the INP locked, caller must unlock it. */
- return (tp->t_fb->tfb_tcp_ctloutput(inp->inp_socket, sopt, inp, tp));
+ /* Pass in the INP locked, callee must unlock it. */
+ return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
}
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
- int error;
struct inpcb *inp;
- error = 0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
+ INP_WLOCK(inp);
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
+ }
if (sopt->sopt_dir == SOPT_SET)
return (tcp_ctloutput_set(inp, sopt));
else if (sopt->sopt_dir == SOPT_GET)
@@ -1959,44 +1913,14 @@ CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
#endif
-#ifdef KERN_TLS
-static int
-copyin_tls_enable(struct sockopt *sopt, struct tls_enable *tls)
-{
- struct tls_enable_v0 tls_v0;
- int error;
-
- if (sopt->sopt_valsize == sizeof(tls_v0)) {
- error = sooptcopyin(sopt, &tls_v0, sizeof(tls_v0),
- sizeof(tls_v0));
- if (error)
- return (error);
- memset(tls, 0, sizeof(*tls));
- tls->cipher_key = tls_v0.cipher_key;
- tls->iv = tls_v0.iv;
- tls->auth_key = tls_v0.auth_key;
- tls->cipher_algorithm = tls_v0.cipher_algorithm;
- tls->cipher_key_len = tls_v0.cipher_key_len;
- tls->iv_len = tls_v0.iv_len;
- tls->auth_algorithm = tls_v0.auth_algorithm;
- tls->auth_key_len = tls_v0.auth_key_len;
- tls->flags = tls_v0.flags;
- tls->tls_vmajor = tls_v0.tls_vmajor;
- tls->tls_vminor = tls_v0.tls_vminor;
- return (0);
- }
-
- return (sooptcopyin(sopt, tls, sizeof(*tls), sizeof(*tls)));
-}
-#endif
-
extern struct cc_algo newreno_cc_algo;
static int
-tcp_congestion(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt)
{
struct cc_algo *algo;
void *ptr = NULL;
+ struct tcpcb *tp;
struct cc_var cc_mem;
char buf[TCP_CA_NAME_MAX];
size_t mem_sz;
@@ -2008,7 +1932,7 @@ tcp_congestion(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struc
return(error);
buf[sopt->sopt_valsize] = '\0';
CC_LIST_RLOCK();
- STAILQ_FOREACH(algo, &cc_list, entries)
+ STAILQ_FOREACH(algo, &cc_list, entries) {
if (strncmp(buf, algo->name,
TCP_CA_NAME_MAX) == 0) {
if (algo->flags & CC_MODULE_BEING_REMOVED) {
@@ -2017,30 +1941,24 @@ tcp_congestion(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struc
}
break;
}
+ }
if (algo == NULL) {
CC_LIST_RUNLOCK();
return(ESRCH);
}
-do_over:
+ /*
+ * With a reference the algorithm cannot be removed
+ * so we hold a reference through the change process.
+ */
+ cc_refer(algo);
+ CC_LIST_RUNLOCK();
if (algo->cb_init != NULL) {
/* We can now pre-get the memory for the CC */
mem_sz = (*algo->cc_data_sz)();
if (mem_sz == 0) {
goto no_mem_needed;
}
- CC_LIST_RUNLOCK();
ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
- CC_LIST_RLOCK();
- STAILQ_FOREACH(algo, &cc_list, entries)
- if (strncmp(buf, algo->name,
- TCP_CA_NAME_MAX) == 0)
- break;
- if (algo == NULL) {
- if (ptr)
- free(ptr, M_CC_MEM);
- CC_LIST_RUNLOCK();
- return(ESRCH);
- }
} else {
no_mem_needed:
mem_sz = 0;
@@ -2051,22 +1969,20 @@ no_mem_needed:
* back the inplock.
*/
memset(&cc_mem, 0, sizeof(cc_mem));
- if (mem_sz != (*algo->cc_data_sz)()) {
- if (ptr)
- free(ptr, M_CC_MEM);
- goto do_over;
- }
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ if (inp->inp_flags & INP_DROPPED) {
INP_WUNLOCK(inp);
+ if (ptr)
+ free(ptr, M_CC_MEM);
+ /* Release our temp reference */
+ CC_LIST_RLOCK();
+ cc_release(algo);
CC_LIST_RUNLOCK();
- free(ptr, M_CC_MEM);
return (ECONNRESET);
}
tp = intotcpcb(inp);
if (ptr != NULL)
memset(ptr, 0, mem_sz);
- CC_LIST_RUNLOCK();
cc_mem.ccvc.tcp = tp;
/*
* We once again hold a write lock over the tcb so it's
@@ -2090,28 +2006,38 @@ no_mem_needed:
* the old ones cleanup (if any).
*/
if (CC_ALGO(tp)->cb_destroy != NULL)
- CC_ALGO(tp)->cb_destroy(tp->ccv);
- memcpy(tp->ccv, &cc_mem, sizeof(struct cc_var));
- tp->cc_algo = algo;
+ CC_ALGO(tp)->cb_destroy(&tp->t_ccv);
+ /* Detach the old CC from the tcpcb */
+ cc_detach(tp);
+ /* Copy in our temp memory that was inited */
+ memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var));
+ /* Now attach the new, which takes a reference */
+ cc_attach(tp, algo);
/* Ok now are we where we have gotten past any conn_init? */
if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
/* Yep run the connection init for the new CC */
- CC_ALGO(tp)->conn_init(tp->ccv);
+ CC_ALGO(tp)->conn_init(&tp->t_ccv);
}
} else if (ptr)
free(ptr, M_CC_MEM);
INP_WUNLOCK(inp);
+ /* Now let's release our temp reference */
+ CC_LIST_RLOCK();
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
return (error);
}
int
-tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt)
{
+ struct inpcb *inp = tptoinpcb(tp);
int error, opt, optval;
u_int ui;
struct tcp_info ti;
#ifdef KERN_TLS
struct tls_enable tls;
+ struct socket *so = inp->inp_socket;
#endif
char *pbuf, buf[TCP_LOG_ID_LEN];
#ifdef STATS
@@ -2120,6 +2046,9 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
size_t len;
INP_WLOCK_ASSERT(inp);
+ KASSERT((inp->inp_flags & INP_DROPPED) == 0,
+ ("inp_flags == %x", inp->inp_flags));
+ KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL"));
switch (sopt->sopt_level) {
#ifdef INET6
@@ -2158,7 +2087,7 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
}
INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
if (CC_ALGO(tp)->ctl_output != NULL)
- error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf);
+ error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf);
else
error = ENOENT;
INP_WUNLOCK(inp);
@@ -2173,19 +2102,18 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
switch (sopt->sopt_name) {
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
case TCP_MD5SIG:
- if (!TCPMD5_ENABLED()) {
- INP_WUNLOCK(inp);
+ INP_WUNLOCK(inp);
+ if (!TCPMD5_ENABLED())
return (ENOPROTOOPT);
- }
error = TCPMD5_PCBCTL(inp, sopt);
if (error)
return (error);
+ INP_WLOCK_RECHECK(inp);
goto unlock_and_done;
#endif /* IPSEC */
case TCP_NODELAY:
case TCP_NOOPT:
- case TCP_LRD:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
@@ -2200,9 +2128,6 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
case TCP_NOOPT:
opt = TF_NOOPT;
break;
- case TCP_LRD:
- opt = TF_LRD;
- break;
default:
opt = 0; /* dead code to fool gcc */
break;
@@ -2238,7 +2163,7 @@ unlock_and_done:
struct epoch_tracker et;
NET_EPOCH_ENTER(et);
- error = tp->t_fb->tfb_tcp_output(tp);
+ error = tcp_output_nodrop(tp);
NET_EPOCH_EXIT(et);
}
}
@@ -2278,9 +2203,19 @@ unlock_and_done:
INP_WLOCK_RECHECK(inp);
if (optval > 0 && optval <= tp->t_maxseg &&
- optval + 40 >= V_tcp_minmss)
+ optval + 40 >= V_tcp_minmss) {
tp->t_maxseg = optval;
- else
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACKs since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+ } else
error = EINVAL;
goto unlock_and_done;
@@ -2319,7 +2254,7 @@ unlock_and_done:
break;
case TCP_CONGESTION:
- error = tcp_congestion(so, sopt, inp, tp);
+ error = tcp_set_cc_mod(inp, sopt);
break;
case TCP_REUSPORT_LB_NUMA:
@@ -2335,15 +2270,16 @@ unlock_and_done:
#ifdef KERN_TLS
case TCP_TXTLS_ENABLE:
INP_WUNLOCK(inp);
- error = copyin_tls_enable(sopt, &tls);
- if (error)
+ error = ktls_copyin_tls_enable(sopt, &tls);
+ if (error != 0)
break;
error = ktls_enable_tx(so, &tls);
+ ktls_cleanup_tls_enable(&tls);
break;
case TCP_TXTLS_MODE:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
- if (error)
+ if (error != 0)
return (error);
INP_WLOCK_RECHECK(inp);
@@ -2352,14 +2288,14 @@ unlock_and_done:
break;
case TCP_RXTLS_ENABLE:
INP_WUNLOCK(inp);
- error = sooptcopyin(sopt, &tls, sizeof(tls),
- sizeof(tls));
- if (error)
+ error = ktls_copyin_tls_enable(sopt, &tls);
+ if (error != 0)
break;
error = ktls_enable_rx(so, &tls);
+ ktls_cleanup_tls_enable(&tls);
break;
#endif
-
+ case TCP_MAXUNACKTIME:
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
@@ -2376,6 +2312,10 @@ unlock_and_done:
INP_WLOCK_RECHECK(inp);
switch (sopt->sopt_name) {
+ case TCP_MAXUNACKTIME:
+ tp->t_maxunacktime = ui;
+ break;
+
case TCP_KEEPIDLE:
tp->t_keepidle = ui;
/*
@@ -2429,7 +2369,8 @@ unlock_and_done:
INP_WLOCK_RECHECK(inp);
if (optval >= 0)
- tcp_pcap_set_sock_max(TCP_PCAP_OUT ?
+ tcp_pcap_set_sock_max(
+ (sopt->sopt_name == TCP_PCAP_OUT) ?
&(tp->t_outpkts) : &(tp->t_inpkts),
optval);
else
@@ -2551,10 +2492,9 @@ unlock_and_done:
switch (sopt->sopt_name) {
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
case TCP_MD5SIG:
- if (!TCPMD5_ENABLED()) {
- INP_WUNLOCK(inp);
+ INP_WUNLOCK(inp);
+ if (!TCPMD5_ENABLED())
return (ENOPROTOOPT);
- }
error = TCPMD5_PCBCTL(inp, sopt);
break;
#endif
@@ -2643,11 +2583,15 @@ unhold:
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, buf, len + 1);
break;
+ case TCP_MAXUNACKTIME:
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
case TCP_KEEPCNT:
switch (sopt->sopt_name) {
+ case TCP_MAXUNACKTIME:
+ ui = TP_MAXUNACKTIME(tp) / hz;
+ break;
case TCP_KEEPIDLE:
ui = TP_KEEPIDLE(tp) / hz;
break;
@@ -2667,7 +2611,8 @@ unhold:
#ifdef TCPPCAP
case TCP_PCAP_OUT:
case TCP_PCAP_IN:
- optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ?
+ optval = tcp_pcap_get_sock_max(
+ (sopt->sopt_name == TCP_PCAP_OUT) ?
&(tp->t_outpkts) : &(tp->t_inpkts));
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
@@ -2680,7 +2625,7 @@ unhold:
break;
#ifdef TCP_BLACKBOX
case TCP_LOG:
- optval = tp->t_logstate;
+ optval = tcp_get_bblog_state(tp);
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof(optval));
break;
@@ -2715,11 +2660,6 @@ unhold:
sizeof(optval));
break;
#endif
- case TCP_LRD:
- optval = tp->t_flags & TF_LRD;
- INP_WUNLOCK(inp);
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -2743,8 +2683,8 @@ unhold:
static void
tcp_disconnect(struct tcpcb *tp)
{
- struct inpcb *inp = tp->t_inpcb;
- struct socket *so = inp->inp_socket;
+ struct inpcb *inp = tptoinpcb(tp);
+ struct socket *so = tptosocket(tp);
NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
@@ -2754,7 +2694,7 @@ tcp_disconnect(struct tcpcb *tp)
* socket is still open.
*/
if (tp->t_state < TCPS_ESTABLISHED &&
- !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
+ !(tp->t_state > TCPS_LISTEN && (tp->t_flags & TF_FASTOPEN))) {
tp = tcp_close(tp);
KASSERT(tp != NULL,
("tcp_disconnect: tcp_close() returned NULL"));
@@ -2767,7 +2707,8 @@ tcp_disconnect(struct tcpcb *tp)
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tp->t_fb->tfb_tcp_output(tp);
+ /* Ignore stack's drop request, we are already at it. */
+ (void)tcp_output_nodrop(tp);
}
}
@@ -2786,7 +2727,7 @@ tcp_usrclosed(struct tcpcb *tp)
{
NET_EPOCH_ASSERT();
- INP_WLOCK_ASSERT(tp->t_inpcb);
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
switch (tp->t_state) {
case TCPS_LISTEN:
@@ -2818,8 +2759,11 @@ tcp_usrclosed(struct tcpcb *tp)
tcp_state_change(tp, TCPS_LAST_ACK);
break;
}
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
if (tp->t_state >= TCPS_FIN_WAIT_2) {
- soisdisconnected(tp->t_inpcb->inp_socket);
+ tcp_free_sackholes(tp);
+ soisdisconnected(tptosocket(tp));
/* Prevent the connection hanging in FIN_WAIT_2 forever. */
if (tp->t_state == TCPS_FIN_WAIT_2) {
int timeout;
@@ -2954,12 +2898,16 @@ db_print_tflags(u_int t_flags)
db_printf("%sTF_NOPUSH", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_PREVVALID) {
+ db_printf("%sTF_PREVVALID", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags & TF_MORETOCOME) {
db_printf("%sTF_MORETOCOME", comma ? ", " : "");
comma = 1;
}
- if (t_flags & TF_LQ_OVERFLOW) {
- db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
+ if (t_flags & TF_SONOTCONN) {
+ db_printf("%sTF_SONOTCONN", comma ? ", " : "");
comma = 1;
}
if (t_flags & TF_LASTIDLE) {
@@ -2982,6 +2930,10 @@ db_print_tflags(u_int t_flags)
db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_WASCRECOVERY) {
+ db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags & TF_SIGNATURE) {
db_printf("%sTF_SIGNATURE", comma ? ", " : "");
comma = 1;
@@ -3006,10 +2958,46 @@ db_print_tflags2(u_int t_flags2)
int comma;
comma = 0;
+ if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
+ db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_PLPMTU_PMTUD) {
+ db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
+ db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_LOG_AUTO) {
+ db_printf("%sTF2_LOG_AUTO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_DROP_AF_DATA) {
+ db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags2 & TF2_ECN_PERMIT) {
db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags2 & TF2_ECN_SND_CWR) {
+ db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_ECN_SND_ECE) {
+ db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_ACE_PERMIT) {
+ db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_FBYTES_COMPLETE) {
+ db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : "");
+ comma = 1;
+ }
}
static void
@@ -3042,12 +3030,8 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
db_print_indent(indent);
- db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
- &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
-
- db_print_indent(indent);
- db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl,
- &tp->t_timers->tt_delack, tp->t_inpcb);
+ db_printf("t_callout: %p t_timers: %p\n",
+ &tp->t_callout, &tp->t_timers);
db_print_indent(indent);
db_printf("t_state: %d (", tp->t_state);
@@ -3065,7 +3049,7 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
db_printf(")\n");
db_print_indent(indent);
- db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n",
+ db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n",
tp->snd_una, tp->snd_max, tp->snd_nxt);
db_print_indent(indent);
@@ -3101,12 +3085,11 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
db_print_indent(indent);
- db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u "
- "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
- tp->t_rttbest);
+ db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u\n",
+ tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin);
db_print_indent(indent);
- db_printf("t_rttupdated: %lu max_sndwnd: %u t_softerror: %d\n",
+ db_printf("t_rttupdated: %u max_sndwnd: %u t_softerror: %d\n",
tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
db_print_indent(indent);