aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_syncache.c
diff options
context:
space:
mode:
authorMichael Tuexen <tuexen@FreeBSD.org>2021-04-18 14:08:08 +0000
committerMichael Tuexen <tuexen@FreeBSD.org>2021-04-18 14:16:42 +0000
commit9e644c23000c2f5028b235f6263d17ffb24d3605 (patch)
tree846286d2c178f8c442c7bd711ceb0a50a9c06f9e /sys/netinet/tcp_syncache.c
parent136f6b6c0cc1343a7637c3250ff9dd0eced4b4d0 (diff)
downloadsrc-9e644c23000c2f5028b235f6263d17ffb24d3605.tar.gz
src-9e644c23000c2f5028b235f6263d17ffb24d3605.zip
tcp: add support for TCP over UDP
Adding support for TCP over UDP allows communication with TCP stacks which can be implemented in userspace without requiring special priviledges or specific support by the OS. This is joint work with rrs. Reviewed by: rrs Sponsored by: Netflix, Inc. MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D29469
Diffstat (limited to 'sys/netinet/tcp_syncache.c')
-rw-r--r--sys/netinet/tcp_syncache.c127
1 files changed, 98 insertions, 29 deletions
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 4cd8411af8d5..35d9c091ab96 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/toecore.h>
#endif
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#include <netipsec/ipsec_support.h>
@@ -143,14 +145,14 @@ static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *);
static struct syncache
*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
struct syncache *, struct tcphdr *, struct tcpopt *,
- struct socket *);
+ struct socket *, uint16_t);
static void syncache_pause(struct in_conninfo *);
static void syncache_unpause(void *);
static void syncookie_reseed(void *);
#ifdef INVARIANTS
static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso);
+ struct socket *lso, uint16_t port);
#endif
/*
@@ -610,7 +612,8 @@ syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
* If required send a challenge ACK.
*/
void
-syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m)
+syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m,
+ uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -650,6 +653,16 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m)
goto done;
}
+ /* The remote UDP encaps port does not match. */
+ if (sc->sc_port != port) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST with matching "
+ "syncache entry but non-matching UDP encaps port, "
+ "segment ignored\n", s, __func__);
+ TCPSTAT_INC(tcps_badrst);
+ goto done;
+ }
+
/*
* If the RST bit is set, check the sequence number to see
* if this is a valid reset segment.
@@ -716,7 +729,7 @@ done:
}
void
-syncache_badack(struct in_conninfo *inc)
+syncache_badack(struct in_conninfo *inc, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -725,7 +738,7 @@ syncache_badack(struct in_conninfo *inc)
return;
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
- if (sc != NULL) {
+ if ((sc != NULL) && (sc->sc_port == port)) {
syncache_drop(sc, sch);
TCPSTAT_INC(tcps_sc_badack);
}
@@ -733,7 +746,7 @@ syncache_badack(struct in_conninfo *inc)
}
void
-syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq)
+syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -745,6 +758,10 @@ syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq)
if (sc == NULL)
goto done;
+ /* If the port != sc_port, then it's a bogus ICMP msg */
+ if (port != sc->sc_port)
+ goto done;
+
/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
if (ntohl(th_seq) != sc->sc_iss)
goto done;
@@ -951,6 +968,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
tcp_state_change(tp, TCPS_SYN_RECEIVED);
tp->iss = sc->sc_iss;
tp->irs = sc->sc_irs;
+ tp->t_port = sc->sc_port;
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
blk = sototcpcb(lso)->t_fb;
@@ -1071,7 +1089,7 @@ abort2:
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct socket **lsop, struct mbuf *m)
+ struct socket **lsop, struct mbuf *m, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -1099,7 +1117,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* values with the reconstructed values from the cookie.
*/
if (sc != NULL)
- syncookie_cmp(inc, sch, sc, th, to, *lsop);
+ syncookie_cmp(inc, sch, sc, th, to, *lsop, port);
#endif
if (sc == NULL) {
@@ -1133,7 +1151,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto failed;
}
bzero(&scs, sizeof(scs));
- sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
+ sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop, port);
if (locked)
SCH_UNLOCK(sch);
if (sc == NULL) {
@@ -1160,6 +1178,10 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
#endif /* TCP_SIGNATURE */
} else {
+ if (sc->sc_port != port) {
+ SCH_UNLOCK(sch);
+ return (0);
+ }
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
/*
* If listening socket requested TCP digests, check that
@@ -1380,7 +1402,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m,
struct socket *
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod,
- void *todctx, uint8_t iptos)
+ void *todctx, uint8_t iptos, uint16_t port)
{
struct tcpcb *tp;
struct socket *rv = NULL;
@@ -1640,6 +1662,7 @@ skip_alloc:
sc->sc_label = maclabel;
#endif
sc->sc_cred = cred;
+ sc->sc_port = port;
cred = NULL;
sc->sc_ipopts = ipopts;
bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
@@ -1797,8 +1820,9 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
struct ip *ip = NULL;
struct mbuf *m;
struct tcphdr *th = NULL;
+ struct udphdr *udp = NULL;
int optlen, error = 0; /* Make compiler happy */
- u_int16_t hlen, tlen, mssopt;
+ u_int16_t hlen, tlen, mssopt, ulen;
struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
@@ -1812,9 +1836,14 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
#endif
sizeof(struct ip);
tlen = hlen + sizeof(struct tcphdr);
-
+ if (sc->sc_port) {
+ tlen += sizeof(struct udphdr);
+ }
/* Determine MSS we advertize to other end of connection. */
- mssopt = max(tcp_mssopt(&sc->sc_inc), V_tcp_minmss);
+ mssopt = tcp_mssopt(&sc->sc_inc);
+ if (sc->sc_port)
+ mssopt -= V_tcp_udp_tunneling_overhead;
+ mssopt = max(mssopt, V_tcp_minmss);
/* XXX: Assume that the entire packet will fit in a header mbuf. */
KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
@@ -1836,7 +1865,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_vfc = IPV6_VERSION;
- ip6->ip6_nxt = IPPROTO_TCP;
ip6->ip6_src = sc->sc_inc.inc6_laddr;
ip6->ip6_dst = sc->sc_inc.inc6_faddr;
ip6->ip6_plen = htons(tlen - hlen);
@@ -1844,9 +1872,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
/* Zero out traffic class and flow label. */
ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
ip6->ip6_flow |= sc->sc_flowlabel;
+ if (sc->sc_port != 0) {
+ ip6->ip6_nxt = IPPROTO_UDP;
+ udp = (struct udphdr *)(ip6 + 1);
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = sc->sc_port;
+ ulen = (tlen - sizeof(struct ip6_hdr));
+ th = (struct tcphdr *)(udp + 1);
+ } else {
+ ip6->ip6_nxt = IPPROTO_TCP;
+ th = (struct tcphdr *)(ip6 + 1);
+ }
ip6->ip6_flow |= htonl(sc->sc_ip_tos << 20);
-
- th = (struct tcphdr *)(ip6 + 1);
}
#endif
#if defined(INET6) && defined(INET)
@@ -1861,7 +1898,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
ip->ip_id = 0;
ip->ip_off = 0;
ip->ip_sum = 0;
- ip->ip_p = IPPROTO_TCP;
ip->ip_src = sc->sc_inc.inc_laddr;
ip->ip_dst = sc->sc_inc.inc_faddr;
ip->ip_ttl = sc->sc_ip_ttl;
@@ -1876,8 +1912,17 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
*/
if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
ip->ip_off |= htons(IP_DF);
-
- th = (struct tcphdr *)(ip + 1);
+ if (sc->sc_port == 0) {
+ ip->ip_p = IPPROTO_TCP;
+ th = (struct tcphdr *)(ip + 1);
+ } else {
+ ip->ip_p = IPPROTO_UDP;
+ udp = (struct udphdr *)(ip + 1);
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = sc->sc_port;
+ ulen = (tlen - sizeof(struct ip));
+ th = (struct tcphdr *)(udp + 1);
+ }
}
#endif /* INET */
th->th_sport = sc->sc_inc.inc_lport;
@@ -1957,8 +2002,11 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
} else
optlen = 0;
+ if (udp) {
+ ulen += optlen;
+ udp->uh_ulen = htons(ulen);
+ }
M_SETFIB(m, sc->sc_inc.inc_fibnum);
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
/*
* If we have peer's SYN and it has a flowid, then let's assign it to
* our SYN|ACK. ip6_output() and ip_output() will not assign flowid
@@ -1970,9 +2018,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
}
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
- m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
- th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
- IPPROTO_TCP, 0);
+ if (sc->sc_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in6_cksum_pseudo(ip6, ulen,
+ IPPROTO_UDP, 0);
+ th->th_sum = htons(0);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
+ IPPROTO_TCP, 0);
+ }
ip6->ip6_hlim = sc->sc_ip_ttl;
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
@@ -1992,9 +2049,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
#endif
#ifdef INET
{
- m->m_pkthdr.csum_flags = CSUM_TCP;
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tlen + optlen - hlen + IPPROTO_TCP));
+ if (sc->sc_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
+ th->th_sum = htons(0);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(tlen + optlen - hlen + IPPROTO_TCP));
+ }
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
@@ -2224,7 +2290,7 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc)
static struct syncache *
syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso)
+ struct socket *lso, uint16_t port)
{
uint32_t hash;
uint8_t *secbits;
@@ -2310,6 +2376,8 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
sc->sc_rxmits = 0;
+ sc->sc_port = port;
+
TCPSTAT_INC(tcps_sc_recvcookie);
return (sc);
}
@@ -2318,13 +2386,13 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
static int
syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso)
+ struct socket *lso, uint16_t port)
{
struct syncache scs, *scx;
char *s;
bzero(&scs, sizeof(scs));
- scx = syncookie_lookup(inc, sch, &scs, th, to, lso);
+ scx = syncookie_lookup(inc, sch, &scs, th, to, lso, port);
if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
return (0);
@@ -2510,6 +2578,7 @@ syncache_pcblist(struct sysctl_req *req)
xt.xt_inp.inp_vflag = INP_IPV6;
else
xt.xt_inp.inp_vflag = INP_IPV4;
+ xt.xt_encaps_port = sc->sc_port;
bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
sizeof (struct in_conninfo));
error = SYSCTL_OUT(req, &xt, sizeof xt);