diff options
author | Michael Tuexen <tuexen@FreeBSD.org> | 2021-04-18 14:08:08 +0000 |
---|---|---|
committer | Michael Tuexen <tuexen@FreeBSD.org> | 2021-04-18 14:16:42 +0000 |
commit | 9e644c23000c2f5028b235f6263d17ffb24d3605 (patch) | |
tree | 846286d2c178f8c442c7bd711ceb0a50a9c06f9e /sys/netinet/tcp_syncache.c | |
parent | 136f6b6c0cc1343a7637c3250ff9dd0eced4b4d0 (diff) | |
download | src-9e644c23000c2f5028b235f6263d17ffb24d3605.tar.gz src-9e644c23000c2f5028b235f6263d17ffb24d3605.zip |
tcp: add support for TCP over UDP
Adding support for TCP over UDP allows communication with
TCP stacks which can be implemented in userspace without
requiring special priviledges or specific support by the OS.
This is joint work with rrs.
Reviewed by: rrs
Sponsored by: Netflix, Inc.
MFC after: 1 week
Differential Revision: https://reviews.freebsd.org/D29469
Diffstat (limited to 'sys/netinet/tcp_syncache.c')
-rw-r--r-- | sys/netinet/tcp_syncache.c | 127 |
1 files changed, 98 insertions, 29 deletions
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 4cd8411af8d5..35d9c091ab96 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$"); #ifdef TCP_OFFLOAD #include <netinet/toecore.h> #endif +#include <netinet/udp.h> +#include <netinet/udp_var.h> #include <netipsec/ipsec_support.h> @@ -143,14 +145,14 @@ static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *); static struct syncache *syncookie_lookup(struct in_conninfo *, struct syncache_head *, struct syncache *, struct tcphdr *, struct tcpopt *, - struct socket *); + struct socket *, uint16_t); static void syncache_pause(struct in_conninfo *); static void syncache_unpause(void *); static void syncookie_reseed(void *); #ifdef INVARIANTS static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, - struct socket *lso); + struct socket *lso, uint16_t port); #endif /* @@ -610,7 +612,8 @@ syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) * If required send a challenge ACK. */ void -syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m) +syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m, + uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -650,6 +653,16 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m) goto done; } + /* The remote UDP encaps port does not match. */ + if (sc->sc_port != port) { + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Spurious RST with matching " + "syncache entry but non-matching UDP encaps port, " + "segment ignored\n", s, __func__); + TCPSTAT_INC(tcps_badrst); + goto done; + } + /* * If the RST bit is set, check the sequence number to see * if this is a valid reset segment. @@ -716,7 +729,7 @@ done: } void -syncache_badack(struct in_conninfo *inc) +syncache_badack(struct in_conninfo *inc, uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -725,7 +738,7 @@ syncache_badack(struct in_conninfo *inc) return; sc = syncache_lookup(inc, &sch); /* returns locked sch */ SCH_LOCK_ASSERT(sch); - if (sc != NULL) { + if ((sc != NULL) && (sc->sc_port == port)) { syncache_drop(sc, sch); TCPSTAT_INC(tcps_sc_badack); } @@ -733,7 +746,7 @@ syncache_badack(struct in_conninfo *inc) } void -syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq) +syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -745,6 +758,10 @@ syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq) if (sc == NULL) goto done; + /* If the port != sc_port, then it's a bogus ICMP msg */ + if (port != sc->sc_port) + goto done; + /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ if (ntohl(th_seq) != sc->sc_iss) goto done; @@ -951,6 +968,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) tcp_state_change(tp, TCPS_SYN_RECEIVED); tp->iss = sc->sc_iss; tp->irs = sc->sc_irs; + tp->t_port = sc->sc_port; tcp_rcvseqinit(tp); tcp_sendseqinit(tp); blk = sototcpcb(lso)->t_fb; @@ -1071,7 +1089,7 @@ abort2: */ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, - struct socket **lsop, struct mbuf *m) + struct socket **lsop, struct mbuf *m, uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -1099,7 +1117,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, * values with the reconstructed values from the cookie. */ if (sc != NULL) - syncookie_cmp(inc, sch, sc, th, to, *lsop); + syncookie_cmp(inc, sch, sc, th, to, *lsop, port); #endif if (sc == NULL) { @@ -1133,7 +1151,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, goto failed; } bzero(&scs, sizeof(scs)); - sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop); + sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop, port); if (locked) SCH_UNLOCK(sch); if (sc == NULL) { @@ -1160,6 +1178,10 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, } #endif /* TCP_SIGNATURE */ } else { + if (sc->sc_port != port) { + SCH_UNLOCK(sch); + return (0); + } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* * If listening socket requested TCP digests, check that @@ -1380,7 +1402,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m, struct socket * syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod, - void *todctx, uint8_t iptos) + void *todctx, uint8_t iptos, uint16_t port) { struct tcpcb *tp; struct socket *rv = NULL; @@ -1640,6 +1662,7 @@ skip_alloc: sc->sc_label = maclabel; #endif sc->sc_cred = cred; + sc->sc_port = port; cred = NULL; sc->sc_ipopts = ipopts; bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); @@ -1797,8 +1820,9 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) struct ip *ip = NULL; struct mbuf *m; struct tcphdr *th = NULL; + struct udphdr *udp = NULL; int optlen, error = 0; /* Make compiler happy */ - u_int16_t hlen, tlen, mssopt; + u_int16_t hlen, tlen, mssopt, ulen; struct tcpopt to; #ifdef INET6 struct ip6_hdr *ip6 = NULL; @@ -1812,9 +1836,14 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) #endif sizeof(struct ip); tlen = hlen + sizeof(struct tcphdr); - + if (sc->sc_port) { + tlen += sizeof(struct udphdr); + } /* Determine MSS we advertize to other end of connection. */ - mssopt = max(tcp_mssopt(&sc->sc_inc), V_tcp_minmss); + mssopt = tcp_mssopt(&sc->sc_inc); + if (sc->sc_port) + mssopt -= V_tcp_udp_tunneling_overhead; + mssopt = max(mssopt, V_tcp_minmss); /* XXX: Assume that the entire packet will fit in a header mbuf. */ KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN, @@ -1836,7 +1865,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) if (sc->sc_inc.inc_flags & INC_ISIPV6) { ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_vfc = IPV6_VERSION; - ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_src = sc->sc_inc.inc6_laddr; ip6->ip6_dst = sc->sc_inc.inc6_faddr; ip6->ip6_plen = htons(tlen - hlen); @@ -1844,9 +1872,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) /* Zero out traffic class and flow label. */ ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK; ip6->ip6_flow |= sc->sc_flowlabel; + if (sc->sc_port != 0) { + ip6->ip6_nxt = IPPROTO_UDP; + udp = (struct udphdr *)(ip6 + 1); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = sc->sc_port; + ulen = (tlen - sizeof(struct ip6_hdr)); + th = (struct tcphdr *)(udp + 1); + } else { + ip6->ip6_nxt = IPPROTO_TCP; + th = (struct tcphdr *)(ip6 + 1); + } ip6->ip6_flow |= htonl(sc->sc_ip_tos << 20); - - th = (struct tcphdr *)(ip6 + 1); } #endif #if defined(INET6) && defined(INET) @@ -1861,7 +1898,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) ip->ip_id = 0; ip->ip_off = 0; ip->ip_sum = 0; - ip->ip_p = IPPROTO_TCP; ip->ip_src = sc->sc_inc.inc_laddr; ip->ip_dst = sc->sc_inc.inc_faddr; ip->ip_ttl = sc->sc_ip_ttl; @@ -1876,8 +1912,17 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) */ if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0)) ip->ip_off |= htons(IP_DF); - - th = (struct tcphdr *)(ip + 1); + if (sc->sc_port == 0) { + ip->ip_p = IPPROTO_TCP; + th = (struct tcphdr *)(ip + 1); + } else { + ip->ip_p = IPPROTO_UDP; + udp = (struct udphdr *)(ip + 1); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = sc->sc_port; + ulen = (tlen - sizeof(struct ip)); + th = (struct tcphdr *)(udp + 1); + } } #endif /* INET */ th->th_sport = sc->sc_inc.inc_lport; @@ -1957,8 +2002,11 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) } else optlen = 0; + if (udp) { + ulen += optlen; + udp->uh_ulen = htons(ulen); + } M_SETFIB(m, sc->sc_inc.inc_fibnum); - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); /* * If we have peer's SYN and it has a flowid, then let's assign it to * our SYN|ACK. ip6_output() and ip_output() will not assign flowid @@ -1970,9 +2018,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) } #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { - m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; - th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen, - IPPROTO_TCP, 0); + if (sc->sc_port) { + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in6_cksum_pseudo(ip6, ulen, + IPPROTO_UDP, 0); + th->th_sum = htons(0); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen, + IPPROTO_TCP, 0); + } ip6->ip6_hlim = sc->sc_ip_ttl; #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { @@ -1992,9 +2049,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) #endif #ifdef INET { - m->m_pkthdr.csum_flags = CSUM_TCP; - th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(tlen + optlen - hlen + IPPROTO_TCP)); + if (sc->sc_port) { + m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); + th->th_sum = htons(0); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(tlen + optlen - hlen + IPPROTO_TCP)); + } #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; @@ -2224,7 +2290,7 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc) static struct syncache * syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, - struct socket *lso) + struct socket *lso, uint16_t port) { uint32_t hash; uint8_t *secbits; @@ -2310,6 +2376,8 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, sc->sc_rxmits = 0; + sc->sc_port = port; + TCPSTAT_INC(tcps_sc_recvcookie); return (sc); } @@ -2318,13 +2386,13 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, - struct socket *lso) + struct socket *lso, uint16_t port) { struct syncache scs, *scx; char *s; bzero(&scs, sizeof(scs)); - scx = syncookie_lookup(inc, sch, &scs, th, to, lso); + scx = syncookie_lookup(inc, sch, &scs, th, to, lso, port); if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL) return (0); @@ -2510,6 +2578,7 @@ syncache_pcblist(struct sysctl_req *req) xt.xt_inp.inp_vflag = INP_IPV6; else xt.xt_inp.inp_vflag = INP_IPV4; + xt.xt_encaps_port = sc->sc_port; bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo)); error = SYSCTL_OUT(req, &xt, sizeof xt); |