aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNavdeep Parhar <np@FreeBSD.org>2021-04-13 00:25:22 +0000
committerNavdeep Parhar <np@FreeBSD.org>2021-04-21 20:00:16 +0000
commit01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2 (patch)
tree132c54588397ba1fb3bc8fedb690522c6b26a681
parent652908599b6fa7285ee60cb567b97e70b648ac29 (diff)
downloadsrc-01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2.tar.gz
src-01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2.zip
Path MTU discovery hooks for offloaded TCP connections.
Notify the TOE driver when an ICMP type 3 code 4 (Fragmentation needed and DF set) message is received for an offloaded connection. This gives the driver an opportunity to lower the path MTU for the connection and resume transmission, much like what the kernel does for the connections that it handles. Reviewed by: glebius@ Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D29755
-rw-r--r--sys/netinet/tcp_offload.c11
-rw-r--r--sys/netinet/tcp_offload.h3
-rw-r--r--sys/netinet/tcp_subr.c80
-rw-r--r--sys/netinet/toecore.c9
-rw-r--r--sys/netinet/toecore.h4
5 files changed, 81 insertions, 26 deletions
diff --git a/sys/netinet/tcp_offload.c b/sys/netinet/tcp_offload.c
index ba190f0303f1..84a4bc3c31a3 100644
--- a/sys/netinet/tcp_offload.c
+++ b/sys/netinet/tcp_offload.c
@@ -219,3 +219,14 @@ tcp_offload_detach(struct tcpcb *tp)
tod->tod_pcb_detach(tod, tp);
}
+/*
+ * Pass a path MTU update for an offloaded connection to its TOE driver.
+ * Asserts that tp->tod is set and that the inpcb write lock is held, then
+ * calls the device's tod_pmtu_update method with seq and mtu unchanged.
+ *
+ * NOTE(review): the callers in tcp_subr.c pass icmp_tcp_seq as-is, whereas
+ * the non-offloaded path compares ntohl(icmp_tcp_seq); presumably the
+ * driver is expected to do the byte-order conversion -- confirm.
+ */
+void
+tcp_offload_pmtu_update(struct tcpcb *tp, tcp_seq seq, int mtu)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_pmtu_update(tod, tp, seq, mtu);
+}
diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h
index 19c120ccdd7d..8f3786e9f7eb 100644
--- a/sys/netinet/tcp_offload.h
+++ b/sys/netinet/tcp_offload.h
@@ -36,6 +36,8 @@
#error "no user-serviceable parts inside"
#endif
+#include <netinet/tcp.h>
+
extern int registered_toedevs;
int tcp_offload_connect(struct socket *, struct sockaddr *);
@@ -48,5 +50,6 @@ void tcp_offload_ctloutput(struct tcpcb *, int, int);
void tcp_offload_tcp_info(struct tcpcb *, struct tcp_info *);
int tcp_offload_alloc_tls_session(struct tcpcb *, struct ktls_session *, int);
void tcp_offload_detach(struct tcpcb *);
+void tcp_offload_pmtu_update(struct tcpcb *, tcp_seq, int);
#endif
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 1ce7a5b1fcf3..b5ecdc6f2307 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2791,6 +2791,21 @@ SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
#endif /* INET6 */
#ifdef INET
+/*
+ * Path MTU to try next when a fragmentation-needed message is received.
+ * Starts from icp->icmp_nextmtu (converted to host byte order); when that
+ * is zero, ip_next_mtu() is consulted with ip->ip_len instead.  The result
+ * is raised to at least V_tcp_minmss + sizeof(struct tcpiphdr).
+ */
+static inline int
+tcp_next_pmtu(const struct icmp *icp, const struct ip *ip)
+{
+ int mtu = ntohs(icp->icmp_nextmtu);
+
+ /* If no alternative MTU was proposed, try the next smaller one. */
+ if (!mtu)
+ mtu = ip_next_mtu(ntohs(ip->ip_len), 1);
+ if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr))
+ mtu = V_tcp_minmss + sizeof(struct tcpiphdr);
+
+ return (mtu);
+}
+
static void
tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
{
@@ -2846,6 +2861,17 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+ /*
+ * MTU discovery for offloaded connections. Let
+ * the TOE driver verify seq# and process it.
+ */
+ mtu = tcp_next_pmtu(icp, ip);
+ tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+ goto out;
+ }
+#endif
if (tp->t_port != port) {
goto out;
}
@@ -2853,24 +2879,11 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
if (cmd == PRC_MSGSIZE) {
/*
- * MTU discovery:
- * If we got a needfrag set the MTU
- * in the route to the suggested new
- * value (if given) and then notify.
+ * MTU discovery: we got a needfrag and
+ * will potentially try a lower MTU.
*/
- mtu = ntohs(icp->icmp_nextmtu);
- /*
- * If no alternative MTU was
- * proposed, try the next smaller
- * one.
- */
- if (!mtu)
- mtu = ip_next_mtu(
- ntohs(ip->ip_len), 1);
- if (mtu < V_tcp_minmss +
- sizeof(struct tcpiphdr))
- mtu = V_tcp_minmss +
- sizeof(struct tcpiphdr);
+ mtu = tcp_next_pmtu(icp, ip);
+
/*
* Only process the offered MTU if it
* is smaller than the current one.
@@ -2948,6 +2961,20 @@ tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused)
#endif /* INET */
#ifdef INET6
+static inline int
+tcp6_next_pmtu(const struct icmp6_hdr *icmp6)
+{
+ int mtu = ntohl(icmp6->icmp6_mtu);
+
+ /*
+ * If no alternative MTU was proposed, or the proposed MTU was too
+ * small, set to the min.  Concretely: any value below IPV6_MMTU is
+ * replaced with IPV6_MMTU - 8 (not IPV6_MMTU itself); anything else
+ * is returned as received, converted to host byte order.
+ */
+ if (mtu < IPV6_MMTU)
+ mtu = IPV6_MMTU - 8; /* XXXNP: what is the adjustment for? */
+ return (mtu);
+}
+
static void
tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
{
@@ -3039,6 +3066,14 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+ /* MTU discovery for offloaded connections. */
+ mtu = tcp6_next_pmtu(icmp6);
+ tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+ goto out;
+ }
+#endif
if (tp->t_port != port) {
goto out;
}
@@ -3051,15 +3086,8 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
* in the route to the suggested new
* value (if given) and then notify.
*/
- mtu = ntohl(icmp6->icmp6_mtu);
- /*
- * If no alternative MTU was
- * proposed, or the proposed
- * MTU was too small, set to
- * the min.
- */
- if (mtu < IPV6_MMTU)
- mtu = IPV6_MMTU - 8;
+ mtu = tcp6_next_pmtu(icmp6);
+
bzero(&inc, sizeof(inc));
inc.inc_fibnum = M_GETFIB(m);
inc.inc_flags |= INC_ISIPV6;
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
index d8d499a6fde3..5792298d2883 100644
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -199,6 +199,14 @@ toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused,
return (EINVAL);
}
+static void
+toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
+ tcp_seq seq __unused, int mtu __unused)
+{
+ /*
+ * Default tod_pmtu_update method installed by init_toedev(): every
+ * argument is ignored and nothing is done.
+ */
+ return;
+}
+
/*
* Inform one or more TOE devices about a listening socket.
*/
@@ -290,6 +298,7 @@ init_toedev(struct toedev *tod)
tod->tod_ctloutput = toedev_ctloutput;
tod->tod_tcp_info = toedev_tcp_info;
tod->tod_alloc_tls_session = toedev_alloc_tls_session;
+ tod->tod_pmtu_update = toedev_pmtu_update;
}
/*
diff --git a/sys/netinet/toecore.h b/sys/netinet/toecore.h
index 36493abf7149..ce796ab54dc5 100644
--- a/sys/netinet/toecore.h
+++ b/sys/netinet/toecore.h
@@ -35,6 +35,7 @@
#error "no user-serviceable parts inside"
#endif
+#include <netinet/tcp.h>
#include <sys/_eventhandler.h>
struct tcpopt;
@@ -114,6 +115,9 @@ struct toedev {
/* Create a TLS session */
int (*tod_alloc_tls_session)(struct toedev *, struct tcpcb *,
struct ktls_session *, int);
+
+ /* ICMP fragmentation-needed received, adjust PMTU. */
+ void (*tod_pmtu_update)(struct toedev *, struct tcpcb *, tcp_seq, int);
};
typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);