aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_output.c
diff options
context:
space:
mode:
authorAndre Oppermann <andre@FreeBSD.org>2006-09-07 12:53:01 +0000
committerAndre Oppermann <andre@FreeBSD.org>2006-09-07 12:53:01 +0000
commitb3c0f300fbf1e175fc42b573f3f324a3c6bb85c7 (patch)
tree51d8d16aaacf803627a56c901c7f26fe11f9193e /sys/netinet/tcp_output.c
parent3c89486cc7d42563ef66c660954ff140d4986c10 (diff)
downloadsrc-b3c0f300fbf1e175fc42b573f3f324a3c6bb85c7.tar.gz
src-b3c0f300fbf1e175fc42b573f3f324a3c6bb85c7.zip
Second step of TSO (TCP segmentation offload) support in our network stack.
TSO is only used if we are in a pure bulk sending state. The presence of TCP-MD5, SACK retransmits, SACK advertisements, IPSEC or IP options prevents using TSO. With TSO the TCP header is the same (except for the sequence number) for all generated packets. This makes it impossible to transmit any options which vary per generated segment or packet. The length of TSO bursts is limited to TCP_MAXWIN. The sysctl net.inet.tcp.tso globally controls the use of TSO and is enabled by default. TSO-enabled sends originating from tcp_output() have the CSUM_TCP and CSUM_TSO flags set, m_pkthdr.csum_data filled with the header pseudo-checksum and m_pkthdr.tso_segsz set to the segment size (net payload size, not counting IP+TCP headers or TCP options). IPv6 currently lacks a pseudo-header checksum function and thus doesn't support TSO yet. Tested by: Jack Vogel <jfvogel-at-gmail.com> Sponsored by: TCP/IP Optimization Fundraise 2005
Notes
Notes: svn path=/head/; revision=162110
Diffstat (limited to 'sys/netinet/tcp_output.c')
-rw-r--r--sys/netinet/tcp_output.c88
1 files changed, 73 insertions, 15 deletions
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index ead2c1a4a3b7..3479c1da6b5f 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -105,6 +105,10 @@ int tcp_do_newreno = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
0, "Enable NewReno Algorithms");
+int tcp_do_tso = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+ &tcp_do_tso, 0, "Enable TCP Segmentation Offload");
+
/*
* Tcp output routine: figure out what should be sent and send it.
*/
@@ -127,6 +131,7 @@ tcp_output(struct tcpcb *tp)
int i, sack_rxmit;
int sack_bytes_rxmt;
struct sackhole *p;
+ int tso = 0;
#if 0
int maxburst = TCP_MAXBURST;
#endif
@@ -376,12 +381,34 @@ after_sack_rexmit:
/*
* len will be >= 0 after this point. Truncate to the maximum
- * segment length and ensure that FIN is removed if the length
- * no longer contains the last data byte.
+ * segment length or enable TCP Segmentation Offloading (if supported
+ * by hardware) and ensure that FIN is removed if the length no longer
+ * contains the last data byte.
+ *
+ * TSO may only be used if we are in a pure bulk sending state. The
+ * presence of TCP-MD5, SACK retransmits, SACK advertisements and
+ * IP options prevent using TSO. With TSO the TCP header is the same
+ * (except for the sequence number) for all generated packets. This
+ * makes it impossible to transmit any options which vary per generated
+ * segment or packet.
+ *
+ * The length of TSO bursts is limited to TCP_MAXWIN. That limit and
+ * removal of FIN (if not already caught here) are handled later, after
+ * the exact length of the TCP options is known.
*/
if (len > tp->t_maxseg) {
- len = tp->t_maxseg;
- sendalot = 1;
+ if ((tp->t_flags & TF_TSO) && tcp_do_tso &&
+ ((tp->t_flags & TF_SIGNATURE) == 0) &&
+ tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+ tp->t_inpcb->inp_options == NULL &&
+ tp->t_inpcb->in6p_options == NULL &&
+ tp->t_inpcb->inp_sp == NULL) {
+ tso = 1;
+ } else {
+ len = tp->t_maxseg;
+ sendalot = 1;
+ tso = 0;
+ }
}
if (sack_rxmit) {
if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
@@ -397,7 +424,7 @@ after_sack_rexmit:
* Sender silly window avoidance. We transmit under the following
* conditions when len is non-zero:
*
- * - We have a full segment
+ * - We have a full segment (or more with TSO)
* - This is the last buffer in a write()/send() and we are
* either idle or running NODELAY
* - we've timed out (e.g. persist timer)
@@ -406,7 +433,7 @@ after_sack_rexmit:
* - we need to retransmit
*/
if (len) {
- if (len == tp->t_maxseg)
+ if (len >= tp->t_maxseg)
goto send;
/*
* NOTE! on localhost connections an 'ack' from the remote
@@ -702,14 +729,24 @@ send:
* bump the packet length beyond the t_maxopd length.
* Clear the FIN bit because we cut off the tail of
* the segment.
+ *
+ * When doing TSO, limit a burst to TCP_MAXWIN, set the
+ * flag to continue sending, and prevent the last segment
+ * from being fractional, thus making them all equal sized.
*/
if (len + optlen + ipoptlen > tp->t_maxopd) {
- /*
- * If there is still more to send, don't close the connection.
- */
flags &= ~TH_FIN;
- len = tp->t_maxopd - optlen - ipoptlen;
- sendalot = 1;
+ if (tso) {
+ if (len > TCP_MAXWIN) {
+ len = TCP_MAXWIN - TCP_MAXWIN %
+ (tp->t_maxopd - optlen);
+ sendalot = 1;
+ } else if (tp->t_flags & TF_NEEDFIN)
+ sendalot = 1;
+ } else {
+ len = tp->t_maxopd - optlen - ipoptlen;
+ sendalot = 1;
+ }
}
/*#ifdef DIAGNOSTIC*/
@@ -947,6 +984,16 @@ send:
}
/*
+ * Enable TSO and specify the size of the segments.
+ * The TCP pseudo header checksum is always provided.
+ * XXX: Fixme: This is currently not the case for IPv6.
+ */
+ if (tso) {
+ m->m_pkthdr.csum_flags = CSUM_TSO;
+ m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+ }
+
+ /*
* In transmit state, time the transmission and arrange for
* the retransmit. In persist state, just set snd_max.
*/
@@ -1119,11 +1166,22 @@ out:
}
if (error == EMSGSIZE) {
/*
- * ip_output() will have already fixed the route
- * for us. tcp_mtudisc() will, as its last action,
- * initiate retransmission, so it is important to
- * not do so here.
+ * For some reason the interface we used initially
+ * to send segments changed to another or lowered
+ * its MTU.
+ *
+ * tcp_mtudisc() will find out the new MTU and as
+ * its last action, initiate retransmission, so it
+ * is important to not do so here.
+ *
+ * If TSO was active we either got an interface
+ * without TSO capabilities or TSO was turned off.
+ * Disable it for this connection as well and
+ * immediately retry with MSS sized segments generated
+ * by this function.
*/
+ if (tso)
+ tp->t_flags &= ~TF_TSO;
tcp_mtudisc(tp->t_inpcb, 0);
return 0;
}