aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_var.h
diff options
context:
space:
mode:
authorAndrew Gallatin <gallatin@FreeBSD.org>2021-07-06 14:17:33 +0000
committerAndrew Gallatin <gallatin@FreeBSD.org>2021-07-06 14:28:32 +0000
commit28d0a740dd9a67e4a4fa9fda5bb39b5963316f35 (patch)
tree20f30d10ba70a01a4a37a50617d92a8570062394 /sys/netinet/tcp_var.h
parentc9144ec14d2a5a53cfe91ada1b3b9c06b78dc999 (diff)
downloadsrc-28d0a740dd9a67e4a4fa9fda5bb39b5963316f35.tar.gz
src-28d0a740dd9a67e4a4fa9fda5bb39b5963316f35.zip
ktls: auto-disable ifnet (inline hw) kTLS
Ifnet (inline) hw kTLS NICs typically keep state within a TLS record, so that when transmitting in-order, they can continue encryption on each segment sent without DMA'ing extra state from the host. This breaks down when transmits are out of order (e.g., TCP retransmits). In this case, the NIC must re-DMA the entire TLS record up to and including the segment being retransmitted. This means that when re-transmitting the last 1448 byte segment of a TLS record, the NIC will have to re-DMA the entire 16KB TLS record. This can lead to the NIC running out of PCIe bus bandwidth well before it saturates the network link if a lot of TCP connections have a high retransmit rate. This change introduces a new sysctl (kern.ipc.tls.ifnet_max_rexmit_pct); TCP connections whose retransmit rate exceeds this percentage will be switched to SW kTLS so as to conserve PCIe bandwidth. Reviewed by: hselasky, markj, rrs Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D30908
Diffstat (limited to 'sys/netinet/tcp_var.h')
-rw-r--r--sys/netinet/tcp_var.h13
1 files changed, 12 insertions, 1 deletions
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index dd30f89896d2..3f72a821e71f 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -39,8 +39,10 @@
#include <netinet/tcp_fsm.h>
#ifdef _KERNEL
+#include "opt_kern_tls.h"
#include <net/vnet.h>
#include <sys/mbuf.h>
+#include <sys/ktls.h>
#endif
#define TCP_END_BYTE_INFO 8 /* Bytes that makeup the "end information array" */
@@ -1139,8 +1141,10 @@ tcp_fields_to_net(struct tcphdr *th)
static inline void
tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt,
- uint8_t is_tlp, int hw_tls __unused)
+ uint8_t is_tlp, int hw_tls)
{
+ uint64_t rexmit_percent;
+
if (is_tlp) {
tp->t_sndtlppack++;
tp->t_sndtlpbyte += len;
@@ -1150,6 +1154,13 @@ tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt,
tp->t_snd_rxt_bytes += len;
else
tp->t_sndbytes += len;
+
+ if (hw_tls && is_rxt) {
+ rexmit_percent = (1000ULL * tp->t_snd_rxt_bytes) / (10ULL * (tp->t_snd_rxt_bytes + tp->t_sndbytes));
+ if (rexmit_percent > ktls_ifnet_max_rexmit_pct)
+ ktls_disable_ifnet(tp);
+ }
+
}
#endif /* _KERNEL */