aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/kern/uipc_ktls.c107
-rw-r--r--sys/netinet/tcp_var.h13
-rw-r--r--sys/sys/ktls.h15
3 files changed, 133 insertions, 2 deletions
diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
index 7e87e7c740e3..88e29157289d 100644
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_kern_tls.h"
#include "opt_ratelimit.h"
#include "opt_rss.h"
@@ -121,6 +122,11 @@ SYSCTL_INT(_kern_ipc_tls_stats, OID_AUTO, threads, CTLFLAG_RD,
&ktls_number_threads, 0,
"Number of TLS threads in thread-pool");
+unsigned int ktls_ifnet_max_rexmit_pct = 2; /* percent; tcp_account_for_send() calls ktls_disable_ifnet() once exceeded */
+SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, ifnet_max_rexmit_pct, CTLFLAG_RWTUN,
+ &ktls_ifnet_max_rexmit_pct, 2,
+ "Max percent bytes retransmitted before ifnet TLS is disabled");
+
static bool ktls_offload_enable;
SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, enable, CTLFLAG_RWTUN,
&ktls_offload_enable, 0,
@@ -184,6 +190,14 @@ static COUNTER_U64_DEFINE_EARLY(ktls_switch_failed);
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_failed, CTLFLAG_RD,
&ktls_switch_failed, "TLS sessions unable to switch between SW and ifnet");
+static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_fail); /* bumped by ktls_disable_ifnet_help() when the switch to SW fails */
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_failed, CTLFLAG_RD,
+ &ktls_ifnet_disable_fail, "TLS sessions unable to switch to SW from ifnet");
+
+static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_ok); /* bumped by ktls_disable_ifnet_help() when the switch to SW succeeds */
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_ok, CTLFLAG_RD,
+ &ktls_ifnet_disable_ok, "TLS sessions able to switch to SW from ifnet");
+
SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"Software TLS session stats");
SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
@@ -2187,3 +2201,96 @@ ktls_work_thread(void *ctx)
}
}
}
+/*
+ * Taskqueue handler queued by ktls_disable_ifnet().  It moves the
+ * session passed as context from ifnet TLS to software TLS by calling
+ * ktls_set_tx_mode(so, TCP_TLS_MODE_SW), bumps the ifnet_disable_ok or
+ * ifnet_disable_failed counter accordingly and, on success, calls the
+ * TCP stack tfb_hwtls_change hook with 0 when the stack provides one.
+ *
+ * Before returning on the normal path it drops the socket, inpcb and
+ * session references that ktls_disable_ifnet() took prior to
+ * enqueueing the task.  The pending argument is unused.
+ */
+static void
+ktls_disable_ifnet_help(void *context, int pending __unused)
+{
+ struct ktls_session *tls;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ struct socket *so;
+ int err;
+ /*
+ * NOTE(review): the inp == NULL exit below releases nothing.
+ * ktls_disable_ifnet() stores tls->inp before enqueueing, so the
+ * exit looks unreachable from that path -- confirm.
+ */
+ tls = context;
+ inp = tls->inp;
+ if (inp == NULL)
+ return;
+ INP_WLOCK(inp);
+ so = inp->inp_socket;
+ MPASS(so != NULL);
+ if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
+ (inp->inp_flags2 & INP_FREED)) {
+ goto out; /* skip the mode switch; only release the references */
+ }
+ /*
+ * Attempt the switch only while the send buffer still has a TLS
+ * session attached; otherwise treat it as a failure (ENXIO).
+ */
+ if (so->so_snd.sb_tls_info != NULL)
+ err = ktls_set_tx_mode(so, TCP_TLS_MODE_SW);
+ else
+ err = ENXIO;
+ if (err == 0) {
+ counter_u64_add(ktls_ifnet_disable_ok, 1);
+ /* ktls_set_tx_mode() drops inp wlock, so recheck flags */
+ if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0 &&
+ (inp->inp_flags2 & INP_FREED) == 0 &&
+ (tp = intotcpcb(inp)) != NULL &&
+ tp->t_fb->tfb_hwtls_change != NULL)
+ (*tp->t_fb->tfb_hwtls_change)(tp, 0);
+ } else {
+ counter_u64_add(ktls_ifnet_disable_fail, 1);
+ }
+ /*
+ * Release what ktls_disable_ifnet() acquired.  sorele() is called
+ * with the socket lock taken here.  The inpcb is unlocked
+ * explicitly only when in_pcbrele_wlocked() returns false;
+ * presumably a true return means the inpcb is already gone --
+ * confirm against in_pcbrele_wlocked().
+ */
+out:
+ SOCK_LOCK(so);
+ sorele(so);
+ if (!in_pcbrele_wlocked(inp))
+ INP_WUNLOCK(inp);
+ ktls_free(tls);
+}
+
+/*
+ * Called when re-transmits are becoming a substantial portion of the
+ * sends on this connection. When this happens, we transition the
+ * connection to software TLS. This is needed because most inline TLS
+ * NICs keep crypto state only for in-order transmits. This means
+ * that to handle a TCP rexmit (which is out-of-order), the NIC must
+ * re-DMA the entire TLS record up to and including the current
+ * segment. This means that when re-transmitting the last ~1448 byte
+ * segment of a 16KB TLS record, we could wind up re-DMA'ing an order
+ * of magnitude more data than we are sending. This can cause the
+ * PCIe link to saturate well before the network, which can cause
+ * output drops, and a general loss of capacity.
+ *
+ * arg is the struct tcpcb of the connection.  The caller must hold
+ * the inpcb write lock (asserted below).  The mode switch is not done
+ * here: this function takes references on the session, the inpcb and
+ * the socket, records the inpcb in the session, and enqueues
+ * ktls_disable_ifnet_help() on taskqueue_thread, which performs the
+ * switch and drops those references.  If a request is already pending
+ * for the session, the call returns without doing anything.
+ */
+void
+ktls_disable_ifnet(void *arg)
+{
+ struct tcpcb *tp;
+ struct inpcb *inp;
+ struct socket *so;
+ struct ktls_session *tls;
+
+ tp = arg;
+ inp = tp->t_inpcb;
+ INP_WLOCK_ASSERT(inp);
+ so = inp->inp_socket;
+ SOCK_LOCK(so);
+ tls = so->so_snd.sb_tls_info; /* NOTE(review): used unchecked; assumes a TLS session is attached -- confirm against callers */
+ if (tls->disable_ifnet_pending) {
+ SOCK_UNLOCK(so);
+ return;
+ }
+
+ /*
+ * Note that disable_ifnet_pending is never cleared; disabling
+ * ifnet can only be done once per session, so we never want
+ * to do it again.
+ *
+ * The flag is tested and set, and the references are taken, while
+ * the socket lock is held; the task is initialized and enqueued
+ * only after that lock is dropped.
+ */
+
+ (void)ktls_hold(tls);
+ in_pcbref(inp);
+ soref(so);
+ tls->disable_ifnet_pending = true;
+ tls->inp = inp;
+ SOCK_UNLOCK(so);
+ TASK_INIT(&tls->disable_ifnet_task, 0, ktls_disable_ifnet_help, tls);
+ (void)taskqueue_enqueue(taskqueue_thread, &tls->disable_ifnet_task);
+}
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index dd30f89896d2..3f72a821e71f 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -39,8 +39,10 @@
#include <netinet/tcp_fsm.h>
#ifdef _KERNEL
+#include "opt_kern_tls.h"
#include <net/vnet.h>
#include <sys/mbuf.h>
+#include <sys/ktls.h>
#endif
#define TCP_END_BYTE_INFO 8 /* Bytes that makeup the "end information array" */
@@ -1139,8 +1141,10 @@ tcp_fields_to_net(struct tcphdr *th)
static inline void
tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt,
- uint8_t is_tlp, int hw_tls __unused)
+ uint8_t is_tlp, int hw_tls)
{
+ uint64_t rexmit_percent;
+
if (is_tlp) {
tp->t_sndtlppack++;
tp->t_sndtlpbyte += len;
@@ -1150,6 +1154,13 @@ tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt,
tp->t_snd_rxt_bytes += len;
else
tp->t_sndbytes += len;
+
+ if (hw_tls && is_rxt) {
+ rexmit_percent = (1000ULL * tp->t_snd_rxt_bytes) / (10ULL * (tp->t_snd_rxt_bytes + tp->t_sndbytes));
+ if (rexmit_percent > ktls_ifnet_max_rexmit_pct)
+ ktls_disable_ifnet(tp);
+ }
+
}
#endif /* _KERNEL */
diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h
index b28c94965c97..7fd8831878b4 100644
--- a/sys/sys/ktls.h
+++ b/sys/sys/ktls.h
@@ -189,10 +189,12 @@ struct ktls_session {
u_int wq_index;
volatile u_int refcount;
int mode;
- bool reset_pending;
struct task reset_tag_task;
+ struct task disable_ifnet_task;
struct inpcb *inp;
+ bool reset_pending;
+ bool disable_ifnet_pending;
} __aligned(CACHE_LINE_SIZE);
void ktls_check_rx(struct sockbuf *sb);
@@ -231,5 +233,16 @@ ktls_free(struct ktls_session *tls)
ktls_destroy(tls);
}
+#ifdef KERN_TLS
+extern unsigned int ktls_ifnet_max_rexmit_pct;
+void ktls_disable_ifnet(void *arg);
+#else
+/*
+ * Kernels built without KERN_TLS still compile tcp_account_for_send(),
+ * which references both names, so provide a constant threshold and a
+ * no-op in their place.
+ *
+ * The stub must be static inline.  A plain inline definition in a
+ * header supplies no external definition under C99 rules, so any call
+ * the compiler chooses not to inline would be left unresolved at link
+ * time.
+ */
+#define ktls_ifnet_max_rexmit_pct 1
+static inline void
+ktls_disable_ifnet(void *arg __unused)
+{
+}
+#endif
+
#endif /* !_KERNEL */
#endif /* !_SYS_KTLS_H_ */