aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Kelsey <pkelsey@FreeBSD.org>2015-12-24 19:09:48 +0000
committerPatrick Kelsey <pkelsey@FreeBSD.org>2015-12-24 19:09:48 +0000
commit281a0fd4f9dd2b96396b634e282cd136b12724fc (patch)
tree2ddced10e905baf7fbbdb6e54064a138da8144e0
parent18010b98c28d0f5a2bad13174eda9b30c486320e (diff)
downloadsrc-281a0fd4f9dd2b96396b634e282cd136b12724fc.tar.gz
src-281a0fd4f9dd2b96396b634e282cd136b12724fc.zip
Implementation of server-side TCP Fast Open (TFO) [RFC7413].
TFO is disabled by default in the kernel build. See the top comment in sys/netinet/tcp_fastopen.c for implementation particulars. Reviewed by: gnn, jch, stas MFC after: 3 days Sponsored by: Verisign, Inc. Differential Revision: https://reviews.freebsd.org/D4350
Notes
Notes: svn path=/head/; revision=292706
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/options2
-rw-r--r--sys/netinet/tcp.h5
-rw-r--r--sys/netinet/tcp_fastopen.c442
-rw-r--r--sys/netinet/tcp_fastopen.h47
-rw-r--r--sys/netinet/tcp_input.c99
-rw-r--r--sys/netinet/tcp_output.c71
-rw-r--r--sys/netinet/tcp_subr.c21
-rw-r--r--sys/netinet/tcp_syncache.c131
-rw-r--r--sys/netinet/tcp_syncache.h6
-rw-r--r--sys/netinet/tcp_timer.c3
-rw-r--r--sys/netinet/tcp_usrreq.c58
-rw-r--r--sys/netinet/tcp_var.h16
13 files changed, 880 insertions, 22 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 272360f6e8cb..0436bd2d04a5 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3688,6 +3688,7 @@ netinet/sctp_usrreq.c optional inet sctp | inet6 sctp
netinet/sctputil.c optional inet sctp | inet6 sctp
netinet/siftr.c optional inet siftr alq | inet6 siftr alq
netinet/tcp_debug.c optional tcpdebug
+netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413
netinet/tcp_hostcache.c optional inet | inet6
netinet/tcp_input.c optional inet | inet6
netinet/tcp_lro.c optional inet | inet6
diff --git a/sys/conf/options b/sys/conf/options
index 65932b864d9f..acded0522f1d 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -440,6 +440,8 @@ TCPDEBUG
TCPPCAP opt_global.h
SIFTR
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
+TCP_RFC7413 opt_inet.h
+TCP_RFC7413_MAX_KEYS opt_inet.h
TCP_SIGNATURE opt_inet.h
VLAN_ARRAY opt_vlan.h
XBONEHACK
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 294e9944ae6e..bfc8073fc996 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -97,6 +97,10 @@ struct tcphdr {
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
+#define TCPOPT_FAST_OPEN 34
+#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOLEN_FAST_OPEN_MIN 6
+#define TCPOLEN_FAST_OPEN_MAX 18
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
@@ -165,6 +169,7 @@ struct tcphdr {
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
+#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */
#define TCP_PCAP_OUT 2048 /* number of output packets to keep */
#define TCP_PCAP_IN 4096 /* number of input packets to keep */
#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */
diff --git a/sys/netinet/tcp_fastopen.c b/sys/netinet/tcp_fastopen.c
new file mode 100644
index 000000000000..482320e5b730
--- /dev/null
+++ b/sys/netinet/tcp_fastopen.c
@@ -0,0 +1,442 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ *
+ * This implementation is currently considered to be experimental and is not
+ * included in kernel builds by default. To include this code, add the
+ * following line to your kernel config:
+ *
+ * options TCP_RFC7413
+ *
+ * The generated TFO cookies are the 64-bit output of
+ * SipHash24(<16-byte-key><client-ip>). Multiple concurrent valid keys are
+ * supported so that time-based rolling cookie invalidation policies can be
+ * implemented in the system. The default number of concurrent keys is 2.
+ * This can be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_KEYS=<num-keys>
+ *
+ *
+ * The following TFO-specific sysctls are defined:
+ *
+ * net.inet.tcp.fastopen.acceptany (RW, default 0)
+ * When non-zero, all client-supplied TFO cookies will be considered to
+ * be valid.
+ *
+ * net.inet.tcp.fastopen.autokey (RW, default 120)
+ * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
+ * will be automatically generated after this many seconds.
+ *
+ * net.inet.tcp.fastopen.enabled (RW, default 0)
+ * When zero, no new TFO connections can be created. On the transition
+ * from enabled to disabled, all installed keys are removed. On the
+ * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
+ * is non-zero and there are no keys installed, a new key will be
+ * generated immediately. The transition from enabled to disabled does
+ * not affect any TFO connections in progress; it only prevents new ones
+ * from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RO)
+ * The key length in bytes.
+ *
+ * net.inet.tcp.fastopen.maxkeys (RO)
+ * The maximum number of keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RO)
+ * The current number of keys installed.
+ *
+ * net.inet.tcp.fastopen.setkey (WO)
+ * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
+ * sysctl.
+ *
+ *
+ * In order for TFO connections to be created via a listen socket, that
+ * socket must have the TCP_FASTOPEN socket option set on it. This option
+ * can be set on the socket either before or after the listen() is invoked.
+ * Clearing this option on a listen socket after it has been set has no
+ * effect on existing TFO connections or TFO connections in progress; it
+ * only prevents new TFO connections from being made.
+ *
+ * For passively-created sockets, the TCP_FASTOPEN socket option can be
+ * queried to determine whether the connection was established using TFO.
+ * Note that connections that are established via a TFO SYN, but that fall
+ * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
+ * set.
+ *
+ * Per the RFC, this implementation limits the number of TFO connections
+ * that can be in the SYN_RECEIVED state on a per listen-socket basis.
+ * Whenever this limit is exceeded, requests for new TFO connections are
+ * serviced as non-TFO requests. Without such a limit, given a valid TFO
+ * cookie, an attacker could keep the listen queue in an overflow condition
+ * using a TFO SYN flood. This implementation sets the limit at half the
+ * configured listen backlog.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_fastopen.h>
+#include <netinet/tcp_var.h>
+
+
+#define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH
+
+#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
+#define TCP_FASTOPEN_MAX_KEYS 2
+#else
+#define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS
+#endif
+
+struct tcp_fastopen_keylist {
+ unsigned int newest;
+ uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+};
+
+struct tcp_fastopen_callout {
+ struct callout c;
+ struct vnet *v;
+};
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
+
+static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
+#define V_tcp_fastopen_acceptany VNET(tcp_fastopen_acceptany)
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
+ "Accept any non-empty cookie");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120;
+#define V_tcp_fastopen_autokey VNET(tcp_fastopen_autokey)
+static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_autokey, "IU",
+ "Number of seconds between auto-generation of a new key; zero disables");
+
+VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
+static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_enabled, "IU",
+ "Enable/disable TCP Fast Open processing");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
+ "Key length in bytes");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
+ "Maximum number of keys supported");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
+#define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
+ "Number of keys installed");
+
+static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
+ CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_setkey, "",
+ "Install a new key");
+
+static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
+#define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock)
+
+#define TCP_FASTOPEN_KEYS_RLOCK(t) rm_rlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_RUNLOCK(t) rm_runlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_WLOCK() rm_wlock(&V_tcp_fastopen_keylock)
+#define TCP_FASTOPEN_KEYS_WUNLOCK() rm_wunlock(&V_tcp_fastopen_keylock)
+
+static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys);
+#define V_tcp_fastopen_keys VNET(tcp_fastopen_keys)
+
+static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
+#define V_tcp_fastopen_autokey_ctx VNET(tcp_fastopen_autokey_ctx)
+
+static VNET_DEFINE(uma_zone_t, counter_zone);
+#define V_counter_zone VNET(counter_zone)
+
+void
+tcp_fastopen_init(void)
+{
+ V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
+ callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
+ &V_tcp_fastopen_keylock, 0);
+ V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+}
+
+void
+tcp_fastopen_destroy(void)
+{
+ callout_drain(&V_tcp_fastopen_autokey_ctx.c);
+ rm_destroy(&V_tcp_fastopen_keylock);
+ uma_zdestroy(V_counter_zone);
+}
+
+unsigned int *
+tcp_fastopen_alloc_counter(void)
+{
+ unsigned int *counter;
+ counter = uma_zalloc(V_counter_zone, M_NOWAIT);
+ if (counter)
+ *counter = 1;
+ return (counter);
+}
+
+void
+tcp_fastopen_decrement_counter(unsigned int *counter)
+{
+ if (*counter == 1)
+ uma_zfree(V_counter_zone, counter);
+ else
+ atomic_subtract_int(counter, 1);
+}
+
+static void
+tcp_fastopen_addkey_locked(uint8_t *key)
+{
+
+ V_tcp_fastopen_keys.newest++;
+ if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
+ V_tcp_fastopen_keys.newest = 0;
+ memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
+ TCP_FASTOPEN_KEY_LEN);
+ if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
+ V_tcp_fastopen_numkeys++;
+}
+
+static void
+tcp_fastopen_autokey_locked(void)
+{
+ uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+ arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
+ tcp_fastopen_addkey_locked(newkey);
+}
+
+static void
+tcp_fastopen_autokey_callout(void *arg)
+{
+ struct tcp_fastopen_callout *ctx = arg;
+
+ CURVNET_SET(ctx->v);
+ tcp_fastopen_autokey_locked();
+ callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
+ tcp_fastopen_autokey_callout, ctx);
+ CURVNET_RESTORE();
+}
+
+
+static uint64_t
+tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
+{
+ SIPHASH_CTX ctx;
+ uint64_t siphash;
+
+ SipHash24_Init(&ctx);
+ SipHash_SetKey(&ctx, key);
+ switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+ case 0:
+ SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
+ break;
+#endif
+#ifdef INET6
+ case INC_ISIPV6:
+ SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
+ break;
+#endif
+ }
+ SipHash_Final((u_int8_t *)&siphash, &ctx);
+
+ return (siphash);
+}
+
+
+/*
+ * Return values:
+ * -1 the cookie is invalid and no valid cookie is available
+ * 0 the cookie is invalid and the latest cookie has been returned
+ * 1 the cookie is valid and the latest cookie has been returned
+ */
+int
+tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+ unsigned int len, uint64_t *latest_cookie)
+{
+ struct rm_priotracker tracker;
+ unsigned int i, key_index;
+ uint64_t cur_cookie;
+
+ if (V_tcp_fastopen_acceptany) {
+ *latest_cookie = 0;
+ return (1);
+ }
+
+ if (len != TCP_FASTOPEN_COOKIE_LEN) {
+ if (V_tcp_fastopen_numkeys > 0) {
+ *latest_cookie =
+ tcp_fastopen_make_cookie(
+ V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
+ inc);
+ return (0);
+ }
+ return (-1);
+ }
+
+ /*
+ * Check against each available key, from newest to oldest.
+ */
+ TCP_FASTOPEN_KEYS_RLOCK(&tracker);
+ key_index = V_tcp_fastopen_keys.newest;
+ for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
+ cur_cookie =
+ tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
+ inc);
+ if (i == 0)
+ *latest_cookie = cur_cookie;
+ if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
+ TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+ return (1);
+ }
+ if (key_index == 0)
+ key_index = TCP_FASTOPEN_MAX_KEYS - 1;
+ else
+ key_index--;
+ }
+ TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+
+ return (0);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ unsigned int new;
+
+ new = V_tcp_fastopen_autokey;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (new > (INT_MAX / hz))
+ return (EINVAL);
+
+ TCP_FASTOPEN_KEYS_WLOCK();
+ if (V_tcp_fastopen_enabled) {
+ if (V_tcp_fastopen_autokey && !new)
+ callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+ else if (new)
+ callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+ new * hz, tcp_fastopen_autokey_callout,
+ &V_tcp_fastopen_autokey_ctx);
+ }
+ V_tcp_fastopen_autokey = new;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ }
+
+ return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ unsigned int new;
+
+ new = V_tcp_fastopen_enabled;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (V_tcp_fastopen_enabled && !new) {
+ /* enabled -> disabled */
+ TCP_FASTOPEN_KEYS_WLOCK();
+ V_tcp_fastopen_numkeys = 0;
+ V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+ if (V_tcp_fastopen_autokey)
+ callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+ V_tcp_fastopen_enabled = 0;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ } else if (!V_tcp_fastopen_enabled && new) {
+ /* disabled -> enabled */
+ TCP_FASTOPEN_KEYS_WLOCK();
+ if (V_tcp_fastopen_autokey &&
+ (V_tcp_fastopen_numkeys == 0)) {
+ tcp_fastopen_autokey_locked();
+ callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+ V_tcp_fastopen_autokey * hz,
+ tcp_fastopen_autokey_callout,
+ &V_tcp_fastopen_autokey_ctx);
+ }
+ V_tcp_fastopen_enabled = 1;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ }
+ }
+ return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+ if (req->oldptr != NULL || req->oldlen != 0)
+ return (EINVAL);
+ if (req->newptr == NULL)
+ return (EPERM);
+ if (req->newlen != sizeof(newkey))
+ return (EINVAL);
+ error = SYSCTL_IN(req, newkey, sizeof(newkey));
+ if (error)
+ return (error);
+
+ TCP_FASTOPEN_KEYS_WLOCK();
+ tcp_fastopen_addkey_locked(newkey);
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+
+ return (0);
+}
diff --git a/sys/netinet/tcp_fastopen.h b/sys/netinet/tcp_fastopen.h
new file mode 100644
index 000000000000..c64ba2c04d5d
--- /dev/null
+++ b/sys/netinet/tcp_fastopen.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_FASTOPEN_H_
+#define _TCP_FASTOPEN_H_
+
+#ifdef _KERNEL
+
+#define TCP_FASTOPEN_COOKIE_LEN 8 /* tied to SipHash24 64-bit output */
+
+VNET_DECLARE(unsigned int, tcp_fastopen_enabled);
+#define V_tcp_fastopen_enabled VNET(tcp_fastopen_enabled)
+
+void tcp_fastopen_init(void);
+void tcp_fastopen_destroy(void);
+unsigned int *tcp_fastopen_alloc_counter(void);
+void tcp_fastopen_decrement_counter(unsigned int *counter);
+int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+ unsigned int len, uint64_t *latest_cookie);
+#endif /* _KERNEL */
+
+#endif /* _TCP_FASTOPEN_H_ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 140cee771039..9aae6852a1b1 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -98,6 +98,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -999,7 +1002,8 @@ relocked:
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
#endif
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
- (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
+ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+ !(tp->t_flags & TF_FASTOPEN)))) {
if (ti_locked == TI_UNLOCKED) {
if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
@@ -1091,6 +1095,9 @@ relocked:
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
if (so == NULL) {
/*
* We completed the 3-way handshake
@@ -1353,7 +1360,12 @@ relocked:
#endif
TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
tcp_dooptions(&to, optp, optlen, TO_SYN);
+#ifdef TCP_RFC7413
+ if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ goto new_tfo_socket;
+#else
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
/*
* Entry added to syncache and mbuf consumed.
* Only the listen socket is unlocked by syncache_add().
@@ -1468,7 +1480,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct in_conninfo *inc;
struct mbuf *mfree;
struct tcpopt to;
-
+ int tfo_syn;
+
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1921,6 +1934,28 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN) {
+ /*
+ * When a TFO connection is in SYN_RECEIVED, the
+ * only valid packets are the initial SYN, a
+ * retransmit/copy of the initial SYN (possibly with
+ * a subset of the original data), a valid ACK, a
+ * FIN, or a RST.
+ */
+ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ } else if (thflags & TH_SYN) {
+ /* non-initial SYN is ignored */
+ if ((tcp_timer_active(tp, TT_DELACK) ||
+ tcp_timer_active(tp, TT_REXMT)))
+ goto drop;
+ } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+ goto drop;
+ }
+ }
+#endif
break;
/*
@@ -2136,7 +2171,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* RFC5961 Section 4.2
* Send challenge ACK for any SYN in synchronized state.
*/
- if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
+ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
+ tp->t_state != TCPS_SYN_RECEIVED) {
KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -2330,9 +2366,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & TH_ACK) == 0) {
if (tp->t_state == TCPS_SYN_RECEIVED ||
- (tp->t_flags & TF_NEEDSYN))
+ (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+ if (tp->t_state == TCPS_SYN_RECEIVED &&
+ tp->t_flags & TF_FASTOPEN) {
+ tp->snd_wnd = tiwin;
+ cc_conn_init(tp);
+ }
+#endif
goto step6;
- else if (tp->t_flags & TF_ACKNOW)
+ } else if (tp->t_flags & TF_ACKNOW)
goto dropafterack;
else
goto drop;
@@ -2371,7 +2414,27 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_ESTABLISHED);
TCP_PROBE5(accept__established, NULL, tp,
mtod(m, const char *), tp, th);
- cc_conn_init(tp);
+#ifdef TCP_RFC7413
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+
+ /*
+ * Account for the ACK of our SYN prior to
+ * regular ACK processing below.
+ */
+ tp->snd_una++;
+ }
+ /*
+ * TFO connections call cc_conn_init() during SYN
+ * processing. Calling it again here for such
+ * connections is not harmless as it would undo the
+ * snd_cwnd reduction that occurs when a TFO SYN|ACK
+ * is retransmitted.
+ */
+ if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+ cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
@@ -2919,7 +2982,9 @@ dodata: /* XXX */
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
*/
- if ((tlen || (thflags & TH_FIN)) &&
+ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags & TF_FASTOPEN));
+ if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
m_adj(m, drop_hdrlen); /* delayed header drop */
@@ -2937,8 +3002,9 @@ dodata: /* XXX */
*/
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
- TCPS_HAVEESTABLISHED(tp->t_state)) {
- if (DELAY_ACK(tp, tlen))
+ (TCPS_HAVEESTABLISHED(tp->t_state) ||
+ tfo_syn)) {
+ if (DELAY_ACK(tp, tlen) || tfo_syn)
tp->t_flags |= TF_DELACK;
else
tp->t_flags |= TF_ACKNOW;
@@ -3293,6 +3359,21 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
to->to_sacks = cp + 2;
TCPSTAT_INC(tcps_sack_rcv_blocks);
break;
+#ifdef TCP_RFC7413
+ case TCPOPT_FAST_OPEN:
+ if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+ (optlen < TCPOLEN_FAST_OPEN_MIN) &&
+ (optlen > TCPOLEN_FAST_OPEN_MAX))
+ continue;
+ if (!(flags & TO_SYN))
+ continue;
+ if (!V_tcp_fastopen_enabled)
+ continue;
+ to->to_flags |= TOF_FASTOPEN;
+ to->to_tfo_len = optlen - 2;
+ to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+ break;
+#endif
default:
continue;
}
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index d295dcfbcd6b..482ead5a329b 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -68,6 +68,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
@@ -204,6 +207,17 @@ tcp_output(struct tcpcb *tp)
return (tcp_offload_output(tp));
#endif
+#ifdef TCP_RFC7413
+ /*
+ * For TFO connections in SYN_RECEIVED, only allow the initial
+ * SYN|ACK and those sent by the retransmit timer.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* inital SYN|ACK sent */
+ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
+ return (0);
+#endif
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
@@ -390,6 +404,15 @@ after_sack_rexmit:
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
if (tp->t_state != TCPS_SYN_RECEIVED)
flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+ /*
+ * When sending additional segments following a TFO SYN|ACK,
+ * do not include the SYN bit.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ flags &= ~TH_SYN;
+#endif
off--, len++;
}
@@ -403,6 +426,17 @@ after_sack_rexmit:
flags &= ~TH_FIN;
}
+#ifdef TCP_RFC7413
+ /*
+ * When retransmitting SYN|ACK on a passively-created TFO socket,
+ * don't include data, as the presence of data may have caused the
+ * original SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+ (flags & TH_RST)))
+ len = 0;
+#endif
if (len <= 0) {
/*
* If FIN has been sent but not acked,
@@ -725,6 +759,22 @@ send:
tp->snd_nxt = tp->iss;
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+ /*
+ * Only include the TFO option on the first
+ * transmission of the SYN|ACK on a
+ * passively-created TFO socket, as the presence of
+ * the TFO option may have caused the original
+ * SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_rxtshift == 0)) {
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+ to.to_flags |= TOF_FASTOPEN;
+ }
+#endif
}
/* Window scaling. */
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -1004,7 +1054,7 @@ send:
* give data to the user when a buffer fills or
* a PUSH comes in.)
*/
- if (off + len == sbused(&so->so_snd))
+ if ((off + len == sbused(&so->so_snd)) && !(flags & TH_SYN))
flags |= TH_PUSH;
SOCKBUF_UNLOCK(&so->so_snd);
} else {
@@ -1711,6 +1761,25 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
TCPSTAT_INC(tcps_sack_send_blocks);
break;
}
+#ifdef TCP_RFC7413
+ case TOF_FASTOPEN:
+ {
+ int total_len;
+
+ /* XXX is there any point to aligning this option? */
+ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+ if (TCP_MAXOLEN - optlen < total_len)
+ continue;
+ *optp++ = TCPOPT_FAST_OPEN;
+ *optp++ = total_len;
+ if (to->to_tfo_len > 0) {
+ bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+ optp += to->to_tfo_len;
+ }
+ optlen += total_len;
+ break;
+ }
+#endif
default:
panic("%s: unknown TCP option type", __func__);
break;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 045d516b383c..c2e0696394a5 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -84,6 +84,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -704,6 +707,10 @@ tcp_init(void)
#ifdef TCPPCAP
tcp_pcap_init();
#endif
+
+#ifdef TCP_RFC7413
+ tcp_fastopen_init();
+#endif
}
#ifdef VIMAGE
@@ -712,6 +719,9 @@ tcp_destroy(void)
{
int error;
+#ifdef TCP_RFC7413
+ tcp_fastopen_destroy();
+#endif
tcp_hc_destroy();
syncache_destroy();
tcp_tw_destroy();
@@ -1439,6 +1449,17 @@ tcp_close(struct tcpcb *tp)
if (tp->t_state == TCPS_LISTEN)
tcp_offload_listen_stop(tp);
#endif
+#ifdef TCP_RFC7413
+ /*
+ * This releases the TFO pending counter resource for TFO listen
+ * sockets as well as passively-created TFO sockets that transition
+ * from SYN_RECEIVED to CLOSED.
+ */
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+ }
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 8ad1b2232235..940d3de237ce 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -81,6 +81,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -1083,6 +1086,39 @@ failed:
return (0);
}
+#ifdef TCP_RFC7413
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+ uint64_t response_cookie)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ unsigned int *pending_counter;
+
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+ *lsop = syncache_socket(sc, *lsop, m);
+ if (*lsop == NULL) {
+ TCPSTAT_INC(tcps_sc_aborted);
+ atomic_subtract_int(pending_counter, 1);
+ } else {
+ inp = sotoinpcb(*lsop);
+ tp = intotcpcb(inp);
+ tp->t_flags |= TF_FASTOPEN;
+ tp->t_tfo_cookie = response_cookie;
+ tp->snd_max = tp->iss;
+ tp->snd_nxt = tp->iss;
+ tp->t_tfo_pending = pending_counter;
+ TCPSTAT_INC(tcps_sc_completed);
+ }
+}
+#endif /* TCP_RFC7413 */
+
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1095,8 +1131,15 @@ failed:
* DoS attack, an attacker could send data which would eventually
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled set to true, and the
+ * TCP_FASTOPEN socket option is set. In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
*/
-void
+int
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
void *todctx)
@@ -1109,6 +1152,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
u_int ltflags;
int win, sb_hiwat, ip_ttl, ip_tos;
char *s;
+ int rv = 0;
#ifdef INET6
int autoflowlabel = 0;
#endif
@@ -1117,6 +1161,11 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
struct syncache scs;
struct ucred *cred;
+#ifdef TCP_RFC7413
+ uint64_t tfo_response_cookie;
+ int tfo_cookie_valid = 0;
+ int tfo_response_cookie_valid = 0;
+#endif
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
@@ -1141,6 +1190,29 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sb_hiwat = so->so_rcv.sb_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
+#ifdef TCP_RFC7413
+ if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+ /*
+ * Limit the number of pending TFO connections to
+ * approximately half of the queue limit. This prevents TFO
+ * SYN floods from starving the service by filling the
+ * listen queue with bogus TFO connections.
+ */
+ if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
+ (so->so_qlimit / 2)) {
+ int result;
+
+ result = tcp_fastopen_check_cookie(inc,
+ to->to_tfo_cookie, to->to_tfo_len,
+ &tfo_response_cookie);
+ tfo_cookie_valid = (result > 0);
+ tfo_response_cookie_valid = (result >= 0);
+ } else
+ atomic_subtract_int(tp->t_tfo_pending, 1);
+ }
+#endif
+
/* By the time we drop the lock these should no longer be used. */
so = NULL;
tp = NULL;
@@ -1152,7 +1224,10 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
} else
mac_syncache_create(maclabel, inp);
#endif
- INP_WUNLOCK(inp);
+#ifdef TCP_RFC7413
+ if (!tfo_cookie_valid)
+#endif
+ INP_WUNLOCK(inp);
/*
* Remember the IP options, if any.
@@ -1181,6 +1256,10 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid)
+ INP_WUNLOCK(inp);
+#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1223,6 +1302,14 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto done;
}
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ goto skip_alloc;
+ }
+#endif
+
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
/*
@@ -1246,7 +1333,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
}
}
-
+
+#ifdef TCP_RFC7413
+skip_alloc:
+ if (!tfo_cookie_valid && tfo_response_cookie_valid)
+ sc->sc_tfo_cookie = &tfo_response_cookie;
+#endif
+
/*
* Fill in the syncache values.
*/
@@ -1354,6 +1447,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
SCH_UNLOCK(sch);
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
+ /* INP_WUNLOCK(inp) will be performed by the called */
+ rv = 1;
+ goto tfo_done;
+ }
+#endif
+
/*
* Do a standard 3-way handshake.
*/
@@ -1371,17 +1473,20 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
done:
+ if (m) {
+ *lsop = NULL;
+ m_freem(m);
+ }
+#ifdef TCP_RFC7413
+tfo_done:
+#endif
if (cred != NULL)
crfree(cred);
#ifdef MAC
if (sc == &scs)
mac_syncache_destroy(&maclabel);
#endif
- if (m) {
-
- *lsop = NULL;
- m_freem(m);
- }
+ return (rv);
}
static int
@@ -1533,6 +1638,16 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked)
}
}
#endif
+
+#ifdef TCP_RFC7413
+ if (sc->sc_tfo_cookie) {
+ to.to_flags |= TOF_FASTOPEN;
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = sc->sc_tfo_cookie;
+ /* don't send cookie again when retransmitting response */
+ sc->sc_tfo_cookie = NULL;
+ }
+#endif
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
/* Adjust headers by option size. */
diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h
index c0c854da8c40..9fcbbec6414a 100644
--- a/sys/netinet/tcp_syncache.h
+++ b/sys/netinet/tcp_syncache.h
@@ -41,7 +41,7 @@ void syncache_destroy(void);
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-void syncache_add(struct in_conninfo *, struct tcpopt *,
+int syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
@@ -74,7 +74,9 @@ struct syncache {
#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
-
+#ifdef TCP_RFC7413
+ void *sc_tfo_cookie; /* for TCP Fast Open response */
+#endif
void *sc_pspare; /* TCP_SIGNATURE */
u_int32_t sc_spare[2]; /* UTO */
};
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 6a24ce781c20..9767eb709056 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -643,7 +643,8 @@ tcp_timer_rexmt(void * xtp)
} else
tp->t_flags &= ~TF_PREVVALID;
TCPSTAT_INC(tcps_rexmttimeo);
- if (tp->t_state == TCPS_SYN_SENT)
+ if ((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_SYN_RECEIVED))
rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 42e2ea79f50a..3435668dd8cb 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -82,6 +82,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -405,6 +408,10 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
TCP_PROBE2(debug__user, tp, PRU_LISTEN);
@@ -451,6 +458,10 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
TCP_PROBE2(debug__user, tp, PRU_LISTEN);
@@ -805,6 +816,18 @@ tcp_usr_rcvd(struct socket *so, int flags)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+#ifdef TCP_RFC7413
+ /*
+ * For passively-created TFO connections, don't attempt a window
+ * update while still in SYN_RECEIVED as this may trigger an early
+ * SYN|ACK. It is preferable to have the SYN|ACK be sent along with
+ * application response data, or failing that, when the DELACK timer
+ * expires.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ goto out;
+#endif
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
tcp_offload_rcvd(tp);
@@ -1674,6 +1697,29 @@ unlock_and_done:
goto unlock_and_done;
#endif
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ INP_WUNLOCK(inp);
+ if (!V_tcp_fastopen_enabled)
+ return (EPERM);
+
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval) {
+ tp->t_flags |= TF_FASTOPEN;
+ if ((tp->t_state == TCPS_LISTEN) &&
+ (tp->t_tfo_pending == NULL))
+ tp->t_tfo_pending =
+ tcp_fastopen_alloc_counter();
+ } else
+ tp->t_flags &= ~TF_FASTOPEN;
+ goto unlock_and_done;
+#endif
+
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1753,6 +1799,14 @@ unlock_and_done:
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
#endif
+
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ optval = tp->t_flags & TF_FASTOPEN;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+#endif
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -2076,6 +2130,10 @@ db_print_tflags(u_int t_flags)
db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_FASTOPEN) {
+ db_printf("%sTF_FASTOPEN", comma ? ", " : "");
+ comma = 1;
+ }
}
static void
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 2b3954a30174..8d76912275a1 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -253,10 +253,20 @@ struct tcpcb {
u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */
u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
u_int t_flags2; /* More tcpcb flags storage */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
+ uint64_t t_tfo_cookie; /* TCP Fast Open cookie */
+#else
uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
+#endif
struct tcp_function_block *t_fb;/* TCP function call block */
void *t_fb_ptr; /* Pointer to t_fb specific data */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */
+ void *t_pspare2[1]; /* 1 TCP_SIGNATURE */
+#else
void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */
+#endif
#if defined(_KERNEL) && defined(TCPPCAP)
struct mbufq t_inpkts; /* List of saved input packets. */
struct mbufq t_outpkts; /* List of saved output packets. */
@@ -302,6 +312,7 @@ struct tcpcb {
#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
+#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
@@ -361,14 +372,17 @@ struct tcpopt {
#define TOF_TS 0x0010 /* timestamp */
#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_MAXOPT 0x0100
+#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
+#define TOF_MAXOPT 0x0200
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
u_char *to_signature; /* pointer to the TCP-MD5 signature */
+ u_char *to_tfo_cookie; /* pointer to the TFO cookie */
u_int16_t to_mss; /* maximum segment size */
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
+ u_int8_t to_tfo_len; /* TFO cookie length */
u_int32_t to_spare; /* UTO */
};