aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorKristof Provost <kp@FreeBSD.org>2021-05-20 09:54:41 +0000
committerKristof Provost <kp@FreeBSD.org>2021-07-27 07:42:25 +0000
commitc3d03672e119df47a43014a212d65983ae2cf230 (patch)
tree5f3ebd13949aec207a2d05c1c39191a5103dba01 /sys
parent0df576d98e15bbafa73522a099bf0f34990496b4 (diff)
downloadsrc-c3d03672e119df47a43014a212d65983ae2cf230.tar.gz
src-c3d03672e119df47a43014a212d65983ae2cf230.zip
pf: syncookie support
Import OpenBSD's syncookie support for pf. This feature helps pf resist TCP SYN floods by only creating states once the remote host completes the TCP handshake rather than when the initial SYN packet is received. This is accomplished by using the initial sequence numbers to encode a cookie (hence the name) in the SYN+ACK response and verifying this on receipt of the client ACK. Reviewed by: kbowling Obtained from: OpenBSD MFC after: 1 week Sponsored by: Modirum MDPay Differential Revision: https://reviews.freebsd.org/D31138 (cherry picked from commit 8e1864ed07121b479b95d7e3a5931a9e0ffd4713)
Diffstat (limited to 'sys')
-rw-r--r--sys/modules/pf/Makefile2
-rw-r--r--sys/net/pfvar.h34
-rw-r--r--sys/netinet/tcp.h2
-rw-r--r--sys/netpfil/pf/pf.c129
-rw-r--r--sys/netpfil/pf/pf.h3
-rw-r--r--sys/netpfil/pf/pf_ioctl.c3
-rw-r--r--sys/netpfil/pf/pf_mtag.h1
-rw-r--r--sys/netpfil/pf/pf_syncookies.c350
8 files changed, 492 insertions, 32 deletions
diff --git a/sys/modules/pf/Makefile b/sys/modules/pf/Makefile
index 7293b30cda9d..d361ea0802fb 100644
--- a/sys/modules/pf/Makefile
+++ b/sys/modules/pf/Makefile
@@ -4,7 +4,7 @@
KMOD= pf
SRCS= pf.c pf_if.c pf_lb.c pf_osfp.c pf_ioctl.c pf_norm.c pf_table.c \
- pf_ruleset.c pf_nv.c in4_cksum.c \
+ pf_ruleset.c pf_nv.c pf_syncookies.c in4_cksum.c \
bus_if.h device_if.h \
opt_pf.h opt_inet.h opt_inet6.h opt_bpf.h opt_sctp.h opt_global.h
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 73f3168aa31f..50634f39a549 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1132,6 +1132,12 @@ struct pf_pdesc {
counter_u64_add(V_pf_status.counters[x], 1); \
} while (0)
+enum pf_syncookies_mode {
+ PF_SYNCOOKIES_NEVER = 0,
+ PF_SYNCOOKIES_ALWAYS = 1,
+ PF_SYNCOOKIES_MODE_MAX = PF_SYNCOOKIES_ALWAYS
+};
+
struct pf_kstatus {
counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */
counter_u64_t lcounters[LCNT_MAX]; /* limit counters */
@@ -1146,6 +1152,8 @@ struct pf_kstatus {
char ifname[IFNAMSIZ];
uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
bool keep_counters;
+ enum pf_syncookies_mode syncookies_mode;
+ bool syncookies_active;
};
struct pf_divert {
@@ -1486,6 +1494,8 @@ struct pfioc_iface {
#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
#define DIOCKEEPCOUNTERS _IOWR('D', 92, struct pfioc_nv)
#define DIOCGETSTATESV2 _IOWR('D', 93, struct pfioc_states_v2)
+#define DIOCGETSYNCOOKIES _IOWR('D', 94, struct pfioc_nv)
+#define DIOCSETSYNCOOKIES _IOWR('D', 95, struct pfioc_nv)
struct pf_ifspeed_v0 {
char ifname[IFNAMSIZ];
@@ -1816,6 +1826,30 @@ int pf_addr_cmp(struct pf_addr *, struct pf_addr *,
sa_family_t);
void pf_qid2qname(u_int32_t, char *);
+u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t);
+u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t);
+struct mbuf *pf_build_tcp(const struct pf_krule *, sa_family_t,
+ const struct pf_addr *, const struct pf_addr *,
+ u_int16_t, u_int16_t, u_int32_t, u_int32_t,
+ u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
+ u_int16_t);
+void pf_send_tcp(const struct pf_krule *, sa_family_t,
+ const struct pf_addr *, const struct pf_addr *,
+ u_int16_t, u_int16_t, u_int32_t, u_int32_t,
+ u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
+ u_int16_t);
+
+void pf_syncookies_init(void);
+int pf_syncookies_setmode(u_int8_t);
+int pf_get_syncookies(struct pfioc_nv *);
+int pf_set_syncookies(struct pfioc_nv *);
+int pf_synflood_check(struct pf_pdesc *);
+void pf_syncookie_send(struct mbuf *m, int off,
+ struct pf_pdesc *);
+u_int8_t pf_syncookie_validate(struct pf_pdesc *);
+struct mbuf * pf_syncookie_recreate_syn(uint8_t, int,
+ struct pf_pdesc *);
+
VNET_DECLARE(struct pf_kstatus, pf_status);
#define V_pf_status VNET(pf_status)
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 7273cb5104ea..4f062b31a051 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -105,6 +105,8 @@ struct tcphdr {
#define TCPOPT_FAST_OPEN 34
#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define MAX_TCPOPTLEN 40 /* Absolute maximum TCP options len */
+
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
#define TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index a08f38f3a286..4005e453cfb7 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -243,11 +243,6 @@ static void pf_change_icmp(struct pf_addr *, u_int16_t *,
struct pf_addr *, struct pf_addr *, u_int16_t,
u_int16_t *, u_int16_t *, u_int16_t *,
u_int16_t *, u_int8_t, sa_family_t);
-static void pf_send_tcp(const struct pf_krule *, sa_family_t,
- const struct pf_addr *, const struct pf_addr *,
- u_int16_t, u_int16_t, u_int32_t, u_int32_t,
- u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
- u_int16_t);
static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
sa_family_t, struct pf_krule *);
static void pf_detach_state(struct pf_kstate *);
@@ -289,10 +284,6 @@ static int pf_test_state_icmp(struct pf_kstate **, int,
void *, struct pf_pdesc *, u_short *);
static int pf_test_state_other(struct pf_kstate **, int,
struct pfi_kkif *, struct mbuf *, struct pf_pdesc *);
-static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
- sa_family_t);
-static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
- sa_family_t);
static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
int, u_int16_t);
static int pf_check_proto_cksum(struct mbuf *, int, int,
@@ -2460,14 +2451,13 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
return (copyback);
}
-static void
-pf_send_tcp(const struct pf_krule *r, sa_family_t af,
+struct mbuf *
+pf_build_tcp(const struct pf_krule *r, sa_family_t af,
const struct pf_addr *saddr, const struct pf_addr *daddr,
u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
u_int16_t rtag)
{
- struct pf_send_entry *pfse;
struct mbuf *m;
int len, tlen;
#ifdef INET
@@ -2503,22 +2493,16 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af,
panic("%s: unsupported af %d", __func__, af);
}
- /* Allocate outgoing queue entry, mbuf and mbuf tag. */
- pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
- if (pfse == NULL)
- return;
m = m_gethdr(M_NOWAIT, MT_DATA);
- if (m == NULL) {
- free(pfse, M_PFTEMP);
- return;
- }
+ if (m == NULL)
+ return (NULL);
+
#ifdef MAC
mac_netinet_firewall_send(m);
#endif
if ((pf_mtag = pf_get_mtag(m)) == NULL) {
- free(pfse, M_PFTEMP);
m_freem(m);
- return;
+ return (NULL);
}
if (tag)
m->m_flags |= M_SKIP_FIREWALL;
@@ -2599,8 +2583,6 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af,
h->ip_len = htons(len);
h->ip_ttl = ttl ? ttl : V_ip_defttl;
h->ip_sum = 0;
-
- pfse->pfse_type = PFSE_IP;
break;
#endif /* INET */
#ifdef INET6
@@ -2611,11 +2593,48 @@ pf_send_tcp(const struct pf_krule *r, sa_family_t af,
h6->ip6_vfc |= IPV6_VERSION;
h6->ip6_hlim = IPV6_DEFHLIM;
+ break;
+#endif /* INET6 */
+ }
+
+ return (m);
+}
+
+void
+pf_send_tcp(const struct pf_krule *r, sa_family_t af,
+ const struct pf_addr *saddr, const struct pf_addr *daddr,
+ u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
+ u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
+ u_int16_t rtag)
+{
+ struct pf_send_entry *pfse;
+ struct mbuf *m;
+
+ m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, flags,
+ win, mss, ttl, tag, rtag);
+ if (m == NULL)
+ return;
+
+ /* Allocate outgoing queue entry, mbuf and mbuf tag. */
+ pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
+ if (pfse == NULL) {
+ m_freem(m);
+ return;
+ }
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ pfse->pfse_type = PFSE_IP;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
pfse->pfse_type = PFSE_IP6;
break;
#endif /* INET6 */
}
+
pfse->pfse_m = m;
pf_send(pfse);
}
@@ -3190,7 +3209,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m)
return (1);
}
-static u_int8_t
+u_int8_t
pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
{
int hlen;
@@ -3230,7 +3249,7 @@ pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
return (wscale);
}
-static u_int16_t
+u_int16_t
pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
{
int hlen;
@@ -6047,6 +6066,18 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *
goto done;
}
pd.p_len = pd.tot_len - off - (pd.hdr.tcp.th_off << 2);
+
+ pd.sport = &pd.hdr.tcp.th_sport;
+ pd.dport = &pd.hdr.tcp.th_dport;
+
+ /* Respond to SYN with a syncookie. */
+ if ((pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN &&
+ pd.dir == PF_IN && pf_synflood_check(&pd)) {
+ pf_syncookie_send(m, off, &pd);
+ action = PF_DROP;
+ break;
+ }
+
if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0)
pqid = 1;
action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
@@ -6060,9 +6091,49 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
- } else if (s == NULL)
- action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
- &a, &ruleset, inp);
+ } else if (s == NULL) {
+ /* Validate remote SYN|ACK, re-create original SYN if
+ * valid. */
+ if ((pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) ==
+ TH_ACK && pf_syncookie_validate(&pd) &&
+ pd.dir == PF_IN) {
+ struct mbuf *msyn;
+
+ msyn = pf_syncookie_recreate_syn(h->ip_ttl,
+ off,&pd);
+ if (msyn == NULL) {
+ action = PF_DROP;
+ break;
+ }
+
+ action = pf_test(dir, pflags, ifp, &msyn, inp);
+ m_freem(msyn);
+
+ if (action == PF_PASS) {
+ action = pf_test_state_tcp(&s, dir,
+ kif, m, off, h, &pd, &reason);
+ if (action != PF_PASS || s == NULL) {
+ action = PF_DROP;
+ break;
+ }
+
+ s->src.seqhi = ntohl(pd.hdr.tcp.th_ack)
+ - 1;
+ s->src.seqlo = ntohl(pd.hdr.tcp.th_seq)
+ - 1;
+ s->src.state = PF_TCPS_PROXY_DST;
+
+ action = pf_synproxy(&pd, &s, &reason);
+ if (action != PF_PASS)
+ break;
+ }
+ break;
+ }
+ else {
+ action = pf_test_rule(&r, &s, dir, kif, m, off,
+ &pd, &a, &ruleset, inp);
+ }
+ }
break;
}
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index 011d69a746dd..869e8e32eb5c 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -159,7 +159,8 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
#define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */
#define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */
#define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */
-#define LCNT_MAX 7 /* total+1 */
+#define LCNT_SYNCOOKIES_VALID 7 /* syncookies validated */ /* XXX TODO: Ensure no API breakage! */
+#define LCNT_MAX 8 /* total+1 */
#define LCNT_NAMES { \
"max states per rule", \
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index c9105bf22385..d5b33a48a57c 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -285,6 +285,7 @@ pfattach_vnet(void)
pfr_initialize();
pfi_initialize_vnet();
pf_normalize_init();
+ pf_syncookies_init();
V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
@@ -5525,7 +5526,7 @@ pf_load(void)
{
int error;
- rm_init(&pf_rules_lock, "pf rulesets");
+ rm_init_flags(&pf_rules_lock, "pf rulesets", RM_RECURSE);
sx_init(&pf_ioctl_lock, "pf ioctl");
sx_init(&pf_end_lock, "pf end thread");
diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h
index ad28ab7a7c30..2135c9e69dbd 100644
--- a/sys/netpfil/pf/pf_mtag.h
+++ b/sys/netpfil/pf/pf_mtag.h
@@ -43,6 +43,7 @@
#define PF_FASTFWD_OURS_PRESENT 0x10
#define PF_REASSEMBLED 0x20
#define PF_DUPLICATED 0x40
+#define PF_TAG_SYNCOOKIE_RECREATED 0x80
struct pf_mtag {
void *hdr; /* saved hdr pos in mbuf, for ECN */
diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c
new file mode 100644
index 000000000000..0071bab0d7d5
--- /dev/null
+++ b/sys/netpfil/pf/pf_syncookies.c
@@ -0,0 +1,350 @@
+/* $OpenBSD: pf_syncookies.c,v 1.7 2018/09/10 15:54:28 henning Exp $ */
+
+/* Copyright (c) 2016,2017 Henning Brauer <henning@openbsd.org>
+ * Copyright (c) 2016 Alexandr Nedvedicky <sashan@openbsd.org>
+ *
+ * syncookie parts based on FreeBSD sys/netinet/tcp_syncache.c
+ *
+ * Copyright (c) 2001 McAfee, Inc.
+ * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Jonathan Lemon
+ * and McAfee Research, the Security Research Division of McAfee, Inc. under
+ * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
+ * DARPA CHATS research program. [2001 McAfee, Inc.]
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * when we're under synflood, we use syncookies to prevent state table
+ * exhaustion. Trigger for the synflood mode is the number of half-open
+ * connections in the state table.
+ * We leave synflood mode when the number of half-open states - including
+ * in-flight syncookies - drops far enough again
+ */
+
+/*
+ * syncookie enabled Initial Sequence Number:
+ * 24 bit MAC
+ * 3 bit WSCALE index
+ * 3 bit MSS index
+ * 1 bit SACK permitted
+ * 1 bit odd/even secret
+ *
+ * References:
+ * RFC4987 TCP SYN Flooding Attacks and Common Mitigations
+ * http://cr.yp.to/syncookies.html (overview)
+ * http://cr.yp.to/syncookies/archive (details)
+ */
+
+//#include "pflog.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/filio.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/syslog.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+
+#include <net/pfvar.h>
+
+/*
+ * Debug printf: prints only when the pf debug level is at least (n).
+ * (x) is a complete, parenthesised printf() argument list, e.g.
+ * DPFPRINTF(PF_DEBUG_MISC, ("msg"));
+ * The macro expands to a bare `if' statement, so it must not be used as the
+ * unbraced body of an if that has an else branch.
+ */
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+
+/*
+ * The eight option bits carried in a syncookie ISS.  They can be accessed
+ * either as one raw byte (cookie) or as individual fields (flags).
+ * NOTE(review): the placement of the bit-fields inside the byte is
+ * implementation-defined in C; the code only ever round-trips the byte on the
+ * same host, so this presumably does not matter -- confirm.
+ */
+union pf_syncookie {
+ uint8_t cookie;
+ struct {
+ /*
+ * oddeven: which of the two secret keys was used for the MAC
+ * sack_ok: SACK permitted (currently always written as 0)
+ * wscale_idx: index into pf_syncookie_wstab[]
+ * mss_idx: index into pf_syncookie_msstab[]
+ */
+ uint8_t oddeven:1,
+ sack_ok:1,
+ wscale_idx:3,
+ mss_idx:3;
+ } flags;
+};
+
+#define PF_SYNCOOKIE_SECRET_SIZE SIPHASH_KEY_LENGTH
+#define PF_SYNCOOKIE_SECRET_LIFETIME 15 /* seconds */
+
+/*
+ * Per-vnet syncookie key state.
+ * Protected by PF_RULES_xLOCK.
+ */
+struct pf_syncookie_status {
+ /* Timer armed by pf_syncookie_newkey() to run pf_syncookie_rotate(). */
+ struct callout keytimeout;
+ /* Index (0 or 1) into key[] of the most recently generated secret. */
+ uint8_t oddeven;
+ /* The two secrets; a cookie records which one was used to MAC it. */
+ uint8_t key[2][SIPHASH_KEY_LENGTH];
+};
+VNET_DEFINE_STATIC(struct pf_syncookie_status, pf_syncookie_status);
+#define V_pf_syncookie_status VNET(pf_syncookie_status)
+
+/*
+ * Helpers private to this file.  They are declared static so they do not
+ * leak into the kernel's global namespace; the public syncookie entry points
+ * are declared in net/pfvar.h.  A later definition without a storage class
+ * inherits the internal linkage established here.
+ */
+static void pf_syncookie_rotate(void *);
+static void pf_syncookie_newkey(void);
+static uint32_t pf_syncookie_mac(struct pf_pdesc *, union pf_syncookie,
+    uint32_t);
+static uint32_t pf_syncookie_generate(struct mbuf *m, int off,
+    struct pf_pdesc *, uint16_t);
+
+/*
+ * Set up the syncookie state for the current vnet: initialise the key
+ * rotation callout and select PF_SYNCOOKIES_NEVER as the starting mode.
+ */
+void
+pf_syncookies_init(void)
+{
+	/* Second argument 1: see callout(9) for the mpsafe flag. */
+	callout_init(&V_pf_syncookie_status.keytimeout, 1);
+
+	/* The mode is changed with the rules write lock held. */
+	PF_RULES_WLOCK();
+	pf_syncookies_setmode(PF_SYNCOOKIES_NEVER);
+	PF_RULES_WUNLOCK();
+}
+
+/*
+ * Select the syncookie mode.
+ *
+ * Returns EINVAL when mode is larger than PF_SYNCOOKIES_MODE_MAX and 0
+ * otherwise.  Re-selecting the current mode is a no-op.  Switching to
+ * PF_SYNCOOKIES_ALWAYS generates a fresh key (which also arms the rotation
+ * callout) and marks syncookies active; switching to any other mode only
+ * records the new mode.
+ */
+int
+pf_syncookies_setmode(u_int8_t mode)
+{
+	if (mode > PF_SYNCOOKIES_MODE_MAX)
+		return (EINVAL);
+
+	if (V_pf_status.syncookies_mode != mode) {
+		V_pf_status.syncookies_mode = mode;
+		if (mode == PF_SYNCOOKIES_ALWAYS) {
+			pf_syncookie_newkey();
+			V_pf_status.syncookies_active = true;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Decide whether an incoming SYN should be answered with a syncookie.
+ *
+ * Returns 0 for packets tagged PF_TAG_SYNCOOKIE_RECREATED (SYNs that pf
+ * itself rebuilt from a valid cookie), otherwise the configured syncookie
+ * mode, which is non-zero when cookies are to be sent.
+ */
+int
+pf_synflood_check(struct pf_pdesc *pd)
+{
+	MPASS(pd->proto == IPPROTO_TCP);
+	PF_RULES_RASSERT();
+
+	if (pd->pf_mtag == NULL ||
+	    (pd->pf_mtag->tag & PF_TAG_SYNCOOKIE_RECREATED) == 0)
+		return (V_pf_status.syncookies_mode);
+
+	return (0);
+}
+
+/*
+ * Answer the SYN described by pd with a SYN|ACK whose initial sequence number
+ * is a syncookie.  Addresses and ports are swapped relative to the received
+ * packet, and the peer's sequence number plus one is acknowledged.
+ *
+ * m and off locate the received packet so its TCP options can be parsed.
+ */
+void
+pf_syncookie_send(struct mbuf *m, int off, struct pf_pdesc *pd)
+{
+	uint16_t peer_mss, mss;
+	uint32_t iss, ack;
+
+	/* Never advertise less than the system default MSS. */
+	peer_mss = pf_get_mss(m, off, pd->hdr.tcp.th_off, pd->af);
+	mss = max(V_tcp_mssdflt, peer_mss);
+
+	iss = pf_syncookie_generate(m, off, pd, mss);
+	ack = ntohl(pd->hdr.tcp.th_seq) + 1;
+
+	pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport,
+	    iss, ack, TH_SYN|TH_ACK, 0, mss, 0, 1, 0);
+}
+
+/*
+ * Check whether the ACK described by pd acknowledges a syncookie issued by
+ * this host.
+ *
+ * The option byte is recovered from the acknowledged sequence number, the MAC
+ * is recomputed over the connection tuple and the peer's initial sequence
+ * number, and the upper 24 bits are compared.  Returns 1 and bumps the
+ * LCNT_SYNCOOKIES_VALID counter on a match, 0 otherwise.
+ */
+uint8_t
+pf_syncookie_validate(struct pf_pdesc *pd)
+{
+	union pf_syncookie ck;
+	uint32_t peer_isn, cookie_iss, mac;
+
+	MPASS(pd->proto == IPPROTO_TCP);
+	PF_RULES_RASSERT();
+
+	/* Both numbers advanced by one during the handshake; undo that. */
+	peer_isn = ntohl(pd->hdr.tcp.th_seq) - 1;
+	cookie_iss = ntohl(pd->hdr.tcp.th_ack) - 1;
+
+	/* The low byte was XORed with the top byte of the MAC when sent. */
+	ck.cookie = (cookie_iss & 0xff) ^ (cookie_iss >> 24);
+
+	mac = pf_syncookie_mac(pd, ck, peer_isn);
+	if (((cookie_iss ^ mac) & ~0xff) != 0)
+		return (0);
+
+	counter_u64_add(V_pf_status.lcounters[LCNT_SYNCOOKIES_VALID], 1);
+	return (1);
+}
+
+/*
+ * all following functions private
+ */
+/*
+ * Callout handler armed by pf_syncookie_newkey(); arg is the vnet the callout
+ * was armed in.  Either rotates the syncookie secret or, once syncookies have
+ * been turned off, wipes both keys and lets the timer lapse.
+ *
+ * Syncookies are only switched off here when the configured mode is
+ * PF_SYNCOOKIES_NEVER.  While the mode is PF_SYNCOOKIES_ALWAYS,
+ * pf_synflood_check() keeps answering SYNs with cookies, so the keys must
+ * stay in place and keep rotating; wiping them would leave every cookie
+ * MACed with an all-zero key.
+ */
+void
+pf_syncookie_rotate(void *arg)
+{
+	CURVNET_SET((struct vnet *)arg);
+
+	/*
+	 * The key state is protected by the rules lock, and
+	 * pf_syncookie_newkey() asserts that it is write-held.
+	 */
+	PF_RULES_WLOCK();
+
+	/* do we want to disable syncookies? */
+	if (V_pf_status.syncookies_active &&
+	    V_pf_status.syncookies_mode == PF_SYNCOOKIES_NEVER) {
+		V_pf_status.syncookies_active = false;
+		DPFPRINTF(PF_DEBUG_MISC, ("syncookies disabled\n"));
+	}
+
+	if (!V_pf_status.syncookies_active) {
+		/* no longer in use: delete keys, do not re-arm the timer */
+		memset(V_pf_syncookie_status.key[0], 0,
+		    PF_SYNCOOKIE_SECRET_SIZE);
+		memset(V_pf_syncookie_status.key[1], 0,
+		    PF_SYNCOOKIE_SECRET_SIZE);
+	} else {
+		/* new key, including timeout */
+		pf_syncookie_newkey();
+	}
+
+	PF_RULES_WUNLOCK();
+	CURVNET_RESTORE();
+}
+
+/*
+ * Switch to the other key slot, fill it with fresh random bytes and (re)arm
+ * the rotation callout for the current vnet.  The previously active key is
+ * left untouched in its slot.
+ *
+ * Must be called with the rules write lock held.
+ */
+void
+pf_syncookie_newkey(void)
+{
+	PF_RULES_WASSERT();
+
+	V_pf_syncookie_status.oddeven = (V_pf_syncookie_status.oddeven + 1) & 0x1;
+	arc4random_buf(V_pf_syncookie_status.key[V_pf_syncookie_status.oddeven],
+	    PF_SYNCOOKIE_SECRET_SIZE);
+
+	/*
+	 * callout_reset() takes its timeout in ticks, while the lifetime is
+	 * defined in seconds: scale by hz.
+	 */
+	callout_reset(&V_pf_syncookie_status.keytimeout,
+	    PF_SYNCOOKIE_SECRET_LIFETIME * hz, pf_syncookie_rotate, curvnet);
+}
+
+/*
+ * Distribution and probability of certain MSS values. Those in between are
+ * rounded down to the next lower one.
+ * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
+ * .2% .3% 5% 7% 7% 20% 15% 45%
+ */
+static int pf_syncookie_msstab[] =
+ { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
+
+/*
+ * Distribution and probability of certain WSCALE values.
+ * The absence of the WSCALE option is encoded with index zero.
+ * [WSCALE values histograms, Allman, 2012]
+ * X 10 10 35 5 6 14 10% by host
+ * X 11 4 5 5 18 49 3% by connections
+ */
+static int pf_syncookie_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
+
+/*
+ * Compute the syncookie MAC for the connection described by pd.
+ *
+ * The keyed SipHash covers, in this order: source address, destination
+ * address, source port, destination port, seq and the cookie option byte.
+ * The key is the one selected by cookie.flags.oddeven.  The two 32-bit halves
+ * of the digest are XORed together to form the return value.
+ *
+ * Panics on an address family other than AF_INET / AF_INET6.
+ */
+uint32_t
+pf_syncookie_mac(struct pf_pdesc *pd, union pf_syncookie cookie, uint32_t seq)
+{
+	SIPHASH_CTX ctx;
+	uint32_t digest[2];
+	size_t addrlen;
+
+	PF_RULES_RASSERT();
+	MPASS(pd->proto == IPPROTO_TCP);
+
+	SipHash24_Init(&ctx);
+	SipHash_SetKey(&ctx, V_pf_syncookie_status.key[cookie.flags.oddeven]);
+
+	/* Only the bytes that belong to the address family are hashed. */
+	switch (pd->af) {
+	case AF_INET:
+		addrlen = sizeof(pd->src->v4);
+		break;
+	case AF_INET6:
+		addrlen = sizeof(pd->src->v6);
+		break;
+	default:
+		panic("unknown address family");
+	}
+	SipHash_Update(&ctx, pd->src, addrlen);
+	SipHash_Update(&ctx, pd->dst, addrlen);
+
+	SipHash_Update(&ctx, pd->sport, sizeof(*pd->sport));
+	SipHash_Update(&ctx, pd->dport, sizeof(*pd->dport));
+	SipHash_Update(&ctx, &seq, sizeof(seq));
+	SipHash_Update(&ctx, &cookie, sizeof(cookie));
+	SipHash_Final((uint8_t *)&digest, &ctx);
+
+	return (digest[0] ^ digest[1]);
+}
+
+/*
+ * Build the syncookie ISS for the SYN described by pd.
+ *
+ * mss is mapped to the largest pf_syncookie_msstab[] entry that does not
+ * exceed it (entry 0 if none does); the window scale parsed from the packet
+ * at m/off is mapped through pf_syncookie_wstab[] the same way.  The result
+ * carries the MAC in its upper 24 bits and the obfuscated option byte in its
+ * lower 8 bits.
+ */
+uint32_t
+pf_syncookie_generate(struct mbuf *m, int off, struct pf_pdesc *pd,
+    uint16_t mss)
+{
+	union pf_syncookie ck;
+	uint32_t mac, iss;
+	uint8_t idx, peer_wscale;
+
+	PF_RULES_RASSERT();
+
+	ck.cookie = 0;
+
+	/* map MSS: walk down from the largest table entry */
+	idx = nitems(pf_syncookie_msstab) - 1;
+	while (pf_syncookie_msstab[idx] > mss && idx > 0)
+		idx--;
+	ck.flags.mss_idx = idx;
+
+	/* map WSCALE the same way */
+	peer_wscale = pf_get_wscale(m, off, pd->hdr.tcp.th_off, pd->af);
+	idx = nitems(pf_syncookie_wstab) - 1;
+	while (pf_syncookie_wstab[idx] > peer_wscale && idx > 0)
+		idx--;
+	ck.flags.wscale_idx = idx;
+
+	ck.flags.sack_ok = 0; /* XXX */
+	ck.flags.oddeven = V_pf_syncookie_status.oddeven;
+
+	mac = pf_syncookie_mac(pd, ck, ntohl(pd->hdr.tcp.th_seq));
+
+	/*
+	 * The option byte is XORed with the top byte of the MAC so that the
+	 * eight cookie bits do not appear directly on the wire.  This gives
+	 * better ISS variance but adds no cryptographic strength.
+	 */
+	iss = (mac & ~0xff) | (ck.cookie ^ (mac >> 24));
+
+	return (iss);
+}
+
+/*
+ * Rebuild the client's original SYN from the ACK described by pd.
+ *
+ * The option byte is recovered from the acknowledged sequence number and
+ * translated back to MSS and window-scale values via the lookup tables.  The
+ * MAC is not checked here; the caller is expected to have validated the
+ * cookie already.  The new packet is built by pf_build_tcp() with
+ * PF_TAG_SYNCOOKIE_RECREATED passed as rtag.
+ *
+ * Returns the new mbuf, or NULL if an index is out of range or
+ * pf_build_tcp() fails.  The off argument is currently unused.
+ */
+struct mbuf *
+pf_syncookie_recreate_syn(uint8_t ttl, int off, struct pf_pdesc *pd)
+{
+	union pf_syncookie ck;
+	uint32_t client_isn, cookie_iss;
+	uint16_t mss;
+	uint8_t wscale;
+
+	client_isn = ntohl(pd->hdr.tcp.th_seq) - 1;
+	cookie_iss = ntohl(pd->hdr.tcp.th_ack) - 1;
+	ck.cookie = (cookie_iss & 0xff) ^ (cookie_iss >> 24);
+
+	if (ck.flags.mss_idx >= nitems(pf_syncookie_msstab))
+		return (NULL);
+	if (ck.flags.wscale_idx >= nitems(pf_syncookie_wstab))
+		return (NULL);
+
+	mss = pf_syncookie_msstab[ck.flags.mss_idx];
+	wscale = pf_syncookie_wstab[ck.flags.wscale_idx];
+
+	/*
+	 * NOTE(review): wscale is handed over in the position of
+	 * pf_build_tcp()'s `win' parameter -- confirm this is intended.
+	 */
+	return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport,
+	    *pd->dport, client_isn, 0, TH_SYN, wscale, mss, ttl, 0,
+	    PF_TAG_SYNCOOKIE_RECREATED));
+}