aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRandall Stewart <rrs@FreeBSD.org>2019-12-17 16:08:07 +0000
committerRandall Stewart <rrs@FreeBSD.org>2019-12-17 16:08:07 +0000
commit1cf55767b800492f15d2d49884f78f05506049c0 (patch)
tree0d634265921d2276fbd6722431a2e137e2b29851
parentc7b0edf219290a9a77a4c59987b5783061ae19c9 (diff)
downloadsrc-1cf55767b800492f15d2d49884f78f05506049c0.tar.gz
src-1cf55767b800492f15d2d49884f78f05506049c0.zip
This commit is a bit of a re-arrangement of the deck chairs. It
gets both rack and bbr ready for the completion of the STATS framework in FreeBSD. For now, if you don't have both NETFLIX_STATS (NF_stats) and STATS defined, the stats code is disabled. As soon as the rest of the stats framework lands we can remove that restriction and then just use stats whenever STATS is defined. Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D22479
Notes
Notes: svn path=/head/; revision=355859
-rw-r--r--sys/netinet/tcp_stacks/bbr.c38
-rw-r--r--sys/netinet/tcp_stacks/rack.c64
-rw-r--r--sys/netinet/tcp_stacks/rack_bbr_common.c5
-rw-r--r--sys/netinet/tcp_stacks/rack_bbr_common.h34
-rw-r--r--sys/netinet/tcp_stacks/sack_filter.c5
-rw-r--r--sys/netinet/tcp_stacks/tcp_bbr.h13
-rw-r--r--sys/netinet/tcp_stacks/tcp_rack.h2
-rw-r--r--sys/netinet/tcp_subr.c80
-rw-r--r--sys/netinet/tcp_var.h21
9 files changed, 187 insertions, 75 deletions
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 41f423c94b17..c28b0aee05e9 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2019
+ * Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
*
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@@ -57,9 +58,9 @@ __FBSDID("$FreeBSD$");
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
+#ifdef STATS
#include <sys/qmath.h>
#include <sys/tree.h>
-#ifdef NETFLIX_STATS
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#endif
#include <sys/refcount.h>
@@ -161,8 +162,7 @@ static int32_t bbr_num_pktepo_for_del_limit = BBR_NUM_RTTS_FOR_DEL_LIMIT;
static int32_t bbr_hardware_pacing_limit = 8000;
static int32_t bbr_quanta = 3; /* How much extra quanta do we get? */
static int32_t bbr_no_retran = 0;
-static int32_t bbr_tcp_map_entries_limit = 1500;
-static int32_t bbr_tcp_map_split_limit = 256;
+
static int32_t bbr_error_base_paceout = 10000; /* usec to pace */
static int32_t bbr_max_net_error_cnt = 10;
@@ -3381,8 +3381,8 @@ bbr_alloc(struct tcp_bbr *bbr)
static struct bbr_sendmap *
bbr_alloc_full_limit(struct tcp_bbr *bbr)
{
- if ((bbr_tcp_map_entries_limit > 0) &&
- (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+ if ((V_tcp_map_entries_limit > 0) &&
+ (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
BBR_STAT_INC(bbr_alloc_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -3402,8 +3402,8 @@ bbr_alloc_limit(struct tcp_bbr *bbr, uint8_t limit_type)
if (limit_type) {
/* currently there is only one limit type */
- if (bbr_tcp_map_split_limit > 0 &&
- bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) {
+ if (V_tcp_map_split_limit > 0 &&
+ bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
BBR_STAT_INC(bbr_split_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -3685,7 +3685,7 @@ bbr_ack_received(struct tcpcb *tp, struct tcp_bbr *bbr, struct tcphdr *th, uint3
uint32_t cwnd, target_cwnd, saved_bytes, maxseg;
int32_t meth;
-#ifdef NETFLIX_STATS
+#ifdef STATS
if ((tp->t_flags & TF_GPUTINPROG) &&
SEQ_GEQ(th->th_ack, tp->gput_ack)) {
/*
@@ -6510,7 +6510,7 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
}
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
#endif
/*
@@ -8490,6 +8490,7 @@ dodata: /* XXX */
return (0);
}
}
+
#endif
if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) {
bbr->bbr_segs_rcvd += max(1, nsegs);
@@ -8698,6 +8699,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
* reassembly queue and we have enough buffer space to take it.
*/
nsegs = max(1, m->m_pkthdr.lro_nsegs);
+
#ifdef NETFLIX_SB_LIMITS
if (so->so_rcv.sb_shlim) {
mcnt = m_memcnt(m);
@@ -8746,6 +8748,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
newsize, so, NULL))
so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
m_adj(m, drop_hdrlen); /* delayed header drop */
+
#ifdef NETFLIX_SB_LIMITS
appended =
#endif
@@ -11561,7 +11564,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
#endif
/*
@@ -11960,7 +11963,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
/* Window probe */
TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
#endif
@@ -11981,7 +11984,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif
@@ -12017,7 +12020,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
/* Place in 17's the total sent */
counter_u64_add(bbr_state_resend[17], len);
counter_u64_add(bbr_state_lost[17], len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif
@@ -12517,8 +12520,8 @@ recheck_resend:
* as long as we are not retransmiting.
*/
if ((rsm == NULL) &&
- (bbr_tcp_map_entries_limit > 0) &&
- (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+ (V_tcp_map_entries_limit > 0) &&
+ (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
BBR_STAT_INC(bbr_alloc_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -13256,7 +13259,6 @@ send:
SOCKBUF_UNLOCK(&so->so_snd);
return (EHOSTUNREACH);
}
-
hdrlen += sizeof(struct udphdr);
}
#endif
@@ -14276,7 +14278,7 @@ nomore:
bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
return (error);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
} else if (((tp->t_flags & TF_GPUTINPROG) == 0) &&
len &&
(rsm == NULL) &&
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index f6ad63b001ea..74a2a7e81c2d 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2019 Netflix, Inc.
+ * Copyright (c) 2016-9 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@@ -52,7 +53,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
-#ifdef NETFLIX_STATS
+#ifdef STATS
#include <sys/qmath.h>
#include <sys/tree.h>
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
@@ -187,21 +188,6 @@ static int32_t rack_persist_max = 1000; /* 1 Second */
static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */
static int32_t rack_hw_tls_max_seg = 0; /* 0 means use hw-tls single segment */
-/* Sack attack detection thresholds and such */
-static int32_t tcp_force_detection = 0;
-
-#ifdef NETFLIX_EXP_DETECTION
-static int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */
-static int32_t tcp_sack_to_move_thresh = 600; /* 60 % */
-static int32_t tcp_restoral_thresh = 650; /* 65 % (sack:2:ack -5%) */
-static int32_t tcp_attack_on_turns_on_logging = 0;
-static int32_t tcp_map_minimum = 500;
-#endif
-static int32_t tcp_sad_decay_val = 800;
-static int32_t tcp_sad_pacing_interval = 2000;
-static int32_t tcp_sad_low_pps = 100;
-
-
/*
* Currently regular tcp has a rto_min of 30ms
* the backoff goes 12 times so that ends up
@@ -226,9 +212,6 @@ static int32_t rack_always_send_oldest = 0;
static int32_t rack_use_sack_filter = 1;
static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
static int32_t rack_per_of_gp = 50;
-static int32_t rack_tcp_map_entries_limit = 1500;
-static int32_t rack_tcp_map_split_limit = 256;
-
/* Rack specific counters */
counter_u64_t rack_badfr;
@@ -1577,9 +1560,9 @@ rack_alloc(struct tcp_rack *rack)
static struct rack_sendmap *
rack_alloc_full_limit(struct tcp_rack *rack)
{
- if ((rack_tcp_map_entries_limit > 0) &&
+ if ((V_tcp_map_entries_limit > 0) &&
(rack->do_detection == 0) &&
- (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+ (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
counter_u64_add(rack_to_alloc_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@@ -1598,9 +1581,9 @@ rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type)
if (limit_type) {
/* currently there is only one limit type */
- if (rack_tcp_map_split_limit > 0 &&
+ if (V_tcp_map_split_limit > 0 &&
(rack->do_detection == 0) &&
- rack->r_ctl.rc_num_split_allocs >= rack_tcp_map_split_limit) {
+ rack->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
counter_u64_add(rack_split_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@@ -1648,7 +1631,7 @@ static void
rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs,
uint16_t type, int32_t recovery)
{
-#ifdef NETFLIX_STATS
+#ifdef STATS
int32_t gput;
#endif
@@ -1671,7 +1654,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
tp->ccv->flags &= ~CCF_CWND_LIMITED;
if (type == CC_ACK) {
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
((int32_t) tp->snd_cwnd) - tp->snd_wnd);
if ((tp->t_flags & TF_GPUTINPROG) &&
@@ -1725,7 +1708,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
tp->ccv->curack = th->th_ack;
CC_ALGO(tp)->ack_received(tp->ccv, type);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
#endif
if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) {
@@ -2436,6 +2419,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
slot = 1;
}
hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack);
+#ifdef NETFLIX_EXP_DETECTION
if (rack->sack_attack_disable &&
(slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) {
/*
@@ -2450,6 +2434,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
*/
slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
}
+#endif
if (tp->t_flags & TF_DELACK) {
delayed_ack = TICKS_2_MSEC(tcp_delacktime);
rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
@@ -3776,7 +3761,8 @@ refind:
seq_out = rack_update_entry(tp, rack, nrsm, ts, &len);
if (len == 0) {
return;
- }
+ } else if (len > 0)
+ goto refind;
}
}
/*
@@ -3912,7 +3898,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
TCPSTAT_INC(tcps_rttupdated);
rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var);
tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
#endif
tp->t_rxtshift = 0;
@@ -4620,6 +4606,7 @@ rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ac
static void
rack_do_decay(struct tcp_rack *rack)
{
+#ifdef NETFLIX_EXP_DETECTION
struct timeval res;
#define timersub(tvp, uvp, vvp) \
@@ -4670,6 +4657,7 @@ rack_do_decay(struct tcp_rack *rack)
rack->r_ctl.sack_noextra_move = ctf_decay_count(rack->r_ctl.sack_noextra_move,
tcp_sad_decay_val);
}
+#endif
}
static void
@@ -7406,9 +7394,11 @@ rack_init(struct tcpcb *tp)
rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack;
rack->r_ctl.rc_tlp_rxt_last_time = tcp_ts_getticks();
/* Do we force on detection? */
+#ifdef NETFLIX_EXP_DETECTION
if (tcp_force_detection)
rack->do_detection = 1;
else
+#endif
rack->do_detection = 0;
if (tp->snd_una != tp->snd_max) {
/* Create a send map for the current outstanding data */
@@ -7701,7 +7691,7 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
#endif
if (tiwin > rack->r_ctl.rc_high_rwnd)
@@ -8390,8 +8380,8 @@ again:
*/
if ((rsm == NULL) &&
(rack->do_detection == 0) &&
- (rack_tcp_map_entries_limit > 0) &&
- (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+ (V_tcp_map_entries_limit > 0) &&
+ (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
counter_u64_add(rack_to_alloc_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@@ -9318,7 +9308,7 @@ send:
}
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
@@ -9339,14 +9329,14 @@ send:
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif
@@ -9929,7 +9919,7 @@ out:
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
if (!(tp->t_flags & TF_GPUTINPROG) && len) {
tp->t_flags |= TF_GPUTINPROG;
tp->gput_seq = startseq;
@@ -10142,7 +10132,7 @@ rack_set_sockopt(struct socket *so, struct sockopt *sopt,
rack = (struct tcp_rack *)tp->t_fb_ptr;
switch (sopt->sopt_name) {
case TCP_RACK_DO_DETECTION:
- RACK_OPTS_INC(tcp_rack_no_sack);
+ RACK_OPTS_INC(tcp_rack_do_detection);
if (optval == 0)
rack->do_detection = 0;
else
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index ad9d59a3e1f4..b574fc14bf96 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2018
+ * Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
*
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@@ -133,8 +134,6 @@ __FBSDID("$FreeBSD$");
* Common TCP Functions - These are shared by borth
* rack and BBR.
*/
-
-
#ifdef KERN_TLS
uint32_t
ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.h b/sys/netinet/tcp_stacks/rack_bbr_common.h
index 6cb2fed7c2fa..5eb304ddf4ad 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.h
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.h
@@ -1,5 +1,5 @@
-#ifndef __pacer_timer_h__
-#define __pacer_timer_h__
+#ifndef __rack_bbr_common_h__
+#define __rack_bbr_common_h__
/*-
* Copyright (c) 2017-9 Netflix, Inc.
*
@@ -26,6 +26,12 @@
*
* __FBSDID("$FreeBSD$");
*/
+
+/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed. */
+#ifndef NETFLIX_STATS
+#undef STATS
+#endif
+
/* Common defines and such used by both RACK and BBR */
/* Special values for mss accounting array */
#define TCP_MSS_ACCT_JUSTRET 0
@@ -46,6 +52,23 @@
#define PROGRESS_CLEAR 3
#define PROGRESS_START 4
+/* codes for just-return */
+#define CTF_JR_SENT_DATA 0
+#define CTF_JR_CWND_LIMITED 1
+#define CTF_JR_RWND_LIMITED 2
+#define CTF_JR_APP_LIMITED 3
+#define CTF_JR_ASSESSING 4
+#define CTF_JR_PERSISTS 5
+#define CTF_JR_PRR 6
+
+/* Compat. */
+#define BBR_JR_SENT_DATA CTF_JR_SENT_DATA
+#define BBR_JR_CWND_LIMITED CTF_JR_CWND_LIMITED
+#define BBR_JR_RWND_LIMITED CTF_JR_RWND_LIMITED
+#define BBR_JR_APP_LIMITED CTF_JR_APP_LIMITED
+#define BBR_JR_ASSESSING CTF_JR_ASSESSING
+#define BBR_JR_PERSISTS CTF_JR_PERSISTS
+#define BBR_JR_PRR CTF_JR_PRR
/* RTT sample methods */
#define USE_RTT_HIGH 0
@@ -59,6 +82,13 @@
#define USEC_TO_MSEC(x) (x / MS_IN_USEC)
#define TCP_TS_OVERHEAD 12 /* Overhead of having Timestamps on */
+/* Bits per second in bytes per second */
+#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
+#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
+#define TWENTY_THREE_MBPS 2896000
+#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
+#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
+
#ifdef _KERNEL
/* We have only 7 bits in rack so assert its true */
CTASSERT((PACE_TMR_MASK & 0x80) == 0);
diff --git a/sys/netinet/tcp_stacks/sack_filter.c b/sys/netinet/tcp_stacks/sack_filter.c
index c4b35d5b8ca8..978f6670c50a 100644
--- a/sys/netinet/tcp_stacks/sack_filter.c
+++ b/sys/netinet/tcp_stacks/sack_filter.c
@@ -25,11 +25,16 @@
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifndef _KERNEL
+#define _WANT_TCPCB 1
+#endif
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/socket.h>
+#ifdef _KERNEL
#include <sys/mbuf.h>
#include <sys/sockopt.h>
+#endif
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_seq.h>
diff --git a/sys/netinet/tcp_stacks/tcp_bbr.h b/sys/netinet/tcp_stacks/tcp_bbr.h
index f09e25a18390..98fcb69f9684 100644
--- a/sys/netinet/tcp_stacks/tcp_bbr.h
+++ b/sys/netinet/tcp_stacks/tcp_bbr.h
@@ -128,12 +128,6 @@ TAILQ_HEAD(bbr_head, bbr_sendmap);
* an clear to start measuring */
#define BBR_RED_BW_PE_NOEARLY_OUT 7 /* Set pkt epoch judged that we do not
* get out of jail early */
-/* codes for just-return */
-#define BBR_JR_SENT_DATA 0
-#define BBR_JR_CWND_LIMITED 1
-#define BBR_JR_RWND_LIMITED 2
-#define BBR_JR_APP_LIMITED 3
-#define BBR_JR_ASSESSING 4
/* For calculating a rate */
#define BBR_CALC_BW 1
#define BBR_CALC_LOSS 2
@@ -385,13 +379,6 @@ struct bbr_log_sysctl_out {
#define BBR_BIG_LOG_SIZE 300000
-/* Bits per second in bytes per second */
-#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
-#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
-#define TWENTY_THREE_MBPS 2896000
-#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
-#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
-
struct bbr_stats {
uint64_t bbr_badfr; /* 0 */
uint64_t bbr_badfr_bytes; /* 1 */
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index 41eca27666fd..9020f362ec09 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -137,7 +137,7 @@ struct rack_opts_stats {
uint64_t tcp_rack_min_pace_seg;
uint64_t tcp_rack_min_pace;
uint64_t tcp_rack_cheat;
- uint64_t tcp_rack_no_sack;
+ uint64_t tcp_rack_do_detection;
};
#define TLP_USE_ID 1 /* Internet draft behavior */
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 56b470c0aa07..8534f74d9059 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -138,6 +138,58 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
+#ifdef NETFLIX_EXP_DETECTION
+/* Sack attack detection thresholds and such */
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, CTLFLAG_RW, 0,
+ "Sack Attack detection thresholds");
+int32_t tcp_force_detection = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
+ CTLFLAG_RW,
+ &tcp_force_detection, 0,
+ "Do we force detection even if the INP has it off?");
+int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
+ CTLFLAG_RW,
+ &tcp_sack_to_ack_thresh, 700,
+ "Percentage of sacks to acks we must see above (10.1 percent is 101)?");
+int32_t tcp_sack_to_move_thresh = 600; /* 60 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
+ CTLFLAG_RW,
+ &tcp_sack_to_move_thresh, 600,
+ "Percentage of sack moves we must see above (10.1 percent is 101)");
+int32_t tcp_restoral_thresh = 650; /* 65 % (sack:2:ack -5%) */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
+ CTLFLAG_RW,
+ &tcp_restoral_thresh, 550,
+ "Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)");
+int32_t tcp_sad_decay_val = 800;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
+ CTLFLAG_RW,
+ &tcp_sad_decay_val, 800,
+ "The decay percentage (10.1 percent equals 101 )");
+int32_t tcp_map_minimum = 500;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
+ CTLFLAG_RW,
+ &tcp_map_minimum, 500,
+ "Number of Map enteries before we start detection");
+int32_t tcp_attack_on_turns_on_logging = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
+ CTLFLAG_RW,
+ &tcp_attack_on_turns_on_logging, 0,
+ "When we have a positive hit on attack, do we turn on logging?");
+int32_t tcp_sad_pacing_interval = 2000;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
+ CTLFLAG_RW,
+ &tcp_sad_pacing_interval, 2000,
+ "What is the minimum pacing interval for a classified attacker?");
+
+int32_t tcp_sad_low_pps = 100;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
+ CTLFLAG_RW,
+ &tcp_sad_low_pps, 100,
+ "What is the input pps that below which we do not decay?");
+#endif
+
struct rwlock tcp_function_lock;
static int
@@ -240,6 +292,34 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
VNET_DEFINE(uma_zone_t, sack_hole_zone);
#define V_sack_hole_zone VNET(sack_hole_zone)
+VNET_DEFINE(uint32_t, tcp_map_entries_limit) = 0; /* unlimited */
+static int
+sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint32_t new;
+
+ new = V_tcp_map_entries_limit;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ /* only allow 0 (unlimited) or a value >= TCP_MIN_MAP_ENTRIES_LIMIT */
+ if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
+ error = EINVAL;
+ else
+ V_tcp_map_entries_limit = new;
+ }
+ return (error);
+}
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, map_limit,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ &VNET_NAME(tcp_map_entries_limit), 0,
+ &sysctl_net_inet_tcp_map_limit_check, "IU",
+ "Total sendmap entries limit");
+
+VNET_DEFINE(uint32_t, tcp_map_split_limit) = 0; /* unlimited */
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_map_split_limit), 0,
+ "Total sendmap split entries limit");
#ifdef TCP_HHOOK
VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 8ad3b2f9a483..d070adfc6f70 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -235,6 +235,9 @@ struct tcptemp {
struct tcphdr tt_t;
};
+/* Minimum map entries limit value, if set */
+#define TCP_MIN_MAP_ENTRIES_LIMIT 128
+
/*
* TODO: We yet need to brave plowing in
* to tcp_input() and the pru_usrreq() block.
@@ -790,6 +793,8 @@ VNET_DECLARE(int, tcp_ecn_maxretries);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
+VNET_DECLARE(uint32_t, tcp_map_entries_limit);
+VNET_DECLARE(uint32_t, tcp_map_split_limit);
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_mssdflt);
#ifdef STATS
@@ -830,6 +835,8 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
+#define V_tcp_map_entries_limit VNET(tcp_map_entries_limit)
+#define V_tcp_map_split_limit VNET(tcp_map_split_limit)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
#ifdef STATS
@@ -845,7 +852,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead)
#define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port)
-
#ifdef TCP_HHOOK
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
#define V_tcp_hhh VNET(tcp_hhh)
@@ -915,6 +921,19 @@ extern counter_u64_t tcp_inp_lro_single_push;
extern counter_u64_t tcp_inp_lro_locks_taken;
extern counter_u64_t tcp_inp_lro_sack_wake;
+#ifdef NETFLIX_EXP_DETECTION
+/* Various SACK attack thresholds */
+extern int32_t tcp_force_detection;
+extern int32_t tcp_sack_to_ack_thresh;
+extern int32_t tcp_sack_to_move_thresh;
+extern int32_t tcp_restoral_thresh;
+extern int32_t tcp_sad_decay_val;
+extern int32_t tcp_sad_pacing_interval;
+extern int32_t tcp_sad_low_pps;
+extern int32_t tcp_map_minimum;
+extern int32_t tcp_attack_on_turns_on_logging;
+#endif
+
uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
u_int tcp_maxseg(const struct tcpcb *);