 sys/netinet/tcp_stacks/bbr.c             | 38
 sys/netinet/tcp_stacks/rack.c            | 64
 sys/netinet/tcp_stacks/rack_bbr_common.c |  5
 sys/netinet/tcp_stacks/rack_bbr_common.h | 34
 sys/netinet/tcp_stacks/sack_filter.c     |  5
 sys/netinet/tcp_stacks/tcp_bbr.h         | 13
 sys/netinet/tcp_stacks/tcp_rack.h        |  2
 sys/netinet/tcp_subr.c                   | 80
 sys/netinet/tcp_var.h                    | 21
 9 files changed, 187 insertions(+), 75 deletions(-)
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 41f423c94b17..c28b0aee05e9 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2019
+ * Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
*
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@@ -57,9 +58,9 @@ __FBSDID("$FreeBSD$");
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
+#ifdef STATS
#include <sys/qmath.h>
#include <sys/tree.h>
-#ifdef NETFLIX_STATS
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#endif
#include <sys/refcount.h>
@@ -161,8 +162,7 @@ static int32_t bbr_num_pktepo_for_del_limit = BBR_NUM_RTTS_FOR_DEL_LIMIT;
static int32_t bbr_hardware_pacing_limit = 8000;
static int32_t bbr_quanta = 3; /* How much extra quanta do we get? */
static int32_t bbr_no_retran = 0;
-static int32_t bbr_tcp_map_entries_limit = 1500;
-static int32_t bbr_tcp_map_split_limit = 256;
+
static int32_t bbr_error_base_paceout = 10000; /* usec to pace */
static int32_t bbr_max_net_error_cnt = 10;
@@ -3381,8 +3381,8 @@ bbr_alloc(struct tcp_bbr *bbr)
static struct bbr_sendmap *
bbr_alloc_full_limit(struct tcp_bbr *bbr)
{
- if ((bbr_tcp_map_entries_limit > 0) &&
- (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+ if ((V_tcp_map_entries_limit > 0) &&
+ (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
BBR_STAT_INC(bbr_alloc_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -3402,8 +3402,8 @@ bbr_alloc_limit(struct tcp_bbr *bbr, uint8_t limit_type)
if (limit_type) {
/* currently there is only one limit type */
- if (bbr_tcp_map_split_limit > 0 &&
- bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) {
+ if (V_tcp_map_split_limit > 0 &&
+ bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
BBR_STAT_INC(bbr_split_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -3685,7 +3685,7 @@ bbr_ack_received(struct tcpcb *tp, struct tcp_bbr *bbr, struct tcphdr *th, uint3
uint32_t cwnd, target_cwnd, saved_bytes, maxseg;
int32_t meth;
-#ifdef NETFLIX_STATS
+#ifdef STATS
if ((tp->t_flags & TF_GPUTINPROG) &&
SEQ_GEQ(th->th_ack, tp->gput_ack)) {
/*
@@ -6510,7 +6510,7 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
}
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
#endif
/*
@@ -8490,6 +8490,7 @@ dodata: /* XXX */
return (0);
}
}
+
#endif
if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) {
bbr->bbr_segs_rcvd += max(1, nsegs);
@@ -8698,6 +8699,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
* reassembly queue and we have enough buffer space to take it.
*/
nsegs = max(1, m->m_pkthdr.lro_nsegs);
+
#ifdef NETFLIX_SB_LIMITS
if (so->so_rcv.sb_shlim) {
mcnt = m_memcnt(m);
@@ -8746,6 +8748,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
newsize, so, NULL))
so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
m_adj(m, drop_hdrlen); /* delayed header drop */
+
#ifdef NETFLIX_SB_LIMITS
appended =
#endif
@@ -11561,7 +11564,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
#endif
/*
@@ -11960,7 +11963,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
/* Window probe */
TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
#endif
@@ -11981,7 +11984,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif
@@ -12017,7 +12020,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
/* Place in 17's the total sent */
counter_u64_add(bbr_state_resend[17], len);
counter_u64_add(bbr_state_lost[17], len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif
@@ -12517,8 +12520,8 @@ recheck_resend:
* as long as we are not retransmiting.
*/
if ((rsm == NULL) &&
- (bbr_tcp_map_entries_limit > 0) &&
- (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+ (V_tcp_map_entries_limit > 0) &&
+ (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
BBR_STAT_INC(bbr_alloc_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -13256,7 +13259,6 @@ send:
SOCKBUF_UNLOCK(&so->so_snd);
return (EHOSTUNREACH);
}
-
hdrlen += sizeof(struct udphdr);
}
#endif
@@ -14276,7 +14278,7 @@ nomore:
bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
return (error);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
} else if (((tp->t_flags & TF_GPUTINPROG) == 0) &&
len &&
(rsm == NULL) &&
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index f6ad63b001ea..74a2a7e81c2d 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2019 Netflix, Inc.
+ * Copyright (c) 2016-9 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@@ -52,7 +53,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>
-#ifdef NETFLIX_STATS
+#ifdef STATS
#include <sys/qmath.h>
#include <sys/tree.h>
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
@@ -187,21 +188,6 @@ static int32_t rack_persist_max = 1000; /* 1 Second */
static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */
static int32_t rack_hw_tls_max_seg = 0; /* 0 means use hw-tls single segment */
-/* Sack attack detection thresholds and such */
-static int32_t tcp_force_detection = 0;
-
-#ifdef NETFLIX_EXP_DETECTION
-static int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */
-static int32_t tcp_sack_to_move_thresh = 600; /* 60 % */
-static int32_t tcp_restoral_thresh = 650; /* 65 % (sack:2:ack -5%) */
-static int32_t tcp_attack_on_turns_on_logging = 0;
-static int32_t tcp_map_minimum = 500;
-#endif
-static int32_t tcp_sad_decay_val = 800;
-static int32_t tcp_sad_pacing_interval = 2000;
-static int32_t tcp_sad_low_pps = 100;
-
-
/*
* Currently regular tcp has a rto_min of 30ms
* the backoff goes 12 times so that ends up
@@ -226,9 +212,6 @@ static int32_t rack_always_send_oldest = 0;
static int32_t rack_use_sack_filter = 1;
static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
static int32_t rack_per_of_gp = 50;
-static int32_t rack_tcp_map_entries_limit = 1500;
-static int32_t rack_tcp_map_split_limit = 256;
-
/* Rack specific counters */
counter_u64_t rack_badfr;
@@ -1577,9 +1560,9 @@ rack_alloc(struct tcp_rack *rack)
static struct rack_sendmap *
rack_alloc_full_limit(struct tcp_rack *rack)
{
- if ((rack_tcp_map_entries_limit > 0) &&
+ if ((V_tcp_map_entries_limit > 0) &&
(rack->do_detection == 0) &&
- (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+ (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
counter_u64_add(rack_to_alloc_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@@ -1598,9 +1581,9 @@ rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type)
if (limit_type) {
/* currently there is only one limit type */
- if (rack_tcp_map_split_limit > 0 &&
+ if (V_tcp_map_split_limit > 0 &&
(rack->do_detection == 0) &&
- rack->r_ctl.rc_num_split_allocs >= rack_tcp_map_split_limit) {
+ rack->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
counter_u64_add(rack_split_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@@ -1648,7 +1631,7 @@ static void
rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs,
uint16_t type, int32_t recovery)
{
-#ifdef NETFLIX_STATS
+#ifdef STATS
int32_t gput;
#endif
@@ -1671,7 +1654,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
tp->ccv->flags &= ~CCF_CWND_LIMITED;
if (type == CC_ACK) {
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
((int32_t) tp->snd_cwnd) - tp->snd_wnd);
if ((tp->t_flags & TF_GPUTINPROG) &&
@@ -1725,7 +1708,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, ui
tp->ccv->curack = th->th_ack;
CC_ALGO(tp)->ack_received(tp->ccv, type);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
#endif
if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) {
@@ -2436,6 +2419,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
slot = 1;
}
hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack);
+#ifdef NETFLIX_EXP_DETECTION
if (rack->sack_attack_disable &&
(slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) {
/*
@@ -2450,6 +2434,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
*/
slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
}
+#endif
if (tp->t_flags & TF_DELACK) {
delayed_ack = TICKS_2_MSEC(tcp_delacktime);
rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
@@ -3776,7 +3761,8 @@ refind:
seq_out = rack_update_entry(tp, rack, nrsm, ts, &len);
if (len == 0) {
return;
- }
+ } else if (len > 0)
+ goto refind;
}
}
/*
@@ -3912,7 +3898,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
TCPSTAT_INC(tcps_rttupdated);
rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var);
tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
#endif
tp->t_rxtshift = 0;
@@ -4620,6 +4606,7 @@ rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ac
static void
rack_do_decay(struct tcp_rack *rack)
{
+#ifdef NETFLIX_EXP_DETECTION
struct timeval res;
#define timersub(tvp, uvp, vvp) \
@@ -4670,6 +4657,7 @@ rack_do_decay(struct tcp_rack *rack)
rack->r_ctl.sack_noextra_move = ctf_decay_count(rack->r_ctl.sack_noextra_move,
tcp_sad_decay_val);
}
+#endif
}
static void
@@ -7406,9 +7394,11 @@ rack_init(struct tcpcb *tp)
rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack;
rack->r_ctl.rc_tlp_rxt_last_time = tcp_ts_getticks();
/* Do we force on detection? */
+#ifdef NETFLIX_EXP_DETECTION
if (tcp_force_detection)
rack->do_detection = 1;
else
+#endif
rack->do_detection = 0;
if (tp->snd_una != tp->snd_max) {
/* Create a send map for the current outstanding data */
@@ -7701,7 +7691,7 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
* the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
#endif
if (tiwin > rack->r_ctl.rc_high_rwnd)
@@ -8390,8 +8380,8 @@ again:
*/
if ((rsm == NULL) &&
(rack->do_detection == 0) &&
- (rack_tcp_map_entries_limit > 0) &&
- (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+ (V_tcp_map_entries_limit > 0) &&
+ (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
counter_u64_add(rack_to_alloc_limited, 1);
if (!rack->alloc_limit_reported) {
rack->alloc_limit_reported = 1;
@@ -9318,7 +9308,7 @@ send:
}
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
@@ -9339,14 +9329,14 @@ send:
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif
@@ -9929,7 +9919,7 @@ out:
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
}
-#ifdef NETFLIX_STATS
+#ifdef STATS
if (!(tp->t_flags & TF_GPUTINPROG) && len) {
tp->t_flags |= TF_GPUTINPROG;
tp->gput_seq = startseq;
@@ -10142,7 +10132,7 @@ rack_set_sockopt(struct socket *so, struct sockopt *sopt,
rack = (struct tcp_rack *)tp->t_fb_ptr;
switch (sopt->sopt_name) {
case TCP_RACK_DO_DETECTION:
- RACK_OPTS_INC(tcp_rack_no_sack);
+ RACK_OPTS_INC(tcp_rack_do_detection);
if (optval == 0)
rack->do_detection = 0;
else
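
Note on the rack.c hunks above: the code in rack_start_hpts_timer() and rack_do_decay() is now compiled only when the kernel is built with NETFLIX_EXP_DETECTION; in that configuration a connection flagged as a SACK attacker has its pacing slot floored at tcp_sad_pacing_interval. A minimal userland sketch of that floor, assuming MS_IN_USEC is 1000 as defined in rack_bbr_common.h (variable names mirror the kernel ones, but nothing here is kernel code):

#include <stdint.h>
#include <stdio.h>

#define MS_IN_USEC	1000			/* from rack_bbr_common.h */
#define USEC_TO_MSEC(x)	((x) / MS_IN_USEC)	/* same macro, parenthesized here */

int
main(void)
{
	int32_t tcp_sad_pacing_interval = 2000;	/* default set in tcp_subr.c, in usec */
	uint32_t slot = 1;			/* pacing slot rack picked, in ms */
	int sack_attack_disable = 1;		/* connection classified as an attacker */

	/* Same test rack_start_hpts_timer() performs under NETFLIX_EXP_DETECTION. */
	if (sack_attack_disable &&
	    slot < USEC_TO_MSEC(tcp_sad_pacing_interval))
		slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
	printf("paced slot = %u ms\n", slot);	/* prints "paced slot = 2 ms" */
	return (0);
}
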
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index ad9d59a3e1f4..b574fc14bf96 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2018
+ * Copyright (c) 2016-9
* Netflix Inc.
* All rights reserved.
*
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ratelimit.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/module.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
@@ -133,8 +134,6 @@ __FBSDID("$FreeBSD$");
* Common TCP Functions - These are shared by borth
* rack and BBR.
*/
-
-
#ifdef KERN_TLS
uint32_t
ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.h b/sys/netinet/tcp_stacks/rack_bbr_common.h
index 6cb2fed7c2fa..5eb304ddf4ad 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.h
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.h
@@ -1,5 +1,5 @@
-#ifndef __pacer_timer_h__
-#define __pacer_timer_h__
+#ifndef __rack_bbr_common_h__
+#define __rack_bbr_common_h__
/*-
* Copyright (c) 2017-9 Netflix, Inc.
*
@@ -26,6 +26,12 @@
*
* __FBSDID("$FreeBSD$");
*/
+
+/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed. */
+#ifndef NETFLIX_STATS
+#undef STATS
+#endif
+
/* Common defines and such used by both RACK and BBR */
/* Special values for mss accounting array */
#define TCP_MSS_ACCT_JUSTRET 0
@@ -46,6 +52,23 @@
#define PROGRESS_CLEAR 3
#define PROGRESS_START 4
+/* codes for just-return */
+#define CTF_JR_SENT_DATA 0
+#define CTF_JR_CWND_LIMITED 1
+#define CTF_JR_RWND_LIMITED 2
+#define CTF_JR_APP_LIMITED 3
+#define CTF_JR_ASSESSING 4
+#define CTF_JR_PERSISTS 5
+#define CTF_JR_PRR 6
+
+/* Compat. */
+#define BBR_JR_SENT_DATA CTF_JR_SENT_DATA
+#define BBR_JR_CWND_LIMITED CTF_JR_CWND_LIMITED
+#define BBR_JR_RWND_LIMITED CTF_JR_RWND_LIMITED
+#define BBR_JR_APP_LIMITED CTF_JR_APP_LIMITED
+#define BBR_JR_ASSESSING CTF_JR_ASSESSING
+#define BBR_JR_PERSISTS CTF_JR_PERSISTS
+#define BBR_JR_PRR CTF_JR_PRR
/* RTT sample methods */
#define USE_RTT_HIGH 0
@@ -59,6 +82,13 @@
#define USEC_TO_MSEC(x) (x / MS_IN_USEC)
#define TCP_TS_OVERHEAD 12 /* Overhead of having Timestamps on */
+/* Bits per second in bytes per second */
+#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
+#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
+#define TWENTY_THREE_MBPS 2896000
+#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
+#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
+
#ifdef _KERNEL
/* We have only 7 bits in rack so assert its true */
CTASSERT((PACE_TMR_MASK & 0x80) == 0);
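
The new guard at the top of rack_bbr_common.h ties the in-tree STATS option to Netflix's NETFLIX_STATS: unless NETFLIX_STATS is defined, any STATS definition is cancelled for the RACK/BBR stacks, so the stats_voi_update_*() call sites converted above compile out. A minimal sketch of the effect, using a hypothetical demo_stats_point() wrapper in place of the real kernel call sites:

#include <stdio.h>

/* #define NETFLIX_STATS 1 */	/* leave commented out to see STATS cancelled */
#define STATS 1			/* e.g. supplied by the kernel configuration */

#ifndef NETFLIX_STATS		/* the coupling added in rack_bbr_common.h */
#undef STATS
#endif

static void
demo_stats_point(int rtt_ms)	/* hypothetical stand-in for a call site */
{
#ifdef STATS
	printf("stats enabled: would record VOI_TCP_RTT = %d\n", rtt_ms);
#else
	printf("stats compiled out: NETFLIX_STATS is not defined\n");
#endif
}

int
main(void)
{
	demo_stats_point(42);
	return (0);
}
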
diff --git a/sys/netinet/tcp_stacks/sack_filter.c b/sys/netinet/tcp_stacks/sack_filter.c
index c4b35d5b8ca8..978f6670c50a 100644
--- a/sys/netinet/tcp_stacks/sack_filter.c
+++ b/sys/netinet/tcp_stacks/sack_filter.c
@@ -25,11 +25,16 @@
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifndef _KERNEL
+#define _WANT_TCPCB 1
+#endif
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/socket.h>
+#ifdef _KERNEL
#include <sys/mbuf.h>
#include <sys/sockopt.h>
+#endif
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_seq.h>
diff --git a/sys/netinet/tcp_stacks/tcp_bbr.h b/sys/netinet/tcp_stacks/tcp_bbr.h
index f09e25a18390..98fcb69f9684 100644
--- a/sys/netinet/tcp_stacks/tcp_bbr.h
+++ b/sys/netinet/tcp_stacks/tcp_bbr.h
@@ -128,12 +128,6 @@ TAILQ_HEAD(bbr_head, bbr_sendmap);
* an clear to start measuring */
#define BBR_RED_BW_PE_NOEARLY_OUT 7 /* Set pkt epoch judged that we do not
* get out of jail early */
-/* codes for just-return */
-#define BBR_JR_SENT_DATA 0
-#define BBR_JR_CWND_LIMITED 1
-#define BBR_JR_RWND_LIMITED 2
-#define BBR_JR_APP_LIMITED 3
-#define BBR_JR_ASSESSING 4
/* For calculating a rate */
#define BBR_CALC_BW 1
#define BBR_CALC_LOSS 2
@@ -385,13 +379,6 @@ struct bbr_log_sysctl_out {
#define BBR_BIG_LOG_SIZE 300000
-/* Bits per second in bytes per second */
-#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
-#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
-#define TWENTY_THREE_MBPS 2896000
-#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
-#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
-
struct bbr_stats {
uint64_t bbr_badfr; /* 0 */
uint64_t bbr_badfr_bytes; /* 1 */
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index 41eca27666fd..9020f362ec09 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -137,7 +137,7 @@ struct rack_opts_stats {
uint64_t tcp_rack_min_pace_seg;
uint64_t tcp_rack_min_pace;
uint64_t tcp_rack_cheat;
- uint64_t tcp_rack_no_sack;
+ uint64_t tcp_rack_do_detection;
};
#define TLP_USE_ID 1 /* Internet draft behavior */
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 56b470c0aa07..8534f74d9059 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -138,6 +138,58 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
+#ifdef NETFLIX_EXP_DETECTION
+/* Sack attack detection thresholds and such */
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, CTLFLAG_RW, 0,
+ "Sack Attack detection thresholds");
+int32_t tcp_force_detection = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
+ CTLFLAG_RW,
+ &tcp_force_detection, 0,
+ "Do we force detection even if the INP has it off?");
+int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
+ CTLFLAG_RW,
+ &tcp_sack_to_ack_thresh, 700,
+ "Percentage of sacks to acks we must see above (10.1 percent is 101)?");
+int32_t tcp_sack_to_move_thresh = 600; /* 60 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
+ CTLFLAG_RW,
+ &tcp_sack_to_move_thresh, 600,
+ "Percentage of sack moves we must see above (10.1 percent is 101)");
+int32_t tcp_restoral_thresh = 650; /* 65 % (sack:2:ack -5%) */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
+ CTLFLAG_RW,
+ &tcp_restoral_thresh, 550,
+ "Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)");
+int32_t tcp_sad_decay_val = 800;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
+ CTLFLAG_RW,
+ &tcp_sad_decay_val, 800,
+ "The decay percentage (10.1 percent equals 101 )");
+int32_t tcp_map_minimum = 500;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
+ CTLFLAG_RW,
+ &tcp_map_minimum, 500,
+ "Number of Map enteries before we start detection");
+int32_t tcp_attack_on_turns_on_logging = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
+ CTLFLAG_RW,
+ &tcp_attack_on_turns_on_logging, 0,
+ "When we have a positive hit on attack, do we turn on logging?");
+int32_t tcp_sad_pacing_interval = 2000;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
+ CTLFLAG_RW,
+ &tcp_sad_pacing_interval, 2000,
+ "What is the minimum pacing interval for a classified attacker?");
+
+int32_t tcp_sad_low_pps = 100;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
+ CTLFLAG_RW,
+ &tcp_sad_low_pps, 100,
+ "What is the input pps that below which we do not decay?");
+#endif
+
struct rwlock tcp_function_lock;
static int
@@ -240,6 +292,34 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
VNET_DEFINE(uma_zone_t, sack_hole_zone);
#define V_sack_hole_zone VNET(sack_hole_zone)
+VNET_DEFINE(uint32_t, tcp_map_entries_limit) = 0; /* unlimited */
+static int
+sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint32_t new;
+
+ new = V_tcp_map_entries_limit;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ /* only allow "0" and value > minimum */
+ if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
+ error = EINVAL;
+ else
+ V_tcp_map_entries_limit = new;
+ }
+ return (error);
+}
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, map_limit,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ &VNET_NAME(tcp_map_entries_limit), 0,
+ &sysctl_net_inet_tcp_map_limit_check, "IU",
+ "Total sendmap entries limit");
+
+VNET_DEFINE(uint32_t, tcp_map_split_limit) = 0; /* unlimited */
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_map_split_limit), 0,
+ "Total sendmap split entries limit");
#ifdef TCP_HHOOK
VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
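
The per-stack statics removed above (bbr_tcp_map_entries_limit and rack_tcp_map_entries_limit, both 1500, plus the 256-entry split limits) are replaced by the per-VNET tunables defined here, exposed as net.inet.tcp.map_limit and net.inet.tcp.split_limit and defaulting to 0 (unlimited). The handler accepts 0 or any value of at least TCP_MIN_MAP_ENTRIES_LIMIT (128, added to tcp_var.h below) and rejects anything in between with EINVAL. A minimal userland restatement of that rule, with the map_limit_check() helper name being illustrative only:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define TCP_MIN_MAP_ENTRIES_LIMIT 128	/* as added to tcp_var.h */

static int
map_limit_check(uint32_t new, uint32_t *limit)
{
	/* Only allow "0" (unlimited) or a value >= the minimum. */
	if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
		return (EINVAL);
	*limit = new;
	return (0);
}

int
main(void)
{
	uint32_t limit = 0;			/* boot default: unlimited */
	uint32_t attempts[] = { 0, 100, 128, 1500 };

	for (int i = 0; i < 4; i++)
		printf("set %4u -> %s (limit now %u)\n", attempts[i],
		    map_limit_check(attempts[i], &limit) ? "EINVAL" : "ok",
		    limit);
	return (0);
}

The previous behavior can be restored at runtime with, for example, sysctl net.inet.tcp.map_limit=1500; split_limit is a plain SYSCTL_UINT and enforces no minimum.
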
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 8ad3b2f9a483..d070adfc6f70 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -235,6 +235,9 @@ struct tcptemp {
struct tcphdr tt_t;
};
+/* Minimum map entries limit value, if set */
+#define TCP_MIN_MAP_ENTRIES_LIMIT 128
+
/*
* TODO: We yet need to brave plowing in
* to tcp_input() and the pru_usrreq() block.
@@ -790,6 +793,8 @@ VNET_DECLARE(int, tcp_ecn_maxretries);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
+VNET_DECLARE(uint32_t, tcp_map_entries_limit);
+VNET_DECLARE(uint32_t, tcp_map_split_limit);
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_mssdflt);
#ifdef STATS
@@ -830,6 +835,8 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
+#define V_tcp_map_entries_limit VNET(tcp_map_entries_limit)
+#define V_tcp_map_split_limit VNET(tcp_map_split_limit)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
#ifdef STATS
@@ -845,7 +852,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead)
#define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port)
-
#ifdef TCP_HHOOK
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
#define V_tcp_hhh VNET(tcp_hhh)
@@ -915,6 +921,19 @@ extern counter_u64_t tcp_inp_lro_single_push;
extern counter_u64_t tcp_inp_lro_locks_taken;
extern counter_u64_t tcp_inp_lro_sack_wake;
+#ifdef NETFLIX_EXP_DETECTION
+/* Various SACK attack thresholds */
+extern int32_t tcp_force_detection;
+extern int32_t tcp_sack_to_ack_thresh;
+extern int32_t tcp_sack_to_move_thresh;
+extern int32_t tcp_restoral_thresh;
+extern int32_t tcp_sad_decay_val;
+extern int32_t tcp_sad_pacing_interval;
+extern int32_t tcp_sad_low_pps;
+extern int32_t tcp_map_minimum;
+extern int32_t tcp_attack_on_turns_on_logging;
+#endif
+
uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
u_int tcp_maxseg(const struct tcpcb *);