path: root/sys/netinet/tcp_stacks/bbr.c
Diffstat (limited to 'sys/netinet/tcp_stacks/bbr.c')
-rw-r--r--  sys/netinet/tcp_stacks/bbr.c | 110
1 file changed, 84 insertions(+), 26 deletions(-)
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 66f19ccd6c2b..621357494a02 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -500,7 +500,7 @@ static void
bbr_enter_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts,
int32_t line);
static void
-bbr_stop_all_timers(struct tcpcb *tp);
+bbr_stop_all_timers(struct tcpcb *tp, struct tcp_bbr *bbr);
static void
bbr_exit_probe_rtt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts);
static void
@@ -1970,7 +1970,7 @@ bbr_log_type_enter_rec(struct tcp_bbr *bbr, uint32_t seq)
static void
bbr_log_msgsize_fail(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t len, uint32_t maxseg, uint32_t mtu, int32_t csum_flags, int32_t tso, uint32_t cts)
{
- if (tcp_bblogging_on(bbr->rc_tp)) {
+ if (tcp_bblogging_on(tp)) {
union tcp_log_stackspecific log;
bbr_fill_in_logging_data(bbr, &log.u_bbr, cts);
@@ -2669,7 +2669,7 @@ bbr_log_type_ltbw(struct tcp_bbr *bbr, uint32_t cts, int32_t reason,
uint32_t newbw, uint32_t obw, uint32_t diff,
uint32_t tim)
{
- if (tcp_bblogging_on(bbr->rc_tp)) {
+ if (/*bbr_verbose_logging && */tcp_bblogging_on(bbr->rc_tp)) {
union tcp_log_stackspecific log;
bbr_fill_in_logging_data(bbr, &log.u_bbr, cts);
@@ -2697,7 +2697,7 @@ bbr_log_type_ltbw(struct tcp_bbr *bbr, uint32_t cts, int32_t reason,
static inline void
bbr_log_progress_event(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t tick, int event, int line)
{
- if (tcp_bblogging_on(bbr->rc_tp)) {
+ if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) {
union tcp_log_stackspecific log;
bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime);
@@ -6281,6 +6281,9 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
else
apply_filter_min_small(&bbr->r_ctl.rc_rttprop, rtt, cts);
}
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_PATHRTT, imax(0, rtt));
+#endif
if (bbr->rc_ack_was_delayed)
rtt += bbr->r_ctl.rc_ack_hdwr_delay;
@@ -9850,16 +9853,13 @@ bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
static void
-bbr_stop_all_timers(struct tcpcb *tp)
+bbr_stop_all_timers(struct tcpcb *tp, struct tcp_bbr *bbr)
{
- struct tcp_bbr *bbr;
-
/*
* Assure no timers are running.
*/
if (tcp_timer_active(tp, TT_PERSIST)) {
/* We enter in persists, set the flag appropriately */
- bbr = (struct tcp_bbr *)tp->t_fb_ptr;
bbr->rc_in_persist = 1;
}
}
@@ -9927,14 +9927,14 @@ bbr_google_mode_off(struct tcp_bbr *bbr)
* which indicates the error (usually no memory).
*/
static int
-bbr_init(struct tcpcb *tp)
+bbr_init(struct tcpcb *tp, void **ptr)
{
struct inpcb *inp = tptoinpcb(tp);
struct tcp_bbr *bbr = NULL;
uint32_t cts;
- tp->t_fb_ptr = uma_zalloc(bbr_pcb_zone, (M_NOWAIT | M_ZERO));
- if (tp->t_fb_ptr == NULL) {
+ *ptr = uma_zalloc(bbr_pcb_zone, (M_NOWAIT | M_ZERO));
+ if (*ptr == NULL) {
/*
* We need to allocate memory but cant. The INP and INP_INFO
* locks and they are recursive (happens during setup. So a
@@ -9943,10 +9943,16 @@ bbr_init(struct tcpcb *tp)
*/
return (ENOMEM);
}
- bbr = (struct tcp_bbr *)tp->t_fb_ptr;
+ bbr = (struct tcp_bbr *)*ptr;
bbr->rtt_valid = 0;
inp->inp_flags2 |= INP_CANNOT_DO_ECN;
inp->inp_flags2 |= INP_SUPPORTS_MBUFQ;
+ /* Take off any undesired flags */
+ inp->inp_flags2 &= ~INP_MBUF_QUEUE_READY;
+ inp->inp_flags2 &= ~INP_DONT_SACK_QUEUE;
+ inp->inp_flags2 &= ~INP_MBUF_ACKCMP;
+ inp->inp_flags2 &= ~INP_MBUF_L_ACKS;
+
TAILQ_INIT(&bbr->r_ctl.rc_map);
TAILQ_INIT(&bbr->r_ctl.rc_free);
TAILQ_INIT(&bbr->r_ctl.rc_tmap);
@@ -10074,8 +10080,8 @@ bbr_init(struct tcpcb *tp)
rsm = bbr_alloc(bbr);
if (rsm == NULL) {
- uma_zfree(bbr_pcb_zone, tp->t_fb_ptr);
- tp->t_fb_ptr = NULL;
+ uma_zfree(bbr_pcb_zone, *ptr);
+ *ptr = NULL;
return (ENOMEM);
}
rsm->r_rtt_not_allowed = 1;
@@ -10128,7 +10134,17 @@ bbr_init(struct tcpcb *tp)
* the TCB on the hptsi wheel if a timer is needed with appropriate
* flags.
*/
- bbr_stop_all_timers(tp);
+ bbr_stop_all_timers(tp, bbr);
+ /*
+ * Validate the timers are not in usec; if they are, convert them.
+ * BBR should in theory move to USEC and get rid of a
+ * lot of the TICKS_2 calls, but for now we stay
+ * with tick timers.
+ */
+ tcp_change_time_units(tp, TCP_TMR_GRANULARITY_TICKS);
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ tp->t_rttmin, TCPTV_REXMTMAX);
bbr_start_hpts_timer(bbr, tp, cts, 5, 0, 0);
return (0);
}
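The t_rxtcur seeding added above follows the classic clamp pattern: take ((srtt >> 2) + rttvar) >> 1 and bound it between t_rttmin and TCPTV_REXMTMAX via TCPT_RANGESET. A minimal stand-alone sketch of that clamp, using an illustrative RANGESET macro and made-up tick values rather than the kernel's definitions:

/* Illustrative sketch only; TCPT_RANGESET itself lives in netinet/tcp_timer.h. */
#include <stdint.h>
#include <stdio.h>

#define RANGESET(tv, value, tvmin, tvmax) do {		\
	(tv) = (value);					\
	if ((tv) < (tvmin))				\
		(tv) = (tvmin);				\
	else if ((tv) > (tvmax))			\
		(tv) = (tvmax);				\
} while (0)

int
main(void)
{
	uint32_t srtt = 40;		/* hypothetical smoothed RTT (scaled ticks) */
	uint32_t rttvar = 12;		/* hypothetical RTT variance (scaled ticks) */
	uint32_t rttmin = 3;		/* hypothetical lower bound (ticks) */
	uint32_t rexmtmax = 128;	/* hypothetical upper bound (ticks) */
	uint32_t rxtcur;

	/* Same expression shape as the diff: seed, then clamp to [min, max]. */
	RANGESET(rxtcur, ((srtt >> 2) + rttvar) >> 1, rttmin, rexmtmax);
	printf("rxtcur = %u ticks\n", (unsigned)rxtcur);
	return (0);
}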
@@ -10172,7 +10188,6 @@ static void
bbr_fini(struct tcpcb *tp, int32_t tcb_is_purged)
{
if (tp->t_fb_ptr) {
- struct inpcb *inp = tptoinpcb(tp);
uint32_t calc;
struct tcp_bbr *bbr;
struct bbr_sendmap *rsm;
@@ -10182,10 +10197,6 @@ bbr_fini(struct tcpcb *tp, int32_t tcb_is_purged)
tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp);
bbr_log_flowend(bbr);
bbr->rc_tp = NULL;
- /* Backout any flags2 we applied */
- inp->inp_flags2 &= ~INP_CANNOT_DO_ECN;
- inp->inp_flags2 &= ~INP_SUPPORTS_MBUFQ;
- inp->inp_flags2 &= ~INP_MBUF_QUEUE_READY;
if (bbr->bbr_hdrw_pacing)
counter_u64_add(bbr_flows_whdwr_pacing, -1);
else
@@ -11853,7 +11864,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
int32_t isipv6;
#endif
uint8_t app_limited = BBR_JR_SENT_DATA;
- uint8_t filled_all = 0;
bbr = (struct tcp_bbr *)tp->t_fb_ptr;
/* We take a cache hit here */
memcpy(&bbr->rc_tv, tv, sizeof(struct timeval));
@@ -13162,7 +13172,7 @@ send:
if_hw_tsomaxsegsize, msb,
((rsm == NULL) ? hw_tls : 0)
#ifdef NETFLIX_COPY_ARGS
- , &filled_all
+ , NULL, NULL
#endif
);
if (len <= maxseg) {
@@ -13474,7 +13484,7 @@ send:
#endif
/* Log to the black box */
- if (tcp_bblogging_on(bbr->rc_tp)) {
+ if (tcp_bblogging_on(tp)) {
union tcp_log_stackspecific log;
bbr_fill_in_logging_data(bbr, &log.u_bbr, cts);
@@ -13483,13 +13493,10 @@ send:
log.u_bbr.flex2 = (bbr->r_recovery_bw << 3);
log.u_bbr.flex3 = maxseg;
log.u_bbr.flex4 = delay_calc;
- /* Encode filled_all into the upper flex5 bit */
log.u_bbr.flex5 = bbr->rc_past_init_win;
log.u_bbr.flex5 <<= 1;
log.u_bbr.flex5 |= bbr->rc_no_pacing;
log.u_bbr.flex5 <<= 29;
- if (filled_all)
- log.u_bbr.flex5 |= 0x80000000;
log.u_bbr.flex5 |= tp->t_maxseg;
log.u_bbr.flex6 = bbr->r_ctl.rc_pace_max_segs;
log.u_bbr.flex7 = (bbr->rc_bbr_state << 8) | bbr_state_val(bbr);
@@ -14073,6 +14080,56 @@ bbr_pru_options(struct tcpcb *tp, int flags)
return (0);
}
+static void
+bbr_switch_failed(struct tcpcb *tp)
+{
+ /*
+ * If a switch fails, we only need to
+ * make sure mbuf_queuing is still in place.
+ * We also need to make sure we are still in
+ * ticks granularity (though we should probably
+ * change bbr to go to USECs).
+ *
+ * For timers we need to see if we are still in the
+ * pacer (if our flags are up); if so we are good, if
+ * not we need to get back into the pacer.
+ */
+ struct inpcb *inp = tptoinpcb(tp);
+ struct timeval tv;
+ uint32_t cts;
+ uint32_t toval;
+ struct tcp_bbr *bbr;
+ struct hpts_diag diag;
+
+ inp->inp_flags2 |= INP_CANNOT_DO_ECN;
+ inp->inp_flags2 |= INP_SUPPORTS_MBUFQ;
+ tcp_change_time_units(tp, TCP_TMR_GRANULARITY_TICKS);
+ if (inp->inp_in_hpts) {
+ return;
+ }
+ bbr = (struct tcp_bbr *)tp->t_fb_ptr;
+ cts = tcp_get_usecs(&tv);
+ if (bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
+ if (TSTMP_GT(bbr->rc_pacer_started, cts)) {
+ toval = bbr->rc_pacer_started - cts;
+ } else {
+ /* one slot please */
+ toval = HPTS_TICKS_PER_SLOT;
+ }
+ } else if (bbr->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
+ if (TSTMP_GT(bbr->r_ctl.rc_timer_exp, cts)) {
+ toval = bbr->r_ctl.rc_timer_exp - cts;
+ } else {
+ /* one slot please */
+ toval = HPTS_TICKS_PER_SLOT;
+ }
+ } else
+ toval = HPTS_TICKS_PER_SLOT;
+ (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(toval),
+ __LINE__, &diag);
+ bbr_log_hpts_diag(bbr, cts, &diag);
+}
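
The timeout selection in bbr_switch_failed() above picks the earliest pending event before re-inserting the connection into HPTS: the pacer's scheduled start if PACE_PKT_OUTPUT is set, otherwise a pending timer if any PACE_TMR_MASK bit is set, otherwise a single HPTS slot. A stand-alone sketch of that selection with plain integers (tstmp_gt() mirrors the wraparound-safe TSTMP_GT() comparison and slot_usecs stands in for HPTS_TICKS_PER_SLOT; the helper and its values are illustrative, not kernel code):

#include <stdint.h>

/* Wraparound-safe "a > b" for 32-bit microsecond timestamps. */
static int
tstmp_gt(uint32_t a, uint32_t b)
{
	return ((int32_t)(a - b) > 0);
}

/* Pick how long to sleep, in usecs, before going back on the pacer wheel. */
static uint32_t
pick_timeout(uint32_t cts, uint32_t pacer_started, uint32_t timer_exp,
    int pacing, int timer_armed, uint32_t slot_usecs)
{
	if (pacing) {
		if (tstmp_gt(pacer_started, cts))
			return (pacer_started - cts);	/* wait out the pacer */
		return (slot_usecs);			/* pacer already due */
	}
	if (timer_armed) {
		if (tstmp_gt(timer_exp, cts))
			return (timer_exp - cts);	/* wait for the timer */
		return (slot_usecs);			/* timer already due */
	}
	return (slot_usecs);				/* nothing pending */
}

int
main(void)
{
	/* Example: pacer due 150 usec after cts, no timer armed. */
	return (pick_timeout(1000, 1150, 0, 1, 0, 10) == 150 ? 0 : 1);
}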
+
struct tcp_function_block __tcp_bbr = {
.tfb_tcp_block_name = __XSTRING(STACKNAME),
.tfb_tcp_output = bbr_output,
@@ -14087,6 +14144,7 @@ struct tcp_function_block __tcp_bbr = {
.tfb_tcp_handoff_ok = bbr_handoff_ok,
.tfb_tcp_mtu_chg = bbr_mtu_chg,
.tfb_pru_options = bbr_pru_options,
+ .tfb_switch_failed = bbr_switch_failed,
.tfb_flags = TCP_FUNC_OUTPUT_CANDROP,
};
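
The new tfb_switch_failed member is an optional callback: when the stack-switching code cannot complete a move off this stack, it can call back into BBR so the connection's inp flags, timer granularity and HPTS scheduling are restored. A hedged sketch of the optional-hook pattern (the real struct tcp_function_block lives in netinet/tcp_var.h; the types and caller below are illustrative only):

#include <stddef.h>

struct tcpcb;						/* opaque for this sketch */

struct function_block_sketch {
	void	(*tfb_switch_failed)(struct tcpcb *);	/* may be NULL */
};

/* Hypothetical caller: only stacks that implement the hook get called back. */
static void
switch_failed_fallback(struct tcpcb *tp, struct function_block_sketch *fb)
{
	if (fb->tfb_switch_failed != NULL)
		(*fb->tfb_switch_failed)(tp);
}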