-rw-r--r--  sys/netinet/tcp_output.c      |  2
-rw-r--r--  sys/netinet/tcp_stacks/bbr.c  | 47
-rw-r--r--  sys/netinet/tcp_stacks/rack.c | 69
-rw-r--r--  sys/netinet/tcp_var.h         | 18
4 files changed, 84 insertions, 52 deletions
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 2a91570acdad..44333d772482 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1551,6 +1551,8 @@ send:
#endif /* INET */
out:
+ if (error == 0)
+ tcp_account_for_send(tp, len, (tp->snd_nxt != tp->snd_max), 0);
/*
* In transmit state, time the transmission and arrange for
* the retransmit. In persist state, just set snd_max.
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 7d709e33f0d7..05db7180e7b2 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -486,7 +486,7 @@ static void
uint32_t line, uint8_t is_start, uint16_t set);
static struct bbr_sendmap *
- bbr_find_lowest_rsm(struct tcp_bbr *bbr);
+ bbr_find_lowest_rsm(struct tcp_bbr *bbr);
static __inline uint32_t
bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type);
static void
@@ -1620,7 +1620,7 @@ bbr_init_sysctls(void)
&bbr_drop_limit, 0,
"Number of segments limit for drop (0=use min_cwnd w/flight)?");
- /* Timeout controls */
+ /* Timeout controls */
bbr_timeout = SYSCTL_ADD_NODE(&bbr_sysctl_ctx,
SYSCTL_CHILDREN(bbr_sysctl_root),
OID_AUTO,
@@ -5750,7 +5750,7 @@ tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts)
* seg = goal_tso / mss
* tso = seg * mss
* else
- * tso = mss
+ * tso = mss
* if (tso > per-tcb-max)
* tso = per-tcb-max
* else if ( bw > 512Mbps)
@@ -6736,7 +6736,7 @@ bbr_update_bbr_info(struct tcp_bbr *bbr, struct bbr_sendmap *rsm, uint32_t rtt,
else
bbr->rc_ack_is_cumack = 0;
old_rttprop = bbr_get_rtt(bbr, BBR_RTT_PROP);
- /*
+ /*
* Note the following code differs to the original
* BBR spec. It calls for <= not <. However after a
* long discussion in email with Neal, he acknowledged
@@ -8306,12 +8306,14 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
appended =
#endif
sbappendstream_locked(&so->so_rcv, m, 0);
- tp->t_flags |= TF_WAKESOR;
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
#ifdef NETFLIX_SB_LIMITS
if (so->so_rcv.sb_shlim && appended != mcnt)
counter_fo_release(so->so_rcv.sb_shlim,
mcnt - appended);
#endif
+
} else {
/*
* XXX: Due to the header drop above "th" is
@@ -8323,6 +8325,11 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
thflags = tcp_reass(tp, th, &temp, &tlen, m);
tp->t_flags |= TF_ACKNOW;
+ if (tp->t_flags & TF_WAKESOR) {
+ tp->t_flags &= ~TF_WAKESOR;
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ }
}
if ((tp->t_flags & TF_SACK_PERMIT) &&
(save_tlen > 0) &&
@@ -8357,7 +8364,6 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
save_start + tlen);
}
}
- tcp_handle_wakeup(tp, so);
} else {
m_freem(m);
thflags &= ~TH_FIN;
@@ -9164,7 +9170,11 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (tlen == 0 && (thflags & TH_FIN) == 0) {
(void)tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
(struct mbuf *)0);
- tcp_handle_wakeup(tp, so);
+ if (tp->t_flags & TF_WAKESOR) {
+ tp->t_flags &= ~TF_WAKESOR;
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ }
}
tp->snd_wl1 = th->th_seq - 1;
if (bbr_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
@@ -11565,18 +11575,18 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) {
retval = 0;
m_freem(m);
- goto done_with_input;
- }
- /*
- * If a segment with the ACK-bit set arrives in the SYN-SENT state
- * check SEQ.ACK first as described on page 66 of RFC 793, section 3.9.
- */
- if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
- (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
+ goto done_with_input;
+ }
+ /*
+ * If a segment with the ACK-bit set arrives in the SYN-SENT state
+ * check SEQ.ACK first as described on page 66 of RFC 793, section 3.9.
+ */
+ if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
+ (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT);
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
- return (1);
- }
+ return (1);
+ }
in_recovery = IN_RECOVERY(tp->t_flags);
if (tiwin > bbr->r_ctl.rc_high_rwnd)
bbr->r_ctl.rc_high_rwnd = tiwin;
@@ -11786,8 +11796,6 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap
* own bin
*/
#ifdef NETFLIX_STATS
- tp->t_sndtlppack++;
- tp->t_sndtlpbyte += len;
KMOD_TCPSTAT_INC(tcps_tlpresends);
KMOD_TCPSTAT_ADD(tcps_tlpresend_bytes, len);
#endif
@@ -13741,6 +13749,7 @@ out:
* retransmit. In persist state, just set snd_max.
*/
if (error == 0) {
+ tcp_account_for_send(tp, len, (rsm != NULL), doing_tlp);
if (TCPS_HAVEESTABLISHED(tp->t_state) &&
(tp->t_flags & TF_SACK_PERMIT) &&
tp->rcv_numsacks > 0)
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index bcf3dcc0e38a..f9ba67088f7a 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -10219,7 +10219,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
sbappendstream_locked(&so->so_rcv, m, 0);
rack_log_wakeup(tp,rack, &so->so_rcv, tlen, 1);
- tp->t_flags |= TF_WAKESOR;
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
#ifdef NETFLIX_SB_LIMITS
if (so->so_rcv.sb_shlim && appended != mcnt)
counter_fo_release(so->so_rcv.sb_shlim,
@@ -10236,7 +10237,11 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
thflags = tcp_reass(tp, th, &temp, &tlen, m);
tp->t_flags |= TF_ACKNOW;
-
+ if (tp->t_flags & TF_WAKESOR) {
+ tp->t_flags &= ~TF_WAKESOR;
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ }
}
if ((tp->t_flags & TF_SACK_PERMIT) &&
(save_tlen > 0) &&
@@ -10276,7 +10281,6 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
save_start + tlen);
}
}
- tcp_handle_wakeup(tp, so);
} else {
m_freem(m);
thflags &= ~TH_FIN;
@@ -10482,7 +10486,8 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
ctf_calc_rwin(so, tp);
}
rack_log_wakeup(tp,rack, &so->so_rcv, tlen, 1);
- tp->t_flags |= TF_WAKESOR;
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
#ifdef NETFLIX_SB_LIMITS
if (so->so_rcv.sb_shlim && mcnt != appended)
counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
@@ -10490,7 +10495,6 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
rack_handle_delayed_ack(tp, rack, tlen, 0);
if (tp->snd_una == tp->snd_max)
sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una);
- tcp_handle_wakeup(tp, so);
return (1);
}
@@ -11083,7 +11087,11 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (tlen == 0 && (thflags & TH_FIN) == 0) {
(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
(struct mbuf *)0);
- tcp_handle_wakeup(tp, so);
+ if (tp->t_flags & TF_WAKESOR) {
+ tp->t_flags &= ~TF_WAKESOR;
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ }
}
tp->snd_wl1 = th->th_seq - 1;
/* For syn-recv we need to possibly update the rtt */
@@ -12331,29 +12339,23 @@ rack_fini(struct tcpcb *tp, int32_t tcb_is_purged)
rack = (struct tcp_rack *)tp->t_fb_ptr;
if (tp->t_in_pkt) {
/*
- * Since we are switching we need to process any
- * inbound packets in case a compressed ack is
- * in queue or the new stack does not support
- * mbuf queuing. These packets in theory should
- * have been handled by the old stack anyway.
+ * It is unsafe to process the packets since a
+ * reset may be lurking in them (its rare but it
+ * can occur). If we were to find a RST, then we
+ * would end up dropping the connection and the
+ * INP lock, so when we return the caller (tcp_usrreq)
+ * will blow up when it trys to unlock the inp.
*/
- if ((rack->rc_inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT)) ||
- (rack->rc_inp->inp_flags2 & INP_FREED)) {
- /* Kill all the packets */
- struct mbuf *save, *m;
-
- m = tp->t_in_pkt;
- tp->t_in_pkt = NULL;
- tp->t_tail_pkt = NULL;
- while (m) {
- save = m->m_nextpkt;
- m->m_nextpkt = NULL;
- m_freem(m);
- m = save;
- }
- } else {
- /* Process all the packets */
- ctf_do_queued_segments(rack->rc_inp->inp_socket, rack->rc_tp, 0);
+ struct mbuf *save, *m;
+
+ m = tp->t_in_pkt;
+ tp->t_in_pkt = NULL;
+ tp->t_tail_pkt = NULL;
+ while (m) {
+ save = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ m_freem(m);
+ m = save;
}
if ((tp->t_inpcb) &&
(tp->t_inpcb->inp_flags2 & INP_MBUF_ACKCMP))
@@ -13995,7 +13997,6 @@ rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
if (rack_do_segment_nounlock(m, th, so, tp,
drop_hdrlen, tlen, iptos, 0, &tv) == 0) {
- tcp_handle_wakeup(tp, so);
INP_WUNLOCK(tp->t_inpcb);
}
}
@@ -15402,6 +15403,8 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
rack->rc_tlp_in_progress = 1;
rack->r_ctl.rc_tlp_cnt_out++;
}
+ if (error == 0)
+ tcp_account_for_send(tp, len, 1, doing_tlp);
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
rack->forced_ack = 0; /* If we send something zap the FA flag */
if (IN_FASTRECOVERY(tp->t_flags) && rsm)
@@ -15880,6 +15883,9 @@ again:
rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__);
tp->t_acktime = ticks;
}
+ if (error == 0)
+ tcp_account_for_send(tp, len, 0, 0);
+
rack->forced_ack = 0; /* If we send something zap the FA flag */
tot_len += len;
if ((tp->t_flags & TF_GPUTINPROG) == 0)
@@ -16320,8 +16326,6 @@ again:
tlen = rsm->r_end - rsm->r_start;
if (tlen > segsiz)
tlen = segsiz;
- tp->t_sndtlppack++;
- tp->t_sndtlpbyte += tlen;
KASSERT(SEQ_LEQ(tp->snd_una, rsm->r_start),
("%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
__func__, __LINE__,
@@ -18107,6 +18111,7 @@ out:
* retransmit. In persist state, just set snd_max.
*/
if (error == 0) {
+ tcp_account_for_send(tp, len, (rsm != NULL), doing_tlp);
rack->forced_ack = 0; /* If we send something zap the FA flag */
if (rsm && (doing_tlp == 0)) {
/* Set we retransmitted */
@@ -18151,8 +18156,6 @@ out:
if (doing_tlp && (rsm == NULL)) {
/* New send doing a TLP */
add_flag |= RACK_TLP;
- tp->t_sndtlppack++;
- tp->t_sndtlpbyte += len;
}
rack_log_output(tp, &to, len, rack_seq, (uint8_t) flags, error,
rack_to_usec_ts(&tv),
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 1858d0146ce5..367dd9765534 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -261,6 +261,8 @@ struct tcpcb {
uint32_t t_maxpeakrate; /* max peak rate set by user, in bytes/s */
uint32_t t_sndtlppack; /* tail loss probe packets sent */
uint64_t t_sndtlpbyte; /* total tail loss probe bytes sent */
+ uint64_t t_sndbytes; /* total bytes sent */
+ uint64_t t_snd_rxt_bytes; /* total bytes retransmitted */
uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
uint32_t t_end_info_status; /* Status flag of end info */
@@ -1128,6 +1130,22 @@ tcp_fields_to_net(struct tcphdr *th)
th->th_win = htons(th->th_win);
th->th_urp = htons(th->th_urp);
}
+
+static inline void
+tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt, uint8_t is_tlp)
+{
+ if (is_tlp) {
+ tp->t_sndtlppack++;
+ tp->t_sndtlpbyte += len;
+ }
+ /* To get total bytes sent you must add t_snd_rxt_bytes to t_sndbytes */
+ if (is_rxt)
+ tp->t_snd_rxt_bytes += len;
+ else
+ tp->t_sndbytes += len;
+
+}
+
#endif /* _KERNEL */
#endif /* _NETINET_TCP_VAR_H_ */
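
Note (not part of the commit): the new tcp_account_for_send() inline helper in tcp_var.h centralizes the per-send accounting that bbr.c and rack.c previously did by hand. TLP sends bump t_sndtlppack/t_sndtlpbyte, retransmissions are added to t_snd_rxt_bytes, and all other sends to t_sndbytes, so the total number of bytes put on the wire is the sum of the last two counters. The call sites above pass (rsm != NULL) or (tp->snd_nxt != tp->snd_max) as is_rxt and doing_tlp as is_tlp. A minimal sketch of reading the counters back follows; the report_send_stats() name is invented for illustration only.

/*
 * Illustrative sketch only (not part of the patch): summarizing the
 * counters maintained by tcp_account_for_send().  report_send_stats()
 * is a hypothetical name, not an existing kernel function.
 */
static void
report_send_stats(struct tcpcb *tp)
{
	/* Total bytes sent = new-data bytes + retransmitted bytes. */
	uint64_t total = tp->t_sndbytes + tp->t_snd_rxt_bytes;

	printf("TCP send stats: %ju total (%ju new, %ju rxt, %ju TLP)\n",
	    (uintmax_t)total,
	    (uintmax_t)tp->t_sndbytes,
	    (uintmax_t)tp->t_snd_rxt_bytes,
	    (uintmax_t)tp->t_sndtlpbyte);
}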