aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netinet/tcp_input.c')
-rw-r--r--sys/netinet/tcp_input.c497
1 files changed, 234 insertions, 263 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 6492495dc583..d5dc516c28aa 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2562,299 +2562,270 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
hhook_run_tcp_est_in(tp, th, &to);
#endif
- if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
- maxseg = tcp_maxseg(tp);
- if (no_data &&
- (tiwin == tp->snd_wnd ||
- (tp->t_flags & TF_SACK_PERMIT))) {
+ if (SEQ_LT(th->th_ack, tp->snd_una)) {
+ /* This is old ACK information, don't process it. */
+ break;
+ }
+ if (th->th_ack == tp->snd_una) {
+ /* Check if this is a duplicate ACK. */
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ V_tcp_do_newsack) {
/*
- * If this is the first time we've seen a
- * FIN from the remote, this is not a
- * duplicate and it needs to be processed
- * normally. This happens during a
- * simultaneous close.
+ * If SEG.ACK == SND.UNA, RFC 6675 requires a
+ * duplicate ACK to selectively acknowledge
+ * at least one byte, which was not selectively
+ * acknowledged before.
*/
- if ((thflags & TH_FIN) &&
- (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
- tp->t_dupacks = 0;
+ if (sack_changed == SACK_NOCHANGE) {
break;
}
- TCPSTAT_INC(tcps_rcvdupack);
- /*
- * If we have outstanding data (other than
- * a window probe), this is a completely
- * duplicate ack (ie, window info didn't
- * change and FIN isn't set),
- * the ack is the biggest we've
- * seen and we've seen exactly our rexmt
- * threshold of them, assume a packet
- * has been dropped and retransmit it.
- * Kludge snd_nxt & the congestion
- * window so we send only this one
- * packet.
- *
- * We know we're losing at the current
- * window size so do congestion avoidance
- * (set ssthresh to half the current window
- * and pull our congestion window back to
- * the new ssthresh).
- *
- * Dup acks mean that packets have left the
- * network (they're now cached at the receiver)
- * so bump cwnd by the amount in the receiver
- * to keep a constant cwnd packets in the
- * network.
- *
- * When using TCP ECN, notify the peer that
- * we reduced the cwnd.
- */
+ } else {
/*
- * Following 2 kinds of acks should not affect
- * dupack counting:
- * 1) Old acks
- * 2) Acks with SACK but without any new SACK
- * information in them. These could result from
- * any anomaly in the network like a switch
- * duplicating packets or a possible DoS attack.
+ * If SEG.ACK == SND.UNA, RFC 5681 requires a
+ * duplicate ACK to have no data on it and to
+ * not be a window update.
*/
- if (th->th_ack != tp->snd_una ||
- (tcp_is_sack_recovery(tp, &to) &&
- (sack_changed == SACK_NOCHANGE))) {
+ if (!no_data || tiwin != tp->snd_wnd) {
break;
- } else if (!tcp_timer_active(tp, TT_REXMT)) {
- tp->t_dupacks = 0;
- } else if (++tp->t_dupacks > tcprexmtthresh ||
- IN_FASTRECOVERY(tp->t_flags)) {
- cc_ack_received(tp, th, nsegs,
- CC_DUPACK);
- if (V_tcp_do_prr &&
+ }
+ }
+ /*
+ * If this is the first time we've seen a
+ * FIN from the remote, this is not a
+ * duplicate ACK and it needs to be processed
+ * normally.
+ * This happens during a simultaneous close.
+ */
+ if ((thflags & TH_FIN) &&
+ (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ /* Perform duplicate ACK processing. */
+ TCPSTAT_INC(tcps_rcvdupack);
+ maxseg = tcp_maxseg(tp);
+ if (!tcp_timer_active(tp, TT_REXMT)) {
+ tp->t_dupacks = 0;
+ } else if (++tp->t_dupacks > tcprexmtthresh ||
+ IN_FASTRECOVERY(tp->t_flags)) {
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
+ if (V_tcp_do_prr &&
+ IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->t_flags & TF_SACK_PERMIT)) {
+ tcp_do_prr_ack(tp, th, &to,
+ sack_changed, &maxseg);
+ } else if (tcp_is_sack_recovery(tp, &to) &&
IN_FASTRECOVERY(tp->t_flags) &&
- (tp->t_flags & TF_SACK_PERMIT)) {
- tcp_do_prr_ack(tp, th, &to,
- sack_changed, &maxseg);
- } else if (tcp_is_sack_recovery(tp, &to) &&
- IN_FASTRECOVERY(tp->t_flags) &&
- (tp->snd_nxt == tp->snd_max)) {
- int awnd;
+ (tp->snd_nxt == tp->snd_max)) {
+ int awnd;
- /*
- * Compute the amount of data in flight first.
- * We can inject new data into the pipe iff
- * we have less than ssthresh
- * worth of data in flight.
- */
- awnd = tcp_compute_pipe(tp);
- if (awnd < tp->snd_ssthresh) {
- tp->snd_cwnd += imax(maxseg,
- imin(2 * maxseg,
- tp->sackhint.delivered_data));
- if (tp->snd_cwnd > tp->snd_ssthresh)
- tp->snd_cwnd = tp->snd_ssthresh;
- }
- } else if (tcp_is_sack_recovery(tp, &to) &&
- IN_FASTRECOVERY(tp->t_flags) &&
- SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ /*
+ * Compute the amount of data in flight first.
+ * We can inject new data into the pipe iff
+ * we have less than ssthresh
+ * worth of data in flight.
+ */
+ awnd = tcp_compute_pipe(tp);
+ if (awnd < tp->snd_ssthresh) {
tp->snd_cwnd += imax(maxseg,
imin(2 * maxseg,
tp->sackhint.delivered_data));
- } else {
- tp->snd_cwnd += maxseg;
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
}
- (void) tcp_output(tp);
- goto drop;
- } else if (tp->t_dupacks == tcprexmtthresh ||
- (tp->t_flags & TF_SACK_PERMIT &&
- V_tcp_do_newsack &&
- tp->sackhint.sacked_bytes >
- (tcprexmtthresh - 1) * maxseg)) {
+ } else if (tcp_is_sack_recovery(tp, &to) &&
+ IN_FASTRECOVERY(tp->t_flags) &&
+ SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_cwnd += imax(maxseg,
+ imin(2 * maxseg,
+ tp->sackhint.delivered_data));
+ } else {
+ tp->snd_cwnd += maxseg;
+ }
+ (void) tcp_output(tp);
+ goto drop;
+ } else if (tp->t_dupacks == tcprexmtthresh ||
+ (tp->t_flags & TF_SACK_PERMIT &&
+ V_tcp_do_newsack &&
+ tp->sackhint.sacked_bytes >
+ (tcprexmtthresh - 1) * maxseg)) {
enter_recovery:
- /*
- * Above is the RFC6675 trigger condition of
- * more than (dupthresh-1)*maxseg sacked data.
- * If the count of holes in the
- * scoreboard is >= dupthresh, we could
- * also enter loss recovery, but don't
- * have that value readily available.
- */
- tp->t_dupacks = tcprexmtthresh;
- tcp_seq onxt = tp->snd_nxt;
+ /*
+ * Above is the RFC6675 trigger condition of
+ * more than (dupthresh-1)*maxseg sacked data.
+ * If the count of holes in the
+ * scoreboard is >= dupthresh, we could
+ * also enter loss recovery, but don't
+ * have that value readily available.
+ */
+ tp->t_dupacks = tcprexmtthresh;
+ tcp_seq onxt = tp->snd_nxt;
- /*
- * If we're doing sack, check to
- * see if we're already in sack
- * recovery. If we're not doing sack,
- * check to see if we're in newreno
- * recovery.
- */
- if (tcp_is_sack_recovery(tp, &to)) {
- if (IN_FASTRECOVERY(tp->t_flags)) {
- tp->t_dupacks = 0;
- break;
- }
- } else {
- if (SEQ_LEQ(th->th_ack,
- tp->snd_recover)) {
- tp->t_dupacks = 0;
- break;
- }
+ /*
+ * If we're doing sack, check to
+ * see if we're already in sack
+ * recovery. If we're not doing sack,
+ * check to see if we're in newreno
+ * recovery.
+ */
+ if (tcp_is_sack_recovery(tp, &to)) {
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ tp->t_dupacks = 0;
+ break;
}
- /* Congestion signal before ack. */
- cc_cong_signal(tp, th, CC_NDUPACK);
- cc_ack_received(tp, th, nsegs,
- CC_DUPACK);
- tcp_timer_activate(tp, TT_REXMT, 0);
- tp->t_rtttime = 0;
- if (V_tcp_do_prr) {
- /*
- * snd_ssthresh and snd_recover are
- * already updated by cc_cong_signal.
- */
- if (tcp_is_sack_recovery(tp, &to)) {
- /*
- * Include Limited Transmit
- * segments here
- */
- tp->sackhint.prr_delivered =
- imin(tp->snd_max - th->th_ack,
- (tp->snd_limited + 1) * maxseg);
- } else {
- tp->sackhint.prr_delivered =
- maxseg;
- }
- tp->sackhint.recover_fs = max(1,
- tp->snd_nxt - tp->snd_una);
+ } else {
+ if (SEQ_LEQ(th->th_ack,
+ tp->snd_recover)) {
+ tp->t_dupacks = 0;
+ break;
}
- tp->snd_limited = 0;
+ }
+ /* Congestion signal before ack. */
+ cc_cong_signal(tp, th, CC_NDUPACK);
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_rtttime = 0;
+ if (V_tcp_do_prr) {
+ /*
+ * snd_ssthresh and snd_recover are
+ * already updated by cc_cong_signal.
+ */
if (tcp_is_sack_recovery(tp, &to)) {
- TCPSTAT_INC(tcps_sack_recovery_episode);
/*
- * When entering LR after RTO due to
- * Duplicate ACKs, retransmit existing
- * holes from the scoreboard.
+ * Include Limited Transmit
+ * segments here
*/
- tcp_resend_sackholes(tp);
- /* Avoid inflating cwnd in tcp_output */
- tp->snd_nxt = tp->snd_max;
- tp->snd_cwnd = tcp_compute_pipe(tp) +
+ tp->sackhint.prr_delivered =
+ imin(tp->snd_max - th->th_ack,
+ (tp->snd_limited + 1) * maxseg);
+ } else {
+ tp->sackhint.prr_delivered =
maxseg;
- (void) tcp_output(tp);
- /* Set cwnd to the expected flightsize */
- tp->snd_cwnd = tp->snd_ssthresh;
- if (SEQ_GT(th->th_ack, tp->snd_una)) {
- goto resume_partialack;
- }
- goto drop;
}
- tp->snd_nxt = th->th_ack;
- tp->snd_cwnd = maxseg;
- (void) tcp_output(tp);
- KASSERT(tp->snd_limited <= 2,
- ("%s: tp->snd_limited too big",
- __func__));
- tp->snd_cwnd = tp->snd_ssthresh +
- maxseg *
- (tp->t_dupacks - tp->snd_limited);
- if (SEQ_GT(onxt, tp->snd_nxt))
- tp->snd_nxt = onxt;
- goto drop;
- } else if (V_tcp_do_rfc3042) {
- /*
- * Process first and second duplicate
- * ACKs. Each indicates a segment
- * leaving the network, creating room
- * for more. Make sure we can send a
- * packet on reception of each duplicate
- * ACK by increasing snd_cwnd by one
- * segment. Restore the original
- * snd_cwnd after packet transmission.
- */
- cc_ack_received(tp, th, nsegs, CC_DUPACK);
- uint32_t oldcwnd = tp->snd_cwnd;
- tcp_seq oldsndmax = tp->snd_max;
- u_int sent;
- int avail;
-
- KASSERT(tp->t_dupacks == 1 ||
- tp->t_dupacks == 2,
- ("%s: dupacks not 1 or 2",
- __func__));
- if (tp->t_dupacks == 1)
- tp->snd_limited = 0;
- if ((tp->snd_nxt == tp->snd_max) &&
- (tp->t_rxtshift == 0))
- tp->snd_cwnd =
- SEQ_SUB(tp->snd_nxt,
- tp->snd_una) -
- tcp_sack_adjust(tp);
- tp->snd_cwnd +=
- (tp->t_dupacks - tp->snd_limited) *
- maxseg - tcp_sack_adjust(tp);
+ tp->sackhint.recover_fs = max(1,
+ tp->snd_nxt - tp->snd_una);
+ }
+ tp->snd_limited = 0;
+ if (tcp_is_sack_recovery(tp, &to)) {
+ TCPSTAT_INC(tcps_sack_recovery_episode);
/*
- * Only call tcp_output when there
- * is new data available to be sent
- * or we need to send an ACK.
+ * When entering LR after RTO due to
+ * Duplicate ACKs, retransmit existing
+ * holes from the scoreboard.
*/
- SOCK_SENDBUF_LOCK(so);
- avail = sbavail(&so->so_snd);
- SOCK_SENDBUF_UNLOCK(so);
- if (tp->t_flags & TF_ACKNOW ||
- (avail >=
- SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
- (void) tcp_output(tp);
- }
- sent = SEQ_SUB(tp->snd_max, oldsndmax);
- if (sent > maxseg) {
- KASSERT((tp->t_dupacks == 2 &&
- tp->snd_limited == 0) ||
- (sent == maxseg + 1 &&
- tp->t_flags & TF_SENTFIN) ||
- (sent < 2 * maxseg &&
- tp->t_flags & TF_NODELAY),
- ("%s: sent too much: %u>%u",
- __func__, sent, maxseg));
- tp->snd_limited = 2;
- } else if (sent > 0) {
- ++tp->snd_limited;
- }
- tp->snd_cwnd = oldcwnd;
+ tcp_resend_sackholes(tp);
+ /* Avoid inflating cwnd in tcp_output */
+ tp->snd_nxt = tp->snd_max;
+ tp->snd_cwnd = tcp_compute_pipe(tp) +
+ maxseg;
+ (void) tcp_output(tp);
+ /* Set cwnd to the expected flightsize */
+ tp->snd_cwnd = tp->snd_ssthresh;
goto drop;
}
- }
- break;
- } else {
- /*
- * This ack is advancing the left edge, reset the
- * counter.
- */
- tp->t_dupacks = 0;
- /*
- * If this ack also has new SACK info, increment the
- * counter as per rfc6675. The variable
- * sack_changed tracks all changes to the SACK
- * scoreboard, including when partial ACKs without
- * SACK options are received, and clear the scoreboard
- * from the left side. Such partial ACKs should not be
- * counted as dupacks here.
- */
- if (tcp_is_sack_recovery(tp, &to) &&
- (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
- ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
- (tp->snd_nxt == tp->snd_max)) {
- tp->t_dupacks++;
- /* limit overhead by setting maxseg last */
- if (!IN_FASTRECOVERY(tp->t_flags) &&
- (tp->sackhint.sacked_bytes >
- ((tcprexmtthresh - 1) *
- (maxseg = tcp_maxseg(tp))))) {
- goto enter_recovery;
+ tp->snd_nxt = th->th_ack;
+ tp->snd_cwnd = maxseg;
+ (void) tcp_output(tp);
+ KASSERT(tp->snd_limited <= 2,
+ ("%s: tp->snd_limited too big",
+ __func__));
+ tp->snd_cwnd = tp->snd_ssthresh +
+ maxseg *
+ (tp->t_dupacks - tp->snd_limited);
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (V_tcp_do_rfc3042) {
+ /*
+ * Process first and second duplicate
+ * ACKs. Each indicates a segment
+ * leaving the network, creating room
+ * for more. Make sure we can send a
+ * packet on reception of each duplicate
+ * ACK by increasing snd_cwnd by one
+ * segment. Restore the original
+ * snd_cwnd after packet transmission.
+ */
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
+ uint32_t oldcwnd = tp->snd_cwnd;
+ tcp_seq oldsndmax = tp->snd_max;
+ u_int sent;
+ int avail;
+
+ KASSERT(tp->t_dupacks == 1 ||
+ tp->t_dupacks == 2,
+ ("%s: dupacks not 1 or 2",
+ __func__));
+ if (tp->t_dupacks == 1)
+ tp->snd_limited = 0;
+ if ((tp->snd_nxt == tp->snd_max) &&
+ (tp->t_rxtshift == 0))
+ tp->snd_cwnd =
+ SEQ_SUB(tp->snd_nxt, tp->snd_una);
+ tp->snd_cwnd +=
+ (tp->t_dupacks - tp->snd_limited) * maxseg;
+ tp->snd_cwnd -= tcp_sack_adjust(tp);
+ /*
+ * Only call tcp_output when there
+ * is new data available to be sent
+ * or we need to send an ACK.
+ */
+ SOCK_SENDBUF_LOCK(so);
+ avail = sbavail(&so->so_snd);
+ SOCK_SENDBUF_UNLOCK(so);
+ if (tp->t_flags & TF_ACKNOW ||
+ (avail >=
+ SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
+ (void) tcp_output(tp);
+ }
+ sent = SEQ_SUB(tp->snd_max, oldsndmax);
+ if (sent > maxseg) {
+ KASSERT((tp->t_dupacks == 2 &&
+ tp->snd_limited == 0) ||
+ (sent == maxseg + 1 &&
+ tp->t_flags & TF_SENTFIN) ||
+ (sent < 2 * maxseg &&
+ tp->t_flags & TF_NODELAY),
+ ("%s: sent too much: %u>%u",
+ __func__, sent, maxseg));
+ tp->snd_limited = 2;
+ } else if (sent > 0) {
+ ++tp->snd_limited;
}
+ tp->snd_cwnd = oldcwnd;
+ goto drop;
}
+ break;
}
-
-resume_partialack:
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
- ("%s: th_ack <= snd_una", __func__));
-
+ ("%s: SEQ_LEQ(th_ack, snd_una)", __func__));
+ /*
+ * This ack is advancing the left edge, reset the
+ * counter.
+ */
+ tp->t_dupacks = 0;
+ /*
+ * If this ack also has new SACK info, increment the
+ * t_dupacks as per RFC 6675. The variable
+ * sack_changed tracks all changes to the SACK
+ * scoreboard, including when partial ACKs without
+ * SACK options are received, and clear the scoreboard
+ * from the left side. Such partial ACKs should not be
+ * counted as dupacks here.
+ */
+ if (V_tcp_do_newsack &&
+ tcp_is_sack_recovery(tp, &to) &&
+ (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
+ ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
+ (tp->snd_nxt == tp->snd_max)) {
+ tp->t_dupacks++;
+ /* limit overhead by setting maxseg last */
+ if (!IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->sackhint.sacked_bytes >
+ (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) {
+ goto enter_recovery;
+ }
+ }
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.