diff options
Diffstat (limited to 'sys/netinet/tcp_input.c')
-rw-r--r-- | sys/netinet/tcp_input.c | 497 |
1 files changed, 234 insertions, 263 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 6492495dc583..d5dc516c28aa 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -2562,299 +2562,270 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, hhook_run_tcp_est_in(tp, th, &to); #endif - if (SEQ_LEQ(th->th_ack, tp->snd_una)) { - maxseg = tcp_maxseg(tp); - if (no_data && - (tiwin == tp->snd_wnd || - (tp->t_flags & TF_SACK_PERMIT))) { + if (SEQ_LT(th->th_ack, tp->snd_una)) { + /* This is old ACK information, don't process it. */ + break; + } + if (th->th_ack == tp->snd_una) { + /* Check if this is a duplicate ACK. */ + if ((tp->t_flags & TF_SACK_PERMIT) && + V_tcp_do_newsack) { /* - * If this is the first time we've seen a - * FIN from the remote, this is not a - * duplicate and it needs to be processed - * normally. This happens during a - * simultaneous close. + * If SEG.ACK == SND.UNA, RFC 6675 requires a + * duplicate ACK to selectively acknowledge + * at least one byte, which was not selectively + * acknowledged before. */ - if ((thflags & TH_FIN) && - (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { - tp->t_dupacks = 0; + if (sack_changed == SACK_NOCHANGE) { break; } - TCPSTAT_INC(tcps_rcvdupack); - /* - * If we have outstanding data (other than - * a window probe), this is a completely - * duplicate ack (ie, window info didn't - * change and FIN isn't set), - * the ack is the biggest we've - * seen and we've seen exactly our rexmt - * threshold of them, assume a packet - * has been dropped and retransmit it. - * Kludge snd_nxt & the congestion - * window so we send only this one - * packet. - * - * We know we're losing at the current - * window size so do congestion avoidance - * (set ssthresh to half the current window - * and pull our congestion window back to - * the new ssthresh). - * - * Dup acks mean that packets have left the - * network (they're now cached at the receiver) - * so bump cwnd by the amount in the receiver - * to keep a constant cwnd packets in the - * network. - * - * When using TCP ECN, notify the peer that - * we reduced the cwnd. - */ + } else { /* - * Following 2 kinds of acks should not affect - * dupack counting: - * 1) Old acks - * 2) Acks with SACK but without any new SACK - * information in them. These could result from - * any anomaly in the network like a switch - * duplicating packets or a possible DoS attack. + * If SEG.ACK == SND.UNA, RFC 5681 requires a + * duplicate ACK to have no data on it and to + * not be a window update. */ - if (th->th_ack != tp->snd_una || - (tcp_is_sack_recovery(tp, &to) && - (sack_changed == SACK_NOCHANGE))) { + if (!no_data || tiwin != tp->snd_wnd) { break; - } else if (!tcp_timer_active(tp, TT_REXMT)) { - tp->t_dupacks = 0; - } else if (++tp->t_dupacks > tcprexmtthresh || - IN_FASTRECOVERY(tp->t_flags)) { - cc_ack_received(tp, th, nsegs, - CC_DUPACK); - if (V_tcp_do_prr && + } + } + /* + * If this is the first time we've seen a + * FIN from the remote, this is not a + * duplicate ACK and it needs to be processed + * normally. + * This happens during a simultaneous close. + */ + if ((thflags & TH_FIN) && + (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { + tp->t_dupacks = 0; + break; + } + /* Perform duplicate ACK processing. */ + TCPSTAT_INC(tcps_rcvdupack); + maxseg = tcp_maxseg(tp); + if (!tcp_timer_active(tp, TT_REXMT)) { + tp->t_dupacks = 0; + } else if (++tp->t_dupacks > tcprexmtthresh || + IN_FASTRECOVERY(tp->t_flags)) { + cc_ack_received(tp, th, nsegs, CC_DUPACK); + if (V_tcp_do_prr && + IN_FASTRECOVERY(tp->t_flags) && + (tp->t_flags & TF_SACK_PERMIT)) { + tcp_do_prr_ack(tp, th, &to, + sack_changed, &maxseg); + } else if (tcp_is_sack_recovery(tp, &to) && IN_FASTRECOVERY(tp->t_flags) && - (tp->t_flags & TF_SACK_PERMIT)) { - tcp_do_prr_ack(tp, th, &to, - sack_changed, &maxseg); - } else if (tcp_is_sack_recovery(tp, &to) && - IN_FASTRECOVERY(tp->t_flags) && - (tp->snd_nxt == tp->snd_max)) { - int awnd; + (tp->snd_nxt == tp->snd_max)) { + int awnd; - /* - * Compute the amount of data in flight first. - * We can inject new data into the pipe iff - * we have less than ssthresh - * worth of data in flight. - */ - awnd = tcp_compute_pipe(tp); - if (awnd < tp->snd_ssthresh) { - tp->snd_cwnd += imax(maxseg, - imin(2 * maxseg, - tp->sackhint.delivered_data)); - if (tp->snd_cwnd > tp->snd_ssthresh) - tp->snd_cwnd = tp->snd_ssthresh; - } - } else if (tcp_is_sack_recovery(tp, &to) && - IN_FASTRECOVERY(tp->t_flags) && - SEQ_LT(tp->snd_nxt, tp->snd_max)) { + /* + * Compute the amount of data in flight first. + * We can inject new data into the pipe iff + * we have less than ssthresh + * worth of data in flight. + */ + awnd = tcp_compute_pipe(tp); + if (awnd < tp->snd_ssthresh) { tp->snd_cwnd += imax(maxseg, imin(2 * maxseg, tp->sackhint.delivered_data)); - } else { - tp->snd_cwnd += maxseg; + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; } - (void) tcp_output(tp); - goto drop; - } else if (tp->t_dupacks == tcprexmtthresh || - (tp->t_flags & TF_SACK_PERMIT && - V_tcp_do_newsack && - tp->sackhint.sacked_bytes > - (tcprexmtthresh - 1) * maxseg)) { + } else if (tcp_is_sack_recovery(tp, &to) && + IN_FASTRECOVERY(tp->t_flags) && + SEQ_LT(tp->snd_nxt, tp->snd_max)) { + tp->snd_cwnd += imax(maxseg, + imin(2 * maxseg, + tp->sackhint.delivered_data)); + } else { + tp->snd_cwnd += maxseg; + } + (void) tcp_output(tp); + goto drop; + } else if (tp->t_dupacks == tcprexmtthresh || + (tp->t_flags & TF_SACK_PERMIT && + V_tcp_do_newsack && + tp->sackhint.sacked_bytes > + (tcprexmtthresh - 1) * maxseg)) { enter_recovery: - /* - * Above is the RFC6675 trigger condition of - * more than (dupthresh-1)*maxseg sacked data. - * If the count of holes in the - * scoreboard is >= dupthresh, we could - * also enter loss recovery, but don't - * have that value readily available. - */ - tp->t_dupacks = tcprexmtthresh; - tcp_seq onxt = tp->snd_nxt; + /* + * Above is the RFC6675 trigger condition of + * more than (dupthresh-1)*maxseg sacked data. + * If the count of holes in the + * scoreboard is >= dupthresh, we could + * also enter loss recovery, but don't + * have that value readily available. + */ + tp->t_dupacks = tcprexmtthresh; + tcp_seq onxt = tp->snd_nxt; - /* - * If we're doing sack, check to - * see if we're already in sack - * recovery. If we're not doing sack, - * check to see if we're in newreno - * recovery. - */ - if (tcp_is_sack_recovery(tp, &to)) { - if (IN_FASTRECOVERY(tp->t_flags)) { - tp->t_dupacks = 0; - break; - } - } else { - if (SEQ_LEQ(th->th_ack, - tp->snd_recover)) { - tp->t_dupacks = 0; - break; - } + /* + * If we're doing sack, check to + * see if we're already in sack + * recovery. If we're not doing sack, + * check to see if we're in newreno + * recovery. + */ + if (tcp_is_sack_recovery(tp, &to)) { + if (IN_FASTRECOVERY(tp->t_flags)) { + tp->t_dupacks = 0; + break; } - /* Congestion signal before ack. */ - cc_cong_signal(tp, th, CC_NDUPACK); - cc_ack_received(tp, th, nsegs, - CC_DUPACK); - tcp_timer_activate(tp, TT_REXMT, 0); - tp->t_rtttime = 0; - if (V_tcp_do_prr) { - /* - * snd_ssthresh and snd_recover are - * already updated by cc_cong_signal. - */ - if (tcp_is_sack_recovery(tp, &to)) { - /* - * Include Limited Transmit - * segments here - */ - tp->sackhint.prr_delivered = - imin(tp->snd_max - th->th_ack, - (tp->snd_limited + 1) * maxseg); - } else { - tp->sackhint.prr_delivered = - maxseg; - } - tp->sackhint.recover_fs = max(1, - tp->snd_nxt - tp->snd_una); + } else { + if (SEQ_LEQ(th->th_ack, + tp->snd_recover)) { + tp->t_dupacks = 0; + break; } - tp->snd_limited = 0; + } + /* Congestion signal before ack. */ + cc_cong_signal(tp, th, CC_NDUPACK); + cc_ack_received(tp, th, nsegs, CC_DUPACK); + tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rtttime = 0; + if (V_tcp_do_prr) { + /* + * snd_ssthresh and snd_recover are + * already updated by cc_cong_signal. + */ if (tcp_is_sack_recovery(tp, &to)) { - TCPSTAT_INC(tcps_sack_recovery_episode); /* - * When entering LR after RTO due to - * Duplicate ACKs, retransmit existing - * holes from the scoreboard. + * Include Limited Transmit + * segments here */ - tcp_resend_sackholes(tp); - /* Avoid inflating cwnd in tcp_output */ - tp->snd_nxt = tp->snd_max; - tp->snd_cwnd = tcp_compute_pipe(tp) + + tp->sackhint.prr_delivered = + imin(tp->snd_max - th->th_ack, + (tp->snd_limited + 1) * maxseg); + } else { + tp->sackhint.prr_delivered = maxseg; - (void) tcp_output(tp); - /* Set cwnd to the expected flightsize */ - tp->snd_cwnd = tp->snd_ssthresh; - if (SEQ_GT(th->th_ack, tp->snd_una)) { - goto resume_partialack; - } - goto drop; } - tp->snd_nxt = th->th_ack; - tp->snd_cwnd = maxseg; - (void) tcp_output(tp); - KASSERT(tp->snd_limited <= 2, - ("%s: tp->snd_limited too big", - __func__)); - tp->snd_cwnd = tp->snd_ssthresh + - maxseg * - (tp->t_dupacks - tp->snd_limited); - if (SEQ_GT(onxt, tp->snd_nxt)) - tp->snd_nxt = onxt; - goto drop; - } else if (V_tcp_do_rfc3042) { - /* - * Process first and second duplicate - * ACKs. Each indicates a segment - * leaving the network, creating room - * for more. Make sure we can send a - * packet on reception of each duplicate - * ACK by increasing snd_cwnd by one - * segment. Restore the original - * snd_cwnd after packet transmission. - */ - cc_ack_received(tp, th, nsegs, CC_DUPACK); - uint32_t oldcwnd = tp->snd_cwnd; - tcp_seq oldsndmax = tp->snd_max; - u_int sent; - int avail; - - KASSERT(tp->t_dupacks == 1 || - tp->t_dupacks == 2, - ("%s: dupacks not 1 or 2", - __func__)); - if (tp->t_dupacks == 1) - tp->snd_limited = 0; - if ((tp->snd_nxt == tp->snd_max) && - (tp->t_rxtshift == 0)) - tp->snd_cwnd = - SEQ_SUB(tp->snd_nxt, - tp->snd_una) - - tcp_sack_adjust(tp); - tp->snd_cwnd += - (tp->t_dupacks - tp->snd_limited) * - maxseg - tcp_sack_adjust(tp); + tp->sackhint.recover_fs = max(1, + tp->snd_nxt - tp->snd_una); + } + tp->snd_limited = 0; + if (tcp_is_sack_recovery(tp, &to)) { + TCPSTAT_INC(tcps_sack_recovery_episode); /* - * Only call tcp_output when there - * is new data available to be sent - * or we need to send an ACK. + * When entering LR after RTO due to + * Duplicate ACKs, retransmit existing + * holes from the scoreboard. */ - SOCK_SENDBUF_LOCK(so); - avail = sbavail(&so->so_snd); - SOCK_SENDBUF_UNLOCK(so); - if (tp->t_flags & TF_ACKNOW || - (avail >= - SEQ_SUB(tp->snd_nxt, tp->snd_una))) { - (void) tcp_output(tp); - } - sent = SEQ_SUB(tp->snd_max, oldsndmax); - if (sent > maxseg) { - KASSERT((tp->t_dupacks == 2 && - tp->snd_limited == 0) || - (sent == maxseg + 1 && - tp->t_flags & TF_SENTFIN) || - (sent < 2 * maxseg && - tp->t_flags & TF_NODELAY), - ("%s: sent too much: %u>%u", - __func__, sent, maxseg)); - tp->snd_limited = 2; - } else if (sent > 0) { - ++tp->snd_limited; - } - tp->snd_cwnd = oldcwnd; + tcp_resend_sackholes(tp); + /* Avoid inflating cwnd in tcp_output */ + tp->snd_nxt = tp->snd_max; + tp->snd_cwnd = tcp_compute_pipe(tp) + + maxseg; + (void) tcp_output(tp); + /* Set cwnd to the expected flightsize */ + tp->snd_cwnd = tp->snd_ssthresh; goto drop; } - } - break; - } else { - /* - * This ack is advancing the left edge, reset the - * counter. - */ - tp->t_dupacks = 0; - /* - * If this ack also has new SACK info, increment the - * counter as per rfc6675. The variable - * sack_changed tracks all changes to the SACK - * scoreboard, including when partial ACKs without - * SACK options are received, and clear the scoreboard - * from the left side. Such partial ACKs should not be - * counted as dupacks here. - */ - if (tcp_is_sack_recovery(tp, &to) && - (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || - ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && - (tp->snd_nxt == tp->snd_max)) { - tp->t_dupacks++; - /* limit overhead by setting maxseg last */ - if (!IN_FASTRECOVERY(tp->t_flags) && - (tp->sackhint.sacked_bytes > - ((tcprexmtthresh - 1) * - (maxseg = tcp_maxseg(tp))))) { - goto enter_recovery; + tp->snd_nxt = th->th_ack; + tp->snd_cwnd = maxseg; + (void) tcp_output(tp); + KASSERT(tp->snd_limited <= 2, + ("%s: tp->snd_limited too big", + __func__)); + tp->snd_cwnd = tp->snd_ssthresh + + maxseg * + (tp->t_dupacks - tp->snd_limited); + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (V_tcp_do_rfc3042) { + /* + * Process first and second duplicate + * ACKs. Each indicates a segment + * leaving the network, creating room + * for more. Make sure we can send a + * packet on reception of each duplicate + * ACK by increasing snd_cwnd by one + * segment. Restore the original + * snd_cwnd after packet transmission. + */ + cc_ack_received(tp, th, nsegs, CC_DUPACK); + uint32_t oldcwnd = tp->snd_cwnd; + tcp_seq oldsndmax = tp->snd_max; + u_int sent; + int avail; + + KASSERT(tp->t_dupacks == 1 || + tp->t_dupacks == 2, + ("%s: dupacks not 1 or 2", + __func__)); + if (tp->t_dupacks == 1) + tp->snd_limited = 0; + if ((tp->snd_nxt == tp->snd_max) && + (tp->t_rxtshift == 0)) + tp->snd_cwnd = + SEQ_SUB(tp->snd_nxt, tp->snd_una); + tp->snd_cwnd += + (tp->t_dupacks - tp->snd_limited) * maxseg; + tp->snd_cwnd -= tcp_sack_adjust(tp); + /* + * Only call tcp_output when there + * is new data available to be sent + * or we need to send an ACK. + */ + SOCK_SENDBUF_LOCK(so); + avail = sbavail(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); + if (tp->t_flags & TF_ACKNOW || + (avail >= + SEQ_SUB(tp->snd_nxt, tp->snd_una))) { + (void) tcp_output(tp); + } + sent = SEQ_SUB(tp->snd_max, oldsndmax); + if (sent > maxseg) { + KASSERT((tp->t_dupacks == 2 && + tp->snd_limited == 0) || + (sent == maxseg + 1 && + tp->t_flags & TF_SENTFIN) || + (sent < 2 * maxseg && + tp->t_flags & TF_NODELAY), + ("%s: sent too much: %u>%u", + __func__, sent, maxseg)); + tp->snd_limited = 2; + } else if (sent > 0) { + ++tp->snd_limited; } + tp->snd_cwnd = oldcwnd; + goto drop; } + break; } - -resume_partialack: KASSERT(SEQ_GT(th->th_ack, tp->snd_una), - ("%s: th_ack <= snd_una", __func__)); - + ("%s: SEQ_LEQ(th_ack, snd_una)", __func__)); + /* + * This ack is advancing the left edge, reset the + * counter. + */ + tp->t_dupacks = 0; + /* + * If this ack also has new SACK info, increment the + * t_dupacks as per RFC 6675. The variable + * sack_changed tracks all changes to the SACK + * scoreboard, including when partial ACKs without + * SACK options are received, and clear the scoreboard + * from the left side. Such partial ACKs should not be + * counted as dupacks here. + */ + if (V_tcp_do_newsack && + tcp_is_sack_recovery(tp, &to) && + (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || + ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && + (tp->snd_nxt == tp->snd_max)) { + tp->t_dupacks++; + /* limit overhead by setting maxseg last */ + if (!IN_FASTRECOVERY(tp->t_flags) && + (tp->sackhint.sacked_bytes > + (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) { + goto enter_recovery; + } + } /* * If the congestion window was inflated to account * for the other side's cached packets, retract it. |