Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/cc/cc.c                      |   2
-rw-r--r-- | sys/netinet/in.c                         |   4
-rw-r--r-- | sys/netinet/ip_output.c                  |  13
-rw-r--r-- | sys/netinet/ip_var.h                     |   1
-rw-r--r-- | sys/netinet/tcp_hpts.c                   |   2
-rw-r--r-- | sys/netinet/tcp_input.c                  | 497
-rw-r--r-- | sys/netinet/tcp_stacks/bbr.c             |   2
-rw-r--r-- | sys/netinet/tcp_stacks/rack.c            |   2
-rw-r--r-- | sys/netinet/tcp_stacks/rack_bbr_common.c |   2
-rw-r--r-- | sys/netinet/tcp_stacks/rack_pcm.c        |   2
-rw-r--r-- | sys/netinet/tcp_stacks/tailq_hash.c      |   2
-rw-r--r-- | sys/netinet/tcp_syncache.c               | 104
-rw-r--r-- | sys/netinet/udp_usrreq.c                 |  45
13 files changed, 328 insertions, 350 deletions
diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c
index d85ad4e9f4fd..c20a20cd983d 100644
--- a/sys/netinet/cc/cc.c
+++ b/sys/netinet/cc/cc.c
@@ -659,7 +659,7 @@ cc_modevent(module_t mod, int event_type, void *data)
 	case MOD_SHUTDOWN:
 		break;
 	case MOD_QUIESCE:
-		/* Stop any new assigments */
+		/* Stop any new assignments */
 		err = cc_stop_new_assignments(algo);
 		break;
 	case MOD_UNLOAD:
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 963449d4b4b1..0e283a7d099d 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -522,8 +522,8 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct ucred *cred
 	/*
 	 * Check if bridge wants to allow adding addrs to member interfaces.
 	 */
-	if (ifp->if_bridge && bridge_member_ifaddrs_p &&
-	    !bridge_member_ifaddrs_p())
+	if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF &&
+	    bridge_member_ifaddrs_p != NULL && !bridge_member_ifaddrs_p())
 		return (EINVAL);
 
 	/*
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index ec6ba8d92015..ef08b9cfd3d6 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -1044,14 +1044,14 @@ done:
 }
 
 void
-in_delayed_cksum(struct mbuf *m)
+in_delayed_cksum_o(struct mbuf *m, uint16_t iph_offset)
 {
 	struct ip *ip;
 	struct udphdr *uh;
 	uint16_t cklen, csum, offset;
 
-	ip = mtod(m, struct ip *);
-	offset = ip->ip_hl << 2 ;
+	ip = (struct ip *)mtodo(m, iph_offset);
+	offset = iph_offset + (ip->ip_hl << 2);
 
 	if (m->m_pkthdr.csum_flags & CSUM_UDP) {
 		/* if udp header is not in the first mbuf copy udplen */
@@ -1078,6 +1078,13 @@ in_delayed_cksum(struct mbuf *m)
 	*(u_short *)mtodo(m, offset) = csum;
 }
 
+void
+in_delayed_cksum(struct mbuf *m)
+{
+
+	in_delayed_cksum_o(m, 0);
+}
+
 /*
  * IP socket option processing.
  */
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index f782ebc53eb0..c113484079a3 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -271,6 +271,7 @@ VNET_DECLARE(struct pfil_head *, inet_local_pfil_head);
 #define	PFIL_INET_LOCAL_NAME	"inet-local"
 
 void	in_delayed_cksum(struct mbuf *m);
+void	in_delayed_cksum_o(struct mbuf *m, uint16_t o);
 
 /* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */
 /*
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index b77ebc928809..63bbe4bba11b 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -137,8 +137,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 6492495dc583..d5dc516c28aa 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2562,299 +2562,270 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 		hhook_run_tcp_est_in(tp, th, &to);
 #endif
-		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
-			maxseg = tcp_maxseg(tp);
-			if (no_data &&
-			    (tiwin == tp->snd_wnd ||
-			    (tp->t_flags & TF_SACK_PERMIT))) {
+		if (SEQ_LT(th->th_ack, tp->snd_una)) {
+			/* This is old ACK information, don't process it. */
+			break;
+		}
+		if (th->th_ack == tp->snd_una) {
+			/* Check if this is a duplicate ACK. */
+			if ((tp->t_flags & TF_SACK_PERMIT) &&
+			    V_tcp_do_newsack) {
 				/*
-				 * If this is the first time we've seen a
-				 * FIN from the remote, this is not a
-				 * duplicate and it needs to be processed
-				 * normally. This happens during a
-				 * simultaneous close.
+				 * If SEG.ACK == SND.UNA, RFC 6675 requires a
+				 * duplicate ACK to selectively acknowledge
+				 * at least one byte, which was not selectively
+				 * acknowledged before.
 				 */
-				if ((thflags & TH_FIN) &&
-				    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
-					tp->t_dupacks = 0;
+				if (sack_changed == SACK_NOCHANGE) {
 					break;
 				}
-				TCPSTAT_INC(tcps_rcvdupack);
-				/*
-				 * If we have outstanding data (other than
-				 * a window probe), this is a completely
-				 * duplicate ack (ie, window info didn't
-				 * change and FIN isn't set),
-				 * the ack is the biggest we've
-				 * seen and we've seen exactly our rexmt
-				 * threshold of them, assume a packet
-				 * has been dropped and retransmit it.
-				 * Kludge snd_nxt & the congestion
-				 * window so we send only this one
-				 * packet.
-				 *
-				 * We know we're losing at the current
-				 * window size so do congestion avoidance
-				 * (set ssthresh to half the current window
-				 * and pull our congestion window back to
-				 * the new ssthresh).
-				 *
-				 * Dup acks mean that packets have left the
-				 * network (they're now cached at the receiver)
-				 * so bump cwnd by the amount in the receiver
-				 * to keep a constant cwnd packets in the
-				 * network.
-				 *
-				 * When using TCP ECN, notify the peer that
-				 * we reduced the cwnd.
-				 */
+			} else {
 				/*
-				 * Following 2 kinds of acks should not affect
-				 * dupack counting:
-				 * 1) Old acks
-				 * 2) Acks with SACK but without any new SACK
-				 * information in them. These could result from
-				 * any anomaly in the network like a switch
-				 * duplicating packets or a possible DoS attack.
+				 * If SEG.ACK == SND.UNA, RFC 5681 requires a
+				 * duplicate ACK to have no data on it and to
+				 * not be a window update.
 				 */
-				if (th->th_ack != tp->snd_una ||
-				    (tcp_is_sack_recovery(tp, &to) &&
-				    (sack_changed == SACK_NOCHANGE))) {
+				if (!no_data || tiwin != tp->snd_wnd) {
 					break;
-				} else if (!tcp_timer_active(tp, TT_REXMT)) {
-					tp->t_dupacks = 0;
-				} else if (++tp->t_dupacks > tcprexmtthresh ||
-				    IN_FASTRECOVERY(tp->t_flags)) {
-					cc_ack_received(tp, th, nsegs,
-					    CC_DUPACK);
-					if (V_tcp_do_prr &&
-					    IN_FASTRECOVERY(tp->t_flags) &&
-					    (tp->t_flags & TF_SACK_PERMIT)) {
-						tcp_do_prr_ack(tp, th, &to,
-						    sack_changed, &maxseg);
-					} else if (tcp_is_sack_recovery(tp, &to) &&
-					    IN_FASTRECOVERY(tp->t_flags) &&
-					    (tp->snd_nxt == tp->snd_max)) {
-						int awnd;
+				}
+			}
+			/*
+			 * If this is the first time we've seen a
+			 * FIN from the remote, this is not a
+			 * duplicate ACK and it needs to be processed
+			 * normally.
+			 * This happens during a simultaneous close.
+			 */
+			if ((thflags & TH_FIN) &&
+			    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
+				tp->t_dupacks = 0;
+				break;
+			}
+			/* Perform duplicate ACK processing. */
+			TCPSTAT_INC(tcps_rcvdupack);
+			maxseg = tcp_maxseg(tp);
+			if (!tcp_timer_active(tp, TT_REXMT)) {
+				tp->t_dupacks = 0;
+			} else if (++tp->t_dupacks > tcprexmtthresh ||
+			    IN_FASTRECOVERY(tp->t_flags)) {
+				cc_ack_received(tp, th, nsegs, CC_DUPACK);
+				if (V_tcp_do_prr &&
+				    IN_FASTRECOVERY(tp->t_flags) &&
+				    (tp->t_flags & TF_SACK_PERMIT)) {
+					tcp_do_prr_ack(tp, th, &to,
+					    sack_changed, &maxseg);
+				} else if (tcp_is_sack_recovery(tp, &to) &&
+				    IN_FASTRECOVERY(tp->t_flags) &&
+				    (tp->snd_nxt == tp->snd_max)) {
+					int awnd;
 
 					/*
 					 * Compute the amount of data in flight first.
 					 * We can inject new data into the pipe iff
 					 * we have less than ssthresh
 					 * worth of data in flight.
 					 */
-						awnd = tcp_compute_pipe(tp);
-						if (awnd < tp->snd_ssthresh) {
-							tp->snd_cwnd += imax(maxseg,
-							    imin(2 * maxseg,
-							    tp->sackhint.delivered_data));
-							if (tp->snd_cwnd > tp->snd_ssthresh)
-								tp->snd_cwnd = tp->snd_ssthresh;
-						}
-					} else if (tcp_is_sack_recovery(tp, &to) &&
-					    IN_FASTRECOVERY(tp->t_flags) &&
-					    SEQ_LT(tp->snd_nxt, tp->snd_max)) {
-						tp->snd_cwnd += imax(maxseg,
-						    imin(2 * maxseg,
-						    tp->sackhint.delivered_data));
-					} else {
-						tp->snd_cwnd += maxseg;
-					}
-					(void) tcp_output(tp);
-					goto drop;
-				} else if (tp->t_dupacks == tcprexmtthresh ||
-				    (tp->t_flags & TF_SACK_PERMIT &&
-				    V_tcp_do_newsack &&
-				    tp->sackhint.sacked_bytes >
-				    (tcprexmtthresh - 1) * maxseg)) {
+					awnd = tcp_compute_pipe(tp);
+					if (awnd < tp->snd_ssthresh) {
+						tp->snd_cwnd += imax(maxseg,
+						    imin(2 * maxseg,
+						    tp->sackhint.delivered_data));
+						if (tp->snd_cwnd > tp->snd_ssthresh)
+							tp->snd_cwnd = tp->snd_ssthresh;
+					}
+				} else if (tcp_is_sack_recovery(tp, &to) &&
+				    IN_FASTRECOVERY(tp->t_flags) &&
+				    SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+					tp->snd_cwnd += imax(maxseg,
+					    imin(2 * maxseg,
+					    tp->sackhint.delivered_data));
+				} else {
+					tp->snd_cwnd += maxseg;
+				}
+				(void) tcp_output(tp);
+				goto drop;
+			} else if (tp->t_dupacks == tcprexmtthresh ||
+			    (tp->t_flags & TF_SACK_PERMIT &&
+			    V_tcp_do_newsack &&
+			    tp->sackhint.sacked_bytes >
+			    (tcprexmtthresh - 1) * maxseg)) {
 enter_recovery:
-					/*
-					 * Above is the RFC6675 trigger condition of
-					 * more than (dupthresh-1)*maxseg sacked data.
-					 * If the count of holes in the
-					 * scoreboard is >= dupthresh, we could
-					 * also enter loss recovery, but don't
-					 * have that value readily available.
-					 */
-					tp->t_dupacks = tcprexmtthresh;
-					tcp_seq onxt = tp->snd_nxt;
-
-					/*
-					 * If we're doing sack, check to
-					 * see if we're already in sack
-					 * recovery. If we're not doing sack,
-					 * check to see if we're in newreno
-					 * recovery.
-					 */
-					if (tcp_is_sack_recovery(tp, &to)) {
-						if (IN_FASTRECOVERY(tp->t_flags)) {
-							tp->t_dupacks = 0;
-							break;
-						}
-					} else {
-						if (SEQ_LEQ(th->th_ack,
-						    tp->snd_recover)) {
-							tp->t_dupacks = 0;
-							break;
-						}
-					}
-					/* Congestion signal before ack. */
-					cc_cong_signal(tp, th, CC_NDUPACK);
-					cc_ack_received(tp, th, nsegs,
-					    CC_DUPACK);
-					tcp_timer_activate(tp, TT_REXMT, 0);
-					tp->t_rtttime = 0;
-					if (V_tcp_do_prr) {
-						/*
-						 * snd_ssthresh and snd_recover are
-						 * already updated by cc_cong_signal.
-						 */
-						if (tcp_is_sack_recovery(tp, &to)) {
-							/*
-							 * Include Limited Transmit
-							 * segments here
-							 */
-							tp->sackhint.prr_delivered =
-							    imin(tp->snd_max - th->th_ack,
-							    (tp->snd_limited + 1) * maxseg);
-						} else {
-							tp->sackhint.prr_delivered =
-							    maxseg;
-						}
-						tp->sackhint.recover_fs = max(1,
-						    tp->snd_nxt - tp->snd_una);
-					}
-					tp->snd_limited = 0;
-					if (tcp_is_sack_recovery(tp, &to)) {
-						TCPSTAT_INC(tcps_sack_recovery_episode);
-						/*
-						 * When entering LR after RTO due to
-						 * Duplicate ACKs, retransmit existing
-						 * holes from the scoreboard.
-						 */
-						tcp_resend_sackholes(tp);
-						/* Avoid inflating cwnd in tcp_output */
-						tp->snd_nxt = tp->snd_max;
-						tp->snd_cwnd = tcp_compute_pipe(tp) +
-						    maxseg;
-						(void) tcp_output(tp);
-						/* Set cwnd to the expected flightsize */
-						tp->snd_cwnd = tp->snd_ssthresh;
-						if (SEQ_GT(th->th_ack, tp->snd_una)) {
-							goto resume_partialack;
-						}
-						goto drop;
-					}
-					tp->snd_nxt = th->th_ack;
-					tp->snd_cwnd = maxseg;
-					(void) tcp_output(tp);
-					KASSERT(tp->snd_limited <= 2,
-					    ("%s: tp->snd_limited too big",
-					    __func__));
-					tp->snd_cwnd = tp->snd_ssthresh +
-					    maxseg *
-					    (tp->t_dupacks - tp->snd_limited);
-					if (SEQ_GT(onxt, tp->snd_nxt))
-						tp->snd_nxt = onxt;
-					goto drop;
-				} else if (V_tcp_do_rfc3042) {
-					/*
-					 * Process first and second duplicate
-					 * ACKs. Each indicates a segment
-					 * leaving the network, creating room
-					 * for more. Make sure we can send a
-					 * packet on reception of each duplicate
-					 * ACK by increasing snd_cwnd by one
-					 * segment. Restore the original
-					 * snd_cwnd after packet transmission.
-					 */
-					cc_ack_received(tp, th, nsegs, CC_DUPACK);
-					uint32_t oldcwnd = tp->snd_cwnd;
-					tcp_seq oldsndmax = tp->snd_max;
-					u_int sent;
-					int avail;
-
-					KASSERT(tp->t_dupacks == 1 ||
-					    tp->t_dupacks == 2,
-					    ("%s: dupacks not 1 or 2",
-					    __func__));
-					if (tp->t_dupacks == 1)
-						tp->snd_limited = 0;
-					if ((tp->snd_nxt == tp->snd_max) &&
-					    (tp->t_rxtshift == 0))
-						tp->snd_cwnd =
-						    SEQ_SUB(tp->snd_nxt,
-						    tp->snd_una) -
-						    tcp_sack_adjust(tp);
-					tp->snd_cwnd +=
-					    (tp->t_dupacks - tp->snd_limited) *
-					    maxseg - tcp_sack_adjust(tp);
-					/*
-					 * Only call tcp_output when there
-					 * is new data available to be sent
-					 * or we need to send an ACK.
-					 */
-					SOCK_SENDBUF_LOCK(so);
-					avail = sbavail(&so->so_snd);
-					SOCK_SENDBUF_UNLOCK(so);
-					if (tp->t_flags & TF_ACKNOW ||
-					    (avail >=
-					    SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
-						(void) tcp_output(tp);
-					}
-					sent = SEQ_SUB(tp->snd_max, oldsndmax);
-					if (sent > maxseg) {
-						KASSERT((tp->t_dupacks == 2 &&
-						    tp->snd_limited == 0) ||
-						    (sent == maxseg + 1 &&
-						    tp->t_flags & TF_SENTFIN) ||
-						    (sent < 2 * maxseg &&
-						    tp->t_flags & TF_NODELAY),
-						    ("%s: sent too much: %u>%u",
-						    __func__, sent, maxseg));
-						tp->snd_limited = 2;
-					} else if (sent > 0) {
-						++tp->snd_limited;
-					}
-					tp->snd_cwnd = oldcwnd;
-				}
-			}
-			break;
-		} else {
-			/*
-			 * This ack is advancing the left edge, reset the
-			 * counter.
-			 */
-			tp->t_dupacks = 0;
-			/*
-			 * If this ack also has new SACK info, increment the
-			 * counter as per rfc6675. The variable
-			 * sack_changed tracks all changes to the SACK
-			 * scoreboard, including when partial ACKs without
-			 * SACK options are received, and clear the scoreboard
-			 * from the left side. Such partial ACKs should not be
-			 * counted as dupacks here.
-			 */
-			if (tcp_is_sack_recovery(tp, &to) &&
-			    (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
-			    ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
-			    (tp->snd_nxt == tp->snd_max)) {
-				tp->t_dupacks++;
-				/* limit overhead by setting maxseg last */
-				if (!IN_FASTRECOVERY(tp->t_flags) &&
-				    (tp->sackhint.sacked_bytes >
-				    ((tcprexmtthresh - 1) *
-				    (maxseg = tcp_maxseg(tp))))) {
-					goto enter_recovery;
-				}
-			}
-		}
-
-resume_partialack:
+				/*
+				 * Above is the RFC6675 trigger condition of
+				 * more than (dupthresh-1)*maxseg sacked data.
+				 * If the count of holes in the
+				 * scoreboard is >= dupthresh, we could
+				 * also enter loss recovery, but don't
+				 * have that value readily available.
+				 */
+				tp->t_dupacks = tcprexmtthresh;
+				tcp_seq onxt = tp->snd_nxt;
+
+				/*
+				 * If we're doing sack, check to
+				 * see if we're already in sack
+				 * recovery. If we're not doing sack,
+				 * check to see if we're in newreno
+				 * recovery.
+				 */
+				if (tcp_is_sack_recovery(tp, &to)) {
+					if (IN_FASTRECOVERY(tp->t_flags)) {
+						tp->t_dupacks = 0;
+						break;
+					}
+				} else {
+					if (SEQ_LEQ(th->th_ack,
+					    tp->snd_recover)) {
+						tp->t_dupacks = 0;
+						break;
+					}
+				}
+				/* Congestion signal before ack. */
+				cc_cong_signal(tp, th, CC_NDUPACK);
+				cc_ack_received(tp, th, nsegs, CC_DUPACK);
+				tcp_timer_activate(tp, TT_REXMT, 0);
+				tp->t_rtttime = 0;
+				if (V_tcp_do_prr) {
+					/*
+					 * snd_ssthresh and snd_recover are
+					 * already updated by cc_cong_signal.
+					 */
+					if (tcp_is_sack_recovery(tp, &to)) {
+						/*
+						 * Include Limited Transmit
+						 * segments here
+						 */
+						tp->sackhint.prr_delivered =
+						    imin(tp->snd_max - th->th_ack,
+						    (tp->snd_limited + 1) * maxseg);
+					} else {
+						tp->sackhint.prr_delivered =
+						    maxseg;
+					}
+					tp->sackhint.recover_fs = max(1,
+					    tp->snd_nxt - tp->snd_una);
+				}
+				tp->snd_limited = 0;
+				if (tcp_is_sack_recovery(tp, &to)) {
+					TCPSTAT_INC(tcps_sack_recovery_episode);
+					/*
+					 * When entering LR after RTO due to
+					 * Duplicate ACKs, retransmit existing
+					 * holes from the scoreboard.
+					 */
+					tcp_resend_sackholes(tp);
+					/* Avoid inflating cwnd in tcp_output */
+					tp->snd_nxt = tp->snd_max;
+					tp->snd_cwnd = tcp_compute_pipe(tp) +
+					    maxseg;
+					(void) tcp_output(tp);
+					/* Set cwnd to the expected flightsize */
+					tp->snd_cwnd = tp->snd_ssthresh;
+					goto drop;
+				}
+				tp->snd_nxt = th->th_ack;
+				tp->snd_cwnd = maxseg;
+				(void) tcp_output(tp);
+				KASSERT(tp->snd_limited <= 2,
+				    ("%s: tp->snd_limited too big",
+				    __func__));
+				tp->snd_cwnd = tp->snd_ssthresh +
+				    maxseg *
+				    (tp->t_dupacks - tp->snd_limited);
+				if (SEQ_GT(onxt, tp->snd_nxt))
+					tp->snd_nxt = onxt;
+				goto drop;
+			} else if (V_tcp_do_rfc3042) {
+				/*
+				 * Process first and second duplicate
+				 * ACKs. Each indicates a segment
+				 * leaving the network, creating room
+				 * for more. Make sure we can send a
+				 * packet on reception of each duplicate
+				 * ACK by increasing snd_cwnd by one
+				 * segment. Restore the original
+				 * snd_cwnd after packet transmission.
+				 */
+				cc_ack_received(tp, th, nsegs, CC_DUPACK);
+				uint32_t oldcwnd = tp->snd_cwnd;
+				tcp_seq oldsndmax = tp->snd_max;
+				u_int sent;
+				int avail;
+
+				KASSERT(tp->t_dupacks == 1 ||
+				    tp->t_dupacks == 2,
+				    ("%s: dupacks not 1 or 2",
+				    __func__));
+				if (tp->t_dupacks == 1)
+					tp->snd_limited = 0;
+				if ((tp->snd_nxt == tp->snd_max) &&
+				    (tp->t_rxtshift == 0))
+					tp->snd_cwnd =
+					    SEQ_SUB(tp->snd_nxt, tp->snd_una);
+				tp->snd_cwnd +=
+				    (tp->t_dupacks - tp->snd_limited) * maxseg;
+				tp->snd_cwnd -= tcp_sack_adjust(tp);
+				/*
+				 * Only call tcp_output when there
+				 * is new data available to be sent
+				 * or we need to send an ACK.
+				 */
+				SOCK_SENDBUF_LOCK(so);
+				avail = sbavail(&so->so_snd);
+				SOCK_SENDBUF_UNLOCK(so);
+				if (tp->t_flags & TF_ACKNOW ||
+				    (avail >=
+				    SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
+					(void) tcp_output(tp);
+				}
+				sent = SEQ_SUB(tp->snd_max, oldsndmax);
+				if (sent > maxseg) {
+					KASSERT((tp->t_dupacks == 2 &&
+					    tp->snd_limited == 0) ||
+					    (sent == maxseg + 1 &&
+					    tp->t_flags & TF_SENTFIN) ||
+					    (sent < 2 * maxseg &&
+					    tp->t_flags & TF_NODELAY),
+					    ("%s: sent too much: %u>%u",
+					    __func__, sent, maxseg));
+					tp->snd_limited = 2;
+				} else if (sent > 0) {
+					++tp->snd_limited;
+				}
+				tp->snd_cwnd = oldcwnd;
+				goto drop;
+			}
+			break;
+		}
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
-		    ("%s: th_ack <= snd_una", __func__));
-
+		    ("%s: SEQ_LEQ(th_ack, snd_una)", __func__));
+		/*
+		 * This ack is advancing the left edge, reset the
+		 * counter.
+		 */
+		tp->t_dupacks = 0;
+		/*
+		 * If this ack also has new SACK info, increment the
+		 * t_dupacks as per RFC 6675. The variable
+		 * sack_changed tracks all changes to the SACK
+		 * scoreboard, including when partial ACKs without
+		 * SACK options are received, and clear the scoreboard
+		 * from the left side. Such partial ACKs should not be
+		 * counted as dupacks here.
+		 */
+		if (V_tcp_do_newsack &&
+		    tcp_is_sack_recovery(tp, &to) &&
+		    (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
+		    ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
+		    (tp->snd_nxt == tp->snd_max)) {
+			tp->t_dupacks++;
+			/* limit overhead by setting maxseg last */
+			if (!IN_FASTRECOVERY(tp->t_flags) &&
+			    (tp->sackhint.sacked_bytes >
+			    (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) {
+				goto enter_recovery;
+			}
+		}
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index fed259f4d8e1..f2d7867df9b4 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -78,8 +78,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 71dd4de6baf9..11ef5ba706c5 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -77,8 +77,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index fc12672a45f7..4a0a5fc118f6 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -76,8 +76,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c
index 759bfda98357..1a51097f627c 100644
--- a/sys/netinet/tcp_stacks/rack_pcm.c
+++ b/sys/netinet/tcp_stacks/rack_pcm.c
@@ -78,8 +78,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/tailq_hash.c b/sys/netinet/tcp_stacks/tailq_hash.c
index 5ba3e7cd36c0..ff01640524b6 100644
--- a/sys/netinet/tcp_stacks/tailq_hash.c
+++ b/sys/netinet/tcp_stacks/tailq_hash.c
@@ -51,8 +51,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 80e6b53d10df..1ee6c6e31f33 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -102,15 +102,15 @@
 
 #include <security/mac/mac_framework.h>
 
-VNET_DEFINE_STATIC(int, tcp_syncookies) = 1;
+VNET_DEFINE_STATIC(bool, tcp_syncookies) = true;
 #define	V_tcp_syncookies		VNET(tcp_syncookies)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookies), 0,
     "Use TCP SYN cookies if the syncache overflows");
 
-VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0;
+VNET_DEFINE_STATIC(bool, tcp_syncookiesonly) = false;
 #define	V_tcp_syncookiesonly		VNET(tcp_syncookiesonly)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookiesonly), 0,
     "Use only TCP SYN cookies");
 
@@ -553,9 +553,8 @@ syncache_timer(void *xsch)
 static inline bool
 syncache_cookiesonly(void)
 {
-
-	return (V_tcp_syncookies && (V_tcp_syncache.paused ||
-	    V_tcp_syncookiesonly));
+	return ((V_tcp_syncookies && V_tcp_syncache.paused) ||
+	    V_tcp_syncookiesonly);
 }
 
 /*
@@ -1083,40 +1082,48 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 #endif
 
 	if (sc == NULL) {
-		/*
-		 * There is no syncache entry, so see if this ACK is
-		 * a returning syncookie. To do this, first:
-		 *  A. Check if syncookies are used in case of syncache
-		 *     overflows
-		 *  B. See if this socket has had a syncache entry dropped in
-		 *     the recent past. We don't want to accept a bogus
-		 *     syncookie if we've never received a SYN or accept it
-		 *     twice.
-		 *  C. check that the syncookie is valid. If it is, then
-		 *     cobble up a fake syncache entry, and return.
-		 */
-		if (locked && !V_tcp_syncookies) {
-			SCH_UNLOCK(sch);
-			TCPSTAT_INC(tcps_sc_spurcookie);
-			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
-				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
-				    "segment rejected (syncookies disabled)\n",
-				    s, __func__);
-			goto failed;
-		}
-		if (locked && !V_tcp_syncookiesonly &&
-		    sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
+		if (locked) {
+			/*
+			 * The syncache is currently in use (neither disabled,
+			 * nor paused), but no entry was found.
+			 */
+			if (!V_tcp_syncookies) {
+				/*
+				 * Since no syncookies are used in case of
+				 * a bucket overflow, don't even check for
+				 * a valid syncookie.
+				 */
+				SCH_UNLOCK(sch);
+				TCPSTAT_INC(tcps_sc_spurcookie);
+				if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+					log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+					    "segment rejected "
+					    "(syncookies disabled)\n",
+					    s, __func__);
+				goto failed;
+			}
+			if (sch->sch_last_overflow <
+			    time_uptime - SYNCOOKIE_LIFETIME) {
+				/*
+				 * Since the bucket did not overflow recently,
+				 * don't even check for a valid syncookie.
+				 */
+				SCH_UNLOCK(sch);
+				TCPSTAT_INC(tcps_sc_spurcookie);
+				if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+					log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+					    "segment rejected "
+					    "(no syncache entry)\n",
+					    s, __func__);
+				goto failed;
+			}
 			SCH_UNLOCK(sch);
-			TCPSTAT_INC(tcps_sc_spurcookie);
-			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
-				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
-				    "segment rejected (no syncache entry)\n",
-				    s, __func__);
-			goto failed;
 		}
-		if (locked)
-			SCH_UNLOCK(sch);
 		bzero(&scs, sizeof(scs));
+		/*
+		 * Now check, if the syncookie is valid. If it is, create an on
+		 * stack syncache entry.
+		 */
 		if (syncookie_expand(inc, sch, &scs, th, to, *lsop, port)) {
 			sc = &scs;
 			TCPSTAT_INC(tcps_sc_recvcookie);
@@ -1291,10 +1298,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 	if (__predict_false(*lsop == NULL)) {
 		TCPSTAT_INC(tcps_sc_aborted);
 		TCPSTATES_DEC(TCPS_SYN_RECEIVED);
-	} else
+	} else if (sc != &scs)
 		TCPSTAT_INC(tcps_sc_completed);
 
-/* how do we find the inp for the new socket? */
 	if (sc != &scs)
 		syncache_free(sc);
 	return (1);
@@ -1669,7 +1675,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 		sc->sc_tsoff = tcp_new_ts_offset(inc);
 	}
 	if ((to->to_flags & TOF_SCALE) && (V_tcp_do_rfc1323 != 3)) {
-		int wscale = 0;
+		u_int wscale = 0;
 
 		/*
 		 * Pick the smallest possible scaling factor that
@@ -1719,13 +1725,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 	if (V_tcp_do_ecn && (tp->t_flags2 & TF2_CANNOT_DO_ECN) == 0)
 		sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
 
-	if (V_tcp_syncookies)
+	if (V_tcp_syncookies || V_tcp_syncookiesonly)
 		sc->sc_iss = syncookie_generate(sch, sc);
 	else
 		sc->sc_iss = arc4random();
 #ifdef INET6
 	if (autoflowlabel) {
-		if (V_tcp_syncookies)
+		if (V_tcp_syncookies || V_tcp_syncookiesonly)
 			sc->sc_flowlabel = sc->sc_iss;
 		else
 			sc->sc_flowlabel = ip6_randomflowlabel();
@@ -2265,7 +2271,7 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
 	uint32_t hash;
 	uint8_t *secbits;
 	tcp_seq ack, seq;
-	int wnd, wscale = 0;
+	int wnd;
 	union syncookie cookie;
 
 	/*
@@ -2316,12 +2322,14 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
 
 	sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
 
-	/* We can simply recompute receive window scale we sent earlier. */
-	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
-		wscale++;
-
 	/* Only use wscale if it was enabled in the orignal SYN. */
 	if (cookie.flags.wscale_idx > 0) {
+		u_int wscale = 0;
+
+		/* Recompute the receive window scale that was sent earlier. */
+		while (wscale < TCP_MAX_WINSHIFT &&
+		    (TCP_MAXWIN << wscale) < sb_max)
+			wscale++;
 		sc->sc_requested_r_scale = wscale;
 		sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
 		sc->sc_flags |= SCF_WINSCALE;
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 3e6519118a40..cea8a916679b 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -223,16 +223,18 @@ VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
  * udp_append() will convert to a sockaddr_in6 before passing the address
  * into the socket code.
  *
- * In the normal case udp_append() will return 0, indicating that you
- * must unlock the inp. However if a tunneling protocol is in place we increment
- * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
- * then decrement the reference count. If the inp_rele returns 1, indicating the
- * inp is gone, we return that to the caller to tell them *not* to unlock
- * the inp. In the case of multi-cast this will cause the distribution
- * to stop (though most tunneling protocols known currently do *not* use
- * multicast).
+ * In the normal case udp_append() will return 'false', indicating that you
+ * must unlock the inpcb. However if a tunneling protocol is in place we
+ * increment the inpcb refcnt and unlock the inpcb, on return from the tunneling
+ * protocol we then decrement the reference count. If in_pcbrele_rlocked()
+ * returns 'true', indicating the inpcb is gone, we return that to the caller
+ * to tell them *not* to unlock the inpcb. In the case of multicast this will
+ * cause the distribution to stop (though most tunneling protocols known
+ * currently do *not* use multicast).
+ *
+ * The mbuf is always consumed.
  */
-static int
+static bool
 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
     struct sockaddr_in *udp_in)
 {
@@ -255,15 +257,16 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
 
 		in_pcbref(inp);
 		INP_RUNLOCK(inp);
-		filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0],
-		    up->u_tun_ctx);
+		filtered = (*up->u_tun_func)(n, off, inp,
+		    (struct sockaddr *)&udp_in[0], up->u_tun_ctx);
 		INP_RLOCK(inp);
-		if (in_pcbrele_rlocked(inp))
-			return (1);
-		if (filtered) {
-			INP_RUNLOCK(inp);
-			return (1);
+		if (in_pcbrele_rlocked(inp)) {
+			if (!filtered)
+				m_freem(n);
+			return (true);
 		}
+		if (filtered)
+			return (false);
 	}
 
 	off += sizeof(struct udphdr);
@@ -273,18 +276,18 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
 	if (IPSEC_ENABLED(ipv4) &&
 	    IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) {
 		m_freem(n);
-		return (0);
+		return (false);
 	}
 	if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */
 		if (IPSEC_ENABLED(ipv4) &&
 		    UDPENCAP_INPUT(ipv4, n, off, AF_INET) != 0)
-			return (0);	/* Consumed. */
+			return (false);
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
 		m_freem(n);
-		return (0);
+		return (false);
 	}
 #endif /* MAC */
 	if (inp->inp_flags & INP_CONTROLOPTS ||
@@ -330,7 +333,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
 			UDPSTAT_INC(udps_fullsock);
 		} else
 			sorwakeup_locked(so);
-	return (0);
+	return (false);
 }
 
 static bool
@@ -699,7 +702,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
 		UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
 	else
 		UDP_PROBE(receive, NULL, inp, ip, inp, uh);
-	if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
+	if (!udp_append(inp, ip, m, iphlen, udp_in))
 		INP_RUNLOCK(inp);
 	return (IPPROTO_DONE);