Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/cc/cc.c                      |   2
-rw-r--r-- | sys/netinet/in.c                         |   4
-rw-r--r-- | sys/netinet/ip_output.c                  |  13
-rw-r--r-- | sys/netinet/ip_var.h                     |   1
-rw-r--r-- | sys/netinet/tcp_hpts.c                   |   2
-rw-r--r-- | sys/netinet/tcp_input.c                  | 497
-rw-r--r-- | sys/netinet/tcp_stacks/bbr.c             |   2
-rw-r--r-- | sys/netinet/tcp_stacks/rack.c            |   2
-rw-r--r-- | sys/netinet/tcp_stacks/rack_bbr_common.c |   2
-rw-r--r-- | sys/netinet/tcp_stacks/rack_pcm.c        |   2
-rw-r--r-- | sys/netinet/tcp_stacks/tailq_hash.c      |   2
-rw-r--r-- | sys/netinet/tcp_syncache.c               | 104
-rw-r--r-- | sys/netinet/udp_usrreq.c                 |  45
13 files changed, 328 insertions, 350 deletions
diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c
index d85ad4e9f4fd..c20a20cd983d 100644
--- a/sys/netinet/cc/cc.c
+++ b/sys/netinet/cc/cc.c
@@ -659,7 +659,7 @@ cc_modevent(module_t mod, int event_type, void *data)
 	case MOD_SHUTDOWN:
 		break;
 	case MOD_QUIESCE:
-		/* Stop any new assigments */
+		/* Stop any new assignments */
 		err = cc_stop_new_assignments(algo);
 		break;
 	case MOD_UNLOAD:
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 963449d4b4b1..0e283a7d099d 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -522,8 +522,8 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct ucred *cred
 	/*
 	 * Check if bridge wants to allow adding addrs to member interfaces.
 	 */
-	if (ifp->if_bridge && bridge_member_ifaddrs_p &&
-	    !bridge_member_ifaddrs_p())
+	if (ifp->if_bridge != NULL && ifp->if_type != IFT_GIF &&
+	    bridge_member_ifaddrs_p != NULL && !bridge_member_ifaddrs_p())
 		return (EINVAL);
 
 	/*
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index ec6ba8d92015..ef08b9cfd3d6 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -1044,14 +1044,14 @@ done:
 }
 
 void
-in_delayed_cksum(struct mbuf *m)
+in_delayed_cksum_o(struct mbuf *m, uint16_t iph_offset)
 {
 	struct ip *ip;
 	struct udphdr *uh;
 	uint16_t cklen, csum, offset;
 
-	ip = mtod(m, struct ip *);
-	offset = ip->ip_hl << 2 ;
+	ip = (struct ip *)mtodo(m, iph_offset);
+	offset = iph_offset + (ip->ip_hl << 2);
 
 	if (m->m_pkthdr.csum_flags & CSUM_UDP) {
 		/* if udp header is not in the first mbuf copy udplen */
@@ -1078,6 +1078,13 @@ in_delayed_cksum(struct mbuf *m)
 	*(u_short *)mtodo(m, offset) = csum;
 }
 
+void
+in_delayed_cksum(struct mbuf *m)
+{
+
+	in_delayed_cksum_o(m, 0);
+}
+
 /*
  * IP socket option processing.
  */
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index f782ebc53eb0..c113484079a3 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -271,6 +271,7 @@ VNET_DECLARE(struct pfil_head *, inet_local_pfil_head);
 #define	PFIL_INET_LOCAL_NAME	"inet-local"
 
 void	in_delayed_cksum(struct mbuf *m);
+void	in_delayed_cksum_o(struct mbuf *m, uint16_t o);
 
 /* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */
 /*
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index b77ebc928809..63bbe4bba11b 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -137,8 +137,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 6492495dc583..d5dc516c28aa 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2562,299 +2562,270 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 		hhook_run_tcp_est_in(tp, th, &to);
 #endif
-		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
-			maxseg = tcp_maxseg(tp);
-			if (no_data &&
-			    (tiwin == tp->snd_wnd ||
-			    (tp->t_flags & TF_SACK_PERMIT))) {
+		if (SEQ_LT(th->th_ack, tp->snd_una)) {
+			/* This is old ACK information, don't process it. */
+			break;
+		}
+		if (th->th_ack == tp->snd_una) {
+			/* Check if this is a duplicate ACK. */
+			if ((tp->t_flags & TF_SACK_PERMIT) &&
+			    V_tcp_do_newsack) {
 				/*
-				 * If this is the first time we've seen a
-				 * FIN from the remote, this is not a
-				 * duplicate and it needs to be processed
-				 * normally. This happens during a
-				 * simultaneous close.
+				 * If SEG.ACK == SND.UNA, RFC 6675 requires a
+				 * duplicate ACK to selectively acknowledge
+				 * at least one byte, which was not selectively
+				 * acknowledged before.
 				 */
-				if ((thflags & TH_FIN) &&
-				    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
-					tp->t_dupacks = 0;
+				if (sack_changed == SACK_NOCHANGE) {
 					break;
 				}
-				TCPSTAT_INC(tcps_rcvdupack);
-				/*
-				 * If we have outstanding data (other than
-				 * a window probe), this is a completely
-				 * duplicate ack (ie, window info didn't
-				 * change and FIN isn't set),
-				 * the ack is the biggest we've
-				 * seen and we've seen exactly our rexmt
-				 * threshold of them, assume a packet
-				 * has been dropped and retransmit it.
-				 * Kludge snd_nxt & the congestion
-				 * window so we send only this one
-				 * packet.
-				 *
-				 * We know we're losing at the current
-				 * window size so do congestion avoidance
-				 * (set ssthresh to half the current window
-				 * and pull our congestion window back to
-				 * the new ssthresh).
-				 *
-				 * Dup acks mean that packets have left the
-				 * network (they're now cached at the receiver)
-				 * so bump cwnd by the amount in the receiver
-				 * to keep a constant cwnd packets in the
-				 * network.
-				 *
-				 * When using TCP ECN, notify the peer that
-				 * we reduced the cwnd.
-				 */
+			} else {
 				/*
-				 * Following 2 kinds of acks should not affect
-				 * dupack counting:
-				 * 1) Old acks
-				 * 2) Acks with SACK but without any new SACK
-				 * information in them. These could result from
-				 * any anomaly in the network like a switch
-				 * duplicating packets or a possible DoS attack.
+				 * If SEG.ACK == SND.UNA, RFC 5681 requires a
+				 * duplicate ACK to have no data on it and to
+				 * not be a window update.
 				 */
-				if (th->th_ack != tp->snd_una ||
-				    (tcp_is_sack_recovery(tp, &to) &&
-				    (sack_changed == SACK_NOCHANGE))) {
+				if (!no_data || tiwin != tp->snd_wnd) {
 					break;
-				} else if (!tcp_timer_active(tp, TT_REXMT)) {
-					tp->t_dupacks = 0;
-				} else if (++tp->t_dupacks > tcprexmtthresh ||
-				    IN_FASTRECOVERY(tp->t_flags)) {
-					cc_ack_received(tp, th, nsegs,
-					    CC_DUPACK);
-					if (V_tcp_do_prr &&
-					    IN_FASTRECOVERY(tp->t_flags) &&
-					    (tp->t_flags & TF_SACK_PERMIT)) {
-						tcp_do_prr_ack(tp, th, &to,
-						    sack_changed, &maxseg);
-					} else if (tcp_is_sack_recovery(tp, &to) &&
-					    IN_FASTRECOVERY(tp->t_flags) &&
-					    (tp->snd_nxt == tp->snd_max)) {
-						int awnd;
+				}
+			}
+			/*
+			 * If this is the first time we've seen a
+			 * FIN from the remote, this is not a
+			 * duplicate ACK and it needs to be processed
+			 * normally.
+			 * This happens during a simultaneous close.
+			 */
+			if ((thflags & TH_FIN) &&
+			    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
+				tp->t_dupacks = 0;
+				break;
+			}
+			/* Perform duplicate ACK processing. */
+			TCPSTAT_INC(tcps_rcvdupack);
+			maxseg = tcp_maxseg(tp);
+			if (!tcp_timer_active(tp, TT_REXMT)) {
+				tp->t_dupacks = 0;
+			} else if (++tp->t_dupacks > tcprexmtthresh ||
+			    IN_FASTRECOVERY(tp->t_flags)) {
+				cc_ack_received(tp, th, nsegs, CC_DUPACK);
+				if (V_tcp_do_prr &&
+				    IN_FASTRECOVERY(tp->t_flags) &&
+				    (tp->t_flags & TF_SACK_PERMIT)) {
+					tcp_do_prr_ack(tp, th, &to,
+					    sack_changed, &maxseg);
+				} else if (tcp_is_sack_recovery(tp, &to) &&
+				    IN_FASTRECOVERY(tp->t_flags) &&
+				    (tp->snd_nxt == tp->snd_max)) {
+					int awnd;
 
 					/*
 					 * Compute the amount of data in flight first.
 					 * We can inject new data into the pipe iff
 					 * we have less than ssthresh
 					 * worth of data in flight.
 					 */
-						awnd = tcp_compute_pipe(tp);
-						if (awnd < tp->snd_ssthresh) {
-							tp->snd_cwnd += imax(maxseg,
-							    imin(2 * maxseg,
-							    tp->sackhint.delivered_data));
-							if (tp->snd_cwnd > tp->snd_ssthresh)
-								tp->snd_cwnd = tp->snd_ssthresh;
-						}
-					} else if (tcp_is_sack_recovery(tp, &to) &&
-					    IN_FASTRECOVERY(tp->t_flags) &&
-					    SEQ_LT(tp->snd_nxt, tp->snd_max)) {
-						tp->snd_cwnd += imax(maxseg,
-						    imin(2 * maxseg,
-						    tp->sackhint.delivered_data));
-					} else {
-						tp->snd_cwnd += maxseg;
-					}
-					(void) tcp_output(tp);
-					goto drop;
-				} else if (tp->t_dupacks == tcprexmtthresh ||
-				    (tp->t_flags & TF_SACK_PERMIT &&
-				    V_tcp_do_newsack &&
-				    tp->sackhint.sacked_bytes >
-				    (tcprexmtthresh - 1) * maxseg)) {
+					awnd = tcp_compute_pipe(tp);
+					if (awnd < tp->snd_ssthresh) {
+						tp->snd_cwnd += imax(maxseg,
+						    imin(2 * maxseg,
+						    tp->sackhint.delivered_data));
+						if (tp->snd_cwnd > tp->snd_ssthresh)
+							tp->snd_cwnd = tp->snd_ssthresh;
+					}
+				} else if (tcp_is_sack_recovery(tp, &to) &&
+				    IN_FASTRECOVERY(tp->t_flags) &&
+				    SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+					tp->snd_cwnd += imax(maxseg,
+					    imin(2 * maxseg,
+					    tp->sackhint.delivered_data));
+				} else {
+					tp->snd_cwnd += maxseg;
+				}
+				(void) tcp_output(tp);
+				goto drop;
+			} else if (tp->t_dupacks == tcprexmtthresh ||
+			    (tp->t_flags & TF_SACK_PERMIT &&
+			    V_tcp_do_newsack &&
+			    tp->sackhint.sacked_bytes >
+			    (tcprexmtthresh - 1) * maxseg)) {
 enter_recovery:
-					/*
-					 * Above is the RFC6675 trigger condition of
-					 * more than (dupthresh-1)*maxseg sacked data.
-					 * If the count of holes in the
-					 * scoreboard is >= dupthresh, we could
-					 * also enter loss recovery, but don't
-					 * have that value readily available.
-					 */
-					tp->t_dupacks = tcprexmtthresh;
-					tcp_seq onxt = tp->snd_nxt;
-
-					/*
-					 * If we're doing sack, check to
-					 * see if we're already in sack
-					 * recovery. If we're not doing sack,
-					 * check to see if we're in newreno
-					 * recovery.
-					 */
-					if (tcp_is_sack_recovery(tp, &to)) {
-						if (IN_FASTRECOVERY(tp->t_flags)) {
-							tp->t_dupacks = 0;
-							break;
-						}
-					} else {
-						if (SEQ_LEQ(th->th_ack,
-						    tp->snd_recover)) {
-							tp->t_dupacks = 0;
-							break;
-						}
-					}
-					/* Congestion signal before ack. */
-					cc_cong_signal(tp, th, CC_NDUPACK);
-					cc_ack_received(tp, th, nsegs,
-					    CC_DUPACK);
-					tcp_timer_activate(tp, TT_REXMT, 0);
-					tp->t_rtttime = 0;
-					if (V_tcp_do_prr) {
-						/*
-						 * snd_ssthresh and snd_recover are
-						 * already updated by cc_cong_signal.
-						 */
-						if (tcp_is_sack_recovery(tp, &to)) {
-							/*
-							 * Include Limited Transmit
-							 * segments here
-							 */
-							tp->sackhint.prr_delivered =
-							    imin(tp->snd_max - th->th_ack,
-							    (tp->snd_limited + 1) * maxseg);
-						} else {
-							tp->sackhint.prr_delivered =
-							    maxseg;
-						}
-						tp->sackhint.recover_fs = max(1,
-						    tp->snd_nxt - tp->snd_una);
-					}
-					tp->snd_limited = 0;
-					if (tcp_is_sack_recovery(tp, &to)) {
-						TCPSTAT_INC(tcps_sack_recovery_episode);
-						/*
-						 * When entering LR after RTO due to
-						 * Duplicate ACKs, retransmit existing
-						 * holes from the scoreboard.
-						 */
-						tcp_resend_sackholes(tp);
-						/* Avoid inflating cwnd in tcp_output */
-						tp->snd_nxt = tp->snd_max;
-						tp->snd_cwnd = tcp_compute_pipe(tp) +
-						    maxseg;
-						(void) tcp_output(tp);
-						/* Set cwnd to the expected flightsize */
-						tp->snd_cwnd = tp->snd_ssthresh;
-						if (SEQ_GT(th->th_ack, tp->snd_una)) {
-							goto resume_partialack;
-						}
-						goto drop;
-					}
-					tp->snd_nxt = th->th_ack;
-					tp->snd_cwnd = maxseg;
-					(void) tcp_output(tp);
-					KASSERT(tp->snd_limited <= 2,
-					    ("%s: tp->snd_limited too big",
-					    __func__));
-					tp->snd_cwnd = tp->snd_ssthresh +
-					    maxseg *
-					    (tp->t_dupacks - tp->snd_limited);
-					if (SEQ_GT(onxt, tp->snd_nxt))
-						tp->snd_nxt = onxt;
-					goto drop;
-				} else if (V_tcp_do_rfc3042) {
-					/*
-					 * Process first and second duplicate
-					 * ACKs. Each indicates a segment
-					 * leaving the network, creating room
-					 * for more. Make sure we can send a
-					 * packet on reception of each duplicate
-					 * ACK by increasing snd_cwnd by one
-					 * segment. Restore the original
-					 * snd_cwnd after packet transmission.
-					 */
-					cc_ack_received(tp, th, nsegs, CC_DUPACK);
-					uint32_t oldcwnd = tp->snd_cwnd;
-					tcp_seq oldsndmax = tp->snd_max;
-					u_int sent;
-					int avail;
-
-					KASSERT(tp->t_dupacks == 1 ||
-					    tp->t_dupacks == 2,
-					    ("%s: dupacks not 1 or 2",
-					    __func__));
-					if (tp->t_dupacks == 1)
-						tp->snd_limited = 0;
-					if ((tp->snd_nxt == tp->snd_max) &&
-					    (tp->t_rxtshift == 0))
-						tp->snd_cwnd =
-						    SEQ_SUB(tp->snd_nxt,
-						    tp->snd_una) -
-						    tcp_sack_adjust(tp);
-					tp->snd_cwnd +=
-					    (tp->t_dupacks - tp->snd_limited) *
-					    maxseg - tcp_sack_adjust(tp);
-					/*
-					 * Only call tcp_output when there
-					 * is new data available to be sent
-					 * or we need to send an ACK.
-					 */
-					SOCK_SENDBUF_LOCK(so);
-					avail = sbavail(&so->so_snd);
-					SOCK_SENDBUF_UNLOCK(so);
-					if (tp->t_flags & TF_ACKNOW ||
-					    (avail >=
-					    SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
-						(void) tcp_output(tp);
-					}
-					sent = SEQ_SUB(tp->snd_max, oldsndmax);
-					if (sent > maxseg) {
-						KASSERT((tp->t_dupacks == 2 &&
-						    tp->snd_limited == 0) ||
-						    (sent == maxseg + 1 &&
-						    tp->t_flags & TF_SENTFIN) ||
-						    (sent < 2 * maxseg &&
-						    tp->t_flags & TF_NODELAY),
-						    ("%s: sent too much: %u>%u",
-						    __func__, sent, maxseg));
-						tp->snd_limited = 2;
-					} else if (sent > 0) {
-						++tp->snd_limited;
-					}
-					tp->snd_cwnd = oldcwnd;
-				}
-			}
-			break;
-		} else {
-			/*
-			 * This ack is advancing the left edge, reset the
-			 * counter.
-			 */
-			tp->t_dupacks = 0;
-			/*
-			 * If this ack also has new SACK info, increment the
-			 * counter as per rfc6675. The variable
-			 * sack_changed tracks all changes to the SACK
-			 * scoreboard, including when partial ACKs without
-			 * SACK options are received, and clear the scoreboard
-			 * from the left side. Such partial ACKs should not be
-			 * counted as dupacks here.
-			 */
-			if (tcp_is_sack_recovery(tp, &to) &&
-			    (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
-			    ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
-			    (tp->snd_nxt == tp->snd_max)) {
-				tp->t_dupacks++;
-				/* limit overhead by setting maxseg last */
-				if (!IN_FASTRECOVERY(tp->t_flags) &&
-				    (tp->sackhint.sacked_bytes >
-				    ((tcprexmtthresh - 1) *
-				    (maxseg = tcp_maxseg(tp))))) {
-					goto enter_recovery;
-				}
-			}
-		}
-
-resume_partialack:
+				/*
+				 * Above is the RFC6675 trigger condition of
+				 * more than (dupthresh-1)*maxseg sacked data.
+				 * If the count of holes in the
+				 * scoreboard is >= dupthresh, we could
+				 * also enter loss recovery, but don't
+				 * have that value readily available.
+				 */
+				tp->t_dupacks = tcprexmtthresh;
+				tcp_seq onxt = tp->snd_nxt;
+
+				/*
+				 * If we're doing sack, check to
+				 * see if we're already in sack
+				 * recovery. If we're not doing sack,
+				 * check to see if we're in newreno
+				 * recovery.
+				 */
+				if (tcp_is_sack_recovery(tp, &to)) {
+					if (IN_FASTRECOVERY(tp->t_flags)) {
+						tp->t_dupacks = 0;
+						break;
+					}
+				} else {
+					if (SEQ_LEQ(th->th_ack,
+					    tp->snd_recover)) {
+						tp->t_dupacks = 0;
+						break;
+					}
+				}
+				/* Congestion signal before ack. */
+				cc_cong_signal(tp, th, CC_NDUPACK);
+				cc_ack_received(tp, th, nsegs, CC_DUPACK);
+				tcp_timer_activate(tp, TT_REXMT, 0);
+				tp->t_rtttime = 0;
+				if (V_tcp_do_prr) {
+					/*
+					 * snd_ssthresh and snd_recover are
+					 * already updated by cc_cong_signal.
+					 */
+					if (tcp_is_sack_recovery(tp, &to)) {
+						/*
+						 * Include Limited Transmit
+						 * segments here
+						 */
+						tp->sackhint.prr_delivered =
+						    imin(tp->snd_max - th->th_ack,
+						    (tp->snd_limited + 1) * maxseg);
+					} else {
+						tp->sackhint.prr_delivered =
+						    maxseg;
+					}
+					tp->sackhint.recover_fs = max(1,
+					    tp->snd_nxt - tp->snd_una);
+				}
+				tp->snd_limited = 0;
+				if (tcp_is_sack_recovery(tp, &to)) {
+					TCPSTAT_INC(tcps_sack_recovery_episode);
+					/*
+					 * When entering LR after RTO due to
+					 * Duplicate ACKs, retransmit existing
+					 * holes from the scoreboard.
+					 */
+					tcp_resend_sackholes(tp);
+					/* Avoid inflating cwnd in tcp_output */
+					tp->snd_nxt = tp->snd_max;
+					tp->snd_cwnd = tcp_compute_pipe(tp) +
+					    maxseg;
+					(void) tcp_output(tp);
+					/* Set cwnd to the expected flightsize */
+					tp->snd_cwnd = tp->snd_ssthresh;
+					goto drop;
+				}
+				tp->snd_nxt = th->th_ack;
+				tp->snd_cwnd = maxseg;
+				(void) tcp_output(tp);
+				KASSERT(tp->snd_limited <= 2,
+				    ("%s: tp->snd_limited too big",
+				    __func__));
+				tp->snd_cwnd = tp->snd_ssthresh +
+				    maxseg *
+				    (tp->t_dupacks - tp->snd_limited);
+				if (SEQ_GT(onxt, tp->snd_nxt))
+					tp->snd_nxt = onxt;
+				goto drop;
+			} else if (V_tcp_do_rfc3042) {
+				/*
+				 * Process first and second duplicate
+				 * ACKs. Each indicates a segment
+				 * leaving the network, creating room
+				 * for more. Make sure we can send a
+				 * packet on reception of each duplicate
+				 * ACK by increasing snd_cwnd by one
+				 * segment. Restore the original
+				 * snd_cwnd after packet transmission.
+				 */
+				cc_ack_received(tp, th, nsegs, CC_DUPACK);
+				uint32_t oldcwnd = tp->snd_cwnd;
+				tcp_seq oldsndmax = tp->snd_max;
+				u_int sent;
+				int avail;
+
+				KASSERT(tp->t_dupacks == 1 ||
+				    tp->t_dupacks == 2,
+				    ("%s: dupacks not 1 or 2",
+				    __func__));
+				if (tp->t_dupacks == 1)
+					tp->snd_limited = 0;
+				if ((tp->snd_nxt == tp->snd_max) &&
+				    (tp->t_rxtshift == 0))
+					tp->snd_cwnd =
+					    SEQ_SUB(tp->snd_nxt, tp->snd_una);
+				tp->snd_cwnd +=
+				    (tp->t_dupacks - tp->snd_limited) * maxseg;
+				tp->snd_cwnd -= tcp_sack_adjust(tp);
+				/*
+				 * Only call tcp_output when there
+				 * is new data available to be sent
+				 * or we need to send an ACK.
+				 */
+				SOCK_SENDBUF_LOCK(so);
+				avail = sbavail(&so->so_snd);
+				SOCK_SENDBUF_UNLOCK(so);
+				if (tp->t_flags & TF_ACKNOW ||
+				    (avail >=
+				    SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
+					(void) tcp_output(tp);
+				}
+				sent = SEQ_SUB(tp->snd_max, oldsndmax);
+				if (sent > maxseg) {
+					KASSERT((tp->t_dupacks == 2 &&
+					    tp->snd_limited == 0) ||
+					    (sent == maxseg + 1 &&
+					    tp->t_flags & TF_SENTFIN) ||
+					    (sent < 2 * maxseg &&
+					    tp->t_flags & TF_NODELAY),
+					    ("%s: sent too much: %u>%u",
+					    __func__, sent, maxseg));
+					tp->snd_limited = 2;
+				} else if (sent > 0) {
+					++tp->snd_limited;
+				}
+				tp->snd_cwnd = oldcwnd;
+				goto drop;
+			}
+			break;
+		}
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
-		    ("%s: th_ack <= snd_una", __func__));
-
+		    ("%s: SEQ_LEQ(th_ack, snd_una)", __func__));
+		/*
+		 * This ack is advancing the left edge, reset the
+		 * counter.
+		 */
+		tp->t_dupacks = 0;
+		/*
+		 * If this ack also has new SACK info, increment the
+		 * t_dupacks as per RFC 6675. The variable
+		 * sack_changed tracks all changes to the SACK
+		 * scoreboard, including when partial ACKs without
+		 * SACK options are received, and clear the scoreboard
+		 * from the left side. Such partial ACKs should not be
+		 * counted as dupacks here.
+		 */
+		if (V_tcp_do_newsack &&
+		    tcp_is_sack_recovery(tp, &to) &&
+		    (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) ||
+		    ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) &&
+		    (tp->snd_nxt == tp->snd_max)) {
+			tp->t_dupacks++;
+			/* limit overhead by setting maxseg last */
+			if (!IN_FASTRECOVERY(tp->t_flags) &&
+			    (tp->sackhint.sacked_bytes >
+			    (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) {
+				goto enter_recovery;
+			}
+		}
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index fed259f4d8e1..f2d7867df9b4 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -78,8 +78,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 71dd4de6baf9..11ef5ba706c5 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -77,8 +77,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index fc12672a45f7..4a0a5fc118f6 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -76,8 +76,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c
index 759bfda98357..1a51097f627c 100644
--- a/sys/netinet/tcp_stacks/rack_pcm.c
+++ b/sys/netinet/tcp_stacks/rack_pcm.c
@@ -78,8 +78,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_stacks/tailq_hash.c b/sys/netinet/tcp_stacks/tailq_hash.c
index 5ba3e7cd36c0..ff01640524b6 100644
--- a/sys/netinet/tcp_stacks/tailq_hash.c
+++ b/sys/netinet/tcp_stacks/tailq_hash.c
@@ -51,8 +51,6 @@
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
-#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
-#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 80e6b53d10df..1ee6c6e31f33 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -102,15 +102,15 @@
 
 #include <security/mac/mac_framework.h>
 
-VNET_DEFINE_STATIC(int, tcp_syncookies) = 1;
+VNET_DEFINE_STATIC(bool, tcp_syncookies) = true;
 #define	V_tcp_syncookies		VNET(tcp_syncookies)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookies), 0,
     "Use TCP SYN cookies if the syncache overflows");
 
-VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0;
+VNET_DEFINE_STATIC(bool, tcp_syncookiesonly) = false;
 #define	V_tcp_syncookiesonly		VNET(tcp_syncookiesonly)
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
+SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookiesonly), 0,
     "Use only TCP SYN cookies");
 
@@ -553,9 +553,8 @@ syncache_timer(void *xsch)
 static inline bool
 syncache_cookiesonly(void)
 {
-
-	return (V_tcp_syncookies && (V_tcp_syncache.paused ||
-	    V_tcp_syncookiesonly));
+	return ((V_tcp_syncookies && V_tcp_syncache.paused) ||
+	    V_tcp_syncookiesonly);
 }
 
 /*
@@ -1083,40 +1082,48 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 #endif
 
 	if (sc == NULL) {
-		/*
-		 * There is no syncache entry, so see if this ACK is
-		 * a returning syncookie. To do this, first:
-		 *  A. Check if syncookies are used in case of syncache
-		 *     overflows
-		 *  B. See if this socket has had a syncache entry dropped in
-		 *     the recent past. We don't want to accept a bogus
-		 *     syncookie if we've never received a SYN or accept it
-		 *     twice.
-		 *  C. check that the syncookie is valid. If it is, then
-		 *     cobble up a fake syncache entry, and return.
-		 */
-		if (locked && !V_tcp_syncookies) {
-			SCH_UNLOCK(sch);
-			TCPSTAT_INC(tcps_sc_spurcookie);
-			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
-				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
-				    "segment rejected (syncookies disabled)\n",
-				    s, __func__);
-			goto failed;
-		}
-		if (locked && !V_tcp_syncookiesonly &&
-		    sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
+		if (locked) {
+			/*
+			 * The syncache is currently in use (neither disabled,
+			 * nor paused), but no entry was found.
+			 */
+			if (!V_tcp_syncookies) {
+				/*
+				 * Since no syncookies are used in case of
+				 * a bucket overflow, don't even check for
+				 * a valid syncookie.
+				 */
+				SCH_UNLOCK(sch);
+				TCPSTAT_INC(tcps_sc_spurcookie);
+				if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+					log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+					    "segment rejected "
+					    "(syncookies disabled)\n",
+					    s, __func__);
+				goto failed;
+			}
+			if (sch->sch_last_overflow <
+			    time_uptime - SYNCOOKIE_LIFETIME) {
+				/*
+				 * Since the bucket did not overflow recently,
+				 * don't even check for a valid syncookie.
+				 */
+				SCH_UNLOCK(sch);
+				TCPSTAT_INC(tcps_sc_spurcookie);
+				if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+					log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+					    "segment rejected "
+					    "(no syncache entry)\n",
+					    s, __func__);
+				goto failed;
+			}
 			SCH_UNLOCK(sch);
-			TCPSTAT_INC(tcps_sc_spurcookie);
-			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
-				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
-				    "segment rejected (no syncache entry)\n",
-				    s, __func__);
-			goto failed;
 		}
-		if (locked)
-			SCH_UNLOCK(sch);
 		bzero(&scs, sizeof(scs));
+		/*
+		 * Now check, if the syncookie is valid. If it is, create an on
+		 * stack syncache entry.
+		 */
 		if (syncookie_expand(inc, sch, &scs, th, to, *lsop, port)) {
 			sc = &scs;
 			TCPSTAT_INC(tcps_sc_recvcookie);
@@ -1291,10 +1298,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 	if (__predict_false(*lsop == NULL)) {
 		TCPSTAT_INC(tcps_sc_aborted);
 		TCPSTATES_DEC(TCPS_SYN_RECEIVED);
-	} else
+	} else if (sc != &scs)
 		TCPSTAT_INC(tcps_sc_completed);
 
-/* how do we find the inp for the new socket? */
 	if (sc != &scs)
 		syncache_free(sc);
 	return (1);
@@ -1669,7 +1675,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 		sc->sc_tsoff = tcp_new_ts_offset(inc);
 	}
 	if ((to->to_flags & TOF_SCALE) && (V_tcp_do_rfc1323 != 3)) {
-		int wscale = 0;
+		u_int wscale = 0;
 
 		/*
 		 * Pick the smallest possible scaling factor that
@@ -1719,13 +1725,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 	if (V_tcp_do_ecn && (tp->t_flags2 & TF2_CANNOT_DO_ECN) == 0)
 		sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
 
-	if (V_tcp_syncookies)
+	if (V_tcp_syncookies || V_tcp_syncookiesonly)
 		sc->sc_iss = syncookie_generate(sch, sc);
 	else
 		sc->sc_iss = arc4random();
 #ifdef INET6
 	if (autoflowlabel) {
-		if (V_tcp_syncookies)
+		if (V_tcp_syncookies || V_tcp_syncookiesonly)
 			sc->sc_flowlabel = sc->sc_iss;
 		else
 			sc->sc_flowlabel = ip6_randomflowlabel();
@@ -2265,7 +2271,7 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
 	uint32_t hash;
 	uint8_t *secbits;
 	tcp_seq ack, seq;
-	int wnd, wscale = 0;
+	int wnd;
 	union syncookie cookie;
 
 	/*
@@ -2316,12 +2322,14 @@ syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch,
 
 	sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
 
-	/* We can simply recompute receive window scale we sent earlier. */
-	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
-		wscale++;
-
 	/* Only use wscale if it was enabled in the orignal SYN. */
 	if (cookie.flags.wscale_idx > 0) {
+		u_int wscale = 0;
+
+		/* Recompute the receive window scale that was sent earlier. */
+		while (wscale < TCP_MAX_WINSHIFT &&
+		    (TCP_MAXWIN << wscale) < sb_max)
+			wscale++;
 		sc->sc_requested_r_scale = wscale;
 		sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
 		sc->sc_flags |= SCF_WINSCALE;
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 3e6519118a40..cea8a916679b 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -223,16 +223,18 @@ VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
  * udp_append() will convert to a sockaddr_in6 before passing the address
  * into the socket code.
  *
- * In the normal case udp_append() will return 0, indicating that you
- * must unlock the inp. However if a tunneling protocol is in place we increment
- * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
- * then decrement the reference count. If the inp_rele returns 1, indicating the
- * inp is gone, we return that to the caller to tell them *not* to unlock
- * the inp. In the case of multi-cast this will cause the distribution
- * to stop (though most tunneling protocols known currently do *not* use
- * multicast).
+ * In the normal case udp_append() will return 'false', indicating that you
+ * must unlock the inpcb. However if a tunneling protocol is in place we
+ * increment the inpcb refcnt and unlock the inpcb, on return from the tunneling
+ * protocol we then decrement the reference count. If in_pcbrele_rlocked()
+ * returns 'true', indicating the inpcb is gone, we return that to the caller
+ * to tell them *not* to unlock the inpcb. In the case of multicast this will
+ * cause the distribution to stop (though most tunneling protocols known
+ * currently do *not* use multicast).
+ *
+ * The mbuf is always consumed.
  */
-static int
+static bool
 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
     struct sockaddr_in *udp_in)
 {
@@ -255,15 +257,16 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
 
 		in_pcbref(inp);
 		INP_RUNLOCK(inp);
-		filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0],
-		    up->u_tun_ctx);
+		filtered = (*up->u_tun_func)(n, off, inp,
+		    (struct sockaddr *)&udp_in[0], up->u_tun_ctx);
 		INP_RLOCK(inp);
-		if (in_pcbrele_rlocked(inp))
-			return (1);
-		if (filtered) {
-			INP_RUNLOCK(inp);
-			return (1);
+		if (in_pcbrele_rlocked(inp)) {
+			if (!filtered)
+				m_freem(n);
+			return (true);
 		}
+		if (filtered)
+			return (false);
 	}
 
 	off += sizeof(struct udphdr);
@@ -273,18 +276,18 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
 	if (IPSEC_ENABLED(ipv4) &&
 	    IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) {
 		m_freem(n);
-		return (0);
+		return (false);
 	}
 	if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */
 		if (IPSEC_ENABLED(ipv4) &&
 		    UDPENCAP_INPUT(ipv4, n, off, AF_INET) != 0)
-			return (0);	/* Consumed. */
+			return (false);
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
 		m_freem(n);
-		return (0);
+		return (false);
 	}
 #endif /* MAC */
 	if (inp->inp_flags & INP_CONTROLOPTS ||
@@ -330,7 +333,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
 			UDPSTAT_INC(udps_fullsock);
 		} else
 			sorwakeup_locked(so);
-	return (0);
+	return (false);
 }
 
 static bool
@@ -699,7 +702,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
 		UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
 	else
 		UDP_PROBE(receive, NULL, inp, ip, inp, uh);
-	if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
+	if (!udp_append(inp, ip, m, iphlen, udp_in))
 		INP_RUNLOCK(inp);
 	return (IPPROTO_DONE);