aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRandall Stewart <rrs@FreeBSD.org>2026-01-05 16:30:22 +0000
committerRandall Stewart <rrs@FreeBSD.org>2026-01-05 16:30:22 +0000
commit138e74ceadb237cdd42fcda11ddef18b509f571d (patch)
tree71ad26dd90eabd6a691e775ced18d7f99d2cbeae
parent58a14d9596bd9a72683cbd725e817fdabe926139 (diff)
-rw-r--r--sys/netinet/tcp_stacks/rack.c94
-rw-r--r--sys/netinet/tcp_stacks/tcp_rack.h1
2 files changed, 87 insertions, 8 deletions
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 76bf8f2e3b17..f55abbf919b7 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -204,6 +204,7 @@ static int32_t rack_dnd_default = 0; /* For rr_conf = 3, what is the default fo
static int32_t rack_rxt_controls = 0;
static int32_t rack_fill_cw_state = 0;
static uint8_t rack_req_measurements = 1;
+static uint32_t rack_rtt_divisor = 2;
static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */
static int32_t rack_hw_rate_caps = 0; /* 1; */
static int32_t rack_hw_rate_cap_per = 0; /* 0 -- off */
@@ -497,7 +498,7 @@ static uint64_t rack_get_gp_est(struct tcp_rack *rack);
static void
rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack,
- struct rack_sendmap *rsm, uint32_t cts);
+ struct rack_sendmap *rsm, uint32_t cts, int line);
static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rsm);
static int32_t rack_output(struct tcpcb *tp);
@@ -1351,6 +1352,11 @@ rack_init_sysctls(void)
"When doing recovery -> rto -> recovery do we reset SSthresh?");
SYSCTL_ADD_U32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_timers),
+ OID_AUTO, "rtt_divisor", CTLFLAG_RW,
+ &rack_rtt_divisor, 2,
+ "When calculating the rtt threshold what 1/N is a rtt that indicates reordering");
+ SYSCTL_ADD_U32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_timers),
OID_AUTO, "scoreboard_thresh", CTLFLAG_RW,
&rack_rxt_scoreboard_clear_thresh, 2,
"How many RTO's are allowed before we clear the scoreboard");
@@ -2663,6 +2669,8 @@ rack_log_map_chg(struct tcpcb *tp, struct tcp_rack *rack,
log.u_bbr.cur_del_rate = (uintptr_t)prev;
log.u_bbr.delRate = (uintptr_t)rsm;
log.u_bbr.rttProp = (uintptr_t)next;
+ if (rsm)
+ log.u_bbr.flex1 = rsm->r_flags;
log.u_bbr.flex7 = 0;
if (prev) {
log.u_bbr.flex1 = prev->r_start;
@@ -5584,6 +5592,7 @@ rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line)
rack->r_ctl.rc_prr_delivered = 0;
rack->r_ctl.rc_prr_out = 0;
rack->r_fast_output = 0;
+ rack->r_ctl.recovery_rxt_cnt = 0;
if (rack->rack_no_prr == 0) {
rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp);
rack_log_to_prr(rack, 2, in_rec_at_entry, line);
@@ -7416,6 +7425,7 @@ rack_remxt_tmr(struct tcpcb *tp)
*/
TAILQ_INIT(&rack->r_ctl.rc_tmap);
+ rack->r_ctl.recovery_rxt_cnt = 0;
TQHASH_FOREACH(rsm, rack->r_ctl.tqh) {
rsm->r_dupack = 0;
if (rack_verbose_logging)
@@ -8042,6 +8052,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
/* We have retransmitted due to the SACK pass */
rsm->r_flags &= ~RACK_SACK_PASSED;
rsm->r_flags |= RACK_WAS_SACKPASS;
+ rack->r_ctl.recovery_rxt_cnt += (rsm->r_end - rsm->r_start);
}
}
@@ -8948,7 +8959,7 @@ ts_not_found:
*/
static void
rack_log_sack_passed(struct tcpcb *tp,
- struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t cts)
+ struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t cts, int line)
{
struct rack_sendmap *nrsm;
uint32_t thresh;
@@ -8997,6 +9008,7 @@ rack_log_sack_passed(struct tcpcb *tp,
*/
break;
}
+ rack_log_dsack_event(rack, 12, __LINE__, nrsm->r_start, nrsm->r_end);
nrsm->r_flags |= RACK_SACK_PASSED;
nrsm->r_flags &= ~RACK_WAS_SACKPASS;
}
@@ -9172,6 +9184,39 @@ is_rsm_inside_declared_tlp_block(struct tcp_rack *rack, struct rack_sendmap *rsm
return (1);
}
+
+static int
+rack_check_reorder_ack(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, int the_end, uint32_t cts, int can_exit_recovery, int line)
+{
+ if ((rack_rtt_divisor > 0) &&
+ (rsm->r_rtr_cnt == 2) &&
+ IN_RECOVERY(tp->t_flags) &&
+ (rsm->r_flags & RACK_WAS_SACKPASS)){
+ uint32_t fractional, snt_to_ack;
+
+ fractional = (tp->t_srtt / rack_rtt_divisor);
+ if (fractional == 0)
+ fractional = 1;
+ snt_to_ack = cts - (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)];
+ if (snt_to_ack <= fractional) {
+ rack->r_ctl.rc_reorder_ts = cts;
+ KASSERT((rack->r_ctl.recovery_rxt_cnt >= (the_end - rsm->r_start)),
+ ("rsm:%p rack:%p recovery_rxt_cnt would go negative recovery_rxt_cnt:%u sub:%u", rsm, rack, rack->r_ctl.recovery_rxt_cnt, (the_end - rsm->r_start)));
+ rack->r_ctl.recovery_rxt_cnt -= (the_end - rsm->r_start);
+ rack_log_to_prr(rack, 18, rack->r_ctl.recovery_rxt_cnt, line);
+ if (can_exit_recovery && (rack->r_ctl.recovery_rxt_cnt == 0)) {
+ tp->snd_ssthresh = rack->r_ctl.rc_ssthresh_at_erec;
+ rack_exit_recovery(tp, rack, 4);
+ rack->r_might_revert = 0;
+ rack->r_ctl.retran_during_recovery = 0;
+ rack_log_to_prr(rack, 17, snt_to_ack, line);
+ }
+ return (1);
+ }
+ }
+ return (0);
+}
+
static uint32_t
rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack,
struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts,
@@ -9183,6 +9228,7 @@ rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack
int insret __diagused;
int32_t used_ref = 1;
int can_use_hookery = 0;
+ int prohibit_marking = 0;
start = sack->start;
end = sack->end;
@@ -9273,6 +9319,8 @@ do_rest_ofb:
(rsm->bindex == next->bindex) &&
((rsm->r_flags & RACK_STRADDLE) == 0) &&
((next->r_flags & RACK_STRADDLE) == 0) &&
+ ((rsm->r_flags & RACK_WAS_SACKPASS) == 0) &&
+ ((next->r_flags & RACK_WAS_SACKPASS) == 0) &&
((rsm->r_flags & RACK_IS_PCM) == 0) &&
((next->r_flags & RACK_IS_PCM) == 0) &&
(rsm->r_flags & RACK_IN_GP_WIN) &&
@@ -9345,6 +9393,8 @@ do_rest_ofb:
rack_log_retran_reason(rack, rsm, __LINE__, 0, 2);
/* Now lets make sure our fudge block is right */
nrsm->r_start = start;
+ /* Check if the ack was too soon i.e. reordering + ack arrives too quickly */
+ prohibit_marking = rack_check_reorder_ack(tp, rack, nrsm, nrsm->r_end, cts, 0, __LINE__);
/* Now lets update all the stats and such */
rack_update_rtt(tp, rack, nrsm, to, cts, SACKED, 0);
if (rack->app_limited_needs_set)
@@ -9388,8 +9438,8 @@ do_rest_ofb:
* Now that we have the next
* one walk backwards from there.
*/
- if (nrsm && nrsm->r_in_tmap)
- rack_log_sack_passed(tp, rack, nrsm, cts);
+ if (nrsm && nrsm->r_in_tmap && (prohibit_marking == 0))
+ rack_log_sack_passed(tp, rack, nrsm, cts, __LINE__);
}
/* Now are we done? */
if (SEQ_LT(end, next->r_end) ||
@@ -9445,6 +9495,8 @@ do_rest_ofb:
}
rack_log_map_chg(tp, rack, NULL, rsm, nrsm, MAP_SACK_M2, end, __LINE__);
rsm->r_flags &= (~RACK_HAS_FIN);
+ /* Check if the ack was too soon i.e. reordering + ack arrives too quickly */
+ prohibit_marking = rack_check_reorder_ack(tp, rack, nrsm, nrsm->r_end, cts, 0, __LINE__);
/* Position us to point to the new nrsm that starts the sack blk */
rsm = nrsm;
}
@@ -9521,6 +9573,8 @@ do_rest_ofb:
}
rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);
changed += (rsm->r_end - rsm->r_start);
+ /* Check if the ack was too soon i.e. reordering + ack arrives too quickly */
+ prohibit_marking = rack_check_reorder_ack(tp, rack, rsm, rsm->r_end, cts, 0, __LINE__);
/* You get a count for acking a whole segment or more */
if (rsm->r_flags & RACK_WAS_LOST) {
/*
@@ -9530,8 +9584,9 @@ do_rest_ofb:
rack_mark_nolonger_lost(rack, rsm);
}
rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
- if (rsm->r_in_tmap) /* should be true */
- rack_log_sack_passed(tp, rack, rsm, cts);
+ if (rsm->r_in_tmap && (prohibit_marking == 0)) /* should be true */
+ rack_log_sack_passed(tp, rack, rsm, cts, __LINE__);
+
/* Is Reordering occuring? */
if (rsm->r_flags & RACK_SACK_PASSED) {
rsm->r_flags &= ~RACK_SACK_PASSED;
@@ -9620,6 +9675,8 @@ do_rest_ofb:
(rsm->bindex == prev->bindex) &&
((rsm->r_flags & RACK_STRADDLE) == 0) &&
((prev->r_flags & RACK_STRADDLE) == 0) &&
+ ((prev->r_flags & RACK_WAS_SACKPASS) == 0) &&
+ ((rsm->r_flags & RACK_WAS_SACKPASS) == 0) &&
((rsm->r_flags & RACK_IS_PCM) == 0) &&
((prev->r_flags & RACK_IS_PCM) == 0) &&
(rsm->r_flags & RACK_IN_GP_WIN) &&
@@ -9658,6 +9715,8 @@ do_rest_ofb:
*/
nrsm->r_end = end;
rsm->r_dupack = 0;
+ /* Check if the ack was too soon i.e. reordering + ack arrives too quickly */
+ prohibit_marking = rack_check_reorder_ack(tp, rack, nrsm, nrsm->r_end, cts, 0, __LINE__);
/*
* Which timestamp do we keep? It is rather
* important in GP measurements to have the
@@ -9807,6 +9866,8 @@ do_rest_ofb:
nrsm->r_in_tmap = 1;
}
nrsm->r_dupack = 0;
+ /* Check if the ack was too soon i.e. reordering + ack arrives too quickly */
+ prohibit_marking = rack_check_reorder_ack(tp, rack, nrsm, nrsm->r_end, cts, 0, __LINE__);
rack_log_retran_reason(rack, nrsm, __LINE__, 0, 2);
rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);
changed += (rsm->r_end - rsm->r_start);
@@ -9818,8 +9879,8 @@ do_rest_ofb:
}
rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
- if (rsm->r_in_tmap) /* should be true */
- rack_log_sack_passed(tp, rack, rsm, cts);
+ if (rsm->r_in_tmap && (prohibit_marking == 0)) /* should be true */
+ rack_log_sack_passed(tp, rack, rsm, cts, __LINE__);
/* Is Reordering occuring? */
if (rsm->r_flags & RACK_SACK_PASSED) {
rsm->r_flags &= ~RACK_SACK_PASSED;
@@ -9857,6 +9918,10 @@ out:
((rsm->r_flags & RACK_IN_GP_WIN) == 0)) {
break;
}
+ /* We can't merge retransmitted with sack-pass set */
+ if ((rsm->r_flags & RACK_WAS_SACKPASS) ||
+ (next->r_flags & RACK_WAS_SACKPASS))
+ break;
if ((rsm->r_flags & RACK_IN_GP_WIN) &&
((next->r_flags & RACK_IN_GP_WIN) == 0)) {
break;
@@ -9888,6 +9953,10 @@ out:
((rsm->r_flags & RACK_IN_GP_WIN) == 0)) {
break;
}
+ /* We can't merge retransmitted with sack-pass set */
+ if ((rsm->r_flags & RACK_WAS_SACKPASS) ||
+ (prev->r_flags & RACK_WAS_SACKPASS))
+ break;
if ((rsm->r_flags & RACK_IN_GP_WIN) &&
((prev->r_flags & RACK_IN_GP_WIN) == 0)) {
break;
@@ -10254,6 +10323,7 @@ more:
}
rack_update_pcm_ack(rack, 1, rsm->r_start, rsm->r_end);
} else {
+ (void)rack_check_reorder_ack(tp, rack, rsm, rsm->r_end, cts, 1, __LINE__);
rack_update_pcm_ack(rack, 1, rsm->r_start, rsm->r_end);
}
if ((rsm->r_flags & RACK_TO_REXT) &&
@@ -10354,6 +10424,14 @@ more:
}
rsm->soff += (th_ack - rsm->r_start);
rack_rsm_sender_update(rack, tp, rsm, 5);
+
+ /*
+ * Handle the special case where we retransmitted part of a segment we
+ * in this case pass in th_ack which is shorter than r_end.
+ */
+ if (rsm->r_flags & RACK_WAS_SACKPASS) {
+ rack_check_reorder_ack(tp, rack, rsm, th_ack, cts, 1, __LINE__);
+ }
/* The trim will move th_ack into r_start for us */
tqhash_trim(rack->r_ctl.tqh, th_ack);
/* Now do we need to move the mbuf fwd too? */
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index cac17d9aeb50..2e2ced089f67 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -539,6 +539,7 @@ struct rack_control {
uint32_t last_rcv_tstmp_for_rtt;
uint32_t last_time_of_arm_rcv;
uint32_t rto_ssthresh;
+ uint32_t recovery_rxt_cnt;
uint32_t rc_saved_beta;
uint32_t rc_saved_beta_ecn; /*
* For newreno cc: rc_saved_beta and