author     Cheng Cui <cc@FreeBSD.org>  2023-06-01 11:48:07 +0000
committer  Cheng Cui <cc@FreeBSD.org>  2023-06-01 11:55:01 +0000
commit     a3aa6f65290482cedf4aeda1d0875ca6433c7f04 (patch)
tree       0e18d480617b9c33015b3f7f62d02aac79cad718
parent     a466cc55373fc3cf86837f09da729535b57e69a1 (diff)
cc_cubic: Use units of microseconds (usecs) instead of ticks in RTT.
This improves the TCP-friendly cwnd in cases of low-latency, high-drop-rate
networks. Tests show +42% and +37% better performance in the 1 Gbps and
10 Gbps cases, respectively.

Reported by:	Bhaskar Pardeshi from VMware
Reviewed by:	rscheff, tuexen
Approved by:	rscheff (mentor), tuexen (mentor)
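The heart of the change is a unit conversion: elapsed time since the last
congestion event is now scaled from ticks to usecs using the kernel global
tick (usecs per tick, i.e. 1000000 / hz), keeping the same overflow clamp as
before. A minimal standalone sketch of that conversion follows; the helper
name elapsed_usecs and its parameters are illustrative, not from the tree.

#include <limits.h>

/*
 * now_ticks stands in for the kernel tick counter `ticks`, and
 * usecs_per_tick for the kernel global `tick` (1000000 / hz).
 */
static int
elapsed_usecs(int now_ticks, int *t_last_cong, int usecs_per_tick)
{
	int usecs_since_cong;

	usecs_since_cong = (now_ticks - *t_last_cong) * usecs_per_tick;
	if (usecs_since_cong < 0) {
		/* Arithmetic wrapped: clamp and drag t_last_cong along. */
		usecs_since_cong = INT_MAX;
		*t_last_cong = now_ticks - INT_MAX;
	}
	return (usecs_since_cong);
}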
 sys/netinet/cc/cc_cubic.c | 60
 sys/netinet/cc/cc_cubic.h | 33
 2 files changed, 50 insertions(+), 43 deletions(-)
diff --git a/sys/netinet/cc/cc_cubic.c b/sys/netinet/cc/cc_cubic.c
index 8992b9beba13..be9bd9859122 100644
--- a/sys/netinet/cc/cc_cubic.c
+++ b/sys/netinet/cc/cc_cubic.c
@@ -240,7 +240,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
{
struct cubic *cubic_data;
unsigned long w_tf, w_cubic_next;
- int ticks_since_cong;
+ int usecs_since_cong;
cubic_data = ccv->cc_data;
cubic_record_rtt(ccv);
@@ -253,7 +253,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
(ccv->flags & CCF_CWND_LIMITED)) {
/* Use the logic in NewReno ack_received() for slow start. */
if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
- cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
+ cubic_data->min_rtt_usecs == TCPTV_SRTTBASE) {
cubic_does_slow_start(ccv, cubic_data);
} else {
if (cubic_data->flags & CUBICFLAG_HYSTART_IN_CSS) {
@@ -282,12 +282,12 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
cubic_data->K = cubic_k(cubic_data->max_cwnd /
CCV(ccv, t_maxseg));
}
- if ((ticks_since_cong =
- ticks - cubic_data->t_last_cong) < 0) {
+ usecs_since_cong = (ticks - cubic_data->t_last_cong) * tick;
+ if (usecs_since_cong < 0) {
/*
* dragging t_last_cong along
*/
- ticks_since_cong = INT_MAX;
+ usecs_since_cong = INT_MAX;
cubic_data->t_last_cong = ticks - INT_MAX;
}
/*
@@ -297,13 +297,14 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
* RTT is dominated by network buffering rather than
* propagation delay.
*/
- w_tf = tf_cwnd(ticks_since_cong,
- cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
- CCV(ccv, t_maxseg));
+ w_tf = tf_cwnd(usecs_since_cong, cubic_data->mean_rtt_usecs,
+ cubic_data->max_cwnd, CCV(ccv, t_maxseg));
- w_cubic_next = cubic_cwnd(ticks_since_cong +
- cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
- CCV(ccv, t_maxseg), cubic_data->K);
+ w_cubic_next = cubic_cwnd(usecs_since_cong +
+ cubic_data->mean_rtt_usecs,
+ cubic_data->max_cwnd,
+ CCV(ccv, t_maxseg),
+ cubic_data->K);
ccv->flags &= ~CCF_ABC_SENTAWND;
@@ -397,8 +398,8 @@ cubic_cb_init(struct cc_var *ccv, void *ptr)
/* Init some key variables with sensible defaults. */
cubic_data->t_last_cong = ticks;
- cubic_data->min_rtt_ticks = TCPTV_SRTTBASE;
- cubic_data->mean_rtt_ticks = 1;
+ cubic_data->min_rtt_usecs = TCPTV_SRTTBASE;
+ cubic_data->mean_rtt_usecs = 1;
ccv->cc_data = cubic_data;
cubic_data->flags = CUBICFLAG_HYSTART_ENABLED;
@@ -549,13 +550,13 @@ cubic_post_recovery(struct cc_var *ccv)
/* Calculate the average RTT between congestion epochs. */
if (cubic_data->epoch_ack_count > 0 &&
- cubic_data->sum_rtt_ticks >= cubic_data->epoch_ack_count) {
- cubic_data->mean_rtt_ticks = (int)(cubic_data->sum_rtt_ticks /
+ cubic_data->sum_rtt_usecs >= cubic_data->epoch_ack_count) {
+ cubic_data->mean_rtt_usecs = (int)(cubic_data->sum_rtt_usecs /
cubic_data->epoch_ack_count);
}
cubic_data->epoch_ack_count = 0;
- cubic_data->sum_rtt_ticks = 0;
+ cubic_data->sum_rtt_usecs = 0;
}
/*
@@ -565,13 +566,13 @@ static void
cubic_record_rtt(struct cc_var *ccv)
{
struct cubic *cubic_data;
- int t_srtt_ticks;
+ uint32_t t_srtt_usecs;
/* Ignore srtt until a min number of samples have been taken. */
if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) {
cubic_data = ccv->cc_data;
- t_srtt_ticks = tcp_get_srtt(ccv->ccvc.tcp,
- TCP_TMR_GRANULARITY_TICKS);
+ t_srtt_usecs = tcp_get_srtt(ccv->ccvc.tcp,
+ TCP_TMR_GRANULARITY_USEC);
/*
* Record the current SRTT as our minrtt if it's the smallest
* we've seen or minrtt is currently equal to its initialised
@@ -579,24 +580,27 @@ cubic_record_rtt(struct cc_var *ccv)
*
* XXXLAS: Should there be some hysteresis for minrtt?
*/
- if ((t_srtt_ticks < cubic_data->min_rtt_ticks ||
- cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) {
- cubic_data->min_rtt_ticks = max(1, t_srtt_ticks);
+ if ((t_srtt_usecs < cubic_data->min_rtt_usecs ||
+ cubic_data->min_rtt_usecs == TCPTV_SRTTBASE)) {
+ /* A minimal rtt is a single unshifted tick of a ticks
+ * timer. */
+ cubic_data->min_rtt_usecs = max(tick >> TCP_RTT_SHIFT,
+ t_srtt_usecs);
/*
* If the connection is within its first congestion
- * epoch, ensure we prime mean_rtt_ticks with a
+ * epoch, ensure we prime mean_rtt_usecs with a
* reasonable value until the epoch average RTT is
* calculated in cubic_post_recovery().
*/
- if (cubic_data->min_rtt_ticks >
- cubic_data->mean_rtt_ticks)
- cubic_data->mean_rtt_ticks =
- cubic_data->min_rtt_ticks;
+ if (cubic_data->min_rtt_usecs >
+ cubic_data->mean_rtt_usecs)
+ cubic_data->mean_rtt_usecs =
+ cubic_data->min_rtt_usecs;
}
/* Sum samples for epoch average RTT calculation. */
- cubic_data->sum_rtt_ticks += t_srtt_ticks;
+ cubic_data->sum_rtt_usecs += t_srtt_usecs;
cubic_data->epoch_ack_count++;
}
}
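For illustration, the epoch-average RTT that cubic_post_recovery() now keeps
in usecs can be sketched in isolation as below. The helper is hypothetical;
the struct cubic fields are passed in directly here.

#include <stdint.h>

/* Mirrors the averaging logic in cubic_post_recovery(). */
static int
epoch_mean_rtt_usecs(int64_t sum_rtt_usecs, int epoch_ack_count,
    int prev_mean_rtt_usecs)
{
	/* Only average when the epoch saw ACKs and the sum is sane. */
	if (epoch_ack_count > 0 && sum_rtt_usecs >= epoch_ack_count)
		return ((int)(sum_rtt_usecs / epoch_ack_count));
	return (prev_mean_rtt_usecs);
}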
diff --git a/sys/netinet/cc/cc_cubic.h b/sys/netinet/cc/cc_cubic.h
index 0749a9ebbc1a..3d408154c1a5 100644
--- a/sys/netinet/cc/cc_cubic.h
+++ b/sys/netinet/cc/cc_cubic.h
@@ -91,8 +91,8 @@
struct cubic {
/* CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. */
int64_t K;
- /* Sum of RTT samples across an epoch in ticks. */
- int64_t sum_rtt_ticks;
+ /* Sum of RTT samples across an epoch in usecs. */
+ int64_t sum_rtt_usecs;
/* cwnd at the most recent congestion event. */
unsigned long max_cwnd;
/* cwnd at the previous congestion event. */
@@ -101,10 +101,10 @@ struct cubic {
unsigned long prev_max_cwnd_cp;
/* various flags */
uint32_t flags;
- /* Minimum observed rtt in ticks. */
- int min_rtt_ticks;
+ /* Minimum observed rtt in usecs. */
+ int min_rtt_usecs;
/* Mean observed rtt between congestion epochs. */
- int mean_rtt_ticks;
+ int mean_rtt_usecs;
/* ACKs since last congestion event. */
int epoch_ack_count;
/* Timestamp (in ticks) of arriving in congestion avoidance from last
@@ -222,14 +222,15 @@ cubic_k(unsigned long wmax_pkts)
* XXXLAS: Characterise bounds for overflow.
*/
static __inline unsigned long
-cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
+cubic_cwnd(int usecs_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
{
int64_t cwnd;
/* K is in fixed point form with CUBIC_SHIFT worth of precision. */
/* t - K, with CUBIC_SHIFT worth of precision. */
- cwnd = (((int64_t)ticks_since_cong << CUBIC_SHIFT) - (K * hz)) / hz;
+ cwnd = (((int64_t)usecs_since_cong << CUBIC_SHIFT) - (K * hz * tick)) /
+ (hz * tick);
if (cwnd > CUBED_ROOT_MAX_ULONG)
return INT_MAX;
@@ -255,15 +256,17 @@ cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
}
/*
- * Compute an approximation of the NewReno cwnd some number of ticks after a
+ * Compute an approximation of the NewReno cwnd some number of usecs after a
* congestion event. RTT should be the average RTT estimate for the path
* measured over the previous congestion epoch and wmax is the value of cwnd at
* the last congestion event. The "TCP friendly" concept in the CUBIC I-D is
* rather tricky to understand and it turns out this function is not required.
* It is left here for reference.
+ *
+ * XXX: Not used
*/
static __inline unsigned long
-reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+reno_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
uint32_t smss)
{
@@ -272,26 +275,26 @@ reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
* W_tcp(t) deals with cwnd/wmax in pkts, so because our cwnd is in
* bytes, we have to multiply by smss.
*/
- return (((wmax * RENO_BETA) + (((ticks_since_cong * smss)
- << CUBIC_SHIFT) / rtt_ticks)) >> CUBIC_SHIFT);
+ return (((wmax * RENO_BETA) + (((usecs_since_cong * smss)
+ << CUBIC_SHIFT) / rtt_usecs)) >> CUBIC_SHIFT);
}
/*
- * Compute an approximation of the "TCP friendly" cwnd some number of ticks
+ * Compute an approximation of the "TCP friendly" cwnd some number of usecs
* after a congestion event that is designed to yield the same average cwnd as
* NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT
* estimate for the path measured over the previous congestion epoch and wmax is
* the value of cwnd at the last congestion event.
*/
static __inline unsigned long
-tf_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+tf_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
uint32_t smss)
{
/* Equation 4 of I-D. */
return (((wmax * CUBIC_BETA) +
- (((THREE_X_PT3 * (unsigned long)ticks_since_cong *
- (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_ticks)))
+ (((THREE_X_PT3 * (unsigned long)usecs_since_cong *
+ (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_usecs)))
>> CUBIC_SHIFT);
}