author     Cheng Cui <cc@FreeBSD.org>  2023-06-01 11:48:07 +0000
committer  Cheng Cui <cc@FreeBSD.org>  2023-06-01 11:55:01 +0000
commit     a3aa6f65290482cedf4aeda1d0875ca6433c7f04 (patch)
tree       0e18d480617b9c33015b3f7f62d02aac79cad718
parent     a466cc55373fc3cf86837f09da729535b57e69a1 (diff)
cc_cubic: Use units of microseconds (usecs) instead of ticks in RTT.
This improves the TCP-friendly cwnd in cases of low-latency, high-drop-rate
networks. Tests show +42% and +37% better performance in the 1 Gbps and
10 Gbps cases, respectively.

Reported by:	Bhaskar Pardeshi from VMware
Reviewed by:	rscheff, tuexen
Approved by:	rscheff (mentor), tuexen (mentor)
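The heart of the change is a unit conversion: elapsed time since the last
congestion event is now scaled from ticks to usecs using the kernel global
tick (usecs per tick, i.e. 1000000 / hz), keeping the same overflow clamp as
before. A minimal standalone sketch of that conversion follows; the helper
name elapsed_usecs and its parameters are illustrative, not from the tree.

#include <limits.h>

/*
 * now_ticks stands in for the kernel tick counter `ticks`, and
 * usecs_per_tick for the kernel global `tick` (1000000 / hz).
 */
static int
elapsed_usecs(int now_ticks, int *t_last_cong, int usecs_per_tick)
{
	int usecs_since_cong;

	usecs_since_cong = (now_ticks - *t_last_cong) * usecs_per_tick;
	if (usecs_since_cong < 0) {
		/* Arithmetic wrapped: clamp and drag t_last_cong along. */
		usecs_since_cong = INT_MAX;
		*t_last_cong = now_ticks - INT_MAX;
	}
	return (usecs_since_cong);
}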
 sys/netinet/cc/cc_cubic.c | 60
 sys/netinet/cc/cc_cubic.h | 33
 2 files changed, 50 insertions(+), 43 deletions(-)
diff --git a/sys/netinet/cc/cc_cubic.c b/sys/netinet/cc/cc_cubic.c
index 8992b9beba13..be9bd9859122 100644
--- a/sys/netinet/cc/cc_cubic.c
+++ b/sys/netinet/cc/cc_cubic.c
@@ -240,7 +240,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
{
struct cubic *cubic_data;
unsigned long w_tf, w_cubic_next;
- int ticks_since_cong;
+ int usecs_since_cong;
cubic_data = ccv->cc_data;
cubic_record_rtt(ccv);
@@ -253,7 +253,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
(ccv->flags & CCF_CWND_LIMITED)) {
/* Use the logic in NewReno ack_received() for slow start. */
if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
- cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
+ cubic_data->min_rtt_usecs == TCPTV_SRTTBASE) {
cubic_does_slow_start(ccv, cubic_data);
} else {
if (cubic_data->flags & CUBICFLAG_HYSTART_IN_CSS) {
@@ -282,12 +282,12 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
cubic_data->K = cubic_k(cubic_data->max_cwnd /
CCV(ccv, t_maxseg));
}
- if ((ticks_since_cong =
- ticks - cubic_data->t_last_cong) < 0) {
+ usecs_since_cong = (ticks - cubic_data->t_last_cong) * tick;
+ if (usecs_since_cong < 0) {
/*
* dragging t_last_cong along
*/
- ticks_since_cong = INT_MAX;
+ usecs_since_cong = INT_MAX;
cubic_data->t_last_cong = ticks - INT_MAX;
}
/*
@@ -297,13 +297,14 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
* RTT is dominated by network buffering rather than
* propagation delay.
*/
- w_tf = tf_cwnd(ticks_since_cong,
- cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
- CCV(ccv, t_maxseg));
+ w_tf = tf_cwnd(usecs_since_cong, cubic_data->mean_rtt_usecs,
+ cubic_data->max_cwnd, CCV(ccv, t_maxseg));
- w_cubic_next = cubic_cwnd(ticks_since_cong +
- cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
- CCV(ccv, t_maxseg), cubic_data->K);
+ w_cubic_next = cubic_cwnd(usecs_since_cong +
+ cubic_data->mean_rtt_usecs,
+ cubic_data->max_cwnd,
+ CCV(ccv, t_maxseg),
+ cubic_data->K);
ccv->flags &= ~CCF_ABC_SENTAWND;
@@ -397,8 +398,8 @@ cubic_cb_init(struct cc_var *ccv, void *ptr)
/* Init some key variables with sensible defaults. */
cubic_data->t_last_cong = ticks;
- cubic_data->min_rtt_ticks = TCPTV_SRTTBASE;
- cubic_data->mean_rtt_ticks = 1;
+ cubic_data->min_rtt_usecs = TCPTV_SRTTBASE;
+ cubic_data->mean_rtt_usecs = 1;
ccv->cc_data = cubic_data;
cubic_data->flags = CUBICFLAG_HYSTART_ENABLED;
@@ -549,13 +550,13 @@ cubic_post_recovery(struct cc_var *ccv)
/* Calculate the average RTT between congestion epochs. */
if (cubic_data->epoch_ack_count > 0 &&
- cubic_data->sum_rtt_ticks >= cubic_data->epoch_ack_count) {
- cubic_data->mean_rtt_ticks = (int)(cubic_data->sum_rtt_ticks /
+ cubic_data->sum_rtt_usecs >= cubic_data->epoch_ack_count) {
+ cubic_data->mean_rtt_usecs = (int)(cubic_data->sum_rtt_usecs /
cubic_data->epoch_ack_count);
}
cubic_data->epoch_ack_count = 0;
- cubic_data->sum_rtt_ticks = 0;
+ cubic_data->sum_rtt_usecs = 0;
}
/*
@@ -565,13 +566,13 @@ static void
cubic_record_rtt(struct cc_var *ccv)
{
struct cubic *cubic_data;
- int t_srtt_ticks;
+ uint32_t t_srtt_usecs;
/* Ignore srtt until a min number of samples have been taken. */
if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) {
cubic_data = ccv->cc_data;
- t_srtt_ticks = tcp_get_srtt(ccv->ccvc.tcp,
- TCP_TMR_GRANULARITY_TICKS);
+ t_srtt_usecs = tcp_get_srtt(ccv->ccvc.tcp,
+ TCP_TMR_GRANULARITY_USEC);
/*
* Record the current SRTT as our minrtt if it's the smallest
* we've seen or minrtt is currently equal to its initialised
@@ -579,24 +580,27 @@ cubic_record_rtt(struct cc_var *ccv)
*
* XXXLAS: Should there be some hysteresis for minrtt?
*/
- if ((t_srtt_ticks < cubic_data->min_rtt_ticks ||
- cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) {
- cubic_data->min_rtt_ticks = max(1, t_srtt_ticks);
+ if ((t_srtt_usecs < cubic_data->min_rtt_usecs ||
+ cubic_data->min_rtt_usecs == TCPTV_SRTTBASE)) {
+ /* A minimal rtt is a single unshifted tick of a ticks
+ * timer. */
+ cubic_data->min_rtt_usecs = max(tick >> TCP_RTT_SHIFT,
+ t_srtt_usecs);
/*
* If the connection is within its first congestion
- * epoch, ensure we prime mean_rtt_ticks with a
+ * epoch, ensure we prime mean_rtt_usecs with a
* reasonable value until the epoch average RTT is
* calculated in cubic_post_recovery().
*/
- if (cubic_data->min_rtt_ticks >
- cubic_data->mean_rtt_ticks)
- cubic_data->mean_rtt_ticks =
- cubic_data->min_rtt_ticks;
+ if (cubic_data->min_rtt_usecs >
+ cubic_data->mean_rtt_usecs)
+ cubic_data->mean_rtt_usecs =
+ cubic_data->min_rtt_usecs;
}
/* Sum samples for epoch average RTT calculation. */
- cubic_data->sum_rtt_ticks += t_srtt_ticks;
+ cubic_data->sum_rtt_usecs += t_srtt_usecs;
cubic_data->epoch_ack_count++;
}
}
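For illustration, the epoch-average RTT that cubic_post_recovery() now keeps
in usecs can be sketched in isolation as below. The helper is hypothetical;
the struct cubic fields are passed in directly here.

#include <stdint.h>

/* Mirrors the averaging logic in cubic_post_recovery(). */
static int
epoch_mean_rtt_usecs(int64_t sum_rtt_usecs, int epoch_ack_count,
    int prev_mean_rtt_usecs)
{
	/* Only average when the epoch saw ACKs and the sum is sane. */
	if (epoch_ack_count > 0 && sum_rtt_usecs >= epoch_ack_count)
		return ((int)(sum_rtt_usecs / epoch_ack_count));
	return (prev_mean_rtt_usecs);
}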
diff --git a/sys/netinet/cc/cc_cubic.h b/sys/netinet/cc/cc_cubic.h
index 0749a9ebbc1a..3d408154c1a5 100644
--- a/sys/netinet/cc/cc_cubic.h
+++ b/sys/netinet/cc/cc_cubic.h
@@ -91,8 +91,8 @@
struct cubic {
/* CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. */
int64_t K;
- /* Sum of RTT samples across an epoch in ticks. */
- int64_t sum_rtt_ticks;
+ /* Sum of RTT samples across an epoch in usecs. */
+ int64_t sum_rtt_usecs;
/* cwnd at the most recent congestion event. */
unsigned long max_cwnd;
/* cwnd at the previous congestion event. */
@@ -101,10 +101,10 @@ struct cubic {
unsigned long prev_max_cwnd_cp;
/* various flags */
uint32_t flags;
- /* Minimum observed rtt in ticks. */
- int min_rtt_ticks;
+ /* Minimum observed rtt in usecs. */
+ int min_rtt_usecs;
/* Mean observed rtt between congestion epochs. */
- int mean_rtt_ticks;
+ int mean_rtt_usecs;
/* ACKs since last congestion event. */
int epoch_ack_count;
/* Timestamp (in ticks) of arriving in congestion avoidance from last
@@ -222,14 +222,15 @@ cubic_k(unsigned long wmax_pkts)
* XXXLAS: Characterise bounds for overflow.
*/
static __inline unsigned long
-cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
+cubic_cwnd(int usecs_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
{
int64_t cwnd;
/* K is in fixed point form with CUBIC_SHIFT worth of precision. */
/* t - K, with CUBIC_SHIFT worth of precision. */
- cwnd = (((int64_t)ticks_since_cong << CUBIC_SHIFT) - (K * hz)) / hz;
+ cwnd = (((int64_t)usecs_since_cong << CUBIC_SHIFT) - (K * hz * tick)) /
+ (hz * tick);
if (cwnd > CUBED_ROOT_MAX_ULONG)
return INT_MAX;
@@ -255,15 +256,17 @@ cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
}
/*
- * Compute an approximation of the NewReno cwnd some number of ticks after a
+ * Compute an approximation of the NewReno cwnd some number of usecs after a
* congestion event. RTT should be the average RTT estimate for the path
* measured over the previous congestion epoch and wmax is the value of cwnd at
* the last congestion event. The "TCP friendly" concept in the CUBIC I-D is
* rather tricky to understand and it turns out this function is not required.
* It is left here for reference.
+ *
+ * XXX: Not used
*/
static __inline unsigned long
-reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+reno_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
uint32_t smss)
{
@@ -272,26 +275,26 @@ reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
* W_tcp(t) deals with cwnd/wmax in pkts, so because our cwnd is in
* bytes, we have to multiply by smss.
*/
- return (((wmax * RENO_BETA) + (((ticks_since_cong * smss)
- << CUBIC_SHIFT) / rtt_ticks)) >> CUBIC_SHIFT);
+ return (((wmax * RENO_BETA) + (((usecs_since_cong * smss)
+ << CUBIC_SHIFT) / rtt_usecs)) >> CUBIC_SHIFT);
}
/*
- * Compute an approximation of the "TCP friendly" cwnd some number of ticks
+ * Compute an approximation of the "TCP friendly" cwnd some number of usecs
* after a congestion event that is designed to yield the same average cwnd as
* NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT
* estimate for the path measured over the previous congestion epoch and wmax is
* the value of cwnd at the last congestion event.
*/
static __inline unsigned long
-tf_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+tf_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
uint32_t smss)
{
/* Equation 4 of I-D. */
return (((wmax * CUBIC_BETA) +
- (((THREE_X_PT3 * (unsigned long)ticks_since_cong *
- (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_ticks)))
+ (((THREE_X_PT3 * (unsigned long)usecs_since_cong *
+ (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_usecs)))
>> CUBIC_SHIFT);
}