diff options
author | Marcin Wojtas <mw@FreeBSD.org> | 2020-05-07 11:28:39 +0000 |
---|---|---|
committer | Marcin Wojtas <mw@FreeBSD.org> | 2020-05-07 11:28:39 +0000 |
commit | 04cf2b885d7dc385ed8e48df1d0218b5e4162869 (patch) | |
tree | b8a113ad3a307fb06334acc1f17f5676264e2767 /sys/dev/ena | |
parent | 8717b8f1bb6e7b87f9be64c7a9e08877ed686cce (diff) | |
download | src-04cf2b885d7dc385ed8e48df1d0218b5e4162869.tar.gz src-04cf2b885d7dc385ed8e48df1d0218b5e4162869.zip |
Optimize ENA Rx refill for low memory conditions
Sometimes, especially when there is not much memory in the system left,
allocating mbuf jumbo clusters (like 9KB or 16KB) can take a lot of time
and it is not guaranteed that it'll succeed. In that situation, the
fallback will work, but if the refill needs to take place for a lot of
descriptors at once, the time spent in m_getjcl looking for memory can
cause system unresponsiveness due to the high priority of the Rx task. This
can also lead to a driver reset, because the Tx cleanup routine is being
blocked and the timer service could detect that Tx packets aren't cleaned
up. The reset routine can cause further unresponsiveness - Rx
rings are being refilled there, so m_getjcl will again burn the CPU.
This was causing NVMe driver timeouts and resets, because the network driver
has higher priority.
Instead of 16KB jumbo clusters for the Rx buffers, 9KB clusters are
enough - ENA MTU is being set to 9K anyway, so it's very unlikely that
more space than 9KB will be needed.
However, 9KB jumbo clusters can still cause issues, so by default the
page size mbuf cluster will be used for the Rx descriptors. This can have a
small (~2%) impact on the throughput of the device, so to restore
original behavior, one must set the "hw.ena.enable_9k_mbufs" tunable to
"1" in the "/boot/loader.conf" file.
As a part of this patch (important fix), the version of the driver
was updated to v2.1.2.
Submitted by: cperciva
Reviewed by: Michal Krawczyk <mk@semihalf.com>
Reviewed by: Ido Segev <idose@amazon.com>
Reviewed by: Guy Tzalik <gtzalik@amazon.com>
MFC after: 3 days
PR: 225791, 234838, 235856, 236989, 243531
Differential Revision: https://reviews.freebsd.org/D24546
Notes
Notes:
svn path=/head/; revision=360777
Diffstat (limited to 'sys/dev/ena')
-rw-r--r-- | sys/dev/ena/ena.c | 10 | ||||
-rw-r--r-- | sys/dev/ena/ena.h | 11 | ||||
-rw-r--r-- | sys/dev/ena/ena_sysctl.c | 11 | ||||
-rw-r--r-- | sys/dev/ena/ena_sysctl.h | 3 |
4 files changed, 28 insertions, 7 deletions
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 5506e57ae237..f76c3d467edc 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -368,6 +368,7 @@ ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring, ring->ena_dev = adapter->ena_dev; ring->first_interrupt = false; ring->no_interrupt_event_cnt = 0; + ring->rx_mbuf_sz = ena_mbuf_sz; } static void @@ -508,9 +509,9 @@ ena_setup_rx_dma_tag(struct ena_adapter *adapter) ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */ BUS_SPACE_MAXADDR, /* highaddr of excl window */ NULL, NULL, /* filter, filterarg */ - MJUM16BYTES, /* maxsize */ + ena_mbuf_sz, /* maxsize */ adapter->max_rx_sgl_size, /* nsegments */ - MJUM16BYTES, /* maxsegsize */ + ena_mbuf_sz, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ @@ -963,7 +964,8 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter, return (0); /* Get mbuf using UMA allocator */ - rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES); + rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, + rx_ring->rx_mbuf_sz); if (unlikely(rx_info->mbuf == NULL)) { counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1); @@ -974,7 +976,7 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter, } mlen = MCLBYTES; } else { - mlen = MJUM16BYTES; + mlen = rx_ring->rx_mbuf_sz; } /* Set mbuf length*/ rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen; diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index 570d5a629be4..fc3ee4586b26 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -41,7 +41,7 @@ #define DRV_MODULE_VER_MAJOR 2 #define DRV_MODULE_VER_MINOR 1 -#define DRV_MODULE_VER_SUBMINOR 1 +#define DRV_MODULE_VER_SUBMINOR 2 #define DRV_MODULE_NAME "ena" @@ -307,8 +307,13 @@ struct ena_ring { /* Determines if device will use LLQ or normal mode for TX */ enum ena_admin_placement_policy_type tx_mem_queue_type; - /* The maximum length the driver can push to the device (For LLQ) */ - uint8_t tx_max_header_size; + union { 
+ /* The maximum length the driver can push to the device (For LLQ) */ + uint8_t tx_max_header_size; + /* The maximum (and default) mbuf size for the Rx descriptor. */ + uint16_t rx_mbuf_sz; + + }; bool first_interrupt; uint16_t no_interrupt_event_cnt; diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c index 0272837027c2..563481f9988e 100644 --- a/sys/dev/ena/ena_sysctl.c +++ b/sys/dev/ena/ena_sysctl.c @@ -48,6 +48,17 @@ int ena_log_level = ENA_ALERT | ENA_WARNING; SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0, "Logging level indicating verbosity of the logs"); +/* + * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). + * Using 9k mbufs in low memory conditions might cause allocation to take a lot + * of time and lead to the OS instability as it needs to look for the contiguous + * pages. + * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if + * the network performance is the priority, the 9k mbufs can be used. + */ +int ena_enable_9k_mbufs = 0; +SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, + &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); void ena_sysctl_add_nodes(struct ena_adapter *adapter) diff --git a/sys/dev/ena/ena_sysctl.h b/sys/dev/ena/ena_sysctl.h index 5f43d998a3ff..cedb916b980a 100644 --- a/sys/dev/ena/ena_sysctl.h +++ b/sys/dev/ena/ena_sysctl.h @@ -41,4 +41,7 @@ void ena_sysctl_add_nodes(struct ena_adapter *); +extern int ena_enable_9k_mbufs; +#define ena_mbuf_sz (ena_enable_9k_mbufs ? MJUM9BYTES : MJUMPAGESIZE) + #endif /* !(ENA_SYSCTL_H) */ |