aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/ena
diff options
context:
space:
mode:
authorMarcin Wojtas <mw@FreeBSD.org>2020-05-07 11:28:39 +0000
committerMarcin Wojtas <mw@FreeBSD.org>2020-05-07 11:28:39 +0000
commit04cf2b885d7dc385ed8e48df1d0218b5e4162869 (patch)
treeb8a113ad3a307fb06334acc1f17f5676264e2767 /sys/dev/ena
parent8717b8f1bb6e7b87f9be64c7a9e08877ed686cce (diff)
downloadsrc-04cf2b885d7dc385ed8e48df1d0218b5e4162869.tar.gz
src-04cf2b885d7dc385ed8e48df1d0218b5e4162869.zip
Optimize ENA Rx refill for low memory conditions
Sometimes, especially when there is not much memory left in the system, allocating mbuf jumbo clusters (like 9KB or 16KB) can take a lot of time and is not guaranteed to succeed. In that situation, the fallback will work, but if the refill needs to take place for a lot of descriptors at once, the time spent in m_getjcl looking for memory can cause system unresponsiveness due to the high priority of the Rx task. This can also lead to a driver reset, because the Tx cleanup routine is blocked and the timer service could detect that Tx packets aren't being cleaned up. The reset routine can cause further unresponsiveness - the Rx rings are refilled there, so m_getjcl will again burn the CPU. This was causing NVMe driver timeouts and resets, because the network driver has a higher priority. Instead of 16KB jumbo clusters for the Rx buffers, 9KB clusters are enough - the ENA MTU is set to 9K anyway, so it's very unlikely that more space than 9KB will be needed. However, 9KB jumbo clusters can still cause issues, so by default page size mbuf clusters will be used for the Rx descriptors. This can have a small (~2%) impact on the throughput of the device, so to restore the original behavior, one must set sysctl "hw.ena.enable_9k_mbufs" to "1" in the "/boot/loader.conf" file. As a part of this patch (important fix), the version of the driver was updated to v2.1.2. Submitted by: cperciva Reviewed by: Michal Krawczyk <mk@semihalf.com> Reviewed by: Ido Segev <idose@amazon.com> Reviewed by: Guy Tzalik <gtzalik@amazon.com> MFC after: 3 days PR: 225791, 234838, 235856, 236989, 243531 Differential Revision: https://reviews.freebsd.org/D24546
Notes
Notes: svn path=/head/; revision=360777
Diffstat (limited to 'sys/dev/ena')
-rw-r--r--sys/dev/ena/ena.c10
-rw-r--r--sys/dev/ena/ena.h11
-rw-r--r--sys/dev/ena/ena_sysctl.c11
-rw-r--r--sys/dev/ena/ena_sysctl.h3
4 files changed, 28 insertions, 7 deletions
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index 5506e57ae237..f76c3d467edc 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -368,6 +368,7 @@ ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
ring->ena_dev = adapter->ena_dev;
ring->first_interrupt = false;
ring->no_interrupt_event_cnt = 0;
+ ring->rx_mbuf_sz = ena_mbuf_sz;
}
static void
@@ -508,9 +509,9 @@ ena_setup_rx_dma_tag(struct ena_adapter *adapter)
ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
BUS_SPACE_MAXADDR, /* highaddr of excl window */
NULL, NULL, /* filter, filterarg */
- MJUM16BYTES, /* maxsize */
+ ena_mbuf_sz, /* maxsize */
adapter->max_rx_sgl_size, /* nsegments */
- MJUM16BYTES, /* maxsegsize */
+ ena_mbuf_sz, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockarg */
@@ -963,7 +964,8 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter,
return (0);
/* Get mbuf using UMA allocator */
- rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
+ rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
+ rx_ring->rx_mbuf_sz);
if (unlikely(rx_info->mbuf == NULL)) {
counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
@@ -974,7 +976,7 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter,
}
mlen = MCLBYTES;
} else {
- mlen = MJUM16BYTES;
+ mlen = rx_ring->rx_mbuf_sz;
}
/* Set mbuf length*/
rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
index 570d5a629be4..fc3ee4586b26 100644
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -41,7 +41,7 @@
#define DRV_MODULE_VER_MAJOR 2
#define DRV_MODULE_VER_MINOR 1
-#define DRV_MODULE_VER_SUBMINOR 1
+#define DRV_MODULE_VER_SUBMINOR 2
#define DRV_MODULE_NAME "ena"
@@ -307,8 +307,13 @@ struct ena_ring {
/* Determines if device will use LLQ or normal mode for TX */
enum ena_admin_placement_policy_type tx_mem_queue_type;
- /* The maximum length the driver can push to the device (For LLQ) */
- uint8_t tx_max_header_size;
+ union {
+ /* The maximum length the driver can push to the device (For LLQ) */
+ uint8_t tx_max_header_size;
+ /* The maximum (and default) mbuf size for the Rx descriptor. */
+ uint16_t rx_mbuf_sz;
+
+ };
bool first_interrupt;
uint16_t no_interrupt_event_cnt;
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
index 0272837027c2..563481f9988e 100644
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -48,6 +48,17 @@ int ena_log_level = ENA_ALERT | ENA_WARNING;
SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
&ena_log_level, 0, "Logging level indicating verbosity of the logs");
+/*
+ * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
+ * Using 9k mbufs in low memory conditions might cause allocation to take a lot
+ * of time and lead to OS instability, as it needs to look for contiguous
+ * pages.
+ * However, page size mbufs have slightly lower throughput than 9k mbufs, so if
+ * network performance is the priority, 9k mbufs can be used.
+ */
+int ena_enable_9k_mbufs = 0;
+SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
+ &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
void
ena_sysctl_add_nodes(struct ena_adapter *adapter)
diff --git a/sys/dev/ena/ena_sysctl.h b/sys/dev/ena/ena_sysctl.h
index 5f43d998a3ff..cedb916b980a 100644
--- a/sys/dev/ena/ena_sysctl.h
+++ b/sys/dev/ena/ena_sysctl.h
@@ -41,4 +41,7 @@
void ena_sysctl_add_nodes(struct ena_adapter *);
+extern int ena_enable_9k_mbufs;
+#define ena_mbuf_sz (ena_enable_9k_mbufs ? MJUM9BYTES : MJUMPAGESIZE)
+
#endif /* !(ENA_SYSCTL_H) */