aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap/netmap_generic.c
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2014-02-15 04:53:04 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2014-02-15 04:53:04 +0000
commitf0ea3689a9c1c27067145ed902811149e78cc4fa (patch)
tree5f40d56905d46741e85cd83a0278b12363e3e2a7 /sys/dev/netmap/netmap_generic.c
parent53bf5ef829d5fd312db3851ce6cb589173b744e1 (diff)
downloadsrc-f0ea3689a9c1c27067145ed902811149e78cc4fa.tar.gz
src-f0ea3689a9c1c27067145ed902811149e78cc4fa.zip
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, I said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this, any libpcap client has access to most netmap features with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
Notes
Notes: svn path=/head/; revision=261909
Diffstat (limited to 'sys/dev/netmap/netmap_generic.c')
-rw-r--r--sys/dev/netmap/netmap_generic.c41
1 files changed, 28 insertions, 13 deletions
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index e695fcbd29f8..63253b6b0693 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#define rtnl_lock() D("rtnl_lock called");
#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
+#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
/*
@@ -222,6 +223,17 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
#endif /* REG_RESET */
if (enable) { /* Enable netmap mode. */
+ /* Init the mitigation support. */
+ gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!gna->mit) {
+ D("mitigation allocation failed");
+ error = ENOMEM;
+ goto out;
+ }
+ for (r=0; r<na->num_rx_rings; r++)
+ netmap_mitigation_init(&gna->mit[r], na);
+
/* Initialize the rx queue, as generic_rx_handler() can
* be called as soon as netmap_catch_rx() returns.
*/
@@ -229,9 +241,6 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
mbq_safe_init(&na->rx_rings[r].rx_queue);
}
- /* Init the mitigation timer. */
- netmap_mitigation_init(gna);
-
/*
* Preallocate packet buffers for the tx rings.
*/
@@ -306,7 +315,9 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
}
- netmap_mitigation_cleanup(gna);
+ for (r=0; r<na->num_rx_rings; r++)
+ netmap_mitigation_cleanup(&gna->mit[r]);
+ free(gna->mit, M_DEVBUF);
for (r=0; r<na->num_tx_rings; r++) {
for (i=0; i<na->num_tx_desc; i++) {
@@ -344,10 +355,12 @@ free_tx_pools:
free(na->tx_rings[r].tx_pool, M_DEVBUF);
na->tx_rings[r].tx_pool = NULL;
}
- netmap_mitigation_cleanup(gna);
for (r=0; r<na->num_rx_rings; r++) {
+ netmap_mitigation_cleanup(&gna->mit[r]);
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
}
+ free(gna->mit, M_DEVBUF);
+out:
return error;
}
@@ -557,12 +570,11 @@ generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
nm_i = nm_next(nm_i, lim);
+ IFRATE(rate_ctx.new.txpkt ++);
}
/* Update hwcur to the next slot to transmit. */
kring->nr_hwcur = nm_i; /* not head, we could break early */
-
- IFRATE(rate_ctx.new.txpkt += ntx);
}
/*
@@ -600,7 +612,11 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
struct netmap_adapter *na = NA(ifp);
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
u_int work_done;
- u_int rr = 0; // receive ring number
+ u_int rr = MBUF_RXQ(m); // receive ring number
+
+ if (rr >= na->num_rx_rings) {
+ rr = rr % na->num_rx_rings; // XXX expensive...
+ }
/* limit the size of the queue */
if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
@@ -617,13 +633,13 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
/* same as send combining, filter notification if there is a
* pending timer, otherwise pass it up and start a timer.
*/
- if (likely(netmap_mitigation_active(gna))) {
+ if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
/* Record that there is some pending work. */
- gna->mit_pending = 1;
+ gna->mit[rr].mit_pending = 1;
} else {
netmap_generic_irq(na->ifp, rr, &work_done);
IFRATE(rate_ctx.new.rxirq++);
- netmap_mitigation_start(gna);
+ netmap_mitigation_start(&gna->mit[rr]);
}
}
}
@@ -682,7 +698,6 @@ generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
ring->slot[nm_i].flags = slot_flags;
m_freem(m);
nm_i = nm_next(nm_i, lim);
- n++;
}
if (n) {
kring->nr_hwtail = nm_i;
@@ -772,7 +787,7 @@ generic_netmap_attach(struct ifnet *ifp)
/* when using generic, IFCAP_NETMAP is set so we force
* NAF_SKIP_INTR to use the regular interrupt handler
*/
- na->na_flags = NAF_SKIP_INTR;
+ na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
ifp->num_tx_queues, ifp->real_num_tx_queues,