diff options
author | Luigi Rizzo <luigi@FreeBSD.org> | 2014-02-15 04:53:04 +0000 |
---|---|---|
committer | Luigi Rizzo <luigi@FreeBSD.org> | 2014-02-15 04:53:04 +0000 |
commit | f0ea3689a9c1c27067145ed902811149e78cc4fa (patch) | |
tree | 5f40d56905d46741e85cd83a0278b12363e3e2a7 /sys/dev/netmap/netmap_generic.c | |
parent | 53bf5ef829d5fd312db3851ce6cb589173b744e1 (diff) | |
download | src-f0ea3689a9c1c27067145ed902811149e78cc4fa.tar.gz src-f0ea3689a9c1c27067145ed902811149e78cc4fa.zip |
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, I said
*moving* not *processing*);
- kqueue support (bhyve needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
features with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
Notes
Notes:
svn path=/head/; revision=261909
Diffstat (limited to 'sys/dev/netmap/netmap_generic.c')
-rw-r--r-- | sys/dev/netmap/netmap_generic.c | 41 |
1 files changed, 28 insertions, 13 deletions
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index e695fcbd29f8..63253b6b0693 100644 --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); #define rtnl_lock() D("rtnl_lock called"); #define rtnl_unlock() D("rtnl_unlock called"); #define MBUF_TXQ(m) ((m)->m_pkthdr.flowid) +#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid) #define smp_mb() /* @@ -222,6 +223,17 @@ generic_netmap_register(struct netmap_adapter *na, int enable) #endif /* REG_RESET */ if (enable) { /* Enable netmap mode. */ + /* Init the mitigation support. */ + gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (!gna->mit) { + D("mitigation allocation failed"); + error = ENOMEM; + goto out; + } + for (r=0; r<na->num_rx_rings; r++) + netmap_mitigation_init(&gna->mit[r], na); + /* Initialize the rx queue, as generic_rx_handler() can * be called as soon as netmap_catch_rx() returns. */ @@ -229,9 +241,6 @@ generic_netmap_register(struct netmap_adapter *na, int enable) mbq_safe_init(&na->rx_rings[r].rx_queue); } - /* Init the mitigation timer. */ - netmap_mitigation_init(gna); - /* * Preallocate packet buffers for the tx rings. 
*/ @@ -306,7 +315,9 @@ generic_netmap_register(struct netmap_adapter *na, int enable) mbq_safe_destroy(&na->rx_rings[r].rx_queue); } - netmap_mitigation_cleanup(gna); + for (r=0; r<na->num_rx_rings; r++) + netmap_mitigation_cleanup(&gna->mit[r]); + free(gna->mit, M_DEVBUF); for (r=0; r<na->num_tx_rings; r++) { for (i=0; i<na->num_tx_desc; i++) { @@ -344,10 +355,12 @@ free_tx_pools: free(na->tx_rings[r].tx_pool, M_DEVBUF); na->tx_rings[r].tx_pool = NULL; } - netmap_mitigation_cleanup(gna); for (r=0; r<na->num_rx_rings; r++) { + netmap_mitigation_cleanup(&gna->mit[r]); mbq_safe_destroy(&na->rx_rings[r].rx_queue); } + free(gna->mit, M_DEVBUF); +out: return error; } @@ -557,12 +570,11 @@ generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); + IFRATE(rate_ctx.new.txpkt ++); } /* Update hwcur to the next slot to transmit. */ kring->nr_hwcur = nm_i; /* not head, we could break early */ - - IFRATE(rate_ctx.new.txpkt += ntx); } /* @@ -600,7 +612,11 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m) struct netmap_adapter *na = NA(ifp); struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; u_int work_done; - u_int rr = 0; // receive ring number + u_int rr = MBUF_RXQ(m); // receive ring number + + if (rr >= na->num_rx_rings) { + rr = rr % na->num_rx_rings; // XXX expensive... + } /* limit the size of the queue */ if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) { @@ -617,13 +633,13 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m) /* same as send combining, filter notification if there is a * pending timer, otherwise pass it up and start a timer. */ - if (likely(netmap_mitigation_active(gna))) { + if (likely(netmap_mitigation_active(&gna->mit[rr]))) { /* Record that there is some pending work. 
*/ - gna->mit_pending = 1; + gna->mit[rr].mit_pending = 1; } else { netmap_generic_irq(na->ifp, rr, &work_done); IFRATE(rate_ctx.new.rxirq++); - netmap_mitigation_start(gna); + netmap_mitigation_start(&gna->mit[rr]); } } } @@ -682,7 +698,6 @@ generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) ring->slot[nm_i].flags = slot_flags; m_freem(m); nm_i = nm_next(nm_i, lim); - n++; } if (n) { kring->nr_hwtail = nm_i; @@ -772,7 +787,7 @@ generic_netmap_attach(struct ifnet *ifp) /* when using generic, IFCAP_NETMAP is set so we force * NAF_SKIP_INTR to use the regular interrupt handler */ - na->na_flags = NAF_SKIP_INTR; + na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS; ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", ifp->num_tx_queues, ifp->real_num_tx_queues, |