diff options
Diffstat (limited to 'tools/tools/netmap')
-rw-r--r-- | tools/tools/netmap/Makefile | 21 | ||||
-rw-r--r-- | tools/tools/netmap/README | 17 | ||||
-rw-r--r-- | tools/tools/netmap/bridge.c | 114 | ||||
-rw-r--r-- | tools/tools/netmap/click-test.cfg | 19 | ||||
-rw-r--r-- | tools/tools/netmap/nm_util.c | 278 | ||||
-rw-r--r-- | tools/tools/netmap/nm_util.h | 127 | ||||
-rw-r--r-- | tools/tools/netmap/pcap.c | 528 | ||||
-rw-r--r-- | tools/tools/netmap/pkt-gen.c | 450 | ||||
-rw-r--r-- | tools/tools/netmap/vale-ctl.c | 1 |
9 files changed, 306 insertions, 1249 deletions
diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile index e873389c7179..c50247366b5a 100644 --- a/tools/tools/netmap/Makefile +++ b/tools/tools/netmap/Makefile @@ -3,11 +3,11 @@ # # For multiple programs using a single source file each, # we can just define 'progs' and create custom targets. -PROGS = pkt-gen bridge vale-ctl testpcap libnetmap.so +PROGS = pkt-gen bridge vale-ctl -CLEANFILES = $(PROGS) pcap.o nm_util.o +CLEANFILES = $(PROGS) *.o NO_MAN= -CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys +CFLAGS += -Werror -Wall # -nostdinc -I/usr/include -I../../../sys CFLAGS += -Wextra LDFLAGS += -lpthread @@ -22,12 +22,11 @@ LDFLAGS += -lpcap all: $(PROGS) -pkt-gen bridge: nm_util.o - $(CC) $(CFLAGS) -o ${.TARGET} ${.TARGET:=.c} nm_util.o $(LDFLAGS) +pkt-gen: pkt-gen.o + $(CC) $(CFLAGS) -o pkt-gen pkt-gen.o $(LDFLAGS) -testpcap: pcap.c libnetmap.so - $(CC) $(CFLAGS) -DTEST -L. -lnetmap -o ${.TARGET} pcap.c - -libnetmap.so: pcap.c nm_util.c - $(CC) $(CFLAGS) -fpic -c ${.ALLSRC} - $(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o} +bridge: bridge.o + $(CC) $(CFLAGS) -o bridge bridge.o + +vale-ctl: vale-ctl.o + $(CC) $(CFLAGS) -o vale-ctl vale-ctl.o diff --git a/tools/tools/netmap/README b/tools/tools/netmap/README index 2bde6f2ab4d8..40378e62bbe6 100644 --- a/tools/tools/netmap/README +++ b/tools/tools/netmap/README @@ -6,19 +6,4 @@ This directory contains examples that use netmap bridge a two-port jumper wire, also using the native API - testpcap a jumper wire using libnetmap (or libpcap) - - click* various click examples - ------------------------------------------------------------- -Some performance data as of may 2012 for applications using libpcap. -Throughput is generally in Mpps computed with the 64-byte frames, -using 1 core on a 2.9GHz CPU and 10Gbit/s interface - -Libpcap version -- Application --------------------- -BSD netmap ---------------------------------------------------- - 0.77 3.82 ports/trafshow (version 5) - 0.94 7.7 net-mgmt/ipcad (ip accounting daemon) - 0.9 5.0 net-mgmt/darkstat (ip accounting + graphing) - 0.83 2.45 net-mgmt/iftop (curses traffic display) + vale-ctl the program to control VALE bridges diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c index cab545bfc919..0895d4ede676 100644 --- a/tools/tools/netmap/bridge.c +++ b/tools/tools/netmap/bridge.c @@ -9,14 +9,15 @@ * $FreeBSD$ */ -#include "nm_util.h" - +#include <stdio.h> +#define NETMAP_WITH_LIBS +#include <net/netmap_user.h> +#include <sys/poll.h> int verbose = 0; -char *version = "$Id$"; - static int do_abort = 0; +static int zerocopy = 1; /* enable zerocopy if possible */ static void sigint_h(int sig) @@ -28,6 +29,26 @@ sigint_h(int sig) /* + * how many packets on this set of queues ? + */ +int +pkt_queued(struct nm_desc *d, int tx) +{ + u_int i, tot = 0; + + if (tx) { + for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) { + tot += nm_ring_space(NETMAP_TXRING(d->nifp, i)); + } + } else { + for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) { + tot += nm_ring_space(NETMAP_RXRING(d->nifp, i)); + } + } + return tot; +} + +/* * move up to 'limit' pkts from rxring to txring swapping buffers. */ static int @@ -52,12 +73,6 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, while (limit-- > 0) { struct netmap_slot *rs = &rxring->slot[j]; struct netmap_slot *ts = &txring->slot[k]; -#ifdef NO_SWAP - char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx); - char *txbuf = NETMAP_BUF(txring, ts->buf_idx); -#else - uint32_t pkt; -#endif /* swap packets */ if (ts->buf_idx < 2 || rs->buf_idx < 2) { @@ -65,24 +80,26 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, j, rs->buf_idx, k, ts->buf_idx); sleep(2); } -#ifndef NO_SWAP - pkt = ts->buf_idx; - ts->buf_idx = rs->buf_idx; - rs->buf_idx = pkt; -#endif /* copy the packet length. */ - if (rs->len < 14 || rs->len > 2048) + if (rs->len > 2048) { D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k); - else if (verbose > 1) + rs->len = 0; + } else if (verbose > 1) { D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k); + } ts->len = rs->len; -#ifdef NO_SWAP - pkt_copy(rxbuf, txbuf, ts->len); -#else - /* report the buffer change. */ - ts->flags |= NS_BUF_CHANGED; - rs->flags |= NS_BUF_CHANGED; -#endif /* NO_SWAP */ + if (zerocopy) { + uint32_t pkt = ts->buf_idx; + ts->buf_idx = rs->buf_idx; + rs->buf_idx = pkt; + /* report the buffer change. */ + ts->flags |= NS_BUF_CHANGED; + rs->flags |= NS_BUF_CHANGED; + } else { + char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx); + char *txbuf = NETMAP_BUF(txring, ts->buf_idx); + nm_pkt_copy(rxbuf, txbuf, ts->len); + } j = nm_ring_next(rxring, j); k = nm_ring_next(txring, k); } @@ -96,7 +113,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, /* move packts from src to destination */ static int -move(struct nm_desc_t *src, struct nm_desc_t *dst, u_int limit) +move(struct nm_desc *src, struct nm_desc *dst, u_int limit) { struct netmap_ring *txring, *rxring; u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring; @@ -104,8 +121,8 @@ move(struct nm_desc_t *src, struct nm_desc_t *dst, u_int limit) "host->net" : "net->host"; while (si <= src->last_rx_ring && di <= dst->last_tx_ring) { - rxring = src->tx + si; - txring = dst->tx + di; + rxring = NETMAP_RXRING(src->nifp, si); + txring = NETMAP_TXRING(dst->nifp, di); ND("txring %p rxring %p", txring, rxring); if (nm_ring_empty(rxring)) { si++; @@ -141,15 +158,16 @@ int main(int argc, char **argv) { struct pollfd pollfd[2]; - int i, ch; + int ch; u_int burst = 1024, wait_link = 4; - struct nm_desc_t *pa = NULL, *pb = NULL; + struct nm_desc *pa = NULL, *pb = NULL; char *ifa = NULL, *ifb = NULL; + char ifabuf[64] = { 0 }; - fprintf(stderr, "%s %s built %s %s\n", - argv[0], version, __DATE__, __TIME__); + fprintf(stderr, "%s built %s %s\n", + argv[0], __DATE__, __TIME__); - while ( (ch = getopt(argc, argv, "b:i:vw:")) != -1) { + while ( (ch = getopt(argc, argv, "b:ci:vw:")) != -1) { switch (ch) { default: D("bad option %c %s", ch, optarg); @@ -167,6 +185,9 @@ main(int argc, char **argv) D("%s ignored, already have 2 interfaces", optarg); break; + case 'c': + zerocopy = 0; /* do not zerocopy */ + break; case 'v': verbose++; break; @@ -202,20 +223,25 @@ main(int argc, char **argv) } if (!strcmp(ifa, ifb)) { D("same interface, endpoint 0 goes to host"); - i = NETMAP_SW_RING; + snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa); + ifa = ifabuf; } else { /* two different interfaces. Take all rings on if1 */ - i = 0; // all hw rings } - pa = netmap_open(ifa, i, 1); - if (pa == NULL) + pa = nm_open(ifa, NULL, 0, NULL); + if (pa == NULL) { + D("cannot open %s", ifa); return (1); + } // XXX use a single mmap ? - pb = netmap_open(ifb, 0, 1); + pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa); if (pb == NULL) { + D("cannot open %s", ifb); nm_close(pa); return (1); } + zerocopy = zerocopy && (pa->mem == pb->mem); + D("------- zerocopy %ssupported", zerocopy ? "" : "NOT "); /* setup poll(2) variables. */ memset(pollfd, 0, sizeof(pollfd)); @@ -252,23 +278,25 @@ main(int argc, char **argv) pollfd[0].events, pollfd[0].revents, pkt_queued(pa, 0), - pa->rx->cur, + NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur, pkt_queued(pa, 1), pollfd[1].events, pollfd[1].revents, pkt_queued(pb, 0), - pb->rx->cur, + NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur, pkt_queued(pb, 1) ); if (ret < 0) continue; if (pollfd[0].revents & POLLERR) { - D("error on fd0, rx [%d,%d)", - pa->rx->cur, pa->rx->tail); + struct netmap_ring *rx = NETMAP_RXRING(pa->nifp, pa->cur_rx_ring); + D("error on fd0, rx [%d,%d,%d)", + rx->head, rx->cur, rx->tail); } if (pollfd[1].revents & POLLERR) { - D("error on fd1, rx [%d,%d)", - pb->rx->cur, pb->rx->tail); + struct netmap_ring *rx = NETMAP_RXRING(pb->nifp, pb->cur_rx_ring); + D("error on fd1, rx [%d,%d,%d)", + rx->head, rx->cur, rx->tail); } if (pollfd[0].revents & POLLOUT) { move(pb, pa, burst); diff --git a/tools/tools/netmap/click-test.cfg b/tools/tools/netmap/click-test.cfg deleted file mode 100644 index fc5759f88b1e..000000000000 --- a/tools/tools/netmap/click-test.cfg +++ /dev/null @@ -1,19 +0,0 @@ -// -// $FreeBSD$ -// -// A sample test configuration for click -// -// -// create a switch - -myswitch :: EtherSwitch; - -// two input devices - -c0 :: FromDevice(ix0, PROMISC true); -c1 :: FromDevice(ix1, PROMISC true); - -// and now pass packets around - -c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0); -c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1); diff --git a/tools/tools/netmap/nm_util.c b/tools/tools/netmap/nm_util.c deleted file mode 100644 index deb52bbc87e4..000000000000 --- a/tools/tools/netmap/nm_util.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - * $Id$ - * - * utilities to use netmap devices. - * This does the basic functions of opening a device and issuing - * ioctls() - */ - -#include "nm_util.h" - -extern int verbose; - -int -nm_do_ioctl(struct nm_desc_t *me, u_long what, int subcmd) -{ - struct ifreq ifr; - int error; - int fd; - -#if defined( __FreeBSD__ ) || defined (__APPLE__) - (void)subcmd; // only used on Linux - fd = me->fd; -#endif - -#ifdef linux - struct ethtool_value eval; - - bzero(&eval, sizeof(eval)); - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - printf("Error: cannot get device control socket.\n"); - return -1; - } -#endif /* linux */ - - bzero(&ifr, sizeof(ifr)); - strncpy(ifr.ifr_name, me->req.nr_name, sizeof(ifr.ifr_name)); - switch (what) { - case SIOCSIFFLAGS: -#ifndef __APPLE__ - ifr.ifr_flagshigh = me->if_flags >> 16; -#endif - ifr.ifr_flags = me->if_flags & 0xffff; - break; - -#if defined( __FreeBSD__ ) - case SIOCSIFCAP: - ifr.ifr_reqcap = me->if_reqcap; - ifr.ifr_curcap = me->if_curcap; - break; -#endif - -#ifdef linux - case SIOCETHTOOL: - eval.cmd = subcmd; - eval.data = 0; - ifr.ifr_data = (caddr_t)&eval; - break; -#endif /* linux */ - } - error = ioctl(fd, what, &ifr); - if (error) - goto done; - switch (what) { - case SIOCGIFFLAGS: -#ifndef __APPLE__ - me->if_flags = (ifr.ifr_flagshigh << 16) | - (0xffff & ifr.ifr_flags); -#endif - if (verbose) - D("flags are 0x%x", me->if_flags); - break; - -#if defined( __FreeBSD__ ) - case SIOCGIFCAP: - me->if_reqcap = ifr.ifr_reqcap; - me->if_curcap = ifr.ifr_curcap; - if (verbose) - D("curcap are 0x%x", me->if_curcap); - break; -#endif /* __FreeBSD__ */ - } -done: -#ifdef linux - close(fd); -#endif - if (error) - D("ioctl error %d %lu", error, what); - return error; -} - -/* - * open a device. if me->mem is null then do an mmap. - * Returns the file descriptor. - * The extra flag checks configures promisc mode. - */ -struct nm_desc_t * -netmap_open(const char *name, int ringid, int promisc) -{ - struct nm_desc_t *d = nm_open(name, NULL, ringid, 0); - - if (d == NULL) - return d; - - if (verbose) - D("memsize is %d MB", d->req.nr_memsize>>20); - - /* Set the operating mode. */ - if (ringid != NETMAP_SW_RING) { - nm_do_ioctl(d, SIOCGIFFLAGS, 0); - if ((d->if_flags & IFF_UP) == 0) { - D("%s is down, bringing up...", name); - d->if_flags |= IFF_UP; - } - if (promisc) { - d->if_flags |= IFF_PPROMISC; - nm_do_ioctl(d, SIOCSIFFLAGS, 0); - } - - /* disable GSO, TSO, RXCSUM, TXCSUM... - * TODO: set them back when done. - */ -#ifdef __FreeBSD__ - nm_do_ioctl(d, SIOCGIFCAP, 0); - d->if_reqcap = d->if_curcap; - d->if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); - nm_do_ioctl(d, SIOCSIFCAP, 0); -#endif -#ifdef linux - nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_SGSO); - nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_STSO); - nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_SRXCSUM); - nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_STXCSUM); -#endif /* linux */ - } - - return d; -} - - -/* - * how many packets on this set of queues ? - */ -int -pkt_queued(struct nm_desc_t *d, int tx) -{ - u_int i, tot = 0; - - ND("me %p begin %d end %d", me, me->begin, me->end); - if (tx) { - for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) - tot += nm_ring_space(d->tx + i); - } else { - for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) - tot += nm_ring_space(d->rx + i); - } - return tot; -} - -#if 0 - -/* - * - -Helper routines for multiple readers from the same queue - -- all readers open the device in 'passive' mode (NETMAP_PRIV_RING set). - In this mode a thread that loses the race on a poll() just continues - without calling *xsync() - -- all readers share an extra 'ring' which contains the sync information. - In particular we have a shared head+tail pointers that work - together with cur and available - ON RETURN FROM THE SYSCALL: - shadow->cur = ring->cur - shadow->tail = ring->tail - shadow->link[i] = i for all slots // mark invalid - - */ - -struct nm_q_arg { - u_int want; /* Input */ - u_int have; /* Output, 0 on error */ - u_int cur; - u_int tail; - struct netmap_ring *ring; -}; - -/* - * grab a number of slots from the queue. - */ -struct nm_q_arg -my_grab(struct nm_q_arg q) -{ - const u_int ns = q.ring->num_slots; - - // lock(ring); - for (;;) { - - q.cur = (volatile u_int)q.ring->head; - q.have = ns + q.head - (volatile u_int)q.ring->tail; - if (q.have >= ns) - q.have -= ns; - if (q.have == 0) /* no space; caller may ioctl/retry */ - break; - if (q.want < q.have) - q.have = q.want; - q.tail = q.cur + q.have; - if (q.tail >= ns) - q.tail -= ns; - if (atomic_cmpset_int(&q.ring->cur, q.cur, q.tail) - break; /* success */ - } - // unlock(ring); - D("returns %d out of %d at %d,%d", - q.have, q.want, q.cur, q.tail); - /* the last one can clear avail ? */ - return q; -} - - -int -my_release(struct nm_q_arg q) -{ - u_int cur = q.cur, tail = q.tail, i; - struct netmap_ring *r = q.ring; - - /* link the block to the next one. - * there is no race here because the location is mine. - */ - r->slot[cur].ptr = tail; /* this is mine */ - r->slot[cur].flags |= NM_SLOT_PTR; // points to next block - // memory barrier - // lock(ring); - if (r->head != cur) - goto done; - for (;;) { - // advance head - r->head = head = r->slot[head].ptr; - // barrier ? - if (head == r->slot[head].ptr) - break; // stop here - } - /* we have advanced from q.head to head (r.head might be - * further down. - */ - // do an ioctl/poll to flush. -done: - // unlock(ring); - return; /* not my turn to release */ -} -#endif /* unused */ diff --git a/tools/tools/netmap/nm_util.h b/tools/tools/netmap/nm_util.h deleted file mode 100644 index 0ab2e2e81984..000000000000 --- a/tools/tools/netmap/nm_util.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - * $Id$ - * - * Some utilities to build netmap-based programs. - */ - -#ifndef _NM_UTIL_H -#define _NM_UTIL_H - -#define _GNU_SOURCE /* for CPU_SET() */ - -#include <stdio.h> /* fprintf */ -#include <sys/poll.h> /* POLLIN */ -#include <inttypes.h> /* PRI* macros */ -#include <sys/types.h> /* u_char */ - -#include <arpa/inet.h> /* ntohs */ -#include <sys/sysctl.h> /* sysctl */ -#include <ifaddrs.h> /* getifaddrs */ -#include <net/ethernet.h> /* ETHERTYPE_IP */ -#include <netinet/in.h> /* IPPROTO_* */ -#include <netinet/ip.h> /* struct ip */ -#include <netinet/udp.h> /* struct udp */ - - -#define NETMAP_WITH_LIBS -#include <net/netmap_user.h> - -#include <pthread.h> /* pthread_* */ - -#ifdef linux - -#define cpuset_t cpu_set_t - -#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ -#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ -#include <linux/ethtool.h> -#include <linux/sockios.h> - -#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME -#include <netinet/ether.h> /* ether_aton */ -#include <linux/if_packet.h> /* sockaddr_ll */ -#endif /* linux */ - -#ifdef __FreeBSD__ -#include <sys/endian.h> /* le64toh */ -#include <machine/param.h> - -#include <pthread_np.h> /* pthread w/ affinity */ -#include <sys/cpuset.h> /* cpu_set */ -#include <net/if_dl.h> /* LLADDR */ -#endif /* __FreeBSD__ */ - -#ifdef __APPLE__ - -#define cpuset_t uint64_t // XXX -static inline void CPU_ZERO(cpuset_t *p) -{ - *p = 0; -} - -static inline void CPU_SET(uint32_t i, cpuset_t *p) -{ - *p |= 1<< (i & 0x3f); -} - -#define pthread_setaffinity_np(a, b, c) ((void)a, 0) - -#define ifr_flagshigh ifr_flags // XXX -#define IFF_PPROMISC IFF_PROMISC -#include <net/if_dl.h> /* LLADDR */ -#define clock_gettime(a,b) \ - do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) -#endif /* __APPLE__ */ - -static inline int min(int a, int b) { return a < b ? a : b; } -extern int time_second; - -/* debug support */ -#define ND(format, ...) do {} while(0) -#define D(format, ...) \ - fprintf(stderr, "%s [%d] " format "\n", \ - __FUNCTION__, __LINE__, ##__VA_ARGS__) - -#define RD(lps, format, ...) \ - do { \ - static int t0, cnt; \ - if (t0 != time_second) { \ - t0 = time_second; \ - cnt = 0; \ - } \ - if (cnt++ < lps) \ - D(format, ##__VA_ARGS__); \ - } while (0) - - - -struct nm_desc_t * netmap_open(const char *name, int ringid, int promisc); -int nm_do_ioctl(struct nm_desc_t *me, u_long what, int subcmd); -int pkt_queued(struct nm_desc_t *d, int tx); -#endif /* _NM_UTIL_H */ diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c deleted file mode 100644 index b3c2be5d23ff..000000000000 --- a/tools/tools/netmap/pcap.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * (C) 2011-2014 Luigi Rizzo - * - * BSD license - * - * A simple library that maps some pcap functions onto netmap - * This is not 100% complete but enough to let tcpdump, trafshow - * and other apps work. - * - * $FreeBSD$ - */ - -#define MY_PCAP -#include "nm_util.h" - -char *version = "$Id$"; -int verbose = 0; - -/* - * We redefine here a number of structures that are in pcap.h - * so we can compile this file without the system header. - */ -#ifndef PCAP_ERRBUF_SIZE -#define PCAP_ERRBUF_SIZE 128 -/* - * Each packet is accompanied by a header including the timestamp, - * captured size and actual size. - */ -struct pcap_pkthdr { - struct timeval ts; /* time stamp */ - uint32_t caplen; /* length of portion present */ - uint32_t len; /* length this packet (off wire) */ -}; - -typedef struct pcap_if pcap_if_t; - -/* - * Representation of an interface address. - */ -struct pcap_addr { - struct pcap_addr *next; - struct sockaddr *addr; /* address */ - struct sockaddr *netmask; /* netmask for the above */ - struct sockaddr *broadaddr; /* broadcast addr for the above */ - struct sockaddr *dstaddr; /* P2P dest. address for the above */ -}; - -struct pcap_if { - struct pcap_if *next; - char *name; /* name to hand to "pcap_open_live()" */ - char *description; /* textual description of interface, or NULL */ - struct pcap_addr *addresses; - uint32_t flags; /* PCAP_IF_ interface flags */ -}; - -/* - * We do not support stats (yet) - */ -struct pcap_stat { - u_int ps_recv; /* number of packets received */ - u_int ps_drop; /* number of packets dropped */ - u_int ps_ifdrop; /* drops by interface XXX not yet supported */ -#ifdef WIN32 - u_int bs_capt; /* number of packets that reach the app. */ -#endif /* WIN32 */ -}; - -typedef struct nm_desc_t pcap_t; -typedef enum { - PCAP_D_INOUT = 0, - PCAP_D_IN, - PCAP_D_OUT -} pcap_direction_t; - - - -typedef void (*pcap_handler)(u_char *user, - const struct pcap_pkthdr *h, const u_char *bytes); - -char errbuf[PCAP_ERRBUF_SIZE]; - -pcap_t *pcap_open_live(const char *device, int snaplen, - int promisc, int to_ms, char *errbuf); - -int pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf); -void pcap_close(pcap_t *p); -int pcap_get_selectable_fd(pcap_t *p); -int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user); -int pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf); -int pcap_setdirection(pcap_t *p, pcap_direction_t d); -char *pcap_lookupdev(char *errbuf); -int pcap_inject(pcap_t *p, const void *buf, size_t size); -int pcap_fileno(pcap_t *p); -const char *pcap_lib_version(void); - - -struct eproto { - const char *s; - u_short p; -}; -#endif /* !PCAP_ERRBUF_SIZE */ - -#ifndef TEST -/* - * build as a shared library - */ - -char pcap_version[] = "libnetmap version 0.3"; - - -/* - * There is a set of functions that tcpdump expects even if probably - * not used - */ -struct eproto eproto_db[] = { - { "ip", ETHERTYPE_IP }, - { "arp", ETHERTYPE_ARP }, - { (char *)0, 0 } -}; - - -const char *pcap_lib_version(void) -{ - return pcap_version; -} - -int -pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf) -{ - pcap_if_t *top = NULL; -#ifndef linux - struct ifaddrs *i_head, *i; - pcap_if_t *cur; - struct pcap_addr *tail = NULL; - int l; - - D("listing all devs"); - *alldevsp = NULL; - i_head = NULL; - - if (getifaddrs(&i_head)) { - D("cannot get if addresses"); - return -1; - } - for (i = i_head; i; i = i->ifa_next) { - //struct ifaddrs *ifa; - struct pcap_addr *pca; - //struct sockaddr *sa; - - D("got interface %s", i->ifa_name); - if (!top || strcmp(top->name, i->ifa_name)) { - /* new interface */ - l = sizeof(*top) + strlen(i->ifa_name) + 1; - cur = calloc(1, l); - if (cur == NULL) { - D("no space for if descriptor"); - continue; - } - cur->name = (char *)(cur + 1); - //cur->flags = i->ifa_flags; - strcpy(cur->name, i->ifa_name); - cur->description = NULL; - cur->next = top; - top = cur; - tail = NULL; - } - /* now deal with addresses */ - D("%s addr family %d len %d %s %s", - top->name, - i->ifa_addr->sa_family, i->ifa_addr->sa_len, - i->ifa_netmask ? "Netmask" : "", - i->ifa_broadaddr ? "Broadcast" : ""); - l = sizeof(struct pcap_addr) + - (i->ifa_addr ? i->ifa_addr->sa_len:0) + - (i->ifa_netmask ? i->ifa_netmask->sa_len:0) + - (i->ifa_broadaddr? i->ifa_broadaddr->sa_len:0); - pca = calloc(1, l); - if (pca == NULL) { - D("no space for if addr"); - continue; - } -#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len)) - pca->addr = (struct sockaddr *)(pca + 1); - pkt_copy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len); - if (i->ifa_netmask) { - pca->netmask = SA_NEXT(pca->addr); - bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len); - if (i->ifa_broadaddr) { - pca->broadaddr = SA_NEXT(pca->netmask); - bcopy(i->ifa_broadaddr, pca->broadaddr, i->ifa_broadaddr->sa_len); - } - } - if (tail == NULL) { - top->addresses = pca; - } else { - tail->next = pca; - } - tail = pca; - - } - freeifaddrs(i_head); -#endif /* !linux */ - (void)errbuf; /* UNUSED */ - *alldevsp = top; - return 0; -} - -void pcap_freealldevs(pcap_if_t *alldevs) -{ - (void)alldevs; /* UNUSED */ - D("unimplemented"); -} - -char * -pcap_lookupdev(char *buf) -{ - D("%s", buf); - strcpy(buf, "/dev/netmap"); - return buf; -} - -pcap_t * -pcap_create(const char *source, char *errbuf) -{ - D("src %s (call open liveted)", source); - return pcap_open_live(source, 0, 1, 100, errbuf); -} - -int -pcap_activate(pcap_t *p) -{ - D("pcap %p running", p); - return 0; -} - -int -pcap_can_set_rfmon(pcap_t *p) -{ - (void)p; /* UNUSED */ - D(""); - return 0; /* no we can't */ -} - -int -pcap_set_snaplen(pcap_t *p, int snaplen) -{ - struct nm_desc_t *me = p; - - D("len %d", snaplen); - me->snaplen = snaplen; - return 0; -} - -int -pcap_snapshot(pcap_t *p) -{ - struct nm_desc_t *me = p; - - D("len %d", me->snaplen); - return me->snaplen; -} - -int -pcap_lookupnet(const char *device, uint32_t *netp, - uint32_t *maskp, char *errbuf) -{ - - (void)errbuf; /* UNUSED */ - D("device %s", device); - inet_aton("10.0.0.255", (struct in_addr *)netp); - inet_aton("255.255.255.0",(struct in_addr *) maskp); - return 0; -} - -int -pcap_set_promisc(pcap_t *p, int promisc) -{ - D("promisc %d", promisc); - if (nm_do_ioctl(p, SIOCGIFFLAGS, 0)) - D("SIOCGIFFLAGS failed"); - if (promisc) { - p->if_flags |= IFF_PPROMISC; - } else { - p->if_flags &= ~IFF_PPROMISC; - } - if (nm_do_ioctl(p, SIOCSIFFLAGS, 0)) - D("SIOCSIFFLAGS failed"); - return 0; -} - -int -pcap_set_timeout(pcap_t *p, int to_ms) -{ - D("%d ms", to_ms); - p->to_ms = to_ms; - return 0; -} - -struct bpf_program; - -int -pcap_compile(pcap_t *p, struct bpf_program *fp, - const char *str, int optimize, uint32_t netmask) -{ - (void)p; /* UNUSED */ - (void)fp; /* UNUSED */ - (void)optimize; /* UNUSED */ - (void)netmask; /* UNUSED */ - D("%s", str); - return 0; -} - -int -pcap_setfilter(pcap_t *p, struct bpf_program *fp) -{ - (void)p; /* UNUSED */ - (void)fp; /* UNUSED */ - D(""); - return 0; -} - -int -pcap_datalink(pcap_t *p) -{ - (void)p; /* UNUSED */ - D("returns 1"); - return 1; // ethernet -} - -const char * -pcap_datalink_val_to_name(int dlt) -{ - D("%d returns DLT_EN10MB", dlt); - return "DLT_EN10MB"; -} - -const char * -pcap_datalink_val_to_description(int dlt) -{ - D("%d returns Ethernet link", dlt); - return "Ethernet link"; -} - -struct pcap_stat; -int -pcap_stats(pcap_t *p, struct pcap_stat *ps) -{ - *ps = *(struct pcap_stat *)(void *)&(p->st); - return 0; /* accumulate from pcap_dispatch() */ -}; - -char * -pcap_geterr(pcap_t *p) -{ - D(""); - return p->msg; -} - -pcap_t * -pcap_open_live(const char *device, int snaplen, - int promisc, int to_ms, char *errbuf) -{ - struct nm_desc_t *d; - int l; - - if (!device) { - D("missing device name"); - return NULL; - } - - l = strlen(device) + 1; - D("request to open %s snaplen %d promisc %d timeout %dms", - device, snaplen, promisc, to_ms); - d = nm_open(device, NULL, 0, 0); - if (d == NULL) { - D("error opening %s", device); - return NULL; - } - d->to_ms = to_ms; - d->snaplen = snaplen; - d->errbuf = errbuf; - d->promisc = promisc; - - return d; -} - -void -pcap_close(pcap_t *p) -{ - nm_close(p); - /* restore original flags ? */ -} - -int -pcap_fileno(pcap_t *p) -{ - struct nm_desc_t *d = p; - D("returns %d", d->fd); - return d->fd; -} - -int -pcap_get_selectable_fd(pcap_t *p) -{ - struct nm_desc_t *d = p; - - return d->fd; -} - -int -pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf) -{ - (void)p; /* UNUSED */ - (void)errbuf; /* UNUSED */ - D("mode is %d", nonblock); - return 0; /* ignore */ -} - -int -pcap_setdirection(pcap_t *p, pcap_direction_t d) -{ - (void)p; /* UNUSED */ - (void)d; /* UNUSED */ - D(""); - return 0; /* ignore */ -}; - -int -pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user) -{ - return nm_dispatch(p, cnt, (void *)callback, user); -} - -int -pcap_inject(pcap_t *p, const void *buf, size_t size) -{ - return nm_inject(p, buf, size); -} - -int -pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user) -{ - struct pollfd fds[1]; - int i; - - ND("cnt %d", cnt); - memset(fds, 0, sizeof(fds)); - fds[0].fd = p->fd; - fds[0].events = (POLLIN); - - while (cnt == -1 || cnt > 0) { - if (poll(fds, 1, p->to_ms) <= 0) { - D("poll error/timeout"); - continue; - } - i = nm_dispatch(p, cnt, (void *)callback, user); - if (cnt > 0) - cnt -= i; - } - return 0; -} - -#endif /* !TEST */ - -#ifdef TEST /* build test code */ -void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf) -{ - pcap_inject((pcap_t *)user, buf, h->caplen); -} - -/* - * a simple pcap test program, bridge between two interfaces. - */ -int -main(int argc, char **argv) -{ - pcap_t *p0, *p1; - int burst = 1024; - struct pollfd pollfd[2]; - - fprintf(stderr, "%s %s built %s %s\n", - argv[0], version, __DATE__, __TIME__); - - while (argc > 1 && !strcmp(argv[1], "-v")) { - verbose++; - argv++; - argc--; - } - - if (argc < 3 || argc > 4 || !strcmp(argv[1], argv[2])) { - D("Usage: %s IFNAME1 IFNAME2 [BURST]", argv[0]); - return (1); - } - if (argc > 3) - burst = atoi(argv[3]); - - p0 = pcap_open_live(argv[1], 0, 1, 100, NULL); - p1 = pcap_open_live(argv[2], 0, 1, 100, NULL); - D("%s", version); - D("open returns %p %p", p0, p1); - if (!p0 || !p1) - return(1); - bzero(pollfd, sizeof(pollfd)); - pollfd[0].fd = pcap_fileno(p0); - pollfd[1].fd = pcap_fileno(p1); - pollfd[0].events = pollfd[1].events = POLLIN; - for (;;) { - /* do i need to reset ? */ - pollfd[0].revents = pollfd[1].revents = 0; - int ret = poll(pollfd, 2, 1000); - if (ret <= 0 || verbose) - D("poll %s [0] ev %x %x [1] ev %x %x", - ret <= 0 ? "timeout" : "ok", - pollfd[0].events, - pollfd[0].revents, - pollfd[1].events, - pollfd[1].revents); - if (ret < 0) - continue; - if (pollfd[0].revents & POLLIN) - pcap_dispatch(p0, burst, do_send, (void *)p1); - if (pollfd[1].revents & POLLIN) - pcap_dispatch(p1, burst, do_send, (void *)p0); - } - - return (0); -} -#endif /* TEST */ diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c index 3fb7702083fd..8e78fa8e24ed 100644 --- a/tools/tools/netmap/pkt-gen.c +++ b/tools/tools/netmap/pkt-gen.c @@ -37,26 +37,83 @@ * */ -#define MY_PCAP -#include "nm_util.h" -// #include <net/netmap_user.h> +#define _GNU_SOURCE /* for CPU_SET() */ +#include <stdio.h> +#define NETMAP_WITH_LIBS +#include <net/netmap_user.h> + #include <ctype.h> // isprint() +#include <unistd.h> // sysconf() +#include <sys/poll.h> +#include <arpa/inet.h> /* ntohs */ +#include <sys/sysctl.h> /* sysctl */ +#include <ifaddrs.h> /* getifaddrs */ +#include <net/ethernet.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> + +#include <pthread.h> #ifndef NO_PCAP #include <pcap/pcap.h> #endif + +#ifdef linux + +#define cpuset_t cpu_set_t + +#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ +#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ +#include <linux/ethtool.h> +#include <linux/sockios.h> + +#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME +#include <netinet/ether.h> /* ether_aton */ +#include <linux/if_packet.h> /* sockaddr_ll */ +#endif /* linux */ + +#ifdef __FreeBSD__ +#include <sys/endian.h> /* le64toh */ +#include <machine/param.h> + +#include <pthread_np.h> /* pthread w/ affinity */ +#include <sys/cpuset.h> /* cpu_set */ +#include <net/if_dl.h> /* LLADDR */ +#endif /* __FreeBSD__ */ + +#ifdef __APPLE__ + +#define cpuset_t uint64_t // XXX +static inline void CPU_ZERO(cpuset_t *p) +{ + *p = 0; +} + +static inline void CPU_SET(uint32_t i, cpuset_t *p) +{ + *p |= 1<< (i & 0x3f); +} + +#define pthread_setaffinity_np(a, b, c) ((void)a, 0) + +#define ifr_flagshigh ifr_flags // XXX +#define IFF_PPROMISC IFF_PROMISC +#include <net/if_dl.h> /* LLADDR */ +#define clock_gettime(a,b) \ + do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) +#endif /* __APPLE__ */ + const char *default_payload="netmap pkt-gen DIRECT payload\n" "http://info.iet.unipi.it/~luigi/netmap/ "; const char *indirect_payload="netmap pkt-gen indirect payload\n" "http://info.iet.unipi.it/~luigi/netmap/ "; -int time_second; // support for RD() debugging macro - int verbose = 0; -#define SKIP_PAYLOAD 1 /* do not check payload. */ +#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */ #define VIRT_HDR_1 10 /* length of a base vnet-hdr */ @@ -85,6 +142,8 @@ struct mac_range { struct ether_addr start, end; }; +/* ifname can be netmap:foo-xxxx */ +#define MAX_IFNAMELEN 64 /* our buffer for ifname */ /* * global arguments for all threads */ @@ -119,15 +178,16 @@ struct glob_arg { int affinity; int main_fd; + struct nm_desc *nmd; + uint64_t nmd_flags; int report_interval; /* milliseconds between prints */ void *(*td_body)(void *); void *mmap_addr; - int mmap_size; - char *ifname; + char ifname[MAX_IFNAMELEN]; char *nmr_config; int dummy_send; int virt_header; /* send also the virt_header */ - int host_ring; + int extra_bufs; /* goes in nr_arg3 */ }; enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; @@ -142,9 +202,7 @@ struct targ { int completed; int cancel; int fd; - struct nmreq nmr; - struct netmap_if *nifp; - uint16_t qfirst, qlast; /* range of queues to scan */ + struct nm_desc *nmd; volatile uint64_t count; struct timespec tic, toc; int me; @@ -187,7 +245,7 @@ extract_ip_range(struct ip_range *r) pp = index(ap, ':'); if (pp) { *pp++ = '\0'; - if (*pp) + if (*pp) r->port1 = strtol(pp, NULL, 0); } if (*ap) { @@ -261,19 +319,17 @@ sigint_h(int sig) static int system_ncpus(void) { -#ifdef __FreeBSD__ - int mib[2], ncpus; - size_t len; - - mib[0] = CTL_HW; - mib[1] = HW_NCPU; - len = sizeof(mib); + int ncpus; +#if defined (__FreeBSD__) + int mib[2] = { CTL_HW, HW_NCPU }; + size_t len = sizeof(mib); sysctl(mib, 2, &ncpus, &len, NULL, 0); - +#elif defined(linux) + ncpus = sysconf(_SC_NPROCESSORS_ONLN); +#else /* others */ + ncpus = 1; +#endif /* others */ return (ncpus); -#else - return 1; -#endif /* !__FreeBSD__ */ } #ifdef __linux__ @@ -299,15 +355,17 @@ system_ncpus(void) /* * parse the vale configuration in conf and put it in nmr. + * Return the flag set if necessary. * The configuration may consist of 0 to 4 numbers separated * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. * Missing numbers or zeroes stand for default values. * As an additional convenience, if exactly one number * is specified, then this is assigned to both #tx-slots and #rx-slots. - * If there is no 4th number, then the 3rd is assigned to both #tx-rings + * If there is no 4th number, then the 3rd is assigned to both #tx-rings * and #rx-rings. */ -void parse_nmr_config(const char* conf, struct nmreq *nmr) +int +parse_nmr_config(const char* conf, struct nmreq *nmr) { char *w, *tok; int i, v; @@ -315,7 +373,7 @@ void parse_nmr_config(const char* conf, struct nmreq *nmr) nmr->nr_tx_rings = nmr->nr_rx_rings = 0; nmr->nr_tx_slots = nmr->nr_rx_slots = 0; if (conf == NULL || ! *conf) - return; + return 0; w = strdup(conf); for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { v = atoi(tok); @@ -341,6 +399,9 @@ void parse_nmr_config(const char* conf, struct nmreq *nmr) nmr->nr_tx_rings, nmr->nr_tx_slots, nmr->nr_rx_rings, nmr->nr_rx_slots); free(w); + return (nmr->nr_tx_rings || nmr->nr_tx_slots || + nmr->nr_rx_rings || nmr->nr_rx_slots) ? + NM_OPEN_RING_CFG : 0; } @@ -385,7 +446,6 @@ source_hwaddr(const char *ifname, char *buf) static int setaffinity(pthread_t me, int i) { -#if 1 // def __FreeBSD__ cpuset_t cpumask; if (i == -1) @@ -399,10 +459,6 @@ setaffinity(pthread_t me, int i) D("Unable to set affinity: %s", strerror(errno)); return 1; } -#else - (void)me; /* suppress 'unused' warnings */ - (void)i; -#endif /* __FreeBSD__ */ return 0; } @@ -449,7 +505,7 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur) int i, j, i0; /* get the length in ASCII of the length of the packet. */ - + printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", ring, cur, ring->slot[cur].buf_idx, ring->slot[cur].flags, len); @@ -632,7 +688,7 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, slot->flags |= NS_INDIRECT; slot->ptr = (uint64_t)frame; } else if (options & OPT_COPY) { - pkt_copy(frame, p, size); + nm_pkt_copy(frame, p, size); if (fcnt == nfrags) update_addresses(pkt, g); } else if (options & OPT_MEMCPY) { @@ -671,21 +727,19 @@ static void * pinger_body(void *data) { struct targ *targ = (struct targ *) data; - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; + struct netmap_if *nifp = targ->nmd->nifp; int i, rx = 0, n = targ->g->npackets; void *frame; int size; + uint32_t sent = 0; + struct timespec ts, now, last_print; + uint32_t count = 0, min = 1000000000, av = 0; frame = &targ->pkt; frame += sizeof(targ->pkt.vh) - targ->g->virt_header; size = targ->g->pkt_size + targ->g->virt_header; - fds[0].fd = targ->fd; - fds[0].events = (POLLIN); - static uint32_t sent; - struct timespec ts, now, last_print; - uint32_t count = 0, min = 1000000000, av = 0; if (targ->g->nthreads > 1) { D("can only ping with 1 thread"); @@ -706,7 +760,7 @@ pinger_body(void *data) if (nm_ring_empty(ring)) { D("-- ouch, cannot send"); } else { - pkt_copy(frame, p, size); + nm_pkt_copy(frame, p, size); clock_gettime(CLOCK_REALTIME_PRECISE, &ts); bcopy(&sent, p+42, sizeof(sent)); bcopy(&ts, p+46, sizeof(ts)); @@ -715,13 +769,14 @@ pinger_body(void *data) } } /* should use a parameter to decide how often to send */ - if (poll(fds, 1, 3000) <= 0) { + if (poll(&pfd, 1, 3000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; } /* see what we got back */ - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_tx_ring; + i <= targ->nmd->last_tx_ring; i++) { ring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(ring)) { uint32_t seq; @@ -775,12 +830,10 @@ static void * ponger_body(void *data) { struct targ *targ = (struct targ *) data; - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; + struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *txring, *rxring; int i, rx = 0, sent = 0, n = targ->g->npackets; - fds[0].fd = targ->fd; - fds[0].events = (POLLIN); if (targ->g->nthreads > 1) { D("can only reply ping with 1 thread"); @@ -791,9 +844,9 @@ ponger_body(void *data) uint32_t txcur, txavail; //#define BUSYWAIT #ifdef BUSYWAIT - ioctl(fds[0].fd, NIOCRXSYNC, NULL); + ioctl(pfd.fd, NIOCRXSYNC, NULL); #else - if (poll(fds, 1, 1000) <= 0) { + if (poll(&pfd, 1, 1000) <= 0) { D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); continue; @@ -803,7 +856,7 @@ ponger_body(void *data) txcur = txring->cur; txavail = nm_ring_space(txring); /* see what we got back */ - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { rxring = NETMAP_RXRING(nifp, i); while (!nm_ring_empty(rxring)) { uint16_t *spkt, *dpkt; @@ -821,7 +874,7 @@ ponger_body(void *data) /* copy... */ dpkt = (uint16_t *)dst; spkt = (uint16_t *)src; - pkt_copy(src, dst, slot->len); + nm_pkt_copy(src, dst, slot->len); dpkt[0] = spkt[3]; dpkt[1] = spkt[4]; dpkt[2] = spkt[5]; @@ -838,7 +891,7 @@ ponger_body(void *data) txring->head = txring->cur = txcur; targ->count = sent; #ifdef BUSYWAIT - ioctl(fds[0].fd, NIOCTXSYNC, NULL); + ioctl(pfd.fd, NIOCTXSYNC, NULL); #endif //D("tx %d rx %d", sent, rx); } @@ -924,11 +977,11 @@ static void * sender_body(void *data) { struct targ *targ = (struct targ *) data; - - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; + struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *txring; - int i, n = targ->g->npackets / targ->g->nthreads, sent = 0; + int i, n = targ->g->npackets / targ->g->nthreads; + int64_t sent = 0; int options = targ->g->options | OPT_COPY; struct timespec nexttime = { 0, 0}; // XXX silence compiler int rate_limit = targ->g->tx_rate; @@ -943,10 +996,6 @@ sender_body(void *data) D("start"); if (setaffinity(targ->thread, targ->affinity)) goto quit; - /* setup poll(2) mechanism. */ - memset(fds, 0, sizeof(fds)); - fds[0].fd = targ->fd; - fds[0].events = (POLLOUT); /* main loop.*/ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); @@ -956,7 +1005,7 @@ sender_body(void *data) wait_time(targ->tic); nexttime = targ->tic; } - if (targ->g->dev_type == DEV_TAP) { + if (targ->g->dev_type == DEV_TAP) { D("writing to file desc %d", targ->g->main_fd); for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { @@ -997,14 +1046,14 @@ sender_body(void *data) /* * wait for available room in the send queue(s) */ - if (poll(fds, 1, 2000) <= 0) { + if (poll(&pfd, 1, 2000) <= 0) { if (targ->cancel) break; D("poll error/timeout on queue %d: %s", targ->me, strerror(errno)); - goto quit; + // goto quit; } - if (fds[0].revents & POLLERR) { + if (pfd.revents & POLLERR) { D("poll error"); goto quit; } @@ -1015,7 +1064,7 @@ sender_body(void *data) D("drop copy"); options &= ~OPT_COPY; } - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { int m, limit = rate_limit ? tosend : targ->g->burst; if (n > 0 && n - sent < limit) limit = n - sent; @@ -1024,10 +1073,10 @@ sender_body(void *data) continue; if (frags > 1) limit = ((limit + frags - 1) / frags) * frags; - + m = send_packets(txring, pkt, frame, size, targ->g, limit, options, frags); - ND("limit %d tail %d frags %d m %d", + ND("limit %d tail %d frags %d m %d", limit, txring->tail, frags, m); sent += m; targ->count = sent; @@ -1039,13 +1088,13 @@ sender_body(void *data) } } /* flush any remaining packets */ - ioctl(fds[0].fd, NIOCTXSYNC, NULL); + ioctl(pfd.fd, NIOCTXSYNC, NULL); /* final part: wait all the TX queues to be empty. */ - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { txring = NETMAP_TXRING(nifp, i); while (nm_tx_pending(txring)) { - ioctl(fds[0].fd, NIOCTXSYNC, NULL); + ioctl(pfd.fd, NIOCTXSYNC, NULL); usleep(1); /* wait 1 tick */ } } @@ -1102,8 +1151,8 @@ static void * receiver_body(void *data) { struct targ *targ = (struct targ *) data; - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; + struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *rxring; int i; uint64_t received = 0; @@ -1111,17 +1160,13 @@ receiver_body(void *data) if (setaffinity(targ->thread, targ->affinity)) goto quit; - /* setup poll(2) mechanism. */ - memset(fds, 0, sizeof(fds)); - fds[0].fd = targ->fd; - fds[0].events = (POLLIN); - /* unbounded wait for the first packet. */ for (;;) { - i = poll(fds, 1, 1000); - if (i > 0 && !(fds[0].revents & POLLERR)) + i = poll(&pfd, 1, 1000); + if (i > 0 && !(pfd.revents & POLLERR)) break; - RD(1, "waiting for initial packets, poll returns %d %d", i, fds[0].revents); + RD(1, "waiting for initial packets, poll returns %d %d", + i, pfd.revents); } /* main loop, exit after 1s silence */ @@ -1146,18 +1191,18 @@ receiver_body(void *data) while (!targ->cancel) { /* Once we started to receive packets, wait at most 1 seconds before quitting. */ - if (poll(fds, 1, 1 * 1000) <= 0 && !targ->g->forever) { + if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->toc.tv_sec -= 1; /* Subtract timeout time. */ - break; + goto out; } - if (fds[0].revents & POLLERR) { + if (pfd.revents & POLLERR) { D("poll err"); goto quit; } - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { int m; rxring = NETMAP_RXRING(nifp, i); @@ -1168,12 +1213,12 @@ receiver_body(void *data) received += m; } targ->count = received; - - // tell the card we have read the data - //ioctl(fds[0].fd, NIOCRXSYNC, NULL); } } + clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); + +out: targ->completed = 1; targ->count = received; @@ -1190,10 +1235,10 @@ quit: static const char * norm(char *buf, double val) { - char *units[] = { "", "K", "M", "G" }; + char *units[] = { "", "K", "M", "G", "T" }; u_int i; - for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++) + for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) val /= 1000; sprintf(buf, "%.2f %s", val, units[i]); return buf; @@ -1205,8 +1250,8 @@ tx_output(uint64_t sent, int size, double delta) double bw, raw_bw, pps; char b1[40], b2[80], b3[80]; - printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n", - sent, size, delta); + printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", + (unsigned long long)sent, size, delta); if (delta == 0) delta = 1e-6; if (size < 60) /* correct for min packet size */ @@ -1227,7 +1272,8 @@ rx_output(uint64_t received, double delta) double pps; char b1[40]; - printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta); + printf("Received %llu packets, in %.2f seconds.\n", + (unsigned long long) received, delta); if (delta == 0) delta = 1e-6; @@ -1262,7 +1308,6 @@ usage(void) "\t-R rate in packets per second\n" "\t-X dump payload\n" "\t-H len add empty virtio-net-header with size 'len'\n" - "\t-h use host ring\n" "", cmd); @@ -1280,77 +1325,57 @@ start_threads(struct glob_arg *g) * using a single descriptor. */ for (i = 0; i < g->nthreads; i++) { - bzero(&targs[i], sizeof(targs[i])); - targs[i].fd = -1; /* default, with pcap */ - targs[i].g = g; + struct targ *t = &targs[i]; - if (g->dev_type == DEV_NETMAP) { - struct nmreq tifreq; - int tfd; + bzero(t, sizeof(*t)); + t->fd = -1; /* default, with pcap */ + t->g = g; - /* register interface. */ - tfd = open("/dev/netmap", O_RDWR); - if (tfd == -1) { - D("Unable to open /dev/netmap: %s", strerror(errno)); - continue; - } - targs[i].fd = tfd; + if (g->dev_type == DEV_NETMAP) { + struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ - bzero(&tifreq, sizeof(tifreq)); - strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name)); - tifreq.nr_version = NETMAP_API; - if (g->host_ring) { - tifreq.nr_ringid = NETMAP_SW_RING; - } else { - tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0; + if (g->nthreads > 1) { + if (nmd.req.nr_flags != NR_REG_ALL_NIC) { + D("invalid nthreads mode %d", nmd.req.nr_flags); + continue; + } + nmd.req.nr_flags = NR_REG_ONE_NIC; + nmd.req.nr_ringid = i; } - parse_nmr_config(g->nmr_config, &tifreq); + /* Only touch one of the rings (rx is already ok) */ + if (g->td_body == receiver_body) + nmd.req.nr_ringid |= NETMAP_NO_TX_POLL; - /* - * if we are acting as a receiver only, do not touch the transmit ring. - * This is not the default because many apps may use the interface - * in both directions, but a pure receiver does not. - */ - if (g->td_body == receiver_body) { - tifreq.nr_ringid |= NETMAP_NO_TX_POLL; - } + /* register interface. Override ifname and ringid etc. */ - if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) { - D("Unable to register %s: %s", g->ifname, strerror(errno)); + t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags | + NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd); + if (t->nmd == NULL) { + D("Unable to open %s: %s", + t->g->ifname, strerror(errno)); continue; } - D("memsize is %d MB", tifreq.nr_memsize >> 20); - targs[i].nmr = tifreq; - targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset); - D("nifp flags 0x%x", targs[i].nifp->ni_flags); - /* start threads. */ - if (g->host_ring) { - targs[i].qfirst = (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); - targs[i].qlast = targs[i].qfirst + 1; - } else { - targs[i].qfirst = (g->nthreads > 1) ? i : 0; - targs[i].qlast = (g->nthreads > 1) ? i+1 : - (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); - } + t->fd = t->nmd->fd; + } else { targs[i].fd = g->main_fd; } - targs[i].used = 1; - targs[i].me = i; + t->used = 1; + t->me = i; if (g->affinity >= 0) { if (g->affinity < g->cpus) - targs[i].affinity = g->affinity; + t->affinity = g->affinity; else - targs[i].affinity = i % g->cpus; - } else - targs[i].affinity = -1; + t->affinity = i % g->cpus; + } else { + t->affinity = -1; + } /* default, init packets */ - initialize_packet(&targs[i]); + initialize_packet(t); - if (pthread_create(&targs[i].thread, NULL, g->td_body, - &targs[i]) == -1) { + if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { D("Unable to create thread %d: %s", i, strerror(errno)); - targs[i].used = 0; + t->used = 0; } } } @@ -1375,7 +1400,6 @@ main_thread(struct glob_arg *g) delta.tv_usec = (g->report_interval%1000)*1000; select(0, NULL, NULL, NULL, &delta); gettimeofday(&now, NULL); - time_second = now.tv_sec; timersub(&now, &toc, &toc); my_count = 0; for (i = 0; i < g->nthreads; i++) { @@ -1388,8 +1412,10 @@ main_thread(struct glob_arg *g) continue; npkts = my_count - prev; pps = (npkts*1000000 + usec/2) / usec; - D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)", - pps, npkts, usec); + D("%llu pps (%llu pkts in %llu usec)", + (unsigned long long)pps, + (unsigned long long)npkts, + (unsigned long long)usec); prev = my_count; toc = now; if (done == g->nthreads) @@ -1433,7 +1459,7 @@ main_thread(struct glob_arg *g) rx_output(count, delta_t); if (g->dev_type == DEV_NETMAP) { - munmap(g->mmap_addr, g->mmap_size); + munmap(g->nmd->mem, g->nmd->req.nr_memsize); close(g->main_fd); } } @@ -1521,7 +1547,6 @@ main(int arc, char **argv) struct glob_arg g; - struct nmreq nmr; int ch; int wait_link = 2; int devqueues = 1; /* how many device queues */ @@ -1548,7 +1573,7 @@ main(int arc, char **argv) g.virt_header = 0; while ( (ch = getopt(arc, argv, - "a:f:F:n:i:It:r:l:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:h")) != -1) { + "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:")) != -1) { struct sf *fn; switch(ch) { @@ -1594,23 +1619,28 @@ main(int arc, char **argv) * otherwise we guess */ D("interface is %s", optarg); - g.ifname = optarg; + if (strlen(optarg) > MAX_IFNAMELEN - 8) { + D("ifname too long %s", optarg); + break; + } + strcpy(g.ifname, optarg); if (!strcmp(optarg, "null")) { g.dev_type = DEV_NETMAP; g.dummy_send = 1; } else if (!strncmp(optarg, "tap:", 4)) { g.dev_type = DEV_TAP; - g.ifname = optarg + 4; + strcpy(g.ifname, optarg + 4); } else if (!strncmp(optarg, "pcap:", 5)) { g.dev_type = DEV_PCAP; - g.ifname = optarg + 5; - } else if (!strncmp(optarg, "netmap:", 7)) { + strcpy(g.ifname, optarg + 5); + } else if (!strncmp(optarg, "netmap:", 7) || + !strncmp(optarg, "vale", 4)) { g.dev_type = DEV_NETMAP; - g.ifname = optarg + 7; } else if (!strncmp(optarg, "tap", 3)) { g.dev_type = DEV_TAP; - } else { + } else { /* prepend netmap: */ g.dev_type = DEV_NETMAP; + sprintf(g.ifname, "netmap:%s", optarg); } break; @@ -1618,18 +1648,6 @@ main(int arc, char **argv) g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ break; - case 't': /* send, deprecated */ - D("-t deprecated, please use -f tx -n %s", optarg); - g.td_body = sender_body; - g.npackets = atoi(optarg); - break; - - case 'r': /* receive */ - D("-r deprecated, please use -f rx -n %s", optarg); - g.td_body = receiver_body; - g.npackets = atoi(optarg); - break; - case 'l': /* pkt_size */ g.pkt_size = atoi(optarg); break; @@ -1686,8 +1704,8 @@ main(int arc, char **argv) case 'H': g.virt_header = atoi(optarg); break; - case 'h': - g.host_ring = 1; + case 'e': /* extra bufs */ + g.extra_bufs = atoi(optarg); break; } } @@ -1759,42 +1777,33 @@ main(int arc, char **argv) } else if (g.dummy_send) { /* but DEV_NETMAP */ D("using a dummy send routine"); } else { - bzero(&nmr, sizeof(nmr)); - nmr.nr_version = NETMAP_API; + struct nm_desc base_nmd; + + bzero(&base_nmd, sizeof(base_nmd)); + + g.nmd_flags = 0; + g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req); + if (g.extra_bufs) { + base_nmd.req.nr_arg3 = g.extra_bufs; + g.nmd_flags |= NM_OPEN_ARG3; + } + /* - * Open the netmap device to fetch the number of queues of our - * interface. + * Open the netmap device using nm_open(). * - * The first NIOCREGIF also detaches the card from the * protocol stack and may cause a reset of the card, * which in turn may take some time for the PHY to - * reconfigure. - */ - g.main_fd = open("/dev/netmap", O_RDWR); - if (g.main_fd == -1) { - D("Unable to open /dev/netmap: %s", strerror(errno)); - // fail later - } - /* - * Register the interface on the netmap device: from now on, - * we can operate on the network interface without any - * interference from the legacy network stack. - * - * We decide to put the first interface registration here to - * give time to cards that take a long time to reset the PHY. + * reconfigure. We do the open here to have time to reset. */ - bzero(&nmr, sizeof(nmr)); - nmr.nr_version = NETMAP_API; - strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name)); - parse_nmr_config(g.nmr_config, &nmr); - if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) { - D("Unable to register interface %s: %s", g.ifname, strerror(errno)); - //continue, fail later + g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd); + if (g.nmd == NULL) { + D("Unable to open %s: %s", g.ifname, strerror(errno)); + goto out; } - ND("%s: txr %d txd %d rxr %d rxd %d", g.ifname, - nmr.nr_tx_rings, nmr.nr_tx_slots, - nmr.nr_rx_rings, nmr.nr_rx_slots); - devqueues = nmr.nr_rx_rings; + g.main_fd = g.nmd->fd; + D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); + + devqueues = g.nmd->req.nr_rx_rings; /* validate provided nthreads. */ if (g.nthreads < 1 || g.nthreads > devqueues) { @@ -1802,32 +1811,18 @@ main(int arc, char **argv) // continue, fail later } - /* - * Map the netmap shared memory: instead of issuing mmap() - * inside the body of the threads, we prefer to keep this - * operation here to simplify the thread logic. - */ - D("mapping %d Kbytes", nmr.nr_memsize>>10); - g.mmap_size = nmr.nr_memsize; - g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize, - PROT_WRITE | PROT_READ, - MAP_SHARED, g.main_fd, 0); - if (g.mmap_addr == MAP_FAILED) { - D("Unable to mmap %d KB: %s", nmr.nr_memsize >> 10, strerror(errno)); - // continue, fail later - } - if (verbose) { - struct netmap_if *nifp = NETMAP_IF(g.mmap_addr, nmr.nr_offset); + struct netmap_if *nifp = g.nmd->nifp; + struct nmreq *req = &g.nmd->req; - D("nifp at offset %d, %d tx %d rx rings %s", - nmr.nr_offset, nmr.nr_tx_rings, nmr.nr_rx_rings, - nmr.nr_ringid & NETMAP_PRIV_MEM ? "PRIVATE" : "common" ); - for (i = 0; i <= nmr.nr_tx_rings; i++) { + D("nifp at offset %d, %d tx %d rx region %d", + req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, + req->nr_arg2); + for (i = 0; i <= req->nr_tx_rings; i++) { D(" TX%d at 0x%lx", i, (char *)NETMAP_TXRING(nifp, i) - (char *)nifp); } - for (i = 0; i <= nmr.nr_rx_rings; i++) { + for (i = 0; i <= req->nr_rx_rings; i++) { D(" RX%d at 0x%lx", i, (char *)NETMAP_RXRING(nifp, i) - (char *)nifp); } @@ -1846,7 +1841,8 @@ main(int arc, char **argv) g.src_ip.name, g.dst_ip.name, g.src_mac.name, g.dst_mac.name); } - + +out: /* Exit if something went wrong. */ if (g.main_fd < 0) { D("aborting"); @@ -1854,7 +1850,7 @@ main(int arc, char **argv) } } - + if (g.options) { D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", g.options & OPT_PREFETCH ? " prefetch" : "", diff --git a/tools/tools/netmap/vale-ctl.c b/tools/tools/netmap/vale-ctl.c index eb6c48d15a04..e1d8da568063 100644 --- a/tools/tools/netmap/vale-ctl.c +++ b/tools/tools/netmap/vale-ctl.c @@ -33,6 +33,7 @@ #include <unistd.h> /* close */ #include <sys/ioctl.h> /* ioctl */ #include <sys/param.h> +#include <sys/socket.h> /* apple needs sockaddr */ #include <net/if.h> /* ifreq */ #include <net/netmap.h> #include <net/netmap_user.h> |