diff options
Diffstat (limited to 'tools/tools/netmap')
-rw-r--r-- | tools/tools/netmap/Makefile | 25 | ||||
-rw-r--r-- | tools/tools/netmap/README | 11 | ||||
-rw-r--r-- | tools/tools/netmap/bridge.c | 456 | ||||
-rw-r--r-- | tools/tools/netmap/click-test.cfg | 19 | ||||
-rw-r--r-- | tools/tools/netmap/pcap.c | 761 | ||||
-rw-r--r-- | tools/tools/netmap/pkt-gen.c | 1021 |
6 files changed, 2293 insertions, 0 deletions
diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile new file mode 100644 index 000000000000..4b682e52a311 --- /dev/null +++ b/tools/tools/netmap/Makefile @@ -0,0 +1,25 @@ +# +# $FreeBSD$ +# +# For multiple programs using a single source file each, +# we can just define 'progs' and create custom targets. +PROGS = pkt-gen bridge testpcap libnetmap.so + +CLEANFILES = $(PROGS) pcap.o +NO_MAN= +CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys +CFLAGS += -Wextra + +LDFLAGS += -lpthread -lpcap + +.include <bsd.prog.mk> +.include <bsd.lib.mk> + +all: $(PROGS) + +testpcap: pcap.c libnetmap.so + $(CC) $(CFLAGS) -L. -lnetmap -o ${.TARGET} pcap.c + +libnetmap.so: pcap.c + $(CC) $(CFLAGS) -fpic -c ${.ALLSRC} + $(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o} diff --git a/tools/tools/netmap/README b/tools/tools/netmap/README new file mode 100644 index 000000000000..9a1ba6096188 --- /dev/null +++ b/tools/tools/netmap/README @@ -0,0 +1,11 @@ +$FreeBSD$ + +This directory contains examples that use netmap + + pkt-gen a packet sink/source using the netmap API + + bridge a two-port jumper wire, also using the native API + + testpcap a jumper wire using libnetmap (or libpcap) + + click* various click examples diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c new file mode 100644 index 000000000000..2385a0811fb5 --- /dev/null +++ b/tools/tools/netmap/bridge.c @@ -0,0 +1,456 @@ +/* + * (C) 2011 Luigi Rizzo, Matteo Landi + * + * BSD license + * + * A netmap client to bridge two network interfaces + * (or one interface and the host stack). 
+ * + * $FreeBSD$ + */ + +#include <errno.h> +#include <signal.h> /* signal */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> /* strcmp */ +#include <fcntl.h> /* open */ +#include <unistd.h> /* close */ + +#include <sys/endian.h> /* le64toh */ +#include <sys/mman.h> /* PROT_* */ +#include <sys/ioctl.h> /* ioctl */ +#include <machine/param.h> +#include <sys/poll.h> +#include <sys/socket.h> /* sockaddr.. */ +#include <arpa/inet.h> /* ntohs */ + +#include <net/if.h> /* ifreq */ +#include <net/ethernet.h> +#include <net/netmap.h> +#include <net/netmap_user.h> + +#include <netinet/in.h> /* sockaddr_in */ + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +int verbose = 0; + +/* debug support */ +#define ND(format, ...) {} +#define D(format, ...) do { \ + if (!verbose) break; \ + struct timeval _xxts; \ + gettimeofday(&_xxts, NULL); \ + fprintf(stderr, "%03d.%06d %s [%d] " format "\n", \ + (int)_xxts.tv_sec %1000, (int)_xxts.tv_usec, \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + } while (0) + + +char *version = "$Id: bridge.c 9642 2011-11-07 21:39:47Z luigi $"; + +static int do_abort = 0; + +/* + * info on a ring we handle + */ +struct my_ring { + const char *ifname; + int fd; + char *mem; /* userspace mmap address */ + u_int memsize; + u_int queueid; + u_int begin, end; /* first..last+1 rings to check */ + struct netmap_if *nifp; + struct netmap_ring *tx, *rx; /* shortcuts */ + + uint32_t if_flags; + uint32_t if_reqcap; + uint32_t if_curcap; +}; + +static void +sigint_h(__unused int sig) +{ + do_abort = 1; + signal(SIGINT, SIG_DFL); +} + + +static int +do_ioctl(struct my_ring *me, int what) +{ + struct ifreq ifr; + int error; + + bzero(&ifr, sizeof(ifr)); + strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name)); + switch (what) { + case SIOCSIFFLAGS: + ifr.ifr_flagshigh = me->if_flags >> 16; + ifr.ifr_flags = me->if_flags & 0xffff; + break; + case SIOCSIFCAP: + ifr.ifr_reqcap = me->if_reqcap; + ifr.ifr_curcap = me->if_curcap; + break; + } + error = 
ioctl(me->fd, what, &ifr); + if (error) { + D("ioctl error %d", what); + return error; + } + switch (what) { + case SIOCGIFFLAGS: + me->if_flags = (ifr.ifr_flagshigh << 16) | + (0xffff & ifr.ifr_flags); + if (verbose) + D("flags are 0x%x", me->if_flags); + break; + + case SIOCGIFCAP: + me->if_reqcap = ifr.ifr_reqcap; + me->if_curcap = ifr.ifr_curcap; + if (verbose) + D("curcap are 0x%x", me->if_curcap); + break; + } + return 0; +} + +/* + * open a device. if me->mem is null then do an mmap. + */ +static int +netmap_open(struct my_ring *me, int ringid) +{ + int fd, err, l; + struct nmreq req; + + me->fd = fd = open("/dev/netmap", O_RDWR); + if (fd < 0) { + D("Unable to open /dev/netmap"); + return (-1); + } + bzero(&req, sizeof(req)); + strncpy(req.nr_name, me->ifname, sizeof(req.nr_name)); + req.nr_ringid = ringid; + err = ioctl(fd, NIOCGINFO, &req); + if (err) { + D("cannot get info on %s", me->ifname); + goto error; + } + me->memsize = l = req.nr_memsize; + if (verbose) + D("memsize is %d MB", l>>20); + err = ioctl(fd, NIOCREGIF, &req); + if (err) { + D("Unable to register %s", me->ifname); + goto error; + } + + if (me->mem == NULL) { + me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + if (me->mem == MAP_FAILED) { + D("Unable to mmap"); + me->mem = NULL; + goto error; + } + } + + me->nifp = NETMAP_IF(me->mem, req.nr_offset); + me->queueid = ringid; + if (ringid & NETMAP_SW_RING) { + me->begin = req.nr_numrings; + me->end = me->begin + 1; + } else if (ringid & NETMAP_HW_RING) { + me->begin = ringid & NETMAP_RING_MASK; + me->end = me->begin + 1; + } else { + me->begin = 0; + me->end = req.nr_numrings; + } + me->tx = NETMAP_TXRING(me->nifp, me->begin); + me->rx = NETMAP_RXRING(me->nifp, me->begin); + return (0); +error: + close(me->fd); + return -1; +} + + +static int +netmap_close(struct my_ring *me) +{ + D(""); + if (me->mem) + munmap(me->mem, me->memsize); + ioctl(me->fd, NIOCUNREGIF, NULL); + close(me->fd); + return (0); +} + + +/* + * move up 
to 'limit' pkts from rxring to txring swapping buffers. + * 'limit' is clamped to what both rings have available; the return + * value is the number of slots actually moved. + */ +static int +process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, + u_int limit, const char *msg) +{ + u_int j, k, m = 0; + + /* print a warning if any of the ring flags is set (e.g. NM_REINIT) */ + if (rxring->flags || txring->flags) + D("%s rxflags %x txflags %x", + msg, rxring->flags, txring->flags); + j = rxring->cur; /* RX */ + k = txring->cur; /* TX */ + if (rxring->avail < limit) + limit = rxring->avail; + if (txring->avail < limit) + limit = txring->avail; + m = limit; + while (limit-- > 0) { + struct netmap_slot *rs = &rxring->slot[j]; + struct netmap_slot *ts = &txring->slot[k]; + uint32_t pkt; + + /* swap packets: exchange the buffer indexes, no payload copy */ + if (ts->buf_idx < 2 || rs->buf_idx < 2) { + D("wrong index rx[%d] = %d -> tx[%d] = %d", + j, rs->buf_idx, k, ts->buf_idx); + sleep(2); + } + pkt = ts->buf_idx; + ts->buf_idx = rs->buf_idx; + rs->buf_idx = pkt; + + /* copy the packet length (only logged if outside 14..2048). */ + if (rs->len < 14 || rs->len > 2048) + D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k); + else if (verbose > 1) + D("send len %d rx[%d] -> tx[%d]", rs->len, j, k); + ts->len = rs->len; + + /* report the buffer change. */ + ts->flags |= NS_BUF_CHANGED; + rs->flags |= NS_BUF_CHANGED; + j = NETMAP_RING_NEXT(rxring, j); + k = NETMAP_RING_NEXT(txring, k); + } + rxring->avail -= m; + txring->avail -= m; + rxring->cur = j; + txring->cur = k; + if (verbose && m > 0) + D("sent %d packets to %p", m, txring); + + return (m); +} + +/* move packets from src to destination */ +static int +move(struct my_ring *src, struct my_ring *dst, u_int limit) +{ + struct netmap_ring *txring, *rxring; + u_int m = 0, si = src->begin, di = dst->begin; + const char *msg = (src->queueid & NETMAP_SW_RING) ? 
+ "host->net" : "net->host"; + + /* + * process_rings() moves nothing when limit is 0, and the loop + * below only advances when a ring drains: bail out early instead + * of spinning forever on two non-empty rings. + */ + if (limit == 0) + return (0); + + while (si < src->end && di < dst->end) { + rxring = NETMAP_RXRING(src->nifp, si); + txring = NETMAP_TXRING(dst->nifp, di); + ND("txring %p rxring %p", txring, rxring); + if (rxring->avail == 0) { + si++; + continue; + } + if (txring->avail == 0) { + di++; + continue; + } + m += process_rings(rxring, txring, limit, msg); + } + + return (m); +} + +/* + * how many packets on this set of queues ? + * Sums 'avail' over rings begin..end-1; a non-zero 'tx' selects the + * TX rings, zero selects the RX rings. + */ +static int +howmany(struct my_ring *me, int tx) +{ + u_int i, tot = 0; + + ND("me %p begin %d end %d", me, me->begin, me->end); + for (i = me->begin; i < me->end; i++) { + struct netmap_ring *ring = tx ? + NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i); + tot += ring->avail; + } + if (0 && verbose && tot && !tx) + D("ring %s %s %s has %d avail at %d", + me->ifname, tx ? "tx": "rx", + me->end > me->nifp->ni_num_queues ? + "host":"net", + tot, NETMAP_TXRING(me->nifp, me->begin)->cur); + return tot; +} + +/* + * bridge [-v] if1 [if2] + * + * If only one name, or the two interfaces are the same, + * bridges userland and the adapter. Otherwise bridge + * two interfaces. + */ +int +main(int argc, char **argv) +{ + struct pollfd pollfd[2]; + int i; + u_int burst = 1024; + struct my_ring me[2]; + + fprintf(stderr, "%s %s built %s %s\n", + argv[0], version, __DATE__, __TIME__); + + bzero(me, sizeof(me)); + + while (argc > 1 && !strcmp(argv[1], "-v")) { + verbose++; + argv++; + argc--; + } + + if (argc < 2 || argc > 4) { + /* D() is silent unless -v was given; usage must always show */ + fprintf(stderr, "Usage: %s IFNAME1 [IFNAME2 [BURST]]\n", + argv[0]); + return (1); + } + + /* setup netmap interface #1. */ + me[0].ifname = argv[1]; + if (argc == 2 || !strcmp(argv[1], argv[2])) { + D("same interface, endpoint 0 goes to host"); + i = NETMAP_SW_RING; + me[1].ifname = argv[1]; + } else { + /* two different interfaces. 
Take all rings on if1 */ + i = 0; // all hw rings + me[1].ifname = argv[2]; + } + if (netmap_open(me, i)) + return (1); + me[1].mem = me[0].mem; /* copy the pointer, so only one mmap */ + if (netmap_open(me+1, 0)) + return (1); + + /* if bridging two interfaces, set promisc mode */ + if (i != NETMAP_SW_RING) { + do_ioctl(me, SIOCGIFFLAGS); + if ((me[0].if_flags & IFF_UP) == 0) { + D("%s is down, bringing up...", me[0].ifname); + me[0].if_flags |= IFF_UP; + } + me[0].if_flags |= IFF_PPROMISC; + do_ioctl(me, SIOCSIFFLAGS); + + do_ioctl(me+1, SIOCGIFFLAGS); + me[1].if_flags |= IFF_PPROMISC; + do_ioctl(me+1, SIOCSIFFLAGS); + + /* also disable checksums etc. */ + do_ioctl(me, SIOCGIFCAP); + me[0].if_reqcap = me[0].if_curcap; + me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); + do_ioctl(me+0, SIOCSIFCAP); + } + do_ioctl(me+1, SIOCGIFFLAGS); + if ((me[1].if_flags & IFF_UP) == 0) { + D("%s is down, bringing up...", me[1].ifname); + me[1].if_flags |= IFF_UP; + } + do_ioctl(me+1, SIOCSIFFLAGS); + + do_ioctl(me+1, SIOCGIFCAP); + me[1].if_reqcap = me[1].if_curcap; + me[1].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); + do_ioctl(me+1, SIOCSIFCAP); + if (argc > 3) + burst = atoi(argv[3]); /* packets burst size. */ + + /* setup poll(2) variables. 
*/ + memset(pollfd, 0, sizeof(pollfd)); + for (i = 0; i < 2; i++) { + pollfd[i].fd = me[i].fd; + pollfd[i].events = (POLLIN); + } + + D("Wait 2 secs for link to come up..."); + sleep(2); + D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.", + me[0].ifname, me[0].queueid, me[0].nifp->ni_num_queues, + me[1].ifname, me[1].queueid, me[1].nifp->ni_num_queues); + + /* main loop */ + signal(SIGINT, sigint_h); + while (!do_abort) { + int n0, n1, ret; + pollfd[0].events = pollfd[1].events = 0; + pollfd[0].revents = pollfd[1].revents = 0; + n0 = howmany(me, 0); + n1 = howmany(me + 1, 0); + if (n0) + pollfd[1].events |= POLLOUT; + else + pollfd[0].events |= POLLIN; + if (n1) + pollfd[0].events |= POLLOUT; + else + pollfd[1].events |= POLLIN; + ret = poll(pollfd, 2, 2500); + if (ret <= 0 || verbose) + D("poll %s [0] ev %x %x rx %d@%d tx %d," + " [1] ev %x %x rx %d@%d tx %d", + ret <= 0 ? "timeout" : "ok", + pollfd[0].events, + pollfd[0].revents, + howmany(me, 0), + me[0].rx->cur, + howmany(me, 1), + pollfd[1].events, + pollfd[1].revents, + howmany(me+1, 0), + me[1].rx->cur, + howmany(me+1, 1) + ); + if (ret < 0) + continue; + if (pollfd[0].revents & POLLERR) { + D("error on fd0, rxcur %d@%d", + me[0].rx->avail, me[0].rx->cur); + } + if (pollfd[1].revents & POLLERR) { + D("error on fd1, rxcur %d@%d", + me[1].rx->avail, me[1].rx->cur); + } + if (pollfd[0].revents & POLLOUT) { + move(me + 1, me, burst); + // XXX we don't need the ioctl */ + // ioctl(me[0].fd, NIOCTXSYNC, NULL); + } + if (pollfd[1].revents & POLLOUT) { + move(me, me + 1, burst); + // XXX we don't need the ioctl */ + // ioctl(me[1].fd, NIOCTXSYNC, NULL); + } + } + D("exiting"); + netmap_close(me + 1); + netmap_close(me + 0); + + return (0); +} diff --git a/tools/tools/netmap/click-test.cfg b/tools/tools/netmap/click-test.cfg new file mode 100644 index 000000000000..fc5759f88b1e --- /dev/null +++ b/tools/tools/netmap/click-test.cfg @@ -0,0 +1,19 @@ +// +// $FreeBSD$ +// +// A sample test configuration for click +// +// +// 
create a switch + +myswitch :: EtherSwitch; + +// two input devices + +c0 :: FromDevice(ix0, PROMISC true); +c1 :: FromDevice(ix1, PROMISC true); + +// and now pass packets around + +c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0); +c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1); diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c new file mode 100644 index 000000000000..f010b839bfb2 --- /dev/null +++ b/tools/tools/netmap/pcap.c @@ -0,0 +1,761 @@ +/* + * (C) 2011 Luigi Rizzo + * + * BSD license + * + * A simple library that maps some pcap functions onto netmap + * This is not 100% complete but enough to let tcpdump, trafshow + * and other apps work. + * + * $FreeBSD$ + */ + +#include <errno.h> +#include <signal.h> /* signal */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> /* strcmp */ +#include <fcntl.h> /* open */ +#include <unistd.h> /* close */ + +#include <sys/endian.h> /* le64toh */ +#include <sys/mman.h> /* PROT_* */ +#include <sys/ioctl.h> /* ioctl */ +#include <machine/param.h> +#include <sys/poll.h> +#include <sys/socket.h> /* sockaddr.. */ +#include <arpa/inet.h> /* ntohs */ + +#include <net/if.h> /* ifreq */ +#include <net/ethernet.h> +#include <net/netmap.h> +#include <net/netmap_user.h> + +#include <netinet/in.h> /* sockaddr_in */ + +#include <sys/socket.h> +#include <ifaddrs.h> + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +char *version = "$Id$"; +int verbose = 0; + +/* debug support */ +#define ND(format, ...) do {} while (0) +#define D(format, ...) do { \ + if (verbose) \ + fprintf(stderr, "--- %s [%d] " format "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + } while (0) + + +/* + * We redefine here a number of structures that are in pcap.h + * so we can compile this file without the system header. + */ +#ifndef PCAP_ERRBUF_SIZE +#define PCAP_ERRBUF_SIZE 128 + +/* + * Each packet is accompanied by a header including the timestamp, + * captured size and actual size. 
+ */ +struct pcap_pkthdr { + struct timeval ts; /* time stamp */ + uint32_t caplen; /* length of portion present */ + uint32_t len; /* length this packet (off wire) */ +}; + +typedef struct pcap_if pcap_if_t; + +/* + * Representation of an interface address. + */ +struct pcap_addr { + struct pcap_addr *next; + struct sockaddr *addr; /* address */ + struct sockaddr *netmask; /* netmask for the above */ + struct sockaddr *broadaddr; /* broadcast addr for the above */ + struct sockaddr *dstaddr; /* P2P dest. address for the above */ +}; + +struct pcap_if { + struct pcap_if *next; + char *name; /* name to hand to "pcap_open_live()" */ + char *description; /* textual description of interface, or NULL */ + struct pcap_addr *addresses; + uint32_t flags; /* PCAP_IF_ interface flags */ +}; + +/* + * We do not support stats (yet) + */ +struct pcap_stat { + u_int ps_recv; /* number of packets received */ + u_int ps_drop; /* number of packets dropped */ + u_int ps_ifdrop; /* drops by interface XXX not yet supported */ +#ifdef WIN32 + u_int bs_capt; /* number of packets that reach the app. 
*/ +#endif /* WIN32 */ +}; + +typedef void pcap_t; +typedef enum { + PCAP_D_INOUT = 0, + PCAP_D_IN, + PCAP_D_OUT +} pcap_direction_t; + + + +typedef void (*pcap_handler)(u_char *user, + const struct pcap_pkthdr *h, const u_char *bytes); + +char errbuf[PCAP_ERRBUF_SIZE]; + +pcap_t *pcap_open_live(const char *device, int snaplen, + int promisc, int to_ms, char *errbuf); + +int pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf); +void pcap_close(pcap_t *p); +int pcap_get_selectable_fd(pcap_t *p); +int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user); +int pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf); +int pcap_setdirection(pcap_t *p, pcap_direction_t d); +char *pcap_lookupdev(char *errbuf); +int pcap_inject(pcap_t *p, const void *buf, size_t size); +int pcap_fileno(pcap_t *p); + +struct eproto { + const char *s; + u_short p; +}; +#endif /* !PCAP_ERRBUF_SIZE */ + +#ifdef __PIC__ +/* + * build as a shared library + */ + +char pcap_version[] = "libnetmap version 0.3"; + +/* + * Our equivalent of pcap_t + */ +struct my_ring { + struct nmreq nmr; + + int fd; + char *mem; /* userspace mmap address */ + u_int memsize; + u_int queueid; + u_int begin, end; /* first..last+1 rings to check */ + struct netmap_if *nifp; + + int snaplen; + char *errbuf; + int promisc; + int to_ms; + + struct pcap_pkthdr hdr; + + uint32_t if_flags; + uint32_t if_reqcap; + uint32_t if_curcap; + + struct pcap_stat st; + + char msg[PCAP_ERRBUF_SIZE]; +}; + + +static int +do_ioctl(struct my_ring *me, int what) +{ + struct ifreq ifr; + int error; + + bzero(&ifr, sizeof(ifr)); + strncpy(ifr.ifr_name, me->nmr.nr_name, sizeof(ifr.ifr_name)); + switch (what) { + case SIOCSIFFLAGS: + D("call SIOCSIFFLAGS 0x%x", me->if_flags); + ifr.ifr_flagshigh = (me->if_flags >> 16) & 0xffff; + ifr.ifr_flags = me->if_flags & 0xffff; + break; + case SIOCSIFCAP: + ifr.ifr_reqcap = me->if_reqcap; + ifr.ifr_curcap = me->if_curcap; + break; + } + error = ioctl(me->fd, what, &ifr); + if (error) 
{ + D("ioctl 0x%x error %d", what, error); + return error; + } + switch (what) { + case SIOCSIFFLAGS: + case SIOCGIFFLAGS: + me->if_flags = (ifr.ifr_flagshigh << 16) | + (0xffff & ifr.ifr_flags); + D("flags are L 0x%x H 0x%x 0x%x", + (uint16_t)ifr.ifr_flags, + (uint16_t)ifr.ifr_flagshigh, me->if_flags); + break; + + case SIOCGIFCAP: + me->if_reqcap = ifr.ifr_reqcap; + me->if_curcap = ifr.ifr_curcap; + D("curcap are 0x%x", me->if_curcap); + break; + } + return 0; +} + + +/* + * open a device. if me->mem is null then do an mmap. + */ +static int +netmap_open(struct my_ring *me, int ringid) +{ + int fd, err, l; + u_int i; + struct nmreq req; + + me->fd = fd = open("/dev/netmap", O_RDWR); + if (fd < 0) { + D("Unable to open /dev/netmap"); + return (-1); + } + bzero(&req, sizeof(req)); + strncpy(req.nr_name, me->nmr.nr_name, sizeof(req.nr_name)); + req.nr_ringid = ringid; + err = ioctl(fd, NIOCGINFO, &req); + if (err) { + D("cannot get info on %s", me->nmr.nr_name); + goto error; + } + me->memsize = l = req.nr_memsize; + ND("memsize is %d MB", l>>20); + err = ioctl(fd, NIOCREGIF, &req); + if (err) { + D("Unable to register %s", me->nmr.nr_name); + goto error; + } + + if (me->mem == NULL) { + me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + if (me->mem == MAP_FAILED) { + D("Unable to mmap"); + me->mem = NULL; + goto error; + } + } + + me->nifp = NETMAP_IF(me->mem, req.nr_offset); + me->queueid = ringid; + if (ringid & NETMAP_SW_RING) { + me->begin = req.nr_numrings; + me->end = me->begin + 1; + } else if (ringid & NETMAP_HW_RING) { + me->begin = ringid & NETMAP_RING_MASK; + me->end = me->begin + 1; + } else { + me->begin = 0; + me->end = req.nr_numrings; + } + /* request timestamps for packets */ + for (i = me->begin; i < me->end; i++) { + struct netmap_ring *ring = NETMAP_RXRING(me->nifp, i); + ring->flags = NR_TIMESTAMP; + } + //me->tx = NETMAP_TXRING(me->nifp, 0); + return (0); +error: + close(me->fd); + return -1; +} + +/* + * There is a set of 
functions that tcpdump expects even if probably + * not used + */ +struct eproto eproto_db[] = { + { "ip", ETHERTYPE_IP }, + { "arp", ETHERTYPE_ARP }, + { (char *)0, 0 } +}; + + +/* + * Build a pcap_if_t list from getifaddrs(): one descriptor per run of + * entries sharing the same name, each with a chain of pcap_addr copies. + * Returns 0 on success (list stored in *alldevsp), -1 if getifaddrs() + * fails. Entries that cannot be allocated are skipped. + */ +int +pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf) +{ + struct ifaddrs *i_head, *i; + pcap_if_t *top = NULL, *cur; + struct pcap_addr *tail = NULL; + int l; + + D("listing all devs"); + *alldevsp = NULL; + i_head = NULL; + + if (getifaddrs(&i_head)) { + D("cannot get if addresses"); + return -1; + } + for (i = i_head; i; i = i->ifa_next) { + //struct ifaddrs *ifa; + struct pcap_addr *pca; + //struct sockaddr *sa; + + D("got interface %s", i->ifa_name); + if (!top || strcmp(top->name, i->ifa_name)) { + /* new interface */ + l = sizeof(*top) + strlen(i->ifa_name) + 1; + cur = calloc(1, l); + if (cur == NULL) { + D("no space for if descriptor"); + continue; + } + cur->name = (char *)(cur + 1); + //cur->flags = i->ifa_flags; + strcpy(cur->name, i->ifa_name); + cur->description = NULL; + cur->next = top; + top = cur; + tail = NULL; + } + /* now deal with addresses */ + if (i->ifa_addr == NULL) { + /* + * getifaddrs() may return entries without an address; + * the code below dereferences ifa_addr, so skip them. + */ + continue; + } + D("%s addr family %d len %d %s %s", + top->name, + i->ifa_addr->sa_family, i->ifa_addr->sa_len, + i->ifa_netmask ? "Netmask" : "", + i->ifa_broadaddr ? "Broadcast" : ""); + l = sizeof(struct pcap_addr) + + (i->ifa_addr ? i->ifa_addr->sa_len:0) + + (i->ifa_netmask ? i->ifa_netmask->sa_len:0) + + (i->ifa_broadaddr? 
i->ifa_broadaddr->sa_len:0); + pca = calloc(1, l); + if (pca == NULL) { + D("no space for if addr"); + continue; + } +#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len)) + pca->addr = (struct sockaddr *)(pca + 1); + bcopy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len); + if (i->ifa_netmask) { + pca->netmask = SA_NEXT(pca->addr); + bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len); + if (i->ifa_broadaddr) { + pca->broadaddr = SA_NEXT(pca->netmask); + bcopy(i->ifa_broadaddr, pca->broadaddr, i->ifa_broadaddr->sa_len); + } + } + if (tail == NULL) { + top->addresses = pca; + } else { + tail->next = pca; + } + tail = pca; + + } + freeifaddrs(i_head); + *alldevsp = top; + return 0; +} + +void pcap_freealldevs(__unused pcap_if_t *alldevs) +{ + D("unimplemented"); +} + +char * +pcap_lookupdev(char *buf) +{ + D("%s", buf); + strcpy(buf, "/dev/netmap"); + return buf; +} + +pcap_t * +pcap_create(const char *source, char *errbuf) +{ + D("src %s (call open liveted)", source); + return pcap_open_live(source, 0, 1, 100, errbuf); +} + +int +pcap_activate(pcap_t *p) +{ + D("pcap %p running", p); + return 0; +} + +int +pcap_can_set_rfmon(__unused pcap_t *p) +{ + D(""); + return 0; /* no we can't */ +} + +int +pcap_set_snaplen(pcap_t *p, int snaplen) +{ + struct my_ring *me = p; + + D("len %d", snaplen); + me->snaplen = snaplen; + return 0; +} + +int +pcap_snapshot(pcap_t *p) +{ + struct my_ring *me = p; + + D("len %d", me->snaplen); + return me->snaplen; +} + +int +pcap_lookupnet(const char *device, uint32_t *netp, + uint32_t *maskp, __unused char *errbuf) +{ + + D("device %s", device); + inet_aton("10.0.0.255", (struct in_addr *)netp); + inet_aton("255.255.255.0",(struct in_addr *) maskp); + return 0; +} + +int +pcap_set_promisc(pcap_t *p, int promisc) +{ + struct my_ring *me = p; + + D("promisc %d", promisc); + if (do_ioctl(me, SIOCGIFFLAGS)) + D("SIOCGIFFLAGS failed"); + if (promisc) { + me->if_flags |= IFF_PPROMISC; + } else { + me->if_flags &= 
~IFF_PPROMISC; + } + if (do_ioctl(me, SIOCSIFFLAGS)) + D("SIOCSIFFLAGS failed"); + return 0; +} + +int +pcap_set_timeout(pcap_t *p, int to_ms) +{ + struct my_ring *me = p; + + D("%d ms", to_ms); + me->to_ms = to_ms; + return 0; +} + +struct bpf_program; + +int +pcap_compile(__unused pcap_t *p, __unused struct bpf_program *fp, + const char *str, __unused int optimize, __unused uint32_t netmask) +{ + D("%s", str); + return 0; +} + +int +pcap_setfilter(__unused pcap_t *p, __unused struct bpf_program *fp) +{ + D(""); + return 0; +} + +int +pcap_datalink(__unused pcap_t *p) +{ + D(""); + return 1; // ethernet +} + +const char * +pcap_datalink_val_to_name(int dlt) +{ + D("%d", dlt); + return "DLT_EN10MB"; +} + +const char * +pcap_datalink_val_to_description(int dlt) +{ + D("%d", dlt); + return "Ethernet link"; +} + +struct pcap_stat; +int +pcap_stats(pcap_t *p, struct pcap_stat *ps) +{ + struct my_ring *me = p; + ND(""); + + me->st.ps_recv += 10; + *ps = me->st; + sprintf(me->msg, "stats not supported"); + return -1; +}; + +char * +pcap_geterr(pcap_t *p) +{ + struct my_ring *me = p; + + D(""); + return me->msg; +} + +pcap_t * +pcap_open_live(const char *device, __unused int snaplen, + int promisc, int to_ms, __unused char *errbuf) +{ + struct my_ring *me; + + D("request to open %s", device); + me = calloc(1, sizeof(*me)); + if (me == NULL) { + D("failed to allocate struct for %s", device); + return NULL; + } + strncpy(me->nmr.nr_name, device, sizeof(me->nmr.nr_name)); + if (netmap_open(me, 0)) { + D("error opening %s", device); + free(me); + return NULL; + } + me->to_ms = to_ms; + if (do_ioctl(me, SIOCGIFFLAGS)) + D("SIOCGIFFLAGS failed"); + if (promisc) { + me->if_flags |= IFF_PPROMISC; + if (do_ioctl(me, SIOCSIFFLAGS)) + D("SIOCSIFFLAGS failed"); + } + if (do_ioctl(me, SIOCGIFCAP)) + D("SIOCGIFCAP failed"); + me->if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); + if (do_ioctl(me, SIOCSIFCAP)) + D("SIOCSIFCAP failed"); + + return (pcap_t *)me; +} + +void 
+pcap_close(pcap_t *p) +{ + struct my_ring *me = p; + + D(""); + if (!me) + return; + if (me->mem) + munmap(me->mem, me->memsize); + /* restore original flags ? */ + ioctl(me->fd, NIOCUNREGIF, NULL); + close(me->fd); + bzero(me, sizeof(*me)); + free(me); +} + +int +pcap_fileno(pcap_t *p) +{ + struct my_ring *me = p; + D("returns %d", me->fd); + return me->fd; +} + +int +pcap_get_selectable_fd(pcap_t *p) +{ + struct my_ring *me = p; + + ND(""); + return me->fd; +} + +int +pcap_setnonblock(__unused pcap_t *p, int nonblock, __unused char *errbuf) +{ + D("mode is %d", nonblock); + return 0; /* ignore */ +} + +int +pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d) +{ + D(""); + return 0; /* ignore */ +}; + +int +pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user) +{ + struct my_ring *me = p; + int got = 0; + u_int si; + + ND("cnt %d", cnt); + /* scan all rings */ + for (si = me->begin; si < me->end; si++) { + struct netmap_ring *ring = NETMAP_RXRING(me->nifp, si); + ND("ring has %d pkts", ring->avail); + if (ring->avail == 0) + continue; + me->hdr.ts = ring->ts; + while ((cnt == -1 || cnt != got) && ring->avail > 0) { + u_int i = ring->cur; + u_int idx = ring->slot[i].buf_idx; + if (idx < 2) { + D("%s bogus RX index %d at offset %d", + me->nifp->ni_name, idx, i); + sleep(2); + } + u_char *buf = (u_char *)NETMAP_BUF(ring, idx); + me->hdr.len = me->hdr.caplen = ring->slot[i].len; + // D("call %p len %d", p, me->hdr.len); + callback(user, &me->hdr, buf); + ring->cur = NETMAP_RING_NEXT(ring, i); + ring->avail--; + got++; + } + } + return got; +} + +int +pcap_inject(pcap_t *p, const void *buf, size_t size) +{ + struct my_ring *me = p; + u_int si; + + ND("cnt %d", cnt); + /* scan all rings */ + for (si = me->begin; si < me->end; si++) { + struct netmap_ring *ring = NETMAP_TXRING(me->nifp, si); + + ND("ring has %d pkts", ring->avail); + if (ring->avail == 0) + continue; + u_int i = ring->cur; + u_int idx = ring->slot[i].buf_idx; + if (idx < 
2) { + D("%s bogus TX index %d at offset %d", + me->nifp->ni_name, idx, i); + sleep(2); + } + u_char *dst = (u_char *)NETMAP_BUF(ring, idx); + ring->slot[i].len = size; + bcopy(buf, dst, size); + ring->cur = NETMAP_RING_NEXT(ring, i); + ring->avail--; + // if (ring->avail == 0) ioctl(me->fd, NIOCTXSYNC, NULL); + return size; + } + errno = ENOBUFS; + return -1; +} + +int +pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user) +{ + struct my_ring *me = p; + struct pollfd fds[1]; + int i; + + ND("cnt %d", cnt); + memset(fds, 0, sizeof(fds)); + fds[0].fd = me->fd; + fds[0].events = (POLLIN); + + while (cnt == -1 || cnt > 0) { + if (poll(fds, 1, me->to_ms) <= 0) { + D("poll error/timeout"); + continue; + } + i = pcap_dispatch(p, cnt, callback, user); + if (cnt > 0) + cnt -= i; + } + return 0; +} + +#endif /* __PIC__ */ + +#ifndef __PIC__ +void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf) +{ + pcap_inject((pcap_t *)user, buf, h->caplen); +} + +/* + * a simple pcap test program, bridge between two interfaces. + */ +int +main(int argc, char **argv) +{ + pcap_t *p0, *p1; + int burst = 1024; + struct pollfd pollfd[2]; + + fprintf(stderr, "%s %s built %s %s\n", + argv[0], version, __DATE__, __TIME__); + + while (argc > 1 && !strcmp(argv[1], "-v")) { + verbose++; + argv++; + argc--; + } + + if (argc < 3 || argc > 4 || !strcmp(argv[1], argv[2])) { + D("Usage: %s IFNAME1 IFNAME2 [BURST]", argv[0]); + return (1); + } + if (argc > 3) + burst = atoi(argv[3]); + + p0 = pcap_open_live(argv[1], 0, 1, 100, NULL); + p1 = pcap_open_live(argv[2], 0, 1, 100, NULL); + D("%s", version); + D("open returns %p %p", p0, p1); + if (!p0 || !p1) + return(1); + bzero(pollfd, sizeof(pollfd)); + pollfd[0].fd = pcap_fileno(p0); + pollfd[1].fd = pcap_fileno(p1); + pollfd[0].events = pollfd[1].events = POLLIN; + for (;;) { + /* do i need to reset ? 
*/ + pollfd[0].revents = pollfd[1].revents = 0; + int ret = poll(pollfd, 2, 1000); + if (ret <= 0 || verbose) + D("poll %s [0] ev %x %x [1] ev %x %x", + ret <= 0 ? "timeout" : "ok", + pollfd[0].events, + pollfd[0].revents, + pollfd[1].events, + pollfd[1].revents); + if (ret < 0) + continue; + if (pollfd[0].revents & POLLIN) + pcap_dispatch(p0, burst, do_send, p1); + if (pollfd[1].revents & POLLIN) + pcap_dispatch(p1, burst, do_send, p0); + } + + return (0); +} +#endif /* !__PIC__ */ diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c new file mode 100644 index 000000000000..747bd9dde00b --- /dev/null +++ b/tools/tools/netmap/pkt-gen.c @@ -0,0 +1,1021 @@ +/* + * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $FreeBSD$ + * $Id: pkt-gen.c 9638 2011-11-07 18:07:43Z luigi $ + * + * Example program to show how to build a multithreaded packet + * source/sink using the netmap device. + * + * In this example we create a programmable number of threads + * to take care of all the queues of the interface used to + * send or receive traffic. + * + */ + +const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n" + "http://info.iet.unipi.it/~luigi/netmap/ "; + +#include <errno.h> +#include <pthread.h> /* pthread_* */ +#include <pthread_np.h> /* pthread w/ affinity */ +#include <signal.h> /* signal */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> /* strcmp */ +#include <fcntl.h> /* open */ +#include <unistd.h> /* close */ +#include <ifaddrs.h> /* getifaddrs */ + +#include <sys/mman.h> /* PROT_* */ +#include <sys/ioctl.h> /* ioctl */ +#include <sys/poll.h> +#include <sys/socket.h> /* sockaddr.. */ +#include <arpa/inet.h> /* ntohs */ +#include <sys/param.h> +#include <sys/cpuset.h> /* cpu_set */ +#include <sys/sysctl.h> /* sysctl */ +#include <sys/time.h> /* timersub */ + +#include <net/ethernet.h> +#include <net/if.h> /* ifreq */ +#include <net/if_dl.h> /* LLADDR */ + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> + +#include <net/netmap.h> +#include <net/netmap_user.h> +#include <pcap/pcap.h> + + +static inline int min(int a, int b) { return a < b ? 
a : b; } + +/* debug support */ +#define D(format, ...) \ + fprintf(stderr, "%s [%d] " format "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__) + +#ifndef EXPERIMENTAL +#define EXPERIMENTAL 0 +#endif + +int verbose = 0; +#define MAX_QUEUES 64 /* no need to limit */ + +#define SKIP_PAYLOAD 1 /* do not check payload. */ + +#if EXPERIMENTAL +/* Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline */ +#define netmap_rdtsc(t) \ + do { \ + u_int __regs[4]; \ + \ + do_cpuid(0, __regs); \ + (t) = rdtsc(); \ + } while (0) + +static __inline void +do_cpuid(u_int ax, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); +} + +static __inline uint64_t +rdtsc(void) +{ + uint64_t rv; + + __asm __volatile("rdtsc" : "=A" (rv)); + return (rv); +} +#define MAX_SAMPLES 100000 +#endif /* EXPERIMENTAL */ + + +struct pkt { + struct ether_header eh; + struct ip ip; + struct udphdr udp; + uint8_t body[NETMAP_BUF_SIZE]; +} __attribute__((__packed__)); + +/* + * global arguments for all threads + */ +struct glob_arg { + const char *src_ip; + const char *dst_ip; + const char *src_mac; + const char *dst_mac; + int pkt_size; + int burst; + int npackets; /* total packets to send */ + int nthreads; + int cpus; + int use_pcap; + pcap_t *p; +}; + +struct mystat { + uint64_t containers[8]; +}; + +/* + * Arguments for a new thread. 
The same structure is used by + * the source and the sink + */ +struct targ { + struct glob_arg *g; + int used; + int completed; + int fd; + struct nmreq nmr; + struct netmap_if *nifp; + uint16_t qfirst, qlast; /* range of queues to scan */ + uint64_t count; + struct timeval tic, toc; + int me; + pthread_t thread; + int affinity; + + uint8_t dst_mac[6]; + uint8_t src_mac[6]; + u_int dst_mac_range; + u_int src_mac_range; + uint32_t dst_ip; + uint32_t src_ip; + u_int dst_ip_range; + u_int src_ip_range; + + struct pkt pkt; +}; + + +static struct targ *targs; +static int global_nthreads; + +/* control-C handler */ +static void +sigint_h(__unused int sig) +{ + for (int i = 0; i < global_nthreads; i++) { + /* cancel active threads. */ + if (targs[i].used == 0) + continue; + + D("Cancelling thread #%d\n", i); + pthread_cancel(targs[i].thread); + targs[i].used = 0; + } + + signal(SIGINT, SIG_DFL); +} + + +/* sysctl wrapper to return the number of active CPUs */ +static int +system_ncpus(void) +{ + int mib[2], ncpus; + size_t len; + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + len = sizeof(mib); + sysctl(mib, 2, &ncpus, &len, NULL, 0); + + return (ncpus); +} + +/* + * locate the src mac address for our interface, put it + * into the user-supplied buffer. return 0 if ok, -1 on error. 
+ */ +static int +source_hwaddr(const char *ifname, char *buf) +{ + struct ifaddrs *ifaphead, *ifap; + int l = sizeof(ifap->ifa_name); + + if (getifaddrs(&ifaphead) != 0) { + D("getifaddrs %s failed", ifname); + return (-1); + } + + for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { + struct sockaddr_dl *sdl = + (struct sockaddr_dl *)ifap->ifa_addr; + uint8_t *mac; + + if (!sdl || sdl->sdl_family != AF_LINK) + continue; + if (strncmp(ifap->ifa_name, ifname, l) != 0) + continue; + mac = (uint8_t *)LLADDR(sdl); + sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", + mac[0], mac[1], mac[2], + mac[3], mac[4], mac[5]); + if (verbose) + D("source hwaddr %s", buf); + break; + } + freeifaddrs(ifaphead); + return ifap ? 0 : 1; +} + + +/* set the thread affinity. */ +static int +setaffinity(pthread_t me, int i) +{ + cpuset_t cpumask; + + if (i == -1) + return 0; + + /* Set thread affinity affinity.*/ + CPU_ZERO(&cpumask); + CPU_SET(i, &cpumask); + + if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { + D("Unable to set affinity"); + return 1; + } + return 0; +} + +/* Compute the checksum of the given ip header. */ +static uint16_t +checksum(const void *data, uint16_t len) +{ + const uint8_t *addr = data; + uint32_t sum = 0; + + while (len > 1) { + sum += addr[0] * 256 + addr[1]; + addr += 2; + len -= 2; + } + + if (len == 1) + sum += *addr * 256; + + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + + sum = htons(sum); + + return ~sum; +} + +/* + * Fill a packet with some payload. 
+ */ +static void +initialize_packet(struct targ *targ) +{ + struct pkt *pkt = &targ->pkt; + struct ether_header *eh; + struct ip *ip; + struct udphdr *udp; + uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(*ip); + int i, l, l0 = strlen(default_payload); + char *p; + + for (i = 0; i < paylen;) { + l = min(l0, paylen - i); + bcopy(default_payload, pkt->body + i, l); + i += l; + } + pkt->body[i-1] = '\0'; + + udp = &pkt->udp; + udp->uh_sport = htons(1234); + udp->uh_dport = htons(4321); + udp->uh_ulen = htons(paylen); + udp->uh_sum = 0; // checksum(udp, sizeof(*udp)); + + ip = &pkt->ip; + ip->ip_v = IPVERSION; + ip->ip_hl = 5; + ip->ip_id = 0; + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); + ip->ip_id = 0; + ip->ip_off = htons(IP_DF); /* Don't fragment */ + ip->ip_ttl = IPDEFTTL; + ip->ip_p = IPPROTO_UDP; + inet_aton(targ->g->src_ip, (struct in_addr *)&ip->ip_src); + inet_aton(targ->g->dst_ip, (struct in_addr *)&ip->ip_dst); + targ->dst_ip = ip->ip_dst.s_addr; + targ->src_ip = ip->ip_src.s_addr; + p = index(targ->g->src_ip, '-'); + if (p) { + targ->dst_ip_range = atoi(p+1); + D("dst-ip sweep %d addresses", targ->dst_ip_range); + } + ip->ip_sum = checksum(ip, sizeof(*ip)); + + eh = &pkt->eh; + bcopy(ether_aton(targ->g->src_mac), targ->src_mac, 6); + bcopy(targ->src_mac, eh->ether_shost, 6); + p = index(targ->g->src_mac, '-'); + if (p) + targ->src_mac_range = atoi(p+1); + + bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); + bcopy(targ->dst_mac, eh->ether_dhost, 6); + p = index(targ->g->dst_mac, '-'); + if (p) + targ->dst_mac_range = atoi(p+1); + eh->ether_type = htons(ETHERTYPE_IP); +} + +/* Check the payload of the packet for errors (use it for debug). + * Look for consecutive ascii representations of the size of the packet. + */ +static void +check_payload(char *p, int psize) +{ + char temp[64]; + int n_read, size, sizelen; + + /* get the length in ASCII of the length of the packet. 
*/ + sizelen = sprintf(temp, "%d", psize) + 1; // include a whitespace + + /* dummy payload. */ + p += 14; /* skip packet header. */ + n_read = 14; + while (psize - n_read >= sizelen) { + sscanf(p, "%d", &size); + if (size != psize) { + D("Read %d instead of %d", size, psize); + break; + } + + p += sizelen; + n_read += sizelen; + } +} + + +/* + * create and enqueue a batch of packets on a ring. + * On the last one set NS_REPORT to tell the driver to generate + * an interrupt when done. + */ +static int +send_packets(struct netmap_ring *ring, struct pkt *pkt, + int size, u_int count, int fill_all) +{ + u_int sent, cur = ring->cur; + + if (ring->avail < count) + count = ring->avail; + + for (sent = 0; sent < count; sent++) { + struct netmap_slot *slot = &ring->slot[cur]; + char *p = NETMAP_BUF(ring, slot->buf_idx); + + if (fill_all) + memcpy(p, pkt, size); + + slot->len = size; + if (sent == count - 1) + slot->flags |= NS_REPORT; + cur = NETMAP_RING_NEXT(ring, cur); + } + ring->avail -= sent; + ring->cur = cur; + + return (sent); +} + +static void * +sender_body(void *data) +{ + struct targ *targ = (struct targ *) data; + + struct pollfd fds[1]; + struct netmap_if *nifp = targ->nifp; + struct netmap_ring *txring; + int i, n = targ->g->npackets / targ->g->nthreads, sent = 0; + int fill_all = 1; + + if (setaffinity(targ->thread, targ->affinity)) + goto quit; + /* setup poll(2) machanism. 
*/ + memset(fds, 0, sizeof(fds)); + fds[0].fd = targ->fd; + fds[0].events = (POLLOUT); + + /* main loop.*/ + gettimeofday(&targ->tic, NULL); + if (targ->g->use_pcap) { + int size = targ->g->pkt_size; + void *pkt = &targ->pkt; + pcap_t *p = targ->g->p; + + for (; sent < n; sent++) { + if (pcap_inject(p, pkt, size) == -1) + break; + } + } else { + while (sent < n) { + + /* + * wait for available room in the send queue(s) + */ + if (poll(fds, 1, 2000) <= 0) { + D("poll error/timeout on queue %d\n", targ->me); + goto quit; + } + /* + * scan our queues and send on those with room + */ + if (sent > 100000) + fill_all = 0; + for (i = targ->qfirst; i < targ->qlast; i++) { + int m, limit = MIN(n - sent, targ->g->burst); + + txring = NETMAP_TXRING(nifp, i); + if (txring->avail == 0) + continue; + m = send_packets(txring, &targ->pkt, targ->g->pkt_size, + limit, fill_all); + sent += m; + targ->count = sent; + } + } + /* Tell the interface that we have new packets. */ + ioctl(fds[0].fd, NIOCTXSYNC, NULL); + + /* final part: wait all the TX queues to be empty. */ + for (i = targ->qfirst; i < targ->qlast; i++) { + txring = NETMAP_TXRING(nifp, i); + while (!NETMAP_TX_RING_EMPTY(txring)) { + ioctl(fds[0].fd, NIOCTXSYNC, NULL); + usleep(1); /* wait 1 tick */ + } + } + } + + gettimeofday(&targ->toc, NULL); + targ->completed = 1; + targ->count = sent; + +quit: + /* reset the ``used`` flag. 
*/ + targ->used = 0; + + return (NULL); +} + + +static void +receive_pcap(u_char *user, __unused const struct pcap_pkthdr * h, + __unused const u_char * bytes) +{ + int *count = (int *)user; + (*count)++; +} + +static int +receive_packets(struct netmap_ring *ring, u_int limit, int skip_payload) +{ + u_int cur, rx; + + cur = ring->cur; + if (ring->avail < limit) + limit = ring->avail; + for (rx = 0; rx < limit; rx++) { + struct netmap_slot *slot = &ring->slot[cur]; + char *p = NETMAP_BUF(ring, slot->buf_idx); + + if (!skip_payload) + check_payload(p, slot->len); + + cur = NETMAP_RING_NEXT(ring, cur); + } + ring->avail -= rx; + ring->cur = cur; + + return (rx); +} + +static void * +receiver_body(void *data) +{ + struct targ *targ = (struct targ *) data; + struct pollfd fds[1]; + struct netmap_if *nifp = targ->nifp; + struct netmap_ring *rxring; + int i, received = 0; + + if (setaffinity(targ->thread, targ->affinity)) + goto quit; + + /* setup poll(2) machanism. */ + memset(fds, 0, sizeof(fds)); + fds[0].fd = targ->fd; + fds[0].events = (POLLIN); + + /* unbounded wait for the first packet. */ + for (;;) { + i = poll(fds, 1, 1000); + if (i > 0 && !(fds[0].revents & POLLERR)) + break; + D("waiting for initial packets, poll returns %d %d", i, fds[0].revents); + } + + /* main loop, exit after 1s silence */ + gettimeofday(&targ->tic, NULL); + if (targ->g->use_pcap) { + for (;;) { + pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); + } + } else { + while (1) { + /* Once we started to receive packets, wait at most 1 seconds + before quitting. */ + if (poll(fds, 1, 1 * 1000) <= 0) { + gettimeofday(&targ->toc, NULL); + targ->toc.tv_sec -= 1; /* Substract timeout time. 
*/ + break; + } + + for (i = targ->qfirst; i < targ->qlast; i++) { + int m; + + rxring = NETMAP_RXRING(nifp, i); + if (rxring->avail == 0) + continue; + + m = receive_packets(rxring, targ->g->burst, + SKIP_PAYLOAD); + received += m; + targ->count = received; + } + + // tell the card we have read the data + //ioctl(fds[0].fd, NIOCRXSYNC, NULL); + } + } + + targ->completed = 1; + targ->count = received; + +quit: + /* reset the ``used`` flag. */ + targ->used = 0; + + return (NULL); +} + +static void +tx_output(uint64_t sent, int size, double delta) +{ + double amount = 8.0 * (1.0 * size * sent) / delta; + double pps = sent / delta; + char units[4] = { '\0', 'K', 'M', 'G' }; + int aunit = 0, punit = 0; + + while (amount >= 1000) { + amount /= 1000; + aunit += 1; + } + while (pps >= 1000) { + pps /= 1000; + punit += 1; + } + + printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", + sent, size, delta); + printf("Speed: %.2f%cpps. Bandwidth: %.2f%cbps.\n", + pps, units[punit], amount, units[aunit]); +} + + +static void +rx_output(uint64_t received, double delta) +{ + + double pps = received / delta; + char units[4] = { '\0', 'K', 'M', 'G' }; + int punit = 0; + + while (pps >= 1000) { + pps /= 1000; + punit += 1; + } + + printf("Received %llu packets, in %.2f seconds.\n", received, delta); + printf("Speed: %.2f%cpps.\n", pps, units[punit]); +} + +static void +usage(void) +{ + const char *cmd = "pkt-gen"; + fprintf(stderr, + "Usage:\n" + "%s arguments\n" + "\t-i interface interface name\n" + "\t-t pkts_to_send also forces send mode\n" + "\t-r pkts_to_receive also forces receive mode\n" + "\t-l pkts_size in bytes excluding CRC\n" + "\t-d dst-ip end with %%n to sweep n addresses\n" + "\t-s src-ip end with %%n to sweep n addresses\n" + "\t-D dst-mac end with %%n to sweep n addresses\n" + "\t-S src-mac end with %%n to sweep n addresses\n" + "\t-b burst size testing, mostly\n" + "\t-c cores cores to use\n" + "\t-p threads processes/threads to use\n" + "\t-T report_ms 
milliseconds between reports\n" + "\t-w wait_for_link_time in seconds\n" + "", + cmd); + + exit(0); +} + + +int +main(int arc, char **argv) +{ + int i, fd; + + struct glob_arg g; + + struct nmreq nmr; + void *mmap_addr; /* the mmap address */ + void *(*td_body)(void *) = receiver_body; + int ch; + int report_interval = 1000; /* report interval */ + char *ifname = NULL; + int wait_link = 2; + int devqueues = 1; /* how many device queues */ + + bzero(&g, sizeof(g)); + + g.src_ip = "10.0.0.1"; + g.dst_ip = "10.1.0.1"; + g.dst_mac = "ff:ff:ff:ff:ff:ff"; + g.src_mac = NULL; + g.pkt_size = 60; + g.burst = 512; // default + g.nthreads = 1; + g.cpus = 1; + + while ( (ch = getopt(arc, argv, + "i:t:r:l:d:s:D:S:b:c:p:T:w:v")) != -1) { + switch(ch) { + default: + D("bad option %c %s", ch, optarg); + usage(); + break; + case 'i': /* interface */ + ifname = optarg; + break; + case 't': /* send */ + td_body = sender_body; + g.npackets = atoi(optarg); + break; + case 'r': /* receive */ + td_body = receiver_body; + g.npackets = atoi(optarg); + break; + case 'l': /* pkt_size */ + g.pkt_size = atoi(optarg); + break; + case 'd': + g.dst_ip = optarg; + break; + case 's': + g.src_ip = optarg; + break; + case 'T': /* report interval */ + report_interval = atoi(optarg); + break; + case 'w': + wait_link = atoi(optarg); + break; + case 'b': /* burst */ + g.burst = atoi(optarg); + break; + case 'c': + g.cpus = atoi(optarg); + break; + case 'p': + g.nthreads = atoi(optarg); + break; + + case 'P': + g.use_pcap = 1; + break; + + case 'D': /* destination mac */ + g.dst_mac = optarg; + { + struct ether_addr *mac = ether_aton(g.dst_mac); + D("ether_aton(%s) gives %p", g.dst_mac, mac); + } + break; + case 'S': /* source mac */ + g.src_mac = optarg; + break; + case 'v': + verbose++; + } + } + + if (ifname == NULL) { + D("missing ifname"); + usage(); + } + { + int n = system_ncpus(); + if (g.cpus < 0 || g.cpus > n) { + D("%d cpus is too high, have only %d cpus", g.cpus, n); + usage(); + } + if 
(g.cpus == 0) + g.cpus = n; + } + if (g.pkt_size < 16 || g.pkt_size > 1536) { + D("bad pktsize %d\n", g.pkt_size); + usage(); + } + + bzero(&nmr, sizeof(nmr)); + /* + * Open the netmap device to fetch the number of queues of our + * interface. + * + * The first NIOCREGIF also detaches the card from the + * protocol stack and may cause a reset of the card, + * which in turn may take some time for the PHY to + * reconfigure. + */ + fd = open("/dev/netmap", O_RDWR); + if (fd == -1) { + D("Unable to open /dev/netmap"); + // fail later + } else { + if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) { + D("Unable to get if info without name"); + } else { + D("map size is %d Kb", nmr.nr_memsize >> 10); + } + bzero(&nmr, sizeof(nmr)); + strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name)); + if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) { + D("Unable to get if info for %s", ifname); + } + devqueues = nmr.nr_numrings; + } + + /* validate provided nthreads. */ + if (g.nthreads < 1 || g.nthreads > devqueues) { + D("bad nthreads %d, have %d queues", g.nthreads, devqueues); + // continue, fail later + } + + if (td_body == sender_body && g.src_mac == NULL) { + static char mybuf[20] = "ff:ff:ff:ff:ff:ff"; + /* retrieve source mac address. */ + if (source_hwaddr(ifname, mybuf) == -1) { + D("Unable to retrieve source mac"); + // continue, fail later + } + g.src_mac = mybuf; + } + + /* + * Map the netmap shared memory: instead of issuing mmap() + * inside the body of the threads, we prefer to keep this + * operation here to simplify the thread logic. + */ + D("mmapping %d Kbytes", nmr.nr_memsize>>10); + mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize, + PROT_WRITE | PROT_READ, + MAP_SHARED, fd, 0); + if (mmap_addr == MAP_FAILED) { + D("Unable to mmap %d KB", nmr.nr_memsize >> 10); + // continue, fail later + } + + /* + * Register the interface on the netmap device: from now on, + * we can operate on the network interface without any + * interference from the legacy network stack. 
+ * + * We decide to put the first interface registration here to + * give time to cards that take a long time to reset the PHY. + */ + if (ioctl(fd, NIOCREGIF, &nmr) == -1) { + D("Unable to register interface %s", ifname); + //continue, fail later + } + + + /* Print some debug information. */ + fprintf(stdout, + "%s %s: %d queues, %d threads and %d cpus.\n", + (td_body == sender_body) ? "Sending on" : "Receiving from", + ifname, + devqueues, + g.nthreads, + g.cpus); + if (td_body == sender_body) { + fprintf(stdout, "%s -> %s (%s -> %s)\n", + g.src_ip, g.dst_ip, + g.src_mac, g.dst_mac); + } + + /* Exit if something went wrong. */ + if (fd < 0) { + D("aborting"); + usage(); + } + + + /* Wait for PHY reset. */ + D("Wait %d secs for phy reset", wait_link); + sleep(wait_link); + D("Ready..."); + + /* Install ^C handler. */ + global_nthreads = g.nthreads; + signal(SIGINT, sigint_h); + + if (g.use_pcap) { + // XXX g.p = pcap_open_live(..); + } + + targs = calloc(g.nthreads, sizeof(*targs)); + /* + * Now create the desired number of threads, each one + * using a single descriptor. + */ + for (i = 0; i < g.nthreads; i++) { + struct netmap_if *tnifp; + struct nmreq tifreq; + int tfd; + + if (g.use_pcap) { + tfd = -1; + tnifp = NULL; + } else { + /* register interface. */ + tfd = open("/dev/netmap", O_RDWR); + if (tfd == -1) { + D("Unable to open /dev/netmap"); + continue; + } + + bzero(&tifreq, sizeof(tifreq)); + strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name)); + tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0; + + /* + * if we are acting as a receiver only, do not touch the transmit ring. + * This is not the default because many apps may use the interface + * in both directions, but a pure receiver does not. 
+ */ + if (td_body == receiver_body) { + tifreq.nr_ringid |= NETMAP_NO_TX_POLL; + } + + if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) { + D("Unable to register %s", ifname); + continue; + } + tnifp = NETMAP_IF(mmap_addr, tifreq.nr_offset); + } + /* start threads. */ + bzero(&targs[i], sizeof(targs[i])); + targs[i].g = &g; + targs[i].used = 1; + targs[i].completed = 0; + targs[i].fd = tfd; + targs[i].nmr = tifreq; + targs[i].nifp = tnifp; + targs[i].qfirst = (g.nthreads > 1) ? i : 0; + targs[i].qlast = (g.nthreads > 1) ? i+1 : tifreq.nr_numrings; + targs[i].me = i; + targs[i].affinity = g.cpus ? i % g.cpus : -1; + if (td_body == sender_body) { + /* initialize the packet to send. */ + initialize_packet(&targs[i]); + } + + if (pthread_create(&targs[i].thread, NULL, td_body, + &targs[i]) == -1) { + D("Unable to create thread %d", i); + targs[i].used = 0; + } + } + + { + uint64_t my_count = 0, prev = 0; + uint64_t count = 0; + double delta_t; + struct timeval tic, toc; + + gettimeofday(&toc, NULL); + for (;;) { + struct timeval now, delta; + uint64_t pps; + int done = 0; + + delta.tv_sec = report_interval/1000; + delta.tv_usec = (report_interval%1000)*1000; + select(0, NULL, NULL, NULL, &delta); + gettimeofday(&now, NULL); + timersub(&now, &toc, &toc); + my_count = 0; + for (i = 0; i < g.nthreads; i++) { + my_count += targs[i].count; + if (targs[i].used == 0) + done++; + } + pps = toc.tv_sec* 1000000 + toc.tv_usec; + if (pps < 10000) + continue; + pps = (my_count - prev)*1000000 / pps; + D("%llu pps", pps); + prev = my_count; + toc = now; + if (done == g.nthreads) + break; + } + + timerclear(&tic); + timerclear(&toc); + for (i = 0; i < g.nthreads; i++) { + /* + * Join active threads, unregister interfaces and close + * file descriptors. 
+ */ + pthread_join(targs[i].thread, NULL); + ioctl(targs[i].fd, NIOCUNREGIF, &targs[i].nmr); + close(targs[i].fd); + + if (targs[i].completed == 0) + continue; + + /* + * Collect threads o1utput and extract information about + * how log it took to send all the packets. + */ + count += targs[i].count; + if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <)) + tic = targs[i].tic; + if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >)) + toc = targs[i].toc; + } + + /* print output. */ + timersub(&toc, &tic, &toc); + delta_t = toc.tv_sec + 1e-6* toc.tv_usec; + if (td_body == sender_body) + tx_output(count, g.pkt_size, delta_t); + else + rx_output(count, delta_t); + } + + ioctl(fd, NIOCUNREGIF, &nmr); + munmap(mmap_addr, nmr.nr_memsize); + close(fd); + + return (0); +} +/* end of file */ |