aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/conf/files4
-rw-r--r--sys/dev/netmap/netmap.c501
-rw-r--r--sys/dev/netmap/netmap_freebsd.c265
-rw-r--r--sys/dev/netmap/netmap_generic.c41
-rw-r--r--sys/dev/netmap/netmap_kern.h227
-rw-r--r--sys/dev/netmap/netmap_mem2.c382
-rw-r--r--sys/dev/netmap/netmap_mem2.h14
-rw-r--r--sys/dev/netmap/netmap_offloadings.c401
-rw-r--r--sys/dev/netmap/netmap_pipe.c711
-rw-r--r--sys/dev/netmap/netmap_vale.c287
-rw-r--r--sys/modules/netmap/Makefile2
-rw-r--r--sys/net/netmap.h163
-rw-r--r--sys/net/netmap_user.h354
-rw-r--r--tools/tools/netmap/Makefile21
-rw-r--r--tools/tools/netmap/README17
-rw-r--r--tools/tools/netmap/bridge.c114
-rw-r--r--tools/tools/netmap/click-test.cfg19
-rw-r--r--tools/tools/netmap/nm_util.c278
-rw-r--r--tools/tools/netmap/nm_util.h127
-rw-r--r--tools/tools/netmap/pcap.c528
-rw-r--r--tools/tools/netmap/pkt-gen.c450
-rw-r--r--tools/tools/netmap/vale-ctl.c1
22 files changed, 3082 insertions, 1825 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 1f20111572fe..c61030225e84 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1875,8 +1875,10 @@ dev/ncv/ncr53c500_pccard.c optional ncv pccard
dev/netmap/netmap.c optional netmap
dev/netmap/netmap_freebsd.c optional netmap
dev/netmap/netmap_generic.c optional netmap
-dev/netmap/netmap_mbq.c optional netmap
+dev/netmap/netmap_mbq.c optional netmap
dev/netmap/netmap_mem2.c optional netmap
+dev/netmap/netmap_offloadings.c optional netmap
+dev/netmap/netmap_pipe.c optional netmap
dev/netmap/netmap_vale.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nge/if_nge.c optional nge
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index fdd368a346fe..de88fb58fc8c 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -156,9 +156,11 @@ ports attached to the switch)
/* reduce conditional code */
-#define init_waitqueue_head(x) // only needed in linux
-
+// linux API, use for the knlist in FreeBSD
+#define init_waitqueue_head(x) knlist_init_mtx(&(x)->si_note, NULL)
+void freebsd_selwakeup(struct selinfo *si, int pri);
+#define OS_selwakeup(a, b) freebsd_selwakeup(a, b)
#elif defined(linux)
@@ -231,6 +233,7 @@ static int netmap_admode = NETMAP_ADMODE_BEST;
int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */
int netmap_generic_ringsize = 1024; /* Generic ringsize. */
+int netmap_generic_rings = 1; /* number of queues in generic. */
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
@@ -238,6 +241,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0,
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
NMG_LOCK_T netmap_global_lock;
@@ -270,28 +274,30 @@ netmap_set_all_rings(struct ifnet *ifp, int stopped)
{
struct netmap_adapter *na;
int i;
+ u_int ntx, nrx;
if (!(ifp->if_capenable & IFCAP_NETMAP))
return;
na = NA(ifp);
- for (i = 0; i <= na->num_tx_rings; i++) {
+ ntx = netmap_real_tx_rings(na);
+ nrx = netmap_real_rx_rings(na);
+
+ for (i = 0; i < ntx; i++) {
if (stopped)
netmap_disable_ring(na->tx_rings + i);
else
na->tx_rings[i].nkr_stopped = 0;
- na->nm_notify(na, i, NR_TX, NAF_DISABLE_NOTIFY |
- (i == na->num_tx_rings ? NAF_GLOBAL_NOTIFY: 0));
+ na->nm_notify(na, i, NR_TX, NAF_DISABLE_NOTIFY);
}
- for (i = 0; i <= na->num_rx_rings; i++) {
+ for (i = 0; i < nrx; i++) {
if (stopped)
netmap_disable_ring(na->rx_rings + i);
else
na->rx_rings[i].nkr_stopped = 0;
- na->nm_notify(na, i, NR_RX, NAF_DISABLE_NOTIFY |
- (i == na->num_rx_rings ? NAF_GLOBAL_NOTIFY: 0));
+ na->nm_notify(na, i, NR_RX, NAF_DISABLE_NOTIFY);
}
}
@@ -426,14 +432,73 @@ netmap_update_config(struct netmap_adapter *na)
return 1;
}
+static int
+netmap_txsync_compat(struct netmap_kring *kring, int flags)
+{
+ struct netmap_adapter *na = kring->na;
+ return na->nm_txsync(na, kring->ring_id, flags);
+}
+
+static int
+netmap_rxsync_compat(struct netmap_kring *kring, int flags)
+{
+ struct netmap_adapter *na = kring->na;
+ return na->nm_rxsync(na, kring->ring_id, flags);
+}
+
+static int
+netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
+{
+ (void)flags;
+ netmap_txsync_to_host(kring->na);
+ return 0;
+}
+
+static int
+netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
+{
+ (void)flags;
+ netmap_rxsync_from_host(kring->na, NULL, NULL);
+ return 0;
+}
+
+
+/* create the krings array and initialize the fields common to all adapters.
+ * The array layout is this:
+ *
+ * +----------+
+ * na->tx_rings ----->| | \
+ * | | } na->num_tx_rings
+ * | | /
+ * +----------+
+ * | | host tx kring
+ * na->rx_rings ----> +----------+
+ * | | \
+ * | | } na->num_rx_rings
+ * | | /
+ * +----------+
+ * | | host rx kring
+ * +----------+
+ * na->tailroom ----->| | \
+ * | | } tailroom bytes
+ * | | /
+ * +----------+
+ *
+ * Note: for compatibility, host krings are created even when not needed.
+ * The tailroom space is currently used by vale ports for allocating leases.
+ */
int
-netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom)
+netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
u_int i, len, ndesc;
struct netmap_kring *kring;
+ u_int ntx, nrx;
+
+ /* account for the (possibly fake) host rings */
+ ntx = na->num_tx_rings + 1;
+ nrx = na->num_rx_rings + 1;
- // XXX additional space for extra rings ?
len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
@@ -454,12 +519,19 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail
kring->na = na;
kring->ring_id = i;
kring->nkr_num_slots = ndesc;
+ if (i < na->num_tx_rings) {
+ kring->nm_sync = netmap_txsync_compat; // XXX
+ } else if (i == na->num_tx_rings) {
+ kring->nm_sync = netmap_txsync_to_host_compat;
+ }
/*
* IMPORTANT: Always keep one slot empty.
*/
kring->rhead = kring->rcur = kring->nr_hwcur = 0;
kring->rtail = kring->nr_hwtail = ndesc - 1;
snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i);
+ ND("ktx %s h %d c %d t %d",
+ kring->name, kring->rhead, kring->rcur, kring->rtail);
mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
init_waitqueue_head(&kring->si);
}
@@ -471,9 +543,16 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail
kring->na = na;
kring->ring_id = i;
kring->nkr_num_slots = ndesc;
+ if (i < na->num_rx_rings) {
+ kring->nm_sync = netmap_rxsync_compat; // XXX
+ } else if (i == na->num_rx_rings) {
+ kring->nm_sync = netmap_rxsync_from_host_compat;
+ }
kring->rhead = kring->rcur = kring->nr_hwcur = 0;
kring->rtail = kring->nr_hwtail = 0;
snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i);
+ ND("krx %s h %d c %d t %d",
+ kring->name, kring->rhead, kring->rcur, kring->rtail);
mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
init_waitqueue_head(&kring->si);
}
@@ -486,17 +565,15 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail
}
-/* XXX check boundaries */
+/* undo the actions performed by netmap_krings_create */
void
netmap_krings_delete(struct netmap_adapter *na)
{
- int i;
+ struct netmap_kring *kring = na->tx_rings;
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- mtx_destroy(&na->tx_rings[i].q_lock);
- }
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- mtx_destroy(&na->rx_rings[i].q_lock);
+ /* we rely on the krings layout described above */
+ for ( ; kring != na->tailroom; kring++) {
+ mtx_destroy(&kring->q_lock);
}
free(na->tx_rings, M_DEVBUF);
na->tx_rings = na->rx_rings = na->tailroom = NULL;
@@ -677,6 +754,20 @@ netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
netmap_mem_if_delete(na, nifp);
}
+static __inline int
+nm_tx_si_user(struct netmap_priv_d *priv)
+{
+ return (priv->np_na != NULL &&
+ (priv->np_txqlast - priv->np_txqfirst > 1));
+}
+
+static __inline int
+nm_rx_si_user(struct netmap_priv_d *priv)
+{
+ return (priv->np_na != NULL &&
+ (priv->np_rxqlast - priv->np_rxqfirst > 1));
+}
+
/*
* returns 1 if this is the last instance and we can free priv
@@ -702,6 +793,10 @@ netmap_dtor_locked(struct netmap_priv_d *priv)
priv->np_nifp = NULL;
netmap_drop_memory_locked(priv);
if (priv->np_na) {
+ if (nm_tx_si_user(priv))
+ na->tx_si_users--;
+ if (nm_rx_si_user(priv))
+ na->rx_si_users--;
netmap_adapter_put(na);
priv->np_na = NULL;
}
@@ -864,22 +959,8 @@ netmap_txsync_to_host(struct netmap_adapter *na)
struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
struct netmap_ring *ring = kring->ring;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_txsync_prologue(kring);
+ u_int const head = kring->rhead;
struct mbq q;
- int error;
-
- error = nm_kr_tryget(kring);
- if (error) {
- if (error == NM_KR_BUSY)
- D("ring %p busy (user error)", kring);
- return;
- }
- if (head > lim) {
- D("invalid ring index in stack TX kring %p", kring);
- netmap_ring_reinit(kring);
- nm_kr_put(kring);
- return;
- }
/* Take packets from hwcur to head and pass them up.
* force head = cur since netmap_grab_packets() stops at head
@@ -896,7 +977,6 @@ netmap_txsync_to_host(struct netmap_adapter *na)
kring->nr_hwtail -= lim + 1;
nm_txsync_finalize(kring);
- nm_kr_put(kring);
netmap_send_up(na->ifp, &q);
}
@@ -921,27 +1001,15 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
struct netmap_ring *ring = kring->ring;
u_int nm_i, n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int ret = 0;
struct mbq *q = &kring->rx_queue;
(void)pwait; /* disable unused warnings */
-
- if (head > lim) {
- netmap_ring_reinit(kring);
- return EINVAL;
- }
-
- if (kring->nkr_stopped) /* check a first time without lock */
- return EBUSY;
+ (void)td;
mtx_lock(&q->lock);
- if (kring->nkr_stopped) { /* check again with lock held */
- ret = EBUSY;
- goto unlock_out;
- }
-
/* First part: import newly received packets */
n = mbq_len(q);
if (n) { /* grab packets from the queue */
@@ -982,8 +1050,6 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
if (kring->rcur == kring->rtail && td) /* no bufs available */
selrecord(td, &kring->si);
-unlock_out:
-
mtx_unlock(&q->lock);
return ret;
}
@@ -1107,19 +1173,26 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
int
netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
- struct ifnet *ifp;
+ struct ifnet *ifp = NULL;
int error = 0;
- struct netmap_adapter *ret;
+ struct netmap_adapter *ret = NULL;
*na = NULL; /* default return value */
/* first try to see if this is a bridge port. */
NMG_LOCK_ASSERT();
+ error = netmap_get_pipe_na(nmr, na, create);
+ if (error || *na != NULL)
+ return error;
+
error = netmap_get_bdg_na(nmr, na, create);
- if (error || *na != NULL) /* valid match in netmap_get_bdg_na() */
+ if (error)
return error;
+ if (*na != NULL) /* valid match in netmap_get_bdg_na() */
+ goto pipes;
+
ifp = ifunit_ref(nmr->nr_name);
if (ifp == NULL) {
return ENXIO;
@@ -1129,18 +1202,23 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
if (error)
goto out;
- if (ret != NULL) {
- /* Users cannot use the NIC attached to a bridge directly */
- if (NETMAP_OWNED_BY_KERN(ret)) {
- error = EBUSY;
- goto out;
- }
- error = 0;
- *na = ret;
- netmap_adapter_get(ret);
+ /* Users cannot use the NIC attached to a bridge directly */
+ if (NETMAP_OWNED_BY_KERN(ret)) {
+ error = EBUSY;
+ goto out;
}
+ *na = ret;
+ netmap_adapter_get(ret);
+
+pipes:
+ error = netmap_pipe_alloc(*na, nmr);
+
out:
- if_rele(ifp);
+ if (error && ret != NULL)
+ netmap_adapter_put(ret);
+
+ if (ifp)
+ if_rele(ifp);
return error;
}
@@ -1365,45 +1443,88 @@ netmap_ring_reinit(struct netmap_kring *kring)
* for all rings is the same as a single ring.
*/
static int
-netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
+netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
{
struct netmap_adapter *na = priv->np_na;
- struct ifnet *ifp = na->ifp;
- u_int i = ringid & NETMAP_RING_MASK;
- /* initially (np_qfirst == np_qlast) we don't want to lock */
- u_int lim = na->num_rx_rings;
-
- if (na->num_tx_rings > lim)
- lim = na->num_tx_rings;
- if ( (ringid & NETMAP_HW_RING) && i >= lim) {
- D("invalid ring id %d", i);
- return (EINVAL);
- }
- priv->np_ringid = ringid;
- if (ringid & NETMAP_SW_RING) {
- priv->np_qfirst = NETMAP_SW_RING;
- priv->np_qlast = 0;
- } else if (ringid & NETMAP_HW_RING) {
- priv->np_qfirst = i;
- priv->np_qlast = i + 1;
- } else {
- priv->np_qfirst = 0;
- priv->np_qlast = NETMAP_HW_RING ;
+ u_int j, i = ringid & NETMAP_RING_MASK;
+ u_int reg = flags & NR_REG_MASK;
+
+ if (reg == NR_REG_DEFAULT) {
+ /* convert from old ringid to flags */
+ if (ringid & NETMAP_SW_RING) {
+ reg = NR_REG_SW;
+ } else if (ringid & NETMAP_HW_RING) {
+ reg = NR_REG_ONE_NIC;
+ } else {
+ reg = NR_REG_ALL_NIC;
+ }
+ D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
+ }
+ switch (reg) {
+ case NR_REG_ALL_NIC:
+ case NR_REG_PIPE_MASTER:
+ case NR_REG_PIPE_SLAVE:
+ priv->np_txqfirst = 0;
+ priv->np_txqlast = na->num_tx_rings;
+ priv->np_rxqfirst = 0;
+ priv->np_rxqlast = na->num_rx_rings;
+ ND("%s %d %d", "ALL/PIPE",
+ priv->np_rxqfirst, priv->np_rxqlast);
+ break;
+ case NR_REG_SW:
+ case NR_REG_NIC_SW:
+ if (!(na->na_flags & NAF_HOST_RINGS)) {
+ D("host rings not supported");
+ return EINVAL;
+ }
+ priv->np_txqfirst = (reg == NR_REG_SW ?
+ na->num_tx_rings : 0);
+ priv->np_txqlast = na->num_tx_rings + 1;
+ priv->np_rxqfirst = (reg == NR_REG_SW ?
+ na->num_rx_rings : 0);
+ priv->np_rxqlast = na->num_rx_rings + 1;
+ ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
+ priv->np_rxqfirst, priv->np_rxqlast);
+ break;
+ case NR_REG_ONE_NIC:
+ if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
+ D("invalid ring id %d", i);
+ return EINVAL;
+ }
+ /* if not enough rings, use the first one */
+ j = i;
+ if (j >= na->num_tx_rings)
+ j = 0;
+ priv->np_txqfirst = j;
+ priv->np_txqlast = j + 1;
+ j = i;
+ if (j >= na->num_rx_rings)
+ j = 0;
+ priv->np_rxqfirst = j;
+ priv->np_rxqlast = j + 1;
+ break;
+ default:
+ D("invalid regif type %d", reg);
+ return EINVAL;
}
priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
- if (netmap_verbose) {
- if (ringid & NETMAP_SW_RING)
- D("ringid %s set to SW RING", NM_IFPNAME(ifp));
- else if (ringid & NETMAP_HW_RING)
- D("ringid %s set to HW RING %d", NM_IFPNAME(ifp),
- priv->np_qfirst);
- else
- D("ringid %s set to all %d HW RINGS", NM_IFPNAME(ifp), lim);
- }
+ priv->np_flags = (flags & ~NR_REG_MASK) | reg;
+ if (nm_tx_si_user(priv))
+ na->tx_si_users++;
+ if (nm_rx_si_user(priv))
+ na->rx_si_users++;
+ if (netmap_verbose) {
+ D("%s: tx [%d,%d) rx [%d,%d) id %d",
+ NM_IFPNAME(na->ifp),
+ priv->np_txqfirst,
+ priv->np_txqlast,
+ priv->np_rxqfirst,
+ priv->np_rxqlast,
+ i);
+ }
return 0;
}
-
/*
* possibly move the interface to netmap-mode.
* If success it returns a pointer to netmap_if, otherwise NULL.
@@ -1411,7 +1532,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
*/
struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
- uint16_t ringid, int *err)
+ uint16_t ringid, uint32_t flags, int *err)
{
struct ifnet *ifp = na->ifp;
struct netmap_if *nifp = NULL;
@@ -1421,7 +1542,7 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
/* ring configuration may have changed, fetch from the card */
netmap_update_config(na);
priv->np_na = na; /* store the reference */
- error = netmap_set_ringid(priv, ringid);
+ error = netmap_set_ringid(priv, ringid, flags);
if (error)
goto out;
/* ensure allocators are ready */
@@ -1501,26 +1622,12 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
struct nmreq *nmr = (struct nmreq *) data;
struct netmap_adapter *na = NULL;
int error;
- u_int i, lim;
+ u_int i, qfirst, qlast;
struct netmap_if *nifp;
struct netmap_kring *krings;
(void)dev; /* UNUSED */
(void)fflag; /* UNUSED */
-#ifdef linux
-#define devfs_get_cdevpriv(pp) \
- ({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; \
- (*pp ? 0 : ENOENT); })
-
-/* devfs_set_cdevpriv cannot fail on linux */
-#define devfs_set_cdevpriv(p, fn) \
- ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
-
-
-#define devfs_clear_cdevpriv() do { \
- netmap_dtor(priv); ((struct file *)td)->private_data = 0; \
- } while (0)
-#endif /* linux */
if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
/* truncate name */
@@ -1530,6 +1637,9 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
nmr->nr_name,
nmr->nr_version, NETMAP_API);
nmr->nr_version = NETMAP_API;
+ }
+ if (nmr->nr_version < NETMAP_MIN_API ||
+ nmr->nr_version > NETMAP_MAX_API) {
return EINVAL;
}
}
@@ -1564,7 +1674,8 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
nmd = na->nm_mem; /* get memory allocator */
}
- error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags);
+ error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
+ &nmr->nr_arg2);
if (error)
break;
if (na == NULL) /* only memory info */
@@ -1576,8 +1687,6 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
- if (memflags & NETMAP_MEM_PRIVATE)
- nmr->nr_ringid |= NETMAP_PRIV_MEM;
netmap_adapter_put(na);
} while (0);
NMG_UNLOCK();
@@ -1587,7 +1696,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
/* possibly attach/detach NIC and VALE switch */
i = nmr->nr_cmd;
if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
- || i == NETMAP_BDG_OFFSET) {
+ || i == NETMAP_BDG_VNET_HDR) {
error = netmap_bdg_ctl(nmr, NULL);
break;
} else if (i != 0) {
@@ -1602,7 +1711,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
u_int memflags;
if (priv->np_na != NULL) { /* thread already registered */
- error = netmap_set_ringid(priv, nmr->nr_ringid);
+ error = EBUSY;
break;
}
/* find the interface and a reference */
@@ -1615,27 +1724,39 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = EBUSY;
break;
}
- nifp = netmap_do_regif(priv, na, nmr->nr_ringid, &error);
+ nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error);
if (!nifp) { /* reg. failed, release priv and ref */
netmap_adapter_put(na);
priv->np_nifp = NULL;
break;
}
+ priv->np_td = td; // XXX kqueue, debugging only
/* return the offset of the netmap_if object */
nmr->nr_rx_rings = na->num_rx_rings;
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
- error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags);
+ error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
+ &nmr->nr_arg2);
if (error) {
netmap_adapter_put(na);
break;
}
if (memflags & NETMAP_MEM_PRIVATE) {
- nmr->nr_ringid |= NETMAP_PRIV_MEM;
*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
}
+ priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ?
+ &na->tx_si : &na->tx_rings[priv->np_txqfirst].si;
+ priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ?
+ &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si;
+
+ if (nmr->nr_arg3) {
+ D("requested %d extra buffers", nmr->nr_arg3);
+ nmr->nr_arg3 = netmap_extra_alloc(na,
+ &nifp->ni_bufs_head, nmr->nr_arg3);
+ D("got %d extra buffers", nmr->nr_arg3);
+ }
nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
} while (0);
NMG_UNLOCK();
@@ -1666,21 +1787,17 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
break;
}
- if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
- if (cmd == NIOCTXSYNC)
- netmap_txsync_to_host(na);
- else
- netmap_rxsync_from_host(na, NULL, NULL);
- break;
+ if (cmd == NIOCTXSYNC) {
+ krings = na->tx_rings;
+ qfirst = priv->np_txqfirst;
+ qlast = priv->np_txqlast;
+ } else {
+ krings = na->rx_rings;
+ qfirst = priv->np_rxqfirst;
+ qlast = priv->np_rxqlast;
}
- /* find the last ring to scan */
- lim = priv->np_qlast;
- if (lim == NETMAP_HW_RING)
- lim = (cmd == NIOCTXSYNC) ?
- na->num_tx_rings : na->num_rx_rings;
-
- krings = (cmd == NIOCTXSYNC) ? na->tx_rings : na->rx_rings;
- for (i = priv->np_qfirst; i < lim; i++) {
+
+ for (i = qfirst; i < qlast; i++) {
struct netmap_kring *kring = krings + i;
if (nm_kr_tryget(kring)) {
error = EBUSY;
@@ -1694,14 +1811,14 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
netmap_ring_reinit(kring);
} else {
- na->nm_txsync(na, i, NAF_FORCE_RECLAIM);
+ kring->nm_sync(kring, NAF_FORCE_RECLAIM);
}
if (netmap_verbose & NM_VERB_TXSYNC)
D("post txsync ring %d cur %d hwcur %d",
i, kring->ring->cur,
kring->nr_hwcur);
} else {
- na->nm_rxsync(na, i, NAF_FORCE_READ);
+ kring->nm_sync(kring, NAF_FORCE_READ);
microtime(&na->rx_rings[i].ring->ts);
}
nm_kr_put(kring);
@@ -1772,9 +1889,9 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
struct ifnet *ifp;
struct netmap_kring *kring;
u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
- u_int lim_tx, lim_rx;
struct mbq q; /* packets from hw queues to host stack */
void *pwait = dev; /* linux compatibility */
+ int is_kevent = 0;
/*
* In order to avoid nested locks, we need to "double check"
@@ -1786,7 +1903,19 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
(void)pwait;
mbq_init(&q);
- if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
+ /*
+ * XXX kevent has curthread->tp_fop == NULL,
+ * so devfs_get_cdevpriv() fails. We circumvent this by passing
+ * priv as the first argument, which is also useful to avoid
+ * the selrecord() which are not necessary in that case.
+ */
+ if (devfs_get_cdevpriv((void **)&priv) != 0) {
+ is_kevent = 1;
+ if (netmap_verbose)
+ D("called from kevent");
+ priv = (struct netmap_priv_d *)dev;
+ }
+ if (priv == NULL)
return POLLERR;
if (priv->np_nifp == NULL) {
@@ -1811,28 +1940,6 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
want_tx = events & (POLLOUT | POLLWRNORM);
want_rx = events & (POLLIN | POLLRDNORM);
- lim_tx = na->num_tx_rings;
- lim_rx = na->num_rx_rings;
-
- if (priv->np_qfirst == NETMAP_SW_RING) {
- // XXX locking ?
- /* handle the host stack ring */
- if (priv->np_txpoll || want_tx) {
- /* push any packets up, then we are always ready */
- netmap_txsync_to_host(na);
- revents |= want_tx;
- }
- if (want_rx) {
- kring = &na->rx_rings[lim_rx];
- /* XXX replace with rxprologue etc. */
- if (nm_ring_empty(kring->ring))
- netmap_rxsync_from_host(na, td, dev);
- if (!nm_ring_empty(kring->ring))
- revents |= want_rx;
- }
- return (revents);
- }
-
/*
* check_all_{tx|rx} are set if the card has more than one queue AND
@@ -1847,19 +1954,15 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* there are pending packets to send. The latter can be disabled
* passing NETMAP_NO_TX_POLL in the NIOCREG call.
*/
- check_all_tx = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1);
- check_all_rx = (priv->np_qlast == NETMAP_HW_RING) && (lim_rx > 1);
-
- if (priv->np_qlast != NETMAP_HW_RING) {
- lim_tx = lim_rx = priv->np_qlast;
- }
+ check_all_tx = nm_tx_si_user(priv);
+ check_all_rx = nm_rx_si_user(priv);
/*
* We start with a lock free round which is cheap if we have
* slots available. If this fails, then lock and call the sync
* routines.
*/
- for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
+ for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) {
kring = &na->rx_rings[i];
/* XXX compare ring->cur and kring->tail */
if (!nm_ring_empty(kring->ring)) {
@@ -1867,7 +1970,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
want_rx = 0; /* also breaks the loop */
}
}
- for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
+ for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) {
kring = &na->tx_rings[i];
/* XXX compare ring->cur and kring->tail */
if (!nm_ring_empty(kring->ring)) {
@@ -1891,7 +1994,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* used to skip rings with no pending transmissions.
*/
flush_tx:
- for (i = priv->np_qfirst; i < lim_tx; i++) {
+ for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
int found = 0;
kring = &na->tx_rings[i];
@@ -1906,7 +2009,7 @@ flush_tx:
netmap_ring_reinit(kring);
revents |= POLLERR;
} else {
- if (na->nm_txsync(na, i, 0))
+ if (kring->nm_sync(kring, 0))
revents |= POLLERR;
}
@@ -1921,12 +2024,12 @@ flush_tx:
if (found) { /* notify other listeners */
revents |= want_tx;
want_tx = 0;
- na->nm_notify(na, i, NR_TX, NAF_GLOBAL_NOTIFY);
+ na->nm_notify(na, i, NR_TX, 0);
}
}
- if (want_tx && retry_tx) {
+ if (want_tx && retry_tx && !is_kevent) {
selrecord(td, check_all_tx ?
- &na->tx_si : &na->tx_rings[priv->np_qfirst].si);
+ &na->tx_si : &na->tx_rings[priv->np_txqfirst].si);
retry_tx = 0;
goto flush_tx;
}
@@ -1940,7 +2043,7 @@ flush_tx:
int send_down = 0; /* transparent mode */
/* two rounds here to for race avoidance */
do_retry_rx:
- for (i = priv->np_qfirst; i < lim_rx; i++) {
+ for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
int found = 0;
kring = &na->rx_rings[i];
@@ -1962,7 +2065,7 @@ do_retry_rx:
netmap_grab_packets(kring, &q, netmap_fwd);
}
- if (na->nm_rxsync(na, i, 0))
+ if (kring->nm_sync(kring, 0))
revents |= POLLERR;
if (netmap_no_timestamp == 0 ||
kring->ring->flags & NR_TIMESTAMP) {
@@ -1974,24 +2077,26 @@ do_retry_rx:
if (found) {
revents |= want_rx;
retry_rx = 0;
- na->nm_notify(na, i, NR_RX, NAF_GLOBAL_NOTIFY);
+ na->nm_notify(na, i, NR_RX, 0);
}
}
/* transparent mode XXX only during first pass ? */
- kring = &na->rx_rings[lim_rx];
- if (check_all_rx
- && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
- /* XXX fix to use kring fields */
- if (nm_ring_empty(kring->ring))
- send_down = netmap_rxsync_from_host(na, td, dev);
- if (!nm_ring_empty(kring->ring))
- revents |= want_rx;
+ if (na->na_flags & NAF_HOST_RINGS) {
+ kring = &na->rx_rings[na->num_rx_rings];
+ if (check_all_rx
+ && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
+ /* XXX fix to use kring fields */
+ if (nm_ring_empty(kring->ring))
+ send_down = netmap_rxsync_from_host(na, td, dev);
+ if (!nm_ring_empty(kring->ring))
+ revents |= want_rx;
+ }
}
- if (retry_rx)
+ if (retry_rx && !is_kevent)
selrecord(td, check_all_rx ?
- &na->rx_si : &na->rx_rings[priv->np_qfirst].si);
+ &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si);
if (send_down > 0 || retry_rx) {
retry_rx = 0;
if (send_down)
@@ -2032,14 +2137,14 @@ netmap_notify(struct netmap_adapter *na, u_int n_ring,
if (tx == NR_TX) {
kring = na->tx_rings + n_ring;
- selwakeuppri(&kring->si, PI_NET);
- if (flags & NAF_GLOBAL_NOTIFY)
- selwakeuppri(&na->tx_si, PI_NET);
+ OS_selwakeup(&kring->si, PI_NET);
+ if (na->tx_si_users > 0)
+ OS_selwakeup(&na->tx_si, PI_NET);
} else {
kring = na->rx_rings + n_ring;
- selwakeuppri(&kring->si, PI_NET);
- if (flags & NAF_GLOBAL_NOTIFY)
- selwakeuppri(&na->rx_si, PI_NET);
+ OS_selwakeup(&kring->si, PI_NET);
+ if (na->rx_si_users > 0)
+ OS_selwakeup(&na->rx_si, PI_NET);
}
return 0;
}
@@ -2090,6 +2195,7 @@ netmap_detach_common(struct netmap_adapter *na)
D("freeing leftover tx_rings");
na->nm_krings_delete(na);
}
+ netmap_pipe_dealloc(na);
if (na->na_flags & NAF_MEM_OWNER)
netmap_mem_private_delete(na->nm_mem);
bzero(na, sizeof(*na));
@@ -2120,6 +2226,7 @@ netmap_attach(struct netmap_adapter *arg)
if (hwna == NULL)
goto fail;
hwna->up = *arg;
+ hwna->up.na_flags |= NAF_HOST_RINGS;
if (netmap_attach_common(&hwna->up)) {
free(hwna, M_DEVBUF);
goto fail;
@@ -2177,12 +2284,10 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
return 1;
}
-
int
netmap_hw_krings_create(struct netmap_adapter *na)
{
- int ret = netmap_krings_create(na,
- na->num_tx_rings + 1, na->num_rx_rings + 1, 0);
+ int ret = netmap_krings_create(na, 0);
if (ret == 0) {
/* initialize the mbq for the sw rx ring */
mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
@@ -2370,7 +2475,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
* We do the wakeup here, but the ring is not yet reconfigured.
* However, we are under lock so there are no races.
*/
- na->nm_notify(na, n, tx, NAF_GLOBAL_NOTIFY);
+ na->nm_notify(na, n, tx, 0);
return kring->ring->slot;
}
@@ -2405,15 +2510,13 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
return; // not a physical queue
kring = na->rx_rings + q;
kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ?
- na->nm_notify(na, q, NR_RX,
- (na->num_rx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
+ na->nm_notify(na, q, NR_RX, 0);
*work_done = 1; /* do not fire napi again */
} else { /* TX path */
if (q >= na->num_tx_rings)
return; // not a physical queue
kring = na->tx_rings + q;
- na->nm_notify(na, q, NR_TX,
- (na->num_tx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
+ na->nm_notify(na, q, NR_TX, 0);
}
}
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index 6716168526dc..a8e287c6ddd8 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -29,8 +29,10 @@
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/poll.h> /* POLLIN, POLLOUT */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/conf.h> /* DEV_MODULE */
+#include <sys/endian.h>
#include <sys/rwlock.h>
@@ -49,6 +51,8 @@
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* */
+#include <netinet/in.h> /* in6_cksum_pseudo() */
+#include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
@@ -57,6 +61,73 @@
/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
+rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+{
+ /* TODO XXX please use the FreeBSD implementation for this. */
+ uint16_t *words = (uint16_t *)data;
+ int nw = len / 2;
+ int i;
+
+ for (i = 0; i < nw; i++)
+ cur_sum += be16toh(words[i]);
+
+ if (len & 1)
+ cur_sum += (data[len-1] << 8);
+
+ return cur_sum;
+}
+
+/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
+ * return value is in network byte order.
+ */
+uint16_t nm_csum_fold(rawsum_t cur_sum)
+{
+ /* TODO XXX please use the FreeBSD implementation for this. */
+ while (cur_sum >> 16)
+ cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);
+
+ return htobe16((~cur_sum) & 0xFFFF);
+}
+
+uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
+{
+#if 0
+ return in_cksum_hdr((void *)iph);
+#else
+ return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
+#endif
+}
+
+void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+ size_t datalen, uint16_t *check)
+{
+ uint16_t pseudolen = datalen + iph->protocol;
+
+ /* Compute and insert the pseudo-header checksum. */
+ *check = in_pseudo(iph->saddr, iph->daddr,
+ htobe16(pseudolen));
+ /* Compute the checksum on TCP/UDP header + payload
+ * (includes the pseudo-header).
+ */
+ *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+}
+
+void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+ size_t datalen, uint16_t *check)
+{
+#ifdef INET6
+ *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
+ *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+#else
+ static int notsupported = 0;
+ if (!notsupported) {
+ notsupported = 1;
+ D("inet6 segmentation not supported");
+ }
+#endif
+}
+
+
/*
* Intercept the rx routine in the standard device driver.
* Second argument is non-zero to intercept, 0 to restore
@@ -91,10 +162,7 @@ netmap_catch_rx(struct netmap_adapter *na, int intercept)
* Intercept the packet steering routine in the tx path,
* so that we can decide which queue is used for an mbuf.
* Second argument is non-zero to intercept, 0 to restore.
- *
- * actually we also need to redirect the if_transmit ?
- *
- * XXX see if FreeBSD has such a mechanism
+ * On freebsd we just intercept if_transmit.
*/
void
netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
@@ -111,7 +179,8 @@ netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
}
-/* Transmit routine used by generic_netmap_txsync(). Returns 0 on success
+/*
+ * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
* and non-zero on error (which may be packet drops or other errors).
* addr and len identify the netmap buffer, m is the (preallocated)
* mbuf to use for transmissions.
@@ -162,38 +231,39 @@ void
generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
D("called");
- *txq = 1;
- *rxq = 1;
+ *txq = netmap_generic_rings;
+ *rxq = netmap_generic_rings;
}
-void netmap_mitigation_init(struct netmap_generic_adapter *na)
+void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na)
{
ND("called");
- na->mit_pending = 0;
+ mit->mit_pending = 0;
+ mit->mit_na = na;
}
-void netmap_mitigation_start(struct netmap_generic_adapter *na)
+void netmap_mitigation_start(struct nm_generic_mit *mit)
{
ND("called");
}
-void netmap_mitigation_restart(struct netmap_generic_adapter *na)
+void netmap_mitigation_restart(struct nm_generic_mit *mit)
{
ND("called");
}
-int netmap_mitigation_active(struct netmap_generic_adapter *na)
+int netmap_mitigation_active(struct nm_generic_mit *mit)
{
ND("called");
return 0;
}
-void netmap_mitigation_cleanup(struct netmap_generic_adapter *na)
+void netmap_mitigation_cleanup(struct nm_generic_mit *mit)
{
ND("called");
}
@@ -216,8 +286,10 @@ netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
struct netmap_vm_handle_t *vmh = handle;
- D("handle %p size %jd prot %d foff %jd",
- handle, (intmax_t)size, prot, (intmax_t)foff);
+
+ if (netmap_verbose)
+ D("handle %p size %jd prot %d foff %jd",
+ handle, (intmax_t)size, prot, (intmax_t)foff);
dev_ref(vmh->dev);
return 0;
}
@@ -229,7 +301,9 @@ netmap_dev_pager_dtor(void *handle)
struct netmap_vm_handle_t *vmh = handle;
struct cdev *dev = vmh->dev;
struct netmap_priv_d *priv = vmh->priv;
- D("handle %p", handle);
+
+ if (netmap_verbose)
+ D("handle %p", handle);
netmap_dtor(priv);
free(vmh, M_DEVBUF);
dev_rel(dev);
@@ -302,8 +376,9 @@ netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
struct netmap_priv_d *priv;
vm_object_t obj;
- D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
- (intmax_t )*foff, (intmax_t )objsize, objp, prot);
+ if (netmap_verbose)
+ D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
+ (intmax_t )*foff, (intmax_t )objsize, objp, prot);
vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
M_NOWAIT | M_ZERO);
@@ -383,6 +458,157 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
return 0;
}
+/******************** kqueue support ****************/
+
+/*
+ * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
+ * We use a non-zero argument to distinguish the call from the one
+ * in kqueue_scan() which instead also needs to run netmap_poll().
+ * The knote uses a global mutex for the time being. We might
+ * try to reuse the one in the si, but it is not allocated
+ * permanently so it might be a bit tricky.
+ *
+ * The *kqfilter function registers one or another f_event
+ * depending on read or write mode.
+ * In the call to f_event() td_fpop is NULL so any child function
+ * calling devfs_get_cdevpriv() would fail - and we need it in
+ * netmap_poll(). As a workaround we store priv into kn->kn_hook
+ * and pass it as first argument to netmap_poll(), which then
+ * uses the failure to tell that we are called from f_event()
+ * and do not need the selrecord().
+ */
+
+void freebsd_selwakeup(struct selinfo *si, int pri);
+
+void
+freebsd_selwakeup(struct selinfo *si, int pri)
+{
+ if (netmap_verbose)
+ D("on knote %p", &si->si_note);
+ selwakeuppri(si, pri);
+	/* use a non-zero hint to distinguish the notification from the
+	 * call done in kqueue_scan(), which uses 0
+	 */
+ KNOTE_UNLOCKED(&si->si_note, 0x100 /* notification */);
+}
+
+static void
+netmap_knrdetach(struct knote *kn)
+{
+ struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
+ struct selinfo *si = priv->np_rxsi;
+
+ D("remove selinfo %p", si);
+ knlist_remove(&si->si_note, kn, 0);
+}
+
+static void
+netmap_knwdetach(struct knote *kn)
+{
+ struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
+ struct selinfo *si = priv->np_txsi;
+
+ D("remove selinfo %p", si);
+ knlist_remove(&si->si_note, kn, 0);
+}
+
+/*
+ * callback from notifies (generated externally) and our
+ * calls to kevent(). The former we just return 1 (ready)
+ * since we do not know better.
+ * In the latter we call netmap_poll and return 0/1 accordingly.
+ */
+static int
+netmap_knrw(struct knote *kn, long hint, int events)
+{
+ struct netmap_priv_d *priv;
+ int revents;
+
+ if (hint != 0) {
+ ND(5, "call from notify");
+ return 1; /* assume we are ready */
+ }
+ priv = kn->kn_hook;
+ /* the notification may come from an external thread,
+ * in which case we do not want to run the netmap_poll
+ * This should be filtered above, but check just in case.
+ */
+ if (curthread != priv->np_td) { /* should not happen */
+ RD(5, "curthread changed %p %p", curthread, priv->np_td);
+ return 1;
+ } else {
+ revents = netmap_poll((void *)priv, events, curthread);
+ return (events & revents) ? 1 : 0;
+ }
+}
+
+static int
+netmap_knread(struct knote *kn, long hint)
+{
+ return netmap_knrw(kn, hint, POLLIN);
+}
+
+static int
+netmap_knwrite(struct knote *kn, long hint)
+{
+ return netmap_knrw(kn, hint, POLLOUT);
+}
+
+static struct filterops netmap_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = netmap_knrdetach,
+ .f_event = netmap_knread,
+};
+
+static struct filterops netmap_wfiltops = {
+ .f_isfd = 1,
+ .f_detach = netmap_knwdetach,
+ .f_event = netmap_knwrite,
+};
+
+
+/*
+ * This is called when a thread invokes kevent() to record
+ * a change in the configuration of the kqueue().
+ * The 'priv' should be the same as in the netmap device.
+ */
+static int
+netmap_kqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct netmap_priv_d *priv;
+ int error;
+ struct netmap_adapter *na;
+ struct selinfo *si;
+ int ev = kn->kn_filter;
+
+ if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
+ D("bad filter request %d", ev);
+ return 1;
+ }
+ error = devfs_get_cdevpriv((void**)&priv);
+ if (error) {
+ D("device not yet setup");
+ return 1;
+ }
+ na = priv->np_na;
+ if (na == NULL) {
+ D("no netmap adapter for this file descriptor");
+ return 1;
+ }
+ /* the si is indicated in the priv */
+ si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
+ // XXX lock(priv) ?
+ kn->kn_fop = (ev == EVFILT_WRITE) ?
+ &netmap_wfiltops : &netmap_rfiltops;
+ kn->kn_hook = priv;
+ knlist_add(&si->si_note, kn, 1);
+ // XXX unlock(priv)
+ ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
+ na, na->ifp->if_xname, curthread, priv, kn,
+ priv->np_nifp,
+ kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
+ return 0;
+}
struct cdevsw netmap_cdevsw = {
.d_version = D_VERSION,
@@ -391,9 +617,10 @@ struct cdevsw netmap_cdevsw = {
.d_mmap_single = netmap_mmap_single,
.d_ioctl = netmap_ioctl,
.d_poll = netmap_poll,
+ .d_kqfilter = netmap_kqfilter,
.d_close = netmap_close,
};
-
+/*--- end of kqueue support ----*/
/*
* Kernel entry point.
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index e695fcbd29f8..63253b6b0693 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#define rtnl_lock() D("rtnl_lock called");
#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
+#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
/*
@@ -222,6 +223,17 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
#endif /* REG_RESET */
if (enable) { /* Enable netmap mode. */
+ /* Init the mitigation support. */
+ gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!gna->mit) {
+ D("mitigation allocation failed");
+ error = ENOMEM;
+ goto out;
+ }
+ for (r=0; r<na->num_rx_rings; r++)
+ netmap_mitigation_init(&gna->mit[r], na);
+
/* Initialize the rx queue, as generic_rx_handler() can
* be called as soon as netmap_catch_rx() returns.
*/
@@ -229,9 +241,6 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
mbq_safe_init(&na->rx_rings[r].rx_queue);
}
- /* Init the mitigation timer. */
- netmap_mitigation_init(gna);
-
/*
* Preallocate packet buffers for the tx rings.
*/
@@ -306,7 +315,9 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
}
- netmap_mitigation_cleanup(gna);
+ for (r=0; r<na->num_rx_rings; r++)
+ netmap_mitigation_cleanup(&gna->mit[r]);
+ free(gna->mit, M_DEVBUF);
for (r=0; r<na->num_tx_rings; r++) {
for (i=0; i<na->num_tx_desc; i++) {
@@ -344,10 +355,12 @@ free_tx_pools:
free(na->tx_rings[r].tx_pool, M_DEVBUF);
na->tx_rings[r].tx_pool = NULL;
}
- netmap_mitigation_cleanup(gna);
for (r=0; r<na->num_rx_rings; r++) {
+ netmap_mitigation_cleanup(&gna->mit[r]);
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
}
+ free(gna->mit, M_DEVBUF);
+out:
return error;
}
@@ -557,12 +570,11 @@ generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
nm_i = nm_next(nm_i, lim);
+ IFRATE(rate_ctx.new.txpkt ++);
}
/* Update hwcur to the next slot to transmit. */
kring->nr_hwcur = nm_i; /* not head, we could break early */
-
- IFRATE(rate_ctx.new.txpkt += ntx);
}
/*
@@ -600,7 +612,11 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
struct netmap_adapter *na = NA(ifp);
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
u_int work_done;
- u_int rr = 0; // receive ring number
+ u_int rr = MBUF_RXQ(m); // receive ring number
+
+ if (rr >= na->num_rx_rings) {
+ rr = rr % na->num_rx_rings; // XXX expensive...
+ }
/* limit the size of the queue */
if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
@@ -617,13 +633,13 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
/* same as send combining, filter notification if there is a
* pending timer, otherwise pass it up and start a timer.
*/
- if (likely(netmap_mitigation_active(gna))) {
+ if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
/* Record that there is some pending work. */
- gna->mit_pending = 1;
+ gna->mit[rr].mit_pending = 1;
} else {
netmap_generic_irq(na->ifp, rr, &work_done);
IFRATE(rate_ctx.new.rxirq++);
- netmap_mitigation_start(gna);
+ netmap_mitigation_start(&gna->mit[rr]);
}
}
}
@@ -682,7 +698,6 @@ generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
ring->slot[nm_i].flags = slot_flags;
m_freem(m);
nm_i = nm_next(nm_i, lim);
- n++;
}
if (n) {
kring->nr_hwtail = nm_i;
@@ -772,7 +787,7 @@ generic_netmap_attach(struct ifnet *ifp)
/* when using generic, IFCAP_NETMAP is set so we force
* NAF_SKIP_INTR to use the regular interrupt handler
*/
- na->na_flags = NAF_SKIP_INTR;
+ na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
ifp->num_tx_queues, ifp->real_num_tx_queues,
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 668e083e0b96..ddcb0e3185a6 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -35,6 +35,7 @@
#define _NET_NETMAP_KERN_H_
#define WITH_VALE // comment out to disable VALE support
+#define WITH_PIPES
#if defined(__FreeBSD__)
@@ -267,11 +268,11 @@ struct netmap_kring {
volatile int nkr_stopped; // XXX what for ?
- /* support for adapters without native netmap support.
+ /* Support for adapters without native netmap support.
* On tx rings we preallocate an array of tx buffers
* (same size as the netmap ring), on rx rings we
- * store incoming packets in a queue.
- * XXX who writes to the rx queue ?
+ * store incoming mbufs in a queue that is drained by
+ * a rxsync.
*/
struct mbuf **tx_pool;
// u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
@@ -280,6 +281,13 @@ struct netmap_kring {
uint32_t ring_id; /* debugging */
char name[64]; /* diagnostic */
+ int (*nm_sync)(struct netmap_kring *kring, int flags);
+
+#ifdef WITH_PIPES
+ struct netmap_kring *pipe;
+ struct netmap_ring *save_ring;
+#endif /* WITH_PIPES */
+
} __attribute__((__aligned__(64)));
@@ -388,6 +396,7 @@ struct netmap_adapter {
* emulated. Where possible (e.g. FreeBSD)
* IFCAP_NETMAP also mirrors this flag.
*/
+#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
int active_fds; /* number of user-space descriptors using this
interface, which is equal to the number of
struct netmap_if objs in the mapped region. */
@@ -411,6 +420,9 @@ struct netmap_adapter {
NM_SELINFO_T tx_si, rx_si; /* global wait queues */
+ /* count users of the global wait queues */
+ int tx_si_users, rx_si_users;
+
/* copy of if_qflush and if_transmit pointers, to intercept
* packets from the network stack when netmap is active.
*/
@@ -438,9 +450,11 @@ struct netmap_adapter {
*
* nm_config() returns configuration information from the OS
*
- * nm_krings_create() XXX
+ * nm_krings_create() create and init the krings array
+ * (the array layout must conform to the description
+ * found above the definition of netmap_krings_create)
*
- * nm_krings_delete() XXX
+ * nm_krings_delete() cleanup and delete the kring array
*
* nm_notify() is used to act after data have become available.
* For hw devices this is typically a selwakeup(),
@@ -464,7 +478,6 @@ struct netmap_adapter {
void (*nm_krings_delete)(struct netmap_adapter *);
int (*nm_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
-#define NAF_GLOBAL_NOTIFY 4
#define NAF_DISABLE_NOTIFY 8
/* standard refcount to control the lifetime of the adapter
@@ -484,6 +497,12 @@ struct netmap_adapter {
* from userspace
*/
void *na_private;
+
+#ifdef WITH_PIPES
+ struct netmap_pipe_adapter **na_pipes;
+ int na_next_pipe;
+ int na_max_pipes;
+#endif /* WITH_PIPES */
};
@@ -514,7 +533,10 @@ struct netmap_vp_adapter { /* VALE software port */
struct nm_bridge *na_bdg;
int retry;
- u_int offset; /* Offset of ethernet header for each packet. */
+ /* Offset of ethernet header for each packet. */
+ u_int virt_hdr_len;
+ /* Maximum Frame Size, used in bdg_mismatch_datapath() */
+ u_int mfs;
};
@@ -524,6 +546,12 @@ struct netmap_hw_adapter { /* physical device */
struct net_device_ops nm_ndo; // XXX linux only
};
+/* Mitigation support. */
+struct nm_generic_mit {
+ struct hrtimer mit_timer;
+ int mit_pending;
+ struct netmap_adapter *mit_na; /* backpointer */
+};
struct netmap_generic_adapter { /* emulated device */
struct netmap_hw_adapter up;
@@ -534,18 +562,29 @@ struct netmap_generic_adapter { /* emulated device */
/* generic netmap adapters support:
* a net_device_ops struct overrides ndo_select_queue(),
* save_if_input saves the if_input hook (FreeBSD),
- * mit_timer and mit_pending implement rx interrupt mitigation,
+ * mit implements rx interrupt mitigation,
*/
struct net_device_ops generic_ndo;
void (*save_if_input)(struct ifnet *, struct mbuf *);
- struct hrtimer mit_timer;
- int mit_pending;
+ struct nm_generic_mit *mit;
#ifdef linux
netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
#endif
};
+static __inline int
+netmap_real_tx_rings(struct netmap_adapter *na)
+{
+ return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS);
+}
+
+static __inline int
+netmap_real_rx_rings(struct netmap_adapter *na)
+{
+ return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS);
+}
+
#ifdef WITH_VALE
/*
@@ -614,6 +653,25 @@ struct netmap_bwrap_adapter {
#endif /* WITH_VALE */
+#ifdef WITH_PIPES
+
+#define NM_MAXPIPES 64 /* max number of pipes per adapter */
+
+struct netmap_pipe_adapter {
+ struct netmap_adapter up;
+
+ u_int id; /* pipe identifier */
+ int role; /* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */
+
+ struct netmap_adapter *parent; /* adapter that owns the memory */
+ struct netmap_pipe_adapter *peer; /* the other end of the pipe */
+ int peer_ref; /* 1 iff we are holding a ref to the peer */
+
+ u_int parent_slot; /* index in the parent pipe array */
+};
+
+#endif /* WITH_PIPES */
+
/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
@@ -767,9 +825,8 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *);
static inline void
nm_txsync_finalize(struct netmap_kring *kring)
{
- /* update ring head/tail to what the kernel knows */
+ /* update ring tail to what the kernel knows */
kring->ring->tail = kring->rtail = kring->nr_hwtail;
- kring->ring->head = kring->rhead = kring->nr_hwcur;
/* note, head/rhead/hwcur might be behind cur/rcur
* if no carrier
@@ -819,14 +876,14 @@ nm_rxsync_finalize(struct netmap_kring *kring)
* Support routines to be used with the VALE switch
*/
int netmap_update_config(struct netmap_adapter *na);
-int netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom);
+int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
void netmap_krings_delete(struct netmap_adapter *na);
int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
- uint16_t ringid, int *err);
+ uint16_t ringid, uint32_t flags, int *err);
@@ -868,6 +925,20 @@ int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
#define netmap_bdg_ctl(_1, _2) EINVAL
#endif /* !WITH_VALE */
+#ifdef WITH_PIPES
+/* max number of pipes per device */
+#define NM_MAXPIPES 64 /* XXX how many? */
+/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */
+int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr);
+void netmap_pipe_dealloc(struct netmap_adapter *);
+int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+#else /* !WITH_PIPES */
+#define NM_MAXPIPES 0
+#define netmap_pipe_alloc(_1, _2) EOPNOTSUPP
+#define netmap_pipe_dealloc(_1)
+#define netmap_get_pipe_na(_1, _2, _3) 0
+#endif
+
/* Various prototypes */
int netmap_poll(struct cdev *dev, int events, struct thread *td);
int netmap_init(void);
@@ -938,6 +1009,7 @@ enum { /* verbose flags */
extern int netmap_txsync_retry;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
+extern int netmap_generic_rings;
/*
* NA returns a pointer to the struct netmap adapter from the ifp,
@@ -1160,13 +1232,21 @@ struct netmap_priv_d {
struct netmap_if * volatile np_nifp; /* netmap if descriptor. */
struct netmap_adapter *np_na;
- int np_ringid; /* from the ioctl */
- u_int np_qfirst, np_qlast; /* range of rings to scan */
- uint16_t np_txpoll;
+ uint32_t np_flags; /* from the ioctl */
+ u_int np_txqfirst, np_txqlast; /* range of tx rings to scan */
+ u_int np_rxqfirst, np_rxqlast; /* range of rx rings to scan */
+ uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
/* np_refcount is only used on FreeBSD */
int np_refcount; /* use with NMG_LOCK held */
+
+ /* pointers to the selinfo to be used for selrecord.
+ * Either the local or the global one depending on the
+ * number of rings.
+ */
+ NM_SELINFO_T *np_rxsi, *np_txsi;
+ struct thread *np_td; /* kqueue, just debugging */
};
@@ -1188,10 +1268,113 @@ void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
* to reduce the number of interrupt requests/selwakeup
* to clients on incoming packets.
*/
-void netmap_mitigation_init(struct netmap_generic_adapter *na);
-void netmap_mitigation_start(struct netmap_generic_adapter *na);
-void netmap_mitigation_restart(struct netmap_generic_adapter *na);
-int netmap_mitigation_active(struct netmap_generic_adapter *na);
-void netmap_mitigation_cleanup(struct netmap_generic_adapter *na);
+void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na);
+void netmap_mitigation_start(struct nm_generic_mit *mit);
+void netmap_mitigation_restart(struct nm_generic_mit *mit);
+int netmap_mitigation_active(struct nm_generic_mit *mit);
+void netmap_mitigation_cleanup(struct nm_generic_mit *mit);
+
+
+
+/* Shared declarations for the VALE switch. */
+
+/*
+ * Each transmit queue accumulates a batch of packets into
+ * a structure before forwarding. Packets to the same
+ * destination are put in a list using ft_next as a link field.
+ * ft_frags and ft_next are valid only on the first fragment.
+ */
+struct nm_bdg_fwd { /* forwarding entry for a bridge */
+ void *ft_buf; /* netmap or indirect buffer */
+ uint8_t ft_frags; /* how many fragments (only on 1st frag) */
+ uint8_t _ft_port; /* dst port (unused) */
+ uint16_t ft_flags; /* flags, e.g. indirect */
+ uint16_t ft_len; /* src fragment len */
+ uint16_t ft_next; /* next packet to same destination */
+};
+
+/* struct 'virtio_net_hdr' from linux. */
+struct nm_vnet_hdr {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
+ uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
+ uint8_t gso_type;
+ uint16_t hdr_len;
+ uint16_t gso_size;
+ uint16_t csum_start;
+ uint16_t csum_offset;
+};
+
+#define WORST_CASE_GSO_HEADER (14+40+60) /* IPv6 + TCP */
+
+/* Private definitions for IPv4, IPv6, UDP and TCP headers. */
+
+struct nm_iphdr {
+ uint8_t version_ihl;
+ uint8_t tos;
+ uint16_t tot_len;
+ uint16_t id;
+ uint16_t frag_off;
+ uint8_t ttl;
+ uint8_t protocol;
+ uint16_t check;
+ uint32_t saddr;
+ uint32_t daddr;
+	/* The options start here. */
+};
+
+struct nm_tcphdr {
+ uint16_t source;
+ uint16_t dest;
+ uint32_t seq;
+ uint32_t ack_seq;
+ uint8_t doff; /* Data offset + Reserved */
+ uint8_t flags;
+ uint16_t window;
+ uint16_t check;
+ uint16_t urg_ptr;
+};
+
+struct nm_udphdr {
+ uint16_t source;
+ uint16_t dest;
+ uint16_t len;
+ uint16_t check;
+};
+
+struct nm_ipv6hdr {
+ uint8_t priority_version;
+ uint8_t flow_lbl[3];
+
+ uint16_t payload_len;
+ uint8_t nexthdr;
+ uint8_t hop_limit;
+
+ uint8_t saddr[16];
+ uint8_t daddr[16];
+};
+
+/* Type used to store a checksum (in host byte order) that hasn't been
+ * folded yet.
+ */
+#define rawsum_t uint32_t
+
+rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
+uint16_t nm_csum_ipv4(struct nm_iphdr *iph);
+void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+ size_t datalen, uint16_t *check);
+void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+ size_t datalen, uint16_t *check);
+uint16_t nm_csum_fold(rawsum_t cur_sum);
+
+void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
+ struct netmap_vp_adapter *dst_na,
+ struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
+ u_int *j, u_int lim, u_int *howmany);
#endif /* _NET_NETMAP_KERN_H_ */
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index 55f598518434..5491845090e7 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -82,6 +82,21 @@ struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
},
};
+struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
+ [NETMAP_IF_POOL] = {
+ .size = 1024,
+ .num = 1,
+ },
+ [NETMAP_RING_POOL] = {
+ .size = 5*PAGE_SIZE,
+ .num = 4,
+ },
+ [NETMAP_BUF_POOL] = {
+ .size = 2048,
+ .num = 4098,
+ },
+};
+
/*
* nm_mem is the memory allocator used for all physical interfaces
@@ -118,9 +133,16 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.config = netmap_mem_global_config,
.finalize = netmap_mem_global_finalize,
.deref = netmap_mem_global_deref,
+
+ .nm_id = 1,
+
+ .prev = &nm_mem,
+ .next = &nm_mem,
};
+struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
+
// XXX logically belongs to nm_mem
struct lut_entry *netmap_buffer_lut; /* exported */
@@ -135,7 +157,7 @@ const struct netmap_mem_d nm_blueprint = {
.objminsize = sizeof(struct netmap_if),
.objmaxsize = 4096,
.nummin = 1,
- .nummax = 10,
+ .nummax = 100,
},
[NETMAP_RING_POOL] = {
.name = "%s_ring",
@@ -172,13 +194,67 @@ const struct netmap_mem_d nm_blueprint = {
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
- CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s")
+ CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \
+ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \
+ CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \
+ "Default size of private netmap " STRINGIFY(name) "s"); \
+ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \
+ CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \
+ "Default number of private netmap " STRINGIFY(name) "s")
SYSCTL_DECL(_dev_netmap);
DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
+static int
+nm_mem_assign_id(struct netmap_mem_d *nmd)
+{
+ nm_memid_t id;
+ struct netmap_mem_d *scan = netmap_last_mem_d;
+ int error = ENOMEM;
+
+ NMA_LOCK(&nm_mem);
+
+ do {
+ /* we rely on unsigned wrap around */
+ id = scan->nm_id + 1;
+ if (id == 0) /* reserve 0 as error value */
+ id = 1;
+ scan = scan->next;
+ if (id != scan->nm_id) {
+ nmd->nm_id = id;
+ nmd->prev = scan->prev;
+ nmd->next = scan;
+ scan->prev->next = nmd;
+ scan->prev = nmd;
+ netmap_last_mem_d = nmd;
+ error = 0;
+ break;
+ }
+ } while (scan != netmap_last_mem_d);
+
+ NMA_UNLOCK(&nm_mem);
+ return error;
+}
+
+static void
+nm_mem_release_id(struct netmap_mem_d *nmd)
+{
+ NMA_LOCK(&nm_mem);
+
+ nmd->prev->next = nmd->next;
+ nmd->next->prev = nmd->prev;
+
+ if (netmap_last_mem_d == nmd)
+ netmap_last_mem_d = nmd->prev;
+
+ nmd->prev = nmd->next = NULL;
+
+ NMA_UNLOCK(&nm_mem);
+}
+
+
/*
* First, find the allocator that contains the requested offset,
* then locate the cluster through a lookup table.
@@ -216,7 +292,8 @@ netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
}
int
-netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags)
+netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
+ nm_memid_t *id)
{
int error = 0;
NMA_LOCK(nmd);
@@ -234,6 +311,7 @@ netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags)
}
}
*memflags = nmd->flags;
+ *id = nmd->nm_id;
out:
NMA_UNLOCK(nmd);
return error;
@@ -343,21 +421,34 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_
/*
- * free by index, not by address. This is slow, but is only used
- * for a small number of objects (rings, nifp)
+ * free by index, not by address.
+ * XXX should we also cleanup the content ?
*/
-static void
+static int
netmap_obj_free(struct netmap_obj_pool *p, uint32_t j)
{
+ uint32_t *ptr, mask;
+
if (j >= p->objtotal) {
D("invalid index %u, max %u", j, p->objtotal);
- return;
+ return 1;
+ }
+ ptr = &p->bitmap[j / 32];
+ mask = (1 << (j % 32));
+ if (*ptr & mask) {
+ D("ouch, double free on buffer %d", j);
+ return 1;
+ } else {
+ *ptr |= mask;
+ p->objfree++;
+ return 0;
}
- p->bitmap[j / 32] |= (1 << (j % 32));
- p->objfree++;
- return;
}
+/*
+ * free by address. This is slow but is only used for a few
+ * objects (rings, nifp)
+ */
static void
netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
{
@@ -388,9 +479,63 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], NETMAP_BDG_BUF_SIZE(n), _pos, _index)
+#if 0 // XXX unused
/* Return the index associated to the given packet buffer */
#define netmap_buf_index(n, v) \
(netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n))
+#endif
+
+/*
+ * allocate extra buffers in a linked list.
+ * returns the actual number.
+ */
+uint32_t
+netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
+{
+ struct netmap_mem_d *nmd = na->nm_mem;
+ uint32_t i, pos = 0; /* opaque, scan position in the bitmap */
+
+ NMA_LOCK(nmd);
+
+ *head = 0; /* default, 'null' index ie empty list */
+ for (i = 0 ; i < n; i++) {
+ uint32_t cur = *head; /* save current head */
+ uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
+ if (p == NULL) {
+ D("no more buffers after %d of %d", i, n);
+ *head = cur; /* restore */
+ break;
+ }
+ RD(5, "allocate buffer %d -> %d", *head, cur);
+ *p = cur; /* link to previous head */
+ }
+
+ NMA_UNLOCK(nmd);
+
+ return i;
+}
+
+static void
+netmap_extra_free(struct netmap_adapter *na, uint32_t head)
+{
+ struct lut_entry *lut = na->na_lut;
+ struct netmap_mem_d *nmd = na->nm_mem;
+ struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
+ uint32_t i, cur, *buf;
+
+ D("freeing the extra list");
+ for (i = 0; head >=2 && head < p->objtotal; i++) {
+ cur = head;
+ buf = lut[head].vaddr;
+ head = *buf;
+ *buf = 0;
+ if (netmap_obj_free(p, cur))
+ break;
+ }
+ if (head != 0)
+ D("breaking with head %d", head);
+ D("freed %d buffers", i);
+}
/* Return nonzero on error */
@@ -425,6 +570,19 @@ cleanup:
return (ENOMEM);
}
+static void
+netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index)
+{
+ struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
+ u_int i;
+
+ for (i = 0; i < n; i++) {
+ slot[i].buf_idx = index;
+ slot[i].len = p->_objsize;
+ slot[i].flags = 0;
+ }
+}
+
static void
netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
@@ -438,6 +596,18 @@ netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
netmap_obj_free(p, i);
}
+
+static void
+netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
+{
+ u_int i;
+
+ for (i = 0; i < n; i++) {
+ if (slot[i].buf_idx > 2)
+ netmap_free_buf(nmd, slot[i].buf_idx);
+ }
+}
+
static void
netmap_reset_obj_allocator(struct netmap_obj_pool *p)
{
@@ -677,7 +847,9 @@ static void
netmap_mem_reset_all(struct netmap_mem_d *nmd)
{
int i;
- D("resetting %p", nmd);
+
+ if (netmap_verbose)
+ D("resetting %p", nmd);
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_reset_obj_allocator(&nmd->pools[i]);
}
@@ -703,12 +875,14 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
nmd->flags |= NETMAP_MEM_FINALIZED;
- D("Have %d KB for interfaces, %d KB for rings and %d MB for buffers",
- nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
- nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
- nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
+ if (netmap_verbose)
+ D("interfaces %d KB, rings %d KB, buffers %d MB",
+ nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
+ nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
+ nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
- D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
+ if (netmap_verbose)
+ D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
return 0;
@@ -724,10 +898,13 @@ netmap_mem_private_delete(struct netmap_mem_d *nmd)
{
if (nmd == NULL)
return;
- D("deleting %p", nmd);
+ if (netmap_verbose)
+ D("deleting %p", nmd);
if (nmd->refcount > 0)
D("bug: deleting mem allocator with refcount=%d!", nmd->refcount);
- D("done deleting %p", nmd);
+ nm_mem_release_id(nmd);
+ if (netmap_verbose)
+ D("done deleting %p", nmd);
NMA_LOCK_DESTROY(nmd);
free(nmd, M_DEVBUF);
}
@@ -762,35 +939,70 @@ netmap_mem_private_deref(struct netmap_mem_d *nmd)
NMA_UNLOCK(nmd);
}
+
+/*
+ * allocator for private memory
+ */
struct netmap_mem_d *
-netmap_mem_private_new(const char *name, u_int txr, u_int txd, u_int rxr, u_int rxd)
+netmap_mem_private_new(const char *name, u_int txr, u_int txd,
+ u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr)
{
struct netmap_mem_d *d = NULL;
struct netmap_obj_params p[NETMAP_POOLS_NR];
- int i;
- u_int maxd;
+ int i, err;
+ u_int v, maxd;
d = malloc(sizeof(struct netmap_mem_d),
M_DEVBUF, M_NOWAIT | M_ZERO);
- if (d == NULL)
- return NULL;
+ if (d == NULL) {
+ err = ENOMEM;
+ goto error;
+ }
*d = nm_blueprint;
- /* XXX the rest of the code assumes the stack rings are alwasy present */
+ err = nm_mem_assign_id(d);
+ if (err)
+ goto error;
+
+ /* account for the fake host rings */
txr++;
rxr++;
- p[NETMAP_IF_POOL].size = sizeof(struct netmap_if) +
- sizeof(ssize_t) * (txr + rxr);
- p[NETMAP_IF_POOL].num = 2;
+
+ /* copy the min values */
+ for (i = 0; i < NETMAP_POOLS_NR; i++) {
+ p[i] = netmap_min_priv_params[i];
+ }
+
+ /* possibly increase them to fit user request */
+ v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr);
+ if (p[NETMAP_IF_POOL].size < v)
+ p[NETMAP_IF_POOL].size = v;
+ v = 2 + 4 * npipes;
+ if (p[NETMAP_IF_POOL].num < v)
+ p[NETMAP_IF_POOL].num = v;
maxd = (txd > rxd) ? txd : rxd;
- p[NETMAP_RING_POOL].size = sizeof(struct netmap_ring) +
- sizeof(struct netmap_slot) * maxd;
- p[NETMAP_RING_POOL].num = txr + rxr;
- p[NETMAP_BUF_POOL].size = 2048; /* XXX find a way to let the user choose this */
- p[NETMAP_BUF_POOL].num = rxr * (rxd + 2) + txr * (txd + 2);
+ v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd;
+ if (p[NETMAP_RING_POOL].size < v)
+ p[NETMAP_RING_POOL].size = v;
+ /* each pipe endpoint needs two tx rings (1 normal + 1 host, fake)
+ * and two rx rings (again, 1 normal and 1 fake host)
+ */
+ v = txr + rxr + 8 * npipes;
+ if (p[NETMAP_RING_POOL].num < v)
+ p[NETMAP_RING_POOL].num = v;
+ /* for each pipe we only need the buffers for the 4 "real" rings.
+ * On the other end, the pipe ring dimension may be different from
+ * the parent port ring dimension. As a compromise, we allocate twice the
+ * space actually needed if the pipe rings were the same size as the parent rings
+ */
+ v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs;
+ /* the +2 is for the tx and rx fake buffers (indices 0 and 1) */
+ if (p[NETMAP_BUF_POOL].num < v)
+ p[NETMAP_BUF_POOL].num = v;
- D("req if %d*%d ring %d*%d buf %d*%d",
+ if (netmap_verbose)
+ D("req if %d*%d ring %d*%d buf %d*%d",
p[NETMAP_IF_POOL].num,
p[NETMAP_IF_POOL].size,
p[NETMAP_RING_POOL].num,
@@ -802,8 +1014,9 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd, u_int rxr, u_int
snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
nm_blueprint.pools[i].name,
name);
- if (netmap_config_obj_allocator(&d->pools[i],
- p[i].num, p[i].size))
+ err = netmap_config_obj_allocator(&d->pools[i],
+ p[i].num, p[i].size);
+ if (err)
goto error;
}
@@ -814,6 +1027,8 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd, u_int rxr, u_int
return d;
error:
netmap_mem_private_delete(d);
+ if (perr)
+ *perr = err;
return NULL;
}
@@ -917,20 +1132,25 @@ netmap_mem_fini(void)
static void
netmap_free_rings(struct netmap_adapter *na)
{
- u_int i;
+ struct netmap_kring *kring;
+ struct netmap_ring *ring;
if (!na->tx_rings)
return;
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- if (na->tx_rings[i].ring) {
- netmap_ring_free(na->nm_mem, na->tx_rings[i].ring);
- na->tx_rings[i].ring = NULL;
- }
+ for (kring = na->tx_rings; kring != na->rx_rings; kring++) {
+ ring = kring->ring;
+ if (ring == NULL)
+ continue;
+ netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
+ netmap_ring_free(na->nm_mem, ring);
+ kring->ring = NULL;
}
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- if (na->rx_rings[i].ring) {
- netmap_ring_free(na->nm_mem, na->rx_rings[i].ring);
- na->rx_rings[i].ring = NULL;
- }
+ for (/* cont'd from above */; kring != na->tailroom; kring++) {
+ ring = kring->ring;
+ if (ring == NULL)
+ continue;
+ netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
+ netmap_ring_free(na->nm_mem, ring);
+ kring->ring = NULL;
}
}
@@ -938,6 +1158,8 @@ netmap_free_rings(struct netmap_adapter *na)
*
* Allocate netmap rings and buffers for this card
* The rings are contiguous, but have variable size.
+ * The kring array must follow the layout described
+ * in netmap_krings_create().
*/
int
netmap_mem_rings_create(struct netmap_adapter *na)
@@ -945,10 +1167,16 @@ netmap_mem_rings_create(struct netmap_adapter *na)
struct netmap_ring *ring;
u_int len, ndesc;
struct netmap_kring *kring;
+ u_int i;
NMA_LOCK(na->nm_mem);
- for (kring = na->tx_rings; kring != na->rx_rings; kring++) { /* Transmit rings */
+ /* transmit rings */
+ for (i =0, kring = na->tx_rings; kring != na->rx_rings; kring++, i++) {
+ if (kring->ring) {
+ ND("%s %ld already created", kring->name, kring - na->tx_rings);
+ continue; /* already created by somebody else */
+ }
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
@@ -971,14 +1199,27 @@ netmap_mem_rings_create(struct netmap_adapter *na)
ring->tail = kring->rtail;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
+ ND("%s h %d c %d t %d", kring->name,
+ ring->head, ring->cur, ring->tail);
ND("initializing slots for txring");
- if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
- D("Cannot allocate buffers for tx_ring");
- goto cleanup;
+ if (i != na->num_tx_rings || (na->na_flags & NAF_HOST_RINGS)) {
+ /* this is a real ring */
+ if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+ D("Cannot allocate buffers for tx_ring");
+ goto cleanup;
+ }
+ } else {
+ /* this is a fake tx ring, set all indices to 0 */
+ netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
}
}
- for ( ; kring != na->tailroom; kring++) { /* Receive rings */
+ /* receive rings */
+ for ( i = 0 /* kring cont'd from above */ ; kring != na->tailroom; kring++, i++) {
+ if (kring->ring) {
+ ND("%s %ld already created", kring->name, kring - na->rx_rings);
+ continue; /* already created by somebody else */
+ }
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
@@ -1001,10 +1242,18 @@ netmap_mem_rings_create(struct netmap_adapter *na)
ring->tail = kring->rtail;
*(int *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
+ ND("%s h %d c %d t %d", kring->name,
+ ring->head, ring->cur, ring->tail);
ND("initializing slots for rxring %p", ring);
- if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
- D("Cannot allocate buffers for rx_ring");
- goto cleanup;
+ if (i != na->num_rx_rings || (na->na_flags & NAF_HOST_RINGS)) {
+ /* this is a real ring */
+ if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+ D("Cannot allocate buffers for rx_ring");
+ goto cleanup;
+ }
+ } else {
+ /* this is a fake rx ring, set all indices to 1 */
+ netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 1);
}
}
@@ -1024,20 +1273,8 @@ void
netmap_mem_rings_delete(struct netmap_adapter *na)
{
/* last instance, release bufs and rings */
- u_int i, lim;
- struct netmap_kring *kring;
- struct netmap_ring *ring;
-
NMA_LOCK(na->nm_mem);
- for (kring = na->tx_rings; kring != na->tailroom; kring++) {
- ring = kring->ring;
- if (ring == NULL)
- continue;
- lim = kring->nkr_num_slots;
- for (i = 0; i < lim; i++)
- netmap_free_buf(na->nm_mem, ring->slot[i].buf_idx);
- }
netmap_free_rings(na);
NMA_UNLOCK(na->nm_mem);
@@ -1059,16 +1296,12 @@ netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
ssize_t base; /* handy for relative offsets between rings and nifp */
u_int i, len, ntx, nrx;
- /*
- * verify whether virtual port need the stack ring
- */
- ntx = na->num_tx_rings + 1; /* shorthand, include stack ring */
- nrx = na->num_rx_rings + 1; /* shorthand, include stack ring */
+ /* account for the (eventually fake) host rings */
+ ntx = na->num_tx_rings + 1;
+ nrx = na->num_rx_rings + 1;
/*
* the descriptor is followed inline by an array of offsets
* to the tx and rx rings in the shared memory region.
- * For virtual rx rings we also allocate an array of
- * pointers to assign to nkr_leases.
*/
NMA_LOCK(na->nm_mem);
@@ -1112,7 +1345,8 @@ netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
/* nothing to do */
return;
NMA_LOCK(na->nm_mem);
-
+ if (nifp->ni_bufs_head)
+ netmap_extra_free(na, nifp->ni_bufs_head);
netmap_if_free(na->nm_mem, nifp);
NMA_UNLOCK(na->nm_mem);
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index 8e6c58cbc4ee..e83616a5195f 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -160,6 +160,7 @@ typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);
+typedef uint16_t nm_memid_t;
/* We implement two kinds of netmap_mem_d structures:
*
@@ -192,6 +193,11 @@ struct netmap_mem_d {
netmap_mem_config_t config;
netmap_mem_finalize_t finalize;
netmap_mem_deref_t deref;
+
+ nm_memid_t nm_id; /* allocator identifier */
+
+ /* list of all existing allocators, sorted by nm_id */
+ struct netmap_mem_d *prev, *next;
};
extern struct netmap_mem_d nm_mem;
@@ -206,14 +212,16 @@ void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *);
-int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags);
+int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
-struct netmap_mem_d*
- netmap_mem_private_new(const char *name, u_int txr, u_int txd, u_int rxr, u_int rxd);
+struct netmap_mem_d* netmap_mem_private_new(const char *name,
+ u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
+ int* error);
void netmap_mem_private_delete(struct netmap_mem_d *);
#define NETMAP_BDG_BUF_SIZE(n) ((n)->pools[NETMAP_BUF_POOL]._objsize)
+uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
#endif
diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c
new file mode 100644
index 000000000000..a776a2424577
--- /dev/null
+++ b/sys/dev/netmap/netmap_offloadings.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+
+
+
+/* This routine is called by bdg_mismatch_datapath() when it finishes
+ * accumulating bytes for a segment, in order to fix some fields in the
+ * segment headers (which still contain the same content as the header
+ * of the original GSO packet). 'buf' points to the beginning (e.g.
+ * the ethernet header) of the segment, and 'len' is its length.
+ */
+static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
+ u_int segmented_bytes, u_int last_segment,
+ u_int tcp, u_int iphlen)
+{
+ struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
+ struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
+ uint16_t *check = NULL;
+ uint8_t *check_data = NULL;
+
+ if (iphlen == 20) {
+ /* Set the IPv4 "Total Length" field. */
+ iph->tot_len = htobe16(len-14);
+ ND("ip total length %u", be16toh(ip->tot_len));
+
+ /* Set the IPv4 "Identification" field. */
+ iph->id = htobe16(be16toh(iph->id) + idx);
+ ND("ip identification %u", be16toh(iph->id));
+
+ /* Compute and insert the IPv4 header checksum. */
+ iph->check = 0;
+ iph->check = nm_csum_ipv4(iph);
+ ND("IP csum %x", be16toh(iph->check));
+ } else {/* if (iphlen == 40) */
+ /* Set the IPv6 "Payload Len" field. */
+ ip6h->payload_len = htobe16(len-14-iphlen);
+ }
+
+ if (tcp) {
+ struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
+
+ /* Set the TCP sequence number. */
+ tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
+ ND("tcp seq %u", be32toh(tcph->seq));
+
+ /* Zero the PSH and FIN TCP flags if this is not the last
+ segment. */
+ if (!last_segment)
+ tcph->flags &= ~(0x8 | 0x1);
+ ND("last_segment %u", last_segment);
+
+ check = &tcph->check;
+ check_data = (uint8_t *)tcph;
+ } else { /* UDP */
+ struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
+
+ /* Set the UDP 'Length' field. */
+ udph->len = htobe16(len-14-iphlen);
+
+ check = &udph->check;
+ check_data = (uint8_t *)udph;
+ }
+
+ /* Compute and insert TCP/UDP checksum. */
+ *check = 0;
+ if (iphlen == 20)
+ nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
+ else
+ nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
+
+ ND("TCP/UDP csum %x", be16toh(*check));
+}
+
+
+/* The VALE mismatch datapath implementation. */
+void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
+ struct netmap_vp_adapter *dst_na,
+ struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
+ u_int *j, u_int lim, u_int *howmany)
+{
+ struct netmap_slot *slot = NULL;
+ struct nm_vnet_hdr *vh = NULL;
+ /* Number of source slots to process. */
+ u_int frags = ft_p->ft_frags;
+ struct nm_bdg_fwd *ft_end = ft_p + frags;
+
+ /* Source and destination pointers. */
+ uint8_t *dst, *src;
+ size_t src_len, dst_len;
+
+ u_int j_start = *j;
+ u_int dst_slots = 0;
+
+ /* If the source port uses the offloadings, while destination doesn't,
+ * we grab the source virtio-net header and do the offloadings here.
+ */
+ if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
+ vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
+ }
+
+ /* Init source and dest pointers. */
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ slot = &ring->slot[*j];
+ dst = BDG_NMB(&dst_na->up, slot);
+ dst_len = src_len;
+
+ /* We are processing the first input slot and there is a mismatch
+ * between source and destination virt_hdr_len (SHL and DHL).
+ * When the a client is using virtio-net headers, the header length
+ * can be:
+ * - 10: the header corresponds to the struct nm_vnet_hdr
+ * - 12: the first 10 bytes correspond to the struct
+ * virtio_net_hdr, and the last 2 bytes store the
+ * "mergeable buffers" info, which is an optional
+ * hint that can be zeroed for compatibility
+ *
+ * The destination header is therefore built according to the
+ * following table:
+ *
+ * SHL | DHL | destination header
+ * -----------------------------
+ * 0 | 10 | zero
+ * 0 | 12 | zero
+ * 10 | 0 | doesn't exist
+ * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero
+ * 12 | 0 | doesn't exist
+ * 12 | 10 | copied from the first 10 bytes of source header
+ */
+ bzero(dst, dst_na->virt_hdr_len);
+ if (na->virt_hdr_len && dst_na->virt_hdr_len)
+ memcpy(dst, src, sizeof(struct nm_vnet_hdr));
+ /* Skip the virtio-net headers. */
+ src += na->virt_hdr_len;
+ src_len -= na->virt_hdr_len;
+ dst += dst_na->virt_hdr_len;
+ dst_len = dst_na->virt_hdr_len + src_len;
+
+ /* Here it could be dst_len == 0 (which implies src_len == 0),
+ * so we avoid passing a zero length fragment.
+ */
+ if (dst_len == 0) {
+ ft_p++;
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ dst_len = src_len;
+ }
+
+ if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ u_int gso_bytes = 0;
+ /* Length of the GSO packet header. */
+ u_int gso_hdr_len = 0;
+ /* Pointer to the GSO packet header. Assume it is in a single fragment. */
+ uint8_t *gso_hdr = NULL;
+ /* Index of the current segment. */
+ u_int gso_idx = 0;
+ /* Payload data bytes segmented so far (e.g. TCP data bytes). */
+ u_int segmented_bytes = 0;
+ /* Length of the IP header (20 if IPv4, 40 if IPv6). */
+ u_int iphlen = 0;
+ /* Is this a TCP or an UDP GSO packet? */
+ u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
+ == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
+
+ /* Segment the GSO packet contained into the input slots (frags). */
+ while (ft_p != ft_end) {
+ size_t copy;
+
+ /* Grab the GSO header if we don't have it. */
+ if (!gso_hdr) {
+ uint16_t ethertype;
+
+ gso_hdr = src;
+
+ /* Look at the 'Ethertype' field to see if this packet
+ * is IPv4 or IPv6.
+ */
+ ethertype = be16toh(*((uint16_t *)(gso_hdr + 12)));
+ if (ethertype == 0x0800)
+ iphlen = 20;
+ else /* if (ethertype == 0x86DD) */
+ iphlen = 40;
+ ND(3, "type=%04x", ethertype);
+
+ /* Compute gso_hdr_len. For TCP we need to read the
+ * content of the 'Data Offset' field.
+ */
+ if (tcp) {
+ struct nm_tcphdr *tcph =
+ (struct nm_tcphdr *)&gso_hdr[14+iphlen];
+
+ gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
+ } else
+ gso_hdr_len = 14 + iphlen + 8; /* UDP */
+
+ ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
+ dst_na->mfs);
+
+ /* Advance source pointers. */
+ src += gso_hdr_len;
+ src_len -= gso_hdr_len;
+ if (src_len == 0) {
+ ft_p++;
+ if (ft_p == ft_end)
+ break;
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ continue;
+ }
+ }
+
+ /* Fill in the header of the current segment. */
+ if (gso_bytes == 0) {
+ memcpy(dst, gso_hdr, gso_hdr_len);
+ gso_bytes = gso_hdr_len;
+ }
+
+ /* Fill in data and update source and dest pointers. */
+ copy = src_len;
+ if (gso_bytes + copy > dst_na->mfs)
+ copy = dst_na->mfs - gso_bytes;
+ memcpy(dst + gso_bytes, src, copy);
+ gso_bytes += copy;
+ src += copy;
+ src_len -= copy;
+
+ /* A segment is complete or we have processed all the
+ GSO payload bytes. */
+ if (gso_bytes >= dst_na->mfs ||
+ (src_len == 0 && ft_p + 1 == ft_end)) {
+ /* After raw segmentation, we must fix some header
+ * fields and compute checksums, in a protocol dependent
+ * way. */
+ gso_fix_segment(dst, gso_bytes, gso_idx,
+ segmented_bytes,
+ src_len == 0 && ft_p + 1 == ft_end,
+ tcp, iphlen);
+
+ ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
+ slot->len = gso_bytes;
+ slot->flags = 0;
+ segmented_bytes += gso_bytes - gso_hdr_len;
+
+ dst_slots++;
+
+ /* Next destination slot. */
+ *j = nm_next(*j, lim);
+ slot = &ring->slot[*j];
+ dst = BDG_NMB(&dst_na->up, slot);
+
+ gso_bytes = 0;
+ gso_idx++;
+ }
+
+ /* Next input slot. */
+ if (src_len == 0) {
+ ft_p++;
+ if (ft_p == ft_end)
+ break;
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ }
+ }
+ ND(3, "%d bytes segmented", segmented_bytes);
+
+ } else {
+ /* Address of a checksum field into a destination slot. */
+ uint16_t *check = NULL;
+ /* Accumulator for an unfolded checksum. */
+ rawsum_t csum = 0;
+
+ /* Process a non-GSO packet. */
+
+ /* Init 'check' if necessary. */
+ if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+ if (unlikely(vh->csum_offset + vh->csum_start > src_len))
+ D("invalid checksum request");
+ else
+ check = (uint16_t *)(dst + vh->csum_start +
+ vh->csum_offset);
+ }
+
+ while (ft_p != ft_end) {
+ /* Init/update the packet checksum if needed. */
+ if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+ if (!dst_slots)
+ csum = nm_csum_raw(src + vh->csum_start,
+ src_len - vh->csum_start, 0);
+ else
+ csum = nm_csum_raw(src, src_len, csum);
+ }
+
+ /* Round to a multiple of 64 */
+ src_len = (src_len + 63) & ~63;
+
+ if (ft_p->ft_flags & NS_INDIRECT) {
+ if (copyin(src, dst, src_len)) {
+ /* Invalid user pointer, pretend len is 0. */
+ dst_len = 0;
+ }
+ } else {
+ memcpy(dst, src, (int)src_len);
+ }
+ slot->len = dst_len;
+
+ dst_slots++;
+
+ /* Next destination slot. */
+ *j = nm_next(*j, lim);
+ slot = &ring->slot[*j];
+ dst = BDG_NMB(&dst_na->up, slot);
+
+ /* Next source slot. */
+ ft_p++;
+ src = ft_p->ft_buf;
+ dst_len = src_len = ft_p->ft_len;
+
+ }
+
+ /* Finalize (fold) the checksum if needed. */
+ if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+ *check = nm_csum_fold(csum);
+ }
+ ND(3, "using %u dst_slots", dst_slots);
+
+ /* A second pass on the destination slots to set the slot flags,
+ * using the right number of destination slots.
+ */
+ while (j_start != *j) {
+ slot = &ring->slot[j_start];
+ slot->flags = (dst_slots << 8)| NS_MOREFRAG;
+ j_start = nm_next(j_start, lim);
+ }
+ /* Clear NS_MOREFRAG flag on last entry. */
+ slot->flags = (dst_slots << 8);
+ }
+
+ /* Update howmany. */
+ if (unlikely(dst_slots > *howmany)) {
+ dst_slots = *howmany;
+ D("Slot allocation error: Should never happen");
+ }
+ *howmany -= dst_slots;
+}
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
new file mode 100644
index 000000000000..f8f29fa1770a
--- /dev/null
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/refcount.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#ifdef WITH_PIPES
+
+#define NM_PIPE_MAXSLOTS 4096
+
+int netmap_default_pipes = 0; /* default number of pipes for each nic */
+SYSCTL_DECL(_dev_netmap);
+SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
+
+/* allocate the pipe array in the parent adapter */
+int
+netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr)
+{
+ size_t len;
+ int mode = nmr->nr_flags & NR_REG_MASK;
+ u_int npipes;
+
+ if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) {
+ /* this is for our parent, not for us */
+ return 0;
+ }
+
+ /* TODO: we can resize the array if the new
+ * request can accommodate the already existing pipes
+ */
+ if (na->na_pipes) {
+ nmr->nr_arg1 = na->na_max_pipes;
+ return 0;
+ }
+
+ npipes = nmr->nr_arg1;
+ if (npipes == 0)
+ npipes = netmap_default_pipes;
+ nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL);
+
+ if (npipes == 0) {
+ /* really zero, nothing to alloc */
+ goto out;
+ }
+
+ len = sizeof(struct netmap_pipe_adapter *) * npipes;
+ na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (na->na_pipes == NULL)
+ return ENOMEM;
+
+ na->na_max_pipes = npipes;
+ na->na_next_pipe = 0;
+
+out:
+ nmr->nr_arg1 = npipes;
+
+ return 0;
+}
+
+/* deallocate the parent array in the parent adapter */
+void
+netmap_pipe_dealloc(struct netmap_adapter *na)
+{
+ if (na->na_pipes) {
+ ND("freeing pipes for %s", NM_IFPNAME(na->ifp));
+ free(na->na_pipes, M_DEVBUF);
+ na->na_pipes = NULL;
+ na->na_max_pipes = 0;
+ na->na_next_pipe = 0;
+ }
+}
+
+/* find a pipe endpoint with the given id among the parent's pipes */
+static struct netmap_pipe_adapter *
+netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
+{
+ int i;
+ struct netmap_pipe_adapter *na;
+
+ for (i = 0; i < parent->na_next_pipe; i++) {
+ na = parent->na_pipes[i];
+ if (na->id == pipe_id) {
+ return na;
+ }
+ }
+ return NULL;
+}
+
+/* add a new pipe endpoint to the parent array */
+static int
+netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
+{
+ if (parent->na_next_pipe >= parent->na_max_pipes) {
+ D("%s: no space left for pipes", NM_IFPNAME(parent->ifp));
+ return ENOMEM;
+ }
+
+ parent->na_pipes[parent->na_next_pipe] = na;
+ na->parent_slot = parent->na_next_pipe;
+ parent->na_next_pipe++;
+ return 0;
+}
+
+/* remove the given pipe endpoint from the parent array */
+static void
+netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
+{
+ u_int n;
+ n = --parent->na_next_pipe;
+ if (n != na->parent_slot) {
+ parent->na_pipes[na->parent_slot] =
+ parent->na_pipes[n];
+ }
+ parent->na_pipes[n] = NULL;
+}
+
+static int
+netmap_pipe_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+{
+ struct netmap_kring *txkring = na->tx_rings + ring_nr,
+ *rxkring = txkring->pipe;
+ u_int limit; /* slots to transfer */
+ u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
+ lim_rx = rxkring->nkr_num_slots - 1;
+ int m, busy;
+
+ ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
+ ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
+ txkring->rcur, txkring->rhead, txkring->rtail);
+
+ j = rxkring->nr_hwtail; /* RX */
+ k = txkring->nr_hwcur; /* TX */
+ m = txkring->rhead - txkring->nr_hwcur; /* new slots */
+ if (m < 0)
+ m += txkring->nkr_num_slots;
+ limit = m;
+ m = rxkring->nkr_num_slots - 1; /* max avail space on destination */
+ busy = j - rxkring->nr_hwcur; /* busy slots */
+ if (busy < 0)
+ busy += txkring->nkr_num_slots;
+ m -= busy; /* subtract busy slots */
+ ND(2, "m %d limit %d", m, limit);
+ if (m < limit)
+ limit = m;
+
+ if (limit == 0) {
+ /* either the rxring is full, or nothing to send */
+ nm_txsync_finalize(txkring); /* actually useless */
+ return 0;
+ }
+
+ while (limit-- > 0) {
+ struct netmap_slot *rs = &rxkring->save_ring->slot[j];
+ struct netmap_slot *ts = &txkring->ring->slot[k];
+ struct netmap_slot tmp;
+
+ /* swap the slots */
+ tmp = *rs;
+ *rs = *ts;
+ *ts = tmp;
+
+ /* no need to report the buffer change */
+
+ j = nm_next(j, lim_rx);
+ k = nm_next(k, lim_tx);
+ }
+
+ wmb(); /* make sure the slots are updated before publishing them */
+ rxkring->nr_hwtail = j;
+ txkring->nr_hwcur = k;
+ txkring->nr_hwtail = nm_prev(k, lim_tx);
+
+ nm_txsync_finalize(txkring);
+ ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
+ txkring->rcur, txkring->rhead, txkring->rtail, j);
+
+ wmb(); /* make sure rxkring->nr_hwtail is updated before notifying */
+ rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0);
+
+ return 0;
+}
+
+static int
+netmap_pipe_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+{
+ struct netmap_kring *rxkring = na->rx_rings + ring_nr,
+ *txkring = rxkring->pipe;
+ uint32_t oldhwcur = rxkring->nr_hwcur;
+
+ ND("%s %x <- %s", rxkring->name, flags, txkring->name);
+ rxkring->nr_hwcur = rxkring->rhead; /* recover user-released slots */
+ ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
+ rxkring->rcur, rxkring->rhead, rxkring->rtail);
+ rmb(); /* paired with the first wmb() in txsync */
+ nm_rxsync_finalize(rxkring);
+
+ if (oldhwcur != rxkring->nr_hwcur) {
+ /* we have released some slots, notify the other end */
+ wmb(); /* make sure nr_hwcur is updated before notifying */
+ txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0);
+ }
+ return 0;
+}
+
+/* Pipe endpoints are created and destroyed together, so that endpoints do not
+ * have to check for the existence of their peer at each ?xsync.
+ *
+ * To play well with the existing netmap infrastructure (refcounts etc.), we
+ * adopt the following strategy:
+ *
+ * 1) The first endpoint that is created also creates the other endpoint and
+ * grabs a reference to it.
+ *
+ * state A) user1 --> endpoint1 --> endpoint2
+ *
+ * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
+ * its reference to the user:
+ *
+ * state B) user1 --> endpoint1 endpoint2 <--- user2
+ *
+ * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
+ * callback endpoint2 notes that endpoint1 is still active and adds a reference
+ * from endpoint1 to itself. When user2 then releases her own reference,
+ * endpoint2 is not destroyed and we are back to state A. A symmetrical state
+ * would be reached if endpoint1 were released instead.
+ *
+ * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
+ * it owns a reference to endpoint2 and releases it.
+ *
+ * Something similar goes on for the creation and destruction of the krings.
+ */
+
+
+/* netmap_pipe_krings_create.
+ *
+ * There are two cases:
+ *
+ * 1) state is
+ *
+ * usr1 --> e1 --> e2
+ *
+ * and we are e1. We have to create both sets
+ * of krings.
+ *
+ * 2) state is
+ *
+ * usr1 --> e1 --> e2
+ *
+ * and we are e2. e1 is certainly registered and our
+ * krings already exist, but they may be hidden.
+ */
+static int
+netmap_pipe_krings_create(struct netmap_adapter *na)
+{
+ struct netmap_pipe_adapter *pna =
+ (struct netmap_pipe_adapter *)na;
+ struct netmap_adapter *ona = &pna->peer->up;
+ int error = 0;
+ if (pna->peer_ref) {
+ int i;
+
+ /* case 1) above */
+ D("%p: case 1, create everything", na);
+ error = netmap_krings_create(na, 0);
+ if (error)
+ goto err;
+
+ /* we also create all the rings, since we need to
+ * update the save_ring pointers.
+ * netmap_mem_rings_create (called by our caller)
+ * will not create the rings again
+ */
+
+ error = netmap_mem_rings_create(na);
+ if (error)
+ goto del_krings1;
+
+ /* update our hidden ring pointers */
+ for (i = 0; i < na->num_tx_rings + 1; i++)
+ na->tx_rings[i].save_ring = na->tx_rings[i].ring;
+ for (i = 0; i < na->num_rx_rings + 1; i++)
+ na->rx_rings[i].save_ring = na->rx_rings[i].ring;
+
+ /* now, create krings and rings of the other end */
+ error = netmap_krings_create(ona, 0);
+ if (error)
+ goto del_rings1;
+
+ error = netmap_mem_rings_create(ona);
+ if (error)
+ goto del_krings2;
+
+ for (i = 0; i < ona->num_tx_rings + 1; i++)
+ ona->tx_rings[i].save_ring = ona->tx_rings[i].ring;
+ for (i = 0; i < ona->num_rx_rings + 1; i++)
+ ona->rx_rings[i].save_ring = ona->rx_rings[i].ring;
+
+ /* cross link the krings */
+ for (i = 0; i < na->num_tx_rings; i++) {
+ na->tx_rings[i].pipe = pna->peer->up.rx_rings + i;
+ na->rx_rings[i].pipe = pna->peer->up.tx_rings + i;
+ pna->peer->up.tx_rings[i].pipe = na->rx_rings + i;
+ pna->peer->up.rx_rings[i].pipe = na->tx_rings + i;
+ }
+ } else {
+ int i;
+ /* case 2) above */
+ /* recover the hidden rings */
+ ND("%p: case 2, hidden rings", na);
+ for (i = 0; i < na->num_tx_rings + 1; i++)
+ na->tx_rings[i].ring = na->tx_rings[i].save_ring;
+ for (i = 0; i < na->num_rx_rings + 1; i++)
+ na->rx_rings[i].ring = na->rx_rings[i].save_ring;
+ }
+ return 0;
+
+del_krings2:
+ netmap_krings_delete(ona);
+del_rings1:
+ netmap_mem_rings_delete(na);
+del_krings1:
+ netmap_krings_delete(na);
+err:
+ return error;
+}
+
+/* netmap_pipe_reg.
+ *
+ * There are two cases on registration (onoff==1)
+ *
+ * 1.a) state is
+ *
+ * usr1 --> e1 --> e2
+ *
+ * and we are e1. Nothing special to do.
+ *
+ * 1.b) state is
+ *
+ * usr1 --> e1 --> e2 <-- usr2
+ *
+ * and we are e2. Drop the ref e1 is holding.
+ *
+ * There are two additional cases on unregister (onoff==0)
+ *
+ * 2.a) state is
+ *
+ * usr1 --> e1 --> e2
+ *
+ * and we are e1. Nothing special to do, e2 will
+ * be cleaned up by the destructor of e1.
+ *
+ * 2.b) state is
+ *
+ * usr1 --> e1 e2 <-- usr2
+ *
+ * and we are either e1 or e2. Add a ref from the
+ * other end and hide our rings.
+ */
+static int
+netmap_pipe_reg(struct netmap_adapter *na, int onoff)
+{
+ struct netmap_pipe_adapter *pna =
+ (struct netmap_pipe_adapter *)na;
+ struct ifnet *ifp = na->ifp;
+ ND("%p: onoff %d", na, onoff);
+ if (onoff) {
+ ifp->if_capenable |= IFCAP_NETMAP;
+ } else {
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+ }
+ if (pna->peer_ref) {
+ ND("%p: case 1.a or 2.a, nothing to do", na);
+ return 0;
+ }
+ if (onoff) {
+ ND("%p: case 1.b, drop peer", na);
+ pna->peer->peer_ref = 0;
+ netmap_adapter_put(na);
+ } else {
+ int i;
+ ND("%p: case 2.b, grab peer", na);
+ netmap_adapter_get(na);
+ pna->peer->peer_ref = 1;
+ /* hide our rings from netmap_mem_rings_delete */
+ for (i = 0; i < na->num_tx_rings + 1; i++) {
+ na->tx_rings[i].ring = NULL;
+ }
+ for (i = 0; i < na->num_rx_rings + 1; i++) {
+ na->rx_rings[i].ring = NULL;
+ }
+ }
+ return 0;
+}
+
+/* netmap_pipe_krings_delete.
+ *
+ * There are two cases:
+ *
+ * 1) state is
+ *
+ * usr1 --> e1 --> e2
+ *
+ * and we are e1 (e2 is not registered, so krings_delete cannot be
+ * called on it);
+ *
+ * 2) state is
+ *
+ * usr1 --> e1 e2 <-- usr2
+ *
+ * and we are either e1 or e2.
+ *
+ * In the former case we have to also delete the krings of e2;
+ * in the latter case we do nothing (note that our krings
+ * have already been hidden in the unregister callback).
+ */
+static void
+netmap_pipe_krings_delete(struct netmap_adapter *na)
+{
+ struct netmap_pipe_adapter *pna =
+ (struct netmap_pipe_adapter *)na;
+ struct netmap_adapter *ona; /* na of the other end */
+ int i;
+
+ if (!pna->peer_ref) {
+ ND("%p: case 2, kept alive by peer", na);
+ return;
+ }
+ /* case 1) above */
+ ND("%p: case 1, deleting everyhing", na);
+ netmap_krings_delete(na); /* also zeroes tx_rings etc. */
+ /* restore the ring to be deleted on the peer */
+ ona = &pna->peer->up;
+ if (ona->tx_rings == NULL) {
+ /* already deleted, we must be on an
+ * cleanup-after-error path */
+ return;
+ }
+ for (i = 0; i < ona->num_tx_rings + 1; i++)
+ ona->tx_rings[i].ring = ona->tx_rings[i].save_ring;
+ for (i = 0; i < ona->num_rx_rings + 1; i++)
+ ona->rx_rings[i].ring = ona->rx_rings[i].save_ring;
+ netmap_mem_rings_delete(ona);
+ netmap_krings_delete(ona);
+}
+
+
+static void
+netmap_pipe_dtor(struct netmap_adapter *na)
+{
+ struct netmap_pipe_adapter *pna =
+ (struct netmap_pipe_adapter *)na;
+ ND("%p", na);
+ if (pna->peer_ref) {
+ ND("%p: clean up peer", na);
+ pna->peer_ref = 0;
+ netmap_adapter_put(&pna->peer->up);
+ }
+ if (pna->role == NR_REG_PIPE_MASTER)
+ netmap_pipe_remove(pna->parent, pna);
+ netmap_adapter_put(pna->parent);
+ free(na->ifp, M_DEVBUF);
+ na->ifp = NULL;
+ pna->parent = NULL;
+}
+
+int
+netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
+{
+	struct nmreq pnmr;
+	struct netmap_adapter *pna; /* parent adapter */
+	struct netmap_pipe_adapter *mna, *sna, *req;
+	struct ifnet *ifp, *ifp2;
+	u_int pipe_id;
+	int role = nmr->nr_flags & NR_REG_MASK;
+	int error;
+
+	ND("flags %x", nmr->nr_flags);
+
+	if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
+		ND("not a pipe");
+		return 0;
+	}
+	role = nmr->nr_flags & NR_REG_MASK;
+
+	/* first, try to find the parent adapter */
+	bzero(&pnmr, sizeof(pnmr));
+	memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
+	/* pass to parent the requested number of pipes */
+	pnmr.nr_arg1 = nmr->nr_arg1;
+	error = netmap_get_na(&pnmr, &pna, create);
+	if (error) {
+		ND("parent lookup failed: %d", error);
+		return error;
+	}
+	ND("found parent: %s", NM_IFPNAME(pna->ifp));
+
+	if (NETMAP_OWNED_BY_KERN(pna)) {
+		ND("parent busy");
+		error = EBUSY;
+		goto put_out;
+	}
+
+	/* next, lookup the pipe id in the parent list */
+	req = NULL;
+	pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
+	mna = netmap_pipe_find(pna, pipe_id);
+	if (mna) {
+		if (mna->role == role) {
+			ND("found %d directly at %d", pipe_id, mna->parent_slot);
+			req = mna;
+		} else {
+			ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
+			req = mna->peer;
+		}
+		/* the pipe we have found already holds a ref to the parent,
+		 * so we need to drop the one we got from netmap_get_na()
+		 */
+		netmap_adapter_put(pna);
+		goto found;
+	}
+	ND("pipe %d not found, create %d", pipe_id, create);
+	if (!create) {
+		error = ENODEV;
+		goto put_out;
+	}
+	/* we create both master and slave.
+	 * The endpoint we were asked for holds a reference to
+	 * the other one.
+	 */
+	ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (!ifp) {
+		error = ENOMEM;
+		goto put_out;
+	}
+	strcpy(ifp->if_xname, NM_IFPNAME(pna->ifp));
+
+	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (mna == NULL) {
+		error = ENOMEM;
+		goto free_ifp;
+	}
+	mna->up.ifp = ifp;
+
+	mna->id = pipe_id;
+	mna->role = NR_REG_PIPE_MASTER;
+	mna->parent = pna;
+
+	mna->up.nm_txsync = netmap_pipe_txsync;
+	mna->up.nm_rxsync = netmap_pipe_rxsync;
+	mna->up.nm_register = netmap_pipe_reg;
+	mna->up.nm_dtor = netmap_pipe_dtor;
+	mna->up.nm_krings_create = netmap_pipe_krings_create;
+	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
+	mna->up.nm_mem = pna->nm_mem;
+	mna->up.na_lut = pna->na_lut;
+	mna->up.na_lut_objtotal = pna->na_lut_objtotal;
+
+	mna->up.num_tx_rings = 1;
+	mna->up.num_rx_rings = 1;
+	mna->up.num_tx_desc = nmr->nr_tx_slots;
+	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
+			1, NM_PIPE_MAXSLOTS, NULL);
+	mna->up.num_rx_desc = nmr->nr_rx_slots;
+	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
+			1, NM_PIPE_MAXSLOTS, NULL);
+	error = netmap_attach_common(&mna->up);
+	if (error)
+		goto free_ifp;
+	/* register the master with the parent */
+	error = netmap_pipe_add(pna, mna);
+	if (error)
+		goto free_mna;
+
+	/* create the slave */
+	ifp2 = malloc(sizeof(*ifp2), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (!ifp2) {
+		error = ENOMEM;
+		goto free_mna;
+	}
+	strcpy(ifp2->if_xname, NM_IFPNAME(pna->ifp));
+
+	sna = malloc(sizeof(*sna), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (sna == NULL) {
+		error = ENOMEM;
+		goto free_ifp2;
+	}
+	/* most fields are the same, copy from master and then fix */
+	*sna = *mna;
+	sna->up.ifp = ifp2;
+	sna->role = NR_REG_PIPE_SLAVE;
+	error = netmap_attach_common(&sna->up);
+	if (error)
+		goto free_sna;
+
+	/* join the two endpoints */
+	mna->peer = sna;
+	sna->peer = mna;
+
+	/* we already have a reference to the parent, but we
+	 * need another one for the other endpoint we created
+	 */
+	netmap_adapter_get(pna);
+
+	if (role == NR_REG_PIPE_MASTER) {
+		req = mna;
+		mna->peer_ref = 1;
+		netmap_adapter_get(&sna->up);
+	} else {
+		req = sna;
+		sna->peer_ref = 1;
+		netmap_adapter_get(&mna->up);
+	}
+	ND("created master %p and slave %p", mna, sna);
+found:
+
+	ND("pipe %d %s at %p", pipe_id,
+		(req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
+	*na = &req->up;
+	netmap_adapter_get(*na);
+
+	/* write the configuration back */
+	nmr->nr_tx_rings = req->up.num_tx_rings;
+	nmr->nr_rx_rings = req->up.num_rx_rings;
+	nmr->nr_tx_slots = req->up.num_tx_desc;
+	nmr->nr_rx_slots = req->up.num_rx_desc;
+
+	/* keep the reference to the parent.
+	 * It will be released by the req destructor
+	 */
+
+	return 0;
+
+free_sna:
+	free(sna, M_DEVBUF);
+free_ifp2:
+	free(ifp2, M_DEVBUF);
+free_mna:
+	free(mna, M_DEVBUF);
+free_ifp:
+	free(ifp, M_DEVBUF);
+put_out:
+	netmap_adapter_put(pna);
+	return error;
+}
+
+
+#endif /* WITH_PIPES */
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index 13a725378c28..34e39126e525 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -164,21 +164,6 @@ static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
int kern_netmap_regif(struct nmreq *nmr);
/*
- * Each transmit queue accumulates a batch of packets into
- * a structure before forwarding. Packets to the same
- * destination are put in a list using ft_next as a link field.
- * ft_frags and ft_next are valid only on the first fragment.
- */
-struct nm_bdg_fwd { /* forwarding entry for a bridge */
- void *ft_buf; /* netmap or indirect buffer */
- uint8_t ft_frags; /* how many fragments (only on 1st frag) */
- uint8_t _ft_port; /* dst port (unused) */
- uint16_t ft_flags; /* flags, e.g. indirect */
- uint16_t ft_len; /* src fragment len */
- uint16_t ft_next; /* next packet to same destination */
-};
-
-/*
* For each output interface, nm_bdg_q is used to construct a list.
* bq_len is the number of output buffers (we can have coalescing
* during the copy).
@@ -381,7 +366,7 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
l += sizeof(struct nm_bdg_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
- nrings = na->num_tx_rings + 1;
+ nrings = netmap_real_tx_rings(na);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
@@ -421,7 +406,8 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
acquire BDG_WLOCK() and copy back the array.
*/
- D("detach %d and %d (lim %d)", hw, sw, lim);
+ if (netmap_verbose)
+ D("detach %d and %d (lim %d)", hw, sw, lim);
/* make a copy of the list of active ports, update it,
* and then copy back within BDG_WLOCK().
*/
@@ -675,7 +661,7 @@ nm_bdg_attach(struct nmreq *nmr)
goto unref_exit;
}
- nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
+ nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
if (!nifp) {
goto unref_exit;
}
@@ -855,15 +841,23 @@ netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
NMG_UNLOCK();
break;
- case NETMAP_BDG_OFFSET:
+ case NETMAP_BDG_VNET_HDR:
+ /* Valid lengths for the virtio-net header are 0 (no header),
+ 10 and 12. */
+ if (nmr->nr_arg1 != 0 &&
+ nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
+ nmr->nr_arg1 != 12) {
+ error = EINVAL;
+ break;
+ }
NMG_LOCK();
error = netmap_get_bdg_na(nmr, &na, 0);
if (na && !error) {
vpna = (struct netmap_vp_adapter *)na;
- if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
- nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
- vpna->offset = nmr->nr_arg1;
- D("Using offset %d for %p", vpna->offset, vpna);
+ vpna->virt_hdr_len = nmr->nr_arg1;
+ if (vpna->virt_hdr_len)
+ vpna->mfs = NETMAP_BDG_BUF_SIZE(na->nm_mem);
+ D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
netmap_adapter_put(na);
}
NMG_UNLOCK();
@@ -877,26 +871,20 @@ netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
return error;
}
-
static int
netmap_vp_krings_create(struct netmap_adapter *na)
{
- u_int ntx, nrx, tailroom;
+ u_int tailroom;
int error, i;
uint32_t *leases;
-
- /* XXX vps do not need host rings,
- * but we crash if we don't have one
- */
- ntx = na->num_tx_rings + 1;
- nrx = na->num_rx_rings + 1;
+ u_int nrx = netmap_real_rx_rings(na);
/*
* Leases are attached to RX rings on vale ports
*/
tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
- error = netmap_krings_create(na, ntx, nrx, tailroom);
+ error = netmap_krings_create(na, tailroom);
if (error)
return error;
@@ -1212,16 +1200,16 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
u_int len = ft[i].ft_len;
ND("slot %d frags %d", i, ft[i].ft_frags);
- /* Drop the packet if the offset is not into the first
+ /* Drop the packet if the virtio-net header is not into the first
fragment nor at the very beginning of the second. */
- if (unlikely(na->offset > len))
+ if (unlikely(na->virt_hdr_len > len))
continue;
- if (len == na->offset) {
+ if (len == na->virt_hdr_len) {
buf = ft[i+1].ft_buf;
len = ft[i+1].ft_len;
} else {
- buf += na->offset;
- len -= na->offset;
+ buf += na->virt_hdr_len;
+ len -= na->virt_hdr_len;
}
dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
if (netmap_verbose > 255)
@@ -1280,13 +1268,13 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
struct netmap_vp_adapter *dst_na;
struct netmap_kring *kring;
struct netmap_ring *ring;
- u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
+ u_int dst_nr, lim, j, d_i, next, brd_next;
u_int needed, howmany;
int retry = netmap_txsync_retry;
struct nm_bdg_q *d;
uint32_t my_start = 0, lease_idx = 0;
int nrings;
- int offset_mismatch;
+ int virt_hdr_mismatch = 0;
d_i = dsts[i];
ND("second pass %d port %d", i, d_i);
@@ -1311,8 +1299,6 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
goto cleanup;
}
- offset_mismatch = (dst_na->offset != na->offset);
-
/* there is at least one either unicast or broadcast packet */
brd_next = brddst->bq_head;
next = d->bq_head;
@@ -1325,6 +1311,29 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
*/
needed = d->bq_len + brddst->bq_len;
+ if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
+ /* There is a virtio-net header/offloadings mismatch between
+ * source and destination. The slower mismatch datapath will
+ * be used to cope with all the mismatches.
+ */
+ virt_hdr_mismatch = 1;
+ if (dst_na->mfs < na->mfs) {
+ /* We may need to do segmentation offloadings, and so
+ * we may need a number of destination slots greater
+ * than the number of input slots ('needed').
+ * We look for the smallest integer 'x' which satisfies:
+ * needed * na->mfs + x * H <= x * na->mfs
+ * where 'H' is the length of the longest header that may
+ * be replicated in the segmentation process (e.g. for
+ * TCPv4 we must account for ethernet header, IP header
+ * and TCPv4 header).
+ */
+ needed = (needed * na->mfs) /
+ (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
+ ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
+ }
+ }
+
ND(5, "pass 2 dst %d is %x %s",
i, d_i, is_vp ? "virtual" : "nic/host");
dst_nr = d_i & (NM_BDG_MAXRINGS-1);
@@ -1337,6 +1346,10 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
retry:
+ if (dst_na->retry && retry) {
+ /* try to get some free slot from the previous run */
+ dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
+ }
/* reserve the buffers in the queue and an entry
* to report completion, and drop lock.
* XXX this might become a helper function.
@@ -1346,9 +1359,6 @@ retry:
mtx_unlock(&kring->q_lock);
goto cleanup;
}
- if (dst_na->retry) {
- dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
- }
my_start = j = kring->nkr_hwlease;
howmany = nm_kr_space(kring, 1);
if (needed < howmany)
@@ -1365,7 +1375,6 @@ retry:
struct netmap_slot *slot;
struct nm_bdg_fwd *ft_p, *ft_end;
u_int cnt;
- int fix_mismatch = offset_mismatch;
/* find the queue from which we pick next packet.
* NM_FT_NULL is always higher than valid indexes
@@ -1383,58 +1392,43 @@ retry:
cnt = ft_p->ft_frags; // cnt > 0
if (unlikely(cnt > howmany))
break; /* no more space */
- howmany -= cnt;
if (netmap_verbose && cnt > 1)
RD(5, "rx %d frags to %d", cnt, j);
ft_end = ft_p + cnt;
- do {
- char *dst, *src = ft_p->ft_buf;
- size_t copy_len = ft_p->ft_len, dst_len = copy_len;
-
- slot = &ring->slot[j];
- dst = BDG_NMB(&dst_na->up, slot);
-
- if (unlikely(fix_mismatch)) {
- /* We are processing the first fragment
- * and there is a mismatch between source
- * and destination offsets. Create a zeroed
- * header for the destination, independently
- * of the source header length and content.
- */
- src += na->offset;
- copy_len -= na->offset;
- bzero(dst, dst_na->offset);
- dst += dst_na->offset;
- dst_len = dst_na->offset + copy_len;
- /* fix the first fragment only */
- fix_mismatch = 0;
- /* Here it could be copy_len == dst_len == 0,
- * and so a zero length fragment is passed.
- */
- }
-
- ND("send [%d] %d(%d) bytes at %s:%d",
- i, (int)copy_len, (int)dst_len,
- NM_IFPNAME(dst_ifp), j);
- /* round to a multiple of 64 */
- copy_len = (copy_len + 63) & ~63;
-
- if (ft_p->ft_flags & NS_INDIRECT) {
- if (copyin(src, dst, copy_len)) {
- // invalid user pointer, pretend len is 0
- dst_len = 0;
- }
- } else {
- //memcpy(dst, src, copy_len);
- pkt_copy(src, dst, (int)copy_len);
- }
- slot->len = dst_len;
- slot->flags = (cnt << 8)| NS_MOREFRAG;
- j = nm_next(j, lim);
- ft_p++;
- sent++;
- } while (ft_p != ft_end);
- slot->flags = (cnt << 8); /* clear flag on last entry */
+ if (unlikely(virt_hdr_mismatch)) {
+ bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
+ } else {
+ howmany -= cnt;
+ do {
+ char *dst, *src = ft_p->ft_buf;
+ size_t copy_len = ft_p->ft_len, dst_len = copy_len;
+
+ slot = &ring->slot[j];
+ dst = BDG_NMB(&dst_na->up, slot);
+
+ ND("send [%d] %d(%d) bytes at %s:%d",
+ i, (int)copy_len, (int)dst_len,
+ NM_IFPNAME(dst_ifp), j);
+ /* round to a multiple of 64 */
+ copy_len = (copy_len + 63) & ~63;
+
+ if (ft_p->ft_flags & NS_INDIRECT) {
+ if (copyin(src, dst, copy_len)) {
+ // invalid user pointer, pretend len is 0
+ dst_len = 0;
+ }
+ } else {
+ //memcpy(dst, src, copy_len);
+ pkt_copy(src, dst, (int)copy_len);
+ }
+ slot->len = dst_len;
+ slot->flags = (cnt << 8)| NS_MOREFRAG;
+ j = nm_next(j, lim);
+ needed--;
+ ft_p++;
+ } while (ft_p != ft_end);
+ slot->flags = (cnt << 8); /* clear flag on last entry */
+ }
/* are we done ? */
if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
break;
@@ -1484,9 +1478,9 @@ retry:
*/
if (likely(j != my_start)) {
kring->nr_hwtail = j;
- dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
still_locked = 0;
mtx_unlock(&kring->q_lock);
+ dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
if (dst_na->retry && retry--)
goto retry;
}
@@ -1615,6 +1609,7 @@ bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
int error;
+ u_int npipes = 0;
vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (vpna == NULL)
@@ -1636,8 +1631,23 @@ bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
na->num_tx_desc = nmr->nr_tx_slots;
nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1, NM_BDG_MAXSLOTS, NULL);
+ /* validate number of pipes. We want at least 1,
+ * but probably can do with some more.
+ * So let's use 2 as default (when 0 is supplied)
+ */
+ npipes = nmr->nr_arg1;
+ nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
+ nmr->nr_arg1 = npipes; /* write back */
+ /* validate extra bufs */
+ nm_bound_var(&nmr->nr_arg3, 0, 0,
+ 128*NM_BDG_MAXSLOTS, NULL);
na->num_rx_desc = nmr->nr_rx_slots;
- vpna->offset = 0;
+ vpna->virt_hdr_len = 0;
+ vpna->mfs = 1514;
+ /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
+ vpna->mfs = netmap_buf_size; */
+ if (netmap_verbose)
+ D("max frame size %u", vpna->mfs);
na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
na->nm_txsync = bdg_netmap_txsync;
@@ -1648,14 +1658,21 @@ bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
na->nm_krings_delete = netmap_vp_krings_delete;
na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc);
+ na->num_rx_rings, na->num_rx_desc,
+ nmr->nr_arg3, npipes, &error);
+ if (na->nm_mem == NULL)
+ goto err;
/* other nmd fields are set in the common routine */
error = netmap_attach_common(na);
- if (error) {
- free(vpna, M_DEVBUF);
- return error;
- }
+ if (error)
+ goto err;
return 0;
+
+err:
+ if (na->nm_mem != NULL)
+ netmap_mem_private_delete(na->nm_mem);
+ free(vpna, M_DEVBUF);
+ return error;
}
@@ -1763,19 +1780,17 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
ring->cur = kring->rcur;
ring->tail = kring->rtail;
- /* simulate a user wakeup on the rx ring */
if (is_host_ring) {
- netmap_rxsync_from_host(na, NULL, NULL);
vpna = hostna;
ring_nr = 0;
- } else {
- /* fetch packets that have arrived.
- * XXX maybe do this in a loop ?
- */
- error = na->nm_rxsync(na, ring_nr, 0);
- if (error)
- goto put_out;
- }
+ }
+ /* simulate a user wakeup on the rx ring */
+ /* fetch packets that have arrived.
+ * XXX maybe do this in a loop ?
+ */
+ error = kring->nm_sync(kring, 0);
+ if (error)
+ goto put_out;
if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
D("how strange, interrupt with no packets on %s",
NM_IFPNAME(ifp));
@@ -1801,7 +1816,7 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
ring->tail = kring->rtail;
/* another call to actually release the buffers */
if (!is_host_ring) {
- error = na->nm_rxsync(na, ring_nr, 0);
+ error = kring->nm_sync(kring, 0);
} else {
/* mark all packets as released, as in the
* second part of netmap_rxsync_from_host()
@@ -1842,11 +1857,11 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
* The original number of rings comes from hwna,
* rx rings on one side equals tx rings on the other.
*/
- for (i = 0; i <= na->num_rx_rings; i++) {
+ for (i = 0; i < na->num_rx_rings + 1; i++) {
hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
hwna->tx_rings[i].ring = na->rx_rings[i].ring;
}
- for (i = 0; i <= na->num_tx_rings; i++) {
+ for (i = 0; i < na->num_tx_rings + 1; i++) {
hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
hwna->rx_rings[i].ring = na->tx_rings[i].ring;
}
@@ -1914,8 +1929,10 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
return error;
}
- hostna->tx_rings = na->tx_rings + na->num_tx_rings;
- hostna->rx_rings = na->rx_rings + na->num_rx_rings;
+ if (na->na_flags & NAF_HOST_RINGS) {
+ hostna->tx_rings = na->tx_rings + na->num_tx_rings;
+ hostna->rx_rings = na->rx_rings + na->num_rx_rings;
+ }
return 0;
}
@@ -1957,6 +1974,7 @@ netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int f
if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
return 0;
+ mtx_lock(&kring->q_lock);
/* first step: simulate a user wakeup on the rx ring */
netmap_vp_rxsync(na, ring_n, flags);
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
@@ -1972,12 +1990,8 @@ netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int f
*/
/* set tail to what the hw expects */
ring->tail = hw_kring->rtail;
- if (ring_n == na->num_rx_rings) {
- netmap_txsync_to_host(hwna);
- } else {
- nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
- error = hwna->nm_txsync(hwna, ring_n, flags);
- }
+ nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
+ error = hw_kring->nm_sync(hw_kring, flags);
/* fourth step: now we are back the rx ring */
/* claim ownership on all hw owned bufs */
@@ -1991,7 +2005,7 @@ netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int f
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
-
+ mtx_unlock(&kring->q_lock);
return error;
}
@@ -2047,18 +2061,21 @@ netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
bna->hwna = hwna;
netmap_adapter_get(hwna);
hwna->na_private = bna; /* weak reference */
-
- hostna = &bna->host.up;
- hostna->ifp = hwna->ifp;
- hostna->num_tx_rings = 1;
- hostna->num_tx_desc = hwna->num_rx_desc;
- hostna->num_rx_rings = 1;
- hostna->num_rx_desc = hwna->num_tx_desc;
- // hostna->nm_txsync = netmap_bwrap_host_txsync;
- // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
- hostna->nm_notify = netmap_bwrap_host_notify;
- hostna->nm_mem = na->nm_mem;
- hostna->na_private = bna;
+
+ if (hwna->na_flags & NAF_HOST_RINGS) {
+ na->na_flags |= NAF_HOST_RINGS;
+ hostna = &bna->host.up;
+ hostna->ifp = hwna->ifp;
+ hostna->num_tx_rings = 1;
+ hostna->num_tx_desc = hwna->num_rx_desc;
+ hostna->num_rx_rings = 1;
+ hostna->num_rx_desc = hwna->num_tx_desc;
+ // hostna->nm_txsync = netmap_bwrap_host_txsync;
+ // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
+ hostna->nm_notify = netmap_bwrap_host_notify;
+ hostna->nm_mem = na->nm_mem;
+ hostna->na_private = bna;
+ }
ND("%s<->%s txr %d txd %d rxr %d rxd %d",
fake->if_xname, real->if_xname,
diff --git a/sys/modules/netmap/Makefile b/sys/modules/netmap/Makefile
index aea844bde1ce..647cd103600f 100644
--- a/sys/modules/netmap/Makefile
+++ b/sys/modules/netmap/Makefile
@@ -14,5 +14,7 @@ SRCS += netmap_generic.c
SRCS += netmap_mbq.c netmap_mbq.h
SRCS += netmap_vale.c
SRCS += netmap_freebsd.c
+SRCS += netmap_offloadings.c
+SRCS += netmap_pipe.c
.include <bsd.kmod.mk>
diff --git a/sys/net/netmap.h b/sys/net/netmap.h
index a5ee9b55edc9..f0b4c56d4e39 100644
--- a/sys/net/netmap.h
+++ b/sys/net/netmap.h
@@ -39,8 +39,10 @@
#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_
-#define NETMAP_API 10 /* current API version */
+#define NETMAP_API 11 /* current API version */
+#define NETMAP_MIN_API 11 /* min and max versions accepted */
+#define NETMAP_MAX_API 15
/*
* Some fields should be cache-aligned to reduce contention.
* The alignment is architecture and OS dependent, but rather than
@@ -73,20 +75,21 @@
+===============+ / | buf_idx, len | slot[1]
| txring_ofs[0] | (rel.to nifp)--' | flags, ptr |
| txring_ofs[1] | +---------------+
- (tx+1+extra_tx entries) (num_slots entries)
+ (tx+1 entries) (num_slots entries)
| txring_ofs[t] | | buf_idx, len | slot[n-1]
+---------------+ | flags, ptr |
| rxring_ofs[0] | +---------------+
| rxring_ofs[1] |
- (rx+1+extra_rx entries)
+ (rx+1 entries)
| rxring_ofs[r] |
+---------------+
- * For each "interface" (NIC, host stack, VALE switch port) attached to a
- * file descriptor, the mmap()ed region contains a (logically readonly)
+ * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
+ * a file descriptor, the mmap()ed region contains a (logically readonly)
* struct netmap_if pointing to struct netmap_ring's.
+ *
* There is one netmap_ring per physical NIC ring, plus one tx/rx ring
- * pair attached to the host stack (this pair is unused for VALE ports).
+ * pair attached to the host stack (this pair is unused for non-NIC ports).
*
* All physical/host stack ports share the same memory region,
* so that zero-copy can be implemented between them.
@@ -98,7 +101,42 @@
* is provided for user-supplied buffers in the tx path.
*
* In user space, the buffer address is computed as
- * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
+ * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
+ *
+ * Added in NETMAP_API 11:
+ *
+ * + NIOCREGIF can request the allocation of extra spare buffers from
+ * the same memory pool. The desired number of buffers must be in
+ * nr_arg3. The ioctl may return fewer buffers, depending on memory
+ * availability. nr_arg3 will return the actual value, and, once
+ * mapped, nifp->ni_bufs_head will be the index of the first buffer.
+ *
+ * The buffers are linked to each other using the first uint32_t
+ * as the index. On close, ni_bufs_head must point to the list of
+ * buffers to be released.
+ *
+ * + NIOCREGIF can request space for extra rings (and buffers)
+ * allocated in the same memory space. The number of extra rings
+ * is in nr_arg1, and is advisory. This is a no-op on NICs where
+ * the size of the memory space is fixed.
+ *
+ * + NIOCREGIF can attach to PIPE rings sharing the same memory
+ * space with a parent device. The ifname indicates the parent device,
+ * which must already exist. Flags in nr_flags indicate if we want to
+ * bind the master or slave side, the index (from nr_ringid)
+ * is just a cookie and does need to be sequential.
+ *
+ * + NIOCREGIF can also attach to 'monitor' rings that replicate
+ * the content of specific rings, also from the same memory space.
+ *
+ * Extra flags in nr_flags support the above functions.
+ * Application libraries may use the following naming scheme:
+ * netmap:foo all NIC ring pairs
+ * netmap:foo^ only host ring pair
+ * netmap:foo+ all NIC ring + host ring pairs
+ * netmap:foo-k the k-th NIC ring pair
+ * netmap:foo{k PIPE ring pair k, master side
+ * netmap:foo}k PIPE ring pair k, slave side
*/
/*
@@ -284,8 +322,8 @@ struct netmap_if {
const uint32_t ni_tx_rings; /* number of HW tx rings */
const uint32_t ni_rx_rings; /* number of HW rx rings */
- const uint32_t ni_extra_tx_rings;
- const uint32_t ni_extra_rx_rings;
+ uint32_t ni_bufs_head; /* head index for extra bufs */
+ uint32_t ni_spare1[5];
/*
* The following array contains the offset of each netmap ring
* from this structure, in the following order:
@@ -321,6 +359,7 @@ struct netmap_if {
*
* The actual argument (struct nmreq) has a number of options to request
* different functions.
+ * The following are used in NIOCREGIF when nr_cmd == 0:
*
* nr_name (in)
* The name of the port (em0, valeXXX:YYY, etc.)
@@ -337,6 +376,13 @@ struct netmap_if {
*
* nr_ringid (in)
* Indicates how rings should be bound to the file descriptors.
+ * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
+ * are used to indicate the ring number, and nr_flags specifies
+ * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ *
+ * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
+ * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
+ * the binding as follows:
* 0 (default) binds all physical rings
* NETMAP_HW_RING | ring number binds a single ring pair
* NETMAP_SW_RING binds only the host tx/rx rings
@@ -345,8 +391,41 @@ struct netmap_if {
* packets on tx rings only if POLLOUT is set.
* The default is to push any pending packet.
*
- * NETMAP_PRIV_MEM is set on return for ports that use private
- * memory regions and cannot use buffer swapping.
+ * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
+ * packets on rx rings also when POLLIN is NOT set.
+ * The default is to touch the rx ring only with POLLIN.
+ * Note that this is the opposite of TX because it
+ * reflects the common usage.
+ *
+ * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
+ * NETMAP_PRIV_MEM is set on return for ports that do not use
+ * the global memory allocator.
+ * This information is not significant and applications
+ * should look at the region id in nr_arg2
+ *
+ * nr_flags is the recommended mode to indicate which rings should
+ * be bound to a file descriptor. Values are NR_REG_*
+ *
+ * nr_arg1 (in) The number of extra rings to be reserved.
+ * Especially when allocating a VALE port the system only
+ * allocates the amount of memory needed for the port.
+ * If more shared memory rings are desired (e.g. for pipes),
+ * the first invocation for the same basename/allocator
+ * should specify a suitable number. Memory cannot be
+ * extended after the first allocation without closing
+ * all ports on the same region.
+ *
+ * nr_arg2 (in/out) The identity of the memory region used.
+ * On input, 0 means the system decides autonomously,
+ * other values may try to select a specific region.
+ * On return the actual value is reported.
+ * Region '1' is the global allocator, normally shared
+ * by all interfaces. Other values are private regions.
+ * If two ports the same region zero-copy is possible.
+ *
+ * nr_arg3 (in/out) number of extra buffers to be allocated.
+ *
+ *
*
* nr_cmd (in) if non-zero indicates a special command:
* NETMAP_BDG_ATTACH and nr_name = vale*:ifname
@@ -362,17 +441,33 @@ struct netmap_if {
* NETMAP_BDG_LIST
* list the configuration of VALE switches.
*
- * NETMAP_BDG_OFFSET XXX ?
- * Set the offset of data in packets. Used with VALE
- * switches where the clients use the vhost header.
+ * NETMAP_BDG_VNET_HDR
+ * Set the virtio-net header length used by the client
+ * of a VALE switch port.
+ *
+ * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific
*
- * nr_arg1, nr_arg2 (in/out) command specific
+ *
*
*/
/*
- * struct nmreq overlays a struct ifreq
+ * struct nmreq overlays a struct ifreq (just the name)
+ *
+ * On input, nr_ringid indicates which rings we are requesting,
+ * with the low flags for the specific ring number.
+ * selection FLAGS RING INDEX
+ *
+ * all the NIC rings 0x0000 -
+ * only HOST ring 0x2000 ring index
+ * single NIC ring 0x4000 -
+ * all the NIC+HOST rings 0x6000 -
+ * one pipe ring, master 0x8000 ring index
+ * *** INVALID 0xA000
+ * one pipe ring, slave 0xC000 ring index
+ * *** INVALID 0xE000
+ *
*/
struct nmreq {
char nr_name[IFNAMSIZ];
@@ -383,27 +478,47 @@ struct nmreq {
uint32_t nr_rx_slots; /* slots in rx rings */
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
+
uint16_t nr_ringid; /* ring(s) we care about */
-#define NETMAP_PRIV_MEM 0x8000 /* rings use private memory */
-#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */
-#define NETMAP_SW_RING 0x2000 /* process the sw ring */
+#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */
+#define NETMAP_SW_RING 0x2000 /* only host ring pair */
+
+#define NETMAP_RING_MASK 0x0fff /* the ring number */
+
#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
-#define NETMAP_RING_MASK 0xfff /* the ring number */
+
+#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */
uint16_t nr_cmd;
#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
#define NETMAP_BDG_DETACH 2 /* detach the NIC */
#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */
#define NETMAP_BDG_LIST 4 /* get bridge's info */
-#define NETMAP_BDG_OFFSET 5 /* set the port offset */
+#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */
+#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */
- uint16_t nr_arg1;
+ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
-#define NETMAP_BDG_MAX_OFFSET 12
uint16_t nr_arg2;
- uint32_t spare2[3];
+ uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */
+ uint32_t nr_flags;
+ /* various modes, extends nr_ringid */
+ uint32_t spare2[1];
+};
+
+#define NR_REG_MASK 0xf /* values for nr_flags */
+enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
+ NR_REG_ALL_NIC = 1,
+ NR_REG_SW = 2,
+ NR_REG_NIC_SW = 3,
+ NR_REG_ONE_NIC = 4,
+ NR_REG_PIPE_MASTER = 5,
+ NR_REG_PIPE_SLAVE = 6,
};
+/* monitor uses the NR_REG to select the rings to monitor */
+#define NR_MONITOR_TX 0x100
+#define NR_MONITOR_RX 0x200
/*
diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
index 1bb337cf0ef7..9c3a4c1e5949 100644
--- a/sys/net/netmap_user.h
+++ b/sys/net/netmap_user.h
@@ -66,6 +66,7 @@
#define _NET_NETMAP_USER_H_
#include <stdint.h>
+#include <sys/socket.h> /* apple needs sockaddr */
#include <net/if.h> /* IFNAMSIZ */
#ifndef likely
@@ -104,12 +105,12 @@ nm_ring_next(struct netmap_ring *r, uint32_t i)
/*
* Return 1 if we have pending transmissions in the tx ring.
- * When everything is complete ring->cur = ring->tail + 1 (modulo ring size)
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
*/
static inline int
nm_tx_pending(struct netmap_ring *r)
{
- return nm_ring_next(r, r->tail) != r->cur;
+ return nm_ring_next(r, r->tail) != r->head;
}
@@ -142,13 +143,41 @@ nm_ring_space(struct netmap_ring *ring)
#include <signal.h>
#include <stdlib.h>
-struct nm_hdr_t { /* same as pcap_pkthdr */
+#ifndef ND /* debug macros */
+/* debug support */
+#define ND(_fmt, ...) do {} while(0)
+#define D(_fmt, ...) \
+ do { \
+ struct timeval t0; \
+ gettimeofday(&t0, NULL); \
+ fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \
+ (int)(t0.tv_sec % 1000), (int)t0.tv_usec, \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ } while (0)
+
+/* Rate limited version of "D", lps indicates how many per second */
+#define RD(lps, format, ...) \
+ do { \
+ static int t0, __cnt; \
+ struct timeval __xxts; \
+ gettimeofday(&__xxts, NULL); \
+ if (t0 != __xxts.tv_sec) { \
+ t0 = __xxts.tv_sec; \
+ __cnt = 0; \
+ } \
+ if (__cnt++ < lps) { \
+ D(format, ##__VA_ARGS__); \
+ } \
+ } while (0)
+#endif
+
+struct nm_pkthdr { /* same as pcap_pkthdr */
struct timeval ts;
uint32_t caplen;
uint32_t len;
};
-struct nm_stat_t { // pcap_stat
+struct nm_stat { /* same as pcap_stat */
u_int ps_recv;
u_int ps_drop;
u_int ps_ifdrop;
@@ -159,19 +188,29 @@ struct nm_stat_t { // pcap_stat
#define NM_ERRBUF_SIZE 512
-struct nm_desc_t {
- struct nm_desc_t *self;
+struct nm_desc {
+ struct nm_desc *self; /* point to self if netmap. */
int fd;
void *mem;
int memsize;
- struct netmap_if *nifp;
+ int done_mmap; /* set if mem is the result of mmap */
+ struct netmap_if * const nifp;
uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
struct nmreq req; /* also contains the nr_name = ifname */
- struct nm_hdr_t hdr;
-
- struct netmap_ring *tx, *rx; /* shortcuts to base hw/sw rings */
+ struct nm_pkthdr hdr;
+ /*
+ * The memory contains netmap_if, rings and then buffers.
+ * Given a pointer (e.g. to nm_inject) we can compare with
+ * mem/buf_start/buf_end to tell if it is a buffer or
+ * some other descriptor in our region.
+ * We also store a pointer to some ring as it helps in the
+ * translation from buffer indexes to addresses.
+ */
+ struct netmap_ring * const some_ring;
+ void * const buf_start;
+ void * const buf_end;
/* parameters from pcap_open_live */
int snaplen;
int promisc;
@@ -183,7 +222,7 @@ struct nm_desc_t {
uint32_t if_reqcap;
uint32_t if_curcap;
- struct nm_stat_t st;
+ struct nm_stat st;
char msg[NM_ERRBUF_SIZE];
};
@@ -191,8 +230,8 @@ struct nm_desc_t {
* when the descriptor is open correctly, d->self == d
* Eventually we should also use some magic number.
*/
-#define P2NMD(p) ((struct nm_desc_t *)(p))
-#define IS_NETMAP_DESC(d) (P2NMD(d)->self == P2NMD(d))
+#define P2NMD(p) ((struct nm_desc *)(p))
+#define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d) (P2NMD(d)->fd)
@@ -205,7 +244,7 @@ struct nm_desc_t {
* XXX only for multiples of 64 bytes, non overlapped.
*/
static inline void
-pkt_copy(const void *_src, void *_dst, int l)
+nm_pkt_copy(const void *_src, void *_dst, int l)
{
const uint64_t *src = (const uint64_t *)_src;
uint64_t *dst = (uint64_t *)_dst;
@@ -230,7 +269,7 @@ pkt_copy(const void *_src, void *_dst, int l)
/*
* The callback, invoked on each received packet. Same as libpcap
*/
-typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);
+typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
/*
*--- the pcap-like API ---
@@ -238,21 +277,49 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);
* nm_open() opens a file descriptor, binds to a port and maps memory.
*
* ifname (netmap:foo or vale:foo) is the port name
- * flags can be NETMAP_SW_RING or NETMAP_HW_RING etc.
- * ring_no only used if NETMAP_HW_RING is specified, is interpreted
- * as a string or integer indicating the ring number
- * ring_flags is stored in all ring flags (e.g. for transparent mode)
- * to open. If successful, t opens the fd and maps the memory.
+ * a suffix can indicate the following:
+ * ^ bind the host (sw) ring pair
+ * * bind host and NIC ring pairs (transparent)
+ * -NN bind individual NIC ring pair
+ * {NN bind master side of pipe NN
+ * }NN bind slave side of pipe NN
+ *
+ * req provides the initial values of nmreq before parsing ifname.
+ * Remember that the ifname parsing will override the ring
+ * number in nr_ringid, and part of nr_flags;
+ * flags special functions, normally 0
+ * indicates which fields of *arg are significant
+ * arg special functions, normally NULL
+ * if passed a netmap_desc with mem != NULL,
+ * use that memory instead of mmap.
*/
-static struct nm_desc_t *nm_open(const char *ifname,
- const char *ring_no, int flags, int ring_flags);
+static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
+ uint64_t flags, const struct nm_desc *arg);
+
+/*
+ * nm_open can import some fields from the parent descriptor.
+ * These flags control which ones.
+ * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
+ * which set the initial value for these flags.
+ * Note that the 16 low bits of the flags are reserved for data
+ * that may go into the nmreq.
+ */
+enum {
+ NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */
+ NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */
+ NM_OPEN_ARG1 = 0x100000,
+ NM_OPEN_ARG2 = 0x200000,
+ NM_OPEN_ARG3 = 0x400000,
+ NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */
+};
+
/*
* nm_close() closes and restores the port to its previous state
*/
-static int nm_close(struct nm_desc_t *);
+static int nm_close(struct nm_desc *);
/*
* nm_inject() is the same as pcap_inject()
@@ -260,111 +327,226 @@ static int nm_close(struct nm_desc_t *);
* nm_nextpkt() is the same as pcap_next()
*/
-static int nm_inject(struct nm_desc_t *, const void *, size_t);
-static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
-static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *);
+static int nm_inject(struct nm_desc *, const void *, size_t);
+static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
+static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
/*
* Try to open, return descriptor if successful, NULL otherwise.
* An invalid netmap name will return errno = 0;
+ * You can pass a pointer to a pre-filled nm_desc to add special
+ * parameters. Flags is used as follows
+ * NM_OPEN_NO_MMAP use the memory from arg, only
+ * if the nr_arg2 (memory block) matches.
+ * NM_OPEN_ARG1 use req.nr_arg1 from arg
+ * NM_OPEN_ARG2 use req.nr_arg2 from arg
+ * NM_OPEN_RING_CFG use ring config from arg
*/
-static struct nm_desc_t *
-nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags)
+static struct nm_desc *
+nm_open(const char *ifname, const struct nmreq *req,
+ uint64_t new_flags, const struct nm_desc *arg)
{
- struct nm_desc_t *d;
- u_int n, namelen;
- char *port = NULL;
+ struct nm_desc *d = NULL;
+ const struct nm_desc *parent = arg;
+ u_int namelen;
+ uint32_t nr_ringid = 0, nr_flags;
+ const char *port = NULL;
+ const char *errmsg = NULL;
if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
- errno = 0; /* name not recognised */
+ errno = 0; /* name not recognised, not an error */
return NULL;
}
if (ifname[0] == 'n')
ifname += 7;
- port = strchr(ifname, '-');
- if (!port) {
- namelen = strlen(ifname);
- } else {
- namelen = port - ifname;
- flags &= ~(NETMAP_SW_RING | NETMAP_HW_RING | NETMAP_RING_MASK);
- if (port[1] == 's')
- flags |= NETMAP_SW_RING;
- else
- ring_name = port;
+ /* scan for a separator */
+ for (port = ifname; *port && !index("-*^{}", *port); port++)
+ ;
+ namelen = port - ifname;
+ if (namelen >= sizeof(d->req.nr_name)) {
+ errmsg = "name too long";
+ goto fail;
}
- if (namelen >= sizeof(d->req.nr_name))
- namelen = sizeof(d->req.nr_name) - 1;
+ switch (*port) {
+ default: /* '\0', no suffix */
+ nr_flags = NR_REG_ALL_NIC;
+ break;
+ case '-': /* one NIC */
+ nr_flags = NR_REG_ONE_NIC;
+ nr_ringid = atoi(port + 1);
+ break;
+ case '*': /* NIC and SW, ignore port */
+ nr_flags = NR_REG_NIC_SW;
+ if (port[1]) {
+ errmsg = "invalid port for nic+sw";
+ goto fail;
+ }
+ break;
+ case '^': /* only sw ring */
+ nr_flags = NR_REG_SW;
+ if (port[1]) {
+ errmsg = "invalid port for sw ring";
+ goto fail;
+ }
+ break;
+ case '{':
+ nr_flags = NR_REG_PIPE_MASTER;
+ nr_ringid = atoi(port + 1);
+ break;
+ case '}':
+ nr_flags = NR_REG_PIPE_SLAVE;
+ nr_ringid = atoi(port + 1);
+ break;
+ }
+
+ if (nr_ringid >= NETMAP_RING_MASK) {
+ errmsg = "invalid ringid";
+ goto fail;
+ }
+ /* add the *XPOLL flags */
+ nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
- d = (struct nm_desc_t *)calloc(1, sizeof(*d));
+ d = (struct nm_desc *)calloc(1, sizeof(*d));
if (d == NULL) {
+ errmsg = "nm_desc alloc failure";
errno = ENOMEM;
return NULL;
}
d->self = d; /* set this early so nm_close() works */
d->fd = open("/dev/netmap", O_RDWR);
- if (d->fd < 0)
+ if (d->fd < 0) {
+ errmsg = "cannot open /dev/netmap";
goto fail;
-
- if (flags & NETMAP_SW_RING) {
- d->req.nr_ringid = NETMAP_SW_RING;
- } else {
- u_int r;
- if (flags & NETMAP_HW_RING) /* interpret ring as int */
- r = (uintptr_t)ring_name;
- else /* interpret ring as numeric string */
- r = ring_name ? atoi(ring_name) : ~0;
- r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0;
- d->req.nr_ringid = r; /* set the ring */
}
- d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK);
+
+ if (req)
+ d->req = *req;
d->req.nr_version = NETMAP_API;
+ d->req.nr_ringid &= ~NETMAP_RING_MASK;
+
+ /* these fields are overridden by ifname and flags processing */
+ d->req.nr_ringid |= nr_ringid;
+ d->req.nr_flags = nr_flags;
memcpy(d->req.nr_name, ifname, namelen);
d->req.nr_name[namelen] = '\0';
+ /* optionally import info from parent */
+ if (IS_NETMAP_DESC(parent) && new_flags) {
+ if (new_flags & NM_OPEN_ARG1)
+ D("overriding ARG1 %d", parent->req.nr_arg1);
+ d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
+ parent->req.nr_arg1 : 4;
+ if (new_flags & NM_OPEN_ARG2)
+ D("overriding ARG2 %d", parent->req.nr_arg2);
+ d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
+ parent->req.nr_arg2 : 0;
+ if (new_flags & NM_OPEN_ARG3)
+ D("overriding ARG3 %d", parent->req.nr_arg3);
+ d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
+ parent->req.nr_arg3 : 0;
+ if (new_flags & NM_OPEN_RING_CFG) {
+ D("overriding RING_CFG");
+ d->req.nr_tx_slots = parent->req.nr_tx_slots;
+ d->req.nr_rx_slots = parent->req.nr_rx_slots;
+ d->req.nr_tx_rings = parent->req.nr_tx_rings;
+ d->req.nr_rx_rings = parent->req.nr_rx_rings;
+ }
+ if (new_flags & NM_OPEN_IFNAME) {
+ D("overriding ifname %s ringid 0x%x flags 0x%x",
+ parent->req.nr_name, parent->req.nr_ringid,
+ parent->req.nr_flags);
+ memcpy(d->req.nr_name, parent->req.nr_name,
+ sizeof(d->req.nr_name));
+ d->req.nr_ringid = parent->req.nr_ringid;
+ d->req.nr_flags = parent->req.nr_flags;
+ }
+ }
if (ioctl(d->fd, NIOCREGIF, &d->req)) {
+ errmsg = "NIOCREGIF failed";
goto fail;
}
- d->memsize = d->req.nr_memsize;
- d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
- d->fd, 0);
- if (d->mem == NULL)
- goto fail;
- d->nifp = NETMAP_IF(d->mem, d->req.nr_offset);
- if (d->req.nr_ringid & NETMAP_SW_RING) {
+ if (IS_NETMAP_DESC(parent) && parent->mem &&
+ parent->req.nr_arg2 == d->req.nr_arg2) {
+ /* do not mmap, inherit from parent */
+ d->memsize = parent->memsize;
+ d->mem = parent->mem;
+ } else {
+ d->memsize = d->req.nr_memsize;
+ d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ d->fd, 0);
+ if (d->mem == NULL) {
+ errmsg = "mmap failed";
+ goto fail;
+ }
+ d->done_mmap = 1;
+ }
+ {
+ struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
+ struct netmap_ring *r = NETMAP_RXRING(nifp, );
+
+ *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
+ *(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
+ *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
+ *(void **)(uintptr_t)&d->buf_end =
+ (char *)d->mem + d->memsize;
+ }
+
+ if (nr_flags == NR_REG_SW) { /* host stack */
d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
- } else if (d->req.nr_ringid & NETMAP_HW_RING) {
- /* XXX check validity */
- d->first_tx_ring = d->last_tx_ring =
- d->first_rx_ring = d->last_rx_ring =
- d->req.nr_ringid & NETMAP_RING_MASK;
- } else {
- d->first_tx_ring = d->last_rx_ring = 0;
+ } else if (nr_flags == NR_REG_ALL_NIC) { /* only nic */
+ d->first_tx_ring = 0;
+ d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings - 1;
d->last_rx_ring = d->req.nr_rx_rings - 1;
+ } else if (nr_flags == NR_REG_NIC_SW) {
+ d->first_tx_ring = 0;
+ d->first_rx_ring = 0;
+ d->last_tx_ring = d->req.nr_tx_rings;
+ d->last_rx_ring = d->req.nr_rx_rings;
+ } else if (nr_flags == NR_REG_ONE_NIC) {
+ /* XXX check validity */
+ d->first_tx_ring = d->last_tx_ring =
+ d->first_rx_ring = d->last_rx_ring = nr_ringid;
+ } else { /* pipes */
+ d->first_tx_ring = d->last_tx_ring = 0;
+ d->first_rx_ring = d->last_rx_ring = 0;
}
- d->tx = NETMAP_TXRING(d->nifp, 0);
- d->rx = NETMAP_RXRING(d->nifp, 0);
- d->cur_tx_ring = d->first_tx_ring;
- d->cur_rx_ring = d->first_rx_ring;
- for (n = d->first_tx_ring; n <= d->last_tx_ring; n++) {
- d->tx[n].flags |= ring_flags;
+
+#ifdef DEBUG_NETMAP_USER
+ { /* debugging code */
+ int i;
+
+ D("%s tx %d .. %d %d rx %d .. %d %d", ifname,
+ d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings,
+ d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings);
+ for (i = 0; i <= d->req.nr_tx_rings; i++) {
+ struct netmap_ring *r = NETMAP_TXRING(d->nifp, i);
+ D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
}
- for (n = d->first_rx_ring; n <= d->last_rx_ring; n++) {
- d->rx[n].flags |= ring_flags;
+ for (i = 0; i <= d->req.nr_rx_rings; i++) {
+ struct netmap_ring *r = NETMAP_RXRING(d->nifp, i);
+ D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
}
+ }
+#endif /* debugging */
+
+ d->cur_tx_ring = d->first_tx_ring;
+ d->cur_rx_ring = d->first_rx_ring;
return d;
fail:
nm_close(d);
+ if (errmsg)
+ D("%s %s", errmsg, ifname);
errno = EINVAL;
return NULL;
}
static int
-nm_close(struct nm_desc_t *d)
+nm_close(struct nm_desc *d)
{
/*
* ugly trick to avoid unused warnings
@@ -375,7 +557,7 @@ nm_close(struct nm_desc_t *d)
if (d == NULL || d->self != d)
return EINVAL;
- if (d->mem)
+ if (d->done_mmap && d->mem)
munmap(d->mem, d->memsize);
if (d->fd != -1)
close(d->fd);
@@ -389,7 +571,7 @@ nm_close(struct nm_desc_t *d)
* Same prototype as pcap_inject(), only need to cast.
*/
static int
-nm_inject(struct nm_desc_t *d, const void *buf, size_t size)
+nm_inject(struct nm_desc *d, const void *buf, size_t size)
{
u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
@@ -408,7 +590,7 @@ nm_inject(struct nm_desc_t *d, const void *buf, size_t size)
i = ring->cur;
idx = ring->slot[i].buf_idx;
ring->slot[i].len = size;
- pkt_copy(buf, NETMAP_BUF(ring, idx), size);
+ nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
d->cur_tx_ring = ri;
ring->head = ring->cur = nm_ring_next(ring, i);
return size;
@@ -421,7 +603,7 @@ nm_inject(struct nm_desc_t *d, const void *buf, size_t size)
* Same prototype as pcap_dispatch(), only need to cast.
*/
static int
-nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
+nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
{
int n = d->last_rx_ring - d->first_rx_ring + 1;
int c, got = 0, ri = d->cur_rx_ring;
@@ -457,7 +639,7 @@ nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
}
static u_char *
-nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr)
+nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr)
{
int ri = d->cur_rx_ring;
diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile
index e873389c7179..c50247366b5a 100644
--- a/tools/tools/netmap/Makefile
+++ b/tools/tools/netmap/Makefile
@@ -3,11 +3,11 @@
#
# For multiple programs using a single source file each,
# we can just define 'progs' and create custom targets.
-PROGS = pkt-gen bridge vale-ctl testpcap libnetmap.so
+PROGS = pkt-gen bridge vale-ctl
-CLEANFILES = $(PROGS) pcap.o nm_util.o
+CLEANFILES = $(PROGS) *.o
NO_MAN=
-CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys
+CFLAGS += -Werror -Wall # -nostdinc -I/usr/include -I../../../sys
CFLAGS += -Wextra
LDFLAGS += -lpthread
@@ -22,12 +22,11 @@ LDFLAGS += -lpcap
all: $(PROGS)
-pkt-gen bridge: nm_util.o
- $(CC) $(CFLAGS) -o ${.TARGET} ${.TARGET:=.c} nm_util.o $(LDFLAGS)
+pkt-gen: pkt-gen.o
+ $(CC) $(CFLAGS) -o pkt-gen pkt-gen.o $(LDFLAGS)
-testpcap: pcap.c libnetmap.so
- $(CC) $(CFLAGS) -DTEST -L. -lnetmap -o ${.TARGET} pcap.c
-
-libnetmap.so: pcap.c nm_util.c
- $(CC) $(CFLAGS) -fpic -c ${.ALLSRC}
- $(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o}
+bridge: bridge.o
+ $(CC) $(CFLAGS) -o bridge bridge.o
+
+vale-ctl: vale-ctl.o
+ $(CC) $(CFLAGS) -o vale-ctl vale-ctl.o
diff --git a/tools/tools/netmap/README b/tools/tools/netmap/README
index 2bde6f2ab4d8..40378e62bbe6 100644
--- a/tools/tools/netmap/README
+++ b/tools/tools/netmap/README
@@ -6,19 +6,4 @@ This directory contains examples that use netmap
bridge a two-port jumper wire, also using the native API
- testpcap a jumper wire using libnetmap (or libpcap)
-
- click* various click examples
-
-------------------------------------------------------------
-Some performance data as of may 2012 for applications using libpcap.
-Throughput is generally in Mpps computed with the 64-byte frames,
-using 1 core on a 2.9GHz CPU and 10Gbit/s interface
-
-Libpcap version -- Application ---------------------
-BSD netmap
----------------------------------------------------
- 0.77 3.82 ports/trafshow (version 5)
- 0.94 7.7 net-mgmt/ipcad (ip accounting daemon)
- 0.9 5.0 net-mgmt/darkstat (ip accounting + graphing)
- 0.83 2.45 net-mgmt/iftop (curses traffic display)
+ vale-ctl the program to control VALE bridges
diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c
index cab545bfc919..0895d4ede676 100644
--- a/tools/tools/netmap/bridge.c
+++ b/tools/tools/netmap/bridge.c
@@ -9,14 +9,15 @@
* $FreeBSD$
*/
-#include "nm_util.h"
-
+#include <stdio.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+#include <sys/poll.h>
int verbose = 0;
-char *version = "$Id$";
-
static int do_abort = 0;
+static int zerocopy = 1; /* enable zerocopy if possible */
static void
sigint_h(int sig)
@@ -28,6 +29,26 @@ sigint_h(int sig)
/*
+ * how many packets on this set of queues ?
+ */
+int
+pkt_queued(struct nm_desc *d, int tx)
+{
+ u_int i, tot = 0;
+
+ if (tx) {
+ for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) {
+ tot += nm_ring_space(NETMAP_TXRING(d->nifp, i));
+ }
+ } else {
+ for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) {
+ tot += nm_ring_space(NETMAP_RXRING(d->nifp, i));
+ }
+ }
+ return tot;
+}
+
+/*
* move up to 'limit' pkts from rxring to txring swapping buffers.
*/
static int
@@ -52,12 +73,6 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
while (limit-- > 0) {
struct netmap_slot *rs = &rxring->slot[j];
struct netmap_slot *ts = &txring->slot[k];
-#ifdef NO_SWAP
- char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx);
- char *txbuf = NETMAP_BUF(txring, ts->buf_idx);
-#else
- uint32_t pkt;
-#endif
/* swap packets */
if (ts->buf_idx < 2 || rs->buf_idx < 2) {
@@ -65,24 +80,26 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
j, rs->buf_idx, k, ts->buf_idx);
sleep(2);
}
-#ifndef NO_SWAP
- pkt = ts->buf_idx;
- ts->buf_idx = rs->buf_idx;
- rs->buf_idx = pkt;
-#endif
/* copy the packet length. */
- if (rs->len < 14 || rs->len > 2048)
+ if (rs->len > 2048) {
D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k);
- else if (verbose > 1)
+ rs->len = 0;
+ } else if (verbose > 1) {
D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k);
+ }
ts->len = rs->len;
-#ifdef NO_SWAP
- pkt_copy(rxbuf, txbuf, ts->len);
-#else
- /* report the buffer change. */
- ts->flags |= NS_BUF_CHANGED;
- rs->flags |= NS_BUF_CHANGED;
-#endif /* NO_SWAP */
+ if (zerocopy) {
+ uint32_t pkt = ts->buf_idx;
+ ts->buf_idx = rs->buf_idx;
+ rs->buf_idx = pkt;
+ /* report the buffer change. */
+ ts->flags |= NS_BUF_CHANGED;
+ rs->flags |= NS_BUF_CHANGED;
+ } else {
+ char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx);
+ char *txbuf = NETMAP_BUF(txring, ts->buf_idx);
+ nm_pkt_copy(rxbuf, txbuf, ts->len);
+ }
j = nm_ring_next(rxring, j);
k = nm_ring_next(txring, k);
}
@@ -96,7 +113,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
/* move packts from src to destination */
static int
-move(struct nm_desc_t *src, struct nm_desc_t *dst, u_int limit)
+move(struct nm_desc *src, struct nm_desc *dst, u_int limit)
{
struct netmap_ring *txring, *rxring;
u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring;
@@ -104,8 +121,8 @@ move(struct nm_desc_t *src, struct nm_desc_t *dst, u_int limit)
"host->net" : "net->host";
while (si <= src->last_rx_ring && di <= dst->last_tx_ring) {
- rxring = src->tx + si;
- txring = dst->tx + di;
+ rxring = NETMAP_RXRING(src->nifp, si);
+ txring = NETMAP_TXRING(dst->nifp, di);
ND("txring %p rxring %p", txring, rxring);
if (nm_ring_empty(rxring)) {
si++;
@@ -141,15 +158,16 @@ int
main(int argc, char **argv)
{
struct pollfd pollfd[2];
- int i, ch;
+ int ch;
u_int burst = 1024, wait_link = 4;
- struct nm_desc_t *pa = NULL, *pb = NULL;
+ struct nm_desc *pa = NULL, *pb = NULL;
char *ifa = NULL, *ifb = NULL;
+ char ifabuf[64] = { 0 };
- fprintf(stderr, "%s %s built %s %s\n",
- argv[0], version, __DATE__, __TIME__);
+ fprintf(stderr, "%s built %s %s\n",
+ argv[0], __DATE__, __TIME__);
- while ( (ch = getopt(argc, argv, "b:i:vw:")) != -1) {
+ while ( (ch = getopt(argc, argv, "b:ci:vw:")) != -1) {
switch (ch) {
default:
D("bad option %c %s", ch, optarg);
@@ -167,6 +185,9 @@ main(int argc, char **argv)
D("%s ignored, already have 2 interfaces",
optarg);
break;
+ case 'c':
+ zerocopy = 0; /* do not zerocopy */
+ break;
case 'v':
verbose++;
break;
@@ -202,20 +223,25 @@ main(int argc, char **argv)
}
if (!strcmp(ifa, ifb)) {
D("same interface, endpoint 0 goes to host");
- i = NETMAP_SW_RING;
+ snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa);
+ ifa = ifabuf;
} else {
/* two different interfaces. Take all rings on if1 */
- i = 0; // all hw rings
}
- pa = netmap_open(ifa, i, 1);
- if (pa == NULL)
+ pa = nm_open(ifa, NULL, 0, NULL);
+ if (pa == NULL) {
+ D("cannot open %s", ifa);
return (1);
+ }
// XXX use a single mmap ?
- pb = netmap_open(ifb, 0, 1);
+ pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa);
if (pb == NULL) {
+ D("cannot open %s", ifb);
nm_close(pa);
return (1);
}
+ zerocopy = zerocopy && (pa->mem == pb->mem);
+ D("------- zerocopy %ssupported", zerocopy ? "" : "NOT ");
/* setup poll(2) variables. */
memset(pollfd, 0, sizeof(pollfd));
@@ -252,23 +278,25 @@ main(int argc, char **argv)
pollfd[0].events,
pollfd[0].revents,
pkt_queued(pa, 0),
- pa->rx->cur,
+ NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur,
pkt_queued(pa, 1),
pollfd[1].events,
pollfd[1].revents,
pkt_queued(pb, 0),
- pb->rx->cur,
+ NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur,
pkt_queued(pb, 1)
);
if (ret < 0)
continue;
if (pollfd[0].revents & POLLERR) {
- D("error on fd0, rx [%d,%d)",
- pa->rx->cur, pa->rx->tail);
+ struct netmap_ring *rx = NETMAP_RXRING(pa->nifp, pa->cur_rx_ring);
+ D("error on fd0, rx [%d,%d,%d)",
+ rx->head, rx->cur, rx->tail);
}
if (pollfd[1].revents & POLLERR) {
- D("error on fd1, rx [%d,%d)",
- pb->rx->cur, pb->rx->tail);
+ struct netmap_ring *rx = NETMAP_RXRING(pb->nifp, pb->cur_rx_ring);
+ D("error on fd1, rx [%d,%d,%d)",
+ rx->head, rx->cur, rx->tail);
}
if (pollfd[0].revents & POLLOUT) {
move(pb, pa, burst);
diff --git a/tools/tools/netmap/click-test.cfg b/tools/tools/netmap/click-test.cfg
deleted file mode 100644
index fc5759f88b1e..000000000000
--- a/tools/tools/netmap/click-test.cfg
+++ /dev/null
@@ -1,19 +0,0 @@
-//
-// $FreeBSD$
-//
-// A sample test configuration for click
-//
-//
-// create a switch
-
-myswitch :: EtherSwitch;
-
-// two input devices
-
-c0 :: FromDevice(ix0, PROMISC true);
-c1 :: FromDevice(ix1, PROMISC true);
-
-// and now pass packets around
-
-c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0);
-c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1);
diff --git a/tools/tools/netmap/nm_util.c b/tools/tools/netmap/nm_util.c
deleted file mode 100644
index deb52bbc87e4..000000000000
--- a/tools/tools/netmap/nm_util.c
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- * $Id$
- *
- * utilities to use netmap devices.
- * This does the basic functions of opening a device and issuing
- * ioctls()
- */
-
-#include "nm_util.h"
-
-extern int verbose;
-
-int
-nm_do_ioctl(struct nm_desc_t *me, u_long what, int subcmd)
-{
- struct ifreq ifr;
- int error;
- int fd;
-
-#if defined( __FreeBSD__ ) || defined (__APPLE__)
- (void)subcmd; // only used on Linux
- fd = me->fd;
-#endif
-
-#ifdef linux
- struct ethtool_value eval;
-
- bzero(&eval, sizeof(eval));
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0) {
- printf("Error: cannot get device control socket.\n");
- return -1;
- }
-#endif /* linux */
-
- bzero(&ifr, sizeof(ifr));
- strncpy(ifr.ifr_name, me->req.nr_name, sizeof(ifr.ifr_name));
- switch (what) {
- case SIOCSIFFLAGS:
-#ifndef __APPLE__
- ifr.ifr_flagshigh = me->if_flags >> 16;
-#endif
- ifr.ifr_flags = me->if_flags & 0xffff;
- break;
-
-#if defined( __FreeBSD__ )
- case SIOCSIFCAP:
- ifr.ifr_reqcap = me->if_reqcap;
- ifr.ifr_curcap = me->if_curcap;
- break;
-#endif
-
-#ifdef linux
- case SIOCETHTOOL:
- eval.cmd = subcmd;
- eval.data = 0;
- ifr.ifr_data = (caddr_t)&eval;
- break;
-#endif /* linux */
- }
- error = ioctl(fd, what, &ifr);
- if (error)
- goto done;
- switch (what) {
- case SIOCGIFFLAGS:
-#ifndef __APPLE__
- me->if_flags = (ifr.ifr_flagshigh << 16) |
- (0xffff & ifr.ifr_flags);
-#endif
- if (verbose)
- D("flags are 0x%x", me->if_flags);
- break;
-
-#if defined( __FreeBSD__ )
- case SIOCGIFCAP:
- me->if_reqcap = ifr.ifr_reqcap;
- me->if_curcap = ifr.ifr_curcap;
- if (verbose)
- D("curcap are 0x%x", me->if_curcap);
- break;
-#endif /* __FreeBSD__ */
- }
-done:
-#ifdef linux
- close(fd);
-#endif
- if (error)
- D("ioctl error %d %lu", error, what);
- return error;
-}
-
-/*
- * open a device. if me->mem is null then do an mmap.
- * Returns the file descriptor.
- * The extra flag checks configures promisc mode.
- */
-struct nm_desc_t *
-netmap_open(const char *name, int ringid, int promisc)
-{
- struct nm_desc_t *d = nm_open(name, NULL, ringid, 0);
-
- if (d == NULL)
- return d;
-
- if (verbose)
- D("memsize is %d MB", d->req.nr_memsize>>20);
-
- /* Set the operating mode. */
- if (ringid != NETMAP_SW_RING) {
- nm_do_ioctl(d, SIOCGIFFLAGS, 0);
- if ((d->if_flags & IFF_UP) == 0) {
- D("%s is down, bringing up...", name);
- d->if_flags |= IFF_UP;
- }
- if (promisc) {
- d->if_flags |= IFF_PPROMISC;
- nm_do_ioctl(d, SIOCSIFFLAGS, 0);
- }
-
- /* disable GSO, TSO, RXCSUM, TXCSUM...
- * TODO: set them back when done.
- */
-#ifdef __FreeBSD__
- nm_do_ioctl(d, SIOCGIFCAP, 0);
- d->if_reqcap = d->if_curcap;
- d->if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
- nm_do_ioctl(d, SIOCSIFCAP, 0);
-#endif
-#ifdef linux
- nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_SGSO);
- nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_STSO);
- nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_SRXCSUM);
- nm_do_ioctl(d, SIOCETHTOOL, ETHTOOL_STXCSUM);
-#endif /* linux */
- }
-
- return d;
-}
-
-
-/*
- * how many packets on this set of queues ?
- */
-int
-pkt_queued(struct nm_desc_t *d, int tx)
-{
- u_int i, tot = 0;
-
- ND("me %p begin %d end %d", me, me->begin, me->end);
- if (tx) {
- for (i = d->first_tx_ring; i <= d->last_tx_ring; i++)
- tot += nm_ring_space(d->tx + i);
- } else {
- for (i = d->first_rx_ring; i <= d->last_rx_ring; i++)
- tot += nm_ring_space(d->rx + i);
- }
- return tot;
-}
-
-#if 0
-
-/*
- *
-
-Helper routines for multiple readers from the same queue
-
-- all readers open the device in 'passive' mode (NETMAP_PRIV_RING set).
- In this mode a thread that loses the race on a poll() just continues
- without calling *xsync()
-
-- all readers share an extra 'ring' which contains the sync information.
- In particular we have a shared head+tail pointers that work
- together with cur and available
- ON RETURN FROM THE SYSCALL:
- shadow->cur = ring->cur
- shadow->tail = ring->tail
- shadow->link[i] = i for all slots // mark invalid
-
- */
-
-struct nm_q_arg {
- u_int want; /* Input */
- u_int have; /* Output, 0 on error */
- u_int cur;
- u_int tail;
- struct netmap_ring *ring;
-};
-
-/*
- * grab a number of slots from the queue.
- */
-struct nm_q_arg
-my_grab(struct nm_q_arg q)
-{
- const u_int ns = q.ring->num_slots;
-
- // lock(ring);
- for (;;) {
-
- q.cur = (volatile u_int)q.ring->head;
- q.have = ns + q.head - (volatile u_int)q.ring->tail;
- if (q.have >= ns)
- q.have -= ns;
- if (q.have == 0) /* no space; caller may ioctl/retry */
- break;
- if (q.want < q.have)
- q.have = q.want;
- q.tail = q.cur + q.have;
- if (q.tail >= ns)
- q.tail -= ns;
- if (atomic_cmpset_int(&q.ring->cur, q.cur, q.tail)
- break; /* success */
- }
- // unlock(ring);
- D("returns %d out of %d at %d,%d",
- q.have, q.want, q.cur, q.tail);
- /* the last one can clear avail ? */
- return q;
-}
-
-
-int
-my_release(struct nm_q_arg q)
-{
- u_int cur = q.cur, tail = q.tail, i;
- struct netmap_ring *r = q.ring;
-
- /* link the block to the next one.
- * there is no race here because the location is mine.
- */
- r->slot[cur].ptr = tail; /* this is mine */
- r->slot[cur].flags |= NM_SLOT_PTR; // points to next block
- // memory barrier
- // lock(ring);
- if (r->head != cur)
- goto done;
- for (;;) {
- // advance head
- r->head = head = r->slot[head].ptr;
- // barrier ?
- if (head == r->slot[head].ptr)
- break; // stop here
- }
- /* we have advanced from q.head to head (r.head might be
- * further down.
- */
- // do an ioctl/poll to flush.
-done:
- // unlock(ring);
- return; /* not my turn to release */
-}
-#endif /* unused */
diff --git a/tools/tools/netmap/nm_util.h b/tools/tools/netmap/nm_util.h
deleted file mode 100644
index 0ab2e2e81984..000000000000
--- a/tools/tools/netmap/nm_util.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- * $Id$
- *
- * Some utilities to build netmap-based programs.
- */
-
-#ifndef _NM_UTIL_H
-#define _NM_UTIL_H
-
-#define _GNU_SOURCE /* for CPU_SET() */
-
-#include <stdio.h> /* fprintf */
-#include <sys/poll.h> /* POLLIN */
-#include <inttypes.h> /* PRI* macros */
-#include <sys/types.h> /* u_char */
-
-#include <arpa/inet.h> /* ntohs */
-#include <sys/sysctl.h> /* sysctl */
-#include <ifaddrs.h> /* getifaddrs */
-#include <net/ethernet.h> /* ETHERTYPE_IP */
-#include <netinet/in.h> /* IPPROTO_* */
-#include <netinet/ip.h> /* struct ip */
-#include <netinet/udp.h> /* struct udp */
-
-
-#define NETMAP_WITH_LIBS
-#include <net/netmap_user.h>
-
-#include <pthread.h> /* pthread_* */
-
-#ifdef linux
-
-#define cpuset_t cpu_set_t
-
-#define ifr_flagshigh ifr_flags /* only the low 16 bits here */
-#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */
-#include <linux/ethtool.h>
-#include <linux/sockios.h>
-
-#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
-#include <netinet/ether.h> /* ether_aton */
-#include <linux/if_packet.h> /* sockaddr_ll */
-#endif /* linux */
-
-#ifdef __FreeBSD__
-#include <sys/endian.h> /* le64toh */
-#include <machine/param.h>
-
-#include <pthread_np.h> /* pthread w/ affinity */
-#include <sys/cpuset.h> /* cpu_set */
-#include <net/if_dl.h> /* LLADDR */
-#endif /* __FreeBSD__ */
-
-#ifdef __APPLE__
-
-#define cpuset_t uint64_t // XXX
-static inline void CPU_ZERO(cpuset_t *p)
-{
- *p = 0;
-}
-
-static inline void CPU_SET(uint32_t i, cpuset_t *p)
-{
- *p |= 1<< (i & 0x3f);
-}
-
-#define pthread_setaffinity_np(a, b, c) ((void)a, 0)
-
-#define ifr_flagshigh ifr_flags // XXX
-#define IFF_PPROMISC IFF_PROMISC
-#include <net/if_dl.h> /* LLADDR */
-#define clock_gettime(a,b) \
- do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
-#endif /* __APPLE__ */
-
-static inline int min(int a, int b) { return a < b ? a : b; }
-extern int time_second;
-
-/* debug support */
-#define ND(format, ...) do {} while(0)
-#define D(format, ...) \
- fprintf(stderr, "%s [%d] " format "\n", \
- __FUNCTION__, __LINE__, ##__VA_ARGS__)
-
-#define RD(lps, format, ...) \
- do { \
- static int t0, cnt; \
- if (t0 != time_second) { \
- t0 = time_second; \
- cnt = 0; \
- } \
- if (cnt++ < lps) \
- D(format, ##__VA_ARGS__); \
- } while (0)
-
-
-
-struct nm_desc_t * netmap_open(const char *name, int ringid, int promisc);
-int nm_do_ioctl(struct nm_desc_t *me, u_long what, int subcmd);
-int pkt_queued(struct nm_desc_t *d, int tx);
-#endif /* _NM_UTIL_H */
diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c
deleted file mode 100644
index b3c2be5d23ff..000000000000
--- a/tools/tools/netmap/pcap.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/*
- * (C) 2011-2014 Luigi Rizzo
- *
- * BSD license
- *
- * A simple library that maps some pcap functions onto netmap
- * This is not 100% complete but enough to let tcpdump, trafshow
- * and other apps work.
- *
- * $FreeBSD$
- */
-
-#define MY_PCAP
-#include "nm_util.h"
-
-char *version = "$Id$";
-int verbose = 0;
-
-/*
- * We redefine here a number of structures that are in pcap.h
- * so we can compile this file without the system header.
- */
-#ifndef PCAP_ERRBUF_SIZE
-#define PCAP_ERRBUF_SIZE 128
-/*
- * Each packet is accompanied by a header including the timestamp,
- * captured size and actual size.
- */
-struct pcap_pkthdr {
- struct timeval ts; /* time stamp */
- uint32_t caplen; /* length of portion present */
- uint32_t len; /* length this packet (off wire) */
-};
-
-typedef struct pcap_if pcap_if_t;
-
-/*
- * Representation of an interface address.
- */
-struct pcap_addr {
- struct pcap_addr *next;
- struct sockaddr *addr; /* address */
- struct sockaddr *netmask; /* netmask for the above */
- struct sockaddr *broadaddr; /* broadcast addr for the above */
- struct sockaddr *dstaddr; /* P2P dest. address for the above */
-};
-
-struct pcap_if {
- struct pcap_if *next;
- char *name; /* name to hand to "pcap_open_live()" */
- char *description; /* textual description of interface, or NULL */
- struct pcap_addr *addresses;
- uint32_t flags; /* PCAP_IF_ interface flags */
-};
-
-/*
- * We do not support stats (yet)
- */
-struct pcap_stat {
- u_int ps_recv; /* number of packets received */
- u_int ps_drop; /* number of packets dropped */
- u_int ps_ifdrop; /* drops by interface XXX not yet supported */
-#ifdef WIN32
- u_int bs_capt; /* number of packets that reach the app. */
-#endif /* WIN32 */
-};
-
-typedef struct nm_desc_t pcap_t;
-typedef enum {
- PCAP_D_INOUT = 0,
- PCAP_D_IN,
- PCAP_D_OUT
-} pcap_direction_t;
-
-
-
-typedef void (*pcap_handler)(u_char *user,
- const struct pcap_pkthdr *h, const u_char *bytes);
-
-char errbuf[PCAP_ERRBUF_SIZE];
-
-pcap_t *pcap_open_live(const char *device, int snaplen,
- int promisc, int to_ms, char *errbuf);
-
-int pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf);
-void pcap_close(pcap_t *p);
-int pcap_get_selectable_fd(pcap_t *p);
-int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user);
-int pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf);
-int pcap_setdirection(pcap_t *p, pcap_direction_t d);
-char *pcap_lookupdev(char *errbuf);
-int pcap_inject(pcap_t *p, const void *buf, size_t size);
-int pcap_fileno(pcap_t *p);
-const char *pcap_lib_version(void);
-
-
-struct eproto {
- const char *s;
- u_short p;
-};
-#endif /* !PCAP_ERRBUF_SIZE */
-
-#ifndef TEST
-/*
- * build as a shared library
- */
-
-char pcap_version[] = "libnetmap version 0.3";
-
-
-/*
- * There is a set of functions that tcpdump expects even if probably
- * not used
- */
-struct eproto eproto_db[] = {
- { "ip", ETHERTYPE_IP },
- { "arp", ETHERTYPE_ARP },
- { (char *)0, 0 }
-};
-
-
-const char *pcap_lib_version(void)
-{
- return pcap_version;
-}
-
-int
-pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf)
-{
- pcap_if_t *top = NULL;
-#ifndef linux
- struct ifaddrs *i_head, *i;
- pcap_if_t *cur;
- struct pcap_addr *tail = NULL;
- int l;
-
- D("listing all devs");
- *alldevsp = NULL;
- i_head = NULL;
-
- if (getifaddrs(&i_head)) {
- D("cannot get if addresses");
- return -1;
- }
- for (i = i_head; i; i = i->ifa_next) {
- //struct ifaddrs *ifa;
- struct pcap_addr *pca;
- //struct sockaddr *sa;
-
- D("got interface %s", i->ifa_name);
- if (!top || strcmp(top->name, i->ifa_name)) {
- /* new interface */
- l = sizeof(*top) + strlen(i->ifa_name) + 1;
- cur = calloc(1, l);
- if (cur == NULL) {
- D("no space for if descriptor");
- continue;
- }
- cur->name = (char *)(cur + 1);
- //cur->flags = i->ifa_flags;
- strcpy(cur->name, i->ifa_name);
- cur->description = NULL;
- cur->next = top;
- top = cur;
- tail = NULL;
- }
- /* now deal with addresses */
- D("%s addr family %d len %d %s %s",
- top->name,
- i->ifa_addr->sa_family, i->ifa_addr->sa_len,
- i->ifa_netmask ? "Netmask" : "",
- i->ifa_broadaddr ? "Broadcast" : "");
- l = sizeof(struct pcap_addr) +
- (i->ifa_addr ? i->ifa_addr->sa_len:0) +
- (i->ifa_netmask ? i->ifa_netmask->sa_len:0) +
- (i->ifa_broadaddr? i->ifa_broadaddr->sa_len:0);
- pca = calloc(1, l);
- if (pca == NULL) {
- D("no space for if addr");
- continue;
- }
-#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len))
- pca->addr = (struct sockaddr *)(pca + 1);
- pkt_copy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len);
- if (i->ifa_netmask) {
- pca->netmask = SA_NEXT(pca->addr);
- bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len);
- if (i->ifa_broadaddr) {
- pca->broadaddr = SA_NEXT(pca->netmask);
- bcopy(i->ifa_broadaddr, pca->broadaddr, i->ifa_broadaddr->sa_len);
- }
- }
- if (tail == NULL) {
- top->addresses = pca;
- } else {
- tail->next = pca;
- }
- tail = pca;
-
- }
- freeifaddrs(i_head);
-#endif /* !linux */
- (void)errbuf; /* UNUSED */
- *alldevsp = top;
- return 0;
-}
-
-void pcap_freealldevs(pcap_if_t *alldevs)
-{
- (void)alldevs; /* UNUSED */
- D("unimplemented");
-}
-
-char *
-pcap_lookupdev(char *buf)
-{
- D("%s", buf);
- strcpy(buf, "/dev/netmap");
- return buf;
-}
-
-pcap_t *
-pcap_create(const char *source, char *errbuf)
-{
- D("src %s (call open liveted)", source);
- return pcap_open_live(source, 0, 1, 100, errbuf);
-}
-
-int
-pcap_activate(pcap_t *p)
-{
- D("pcap %p running", p);
- return 0;
-}
-
-int
-pcap_can_set_rfmon(pcap_t *p)
-{
- (void)p; /* UNUSED */
- D("");
- return 0; /* no we can't */
-}
-
-int
-pcap_set_snaplen(pcap_t *p, int snaplen)
-{
- struct nm_desc_t *me = p;
-
- D("len %d", snaplen);
- me->snaplen = snaplen;
- return 0;
-}
-
-int
-pcap_snapshot(pcap_t *p)
-{
- struct nm_desc_t *me = p;
-
- D("len %d", me->snaplen);
- return me->snaplen;
-}
-
-int
-pcap_lookupnet(const char *device, uint32_t *netp,
- uint32_t *maskp, char *errbuf)
-{
-
- (void)errbuf; /* UNUSED */
- D("device %s", device);
- inet_aton("10.0.0.255", (struct in_addr *)netp);
- inet_aton("255.255.255.0",(struct in_addr *) maskp);
- return 0;
-}
-
-int
-pcap_set_promisc(pcap_t *p, int promisc)
-{
- D("promisc %d", promisc);
- if (nm_do_ioctl(p, SIOCGIFFLAGS, 0))
- D("SIOCGIFFLAGS failed");
- if (promisc) {
- p->if_flags |= IFF_PPROMISC;
- } else {
- p->if_flags &= ~IFF_PPROMISC;
- }
- if (nm_do_ioctl(p, SIOCSIFFLAGS, 0))
- D("SIOCSIFFLAGS failed");
- return 0;
-}
-
-int
-pcap_set_timeout(pcap_t *p, int to_ms)
-{
- D("%d ms", to_ms);
- p->to_ms = to_ms;
- return 0;
-}
-
-struct bpf_program;
-
-int
-pcap_compile(pcap_t *p, struct bpf_program *fp,
- const char *str, int optimize, uint32_t netmask)
-{
- (void)p; /* UNUSED */
- (void)fp; /* UNUSED */
- (void)optimize; /* UNUSED */
- (void)netmask; /* UNUSED */
- D("%s", str);
- return 0;
-}
-
-int
-pcap_setfilter(pcap_t *p, struct bpf_program *fp)
-{
- (void)p; /* UNUSED */
- (void)fp; /* UNUSED */
- D("");
- return 0;
-}
-
-int
-pcap_datalink(pcap_t *p)
-{
- (void)p; /* UNUSED */
- D("returns 1");
- return 1; // ethernet
-}
-
-const char *
-pcap_datalink_val_to_name(int dlt)
-{
- D("%d returns DLT_EN10MB", dlt);
- return "DLT_EN10MB";
-}
-
-const char *
-pcap_datalink_val_to_description(int dlt)
-{
- D("%d returns Ethernet link", dlt);
- return "Ethernet link";
-}
-
-struct pcap_stat;
-int
-pcap_stats(pcap_t *p, struct pcap_stat *ps)
-{
- *ps = *(struct pcap_stat *)(void *)&(p->st);
- return 0; /* accumulate from pcap_dispatch() */
-};
-
-char *
-pcap_geterr(pcap_t *p)
-{
- D("");
- return p->msg;
-}
-
-pcap_t *
-pcap_open_live(const char *device, int snaplen,
- int promisc, int to_ms, char *errbuf)
-{
- struct nm_desc_t *d;
- int l;
-
- if (!device) {
- D("missing device name");
- return NULL;
- }
-
- l = strlen(device) + 1;
- D("request to open %s snaplen %d promisc %d timeout %dms",
- device, snaplen, promisc, to_ms);
- d = nm_open(device, NULL, 0, 0);
- if (d == NULL) {
- D("error opening %s", device);
- return NULL;
- }
- d->to_ms = to_ms;
- d->snaplen = snaplen;
- d->errbuf = errbuf;
- d->promisc = promisc;
-
- return d;
-}
-
-void
-pcap_close(pcap_t *p)
-{
- nm_close(p);
- /* restore original flags ? */
-}
-
-int
-pcap_fileno(pcap_t *p)
-{
- struct nm_desc_t *d = p;
- D("returns %d", d->fd);
- return d->fd;
-}
-
-int
-pcap_get_selectable_fd(pcap_t *p)
-{
- struct nm_desc_t *d = p;
-
- return d->fd;
-}
-
-int
-pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf)
-{
- (void)p; /* UNUSED */
- (void)errbuf; /* UNUSED */
- D("mode is %d", nonblock);
- return 0; /* ignore */
-}
-
-int
-pcap_setdirection(pcap_t *p, pcap_direction_t d)
-{
- (void)p; /* UNUSED */
- (void)d; /* UNUSED */
- D("");
- return 0; /* ignore */
-};
-
-int
-pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
-{
- return nm_dispatch(p, cnt, (void *)callback, user);
-}
-
-int
-pcap_inject(pcap_t *p, const void *buf, size_t size)
-{
- return nm_inject(p, buf, size);
-}
-
-int
-pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
-{
- struct pollfd fds[1];
- int i;
-
- ND("cnt %d", cnt);
- memset(fds, 0, sizeof(fds));
- fds[0].fd = p->fd;
- fds[0].events = (POLLIN);
-
- while (cnt == -1 || cnt > 0) {
- if (poll(fds, 1, p->to_ms) <= 0) {
- D("poll error/timeout");
- continue;
- }
- i = nm_dispatch(p, cnt, (void *)callback, user);
- if (cnt > 0)
- cnt -= i;
- }
- return 0;
-}
-
-#endif /* !TEST */
-
-#ifdef TEST /* build test code */
-void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf)
-{
- pcap_inject((pcap_t *)user, buf, h->caplen);
-}
-
-/*
- * a simple pcap test program, bridge between two interfaces.
- */
-int
-main(int argc, char **argv)
-{
- pcap_t *p0, *p1;
- int burst = 1024;
- struct pollfd pollfd[2];
-
- fprintf(stderr, "%s %s built %s %s\n",
- argv[0], version, __DATE__, __TIME__);
-
- while (argc > 1 && !strcmp(argv[1], "-v")) {
- verbose++;
- argv++;
- argc--;
- }
-
- if (argc < 3 || argc > 4 || !strcmp(argv[1], argv[2])) {
- D("Usage: %s IFNAME1 IFNAME2 [BURST]", argv[0]);
- return (1);
- }
- if (argc > 3)
- burst = atoi(argv[3]);
-
- p0 = pcap_open_live(argv[1], 0, 1, 100, NULL);
- p1 = pcap_open_live(argv[2], 0, 1, 100, NULL);
- D("%s", version);
- D("open returns %p %p", p0, p1);
- if (!p0 || !p1)
- return(1);
- bzero(pollfd, sizeof(pollfd));
- pollfd[0].fd = pcap_fileno(p0);
- pollfd[1].fd = pcap_fileno(p1);
- pollfd[0].events = pollfd[1].events = POLLIN;
- for (;;) {
- /* do i need to reset ? */
- pollfd[0].revents = pollfd[1].revents = 0;
- int ret = poll(pollfd, 2, 1000);
- if (ret <= 0 || verbose)
- D("poll %s [0] ev %x %x [1] ev %x %x",
- ret <= 0 ? "timeout" : "ok",
- pollfd[0].events,
- pollfd[0].revents,
- pollfd[1].events,
- pollfd[1].revents);
- if (ret < 0)
- continue;
- if (pollfd[0].revents & POLLIN)
- pcap_dispatch(p0, burst, do_send, (void *)p1);
- if (pollfd[1].revents & POLLIN)
- pcap_dispatch(p1, burst, do_send, (void *)p0);
- }
-
- return (0);
-}
-#endif /* TEST */
diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c
index 3fb7702083fd..8e78fa8e24ed 100644
--- a/tools/tools/netmap/pkt-gen.c
+++ b/tools/tools/netmap/pkt-gen.c
@@ -37,26 +37,83 @@
*
*/
-#define MY_PCAP
-#include "nm_util.h"
-// #include <net/netmap_user.h>
+#define _GNU_SOURCE /* for CPU_SET() */
+#include <stdio.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+
#include <ctype.h> // isprint()
+#include <unistd.h> // sysconf()
+#include <sys/poll.h>
+#include <arpa/inet.h> /* ntohs */
+#include <sys/sysctl.h> /* sysctl */
+#include <ifaddrs.h> /* getifaddrs */
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include <pthread.h>
#ifndef NO_PCAP
#include <pcap/pcap.h>
#endif
+
+#ifdef linux
+
+#define cpuset_t cpu_set_t
+
+#define ifr_flagshigh ifr_flags /* only the low 16 bits here */
+#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
+#include <netinet/ether.h> /* ether_aton */
+#include <linux/if_packet.h> /* sockaddr_ll */
+#endif /* linux */
+
+#ifdef __FreeBSD__
+#include <sys/endian.h> /* le64toh */
+#include <machine/param.h>
+
+#include <pthread_np.h> /* pthread w/ affinity */
+#include <sys/cpuset.h> /* cpu_set */
+#include <net/if_dl.h> /* LLADDR */
+#endif /* __FreeBSD__ */
+
+#ifdef __APPLE__
+
+#define cpuset_t uint64_t // XXX
+static inline void CPU_ZERO(cpuset_t *p)
+{
+ *p = 0;
+}
+
+static inline void CPU_SET(uint32_t i, cpuset_t *p)
+{
+ *p |= 1<< (i & 0x3f);
+}
+
+#define pthread_setaffinity_np(a, b, c) ((void)a, 0)
+
+#define ifr_flagshigh ifr_flags // XXX
+#define IFF_PPROMISC IFF_PROMISC
+#include <net/if_dl.h> /* LLADDR */
+#define clock_gettime(a,b) \
+ do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
+#endif /* __APPLE__ */
+
const char *default_payload="netmap pkt-gen DIRECT payload\n"
"http://info.iet.unipi.it/~luigi/netmap/ ";
const char *indirect_payload="netmap pkt-gen indirect payload\n"
"http://info.iet.unipi.it/~luigi/netmap/ ";
-int time_second; // support for RD() debugging macro
-
int verbose = 0;
-#define SKIP_PAYLOAD 1 /* do not check payload. */
+#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */
#define VIRT_HDR_1 10 /* length of a base vnet-hdr */
@@ -85,6 +142,8 @@ struct mac_range {
struct ether_addr start, end;
};
+/* ifname can be netmap:foo-xxxx */
+#define MAX_IFNAMELEN 64 /* our buffer for ifname */
/*
* global arguments for all threads
*/
@@ -119,15 +178,16 @@ struct glob_arg {
int affinity;
int main_fd;
+ struct nm_desc *nmd;
+ uint64_t nmd_flags;
int report_interval; /* milliseconds between prints */
void *(*td_body)(void *);
void *mmap_addr;
- int mmap_size;
- char *ifname;
+ char ifname[MAX_IFNAMELEN];
char *nmr_config;
int dummy_send;
int virt_header; /* send also the virt_header */
- int host_ring;
+ int extra_bufs; /* goes in nr_arg3 */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
@@ -142,9 +202,7 @@ struct targ {
int completed;
int cancel;
int fd;
- struct nmreq nmr;
- struct netmap_if *nifp;
- uint16_t qfirst, qlast; /* range of queues to scan */
+ struct nm_desc *nmd;
volatile uint64_t count;
struct timespec tic, toc;
int me;
@@ -187,7 +245,7 @@ extract_ip_range(struct ip_range *r)
pp = index(ap, ':');
if (pp) {
*pp++ = '\0';
- if (*pp)
+ if (*pp)
r->port1 = strtol(pp, NULL, 0);
}
if (*ap) {
@@ -261,19 +319,17 @@ sigint_h(int sig)
static int
system_ncpus(void)
{
-#ifdef __FreeBSD__
- int mib[2], ncpus;
- size_t len;
-
- mib[0] = CTL_HW;
- mib[1] = HW_NCPU;
- len = sizeof(mib);
+ int ncpus;
+#if defined (__FreeBSD__)
+ int mib[2] = { CTL_HW, HW_NCPU };
+ size_t len = sizeof(mib);
sysctl(mib, 2, &ncpus, &len, NULL, 0);
-
+#elif defined(linux)
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+#else /* others */
+ ncpus = 1;
+#endif /* others */
return (ncpus);
-#else
- return 1;
-#endif /* !__FreeBSD__ */
}
#ifdef __linux__
@@ -299,15 +355,17 @@ system_ncpus(void)
/*
* parse the vale configuration in conf and put it in nmr.
+ * Return the flag set if necessary.
* The configuration may consist of 0 to 4 numbers separated
* by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
* Missing numbers or zeroes stand for default values.
* As an additional convenience, if exactly one number
* is specified, then this is assigned to both #tx-slots and #rx-slots.
- * If there is no 4th number, then the 3rd is assigned to both #tx-rings
+ * If there is no 4th number, then the 3rd is assigned to both #tx-rings
* and #rx-rings.
*/
-void parse_nmr_config(const char* conf, struct nmreq *nmr)
+int
+parse_nmr_config(const char* conf, struct nmreq *nmr)
{
char *w, *tok;
int i, v;
@@ -315,7 +373,7 @@ void parse_nmr_config(const char* conf, struct nmreq *nmr)
nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
if (conf == NULL || ! *conf)
- return;
+ return 0;
w = strdup(conf);
for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) {
v = atoi(tok);
@@ -341,6 +399,9 @@ void parse_nmr_config(const char* conf, struct nmreq *nmr)
nmr->nr_tx_rings, nmr->nr_tx_slots,
nmr->nr_rx_rings, nmr->nr_rx_slots);
free(w);
+ return (nmr->nr_tx_rings || nmr->nr_tx_slots ||
+ nmr->nr_rx_rings || nmr->nr_rx_slots) ?
+ NM_OPEN_RING_CFG : 0;
}
@@ -385,7 +446,6 @@ source_hwaddr(const char *ifname, char *buf)
static int
setaffinity(pthread_t me, int i)
{
-#if 1 // def __FreeBSD__
cpuset_t cpumask;
if (i == -1)
@@ -399,10 +459,6 @@ setaffinity(pthread_t me, int i)
D("Unable to set affinity: %s", strerror(errno));
return 1;
}
-#else
- (void)me; /* suppress 'unused' warnings */
- (void)i;
-#endif /* __FreeBSD__ */
return 0;
}
@@ -449,7 +505,7 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur)
int i, j, i0;
/* get the length in ASCII of the length of the packet. */
-
+
printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n",
ring, cur, ring->slot[cur].buf_idx,
ring->slot[cur].flags, len);
@@ -632,7 +688,7 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
slot->flags |= NS_INDIRECT;
slot->ptr = (uint64_t)frame;
} else if (options & OPT_COPY) {
- pkt_copy(frame, p, size);
+ nm_pkt_copy(frame, p, size);
if (fcnt == nfrags)
update_addresses(pkt, g);
} else if (options & OPT_MEMCPY) {
@@ -671,21 +727,19 @@ static void *
pinger_body(void *data)
{
struct targ *targ = (struct targ *) data;
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
+ struct netmap_if *nifp = targ->nmd->nifp;
int i, rx = 0, n = targ->g->npackets;
void *frame;
int size;
+ uint32_t sent = 0;
+ struct timespec ts, now, last_print;
+ uint32_t count = 0, min = 1000000000, av = 0;
frame = &targ->pkt;
frame += sizeof(targ->pkt.vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
- fds[0].fd = targ->fd;
- fds[0].events = (POLLIN);
- static uint32_t sent;
- struct timespec ts, now, last_print;
- uint32_t count = 0, min = 1000000000, av = 0;
if (targ->g->nthreads > 1) {
D("can only ping with 1 thread");
@@ -706,7 +760,7 @@ pinger_body(void *data)
if (nm_ring_empty(ring)) {
D("-- ouch, cannot send");
} else {
- pkt_copy(frame, p, size);
+ nm_pkt_copy(frame, p, size);
clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
bcopy(&sent, p+42, sizeof(sent));
bcopy(&ts, p+46, sizeof(ts));
@@ -715,13 +769,14 @@ pinger_body(void *data)
}
}
/* should use a parameter to decide how often to send */
- if (poll(fds, 1, 3000) <= 0) {
+ if (poll(&pfd, 1, 3000) <= 0) {
D("poll error/timeout on queue %d: %s", targ->me,
strerror(errno));
continue;
}
/* see what we got back */
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_tx_ring;
+ i <= targ->nmd->last_tx_ring; i++) {
ring = NETMAP_RXRING(nifp, i);
while (!nm_ring_empty(ring)) {
uint32_t seq;
@@ -775,12 +830,10 @@ static void *
ponger_body(void *data)
{
struct targ *targ = (struct targ *) data;
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
+ struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_ring *txring, *rxring;
int i, rx = 0, sent = 0, n = targ->g->npackets;
- fds[0].fd = targ->fd;
- fds[0].events = (POLLIN);
if (targ->g->nthreads > 1) {
D("can only reply ping with 1 thread");
@@ -791,9 +844,9 @@ ponger_body(void *data)
uint32_t txcur, txavail;
//#define BUSYWAIT
#ifdef BUSYWAIT
- ioctl(fds[0].fd, NIOCRXSYNC, NULL);
+ ioctl(pfd.fd, NIOCRXSYNC, NULL);
#else
- if (poll(fds, 1, 1000) <= 0) {
+ if (poll(&pfd, 1, 1000) <= 0) {
D("poll error/timeout on queue %d: %s", targ->me,
strerror(errno));
continue;
@@ -803,7 +856,7 @@ ponger_body(void *data)
txcur = txring->cur;
txavail = nm_ring_space(txring);
/* see what we got back */
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
rxring = NETMAP_RXRING(nifp, i);
while (!nm_ring_empty(rxring)) {
uint16_t *spkt, *dpkt;
@@ -821,7 +874,7 @@ ponger_body(void *data)
/* copy... */
dpkt = (uint16_t *)dst;
spkt = (uint16_t *)src;
- pkt_copy(src, dst, slot->len);
+ nm_pkt_copy(src, dst, slot->len);
dpkt[0] = spkt[3];
dpkt[1] = spkt[4];
dpkt[2] = spkt[5];
@@ -838,7 +891,7 @@ ponger_body(void *data)
txring->head = txring->cur = txcur;
targ->count = sent;
#ifdef BUSYWAIT
- ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
#endif
//D("tx %d rx %d", sent, rx);
}
@@ -924,11 +977,11 @@ static void *
sender_body(void *data)
{
struct targ *targ = (struct targ *) data;
-
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
+ struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_ring *txring;
- int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
+ int i, n = targ->g->npackets / targ->g->nthreads;
+ int64_t sent = 0;
int options = targ->g->options | OPT_COPY;
struct timespec nexttime = { 0, 0}; // XXX silence compiler
int rate_limit = targ->g->tx_rate;
@@ -943,10 +996,6 @@ sender_body(void *data)
D("start");
if (setaffinity(targ->thread, targ->affinity))
goto quit;
- /* setup poll(2) mechanism. */
- memset(fds, 0, sizeof(fds));
- fds[0].fd = targ->fd;
- fds[0].events = (POLLOUT);
/* main loop.*/
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
@@ -956,7 +1005,7 @@ sender_body(void *data)
wait_time(targ->tic);
nexttime = targ->tic;
}
- if (targ->g->dev_type == DEV_TAP) {
+ if (targ->g->dev_type == DEV_TAP) {
D("writing to file desc %d", targ->g->main_fd);
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
@@ -997,14 +1046,14 @@ sender_body(void *data)
/*
* wait for available room in the send queue(s)
*/
- if (poll(fds, 1, 2000) <= 0) {
+ if (poll(&pfd, 1, 2000) <= 0) {
if (targ->cancel)
break;
D("poll error/timeout on queue %d: %s", targ->me,
strerror(errno));
- goto quit;
+ // goto quit;
}
- if (fds[0].revents & POLLERR) {
+ if (pfd.revents & POLLERR) {
D("poll error");
goto quit;
}
@@ -1015,7 +1064,7 @@ sender_body(void *data)
D("drop copy");
options &= ~OPT_COPY;
}
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
int m, limit = rate_limit ? tosend : targ->g->burst;
if (n > 0 && n - sent < limit)
limit = n - sent;
@@ -1024,10 +1073,10 @@ sender_body(void *data)
continue;
if (frags > 1)
limit = ((limit + frags - 1) / frags) * frags;
-
+
m = send_packets(txring, pkt, frame, size, targ->g,
limit, options, frags);
- ND("limit %d tail %d frags %d m %d",
+ ND("limit %d tail %d frags %d m %d",
limit, txring->tail, frags, m);
sent += m;
targ->count = sent;
@@ -1039,13 +1088,13 @@ sender_body(void *data)
}
}
/* flush any remaining packets */
- ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
/* final part: wait all the TX queues to be empty. */
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
txring = NETMAP_TXRING(nifp, i);
while (nm_tx_pending(txring)) {
- ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
usleep(1); /* wait 1 tick */
}
}
@@ -1102,8 +1151,8 @@ static void *
receiver_body(void *data)
{
struct targ *targ = (struct targ *) data;
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
+ struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_ring *rxring;
int i;
uint64_t received = 0;
@@ -1111,17 +1160,13 @@ receiver_body(void *data)
if (setaffinity(targ->thread, targ->affinity))
goto quit;
- /* setup poll(2) mechanism. */
- memset(fds, 0, sizeof(fds));
- fds[0].fd = targ->fd;
- fds[0].events = (POLLIN);
-
/* unbounded wait for the first packet. */
for (;;) {
- i = poll(fds, 1, 1000);
- if (i > 0 && !(fds[0].revents & POLLERR))
+ i = poll(&pfd, 1, 1000);
+ if (i > 0 && !(pfd.revents & POLLERR))
break;
- RD(1, "waiting for initial packets, poll returns %d %d", i, fds[0].revents);
+ RD(1, "waiting for initial packets, poll returns %d %d",
+ i, pfd.revents);
}
/* main loop, exit after 1s silence */
@@ -1146,18 +1191,18 @@ receiver_body(void *data)
while (!targ->cancel) {
/* Once we started to receive packets, wait at most 1 seconds
before quitting. */
- if (poll(fds, 1, 1 * 1000) <= 0 && !targ->g->forever) {
+ if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
targ->toc.tv_sec -= 1; /* Subtract timeout time. */
- break;
+ goto out;
}
- if (fds[0].revents & POLLERR) {
+ if (pfd.revents & POLLERR) {
D("poll err");
goto quit;
}
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
int m;
rxring = NETMAP_RXRING(nifp, i);
@@ -1168,12 +1213,12 @@ receiver_body(void *data)
received += m;
}
targ->count = received;
-
- // tell the card we have read the data
- //ioctl(fds[0].fd, NIOCRXSYNC, NULL);
}
}
+ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
+
+out:
targ->completed = 1;
targ->count = received;
@@ -1190,10 +1235,10 @@ quit:
static const char *
norm(char *buf, double val)
{
- char *units[] = { "", "K", "M", "G" };
+ char *units[] = { "", "K", "M", "G", "T" };
u_int i;
- for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++)
+ for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
val /= 1000;
sprintf(buf, "%.2f %s", val, units[i]);
return buf;
@@ -1205,8 +1250,8 @@ tx_output(uint64_t sent, int size, double delta)
double bw, raw_bw, pps;
char b1[40], b2[80], b3[80];
- printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
- sent, size, delta);
+ printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n",
+ (unsigned long long)sent, size, delta);
if (delta == 0)
delta = 1e-6;
if (size < 60) /* correct for min packet size */
@@ -1227,7 +1272,8 @@ rx_output(uint64_t received, double delta)
double pps;
char b1[40];
- printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
+ printf("Received %llu packets, in %.2f seconds.\n",
+ (unsigned long long) received, delta);
if (delta == 0)
delta = 1e-6;
@@ -1262,7 +1308,6 @@ usage(void)
"\t-R rate in packets per second\n"
"\t-X dump payload\n"
"\t-H len add empty virtio-net-header with size 'len'\n"
- "\t-h use host ring\n"
"",
cmd);
@@ -1280,77 +1325,57 @@ start_threads(struct glob_arg *g)
* using a single descriptor.
*/
for (i = 0; i < g->nthreads; i++) {
- bzero(&targs[i], sizeof(targs[i]));
- targs[i].fd = -1; /* default, with pcap */
- targs[i].g = g;
+ struct targ *t = &targs[i];
- if (g->dev_type == DEV_NETMAP) {
- struct nmreq tifreq;
- int tfd;
+ bzero(t, sizeof(*t));
+ t->fd = -1; /* default, with pcap */
+ t->g = g;
- /* register interface. */
- tfd = open("/dev/netmap", O_RDWR);
- if (tfd == -1) {
- D("Unable to open /dev/netmap: %s", strerror(errno));
- continue;
- }
- targs[i].fd = tfd;
+ if (g->dev_type == DEV_NETMAP) {
+ struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
- bzero(&tifreq, sizeof(tifreq));
- strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name));
- tifreq.nr_version = NETMAP_API;
- if (g->host_ring) {
- tifreq.nr_ringid = NETMAP_SW_RING;
- } else {
- tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
+ if (g->nthreads > 1) {
+ if (nmd.req.nr_flags != NR_REG_ALL_NIC) {
+ D("invalid nthreads mode %d", nmd.req.nr_flags);
+ continue;
+ }
+ nmd.req.nr_flags = NR_REG_ONE_NIC;
+ nmd.req.nr_ringid = i;
}
- parse_nmr_config(g->nmr_config, &tifreq);
+ /* Only touch one of the rings (rx is already ok) */
+ if (g->td_body == receiver_body)
+ nmd.req.nr_ringid |= NETMAP_NO_TX_POLL;
- /*
- * if we are acting as a receiver only, do not touch the transmit ring.
- * This is not the default because many apps may use the interface
- * in both directions, but a pure receiver does not.
- */
- if (g->td_body == receiver_body) {
- tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
- }
+ /* register interface. Override ifname and ringid etc. */
- if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
- D("Unable to register %s: %s", g->ifname, strerror(errno));
+ t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags |
+ NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd);
+ if (t->nmd == NULL) {
+ D("Unable to open %s: %s",
+ t->g->ifname, strerror(errno));
continue;
}
- D("memsize is %d MB", tifreq.nr_memsize >> 20);
- targs[i].nmr = tifreq;
- targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset);
- D("nifp flags 0x%x", targs[i].nifp->ni_flags);
- /* start threads. */
- if (g->host_ring) {
- targs[i].qfirst = (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
- targs[i].qlast = targs[i].qfirst + 1;
- } else {
- targs[i].qfirst = (g->nthreads > 1) ? i : 0;
- targs[i].qlast = (g->nthreads > 1) ? i+1 :
- (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
- }
+ t->fd = t->nmd->fd;
+
} else {
targs[i].fd = g->main_fd;
}
- targs[i].used = 1;
- targs[i].me = i;
+ t->used = 1;
+ t->me = i;
if (g->affinity >= 0) {
if (g->affinity < g->cpus)
- targs[i].affinity = g->affinity;
+ t->affinity = g->affinity;
else
- targs[i].affinity = i % g->cpus;
- } else
- targs[i].affinity = -1;
+ t->affinity = i % g->cpus;
+ } else {
+ t->affinity = -1;
+ }
/* default, init packets */
- initialize_packet(&targs[i]);
+ initialize_packet(t);
- if (pthread_create(&targs[i].thread, NULL, g->td_body,
- &targs[i]) == -1) {
+ if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
D("Unable to create thread %d: %s", i, strerror(errno));
- targs[i].used = 0;
+ t->used = 0;
}
}
}
@@ -1375,7 +1400,6 @@ main_thread(struct glob_arg *g)
delta.tv_usec = (g->report_interval%1000)*1000;
select(0, NULL, NULL, NULL, &delta);
gettimeofday(&now, NULL);
- time_second = now.tv_sec;
timersub(&now, &toc, &toc);
my_count = 0;
for (i = 0; i < g->nthreads; i++) {
@@ -1388,8 +1412,10 @@ main_thread(struct glob_arg *g)
continue;
npkts = my_count - prev;
pps = (npkts*1000000 + usec/2) / usec;
- D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)",
- pps, npkts, usec);
+ D("%llu pps (%llu pkts in %llu usec)",
+ (unsigned long long)pps,
+ (unsigned long long)npkts,
+ (unsigned long long)usec);
prev = my_count;
toc = now;
if (done == g->nthreads)
@@ -1433,7 +1459,7 @@ main_thread(struct glob_arg *g)
rx_output(count, delta_t);
if (g->dev_type == DEV_NETMAP) {
- munmap(g->mmap_addr, g->mmap_size);
+ munmap(g->nmd->mem, g->nmd->req.nr_memsize);
close(g->main_fd);
}
}
@@ -1521,7 +1547,6 @@ main(int arc, char **argv)
struct glob_arg g;
- struct nmreq nmr;
int ch;
int wait_link = 2;
int devqueues = 1; /* how many device queues */
@@ -1548,7 +1573,7 @@ main(int arc, char **argv)
g.virt_header = 0;
while ( (ch = getopt(arc, argv,
- "a:f:F:n:i:It:r:l:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:h")) != -1) {
+ "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:")) != -1) {
struct sf *fn;
switch(ch) {
@@ -1594,23 +1619,28 @@ main(int arc, char **argv)
* otherwise we guess
*/
D("interface is %s", optarg);
- g.ifname = optarg;
+ if (strlen(optarg) > MAX_IFNAMELEN - 8) {
+ D("ifname too long %s", optarg);
+ break;
+ }
+ strcpy(g.ifname, optarg);
if (!strcmp(optarg, "null")) {
g.dev_type = DEV_NETMAP;
g.dummy_send = 1;
} else if (!strncmp(optarg, "tap:", 4)) {
g.dev_type = DEV_TAP;
- g.ifname = optarg + 4;
+ strcpy(g.ifname, optarg + 4);
} else if (!strncmp(optarg, "pcap:", 5)) {
g.dev_type = DEV_PCAP;
- g.ifname = optarg + 5;
- } else if (!strncmp(optarg, "netmap:", 7)) {
+ strcpy(g.ifname, optarg + 5);
+ } else if (!strncmp(optarg, "netmap:", 7) ||
+ !strncmp(optarg, "vale", 4)) {
g.dev_type = DEV_NETMAP;
- g.ifname = optarg + 7;
} else if (!strncmp(optarg, "tap", 3)) {
g.dev_type = DEV_TAP;
- } else {
+ } else { /* prepend netmap: */
g.dev_type = DEV_NETMAP;
+ sprintf(g.ifname, "netmap:%s", optarg);
}
break;
@@ -1618,18 +1648,6 @@ main(int arc, char **argv)
g.options |= OPT_INDIRECT; /* XXX use indirect buffer */
break;
- case 't': /* send, deprecated */
- D("-t deprecated, please use -f tx -n %s", optarg);
- g.td_body = sender_body;
- g.npackets = atoi(optarg);
- break;
-
- case 'r': /* receive */
- D("-r deprecated, please use -f rx -n %s", optarg);
- g.td_body = receiver_body;
- g.npackets = atoi(optarg);
- break;
-
case 'l': /* pkt_size */
g.pkt_size = atoi(optarg);
break;
@@ -1686,8 +1704,8 @@ main(int arc, char **argv)
case 'H':
g.virt_header = atoi(optarg);
break;
- case 'h':
- g.host_ring = 1;
+ case 'e': /* extra bufs */
+ g.extra_bufs = atoi(optarg);
break;
}
}
@@ -1759,42 +1777,33 @@ main(int arc, char **argv)
} else if (g.dummy_send) { /* but DEV_NETMAP */
D("using a dummy send routine");
} else {
- bzero(&nmr, sizeof(nmr));
- nmr.nr_version = NETMAP_API;
+ struct nm_desc base_nmd;
+
+ bzero(&base_nmd, sizeof(base_nmd));
+
+ g.nmd_flags = 0;
+ g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req);
+ if (g.extra_bufs) {
+ base_nmd.req.nr_arg3 = g.extra_bufs;
+ g.nmd_flags |= NM_OPEN_ARG3;
+ }
+
/*
- * Open the netmap device to fetch the number of queues of our
- * interface.
+ * Open the netmap device using nm_open().
*
- * The first NIOCREGIF also detaches the card from the
* protocol stack and may cause a reset of the card,
* which in turn may take some time for the PHY to
- * reconfigure.
- */
- g.main_fd = open("/dev/netmap", O_RDWR);
- if (g.main_fd == -1) {
- D("Unable to open /dev/netmap: %s", strerror(errno));
- // fail later
- }
- /*
- * Register the interface on the netmap device: from now on,
- * we can operate on the network interface without any
- * interference from the legacy network stack.
- *
- * We decide to put the first interface registration here to
- * give time to cards that take a long time to reset the PHY.
+ * reconfigure. We do the open here to have time to reset.
*/
- bzero(&nmr, sizeof(nmr));
- nmr.nr_version = NETMAP_API;
- strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
- parse_nmr_config(g.nmr_config, &nmr);
- if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
- D("Unable to register interface %s: %s", g.ifname, strerror(errno));
- //continue, fail later
+ g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd);
+ if (g.nmd == NULL) {
+ D("Unable to open %s: %s", g.ifname, strerror(errno));
+ goto out;
}
- ND("%s: txr %d txd %d rxr %d rxd %d", g.ifname,
- nmr.nr_tx_rings, nmr.nr_tx_slots,
- nmr.nr_rx_rings, nmr.nr_rx_slots);
- devqueues = nmr.nr_rx_rings;
+ g.main_fd = g.nmd->fd;
+ D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem);
+
+ devqueues = g.nmd->req.nr_rx_rings;
/* validate provided nthreads. */
if (g.nthreads < 1 || g.nthreads > devqueues) {
@@ -1802,32 +1811,18 @@ main(int arc, char **argv)
// continue, fail later
}
- /*
- * Map the netmap shared memory: instead of issuing mmap()
- * inside the body of the threads, we prefer to keep this
- * operation here to simplify the thread logic.
- */
- D("mapping %d Kbytes", nmr.nr_memsize>>10);
- g.mmap_size = nmr.nr_memsize;
- g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
- PROT_WRITE | PROT_READ,
- MAP_SHARED, g.main_fd, 0);
- if (g.mmap_addr == MAP_FAILED) {
- D("Unable to mmap %d KB: %s", nmr.nr_memsize >> 10, strerror(errno));
- // continue, fail later
- }
-
if (verbose) {
- struct netmap_if *nifp = NETMAP_IF(g.mmap_addr, nmr.nr_offset);
+ struct netmap_if *nifp = g.nmd->nifp;
+ struct nmreq *req = &g.nmd->req;
- D("nifp at offset %d, %d tx %d rx rings %s",
- nmr.nr_offset, nmr.nr_tx_rings, nmr.nr_rx_rings,
- nmr.nr_ringid & NETMAP_PRIV_MEM ? "PRIVATE" : "common" );
- for (i = 0; i <= nmr.nr_tx_rings; i++) {
+ D("nifp at offset %d, %d tx %d rx region %d",
+ req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
+ req->nr_arg2);
+ for (i = 0; i <= req->nr_tx_rings; i++) {
D(" TX%d at 0x%lx", i,
(char *)NETMAP_TXRING(nifp, i) - (char *)nifp);
}
- for (i = 0; i <= nmr.nr_rx_rings; i++) {
+ for (i = 0; i <= req->nr_rx_rings; i++) {
D(" RX%d at 0x%lx", i,
(char *)NETMAP_RXRING(nifp, i) - (char *)nifp);
}
@@ -1846,7 +1841,8 @@ main(int arc, char **argv)
g.src_ip.name, g.dst_ip.name,
g.src_mac.name, g.dst_mac.name);
}
-
+
+out:
/* Exit if something went wrong. */
if (g.main_fd < 0) {
D("aborting");
@@ -1854,7 +1850,7 @@ main(int arc, char **argv)
}
}
-
+
if (g.options) {
D("--- SPECIAL OPTIONS:%s%s%s%s%s\n",
g.options & OPT_PREFETCH ? " prefetch" : "",
diff --git a/tools/tools/netmap/vale-ctl.c b/tools/tools/netmap/vale-ctl.c
index eb6c48d15a04..e1d8da568063 100644
--- a/tools/tools/netmap/vale-ctl.c
+++ b/tools/tools/netmap/vale-ctl.c
@@ -33,6 +33,7 @@
#include <unistd.h> /* close */
#include <sys/ioctl.h> /* ioctl */
#include <sys/param.h>
+#include <sys/socket.h> /* apple needs sockaddr */
#include <net/if.h> /* ifreq */
#include <net/netmap.h>
#include <net/netmap_user.h>