aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2015-07-10 05:51:36 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2015-07-10 05:51:36 +0000
commit847bf38369b6ea5abf8b6409006468cfe4f66d5e (patch)
tree2a938ad28f8fa79c60e58c3430a4c2c93631db94
parent9d73ee0f82b756db5e53a32e55766db958d41dba (diff)
downloadsrc-847bf38369b6ea5abf8b6409006468cfe4f66d5e.tar.gz
src-847bf38369b6ea5abf8b6409006468cfe4f66d5e.zip
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and Stefano Garzarella, is partly supported by grants from Verisign and Cisco, and brings in the following: - fix zerocopy monitor ports and introduce copying monitor ports (the latter are lower performance but give access to all traffic in parallel with the application) - exclusive open mode, useful to implement solutions that recover from crashes of the main netmap client (suggested by Patrick Kelsey) - revised memory allocator in preparation for the 'passthrough mode' (ptnetmap) recently presented at BSDCan. ptnetmap is described in S. Garzarella, G. Lettieri, L. Rizzo; Virtual device passthrough for high speed VM networking, ACM/IEEE ANCS 2015, Oakland (CA) May 2015 http://info.iet.unipi.it/~luigi/research.html - fix rx CRC handling on ixl - add module dependencies for netmap when building drivers as modules - minor simplifications to device-specific routines (*txsync, *rxsync) - general code cleanup (remove unused variables, introduce macros to access rings and remove duplicate code). Applications do not need to be recompiled, unless of course they want to use the new features (monitors and exclusive open). Those willing to try this code on stable/10 can just update the sys/dev/netmap/*, sys/net/netmap* with the version in HEAD and apply the small patches to individual device drivers. MFC after: 1 month Sponsored by: (partly) Verisign, Cisco
Notes
Notes: svn path=/head/; revision=285349
-rw-r--r--sys/dev/cxgbe/t4_main.c7
-rw-r--r--sys/dev/cxgbe/t4_netmap.c6
-rw-r--r--sys/dev/e1000/if_em.c3
-rw-r--r--sys/dev/e1000/if_igb.c3
-rw-r--r--sys/dev/e1000/if_lem.c3
-rw-r--r--sys/dev/ixgbe/if_ix.c3
-rw-r--r--sys/dev/netmap/if_em_netmap.h7
-rw-r--r--sys/dev/netmap/if_igb_netmap.h7
-rw-r--r--sys/dev/netmap/if_ixl_netmap.h16
-rw-r--r--sys/dev/netmap/if_lem_netmap.h7
-rw-r--r--sys/dev/netmap/if_re_netmap.h7
-rw-r--r--sys/dev/netmap/if_vtnet_netmap.h8
-rw-r--r--sys/dev/netmap/ixgbe_netmap.h7
-rw-r--r--sys/dev/netmap/netmap.c1041
-rw-r--r--sys/dev/netmap/netmap_freebsd.c39
-rw-r--r--sys/dev/netmap/netmap_generic.c30
-rw-r--r--sys/dev/netmap/netmap_kern.h350
-rw-r--r--sys/dev/netmap/netmap_mem2.c553
-rw-r--r--sys/dev/netmap/netmap_mem2.h30
-rw-r--r--sys/dev/netmap/netmap_monitor.c713
-rw-r--r--sys/dev/netmap/netmap_pipe.c135
-rw-r--r--sys/dev/netmap/netmap_vale.c350
-rw-r--r--sys/dev/re/if_re.c1
-rw-r--r--sys/net/netmap.h8
-rw-r--r--sys/net/netmap_user.h155
25 files changed, 2034 insertions, 1455 deletions
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 5c8805967f00..a3403ad79e62 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -8533,10 +8533,17 @@ static devclass_t cxgbe_devclass, cxl_devclass;
DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
MODULE_VERSION(t4nex, 1);
MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
+
DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
MODULE_VERSION(t5nex, 1);
MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
MODULE_VERSION(cxgbe, 1);
diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c
index f54a67fe94c0..a4afb8a25794 100644
--- a/sys/dev/cxgbe/t4_netmap.c
+++ b/sys/dev/cxgbe/t4_netmap.c
@@ -917,8 +917,6 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail -= kring->nkr_num_slots;
}
- nm_txsync_finalize(kring);
-
return (0);
}
@@ -931,7 +929,7 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
struct port_info *pi = ifp->if_softc;
struct adapter *sc = pi->adapter;
struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[pi->first_nm_rxq + kring->ring_id];
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
u_int n;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
@@ -993,8 +991,6 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
}
}
- nm_rxsync_finalize(kring);
-
return (0);
}
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 8032345d09ae..52b03d65ca50 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -344,6 +344,9 @@ devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(em, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index 6ac6eb63c987..384a46bc223d 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -322,6 +322,9 @@ static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(igb, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.
diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c
index 894a74a9db81..f34010e5c86e 100644
--- a/sys/dev/e1000/if_lem.c
+++ b/sys/dev/e1000/if_lem.c
@@ -286,6 +286,9 @@ extern devclass_t em_devclass;
DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0);
MODULE_DEPEND(lem, pci, 1, 1, 1);
MODULE_DEPEND(lem, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(lem, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c
index 77556267fa15..c8ce7445c860 100644
--- a/sys/dev/ixgbe/if_ix.c
+++ b/sys/dev/ixgbe/if_ix.c
@@ -246,6 +246,9 @@ DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0);
MODULE_DEPEND(ix, pci, 1, 1, 1);
MODULE_DEPEND(ix, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(ix, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*
** TUNEABLE PARAMETERS:
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
index 99eaa6f01319..eae4f8c18ca0 100644
--- a/sys/dev/netmap/if_em_netmap.h
+++ b/sys/dev/netmap/if_em_netmap.h
@@ -198,8 +198,6 @@ em_netmap_txsync(struct netmap_kring *kring, int flags)
}
}
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -217,7 +215,7 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@@ -303,9 +301,6 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
- /* tell userspace that there might be new packets */
- nm_rxsync_finalize(kring);
-
return 0;
ring_reset:
diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
index c73846073341..33b7b3b66547 100644
--- a/sys/dev/netmap/if_igb_netmap.h
+++ b/sys/dev/netmap/if_igb_netmap.h
@@ -180,8 +180,6 @@ igb_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -199,7 +197,7 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@@ -283,9 +281,6 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
- /* tell userspace that there might be new packets */
- nm_rxsync_finalize(kring);
-
return 0;
ring_reset:
diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h
index d6aff1f8c9a8..f7e7baaf6bc2 100644
--- a/sys/dev/netmap/if_ixl_netmap.h
+++ b/sys/dev/netmap/if_ixl_netmap.h
@@ -68,9 +68,14 @@ extern int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip;
* count packets that might be missed due to lost interrupts.
*/
SYSCTL_DECL(_dev_netmap);
-int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip;
+/*
+ * The ixl driver by default strips CRCs and we do not override it.
+ */
+int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
+#if 0
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_crcstrip,
- CTLFLAG_RW, &ixl_crcstrip, 0, "strip CRC on rx frames");
+ CTLFLAG_RW, &ixl_crcstrip, 1, "strip CRC on rx frames");
+#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss,
CTLFLAG_RW, &ixl_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss_bufs,
@@ -268,8 +273,6 @@ ixl_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -297,7 +300,7 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@@ -408,9 +411,6 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
wr32(vsi->hw, rxr->tail, nic_i);
}
- /* tell userspace that there might be new packets */
- nm_rxsync_finalize(kring);
-
return 0;
ring_reset:
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
index 50eb1f719929..0ec9b1346609 100644
--- a/sys/dev/netmap/if_lem_netmap.h
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -302,8 +302,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -321,7 +319,7 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@@ -466,9 +464,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}
- /* tell userspace that there might be new packets */
- nm_rxsync_finalize(kring);
-
return 0;
ring_reset:
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
index 354f14df1c58..ac08aedd7962 100644
--- a/sys/dev/netmap/if_re_netmap.h
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -159,8 +159,6 @@ re_netmap_txsync(struct netmap_kring *kring, int flags)
}
}
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -178,7 +176,7 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@@ -273,9 +271,6 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
- /* tell userspace that there might be new packets */
- nm_rxsync_finalize(kring);
-
return 0;
ring_reset:
diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h
index 63f4fa9aa5df..791cee56bcee 100644
--- a/sys/dev/netmap/if_vtnet_netmap.h
+++ b/sys/dev/netmap/if_vtnet_netmap.h
@@ -214,9 +214,6 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
}
-//out:
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -278,7 +275,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
// u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@@ -340,9 +337,6 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
vtnet_rxq_enable_intr(rxq);
}
- /* tell userspace that there might be new packets. */
- nm_rxsync_finalize(kring);
-
ND("[C] h %d c %d t %d hwcur %d hwtail %d",
ring->head, ring->cur, ring->tail,
kring->nr_hwcur, kring->nr_hwtail);
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index f1f03cb6d7a7..4d5bde20a3dd 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -322,8 +322,6 @@ ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
}
}
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -351,7 +349,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@@ -458,9 +456,6 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
}
- /* tell userspace that there might be new packets */
- nm_rxsync_finalize(kring);
-
return 0;
ring_reset:
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 11229ccf6b87..0a728bbf94e7 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -293,7 +293,7 @@ ports attached to the switch)
* kring->nm_sync() == DEVICE_netmap_rxsync()
* 2) device interrupt handler
* na->nm_notify() == netmap_notify()
- * - tx from host stack
+ * - rx from host stack
* concurrently:
* 1) host stack
* netmap_transmit()
@@ -313,31 +313,113 @@ ports attached to the switch)
*
* -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
*
+ * na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach()
*
- *
- * -= VALE PORT =-
- *
- *
- *
- * -= NETMAP PIPE =-
- *
- *
- *
- * -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
- *
- *
- *
- * -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
- *
- *
- *
- * -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
- *
- *
- *
- * -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
+ * - tx from netmap userspace:
+ * concurrently:
+ * 1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
+ * kring->nm_sync() == generic_netmap_txsync()
+ * linux: dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
+ * generic_ndo_start_xmit()
+ * orig. dev. start_xmit
+ * FreeBSD: na->if_transmit() == orig. dev if_transmit
+ * 2) generic_mbuf_destructor()
+ * na->nm_notify() == netmap_notify()
+ * - rx from netmap userspace:
+ * 1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
+ * kring->nm_sync() == generic_netmap_rxsync()
+ * mbq_safe_dequeue()
+ * 2) device driver
+ * generic_rx_handler()
+ * mbq_safe_enqueue()
+ * na->nm_notify() == netmap_notify()
+ * - rx from host stack:
+ * concurrently:
+ * 1) host stack
+ * linux: generic_ndo_start_xmit()
+ * netmap_transmit()
+ * FreeBSD: ifp->if_input() == netmap_transmit
+ * both:
+ * na->nm_notify() == netmap_notify()
+ * 2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
+ * kring->nm_sync() == netmap_rxsync_from_host_compat
+ * netmap_rxsync_from_host(na, NULL, NULL)
+ * - tx to host stack:
+ * ioctl(NIOCTXSYNC)/netmap_poll() in process context
+ * kring->nm_sync() == netmap_txsync_to_host_compat
+ * netmap_txsync_to_host(na)
+ * NM_SEND_UP()
+ * FreeBSD: na->if_input() == ??? XXX
+ * linux: netif_rx() with NM_MAGIC_PRIORITY_RX
*
*
+ * -= VALE =-
+ *
+ * INCOMING:
+ *
+ * - VALE ports:
+ * ioctl(NIOCTXSYNC)/netmap_poll() in process context
+ * kring->nm_sync() == netmap_vp_txsync()
+ *
+ * - system device with native support:
+ * from cable:
+ * interrupt
+ * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
+ * kring->nm_sync() == DEVICE_netmap_rxsync()
+ * netmap_vp_txsync()
+ * kring->nm_sync() == DEVICE_netmap_rxsync()
+ * from host stack:
+ * netmap_transmit()
+ * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
+ * kring->nm_sync() == netmap_rxsync_from_host_compat()
+ * netmap_vp_txsync()
+ *
+ * - system device with generic support:
+ * from device driver:
+ * generic_rx_handler()
+ * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring)
+ * kring->nm_sync() == generic_netmap_rxsync()
+ * netmap_vp_txsync()
+ * kring->nm_sync() == generic_netmap_rxsync()
+ * from host stack:
+ * netmap_transmit()
+ * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
+ * kring->nm_sync() == netmap_rxsync_from_host_compat()
+ * netmap_vp_txsync()
+ *
+ * (all cases) --> nm_bdg_flush()
+ * dest_na->nm_notify() == (see below)
+ *
+ * OUTGOING:
+ *
+ * - VALE ports:
+ * concurrently:
+ * 1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
+ * kring->nm_sync() == netmap_vp_rxsync()
+ * 2) from nm_bdg_flush()
+ * na->nm_notify() == netmap_notify()
+ *
+ * - system device with native support:
+ * to cable:
+ * na->nm_notify() == netmap_bwrap_notify()
+ * netmap_vp_rxsync()
+ * kring->nm_sync() == DEVICE_netmap_txsync()
+ * netmap_vp_rxsync()
+ * to host stack:
+ * netmap_vp_rxsync()
+ * kring->nm_sync() == netmap_txsync_to_host_compat
+ * netmap_vp_rxsync_locked()
+ *
+ * - system device with generic adapter:
+ * to device driver:
+ * na->nm_notify() == netmap_bwrap_notify()
+ * netmap_vp_rxsync()
+ * kring->nm_sync() == generic_netmap_txsync()
+ * netmap_vp_rxsync()
+ * to host stack:
+ * netmap_vp_rxsync()
+ * kring->nm_sync() == netmap_txsync_to_host_compat
+ * netmap_vp_rxsync()
*
*/
@@ -412,15 +494,6 @@ ports attached to the switch)
MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
-/*
- * The following variables are used by the drivers and replicate
- * fields in the global memory pool. They only refer to buffers
- * used by physical interfaces.
- */
-u_int netmap_total_buffers;
-u_int netmap_buf_size;
-char *netmap_buffer_base; /* also address of an invalid buffer */
-
/* user-controlled variables */
int netmap_verbose;
@@ -446,7 +519,6 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW,
int netmap_flags = 0; /* debug flags */
int netmap_fwd = 0; /* force transparent mode */
-int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */
/*
* netmap_admode selects the netmap mode to use.
@@ -464,7 +536,6 @@ int netmap_generic_rings = 1; /* number of queues in generic. */
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
@@ -472,15 +543,6 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rin
NMG_LOCK_T netmap_global_lock;
-
-static void
-nm_kr_get(struct netmap_kring *kr)
-{
- while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
- tsleep(kr, 0, "NM_KR_GET", 4);
-}
-
-
/*
* mark the ring as stopped, and run through the locks
* to make sure other users get to see it.
@@ -495,34 +557,14 @@ netmap_disable_ring(struct netmap_kring *kr)
nm_kr_put(kr);
}
-/* stop or enable a single tx ring */
-void
-netmap_set_txring(struct netmap_adapter *na, u_int ring_id, int stopped)
-{
- if (stopped)
- netmap_disable_ring(na->tx_rings + ring_id);
- else
- na->tx_rings[ring_id].nkr_stopped = 0;
- /* nofify that the stopped state has changed. This is currently
- *only used by bwrap to propagate the state to its own krings.
- * (see netmap_bwrap_intr_notify).
- */
- na->nm_notify(na, ring_id, NR_TX, NAF_DISABLE_NOTIFY);
-}
-
-/* stop or enable a single rx ring */
+/* stop or enable a single ring */
void
-netmap_set_rxring(struct netmap_adapter *na, u_int ring_id, int stopped)
+netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
{
if (stopped)
- netmap_disable_ring(na->rx_rings + ring_id);
+ netmap_disable_ring(NMR(na, t) + ring_id);
else
- na->rx_rings[ring_id].nkr_stopped = 0;
- /* nofify that the stopped state has changed. This is currently
- *only used by bwrap to propagate the state to its own krings.
- * (see netmap_bwrap_intr_notify).
- */
- na->nm_notify(na, ring_id, NR_RX, NAF_DISABLE_NOTIFY);
+ NMR(na, t)[ring_id].nkr_stopped = 0;
}
@@ -531,20 +573,15 @@ void
netmap_set_all_rings(struct netmap_adapter *na, int stopped)
{
int i;
- u_int ntx, nrx;
+ enum txrx t;
if (!nm_netmap_on(na))
return;
- ntx = netmap_real_tx_rings(na);
- nrx = netmap_real_rx_rings(na);
-
- for (i = 0; i < ntx; i++) {
- netmap_set_txring(na, i, stopped);
- }
-
- for (i = 0; i < nrx; i++) {
- netmap_set_rxring(na, i, stopped);
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_real_rings(na, t); i++) {
+ netmap_set_ring(na, i, t, stopped);
+ }
}
}
@@ -657,7 +694,8 @@ netmap_update_config(struct netmap_adapter *na)
txr = txd = rxr = rxd = 0;
if (na->nm_config == NULL ||
- na->nm_config(na, &txr, &txd, &rxr, &rxd)) {
+ na->nm_config(na, &txr, &txd, &rxr, &rxd))
+ {
/* take whatever we had at init time */
txr = na->num_tx_rings;
txd = na->num_tx_desc;
@@ -738,73 +776,59 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
u_int i, len, ndesc;
struct netmap_kring *kring;
- u_int ntx, nrx;
+ u_int n[NR_TXRX];
+ enum txrx t;
/* account for the (possibly fake) host rings */
- ntx = na->num_tx_rings + 1;
- nrx = na->num_rx_rings + 1;
+ n[NR_TX] = na->num_tx_rings + 1;
+ n[NR_RX] = na->num_rx_rings + 1;
- len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
+ len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom;
na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (na->tx_rings == NULL) {
D("Cannot allocate krings");
return ENOMEM;
}
- na->rx_rings = na->tx_rings + ntx;
+ na->rx_rings = na->tx_rings + n[NR_TX];
/*
* All fields in krings are 0 except the one initialized below.
* but better be explicit on important kring fields.
*/
- ndesc = na->num_tx_desc;
- for (i = 0; i < ntx; i++) { /* Transmit rings */
- kring = &na->tx_rings[i];
- bzero(kring, sizeof(*kring));
- kring->na = na;
- kring->ring_id = i;
- kring->nkr_num_slots = ndesc;
- if (i < na->num_tx_rings) {
- kring->nm_sync = na->nm_txsync;
- } else if (i == na->num_tx_rings) {
- kring->nm_sync = netmap_txsync_to_host_compat;
- }
- /*
- * IMPORTANT: Always keep one slot empty.
- */
- kring->rhead = kring->rcur = kring->nr_hwcur = 0;
- kring->rtail = kring->nr_hwtail = ndesc - 1;
- snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", na->name, i);
- ND("ktx %s h %d c %d t %d",
- kring->name, kring->rhead, kring->rcur, kring->rtail);
- mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
- init_waitqueue_head(&kring->si);
- }
-
- ndesc = na->num_rx_desc;
- for (i = 0; i < nrx; i++) { /* Receive rings */
- kring = &na->rx_rings[i];
- bzero(kring, sizeof(*kring));
- kring->na = na;
- kring->ring_id = i;
- kring->nkr_num_slots = ndesc;
- if (i < na->num_rx_rings) {
- kring->nm_sync = na->nm_rxsync;
- } else if (i == na->num_rx_rings) {
- kring->nm_sync = netmap_rxsync_from_host_compat;
+ for_rx_tx(t) {
+ ndesc = nma_get_ndesc(na, t);
+ for (i = 0; i < n[t]; i++) {
+ kring = &NMR(na, t)[i];
+ bzero(kring, sizeof(*kring));
+ kring->na = na;
+ kring->ring_id = i;
+ kring->tx = t;
+ kring->nkr_num_slots = ndesc;
+ if (i < nma_get_nrings(na, t)) {
+ kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
+ } else if (i == na->num_tx_rings) {
+ kring->nm_sync = (t == NR_TX ?
+ netmap_txsync_to_host_compat :
+ netmap_rxsync_from_host_compat);
+ }
+ kring->nm_notify = na->nm_notify;
+ kring->rhead = kring->rcur = kring->nr_hwcur = 0;
+ /*
+ * IMPORTANT: Always keep one slot empty.
+ */
+ kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
+ snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
+ nm_txrx2str(t), i);
+ ND("ktx %s h %d c %d t %d",
+ kring->name, kring->rhead, kring->rcur, kring->rtail);
+ mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
+ init_waitqueue_head(&kring->si);
}
- kring->rhead = kring->rcur = kring->nr_hwcur = 0;
- kring->rtail = kring->nr_hwtail = 0;
- snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", na->name, i);
- ND("krx %s h %d c %d t %d",
- kring->name, kring->rhead, kring->rcur, kring->rtail);
- mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
- init_waitqueue_head(&kring->si);
+ init_waitqueue_head(&na->si[t]);
}
- init_waitqueue_head(&na->tx_si);
- init_waitqueue_head(&na->rx_si);
- na->tailroom = na->rx_rings + nrx;
+ na->tailroom = na->rx_rings + n[NR_RX];
return 0;
}
@@ -829,6 +853,10 @@ void
netmap_krings_delete(struct netmap_adapter *na)
{
struct netmap_kring *kring = na->tx_rings;
+ enum txrx t;
+
+ for_rx_tx(t)
+ netmap_knlist_destroy(&na->si[t]);
/* we rely on the krings layout described above */
for ( ; kring != na->tailroom; kring++) {
@@ -858,142 +886,35 @@ netmap_hw_krings_delete(struct netmap_adapter *na)
}
-/* create a new netmap_if for a newly registered fd.
- * If this is the first registration of the adapter,
- * also create the netmap rings and their in-kernel view,
- * the netmap krings.
- */
-/* call with NMG_LOCK held */
-static struct netmap_if*
-netmap_if_new(struct netmap_adapter *na)
-{
- struct netmap_if *nifp;
-
- if (netmap_update_config(na)) {
- /* configuration mismatch, report and fail */
- return NULL;
- }
-
- if (na->active_fds) /* already registered */
- goto final;
-
- /* create and init the krings arrays.
- * Depending on the adapter, this may also create
- * the netmap rings themselves
- */
- if (na->nm_krings_create(na))
- return NULL;
-
- /* create all missing netmap rings */
- if (netmap_mem_rings_create(na))
- goto cleanup;
-
-final:
-
- /* in all cases, create a new netmap if */
- nifp = netmap_mem_if_new(na);
- if (nifp == NULL)
- goto cleanup;
-
- return (nifp);
-
-cleanup:
-
- if (na->active_fds == 0) {
- netmap_mem_rings_delete(na);
- na->nm_krings_delete(na);
- }
-
- return NULL;
-}
-
-
-/* grab a reference to the memory allocator, if we don't have one already. The
- * reference is taken from the netmap_adapter registered with the priv.
- */
-/* call with NMG_LOCK held */
-static int
-netmap_get_memory_locked(struct netmap_priv_d* p)
-{
- struct netmap_mem_d *nmd;
- int error = 0;
-
- if (p->np_na == NULL) {
- if (!netmap_mmap_unreg)
- return ENODEV;
- /* for compatibility with older versions of the API
- * we use the global allocator when no interface has been
- * registered
- */
- nmd = &nm_mem;
- } else {
- nmd = p->np_na->nm_mem;
- }
- if (p->np_mref == NULL) {
- error = netmap_mem_finalize(nmd, p->np_na);
- if (!error)
- p->np_mref = nmd;
- } else if (p->np_mref != nmd) {
- /* a virtual port has been registered, but previous
- * syscalls already used the global allocator.
- * We cannot continue
- */
- error = ENODEV;
- }
- return error;
-}
-
-
-/* call with NMG_LOCK *not* held */
-int
-netmap_get_memory(struct netmap_priv_d* p)
-{
- int error;
- NMG_LOCK();
- error = netmap_get_memory_locked(p);
- NMG_UNLOCK();
- return error;
-}
-
-
-/* call with NMG_LOCK held */
-static int
-netmap_have_memory_locked(struct netmap_priv_d* p)
-{
- return p->np_mref != NULL;
-}
-
-
-/* call with NMG_LOCK held */
-static void
-netmap_drop_memory_locked(struct netmap_priv_d* p)
-{
- if (p->np_mref) {
- netmap_mem_deref(p->np_mref, p->np_na);
- p->np_mref = NULL;
- }
-}
-
/*
- * Call nm_register(ifp,0) to stop netmap mode on the interface and
+ * Undo everything that was done in netmap_do_regif(). In particular,
+ * call nm_register(ifp,0) to stop netmap mode on the interface and
* revert to normal operation.
- * The second argument is the nifp to work on. In some cases it is
- * not attached yet to the netmap_priv_d so we need to pass it as
- * a separate argument.
*/
/* call with NMG_LOCK held */
+static void netmap_unset_ringid(struct netmap_priv_d *);
+static void netmap_rel_exclusive(struct netmap_priv_d *);
static void
-netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
+netmap_do_unregif(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
NMG_LOCK_ASSERT();
na->active_fds--;
+ /* release exclusive use if it was requested on regif */
+ netmap_rel_exclusive(priv);
if (na->active_fds <= 0) { /* last instance */
if (netmap_verbose)
D("deleting last instance for %s", na->name);
+
+#ifdef WITH_MONITOR
+ /* walk through all the rings and tell any monitor
+ * that the port is going to exit netmap mode
+ */
+ netmap_monitor_stop(na);
+#endif
/*
* (TO CHECK) This function is only called
* when the last reference to this file descriptor goes
@@ -1014,37 +935,33 @@ netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
* XXX The wake up now must happen during *_down(), when
* we order all activities to stop. -gl
*/
- netmap_knlist_destroy(&na->tx_si);
- netmap_knlist_destroy(&na->rx_si);
-
/* delete rings and buffers */
netmap_mem_rings_delete(na);
na->nm_krings_delete(na);
}
+ /* possibly decrement counter of tx_si/rx_si users */
+ netmap_unset_ringid(priv);
/* delete the nifp */
- netmap_mem_if_delete(na, nifp);
-}
-
-/* call with NMG_LOCK held */
-static __inline int
-nm_tx_si_user(struct netmap_priv_d *priv)
-{
- return (priv->np_na != NULL &&
- (priv->np_txqlast - priv->np_txqfirst > 1));
+ netmap_mem_if_delete(na, priv->np_nifp);
+ /* drop the allocator */
+ netmap_mem_deref(na->nm_mem, na);
+ /* mark the priv as unregistered */
+ priv->np_na = NULL;
+ priv->np_nifp = NULL;
}
/* call with NMG_LOCK held */
static __inline int
-nm_rx_si_user(struct netmap_priv_d *priv)
+nm_si_user(struct netmap_priv_d *priv, enum txrx t)
{
return (priv->np_na != NULL &&
- (priv->np_rxqlast - priv->np_rxqfirst > 1));
+ (priv->np_qlast[t] - priv->np_qfirst[t] > 1));
}
-
/*
* Destructor of the netmap_priv_d, called when the fd has
- * no active open() and mmap(). Also called in error paths.
+ * no active open() and mmap().
+ * Undo all the things done by NIOCREGIF.
*
* returns 1 if this is the last instance and we can free priv
*/
@@ -1066,17 +983,8 @@ netmap_dtor_locked(struct netmap_priv_d *priv)
if (!na) {
return 1; //XXX is it correct?
}
- netmap_do_unregif(priv, priv->np_nifp);
- priv->np_nifp = NULL;
- netmap_drop_memory_locked(priv);
- if (priv->np_na) {
- if (nm_tx_si_user(priv))
- na->tx_si_users--;
- if (nm_rx_si_user(priv))
- na->rx_si_users--;
- netmap_adapter_put(na);
- priv->np_na = NULL;
- }
+ netmap_do_unregif(priv);
+ netmap_adapter_put(na);
return 1;
}
@@ -1148,7 +1056,7 @@ static void
netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
{
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = kring->ring->head;
+ u_int const head = kring->rhead;
u_int n;
struct netmap_adapter *na = kring->na;
@@ -1235,7 +1143,6 @@ void
netmap_txsync_to_host(struct netmap_adapter *na)
{
struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
- struct netmap_ring *ring = kring->ring;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
struct mbq q;
@@ -1246,14 +1153,12 @@ netmap_txsync_to_host(struct netmap_adapter *na)
* the queue is drained in all cases.
*/
mbq_init(&q);
- ring->cur = head;
netmap_grab_packets(kring, &q, 1 /* force */);
ND("have %d pkts in queue", mbq_len(&q));
kring->nr_hwcur = head;
kring->nr_hwtail = head + lim;
if (kring->nr_hwtail > lim)
kring->nr_hwtail -= lim + 1;
- nm_txsync_finalize(kring);
netmap_send_up(na->ifp, &q);
}
@@ -1281,11 +1186,13 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
int ret = 0;
- struct mbq *q = &kring->rx_queue;
+ struct mbq *q = &kring->rx_queue, fq;
(void)pwait; /* disable unused warnings */
(void)td;
+ mbq_init(&fq); /* fq holds packets to be freed */
+
mbq_lock(q);
/* First part: import newly received packets */
@@ -1308,7 +1215,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
slot->len = len;
slot->flags = kring->nkr_slot_flags;
nm_i = nm_next(nm_i, lim);
- m_freem(m);
+ mbq_enqueue(&fq, m);
}
kring->nr_hwtail = nm_i;
}
@@ -1323,13 +1230,15 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
kring->nr_hwcur = head;
}
- nm_rxsync_finalize(kring);
-
/* access copies of cur,tail in the kring */
if (kring->rcur == kring->rtail && td) /* no bufs available */
OS_selrecord(td, &kring->si);
mbq_unlock(q);
+
+ mbq_purge(&fq);
+ mbq_destroy(&fq);
+
return ret;
}
@@ -1363,9 +1272,11 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
{
/* generic support */
int i = netmap_admode; /* Take a snapshot. */
- int error = 0;
struct netmap_adapter *prev_na;
+#ifdef WITH_GENERIC
struct netmap_generic_adapter *gna;
+ int error = 0;
+#endif
*na = NULL; /* default */
@@ -1401,6 +1312,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE)
return EOPNOTSUPP;
+#ifdef WITH_GENERIC
/* Otherwise, create a generic adapter and return it,
* saving the previously used netmap adapter, if any.
*
@@ -1431,6 +1343,9 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
ND("Created generic NA %p (prev %p)", gna, gna->prev);
return 0;
+#else /* !WITH_GENERIC */
+ return EOPNOTSUPP;
+#endif
}
@@ -1489,7 +1404,7 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
return error;
if (*na != NULL) /* valid match in netmap_get_bdg_na() */
- goto pipes;
+ goto out;
/*
* This must be a hardware na, lookup the name in the system.
@@ -1509,14 +1424,6 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
*na = ret;
netmap_adapter_get(ret);
-pipes:
- /*
- * If we are opening a pipe whose parent was not in netmap mode,
- * we have to allocate the pipe array now.
- * XXX get rid of this clumsiness (2014-03-15)
- */
- error = netmap_pipe_alloc(*na, nmr);
-
out:
if (error && ret != NULL)
netmap_adapter_put(ret);
@@ -1541,9 +1448,10 @@ out:
*
* hwcur, rhead, rtail and hwtail are reliable
*/
-u_int
+static u_int
nm_txsync_prologue(struct netmap_kring *kring)
{
+#define NM_ASSERT(t) if (t) { D("fail " #t); goto error; }
struct netmap_ring *ring = kring->ring;
u_int head = ring->head; /* read only once */
u_int cur = ring->cur; /* read only once */
@@ -1569,25 +1477,20 @@ nm_txsync_prologue(struct netmap_kring *kring)
*/
if (kring->rtail >= kring->rhead) {
/* want rhead <= head <= rtail */
- if (head < kring->rhead || head > kring->rtail)
- goto error;
+ NM_ASSERT(head < kring->rhead || head > kring->rtail);
/* and also head <= cur <= rtail */
- if (cur < head || cur > kring->rtail)
- goto error;
+ NM_ASSERT(cur < head || cur > kring->rtail);
} else { /* here rtail < rhead */
/* we need head outside rtail .. rhead */
- if (head > kring->rtail && head < kring->rhead)
- goto error;
+ NM_ASSERT(head > kring->rtail && head < kring->rhead);
/* two cases now: head <= rtail or head >= rhead */
if (head <= kring->rtail) {
/* want head <= cur <= rtail */
- if (cur < head || cur > kring->rtail)
- goto error;
+ NM_ASSERT(cur < head || cur > kring->rtail);
} else { /* head >= rhead */
/* cur must be outside rtail..head */
- if (cur > kring->rtail && cur < head)
- goto error;
+ NM_ASSERT(cur > kring->rtail && cur < head);
}
}
if (ring->tail != kring->rtail) {
@@ -1600,12 +1503,13 @@ nm_txsync_prologue(struct netmap_kring *kring)
return head;
error:
- RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d",
+ RD(5, "%s kring error: head %d cur %d tail %d rhead %d rcur %d rtail %d hwcur %d hwtail %d",
kring->name,
- kring->nr_hwcur,
- kring->rcur, kring->nr_hwtail,
- cur, ring->tail);
+ head, cur, ring->tail,
+ kring->rhead, kring->rcur, kring->rtail,
+ kring->nr_hwcur, kring->nr_hwtail);
return n;
+#undef NM_ASSERT
}
@@ -1620,14 +1524,14 @@ error:
* hwcur and hwtail are reliable.
*
*/
-u_int
+static u_int
nm_rxsync_prologue(struct netmap_kring *kring)
{
struct netmap_ring *ring = kring->ring;
uint32_t const n = kring->nkr_num_slots;
uint32_t head, cur;
- ND("%s kc %d kt %d h %d c %d t %d",
+ ND(5,"%s kc %d kt %d h %d c %d t %d",
kring->name,
kring->nr_hwcur, kring->nr_hwtail,
ring->head, ring->cur, ring->tail);
@@ -1719,7 +1623,7 @@ netmap_ring_reinit(struct netmap_kring *kring)
for (i = 0; i <= lim; i++) {
u_int idx = ring->slot[i].buf_idx;
u_int len = ring->slot[i].len;
- if (idx < 2 || idx >= netmap_total_buffers) {
+ if (idx < 2 || idx >= kring->na->na_lut.objtotal) {
RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
ring->slot[i].buf_idx = 0;
ring->slot[i].len = 0;
@@ -1754,6 +1658,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
struct netmap_adapter *na = priv->np_na;
u_int j, i = ringid & NETMAP_RING_MASK;
u_int reg = flags & NR_REG_MASK;
+ enum txrx t;
if (reg == NR_REG_DEFAULT) {
/* convert from old ringid to flags */
@@ -1770,12 +1675,12 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
case NR_REG_ALL_NIC:
case NR_REG_PIPE_MASTER:
case NR_REG_PIPE_SLAVE:
- priv->np_txqfirst = 0;
- priv->np_txqlast = na->num_tx_rings;
- priv->np_rxqfirst = 0;
- priv->np_rxqlast = na->num_rx_rings;
+ for_rx_tx(t) {
+ priv->np_qfirst[t] = 0;
+ priv->np_qlast[t] = nma_get_nrings(na, t);
+ }
ND("%s %d %d", "ALL/PIPE",
- priv->np_rxqfirst, priv->np_rxqlast);
+ priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX]);
break;
case NR_REG_SW:
case NR_REG_NIC_SW:
@@ -1783,31 +1688,27 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
D("host rings not supported");
return EINVAL;
}
- priv->np_txqfirst = (reg == NR_REG_SW ?
- na->num_tx_rings : 0);
- priv->np_txqlast = na->num_tx_rings + 1;
- priv->np_rxqfirst = (reg == NR_REG_SW ?
- na->num_rx_rings : 0);
- priv->np_rxqlast = na->num_rx_rings + 1;
+ for_rx_tx(t) {
+ priv->np_qfirst[t] = (reg == NR_REG_SW ?
+ nma_get_nrings(na, t) : 0);
+ priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
+ }
ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
- priv->np_rxqfirst, priv->np_rxqlast);
+ priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX]);
break;
case NR_REG_ONE_NIC:
if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
D("invalid ring id %d", i);
return EINVAL;
}
- /* if not enough rings, use the first one */
- j = i;
- if (j >= na->num_tx_rings)
- j = 0;
- priv->np_txqfirst = j;
- priv->np_txqlast = j + 1;
- j = i;
- if (j >= na->num_rx_rings)
- j = 0;
- priv->np_rxqfirst = j;
- priv->np_rxqlast = j + 1;
+ for_rx_tx(t) {
+ /* if not enough rings, use the first one */
+ j = i;
+ if (j >= nma_get_nrings(na, t))
+ j = 0;
+ priv->np_qfirst[t] = j;
+ priv->np_qlast[t] = j + 1;
+ }
break;
default:
D("invalid regif type %d", reg);
@@ -1818,10 +1719,10 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
if (netmap_verbose) {
D("%s: tx [%d,%d) rx [%d,%d) id %d",
na->name,
- priv->np_txqfirst,
- priv->np_txqlast,
- priv->np_rxqfirst,
- priv->np_rxqlast,
+ priv->np_qfirst[NR_TX],
+ priv->np_qlast[NR_TX],
+ priv->np_qfirst[NR_RX],
+ priv->np_qlast[NR_RX],
i);
}
return 0;
@@ -1837,6 +1738,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
{
struct netmap_adapter *na = priv->np_na;
int error;
+ enum txrx t;
error = netmap_interp_ringid(priv, ringid, flags);
if (error) {
@@ -1850,13 +1752,109 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
* The default netmap_notify() callback will then
* avoid signaling the global queue if nobody is using it
*/
- if (nm_tx_si_user(priv))
- na->tx_si_users++;
- if (nm_rx_si_user(priv))
- na->rx_si_users++;
+ for_rx_tx(t) {
+ if (nm_si_user(priv, t))
+ na->si_users[t]++;
+ }
return 0;
}
+/*
+ * Undo netmap_set_ringid(): for each direction, drop the si_users
+ * count that was taken if this priv is bound to more than one ring,
+ * then clear the bound ring range and reset np_flags and np_txpoll.
+ */
+static void
+netmap_unset_ringid(struct netmap_priv_d *priv)
+{
+ struct netmap_adapter *na = priv->np_na;
+ enum txrx t;
+
+ for_rx_tx(t) {
+ /* test before zeroing the range: nm_si_user() reads np_qfirst/np_qlast */
+ if (nm_si_user(priv, t))
+ na->si_users[t]--;
+ priv->np_qfirst[t] = priv->np_qlast[t] = 0;
+ }
+ priv->np_flags = 0;
+ priv->np_txpoll = 0;
+}
+
+
+/* check that the rings we want to bind are not exclusively owned by a previous
+ * bind. If exclusive ownership has been requested, we also mark the rings.
+ *
+ * Returns 0 on success, or EBUSY if a requested ring is already exclusively
+ * owned, or if exclusive ownership is requested on a ring that already has
+ * users. On EBUSY no kring has been modified.
+ */
+static int
+netmap_get_exclusive(struct netmap_priv_d *priv)
+{
+ struct netmap_adapter *na = priv->np_na;
+ u_int i;
+ struct netmap_kring *kring;
+ int excl = (priv->np_flags & NR_EXCLUSIVE);
+ enum txrx t;
+
+ ND("%s: grabbing tx [%d, %d) rx [%d, %d)",
+ na->name,
+ priv->np_qfirst[NR_TX],
+ priv->np_qlast[NR_TX],
+ priv->np_qfirst[NR_RX],
+ priv->np_qlast[NR_RX]);
+
+ /* first round: check that none of the requested rings
+ * is already exclusively owned, and that we are not
+ * asking for exclusive ownership of a ring already in use
+ */
+ for_rx_tx(t) {
+ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
+ kring = &NMR(na, t)[i];
+ if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
+ (kring->users && excl))
+ {
+ ND("ring %s busy", kring->name);
+ return EBUSY;
+ }
+ }
+ }
+
+ /* second round: increment usage count and possibly
+ * mark as exclusive
+ */
+
+ for_rx_tx(t) {
+ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
+ kring = &NMR(na, t)[i];
+ kring->users++;
+ if (excl)
+ kring->nr_kflags |= NKR_EXCLUSIVE;
+ }
+ }
+
+ return 0;
+
+}
+
+/*
+ * Undo netmap_get_exclusive(): drop this priv's usage count on every
+ * kring in its bound tx and rx ranges and, if the bind was exclusive
+ * (NR_EXCLUSIVE set in np_flags), clear NKR_EXCLUSIVE on those krings.
+ */
+static void
+netmap_rel_exclusive(struct netmap_priv_d *priv)
+{
+ struct netmap_adapter *na = priv->np_na;
+ u_int i;
+ struct netmap_kring *kring;
+ int excl = (priv->np_flags & NR_EXCLUSIVE);
+ enum txrx t;
+
+ ND("%s: releasing tx [%d, %d) rx [%d, %d)",
+ na->name,
+ priv->np_qfirst[NR_TX],
+ priv->np_qlast[NR_TX],
+ priv->np_qfirst[NR_RX],
+ priv->np_qlast[NR_RX]);
+
+ for_rx_tx(t) {
+ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
+ kring = &NMR(na, t)[i];
+ if (excl)
+ kring->nr_kflags &= ~NKR_EXCLUSIVE;
+ kring->users--;
+ }
+ }
+}
+
/*
* possibly move the interface to netmap-mode.
* If success it returns a pointer to netmap_if, otherwise NULL.
@@ -1871,9 +1869,8 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
* The bwrap has to override this, since it has to forward
* the request to the wrapped adapter (netmap_bwrap_config).
*
- * XXX netmap_if_new calls this again (2014-03-15)
*
- * na->nm_krings_create() [by netmap_if_new]
+ * na->nm_krings_create()
* (create and init the krings array)
*
* One of the following:
@@ -1927,15 +1924,14 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
* the hwna notify callback (to get the frames
* coming from outside go through the bridge).
*
- * XXX maybe netmap_if_new() should be merged with this (2014-03-15).
*
*/
-struct netmap_if *
+int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
- uint16_t ringid, uint32_t flags, int *err)
+ uint16_t ringid, uint32_t flags)
{
struct netmap_if *nifp = NULL;
- int error, need_mem = 0;
+ int error;
NMG_LOCK_ASSERT();
/* ring configuration may have changed, fetch from the card */
@@ -1943,57 +1939,121 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
priv->np_na = na; /* store the reference */
error = netmap_set_ringid(priv, ringid, flags);
if (error)
- goto out;
- /* ensure allocators are ready */
- need_mem = !netmap_have_memory_locked(priv);
- if (need_mem) {
- error = netmap_get_memory_locked(priv);
- ND("get_memory returned %d", error);
+ goto err;
+ error = netmap_mem_finalize(na->nm_mem, na);
+ if (error)
+ goto err;
+
+ if (na->active_fds == 0) {
+ /*
+ * If this is the first registration of the adapter,
+ * also create the netmap rings and their in-kernel view,
+ * the netmap krings.
+ */
+
+ /*
+ * Depending on the adapter, this may also create
+ * the netmap rings themselves
+ */
+ error = na->nm_krings_create(na);
+ if (error)
+ goto err_drop_mem;
+
+ /* create all missing netmap rings */
+ error = netmap_mem_rings_create(na);
if (error)
- goto out;
+ goto err_del_krings;
}
- /* Allocate a netmap_if and, if necessary, all the netmap_ring's */
- nifp = netmap_if_new(na);
- if (nifp == NULL) { /* allocation failed */
+
+ /* now the kring must exist and we can check whether some
+ * previous bind has exclusive ownership on them
+ */
+ error = netmap_get_exclusive(priv);
+ if (error)
+ goto err_del_rings;
+
+ /* in all cases, create a new netmap if */
+ nifp = netmap_mem_if_new(na);
+ if (nifp == NULL) {
error = ENOMEM;
- goto out;
+ goto err_rel_excl;
}
+
na->active_fds++;
if (!nm_netmap_on(na)) {
/* Netmap not active, set the card in netmap mode
* and make it use the shared buffers.
*/
/* cache the allocator info in the na */
- na->na_lut = netmap_mem_get_lut(na->nm_mem);
- ND("%p->na_lut == %p", na, na->na_lut);
- na->na_lut_objtotal = netmap_mem_get_buftotal(na->nm_mem);
- na->na_lut_objsize = netmap_mem_get_bufsize(na->nm_mem);
+ netmap_mem_get_lut(na->nm_mem, &na->na_lut);
+ ND("%p->na_lut == %p", na, na->na_lut.lut);
error = na->nm_register(na, 1); /* mode on */
- if (error) {
- netmap_do_unregif(priv, nifp);
- nifp = NULL;
- }
- }
-out:
- *err = error;
- if (error) {
- /* we should drop the allocator, but only
- * if we were the ones who grabbed it
- */
- if (need_mem)
- netmap_drop_memory_locked(priv);
- priv->np_na = NULL;
- }
- if (nifp != NULL) {
- /*
- * advertise that the interface is ready bt setting ni_nifp.
- * The barrier is needed because readers (poll and *SYNC)
- * check for priv->np_nifp != NULL without locking
- */
- wmb(); /* make sure previous writes are visible to all CPUs */
- priv->np_nifp = nifp;
+ if (error)
+ goto err_del_if;
}
- return nifp;
+
+ /*
+ * advertise that the interface is ready by setting np_nifp.
+ * The barrier is needed because readers (poll, *SYNC and mmap)
+ * check for priv->np_nifp != NULL without locking
+ */
+ mb(); /* make sure previous writes are visible to all CPUs */
+ priv->np_nifp = nifp;
+
+ return 0;
+
+err_del_if:
+ memset(&na->na_lut, 0, sizeof(na->na_lut));
+ na->active_fds--;
+ netmap_mem_if_delete(na, nifp);
+err_rel_excl:
+ netmap_rel_exclusive(priv);
+err_del_rings:
+ if (na->active_fds == 0)
+ netmap_mem_rings_delete(na);
+err_del_krings:
+ if (na->active_fds == 0)
+ na->nm_krings_delete(na);
+err_drop_mem:
+ netmap_mem_deref(na->nm_mem, na);
+err:
+ priv->np_na = NULL;
+ return error;
+}
+
+
+/*
+ * update kring and ring at the end of txsync:
+ * copy nr_hwtail into both kring->rtail and the ring's tail field.
+ */
+static inline void
+nm_txsync_finalize(struct netmap_kring *kring)
+{
+ /* update ring tail to what the kernel knows */
+ kring->ring->tail = kring->rtail = kring->nr_hwtail;
+
+ /* note, head/rhead/hwcur might be behind cur/rcur
+ * if no carrier
+ */
+ ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
+ kring->name, kring->nr_hwcur, kring->nr_hwtail,
+ kring->rhead, kring->rcur, kring->rtail);
+}
+
+
+/*
+ * update kring and ring at the end of rxsync:
+ * copy nr_hwtail into the ring's tail field (and into rtail), then
+ * snapshot the ring's head and cur into rhead/rcur for the next round.
+ */
+static inline void
+nm_rxsync_finalize(struct netmap_kring *kring)
+{
+ /* tell userspace that there might be new packets */
+ ND("head %d cur %d tail %d -> %d", kring->ring->head, kring->ring->cur,
+ kring->ring->tail, kring->nr_hwtail);
+ kring->ring->tail = kring->rtail = kring->nr_hwtail;
+ /* make a copy of the state for next round */
+ kring->rhead = kring->ring->head;
+ kring->rcur = kring->ring->cur;
}
@@ -2021,6 +2081,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
u_int i, qfirst, qlast;
struct netmap_if *nifp;
struct netmap_kring *krings;
+ enum txrx t;
(void)dev; /* UNUSED */
(void)fflag; /* UNUSED */
@@ -2108,7 +2169,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
do {
u_int memflags;
- if (priv->np_na != NULL) { /* thread already registered */
+ if (priv->np_nifp != NULL) { /* thread already registered */
error = EBUSY;
break;
}
@@ -2121,12 +2182,12 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = EBUSY;
break;
}
- nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error);
- if (!nifp) { /* reg. failed, release priv and ref */
+ error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
+ if (error) { /* reg. failed, release priv and ref */
netmap_adapter_put(na);
- priv->np_nifp = NULL;
break;
}
+ nifp = priv->np_nifp;
priv->np_td = td; // XXX kqueue, debugging only
/* return the offset of the netmap_if object */
@@ -2137,16 +2198,17 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
&nmr->nr_arg2);
if (error) {
+ netmap_do_unregif(priv);
netmap_adapter_put(na);
break;
}
if (memflags & NETMAP_MEM_PRIVATE) {
*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
}
- priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ?
- &na->tx_si : &na->tx_rings[priv->np_txqfirst].si;
- priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ?
- &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si;
+ for_rx_tx(t) {
+ priv->np_si[t] = nm_si_user(priv, t) ?
+ &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si;
+ }
if (nmr->nr_arg3) {
D("requested %d extra buffers", nmr->nr_arg3);
@@ -2182,15 +2244,10 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
break;
}
- if (cmd == NIOCTXSYNC) {
- krings = na->tx_rings;
- qfirst = priv->np_txqfirst;
- qlast = priv->np_txqlast;
- } else {
- krings = na->rx_rings;
- qfirst = priv->np_rxqfirst;
- qlast = priv->np_rxqlast;
- }
+ t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
+ krings = NMR(na, t);
+ qfirst = priv->np_qfirst[t];
+ qlast = priv->np_qlast[t];
for (i = qfirst; i < qlast; i++) {
struct netmap_kring *kring = krings + i;
@@ -2205,15 +2262,19 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
kring->nr_hwcur);
if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
netmap_ring_reinit(kring);
- } else {
- kring->nm_sync(kring, NAF_FORCE_RECLAIM);
+ } else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) {
+ nm_txsync_finalize(kring);
}
if (netmap_verbose & NM_VERB_TXSYNC)
D("post txsync ring %d cur %d hwcur %d",
i, kring->ring->cur,
kring->nr_hwcur);
} else {
- kring->nm_sync(kring, NAF_FORCE_READ);
+ if (nm_rxsync_prologue(kring) >= kring->nkr_num_slots) {
+ netmap_ring_reinit(kring);
+ } else if (kring->nm_sync(kring, NAF_FORCE_READ) == 0) {
+ nm_rxsync_finalize(kring);
+ }
microtime(&na->rx_rings[i].ring->ts);
}
nm_kr_put(kring);
@@ -2221,9 +2282,11 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
break;
+#ifdef WITH_VALE
case NIOCCONFIG:
error = netmap_bdg_config(nmr);
break;
+#endif
#ifdef __FreeBSD__
case FIONBIO:
case FIOASYNC:
@@ -2286,10 +2349,13 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
struct netmap_priv_d *priv = NULL;
struct netmap_adapter *na;
struct netmap_kring *kring;
- u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
+ u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
+#define want_tx want[NR_TX]
+#define want_rx want[NR_RX]
struct mbq q; /* packets from hw queues to host stack */
void *pwait = dev; /* linux compatibility */
int is_kevent = 0;
+ enum txrx t;
/*
* In order to avoid nested locks, we need to "double check"
@@ -2320,7 +2386,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
D("No if registered");
return POLLERR;
}
- rmb(); /* make sure following reads are not from cache */
+ mb(); /* make sure following reads are not from cache */
na = priv->np_na;
@@ -2346,28 +2412,22 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* there are pending packets to send. The latter can be disabled
* passing NETMAP_NO_TX_POLL in the NIOCREG call.
*/
- check_all_tx = nm_tx_si_user(priv);
- check_all_rx = nm_rx_si_user(priv);
+ check_all_tx = nm_si_user(priv, NR_TX);
+ check_all_rx = nm_si_user(priv, NR_RX);
/*
* We start with a lock free round which is cheap if we have
* slots available. If this fails, then lock and call the sync
* routines.
*/
- for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) {
- kring = &na->rx_rings[i];
- /* XXX compare ring->cur and kring->tail */
- if (!nm_ring_empty(kring->ring)) {
- revents |= want_rx;
- want_rx = 0; /* also breaks the loop */
- }
- }
- for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) {
- kring = &na->tx_rings[i];
- /* XXX compare ring->cur and kring->tail */
- if (!nm_ring_empty(kring->ring)) {
- revents |= want_tx;
- want_tx = 0; /* also breaks the loop */
+ for_rx_tx(t) {
+ for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
+ kring = &NMR(na, t)[i];
+ /* XXX compare ring->cur and kring->tail */
+ if (!nm_ring_empty(kring->ring)) {
+ revents |= want[t];
+ want[t] = 0; /* also breaks the loop */
+ }
}
}
@@ -2386,7 +2446,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* used to skip rings with no pending transmissions.
*/
flush_tx:
- for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
+ for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
int found = 0;
kring = &na->tx_rings[i];
@@ -2410,6 +2470,8 @@ flush_tx:
} else {
if (kring->nm_sync(kring, 0))
revents |= POLLERR;
+ else
+ nm_txsync_finalize(kring);
}
/*
@@ -2423,12 +2485,12 @@ flush_tx:
if (found) { /* notify other listeners */
revents |= want_tx;
want_tx = 0;
- na->nm_notify(na, i, NR_TX, 0);
+ kring->nm_notify(kring, 0);
}
}
if (want_tx && retry_tx && !is_kevent) {
OS_selrecord(td, check_all_tx ?
- &na->tx_si : &na->tx_rings[priv->np_txqfirst].si);
+ &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
retry_tx = 0;
goto flush_tx;
}
@@ -2442,7 +2504,7 @@ flush_tx:
int send_down = 0; /* transparent mode */
/* two rounds here for race avoidance */
do_retry_rx:
- for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
+ for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
int found = 0;
kring = &na->rx_rings[i];
@@ -2454,6 +2516,12 @@ do_retry_rx:
continue;
}
+ if (nm_rxsync_prologue(kring) >= kring->nkr_num_slots) {
+ netmap_ring_reinit(kring);
+ revents |= POLLERR;
+ }
+ /* now we can use kring->rcur, rtail */
+
/*
* transparent mode support: collect packets
* from the rxring(s).
@@ -2468,17 +2536,18 @@ do_retry_rx:
if (kring->nm_sync(kring, 0))
revents |= POLLERR;
+ else
+ nm_rxsync_finalize(kring);
if (netmap_no_timestamp == 0 ||
kring->ring->flags & NR_TIMESTAMP) {
microtime(&kring->ring->ts);
}
- /* after an rxsync we can use kring->rcur, rtail */
found = kring->rcur != kring->rtail;
nm_kr_put(kring);
if (found) {
revents |= want_rx;
retry_rx = 0;
- na->nm_notify(na, i, NR_RX, 0);
+ kring->nm_notify(kring, 0);
}
}
@@ -2497,7 +2566,7 @@ do_retry_rx:
if (retry_rx && !is_kevent)
OS_selrecord(td, check_all_rx ?
- &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si);
+ &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
if (send_down > 0 || retry_rx) {
retry_rx = 0;
if (send_down)
@@ -2523,6 +2592,8 @@ do_retry_rx:
netmap_send_up(na->ifp, &q);
return (revents);
+#undef want_tx
+#undef want_rx
}
@@ -2532,27 +2603,19 @@ static int netmap_hw_krings_create(struct netmap_adapter *);
/* default notify callback */
static int
-netmap_notify(struct netmap_adapter *na, u_int n_ring,
- enum txrx tx, int flags)
+netmap_notify(struct netmap_kring *kring, int flags)
{
- struct netmap_kring *kring;
+ struct netmap_adapter *na = kring->na;
+ enum txrx t = kring->tx;
+
+ OS_selwakeup(&kring->si, PI_NET);
+ /* optimization: avoid a wake up on the global
+ * queue if nobody has registered for more
+ * than one ring
+ */
+ if (na->si_users[t] > 0)
+ OS_selwakeup(&na->si[t], PI_NET);
- if (tx == NR_TX) {
- kring = na->tx_rings + n_ring;
- OS_selwakeup(&kring->si, PI_NET);
- /* optimization: avoid a wake up on the global
- * queue if nobody has registered for more
- * than one ring
- */
- if (na->tx_si_users > 0)
- OS_selwakeup(&na->tx_si, PI_NET);
- } else {
- kring = na->rx_rings + n_ring;
- OS_selwakeup(&kring->si, PI_NET);
- /* optimization: same as above */
- if (na->rx_si_users > 0)
- OS_selwakeup(&na->rx_si, PI_NET);
- }
return 0;
}
@@ -2605,11 +2668,14 @@ netmap_attach_common(struct netmap_adapter *na)
if (na->nm_mem == NULL)
/* use the global allocator */
na->nm_mem = &nm_mem;
+ netmap_mem_get(na->nm_mem);
+#ifdef WITH_VALE
if (na->nm_bdg_attach == NULL)
/* no special nm_bdg_attach callback. On VALE
* attach, we need to interpose a bwrap
*/
na->nm_bdg_attach = netmap_bwrap_attach;
+#endif
return 0;
}
@@ -2626,8 +2692,8 @@ netmap_detach_common(struct netmap_adapter *na)
na->nm_krings_delete(na);
}
netmap_pipe_dealloc(na);
- if (na->na_flags & NAF_MEM_OWNER)
- netmap_mem_private_delete(na->nm_mem);
+ if (na->nm_mem)
+ netmap_mem_put(na->nm_mem);
bzero(na, sizeof(*na));
free(na, M_DEVBUF);
}
@@ -2678,7 +2744,7 @@ netmap_attach(struct netmap_adapter *arg)
if (hwna == NULL)
goto fail;
hwna->up = *arg;
- hwna->up.na_flags |= NAF_HOST_RINGS;
+ hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
hwna->nm_hw_register = hwna->up.nm_register;
hwna->up.nm_register = netmap_hw_register;
@@ -2691,7 +2757,7 @@ netmap_attach(struct netmap_adapter *arg)
#ifdef linux
if (ifp->netdev_ops) {
/* prepare a clone of the netdev ops */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
+#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
#else
hwna->nm_ndo = *ifp->netdev_ops;
@@ -2702,7 +2768,7 @@ netmap_attach(struct netmap_adapter *arg)
hwna->nm_eto = *ifp->ethtool_ops;
}
hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
-#ifdef ETHTOOL_SCHANNELS
+#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
hwna->nm_eto.set_channels = linux_netmap_set_channels;
#endif
if (arg->nm_config == NULL) {
@@ -2710,17 +2776,9 @@ netmap_attach(struct netmap_adapter *arg)
}
#endif /* linux */
-#ifdef __FreeBSD__
if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
hwna->up.num_tx_rings, hwna->up.num_tx_desc,
hwna->up.num_rx_rings, hwna->up.num_rx_desc);
-#else
- D("success for %s tx %d/%d rx %d/%d queues/slots",
- hwna->up.name,
- hwna->up.num_tx_rings, hwna->up.num_tx_desc,
- hwna->up.num_rx_rings, hwna->up.num_rx_desc
- );
-#endif
return 0;
fail:
@@ -2788,16 +2846,19 @@ netmap_detach(struct ifnet *ifp)
NMG_LOCK();
netmap_disable_all_rings(ifp);
- if (!netmap_adapter_put(na)) {
- /* someone is still using the adapter,
- * tell them that the interface is gone
- */
- na->ifp = NULL;
- // XXX also clear NAF_NATIVE_ON ?
- na->na_flags &= ~NAF_NETMAP_ON;
- /* give them a chance to notice */
- netmap_enable_all_rings(ifp);
+ na->ifp = NULL;
+ na->na_flags &= ~NAF_NETMAP_ON;
+ /*
+ * if the netmap adapter is not native, somebody
+ * changed it, so we can not release it here.
+ * The NULL na->ifp will notify the new owner that
+ * the driver is gone.
+ */
+ if (na->na_flags & NAF_NATIVE) {
+ netmap_adapter_put(na);
}
+ /* give them a chance to notice */
+ netmap_enable_all_rings(ifp);
NMG_UNLOCK();
}
@@ -2824,6 +2885,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
struct mbq *q;
int space;
+ kring = &na->rx_rings[na->num_rx_rings];
// XXX [Linux] we do not need this lock
// if we follow the down/configure/up protocol -gl
// mtx_lock(&na->core_lock);
@@ -2834,7 +2896,6 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
goto done;
}
- kring = &na->rx_rings[na->num_rx_rings];
q = &kring->rx_queue;
// XXX reconsider long packets if we handle fragments
@@ -2872,7 +2933,7 @@ done:
if (m)
m_freem(m);
/* unconditionally wake up listeners */
- na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
+ kring->nm_notify(kring, 0);
/* this is normally netmap_notify(), but for nics
* connected to a bridge it is netmap_bwrap_intr_notify(),
* that possibly forwards the frames through the switch
@@ -2953,7 +3014,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
* We do the wakeup here, but the ring is not yet reconfigured.
* However, we are under lock so there are no races.
*/
- na->nm_notify(na, n, tx, 0);
+ kring->nm_notify(kring, 0);
return kring->ring->slot;
}
@@ -2977,6 +3038,7 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
struct netmap_adapter *na = NA(ifp);
struct netmap_kring *kring;
+ enum txrx t = (work_done ? NR_RX : NR_TX);
q &= NETMAP_RING_MASK;
@@ -2984,19 +3046,16 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
}
- if (work_done) { /* RX path */
- if (q >= na->num_rx_rings)
- return; // not a physical queue
- kring = na->rx_rings + q;
+ if (q >= nma_get_nrings(na, t))
+ return; // not a physical queue
+
+ kring = NMR(na, t) + q;
+
+ if (t == NR_RX) {
kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ?
- na->nm_notify(na, q, NR_RX, 0);
*work_done = 1; /* do not fire napi again */
- } else { /* TX path */
- if (q >= na->num_tx_rings)
- return; // not a physical queue
- kring = na->tx_rings + q;
- na->nm_notify(na, q, NR_TX, 0);
}
+ kring->nm_notify(kring, 0);
}
@@ -3057,7 +3116,7 @@ extern struct cdevsw netmap_cdevsw;
void
netmap_fini(void)
{
- // XXX destroy_bridges() ?
+ netmap_uninit_bridges();
if (netmap_dev)
destroy_dev(netmap_dev);
netmap_mem_fini();
@@ -3087,10 +3146,14 @@ netmap_init(void)
if (!netmap_dev)
goto fail;
- netmap_init_bridges();
+ error = netmap_init_bridges();
+ if (error)
+ goto fail;
+
#ifdef __FreeBSD__
nm_vi_init_index();
#endif
+
printf("netmap: loaded module\n");
return (0);
fail:
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index 968724854ea6..ebac6b0efe21 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -24,6 +24,8 @@
*/
/* $FreeBSD$ */
+#include "opt_inet.h"
+#include "opt_inet6.h"
#include <sys/types.h>
#include <sys/module.h>
@@ -148,9 +150,9 @@ nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
* Second argument is non-zero to intercept, 0 to restore
*/
int
-netmap_catch_rx(struct netmap_adapter *na, int intercept)
+netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
{
- struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
if (intercept) {
@@ -183,7 +185,7 @@ void
netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
{
struct netmap_adapter *na = &gna->up.up;
- struct ifnet *ifp = na->ifp;
+ struct ifnet *ifp = netmap_generic_getifp(gna);
if (enable) {
na->if_transmit = ifp->if_transmit;
@@ -494,6 +496,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
{
struct netmap_vm_handle_t *vmh = object->handle;
struct netmap_priv_d *priv = vmh->priv;
+ struct netmap_adapter *na = priv->np_na;
vm_paddr_t paddr;
vm_page_t page;
vm_memattr_t memattr;
@@ -503,7 +506,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
object, (intmax_t)offset, prot, mres);
memattr = object->memattr;
pidx = OFF_TO_IDX(offset);
- paddr = netmap_mem_ofstophys(priv->np_mref, offset);
+ paddr = netmap_mem_ofstophys(na->nm_mem, offset);
if (paddr == 0)
return VM_PAGER_FAIL;
@@ -568,14 +571,14 @@ netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
error = devfs_get_cdevpriv((void**)&priv);
if (error)
goto err_unlock;
+ if (priv->np_nifp == NULL) {
+ error = EINVAL;
+ goto err_unlock;
+ }
vmh->priv = priv;
priv->np_refcount++;
NMG_UNLOCK();
- error = netmap_get_memory(priv);
- if (error)
- goto err_deref;
-
obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
&netmap_cdev_pager_ops, objsize, prot,
*foff, NULL);
@@ -598,8 +601,18 @@ err_unlock:
return error;
}
-
-// XXX can we remove this ?
+/*
+ * netmap_close() is called on every close(), but we do not need to do
+ * anything at that moment, since the process may have other open file
+ * descriptors for /dev/netmap. Instead, we pass netmap_dtor() to
+ * devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor
+ * when the last fd pointing to the device is closed.
+ *
+ * Unfortunately, FreeBSD does not automatically track active mmap()s on an fd,
+ * so we have to track them by ourselves (see above). The result is that
+ * netmap_dtor() is called when the process has no open fds and no active
+ * memory maps on /dev/netmap, as in linux.
+ */
static int
netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
@@ -673,7 +686,7 @@ static void
netmap_knrdetach(struct knote *kn)
{
struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
- struct selinfo *si = &priv->np_rxsi->si;
+ struct selinfo *si = &priv->np_si[NR_RX]->si;
D("remove selinfo %p", si);
knlist_remove(&si->si_note, kn, 0);
@@ -683,7 +696,7 @@ static void
netmap_knwdetach(struct knote *kn)
{
struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
- struct selinfo *si = &priv->np_txsi->si;
+ struct selinfo *si = &priv->np_si[NR_TX]->si;
D("remove selinfo %p", si);
knlist_remove(&si->si_note, kn, 0);
@@ -773,7 +786,7 @@ netmap_kqfilter(struct cdev *dev, struct knote *kn)
return 1;
}
/* the si is indicated in the priv */
- si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
+ si = priv->np_si[(ev == EVFILT_WRITE) ? NR_TX : NR_RX];
// XXX lock(priv) ?
kn->kn_fop = (ev == EVFILT_WRITE) ?
&netmap_wfiltops : &netmap_rfiltops;
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index ecdb36824077..bc5b452cfaae 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -305,7 +305,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
}
rtnl_lock();
/* Prepare to intercept incoming traffic. */
- error = netmap_catch_rx(na, 1);
+ error = netmap_catch_rx(gna, 1);
if (error) {
D("netdev_rx_handler_register() failed (%d)", error);
goto register_handler;
@@ -342,7 +342,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
netmap_catch_tx(gna, 0);
/* Do not intercept packets on the rx path. */
- netmap_catch_rx(na, 0);
+ netmap_catch_rx(gna, 0);
rtnl_unlock();
@@ -645,8 +645,6 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
generic_netmap_tx_clean(kring);
- nm_txsync_finalize(kring);
-
return 0;
}
@@ -711,7 +709,7 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nm_i; /* index into the netmap ring */ //j,
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const head = nm_rxsync_prologue(kring);
+ u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
if (head > lim)
@@ -774,8 +772,6 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
}
kring->nr_hwcur = head;
}
- /* tell userspace that there might be new packets. */
- nm_rxsync_finalize(kring);
IFRATE(rate_ctx.new.rxsync++);
return 0;
@@ -784,20 +780,25 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
static void
generic_netmap_dtor(struct netmap_adapter *na)
{
- struct ifnet *ifp = na->ifp;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
+ struct ifnet *ifp = netmap_generic_getifp(gna);
struct netmap_adapter *prev_na = gna->prev;
if (prev_na != NULL) {
D("Released generic NA %p", gna);
- if_rele(na->ifp);
+ if_rele(ifp);
netmap_adapter_put(prev_na);
+ if (na->ifp == NULL) {
+ /*
+ * The driver has been removed without releasing
+ * the reference so we need to do it here.
+ */
+ netmap_adapter_put(prev_na);
+ }
}
- if (ifp != NULL) {
- WNA(ifp) = prev_na;
- D("Restored native NA %p", prev_na);
- na->ifp = NULL;
- }
+ WNA(ifp) = prev_na;
+ D("Restored native NA %p", prev_na);
+ na->ifp = NULL;
}
/*
@@ -834,6 +835,7 @@ generic_netmap_attach(struct ifnet *ifp)
return ENOMEM;
}
na = (struct netmap_adapter *)gna;
+ strncpy(na->name, ifp->if_xname, sizeof(na->name));
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 95b3a5deda69..fd715cd1378f 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -34,26 +34,46 @@
#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_
+#if defined(linux)
+
+#if defined(CONFIG_NETMAP_VALE)
+#define WITH_VALE
+#endif
+#if defined(CONFIG_NETMAP_PIPE)
+#define WITH_PIPES
+#endif
+#if defined(CONFIG_NETMAP_MONITOR)
+#define WITH_MONITOR
+#endif
+#if defined(CONFIG_NETMAP_GENERIC)
+#define WITH_GENERIC
+#endif
+#if defined(CONFIG_NETMAP_V1000)
+#define WITH_V1000
+#endif
+
+#else /* not linux */
+
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
+#endif
+
#if defined(__FreeBSD__)
#define likely(x) __builtin_expect((long)!!(x), 1L)
#define unlikely(x) __builtin_expect((long)!!(x), 0L)
-#define NM_LOCK_T struct mtx
+#define NM_LOCK_T struct mtx /* low level spinlock, used to protect queues */
-/* netmap global lock */
-#define NMG_LOCK_T struct sx
-#define NMG_LOCK_INIT() sx_init(&netmap_global_lock, \
- "netmap global lock")
-#define NMG_LOCK_DESTROY() sx_destroy(&netmap_global_lock)
-#define NMG_LOCK() sx_xlock(&netmap_global_lock)
-#define NMG_UNLOCK() sx_xunlock(&netmap_global_lock)
-#define NMG_LOCK_ASSERT() sx_assert(&netmap_global_lock, SA_XLOCKED)
+#define NM_MTX_T struct sx /* OS-specific mutex (sleepable) */
+#define NM_MTX_INIT(m) sx_init(&(m), #m)
+#define NM_MTX_DESTROY(m) sx_destroy(&(m))
+#define NM_MTX_LOCK(m) sx_xlock(&(m))
+#define NM_MTX_UNLOCK(m) sx_xunlock(&(m))
+#define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED)
#define NM_SELINFO_T struct nm_selinfo
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
@@ -102,6 +122,8 @@ struct ethtool_ops {
};
struct hrtimer {
};
+#define NM_BNS_GET(b)
+#define NM_BNS_PUT(b)
#elif defined (linux)
@@ -117,20 +139,12 @@ struct hrtimer {
#define NM_ATOMIC_T volatile long unsigned int
-#define NM_MTX_T struct mutex
-#define NM_MTX_INIT(m, s) do { (void)s; mutex_init(&(m)); } while (0)
-#define NM_MTX_DESTROY(m) do { (void)m; } while (0)
+#define NM_MTX_T struct mutex /* OS-specific sleepable lock */
+#define NM_MTX_INIT(m) mutex_init(&(m))
+#define NM_MTX_DESTROY(m) do { (void)(m); } while (0)
#define NM_MTX_LOCK(m) mutex_lock(&(m))
#define NM_MTX_UNLOCK(m) mutex_unlock(&(m))
-#define NM_MTX_LOCK_ASSERT(m) mutex_is_locked(&(m))
-
-#define NMG_LOCK_T NM_MTX_T
-#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock, \
- "netmap_global_lock")
-#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock)
-#define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock)
-#define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock)
-#define NMG_LOCK_ASSERT() NM_MTX_LOCK_ASSERT(netmap_global_lock)
+#define NM_MTX_ASSERT(m) mutex_is_locked(&(m))
#ifndef DEV_NETMAP
#define DEV_NETMAP
@@ -152,6 +166,13 @@ struct hrtimer {
#endif /* end - platform-specific code */
+#define NMG_LOCK_T NM_MTX_T
+#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock)
+#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock)
+#define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock)
+#define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock)
+#define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
+
#define ND(format, ...)
#define D(format, ...) \
do { \
@@ -185,6 +206,23 @@ const char *nm_dump_buf(char *p, int len, int lim, char *dst);
extern NMG_LOCK_T netmap_global_lock;
+enum txrx { NR_RX = 0, NR_TX = 1, NR_TXRX };
+
+static __inline const char* /* printable name of a ring direction */
+nm_txrx2str(enum txrx t)
+{
+ return (t== NR_RX ? "RX" : "TX"); /* any value other than NR_RX yields "TX" */
+}
+
+static __inline enum txrx /* opposite ring direction */
+nm_txrx_swap(enum txrx t)
+{
+ return (t== NR_RX ? NR_TX : NR_RX); /* any value other than NR_RX maps to NR_RX */
+}
+
+#define for_rx_tx(t) for ((t) = 0; (t) < NR_TXRX; (t)++)
+
+
/*
* private, kernel view of a ring. Keeps track of the status of
* a ring across system calls.
@@ -259,6 +297,7 @@ struct netmap_kring {
uint32_t nr_kflags; /* private driver flags */
#define NKR_PENDINTR 0x1 // Pending interrupt.
+#define NKR_EXCLUSIVE 0x2 /* exclusive binding */
uint32_t nkr_num_slots;
/*
@@ -308,7 +347,10 @@ struct netmap_kring {
// u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
struct mbq rx_queue; /* intercepted rx mbufs. */
+ uint32_t users; /* existing bindings for this ring */
+
uint32_t ring_id; /* debugging */
+ enum txrx tx; /* kind of ring (tx or rx) */
char name[64]; /* diagnostic */
/* [tx]sync callback for this kring.
@@ -323,6 +365,7 @@ struct netmap_kring {
* any of the nm_krings_create callbacks.
*/
int (*nm_sync)(struct netmap_kring *kring, int flags);
+ int (*nm_notify)(struct netmap_kring *kring, int flags);
#ifdef WITH_PIPES
struct netmap_kring *pipe; /* if this is a pipe ring,
@@ -333,17 +376,25 @@ struct netmap_kring {
*/
#endif /* WITH_PIPES */
+#ifdef WITH_VALE
+ int (*save_notify)(struct netmap_kring *kring, int flags);
+#endif
+
#ifdef WITH_MONITOR
- /* pointer to the adapter that is monitoring this kring (if any)
- */
- struct netmap_monitor_adapter *monitor;
+ /* array of krings that are monitoring this kring */
+ struct netmap_kring **monitors;
+ uint32_t max_monitors; /* current size of the monitors array */
+ uint32_t n_monitors; /* next unused entry in the monitor array */
/*
- * Monitors work by intercepting the txsync and/or rxsync of the
- * monitored krings. This is implemented by replacing
- * the nm_sync pointer above and saving the previous
- * one in save_sync below.
+ * Monitors work by intercepting the sync and notify callbacks of the
+ * monitored krings. This is implemented by replacing the pointers
+ * above and saving the previous ones in mon_* pointers below
*/
- int (*save_sync)(struct netmap_kring *kring, int flags);
+ int (*mon_sync)(struct netmap_kring *kring, int flags);
+ int (*mon_notify)(struct netmap_kring *kring, int flags);
+
+ uint32_t mon_tail; /* last seen slot on rx */
+ uint32_t mon_pos; /* index of this ring in the monitored ring array */
#endif
} __attribute__((__aligned__(64)));
@@ -414,8 +465,11 @@ tail->| |<-hwtail | |<-hwlease
*/
-
-enum txrx { NR_RX = 0, NR_TX = 1 };
+struct netmap_lut { /* buffer lookup table plus the bounds needed to index it */
+ struct lut_entry *lut; /* entries; NMB()/PNMB() read .vaddr and .paddr from them */
+ uint32_t objtotal; /* number of entries: NMB()/PNMB() treat any index >= objtotal as invalid */
+ uint32_t objsize; /* buffer size */
+};
struct netmap_vp_adapter; // forward
@@ -445,11 +499,10 @@ struct netmap_adapter {
* forwarding packets coming from this
* interface
*/
-#define NAF_MEM_OWNER 8 /* the adapter is responsible for the
- * deallocation of the memory allocator
+#define NAF_MEM_OWNER 8 /* the adapter uses its own memory area
+ * that cannot be changed
*/
-#define NAF_NATIVE_ON 16 /* the adapter is native and the attached
- * interface is in netmap mode.
+#define NAF_NATIVE 16 /* the adapter is native.
* Virtual ports (vale, pipe, monitor...)
* should never use this flag.
*/
@@ -469,7 +522,7 @@ struct netmap_adapter {
u_int num_rx_rings; /* number of adapter receive rings */
u_int num_tx_rings; /* number of adapter transmit rings */
- u_int num_tx_desc; /* number of descriptor in each queue */
+ u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
/* tx_rings and rx_rings are private but allocated
@@ -483,10 +536,10 @@ struct netmap_adapter {
/* (used for leases) */
- NM_SELINFO_T tx_si, rx_si; /* global wait queues */
+ NM_SELINFO_T si[NR_TXRX]; /* global wait queues */
/* count users of the global wait queues */
- int tx_si_users, rx_si_users;
+ int si_users[NR_TXRX];
void *pdev; /* used to store pci device */
@@ -544,6 +597,7 @@ struct netmap_adapter {
int (*nm_txsync)(struct netmap_kring *kring, int flags);
int (*nm_rxsync)(struct netmap_kring *kring, int flags);
+ int (*nm_notify)(struct netmap_kring *kring, int flags);
#define NAF_FORCE_READ 1
#define NAF_FORCE_RECLAIM 2
/* return configuration information */
@@ -551,12 +605,6 @@ struct netmap_adapter {
u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
int (*nm_krings_create)(struct netmap_adapter *);
void (*nm_krings_delete)(struct netmap_adapter *);
- int (*nm_notify)(struct netmap_adapter *,
- u_int ring, enum txrx, int flags);
-#define NAF_DISABLE_NOTIFY 8 /* notify that the stopped state of the
- * ring has changed (kring->nkr_stopped)
- */
-
#ifdef WITH_VALE
/*
* nm_bdg_attach() initializes the na_vp field to point
@@ -593,9 +641,7 @@ struct netmap_adapter {
* buffer addresses, and the total number of buffers.
*/
struct netmap_mem_d *nm_mem;
- struct lut_entry *na_lut;
- uint32_t na_lut_objtotal; /* max buffer index */
- uint32_t na_lut_objsize; /* buffer size */
+ struct netmap_lut na_lut;
/* additional information attached to this adapter
* by other netmap subsystems. Currently used by
@@ -603,16 +649,49 @@ struct netmap_adapter {
*/
void *na_private;
-#ifdef WITH_PIPES
/* array of pipes that have this adapter as a parent */
struct netmap_pipe_adapter **na_pipes;
int na_next_pipe; /* next free slot in the array */
int na_max_pipes; /* size of the array */
-#endif /* WITH_PIPES */
char name[64];
};
+static __inline u_int /* descriptor count of na for direction t */
+nma_get_ndesc(struct netmap_adapter *na, enum txrx t)
+{
+ return (t == NR_TX ? na->num_tx_desc : na->num_rx_desc); /* anything but NR_TX reads the rx field */
+}
+
+static __inline void /* store v as the descriptor count of na for direction t */
+nma_set_ndesc(struct netmap_adapter *na, enum txrx t, u_int v)
+{
+ if (t == NR_TX)
+ na->num_tx_desc = v;
+ else /* anything but NR_TX writes the rx field */
+ na->num_rx_desc = v;
+}
+
+static __inline u_int /* ring count of na for direction t */
+nma_get_nrings(struct netmap_adapter *na, enum txrx t)
+{
+ return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings); /* anything but NR_TX reads the rx field */
+}
+
+static __inline void /* store v as the ring count of na for direction t */
+nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
+{
+ if (t == NR_TX)
+ na->num_tx_rings = v;
+ else /* anything but NR_TX writes the rx field */
+ na->num_rx_rings = v;
+}
+
+static __inline struct netmap_kring* /* kring array of na for direction t */
+NMR(struct netmap_adapter *na, enum txrx t)
+{
+ return (t == NR_TX ? na->tx_rings : na->rx_rings); /* anything but NR_TX selects rx_rings */
+}
/*
* If the NIC is owned by the kernel
@@ -624,7 +703,6 @@ struct netmap_adapter {
#define NETMAP_OWNED_BY_ANY(na) \
(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))
-
/*
* derived netmap adapters for various types of ports
*/
@@ -645,6 +723,8 @@ struct netmap_vp_adapter { /* VALE software port */
u_int virt_hdr_len;
/* Maximum Frame Size, used in bdg_mismatch_datapath() */
u_int mfs;
+ /* Last source MAC on this port */
+ uint64_t last_smac;
};
@@ -689,15 +769,9 @@ struct netmap_generic_adapter { /* emulated device */
#endif /* WITH_GENERIC */
static __inline int
-netmap_real_tx_rings(struct netmap_adapter *na)
-{
- return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS);
-}
-
-static __inline int
-netmap_real_rx_rings(struct netmap_adapter *na)
+netmap_real_rings(struct netmap_adapter *na, enum txrx t)
{
- return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS);
+ return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS);
}
#ifdef WITH_VALE
@@ -751,9 +825,6 @@ struct netmap_bwrap_adapter {
struct netmap_vp_adapter host; /* for host rings */
struct netmap_adapter *hwna; /* the underlying device */
- /* backup of the hwna notify callback */
- int (*save_notify)(struct netmap_adapter *,
- u_int ring, enum txrx, int flags);
/* backup of the hwna memory allocator */
struct netmap_mem_d *save_nmd;
@@ -847,6 +918,14 @@ static __inline int nm_kr_tryget(struct netmap_kring *kr)
return 0;
}
+static __inline void nm_kr_get(struct netmap_kring *kr) /* blocking counterpart of nm_kr_tryget(): loops instead of failing */
+{
+ while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)) /* retry until the test-and-set on nr_busy yields zero; presumably non-zero means the kring is in use elsewhere -- confirm against the macro */
+ tsleep(kr, 0, "NM_KR_GET", 4); /* sleep on kr with a timeout argument of 4 before the next attempt */
+}
+
+
+
/*
* The following functions are used by individual drivers to
@@ -896,15 +975,15 @@ const char *netmap_bdg_name(struct netmap_vp_adapter *);
#endif /* WITH_VALE */
static inline int
-nm_native_on(struct netmap_adapter *na)
+nm_netmap_on(struct netmap_adapter *na)
{
- return na && na->na_flags & NAF_NATIVE_ON;
+ return na && na->na_flags & NAF_NETMAP_ON;
}
static inline int
-nm_netmap_on(struct netmap_adapter *na)
+nm_native_on(struct netmap_adapter *na)
{
- return na && na->na_flags & NAF_NETMAP_ON;
+ return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE);
}
/* set/clear native flags and if_transmit/netdev_ops */
@@ -913,7 +992,7 @@ nm_set_native_flags(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
- na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
+ na->na_flags |= NAF_NETMAP_ON;
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
ifp->if_capenable |= IFCAP_NETMAP;
#endif
@@ -940,63 +1019,13 @@ nm_clear_native_flags(struct netmap_adapter *na)
ifp->netdev_ops = (void *)na->if_transmit;
ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
#endif
- na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
+ na->na_flags &= ~NAF_NETMAP_ON;
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
ifp->if_capenable &= ~IFCAP_NETMAP;
#endif
}
-/*
- * validates parameters in the ring/kring, returns a value for head
- * If any error, returns ring_size to force a reinit.
- */
-uint32_t nm_txsync_prologue(struct netmap_kring *);
-
-
-/*
- * validates parameters in the ring/kring, returns a value for head,
- * and the 'reserved' value in the argument.
- * If any error, returns ring_size lim to force a reinit.
- */
-uint32_t nm_rxsync_prologue(struct netmap_kring *);
-
-
-/*
- * update kring and ring at the end of txsync.
- */
-static inline void
-nm_txsync_finalize(struct netmap_kring *kring)
-{
- /* update ring tail to what the kernel knows */
- kring->ring->tail = kring->rtail = kring->nr_hwtail;
-
- /* note, head/rhead/hwcur might be behind cur/rcur
- * if no carrier
- */
- ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
- kring->name, kring->nr_hwcur, kring->nr_hwtail,
- kring->rhead, kring->rcur, kring->rtail);
-}
-
-
-/*
- * update kring and ring at the end of rxsync
- */
-static inline void
-nm_rxsync_finalize(struct netmap_kring *kring)
-{
- /* tell userspace that there might be new packets */
- //struct netmap_ring *ring = kring->ring;
- ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail,
- kring->nr_hwtail);
- kring->ring->tail = kring->rtail = kring->nr_hwtail;
- /* make a copy of the state for next round */
- kring->rhead = kring->ring->head;
- kring->rcur = kring->ring->cur;
-}
-
-
/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define NM_CHECK_ADDR_LEN(_na, _a, _l) do { \
@@ -1050,14 +1079,15 @@ int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
* been created using netmap_krings_create
*/
void netmap_krings_delete(struct netmap_adapter *na);
+int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
+
/* set the stopped/enabled status of ring
* When stopping, they also wait for all current activity on the ring to
* terminate. The status change is then notified using the na nm_notify
* callback.
*/
-void netmap_set_txring(struct netmap_adapter *, u_int ring_id, int stopped);
-void netmap_set_rxring(struct netmap_adapter *, u_int ring_id, int stopped);
+void netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
/* convenience wrappers for netmap_set_all_rings, used in drivers */
@@ -1066,9 +1096,9 @@ void netmap_enable_all_rings(struct ifnet *);
int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
-struct netmap_if *
+int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
- uint16_t ringid, uint32_t flags, int *err);
+ uint16_t ringid, uint32_t flags);
@@ -1088,7 +1118,7 @@ int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
* XXX in practice "unknown" might be handled same as broadcast.
*/
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
- const struct netmap_vp_adapter *);
+ struct netmap_vp_adapter *);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
struct netmap_bdg_ops {
@@ -1098,7 +1128,7 @@ struct netmap_bdg_ops {
};
u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
- const struct netmap_vp_adapter *);
+ struct netmap_vp_adapter *);
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
@@ -1108,34 +1138,52 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
-void netmap_init_bridges(void);
+struct nm_bridge *netmap_init_bridges2(u_int);
+void netmap_uninit_bridges2(struct nm_bridge *, u_int);
+int netmap_init_bridges(void);
+void netmap_uninit_bridges(void);
int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
int netmap_bdg_config(struct nmreq *nmr);
#else /* !WITH_VALE */
#define netmap_get_bdg_na(_1, _2, _3) 0
-#define netmap_init_bridges(_1)
+#define netmap_init_bridges(_1) 0
+#define netmap_uninit_bridges()
#define netmap_bdg_ctl(_1, _2) EINVAL
#endif /* !WITH_VALE */
#ifdef WITH_PIPES
/* max number of pipes per device */
#define NM_MAXPIPES 64 /* XXX how many? */
-/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */
-int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr);
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
#else /* !WITH_PIPES */
#define NM_MAXPIPES 0
-#define netmap_pipe_alloc(_1, _2) EOPNOTSUPP
+#define netmap_pipe_alloc(_1, _2) 0
#define netmap_pipe_dealloc(_1)
-#define netmap_get_pipe_na(_1, _2, _3) 0
+#define netmap_get_pipe_na(nmr, _2, _3) \
+ ({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \
+ (role__ == NR_REG_PIPE_MASTER || \
+ role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; })
#endif
#ifdef WITH_MONITOR
int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+void netmap_monitor_stop(struct netmap_adapter *na);
#else
-#define netmap_get_monitor_na(_1, _2, _3) 0
+#define netmap_get_monitor_na(nmr, _2, _3) \
+ ((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
+#endif
+
+#ifdef CONFIG_NET_NS
+struct net *netmap_bns_get(void);
+void netmap_bns_put(struct net *);
+void netmap_bns_getbridges(struct nm_bridge **, u_int *);
+#else
+#define netmap_bns_get()
+#define netmap_bns_put(_1)
+#define netmap_bns_getbridges(b, n) \
+ do { *(b) = nm_bridges; *(n) = NM_BRIDGES; } while (0) /* args parenthesized so expression arguments expand as intended */
#endif
/* Various prototypes */
@@ -1186,8 +1234,8 @@ int netmap_adapter_put(struct netmap_adapter *na);
/*
* module variables
*/
-#define NETMAP_BUF_BASE(na) ((na)->na_lut[0].vaddr)
-#define NETMAP_BUF_SIZE(na) ((na)->na_lut_objsize)
+#define NETMAP_BUF_BASE(na) ((na)->na_lut.lut[0].vaddr)
+#define NETMAP_BUF_SIZE(na) ((na)->na_lut.objsize)
extern int netmap_mitigate; // XXX not really used
extern int netmap_no_pendintr;
extern int netmap_verbose; // XXX debugging
@@ -1291,15 +1339,14 @@ netmap_reload_map(struct netmap_adapter *na,
#else /* linux */
int nm_iommu_group_id(bus_dma_tag_t dev);
-extern size_t netmap_mem_get_bufsize(struct netmap_mem_d *);
#include <linux/dma-mapping.h>
static inline void
netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
- if (map) {
- *map = dma_map_single(na->pdev, buf, netmap_mem_get_bufsize(na->nm_mem),
+ if (0 && map) {
+ *map = dma_map_single(na->pdev, buf, na->na_lut.objsize,
DMA_BIDIRECTIONAL);
}
}
@@ -1308,7 +1355,7 @@ static inline void
netmap_unload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map)
{
- u_int sz = netmap_mem_get_bufsize(na->nm_mem);
+ u_int sz = na->na_lut.objsize;
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
@@ -1320,7 +1367,7 @@ static inline void
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
- u_int sz = netmap_mem_get_bufsize(na->nm_mem);
+ u_int sz = na->na_lut.objsize;
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
@@ -1418,9 +1465,9 @@ struct netmap_obj_pool;
static inline void *
NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
- struct lut_entry *lut = na->na_lut;
+ struct lut_entry *lut = na->na_lut.lut;
uint32_t i = slot->buf_idx;
- return (unlikely(i >= na->na_lut_objtotal)) ?
+ return (unlikely(i >= na->na_lut.objtotal)) ?
lut[0].vaddr : lut[i].vaddr;
}
@@ -1428,10 +1475,10 @@ static inline void *
PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
uint32_t i = slot->buf_idx;
- struct lut_entry *lut = na->na_lut;
- void *ret = (i >= na->na_lut_objtotal) ? lut[0].vaddr : lut[i].vaddr;
+ struct lut_entry *lut = na->na_lut.lut;
+ void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
- *pp = (i >= na->na_lut_objtotal) ? lut[0].paddr : lut[i].paddr;
+ *pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr;
return ret;
}
@@ -1459,7 +1506,7 @@ void netmap_txsync_to_host(struct netmap_adapter *na);
* If np_nifp is NULL initialization has not been performed,
* so they should return an error to userspace.
*
- * The ref_done field is used to regulate access to the refcount in the
+ * The ref_done field (XXX ?) is used to regulate access to the refcount in the
* memory allocator. The refcount must be incremented at most once for
* each open("/dev/netmap"). The increment is performed by the first
* function that calls netmap_get_memory() (currently called by
@@ -1472,11 +1519,10 @@ struct netmap_priv_d {
struct netmap_adapter *np_na;
uint32_t np_flags; /* from the ioctl */
- u_int np_txqfirst, np_txqlast; /* range of tx rings to scan */
- u_int np_rxqfirst, np_rxqlast; /* range of rx rings to scan */
+ u_int np_qfirst[NR_TXRX],
+ np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
- struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
/* np_refcount is only used on FreeBSD */
int np_refcount; /* use with NMG_LOCK held */
@@ -1484,7 +1530,7 @@ struct netmap_priv_d {
* Either the local or the global one depending on the
* number of rings.
*/
- NM_SELINFO_T *np_rxsi, *np_txsi;
+ NM_SELINFO_T *np_si[NR_TXRX];
struct thread *np_td; /* kqueue, just debugging */
};
@@ -1507,12 +1553,20 @@ struct netmap_monitor_adapter {
*/
int generic_netmap_attach(struct ifnet *ifp);
-int netmap_catch_rx(struct netmap_adapter *na, int intercept);
+int netmap_catch_rx(struct netmap_generic_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);;
void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
+static inline struct ifnet* /* ifnet to use for a generic adapter */
+netmap_generic_getifp(struct netmap_generic_adapter *gna)
+{
+ if (gna->prev) /* a previous adapter was saved: use its ifp rather than our own */
+ return gna->prev->ifp;
+
+ return gna->up.up.ifp; /* otherwise the ifp stored in the embedded netmap_adapter */
+}
//#define RATE_GENERIC /* Enables communication statistics for generic. */
#ifdef RATE_GENERIC
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index c823376dbab6..023604d49535 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -71,6 +71,7 @@ struct netmap_obj_params {
u_int size;
u_int num;
};
+
struct netmap_obj_pool {
char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */
@@ -106,16 +107,26 @@ struct netmap_obj_pool {
u_int r_objsize;
};
-#ifdef linux
-// XXX a mtx would suffice here 20130415 lr
-#define NMA_LOCK_T struct semaphore
-#else /* !linux */
-#define NMA_LOCK_T struct mtx
-#endif /* linux */
+#define NMA_LOCK_T NM_MTX_T
+
+
+struct netmap_mem_ops {
+ void (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
+ int (*nmd_get_info)(struct netmap_mem_d *, u_int *size,
+ u_int *memflags, uint16_t *id);
-typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
-typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
-typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);
+ vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
+ int (*nmd_config)(struct netmap_mem_d *);
+ int (*nmd_finalize)(struct netmap_mem_d *);
+ void (*nmd_deref)(struct netmap_mem_d *);
+ ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
+ void (*nmd_delete)(struct netmap_mem_d *);
+
+ struct netmap_if * (*nmd_if_new)(struct netmap_adapter *);
+ void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
+ int (*nmd_rings_create)(struct netmap_adapter *);
+ void (*nmd_rings_delete)(struct netmap_adapter *);
+};
typedef uint16_t nm_memid_t;
@@ -126,52 +137,144 @@ struct netmap_mem_d {
u_int flags;
#define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
int lasterr; /* last error for curr config */
- int refcount; /* existing priv structures */
+ int active; /* active users */
+ int refcount;
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
- netmap_mem_config_t config; /* called with NMA_LOCK held */
- netmap_mem_finalize_t finalize; /* called with NMA_LOCK held */
- netmap_mem_deref_t deref; /* called with NMA_LOCK held */
-
nm_memid_t nm_id; /* allocator identifier */
int nm_grp; /* iommu groupd id */
/* list of all existing allocators, sorted by nm_id */
struct netmap_mem_d *prev, *next;
+
+ struct netmap_mem_ops *ops;
};
-/* accessor functions */
-struct lut_entry*
-netmap_mem_get_lut(struct netmap_mem_d *nmd)
+#define NMD_DEFCB(t0, name) \
+t0 \
+netmap_mem_##name(struct netmap_mem_d *nmd) \
+{ \
+ return nmd->ops->nmd_##name(nmd); \
+}
+
+#define NMD_DEFCB1(t0, name, t1) \
+t0 \
+netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \
+{ \
+ return nmd->ops->nmd_##name(nmd, a1); \
+}
+
+#define NMD_DEFCB3(t0, name, t1, t2, t3) \
+t0 \
+netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \
+{ \
+ return nmd->ops->nmd_##name(nmd, a1, a2, a3); \
+}
+
+#define NMD_DEFNACB(t0, name) \
+t0 \
+netmap_mem_##name(struct netmap_adapter *na) \
+{ \
+ return na->nm_mem->ops->nmd_##name(na); \
+}
+
+#define NMD_DEFNACB1(t0, name, t1) \
+t0 \
+netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
+{ \
+ return na->nm_mem->ops->nmd_##name(na, a1); \
+}
+
+NMD_DEFCB1(void, get_lut, struct netmap_lut *);
+NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *);
+NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
+static int netmap_mem_config(struct netmap_mem_d *);
+NMD_DEFCB(int, config);
+NMD_DEFCB1(ssize_t, if_offset, const void *);
+NMD_DEFCB(void, delete);
+
+NMD_DEFNACB(struct netmap_if *, if_new);
+NMD_DEFNACB1(void, if_delete, struct netmap_if *);
+NMD_DEFNACB(int, rings_create);
+NMD_DEFNACB(void, rings_delete);
+
+static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
+static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
+static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
+
+#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
+#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
+#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx)
+#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx)
+
+#ifdef NM_DEBUG_MEM_PUTGET /* when defined, NM_DBG_REFC() prints the allocator id and its refcount */
+#define NM_DBG_REFC(nmd, func, line) \
+ printf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount) /* no trailing ';': the call site supplies it */
+#else
+#define NM_DBG_REFC(nmd, func, line) /* expands to nothing */
+#endif
+
+#ifdef NM_DEBUG_MEM_PUTGET
+void __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
+#else
+void netmap_mem_get(struct netmap_mem_d *nmd)
+#endif
+{
+ NMA_LOCK(nmd);
+ nmd->refcount++;
+ NM_DBG_REFC(nmd, func, line);
+ NMA_UNLOCK(nmd);
+}
+
+#ifdef NM_DEBUG_MEM_PUTGET
+void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
+#else
+void netmap_mem_put(struct netmap_mem_d *nmd)
+#endif
{
- return nmd->pools[NETMAP_BUF_POOL].lut;
+ int last;
+ NMA_LOCK(nmd);
+ last = (--nmd->refcount == 0);
+ NM_DBG_REFC(nmd, func, line);
+ NMA_UNLOCK(nmd);
+ if (last)
+ netmap_mem_delete(nmd);
}
-u_int
-netmap_mem_get_buftotal(struct netmap_mem_d *nmd)
+int
+netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
- return nmd->pools[NETMAP_BUF_POOL].objtotal;
+ if (nm_mem_assign_group(nmd, na->pdev) < 0) {
+ return ENOMEM;
+ } else {
+ nmd->ops->nmd_finalize(nmd);
+ }
+
+ if (!nmd->lasterr && na->pdev)
+ netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
+
+ return nmd->lasterr;
}
-size_t
-netmap_mem_get_bufsize(struct netmap_mem_d *nmd)
+void /* unmap na's buffer pool, then run the allocator's nmd_deref callback */
+netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
- return nmd->pools[NETMAP_BUF_POOL]._objsize;
+ NMA_LOCK(nmd); /* the unmap below runs under the allocator lock */
+ netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
+ NMA_UNLOCK(nmd);
+ nmd->ops->nmd_deref(nmd); /* void callback invoked after the lock is dropped; a void function must not 'return <expr>' */
}
-#ifdef linux
-#define NMA_LOCK_INIT(n) sema_init(&(n)->nm_mtx, 1)
-#define NMA_LOCK_DESTROY(n)
-#define NMA_LOCK(n) down(&(n)->nm_mtx)
-#define NMA_UNLOCK(n) up(&(n)->nm_mtx)
-#else /* !linux */
-#define NMA_LOCK_INIT(n) mtx_init(&(n)->nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF)
-#define NMA_LOCK_DESTROY(n) mtx_destroy(&(n)->nm_mtx)
-#define NMA_LOCK(n) mtx_lock(&(n)->nm_mtx)
-#define NMA_UNLOCK(n) mtx_unlock(&(n)->nm_mtx)
-#endif /* linux */
+/* accessor functions */
+static void /* fill *lut from the buffer pool of nmd */
+netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
+{
+ lut->lut = nmd->pools[NETMAP_BUF_POOL].lut; /* pointer is copied, so the table itself is shared with the pool */
+ lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
+ lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; /* note the source field is _objsize, not objsize */
+}
struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
[NETMAP_IF_POOL] = {
@@ -209,9 +312,7 @@ struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
* running in netmap mode.
* Virtual (VALE) ports will have each its own allocator.
*/
-static int netmap_mem_global_config(struct netmap_mem_d *nmd);
-static int netmap_mem_global_finalize(struct netmap_mem_d *nmd);
-static void netmap_mem_global_deref(struct netmap_mem_d *nmd);
+extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */
struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.pools = {
[NETMAP_IF_POOL] = {
@@ -236,24 +337,21 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.nummax = 1000000, /* one million! */
},
},
- .config = netmap_mem_global_config,
- .finalize = netmap_mem_global_finalize,
- .deref = netmap_mem_global_deref,
.nm_id = 1,
.nm_grp = -1,
.prev = &nm_mem,
.next = &nm_mem,
+
+ .ops = &netmap_mem_global_ops
};
struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
/* blueprint for the private memory allocators */
-static int netmap_mem_private_config(struct netmap_mem_d *nmd);
-static int netmap_mem_private_finalize(struct netmap_mem_d *nmd);
-static void netmap_mem_private_deref(struct netmap_mem_d *nmd);
+extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
const struct netmap_mem_d nm_blueprint = {
.pools = {
[NETMAP_IF_POOL] = {
@@ -278,11 +376,10 @@ const struct netmap_mem_d nm_blueprint = {
.nummax = 1000000, /* one million! */
},
},
- .config = netmap_mem_private_config,
- .finalize = netmap_mem_private_finalize,
- .deref = netmap_mem_private_deref,
.flags = NETMAP_MEM_PRIVATE,
+
+ .ops = &netmap_mem_private_ops
};
/* memory allocator related sysctls */
@@ -382,8 +479,8 @@ nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
* First, find the allocator that contains the requested offset,
* then locate the cluster through a lookup table.
*/
-vm_paddr_t
-netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
+static vm_paddr_t
+netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
{
int i;
vm_ooffset_t o = offset;
@@ -414,13 +511,13 @@ netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
return 0; // XXX bad address
}
-int
-netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
+static int
+netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
nm_memid_t *id)
{
int error = 0;
NMA_LOCK(nmd);
- error = nmd->config(nmd);
+ error = netmap_mem_config(nmd);
if (error)
goto out;
if (size) {
@@ -487,8 +584,8 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)))
-ssize_t
-netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *addr)
+static ssize_t
+netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr)
{
ssize_t v;
NMA_LOCK(nmd);
@@ -648,7 +745,7 @@ netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
static void
netmap_extra_free(struct netmap_adapter *na, uint32_t head)
{
- struct lut_entry *lut = na->na_lut;
+ struct lut_entry *lut = na->na_lut.lut;
struct netmap_mem_d *nmd = na->nm_mem;
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
uint32_t i, cur, *buf;
@@ -1081,15 +1178,15 @@ error:
-void
+static void
netmap_mem_private_delete(struct netmap_mem_d *nmd)
{
if (nmd == NULL)
return;
if (netmap_verbose)
D("deleting %p", nmd);
- if (nmd->refcount > 0)
- D("bug: deleting mem allocator with refcount=%d!", nmd->refcount);
+ if (nmd->active > 0)
+ D("bug: deleting mem allocator with active=%d!", nmd->active);
nm_mem_release_id(nmd);
if (netmap_verbose)
D("done deleting %p", nmd);
@@ -1110,8 +1207,10 @@ static int
netmap_mem_private_finalize(struct netmap_mem_d *nmd)
{
int err;
- nmd->refcount++;
+ NMA_LOCK(nmd);
+ nmd->active++;
err = netmap_mem_finalize_all(nmd);
+ NMA_UNLOCK(nmd);
return err;
}
@@ -1119,8 +1218,10 @@ netmap_mem_private_finalize(struct netmap_mem_d *nmd)
static void
netmap_mem_private_deref(struct netmap_mem_d *nmd)
{
- if (--nmd->refcount <= 0)
+ NMA_LOCK(nmd);
+ if (--nmd->active <= 0)
netmap_mem_reset_all(nmd);
+ NMA_UNLOCK(nmd);
}
@@ -1223,14 +1324,14 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
{
int i;
- if (nmd->refcount)
+ if (nmd->active)
/* already in use, we cannot change the configuration */
goto out;
if (!netmap_memory_config_changed(nmd))
goto out;
- D("reconfiguring");
+ ND("reconfiguring");
if (nmd->flags & NETMAP_MEM_FINALIZED) {
/* reset previous allocation */
@@ -1261,7 +1362,7 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd)
if (netmap_mem_global_config(nmd))
goto out;
- nmd->refcount++;
+ nmd->active++;
if (nmd->flags & NETMAP_MEM_FINALIZED) {
/* may happen if config is not changed */
@@ -1276,53 +1377,56 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd)
out:
if (nmd->lasterr)
- nmd->refcount--;
+ nmd->active--;
err = nmd->lasterr;
return err;
}
+static void
+netmap_mem_global_delete(struct netmap_mem_d *nmd)
+{
+ int i;
+
+ for (i = 0; i < NETMAP_POOLS_NR; i++) {
+ netmap_destroy_obj_allocator(&nm_mem.pools[i]);
+ }
+
+ NMA_LOCK_DESTROY(&nm_mem);
+}
+
int
netmap_mem_init(void)
{
NMA_LOCK_INIT(&nm_mem);
+ netmap_mem_get(&nm_mem);
return (0);
}
void
netmap_mem_fini(void)
{
- int i;
-
- for (i = 0; i < NETMAP_POOLS_NR; i++) {
- netmap_destroy_obj_allocator(&nm_mem.pools[i]);
- }
- NMA_LOCK_DESTROY(&nm_mem);
+ netmap_mem_put(&nm_mem);
}
static void
netmap_free_rings(struct netmap_adapter *na)
{
- struct netmap_kring *kring;
- struct netmap_ring *ring;
- if (!na->tx_rings)
- return;
- for (kring = na->tx_rings; kring != na->rx_rings; kring++) {
- ring = kring->ring;
- if (ring == NULL)
- continue;
- netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
- netmap_ring_free(na->nm_mem, ring);
- kring->ring = NULL;
- }
- for (/* cont'd from above */; kring != na->tailroom; kring++) {
- ring = kring->ring;
- if (ring == NULL)
- continue;
- netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
- netmap_ring_free(na->nm_mem, ring);
- kring->ring = NULL;
+ enum txrx t;
+
+ for_rx_tx(t) {
+ u_int i;
+ for (i = 0; i < netmap_real_rings(na, t); i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_ring *ring = kring->ring;
+
+ if (ring == NULL)
+ continue;
+ netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
+ netmap_ring_free(na->nm_mem, ring);
+ kring->ring = NULL;
+ }
}
}
@@ -1333,99 +1437,63 @@ netmap_free_rings(struct netmap_adapter *na)
* The kring array must follow the layout described
* in netmap_krings_create().
*/
-int
-netmap_mem_rings_create(struct netmap_adapter *na)
+static int
+netmap_mem2_rings_create(struct netmap_adapter *na)
{
- struct netmap_ring *ring;
- u_int len, ndesc;
- struct netmap_kring *kring;
- u_int i;
+ enum txrx t;
NMA_LOCK(na->nm_mem);
- /* transmit rings */
- for (i =0, kring = na->tx_rings; kring != na->rx_rings; kring++, i++) {
- if (kring->ring) {
- ND("%s %ld already created", kring->name, kring - na->tx_rings);
- continue; /* already created by somebody else */
- }
- ndesc = kring->nkr_num_slots;
- len = sizeof(struct netmap_ring) +
- ndesc * sizeof(struct netmap_slot);
- ring = netmap_ring_malloc(na->nm_mem, len);
- if (ring == NULL) {
- D("Cannot allocate tx_ring");
- goto cleanup;
- }
- ND("txring at %p", ring);
- kring->ring = ring;
- *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
- *(int64_t *)(uintptr_t)&ring->buf_ofs =
- (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
- na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
- netmap_ring_offset(na->nm_mem, ring);
-
- /* copy values from kring */
- ring->head = kring->rhead;
- ring->cur = kring->rcur;
- ring->tail = kring->rtail;
- *(uint16_t *)(uintptr_t)&ring->nr_buf_size =
- netmap_mem_bufsize(na->nm_mem);
- ND("%s h %d c %d t %d", kring->name,
- ring->head, ring->cur, ring->tail);
- ND("initializing slots for txring");
- if (i != na->num_tx_rings || (na->na_flags & NAF_HOST_RINGS)) {
- /* this is a real ring */
- if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
- D("Cannot allocate buffers for tx_ring");
- goto cleanup;
- }
- } else {
- /* this is a fake tx ring, set all indices to 0 */
- netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
- }
- }
+ for_rx_tx(t) {
+ u_int i;
- /* receive rings */
- for ( i = 0 /* kring cont'd from above */ ; kring != na->tailroom; kring++, i++) {
- if (kring->ring) {
- ND("%s %ld already created", kring->name, kring - na->rx_rings);
- continue; /* already created by somebody else */
- }
- ndesc = kring->nkr_num_slots;
- len = sizeof(struct netmap_ring) +
- ndesc * sizeof(struct netmap_slot);
- ring = netmap_ring_malloc(na->nm_mem, len);
- if (ring == NULL) {
- D("Cannot allocate rx_ring");
- goto cleanup;
- }
- ND("rxring at %p", ring);
- kring->ring = ring;
- *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
- *(int64_t *)(uintptr_t)&ring->buf_ofs =
- (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
- na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
- netmap_ring_offset(na->nm_mem, ring);
-
- /* copy values from kring */
- ring->head = kring->rhead;
- ring->cur = kring->rcur;
- ring->tail = kring->rtail;
- *(int *)(uintptr_t)&ring->nr_buf_size =
- netmap_mem_bufsize(na->nm_mem);
- ND("%s h %d c %d t %d", kring->name,
- ring->head, ring->cur, ring->tail);
- ND("initializing slots for rxring %p", ring);
- if (i != na->num_rx_rings || (na->na_flags & NAF_HOST_RINGS)) {
- /* this is a real ring */
- if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
- D("Cannot allocate buffers for rx_ring");
+ for (i = 0; i <= nma_get_nrings(na, t); i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_ring *ring = kring->ring;
+ u_int len, ndesc;
+
+ if (ring) {
+ ND("%s already created", kring->name);
+ continue; /* already created by somebody else */
+ }
+ ndesc = kring->nkr_num_slots;
+ len = sizeof(struct netmap_ring) +
+ ndesc * sizeof(struct netmap_slot);
+ ring = netmap_ring_malloc(na->nm_mem, len);
+ if (ring == NULL) {
+ D("Cannot allocate %s_ring", nm_txrx2str(t));
goto cleanup;
}
- } else {
- /* this is a fake rx ring, set all indices to 1 */
- netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 1);
+ ND("txring at %p", ring);
+ kring->ring = ring;
+ *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
+ *(int64_t *)(uintptr_t)&ring->buf_ofs =
+ (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
+ na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
+ netmap_ring_offset(na->nm_mem, ring);
+
+ /* copy values from kring */
+ ring->head = kring->rhead;
+ ring->cur = kring->rcur;
+ ring->tail = kring->rtail;
+ *(uint16_t *)(uintptr_t)&ring->nr_buf_size =
+ netmap_mem_bufsize(na->nm_mem);
+ ND("%s h %d c %d t %d", kring->name,
+ ring->head, ring->cur, ring->tail);
+ ND("initializing slots for %s_ring", nm_txrx2str(txrx));
+ if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) {
+ /* this is a real ring */
+ if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+ D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
+ goto cleanup;
+ }
+ } else {
+ /* this is a fake ring, set all indices to 0 */
+ netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
+ }
+ /* ring info */
+ *(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id;
+ *(uint16_t *)(uintptr_t)&ring->dir = kring->tx;
}
}
@@ -1441,8 +1509,8 @@ cleanup:
return ENOMEM;
}
-void
-netmap_mem_rings_delete(struct netmap_adapter *na)
+static void
+netmap_mem2_rings_delete(struct netmap_adapter *na)
{
/* last instance, release bufs and rings */
NMA_LOCK(na->nm_mem);
@@ -1461,16 +1529,20 @@ netmap_mem_rings_delete(struct netmap_adapter *na)
* (number of tx/rx rings and descs) does not change while
* the interface is in netmap mode.
*/
-struct netmap_if *
-netmap_mem_if_new(struct netmap_adapter *na)
+static struct netmap_if *
+netmap_mem2_if_new(struct netmap_adapter *na)
{
struct netmap_if *nifp;
ssize_t base; /* handy for relative offsets between rings and nifp */
- u_int i, len, ntx, nrx;
-
- /* account for the (eventually fake) host rings */
- ntx = na->num_tx_rings + 1;
- nrx = na->num_rx_rings + 1;
+ u_int i, len, n[NR_TXRX], ntot;
+ enum txrx t;
+
+ ntot = 0;
+ for_rx_tx(t) {
+ /* account for the (eventually fake) host rings */
+ n[t] = nma_get_nrings(na, t) + 1;
+ ntot += n[t];
+ }
/*
* the descriptor is followed inline by an array of offsets
* to the tx and rx rings in the shared memory region.
@@ -1478,7 +1550,7 @@ netmap_mem_if_new(struct netmap_adapter *na)
NMA_LOCK(na->nm_mem);
- len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
+ len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
nifp = netmap_if_malloc(na->nm_mem, len);
if (nifp == NULL) {
NMA_UNLOCK(na->nm_mem);
@@ -1496,12 +1568,12 @@ netmap_mem_if_new(struct netmap_adapter *na)
* userspace to reach the ring from the nifp.
*/
base = netmap_if_offset(na->nm_mem, nifp);
- for (i = 0; i < ntx; i++) {
+ for (i = 0; i < n[NR_TX]; i++) {
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
}
- for (i = 0; i < nrx; i++) {
- *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+ntx] =
+ for (i = 0; i < n[NR_RX]; i++) {
+ *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] =
netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
}
@@ -1510,8 +1582,8 @@ netmap_mem_if_new(struct netmap_adapter *na)
return (nifp);
}
-void
-netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
+static void
+netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
{
if (nifp == NULL)
/* nothing to do */
@@ -1528,78 +1600,39 @@ static void
netmap_mem_global_deref(struct netmap_mem_d *nmd)
{
- nmd->refcount--;
- if (!nmd->refcount)
+ nmd->active--;
+ if (!nmd->active)
nmd->nm_grp = -1;
if (netmap_verbose)
- D("refcount = %d", nmd->refcount);
+ D("active = %d", nmd->active);
}
-int
-netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
-{
- if (nm_mem_assign_group(nmd, na->pdev) < 0) {
- return ENOMEM;
- } else {
- NMA_LOCK(nmd);
- nmd->finalize(nmd);
- NMA_UNLOCK(nmd);
- }
-
- if (!nmd->lasterr && na->pdev)
- netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
-
- return nmd->lasterr;
-}
-
-void
-netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
-{
- NMA_LOCK(nmd);
- netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
- if (nmd->refcount == 1) {
- u_int i;
-
- /*
- * Reset the allocator when it falls out of use so that any
- * pool resources leaked by unclean application exits are
- * reclaimed.
- */
- for (i = 0; i < NETMAP_POOLS_NR; i++) {
- struct netmap_obj_pool *p;
- u_int j;
-
- p = &nmd->pools[i];
- p->objfree = p->objtotal;
- /*
- * Reproduce the net effect of the M_ZERO malloc()
- * and marking of free entries in the bitmap that
- * occur in finalize_obj_allocator()
- */
- memset(p->bitmap,
- '\0',
- sizeof(uint32_t) * ((p->objtotal + 31) / 32));
-
- /*
- * Set all the bits in the bitmap that have
- * corresponding buffers to 1 to indicate they are
- * free.
- */
- for (j = 0; j < p->objtotal; j++) {
- if (p->lut[j].vaddr != NULL) {
- p->bitmap[ (j>>5) ] |= ( 1 << (j & 31) );
- }
- }
- }
-
- /*
- * Per netmap_mem_finalize_all(),
- * buffers 0 and 1 are reserved
- */
- nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
- nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
- }
- nmd->deref(nmd);
- NMA_UNLOCK(nmd);
-}
+struct netmap_mem_ops netmap_mem_global_ops = {
+ .nmd_get_lut = netmap_mem2_get_lut,
+ .nmd_get_info = netmap_mem2_get_info,
+ .nmd_ofstophys = netmap_mem2_ofstophys,
+ .nmd_config = netmap_mem_global_config,
+ .nmd_finalize = netmap_mem_global_finalize,
+ .nmd_deref = netmap_mem_global_deref,
+ .nmd_delete = netmap_mem_global_delete,
+ .nmd_if_offset = netmap_mem2_if_offset,
+ .nmd_if_new = netmap_mem2_if_new,
+ .nmd_if_delete = netmap_mem2_if_delete,
+ .nmd_rings_create = netmap_mem2_rings_create,
+ .nmd_rings_delete = netmap_mem2_rings_delete
+};
+struct netmap_mem_ops netmap_mem_private_ops = {
+ .nmd_get_lut = netmap_mem2_get_lut,
+ .nmd_get_info = netmap_mem2_get_info,
+ .nmd_ofstophys = netmap_mem2_ofstophys,
+ .nmd_config = netmap_mem_private_config,
+ .nmd_finalize = netmap_mem_private_finalize,
+ .nmd_deref = netmap_mem_private_deref,
+ .nmd_if_offset = netmap_mem2_if_offset,
+ .nmd_delete = netmap_mem_private_delete,
+ .nmd_if_new = netmap_mem2_if_new,
+ .nmd_if_delete = netmap_mem2_if_delete,
+ .nmd_rings_create = netmap_mem2_rings_create,
+ .nmd_rings_delete = netmap_mem2_rings_delete
+};
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index 4c620bd4a780..ef0ff96d8e7f 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -117,9 +117,7 @@
extern struct netmap_mem_d nm_mem;
-struct lut_entry* netmap_mem_get_lut(struct netmap_mem_d *);
-u_int netmap_mem_get_buftotal(struct netmap_mem_d *);
-size_t netmap_mem_get_bufsize(struct netmap_mem_d *);
+void netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem_init(void);
@@ -134,12 +132,34 @@ ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new(const char *name,
u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
int* error);
-void netmap_mem_private_delete(struct netmap_mem_d *);
+void netmap_mem_delete(struct netmap_mem_d *);
+
+//#define NM_DEBUG_MEM_PUTGET 1
+
+#ifdef NM_DEBUG_MEM_PUTGET
+
+#define netmap_mem_get(nmd) \
+ do { \
+ __netmap_mem_get(nmd, __FUNCTION__, __LINE__); \
+ } while (0)
+
+#define netmap_mem_put(nmd) \
+ do { \
+ __netmap_mem_put(nmd, __FUNCTION__, __LINE__); \
+ } while (0)
+
+void __netmap_mem_get(struct netmap_mem_d *, const char *, int);
+void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
+#else /* !NM_DEBUG_MEM_PUTGET */
+
+void netmap_mem_get(struct netmap_mem_d *);
+void netmap_mem_put(struct netmap_mem_d *);
+
+#endif /* !NM_DEBUG_MEM_PUTGET */
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
-
#endif
diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c
index 746abb524d06..be7ce440015c 100644
--- a/sys/dev/netmap/netmap_monitor.c
+++ b/sys/dev/netmap/netmap_monitor.c
@@ -28,7 +28,7 @@
*
* Monitors
*
- * netmap monitors can be used to do zero-copy monitoring of network traffic
+ * netmap monitors can be used to do monitoring of network traffic
* on another adapter, when the latter adapter is working in netmap mode.
*
* Monitors offer to userspace the same interface as any other netmap port,
@@ -38,8 +38,24 @@
* monitored adapter. During registration, the user can choose if she wants
* to intercept tx only, rx only, or both tx and rx traffic.
*
- * The monitor only sees the frames after they have been consumed in the
- * monitored adapter:
+ * If the monitor is not able to cope with the stream of frames, excess traffic
+ * will be dropped.
+ *
+ * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
+ *
+ * Monitors can be either zero-copy or copy-based.
+ *
+ * Copy monitors see the frames before they are consumed:
+ *
+ * - For tx traffic, this is when the application sends them, before they are
+ * passed down to the adapter.
+ *
+ * - For rx traffic, this is when they are received by the adapter, before
+ * they are sent up to the application, if any (note that, if no
+ * application is reading from a monitored ring, the ring will eventually
+ * fill up and traffic will stop).
+ *
+ * Zero-copy monitors only see the frames after they have been consumed:
*
* - For tx traffic, this is after the slots containing the frames have been
 * marked as free. Note that this may happen at a considerable delay after
@@ -49,11 +65,9 @@
* has released them. In most cases, the consumer is a userspace
* application which may have modified the frame contents.
*
- * If the monitor is not able to cope with the stream of frames, excess traffic
- * will be dropped.
- *
- * Each ring can be monitored by at most one monitor. This may change in the
- * future, if we implement monitor chaining.
+ * Several copy monitors may be active on any ring. Zero-copy monitors,
+ * instead, need exclusive access to each of the monitored rings. This may
+ * change in the future, if we implement zero-copy monitor chaining.
*
*/
@@ -105,34 +119,319 @@
#define NM_MONITOR_MAXSLOTS 4096
-/* monitor works by replacing the nm_sync callbacks in the monitored rings.
- * The actions to be performed are the same on both tx and rx rings, so we
- * have collected them here
+/*
+ ********************************************************************
+ * functions common to both kinds of monitors
+ ********************************************************************
+ */
+
+/* nm_sync callback for the monitor's own tx rings.
+ * This makes no sense and always returns error
+ */
+static int
+netmap_monitor_txsync(struct netmap_kring *kring, int flags)
+{
+ RD(1, "%s %x", kring->name, flags);
+ return EIO;
+}
+
+/* nm_sync callback for the monitor's own rx rings.
+ * Note that the lock in netmap_zmon_parent_sync only protects
+ * writers among themselves. Synchronization between writers
+ * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
+ * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
+ */
+static int
+netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
+{
+ ND("%s %x", kring->name, flags);
+ kring->nr_hwcur = kring->rcur;
+ mb();
+ return 0;
+}
+
+/* nm_krings_create callbacks for monitors.
+ * We could use the default netmap_hw_krings_zmon, but
+ * we don't need the mbq.
+ */
+static int
+netmap_monitor_krings_create(struct netmap_adapter *na)
+{
+ return netmap_krings_create(na, 0);
+}
+
+/* nm_krings_delete callback for monitors */
+static void
+netmap_monitor_krings_delete(struct netmap_adapter *na)
+{
+ netmap_krings_delete(na);
+}
+
+
+static u_int
+nm_txrx2flag(enum txrx t)
+{
+ return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
+}
+
+/* allocate the monitors array in the monitored kring */
+static int
+nm_monitor_alloc(struct netmap_kring *kring, u_int n)
+{
+ size_t len;
+ struct netmap_kring **nm;
+
+ if (n <= kring->max_monitors)
+ /* we already have at least as many entries as requested */
+ return 0;
+
+ len = sizeof(struct netmap_kring *) * n;
+ nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (nm == NULL)
+ return ENOMEM;
+
+ kring->monitors = nm;
+ kring->max_monitors = n;
+
+ return 0;
+}
+
+/* deallocate the monitors array in the monitored kring */
+static void
+nm_monitor_dealloc(struct netmap_kring *kring)
+{
+ if (kring->monitors) {
+ if (kring->n_monitors > 0) {
+ D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
+ kring->n_monitors);
+ }
+ free(kring->monitors, M_DEVBUF);
+ kring->monitors = NULL;
+ kring->max_monitors = 0;
+ kring->n_monitors = 0;
+ }
+}
+
+/*
+ * monitors work by replacing the nm_sync() and possibly the
+ * nm_notify() callbacks in the monitored rings.
+ */
+static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
+static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
+static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
+static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
+static int netmap_monitor_parent_notify(struct netmap_kring *, int);
+
+
+/* add the monitor mkring to the list of monitors of kring.
+ * If this is the first monitor, intercept the callbacks
+ */
+static int
+netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
+{
+ int error = 0;
+
+ /* synchronize with concurrently running nm_sync()s */
+ nm_kr_get(kring);
+ /* make sure the monitor array exists and is big enough */
+ error = nm_monitor_alloc(kring, kring->n_monitors + 1);
+ if (error)
+ goto out;
+ kring->monitors[kring->n_monitors] = mkring;
+ mkring->mon_pos = kring->n_monitors;
+ kring->n_monitors++;
+ if (kring->n_monitors == 1) {
+ /* this is the first monitor, intercept callbacks */
+ D("%s: intercept callbacks on %s", mkring->name, kring->name);
+ kring->mon_sync = kring->nm_sync;
+ /* zcopy monitors do not override nm_notify(), but
+ * we save the original one regardless, so that
+ * netmap_monitor_del() does not need to know the
+ * monitor type
+ */
+ kring->mon_notify = kring->nm_notify;
+ if (kring->tx == NR_TX) {
+ kring->nm_sync = (zcopy ? netmap_zmon_parent_txsync :
+ netmap_monitor_parent_txsync);
+ } else {
+ kring->nm_sync = (zcopy ? netmap_zmon_parent_rxsync :
+ netmap_monitor_parent_rxsync);
+ if (!zcopy) {
+ /* also intercept notify */
+ kring->nm_notify = netmap_monitor_parent_notify;
+ kring->mon_tail = kring->nr_hwtail;
+ }
+ }
+ }
+
+out:
+ nm_kr_put(kring);
+ return error;
+}
+
+
+/* remove the monitor mkring from the list of monitors of kring.
+ * If this is the last monitor, restore the original callbacks
+ */
+static void
+netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
+{
+ /* synchronize with concurrently running nm_sync()s */
+ nm_kr_get(kring);
+ kring->n_monitors--;
+ if (mkring->mon_pos != kring->n_monitors) {
+ kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
+ kring->monitors[mkring->mon_pos]->mon_pos = mkring->mon_pos;
+ }
+ kring->monitors[kring->n_monitors] = NULL;
+ if (kring->n_monitors == 0) {
+ /* this was the last monitor, restore callbacks and delete monitor array */
+ D("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
+ kring->nm_sync = kring->mon_sync;
+ kring->mon_sync = NULL;
+ if (kring->tx == NR_RX) {
+ D("%s: restoring notify on %s: %p",
+ mkring->name, kring->name, kring->mon_notify);
+ kring->nm_notify = kring->mon_notify;
+ kring->mon_notify = NULL;
+ }
+ nm_monitor_dealloc(kring);
+ }
+ nm_kr_put(kring);
+}
+
+
+/* This is called when the monitored adapter leaves netmap mode
+ * (see netmap_do_unregif).
+ * We need to notify the monitors that the monitored rings are gone.
+ * We do this by setting their mna->priv.np_na to NULL.
+ * Note that the rings are already stopped when this happens, so
+ * no monitor ring callback can be active.
+ */
+void
+netmap_monitor_stop(struct netmap_adapter *na)
+{
+ enum txrx t;
+
+ for_rx_tx(t) {
+ u_int i;
+
+ for (i = 0; i < nma_get_nrings(na, t); i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+ u_int j;
+
+ for (j = 0; j < kring->n_monitors; j++) {
+ struct netmap_kring *mkring =
+ kring->monitors[j];
+ struct netmap_monitor_adapter *mna =
+ (struct netmap_monitor_adapter *)mkring->na;
+ /* forget about this adapter */
+ mna->priv.np_na = NULL;
+ }
+ }
+ }
+}
+
+
+/* common function for the nm_register() callbacks of both kinds of
+ * monitors.
*/
static int
-netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr)
+netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
{
- struct netmap_monitor_adapter *mna = kring->monitor;
- struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id];
- struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
- int error;
- int rel_slots, free_slots, busy;
+ struct netmap_monitor_adapter *mna =
+ (struct netmap_monitor_adapter *)na;
+ struct netmap_priv_d *priv = &mna->priv;
+ struct netmap_adapter *pna = priv->np_na;
+ struct netmap_kring *kring, *mkring;
+ int i;
+ enum txrx t;
+
+ ND("%p: onoff %d", na, onoff);
+ if (onoff) {
+ if (pna == NULL) {
+ /* parent left netmap mode, fatal */
+ D("%s: internal error", na->name);
+ return ENXIO;
+ }
+ for_rx_tx(t) {
+ if (mna->flags & nm_txrx2flag(t)) {
+ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
+ kring = &NMR(pna, t)[i];
+ mkring = &na->rx_rings[i];
+ netmap_monitor_add(mkring, kring, zmon);
+ }
+ }
+ }
+ na->na_flags |= NAF_NETMAP_ON;
+ } else {
+ if (pna == NULL) {
+ D("%s: parent left netmap mode, nothing to restore", na->name);
+ return 0;
+ }
+ na->na_flags &= ~NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ if (mna->flags & nm_txrx2flag(t)) {
+ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
+ kring = &NMR(pna, t)[i];
+ mkring = &na->rx_rings[i];
+ netmap_monitor_del(mkring, kring);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ ****************************************************************
+ * functions specific for zero-copy monitors
+ ****************************************************************
+ */
+
+/*
+ * Common function for both zero-copy tx and rx nm_sync()
+ * callbacks
+ */
+static int
+netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
+{
+ struct netmap_kring *mkring = kring->monitors[0];
+ struct netmap_ring *ring = kring->ring, *mring;
+ int error = 0;
+ int rel_slots, free_slots, busy, sent = 0;
u_int beg, end, i;
u_int lim = kring->nkr_num_slots - 1,
- mlim = mkring->nkr_num_slots - 1;
+ mlim; // = mkring->nkr_num_slots - 1;
+
+ if (mkring == NULL) {
+ RD(5, "NULL monitor on %s", kring->name);
+ return 0;
+ }
+ mring = mkring->ring;
+ mlim = mkring->nkr_num_slots - 1;
/* get the released slots (rel_slots) */
- beg = *ringptr;
- error = kring->save_sync(kring, flags);
- if (error)
- return error;
- end = *ringptr;
+ if (tx == NR_TX) {
+ beg = kring->nr_hwtail;
+ error = kring->mon_sync(kring, flags);
+ if (error)
+ return error;
+ end = kring->nr_hwtail;
+ } else { /* NR_RX */
+ beg = kring->nr_hwcur;
+ end = kring->rhead;
+ }
+
rel_slots = end - beg;
if (rel_slots < 0)
rel_slots += kring->nkr_num_slots;
if (!rel_slots) {
- return 0;
+ /* no released slots, but we still need
+ * to call rxsync if this is a rx ring
+ */
+ goto out_rxsync;
}
/* we need to lock the monitor receive ring, since it
@@ -147,19 +446,18 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr
busy += mkring->nkr_num_slots;
free_slots = mlim - busy;
- if (!free_slots) {
- mtx_unlock(&mkring->q_lock);
- return 0;
- }
+ if (!free_slots)
+ goto out;
/* swap min(free_slots, rel_slots) slots */
if (free_slots < rel_slots) {
beg += (rel_slots - free_slots);
- if (beg > lim)
- beg = 0;
+ if (beg >= kring->nkr_num_slots)
+ beg -= kring->nkr_num_slots;
rel_slots = free_slots;
}
+ sent = rel_slots;
for ( ; rel_slots; rel_slots--) {
struct netmap_slot *s = &ring->slot[beg];
struct netmap_slot *ms = &mring->slot[i];
@@ -168,6 +466,7 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr
tmp = ms->buf_idx;
ms->buf_idx = s->buf_idx;
s->buf_idx = tmp;
+ ND(5, "beg %d buf_idx %d", beg, tmp);
tmp = ms->len;
ms->len = s->len;
@@ -182,143 +481,196 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr
mb();
mkring->nr_hwtail = i;
+out:
mtx_unlock(&mkring->q_lock);
- /* notify the new frames to the monitor */
- mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0);
- return 0;
+
+ if (sent) {
+ /* notify the new frames to the monitor */
+ mkring->nm_notify(mkring, 0);
+ }
+
+out_rxsync:
+ if (tx == NR_RX)
+ error = kring->mon_sync(kring, flags);
+
+ return error;
}
/* callback used to replace the nm_sync callback in the monitored tx rings */
static int
-netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
+netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
- return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail);
+ return netmap_zmon_parent_sync(kring, flags, NR_TX);
}
/* callback used to replace the nm_sync callback in the monitored rx rings */
static int
-netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
+netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
- return netmap_monitor_parent_sync(kring, flags, &kring->rcur);
+ return netmap_zmon_parent_sync(kring, flags, NR_RX);
}
-/* nm_sync callback for the monitor's own tx rings.
- * This makes no sense and always returns error
- */
+
static int
-netmap_monitor_txsync(struct netmap_kring *kring, int flags)
+netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
- D("%s %x", kring->name, flags);
- return EIO;
+ return netmap_monitor_reg_common(na, onoff, 1 /* zcopy */);
}
-/* nm_sync callback for the monitor's own rx rings.
- * Note that the lock in netmap_monitor_parent_sync only protects
- * writers among themselves. Synchronization between writers
- * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync)
- * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
- */
-static int
-netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
+/* nm_dtor callback for monitors */
+static void
+netmap_zmon_dtor(struct netmap_adapter *na)
{
- ND("%s %x", kring->name, flags);
- kring->nr_hwcur = kring->rcur;
- mb();
- nm_rxsync_finalize(kring);
- return 0;
+ struct netmap_monitor_adapter *mna =
+ (struct netmap_monitor_adapter *)na;
+ struct netmap_priv_d *priv = &mna->priv;
+ struct netmap_adapter *pna = priv->np_na;
+
+ netmap_adapter_put(pna);
}
-/* nm_krings_create callbacks for monitors.
- * We could use the default netmap_hw_krings_monitor, but
- * we don't need the mbq.
+/*
+ ****************************************************************
+ * functions specific for copy monitors
+ ****************************************************************
*/
-static int
-netmap_monitor_krings_create(struct netmap_adapter *na)
+
+static void
+netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
{
- return netmap_krings_create(na, 0);
-}
+ u_int j;
+ for (j = 0; j < kring->n_monitors; j++) {
+ struct netmap_kring *mkring = kring->monitors[j];
+ u_int i, mlim, beg;
+ int free_slots, busy, sent = 0, m;
+ u_int lim = kring->nkr_num_slots - 1;
+ struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
+ u_int max_len = NETMAP_BUF_SIZE(mkring->na);
-/* nm_register callback for monitors.
- *
- * On registration, replace the nm_sync callbacks in the monitored
- * rings with our own, saving the previous ones in the monitored
- * rings themselves, where they are used by netmap_monitor_parent_sync.
- *
- * On de-registration, restore the original callbacks. We need to
- * stop traffic while we are doing this, since the monitored adapter may
- * have already started executing a netmap_monitor_parent_sync
- * and may not like the kring->save_sync pointer to become NULL.
- */
-static int
-netmap_monitor_reg(struct netmap_adapter *na, int onoff)
-{
- struct netmap_monitor_adapter *mna =
- (struct netmap_monitor_adapter *)na;
- struct netmap_priv_d *priv = &mna->priv;
- struct netmap_adapter *pna = priv->np_na;
- struct netmap_kring *kring;
- int i;
+ mlim = mkring->nkr_num_slots - 1;
- ND("%p: onoff %d", na, onoff);
- if (onoff) {
- if (!nm_netmap_on(pna)) {
- /* parent left netmap mode, fatal */
- return ENXIO;
- }
- if (mna->flags & NR_MONITOR_TX) {
- for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
- kring = &pna->tx_rings[i];
- kring->save_sync = kring->nm_sync;
- kring->nm_sync = netmap_monitor_parent_txsync;
- }
- }
- if (mna->flags & NR_MONITOR_RX) {
- for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
- kring = &pna->rx_rings[i];
- kring->save_sync = kring->nm_sync;
- kring->nm_sync = netmap_monitor_parent_rxsync;
- }
- }
- na->na_flags |= NAF_NETMAP_ON;
- } else {
- if (!nm_netmap_on(pna)) {
- /* parent left netmap mode, nothing to restore */
- return 0;
+ /* we need to lock the monitor receive ring, since it
+ * is the target of both tx and rx traffic from the monitored
+ * adapter
+ */
+ mtx_lock(&mkring->q_lock);
+ /* get the free slots available on the monitor ring */
+ i = mkring->nr_hwtail;
+ busy = i - mkring->nr_hwcur;
+ if (busy < 0)
+ busy += mkring->nkr_num_slots;
+ free_slots = mlim - busy;
+
+ if (!free_slots)
+ goto out;
+
+ /* copy min(free_slots, new_slots) slots */
+ m = new_slots;
+ beg = first_new;
+ if (free_slots < m) {
+ beg += (m - free_slots);
+ if (beg >= kring->nkr_num_slots)
+ beg -= kring->nkr_num_slots;
+ m = free_slots;
}
- na->na_flags &= ~NAF_NETMAP_ON;
- if (mna->flags & NR_MONITOR_TX) {
- for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
- netmap_set_txring(pna, i, 1 /* stopped */);
- kring = &pna->tx_rings[i];
- kring->nm_sync = kring->save_sync;
- kring->save_sync = NULL;
- netmap_set_txring(pna, i, 0 /* enabled */);
+
+ for ( ; m; m--) {
+ struct netmap_slot *s = &ring->slot[beg];
+ struct netmap_slot *ms = &mring->slot[i];
+ u_int copy_len = s->len;
+ char *src = NMB(kring->na, s),
+ *dst = NMB(mkring->na, ms);
+
+ if (unlikely(copy_len > max_len)) {
+ RD(5, "%s->%s: truncating %d to %d", kring->name,
+ mkring->name, copy_len, max_len);
+ copy_len = max_len;
}
+
+ memcpy(dst, src, copy_len);
+ ms->len = copy_len;
+ sent++;
+
+ beg = nm_next(beg, lim);
+ i = nm_next(i, mlim);
}
- if (mna->flags & NR_MONITOR_RX) {
- for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
- netmap_set_rxring(pna, i, 1 /* stopped */);
- kring = &pna->rx_rings[i];
- kring->nm_sync = kring->save_sync;
- kring->save_sync = NULL;
- netmap_set_rxring(pna, i, 0 /* enabled */);
- }
+ mb();
+ mkring->nr_hwtail = i;
+ out:
+ mtx_unlock(&mkring->q_lock);
+
+ if (sent) {
+ /* notify the new frames to the monitor */
+ mkring->nm_notify(mkring, 0);
}
}
+}
+
+/* callback used to replace the nm_sync callback in the monitored tx rings */
+static int
+netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
+{
+ u_int first_new;
+ int new_slots;
+
+ /* get the new slots */
+ first_new = kring->nr_hwcur;
+ new_slots = kring->rhead - first_new;
+ if (new_slots < 0)
+ new_slots += kring->nkr_num_slots;
+ if (new_slots)
+ netmap_monitor_parent_sync(kring, first_new, new_slots);
+ return kring->mon_sync(kring, flags);
+}
+
+/* callback used to replace the nm_sync callback in the monitored rx rings */
+static int
+netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
+{
+ u_int first_new;
+ int new_slots, error;
+
+ /* get the new slots */
+ error = kring->mon_sync(kring, flags);
+ if (error)
+ return error;
+ first_new = kring->mon_tail;
+ new_slots = kring->nr_hwtail - first_new;
+ if (new_slots < 0)
+ new_slots += kring->nkr_num_slots;
+ if (new_slots)
+ netmap_monitor_parent_sync(kring, first_new, new_slots);
+ kring->mon_tail = kring->nr_hwtail;
return 0;
}
-/* nm_krings_delete callback for monitors */
-static void
-netmap_monitor_krings_delete(struct netmap_adapter *na)
+
+/* callback used to replace the nm_notify() callback in the monitored rx rings */
+static int
+netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
{
- netmap_krings_delete(na);
+ ND(5, "%s %x", kring->name, flags);
+ /* ?xsync callbacks have tryget called by their callers
+ * (NIOCREGIF and poll()), but here we have to call it
+ * by ourselves
+ */
+ if (nm_kr_tryget(kring))
+ goto out;
+ netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
+ nm_kr_put(kring);
+out:
+ return kring->mon_notify(kring, flags);
}
-/* nm_dtor callback for monitors */
+static int
+netmap_monitor_reg(struct netmap_adapter *na, int onoff)
+{
+ return netmap_monitor_reg_common(na, onoff, 0 /* no zcopy */);
+}
+
static void
netmap_monitor_dtor(struct netmap_adapter *na)
{
@@ -326,22 +678,7 @@ netmap_monitor_dtor(struct netmap_adapter *na)
(struct netmap_monitor_adapter *)na;
struct netmap_priv_d *priv = &mna->priv;
struct netmap_adapter *pna = priv->np_na;
- int i;
- ND("%p", na);
- if (nm_netmap_on(pna)) {
- /* parent still in netmap mode, mark its krings as free */
- if (mna->flags & NR_MONITOR_TX) {
- for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
- pna->tx_rings[i].monitor = NULL;
- }
- }
- if (mna->flags & NR_MONITOR_RX) {
- for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
- pna->rx_rings[i].monitor = NULL;
- }
- }
- }
netmap_adapter_put(pna);
}
@@ -354,6 +691,9 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
struct netmap_adapter *pna; /* parent adapter */
struct netmap_monitor_adapter *mna;
int i, error;
+ enum txrx t;
+ int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
+ char monsuff[10] = "";
if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
ND("not a monitor");
@@ -400,44 +740,65 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
D("ringid error");
goto put_out;
}
- if (nmr->nr_flags & NR_MONITOR_TX) {
- for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
- struct netmap_kring *kring = &pna->tx_rings[i];
- if (kring->monitor) {
- error = EBUSY;
- D("ring busy");
- goto release_out;
+ if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
+ snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
+ }
+ snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
+ monsuff,
+ zcopy ? "z" : "",
+ (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
+ (nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
+
+ if (zcopy) {
+ /* zero copy monitors need exclusive access to the monitored rings */
+ for_rx_tx(t) {
+ if (! (nmr->nr_flags & nm_txrx2flag(t)))
+ continue;
+ for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
+ struct netmap_kring *kring = &NMR(pna, t)[i];
+ if (kring->n_monitors > 0) {
+ error = EBUSY;
+ D("ring %s already monitored by %s", kring->name,
+ kring->monitors[0]->name);
+ goto put_out;
+ }
}
- kring->monitor = mna;
}
- }
- if (nmr->nr_flags & NR_MONITOR_RX) {
- for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
- struct netmap_kring *kring = &pna->rx_rings[i];
- if (kring->monitor) {
- error = EBUSY;
- D("ring busy");
- goto release_out;
+ mna->up.nm_register = netmap_zmon_reg;
+ mna->up.nm_dtor = netmap_zmon_dtor;
+ /* to have zero copy, we need to use the same memory allocator
+ * as the monitored port
+ */
+ mna->up.nm_mem = pna->nm_mem;
+ mna->up.na_lut = pna->na_lut;
+ } else {
+ /* normal monitors are incompatible with zero copy ones */
+ for_rx_tx(t) {
+ if (! (nmr->nr_flags & nm_txrx2flag(t)))
+ continue;
+ for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
+ struct netmap_kring *kring = &NMR(pna, t)[i];
+ if (kring->n_monitors > 0 &&
+ kring->monitors[0]->na->nm_register == netmap_zmon_reg)
+ {
+ error = EBUSY;
+ D("ring busy");
+ goto put_out;
+ }
}
- kring->monitor = mna;
}
+ mna->up.nm_rxsync = netmap_monitor_rxsync;
+ mna->up.nm_register = netmap_monitor_reg;
+ mna->up.nm_dtor = netmap_monitor_dtor;
}
- snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name);
-
/* the monitor supports the host rings iff the parent does */
mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
+ /* a do-nothing txsync: monitors cannot be used to inject packets */
mna->up.nm_txsync = netmap_monitor_txsync;
mna->up.nm_rxsync = netmap_monitor_rxsync;
- mna->up.nm_register = netmap_monitor_reg;
- mna->up.nm_dtor = netmap_monitor_dtor;
mna->up.nm_krings_create = netmap_monitor_krings_create;
mna->up.nm_krings_delete = netmap_monitor_krings_delete;
- mna->up.nm_mem = pna->nm_mem;
- mna->up.na_lut = pna->na_lut;
- mna->up.na_lut_objtotal = pna->na_lut_objtotal;
- mna->up.na_lut_objsize = pna->na_lut_objsize;
-
mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
* in the parent
@@ -458,7 +819,7 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
error = netmap_attach_common(&mna->up);
if (error) {
D("attach_common error");
- goto release_out;
+ goto put_out;
}
/* remember the traffic directions we have to monitor */
@@ -478,16 +839,6 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
return 0;
-release_out:
- D("monitor error");
- for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
- if (pna->tx_rings[i].monitor == mna)
- pna->tx_rings[i].monitor = NULL;
- }
- for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
- if (pna->rx_rings[i].monitor == mna)
- pna->rx_rings[i].monitor = NULL;
- }
put_out:
netmap_adapter_put(pna);
free(mna, M_DEVBUF);
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
index 64828670c35b..3fe29bb2ff9c 100644
--- a/sys/dev/netmap/netmap_pipe.c
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -72,51 +72,31 @@
#define NM_PIPE_MAXSLOTS 4096
-int netmap_default_pipes = 0; /* default number of pipes for each nic */
+int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
/* allocate the pipe array in the parent adapter */
-int
-netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr)
+static int
+nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
{
size_t len;
- int mode = nmr->nr_flags & NR_REG_MASK;
- u_int npipes;
+ struct netmap_pipe_adapter **npa;
- if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) {
- /* this is for our parent, not for us */
+ if (npipes <= na->na_max_pipes)
+ /* we already have more entries than requested */
return 0;
- }
-
- /* TODO: we can resize the array if the new
- * request can accomodate the already existing pipes
- */
- if (na->na_pipes) {
- nmr->nr_arg1 = na->na_max_pipes;
- return 0;
- }
-
- npipes = nmr->nr_arg1;
- if (npipes == 0)
- npipes = netmap_default_pipes;
- nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL);
-
- if (npipes == 0) {
- /* really zero, nothing to alloc */
- goto out;
- }
+
+ if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
+ return EINVAL;
- len = sizeof(struct netmap_pipe_adapter *) * npipes;
- na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (na->na_pipes == NULL)
+ len = sizeof(struct netmap_pipe_adapter *) * npipes;
+ npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (npa == NULL)
return ENOMEM;
+ na->na_pipes = npa;
na->na_max_pipes = npipes;
- na->na_next_pipe = 0;
-
-out:
- nmr->nr_arg1 = npipes;
return 0;
}
@@ -126,7 +106,10 @@ void
netmap_pipe_dealloc(struct netmap_adapter *na)
{
if (na->na_pipes) {
- ND("freeing pipes for %s", na->name);
+ if (na->na_next_pipe > 0) {
+ D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name,
+ na->na_next_pipe);
+ }
free(na->na_pipes, M_DEVBUF);
na->na_pipes = NULL;
na->na_max_pipes = 0;
@@ -155,8 +138,10 @@ static int
netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
if (parent->na_next_pipe >= parent->na_max_pipes) {
- D("%s: no space left for pipes", parent->name);
- return ENOMEM;
+ u_int npipes = parent->na_max_pipes ? 2*parent->na_max_pipes : 2;
+ int error = nm_pipe_alloc(parent, npipes);
+ if (error)
+ return error;
}
parent->na_pipes[parent->na_next_pipe] = na;
@@ -172,8 +157,10 @@ netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na
u_int n;
n = --parent->na_next_pipe;
if (n != na->parent_slot) {
- parent->na_pipes[na->parent_slot] =
- parent->na_pipes[n];
+ struct netmap_pipe_adapter **p =
+ &parent->na_pipes[na->parent_slot];
+ *p = parent->na_pipes[n];
+ (*p)->parent_slot = na->parent_slot;
}
parent->na_pipes[n] = NULL;
}
@@ -208,7 +195,6 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
if (limit == 0) {
/* either the rxring is full, or nothing to send */
- nm_txsync_finalize(txkring); /* actually useless */
return 0;
}
@@ -222,7 +208,9 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
*rs = *ts;
*ts = tmp;
- /* no need to report the buffer change */
+ /* report the buffer change */
+ ts->flags |= NS_BUF_CHANGED;
+ rs->flags |= NS_BUF_CHANGED;
j = nm_next(j, lim_rx);
k = nm_next(k, lim_tx);
@@ -233,12 +221,11 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
txkring->nr_hwcur = k;
txkring->nr_hwtail = nm_prev(k, lim_tx);
- nm_txsync_finalize(txkring);
ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail, j);
mb(); /* make sure rxkring->nr_hwtail is updated before notifying */
- rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0);
+ rxkring->nm_notify(rxkring, 0);
return 0;
}
@@ -254,12 +241,11 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
rxkring->rcur, rxkring->rhead, rxkring->rtail);
mb(); /* paired with the first mb() in txsync */
- nm_rxsync_finalize(rxkring);
if (oldhwcur != rxkring->nr_hwcur) {
/* we have released some slots, notify the other end */
mb(); /* make sure nr_hwcur is updated before notifying */
- txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0);
+ txkring->nm_notify(txkring, 0);
}
return 0;
}
@@ -318,11 +304,13 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona = &pna->peer->up;
int error = 0;
+ enum txrx t;
+
if (pna->peer_ref) {
int i;
/* case 1) above */
- D("%p: case 1, create everything", na);
+ ND("%p: case 1, create everything", na);
error = netmap_krings_create(na, 0);
if (error)
goto err;
@@ -338,10 +326,10 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
goto del_krings1;
/* update our hidden ring pointers */
- for (i = 0; i < na->num_tx_rings + 1; i++)
- na->tx_rings[i].save_ring = na->tx_rings[i].ring;
- for (i = 0; i < na->num_rx_rings + 1; i++)
- na->rx_rings[i].save_ring = na->rx_rings[i].ring;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
+ NMR(na, t)[i].save_ring = NMR(na, t)[i].ring;
+ }
/* now, create krings and rings of the other end */
error = netmap_krings_create(ona, 0);
@@ -352,27 +340,28 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
if (error)
goto del_krings2;
- for (i = 0; i < ona->num_tx_rings + 1; i++)
- ona->tx_rings[i].save_ring = ona->tx_rings[i].ring;
- for (i = 0; i < ona->num_rx_rings + 1; i++)
- ona->rx_rings[i].save_ring = ona->rx_rings[i].ring;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
+ NMR(ona, t)[i].save_ring = NMR(ona, t)[i].ring;
+ }
/* cross link the krings */
- for (i = 0; i < na->num_tx_rings; i++) {
- na->tx_rings[i].pipe = pna->peer->up.rx_rings + i;
- na->rx_rings[i].pipe = pna->peer->up.tx_rings + i;
- pna->peer->up.tx_rings[i].pipe = na->rx_rings + i;
- pna->peer->up.rx_rings[i].pipe = na->tx_rings + i;
+ for_rx_tx(t) {
+ enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < nma_get_nrings(na, t); i++) {
+ NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
+ NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
+ }
}
} else {
int i;
/* case 2) above */
/* recover the hidden rings */
ND("%p: case 2, hidden rings", na);
- for (i = 0; i < na->num_tx_rings + 1; i++)
- na->tx_rings[i].ring = na->tx_rings[i].save_ring;
- for (i = 0; i < na->num_rx_rings + 1; i++)
- na->rx_rings[i].ring = na->rx_rings[i].save_ring;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
+ NMR(na, t)[i].ring = NMR(na, t)[i].save_ring;
+ }
}
return 0;
@@ -423,6 +412,8 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
+ enum txrx t;
+
ND("%p: onoff %d", na, onoff);
if (onoff) {
na->na_flags |= NAF_NETMAP_ON;
@@ -443,11 +434,10 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
netmap_adapter_get(na);
pna->peer->peer_ref = 1;
/* hide our rings from netmap_mem_rings_delete */
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- na->tx_rings[i].ring = NULL;
- }
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- na->rx_rings[i].ring = NULL;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ NMR(na, t)[i].ring = NULL;
+ }
}
}
return 0;
@@ -481,6 +471,7 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona; /* na of the other end */
int i;
+ enum txrx t;
if (!pna->peer_ref) {
ND("%p: case 2, kept alive by peer", na);
@@ -496,10 +487,10 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
* cleanup-after-error path */
return;
}
- for (i = 0; i < ona->num_tx_rings + 1; i++)
- ona->tx_rings[i].ring = ona->tx_rings[i].save_ring;
- for (i = 0; i < ona->num_rx_rings + 1; i++)
- ona->rx_rings[i].ring = ona->rx_rings[i].save_ring;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
+ NMR(ona, t)[i].ring = NMR(ona, t)[i].save_ring;
+ }
netmap_mem_rings_delete(ona);
netmap_krings_delete(ona);
}
@@ -604,8 +595,6 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
mna->up.nm_krings_delete = netmap_pipe_krings_delete;
mna->up.nm_mem = pna->nm_mem;
mna->up.na_lut = pna->na_lut;
- mna->up.na_lut_objtotal = pna->na_lut_objtotal;
- mna->up.na_lut_objsize = pna->na_lut_objsize;
mna->up.num_tx_rings = 1;
mna->up.num_rx_rings = 1;
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index 6b1fe1fdf94b..c2af263c6351 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -222,6 +222,10 @@ struct nm_bridge {
* the lookup function, and allocated on attach
*/
struct nm_hash_ent ht[NM_BDG_HASH];
+
+#ifdef CONFIG_NET_NS
+ struct net *ns;
+#endif /* CONFIG_NET_NS */
};
const char*
@@ -234,12 +238,14 @@ netmap_bdg_name(struct netmap_vp_adapter *vp)
}
+#ifndef CONFIG_NET_NS
/*
* XXX in principle nm_bridges could be created dynamically
* Right now we have a static array and deletions are protected
* by an exclusive lock.
*/
-struct nm_bridge nm_bridges[NM_BRIDGES];
+struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
/*
@@ -283,10 +289,13 @@ static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
int i, l, namelen;
- struct nm_bridge *b = NULL;
+ struct nm_bridge *b = NULL, *bridges;
+ u_int num_bridges;
NMG_LOCK_ASSERT();
+ netmap_bns_getbridges(&bridges, &num_bridges);
+
namelen = strlen(NM_NAME); /* base length */
l = name ? strlen(name) : 0; /* actual length */
if (l < namelen) {
@@ -304,8 +313,8 @@ nm_find_bridge(const char *name, int create)
ND("--- prefix is '%.*s' ---", namelen, name);
/* lookup the name, remember empty slot if there is one */
- for (i = 0; i < NM_BRIDGES; i++) {
- struct nm_bridge *x = nm_bridges + i;
+ for (i = 0; i < num_bridges; i++) {
+ struct nm_bridge *x = bridges + i;
if (x->bdg_active_ports == 0) {
if (create && b == NULL)
@@ -318,7 +327,7 @@ nm_find_bridge(const char *name, int create)
break;
}
}
- if (i == NM_BRIDGES && b) { /* name not found, can create entry */
+ if (i == num_bridges && b) { /* name not found, can create entry */
/* initialize the bridge */
strncpy(b->bdg_basename, name, namelen);
ND("create new bridge %s with ports %d", b->bdg_basename,
@@ -331,6 +340,7 @@ nm_find_bridge(const char *name, int create)
b->bdg_ops.lookup = netmap_bdg_learning;
/* reset the MAC address table */
bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ NM_BNS_GET(b);
}
return b;
}
@@ -373,7 +383,7 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
l += sizeof(struct nm_bdg_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
- nrings = netmap_real_tx_rings(na);
+ nrings = netmap_real_rings(na, NR_TX);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
@@ -458,6 +468,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
if (lim == 0) {
ND("marking bridge %s as free", b->bdg_basename);
bzero(&b->bdg_ops, sizeof(b->bdg_ops));
+ NM_BNS_PUT(b);
}
}
@@ -632,7 +643,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
// KASSERT(na != NULL);
- D("checking %s", vpna->up.name);
+ ND("checking %s", vpna->up.name);
if (!strcmp(vpna->up.name, nr_name)) {
netmap_adapter_get(&vpna->up);
ND("found existing if %s refs %d", nr_name)
@@ -813,12 +824,15 @@ unlock_exit:
int
netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
{
- struct nm_bridge *b;
+ struct nm_bridge *b, *bridges;
struct netmap_adapter *na;
struct netmap_vp_adapter *vpna;
char *name = nmr->nr_name;
int cmd = nmr->nr_cmd, namelen = strlen(name);
int error = 0, i, j;
+ u_int num_bridges;
+
+ netmap_bns_getbridges(&bridges, &num_bridges);
switch (cmd) {
case NETMAP_BDG_NEWIF:
@@ -852,7 +866,6 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
break;
}
- name = name + b->bdg_namelen + 1;
error = ENOENT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
@@ -866,7 +879,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
*/
if (!strcmp(vpna->up.name, name)) {
/* bridge index */
- nmr->nr_arg1 = b - nm_bridges;
+ nmr->nr_arg1 = b - bridges;
nmr->nr_arg2 = i; /* port index */
error = 0;
break;
@@ -886,7 +899,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
NMG_LOCK();
for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = nm_bridges + i;
+ b = bridges + i;
if (j >= b->bdg_active_ports) {
j = 0; /* following bridges scan from 0 */
continue;
@@ -984,7 +997,7 @@ netmap_vp_krings_create(struct netmap_adapter *na)
u_int tailroom;
int error, i;
uint32_t *leases;
- u_int nrx = netmap_real_rx_rings(na);
+ u_int nrx = netmap_real_rings(na, NR_RX);
/*
* Leases are attached to RX rings on vale ports
@@ -1066,6 +1079,9 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
ft[ft_i].ft_flags = slot->flags;
ND("flags is 0x%x", slot->flags);
+ /* we do not use the buf changed flag, but we still need to reset it */
+ slot->flags &= ~NS_BUF_CHANGED;
+
/* this slot goes into a list so initialize the link field */
ft[ft_i].ft_next = NM_FT_NULL;
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
@@ -1180,7 +1196,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
*/
u_int
netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
- const struct netmap_vp_adapter *na)
+ struct netmap_vp_adapter *na)
{
uint8_t *buf = ft->ft_buf;
u_int buf_len = ft->ft_len;
@@ -1211,11 +1227,11 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
* The hash is somewhat expensive, there might be some
* worthwhile optimizations here.
*/
- if ((buf[6] & 1) == 0) { /* valid src */
+ if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
sh = nm_bridge_rthash(s); // XXX hash of source
/* update source port forwarding entry */
- ht[sh].mac = smac; /* XXX expire ? */
+ na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
if (netmap_verbose)
D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
@@ -1229,7 +1245,6 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
}
/* XXX otherwise return NM_BDG_UNKNOWN ? */
}
- *dst_ring = 0;
return dst;
}
@@ -1475,7 +1490,7 @@ retry:
if (dst_na->retry && retry) {
/* try to get some free slot from the previous run */
- dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
+ kring->nm_notify(kring, 0);
/* actually useful only for bwraps, since there
* the notify will trigger a txsync on the hwna. VALE ports
* have dst_na->retry == 0
@@ -1616,7 +1631,7 @@ retry:
kring->nr_hwtail = j;
still_locked = 0;
mtx_unlock(&kring->q_lock);
- dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
+ kring->nm_notify(kring, 0);
/* this is netmap_notify for VALE ports and
* netmap_bwrap_notify for bwrap. The latter will
* trigger a txsync on the underlying hwna
@@ -1649,29 +1664,28 @@ netmap_vp_txsync(struct netmap_kring *kring, int flags)
(struct netmap_vp_adapter *)kring->na;
u_int done;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = kring->rcur;
+ u_int const head = kring->rhead;
if (bridge_batch <= 0) { /* testing only */
- done = cur; // used all
+ done = head; // used all
goto done;
}
if (!na->na_bdg) {
- done = cur;
+ done = head;
goto done;
}
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
- done = nm_bdg_preflush(kring, cur);
+ done = nm_bdg_preflush(kring, head);
done:
- if (done != cur)
- D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
+ if (done != head)
+ D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
/*
* packets between 'done' and 'cur' are left unsent.
*/
kring->nr_hwcur = done;
kring->nr_hwtail = nm_prev(done, lim);
- nm_txsync_finalize(kring);
if (netmap_verbose)
D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
return 0;
@@ -1687,7 +1701,7 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
struct netmap_adapter *na = kring->na;
struct netmap_ring *ring = kring->ring;
u_int nm_i, lim = kring->nkr_num_slots - 1;
- u_int head = nm_rxsync_prologue(kring);
+ u_int head = kring->rhead;
int n;
if (head > lim) {
@@ -1717,8 +1731,6 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
kring->nr_hwcur = head;
}
- /* tell userspace that there are new packets */
- nm_rxsync_finalize(kring);
n = 0;
done:
return n;
@@ -1804,12 +1816,13 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
na->num_rx_desc = nmr->nr_rx_slots;
vpna->virt_hdr_len = 0;
vpna->mfs = 1514;
+ vpna->last_smac = ~0llu;
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
if (netmap_verbose)
D("max frame size %u", vpna->mfs);
- na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
+ na->na_flags |= NAF_BDG_MAYSLEEP;
na->nm_txsync = netmap_vp_txsync;
na->nm_rxsync = netmap_vp_rxsync;
na->nm_register = netmap_vp_reg;
@@ -1832,7 +1845,7 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
err:
if (na->nm_mem != NULL)
- netmap_mem_private_delete(na->nm_mem);
+ netmap_mem_delete(na->nm_mem);
free(vpna, M_DEVBUF);
return error;
}
@@ -1913,75 +1926,35 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
* The bridge wrapper then sends the packets through the bridge.
*/
static int
-netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
+netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
+ struct netmap_adapter *na = kring->na;
struct netmap_bwrap_adapter *bna = na->na_private;
- struct netmap_vp_adapter *hostna = &bna->host;
- struct netmap_kring *kring, *bkring;
+ struct netmap_kring *bkring;
struct netmap_ring *ring;
- int is_host_ring = ring_nr == na->num_rx_rings;
struct netmap_vp_adapter *vpna = &bna->up;
+ u_int ring_nr = kring->ring_id;
int error = 0;
if (netmap_verbose)
- D("%s %s%d 0x%x", na->name,
- (tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
-
- if (flags & NAF_DISABLE_NOTIFY) {
- /* the enabled/disabled state of the ring has changed,
- * propagate the info to the wrapper (with tx/rx swapped)
- */
- if (tx == NR_TX) {
- netmap_set_rxring(&vpna->up, ring_nr,
- na->tx_rings[ring_nr].nkr_stopped);
- } else {
- netmap_set_txring(&vpna->up, ring_nr,
- na->rx_rings[ring_nr].nkr_stopped);
- }
- return 0;
- }
+ D("%s %s 0x%x", na->name, kring->name, flags);
if (!nm_netmap_on(na))
return 0;
- /* we only care about receive interrupts */
- if (tx == NR_TX)
- return 0;
-
- kring = &na->rx_rings[ring_nr];
- ring = kring->ring;
+ bkring = &vpna->up.tx_rings[ring_nr];
+ ring = kring->ring; /* == kbkring->ring */
/* make sure the ring is not disabled */
if (nm_kr_tryget(kring))
return 0;
- if (is_host_ring && hostna->na_bdg == NULL) {
- error = bna->save_notify(na, ring_nr, tx, flags);
- goto put_out;
- }
-
- /* Here we expect ring->head = ring->cur = ring->tail
- * because everything has been released from the previous round.
- * However the ring is shared and we might have info from
- * the wrong side (the tx ring). Hence we overwrite with
- * the info from the rx kring.
- */
if (netmap_verbose)
- D("%s head %d cur %d tail %d (kring %d %d %d)", na->name,
- ring->head, ring->cur, ring->tail,
+ D("%s head %d cur %d tail %d", na->name,
kring->rhead, kring->rcur, kring->rtail);
- ring->head = kring->rhead;
- ring->cur = kring->rcur;
- ring->tail = kring->rtail;
-
- if (is_host_ring) {
- vpna = hostna;
- ring_nr = 0;
- }
- /* simulate a user wakeup on the rx ring */
- /* fetch packets that have arrived.
- * XXX maybe do this in a loop ?
+ /* simulate a user wakeup on the rx ring
+ * fetch packets that have arrived.
*/
error = kring->nm_sync(kring, 0);
if (error)
@@ -1992,33 +1965,18 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
goto put_out;
}
- /* new packets are ring->cur to ring->tail, and the bkring
- * had hwcur == ring->cur. So advance ring->cur to ring->tail
+ /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
+ * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
* to push all packets out.
*/
- ring->head = ring->cur = ring->tail;
-
- /* also set tail to what the bwrap expects */
- bkring = &vpna->up.tx_rings[ring_nr];
- ring->tail = bkring->nr_hwtail; // rtail too ?
+ bkring->rhead = bkring->rcur = kring->nr_hwtail;
- /* pass packets to the switch */
- nm_txsync_prologue(bkring); // XXX error checking ?
netmap_vp_txsync(bkring, flags);
/* mark all buffers as released on this ring */
- ring->head = ring->cur = kring->nr_hwtail;
- ring->tail = kring->rtail;
+ kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
/* another call to actually release the buffers */
- if (!is_host_ring) {
- error = kring->nm_sync(kring, 0);
- } else {
- /* mark all packets as released, as in the
- * second part of netmap_rxsync_from_host()
- */
- kring->nr_hwcur = kring->nr_hwtail;
- nm_rxsync_finalize(kring);
- }
+ error = kring->nm_sync(kring, 0);
put_out:
nm_kr_put(kring);
@@ -2035,6 +1993,7 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
struct netmap_adapter *hwna = bna->hwna;
struct netmap_vp_adapter *hostna = &bna->host;
int error;
+ enum txrx t;
ND("%s %s", na->name, onoff ? "on" : "off");
@@ -2047,8 +2006,6 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
* putting it in netmap mode
*/
hwna->na_lut = na->na_lut;
- hwna->na_lut_objtotal = na->na_lut_objtotal;
- hwna->na_lut_objsize = na->na_lut_objsize;
if (hostna->na_bdg) {
/* if the host rings have been attached to switch,
@@ -2056,8 +2013,6 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
* in the hostna also
*/
hostna->up.na_lut = na->na_lut;
- hostna->up.na_lut_objtotal = na->na_lut_objtotal;
- hostna->up.na_lut_objsize = na->na_lut_objsize;
}
/* cross-link the netmap rings
@@ -2066,13 +2021,12 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
* We need to do this now, after the initialization
* of the kring->ring pointers
*/
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
- hwna->tx_rings[i].ring = na->rx_rings[i].ring;
- }
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
- hwna->rx_rings[i].ring = na->tx_rings[i].ring;
+ for_rx_tx(t) {
+ enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < nma_get_nrings(na, r) + 1; i++) {
+ NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots;
+ NMR(hwna, t)[i].ring = NMR(na, r)[i].ring;
+ }
}
}
@@ -2087,14 +2041,29 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
netmap_vp_reg(&hostna->up, onoff);
if (onoff) {
- /* intercept the hwna nm_nofify callback */
- bna->save_notify = hwna->nm_notify;
- hwna->nm_notify = netmap_bwrap_intr_notify;
+ u_int i;
+ /* intercept the hwna nm_notify callback on the hw rings */
+ for (i = 0; i < hwna->num_rx_rings; i++) {
+ hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
+ hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
+ }
+ i = hwna->num_rx_rings; /* for safety */
+ /* save the host ring notify unconditionally */
+ hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
+ if (hostna->na_bdg) {
+ /* also intercept the host ring notify */
+ hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
+ }
} else {
- hwna->nm_notify = bna->save_notify;
- hwna->na_lut = NULL;
- hwna->na_lut_objtotal = 0;
- hwna->na_lut_objsize = 0;
+ u_int i;
+ /* reset all notify callbacks (including host ring) */
+ for (i = 0; i <= hwna->num_rx_rings; i++) {
+ hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
+ hwna->rx_rings[i].save_notify = NULL;
+ }
+ hwna->na_lut.lut = NULL;
+ hwna->na_lut.objtotal = 0;
+ hwna->na_lut.objsize = 0;
}
return 0;
@@ -2154,9 +2123,9 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
* The corresponding krings must point back to the
* hostna
*/
- hostna->tx_rings = na->tx_rings + na->num_tx_rings;
+ hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
hostna->tx_rings[0].na = hostna;
- hostna->rx_rings = na->rx_rings + na->num_rx_rings;
+ hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
hostna->rx_rings[0].na = hostna;
}
@@ -2180,74 +2149,59 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na)
/* notify method for the bridge-->hwna direction */
static int
-netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
+netmap_bwrap_notify(struct netmap_kring *kring, int flags)
{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *na = kring->na;
+ struct netmap_bwrap_adapter *bna = na->na_private;
struct netmap_adapter *hwna = bna->hwna;
- struct netmap_kring *kring, *hw_kring;
- struct netmap_ring *ring;
- u_int lim;
+ u_int ring_n = kring->ring_id;
+ u_int lim = kring->nkr_num_slots - 1;
+ struct netmap_kring *hw_kring;
int error = 0;
- if (tx == NR_TX)
- return EINVAL;
-
- kring = &na->rx_rings[ring_n];
+ ND("%s: na %s hwna %s",
+ (kring ? kring->name : "NULL!"),
+ (na ? na->name : "NULL!"),
+ (hwna ? hwna->name : "NULL!"));
hw_kring = &hwna->tx_rings[ring_n];
- ring = kring->ring;
- lim = kring->nkr_num_slots - 1;
+
+ if (nm_kr_tryget(hw_kring))
+ return 0;
if (!nm_netmap_on(hwna))
return 0;
- mtx_lock(&kring->q_lock);
/* first step: simulate a user wakeup on the rx ring */
- netmap_vp_rxsync_locked(kring, flags);
+ netmap_vp_rxsync(kring, flags);
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
- /* second step: the simulated user consumes all new packets */
- ring->head = ring->cur = ring->tail;
-
- /* third step: the new packets are sent on the tx ring
+ /* second step: the new packets are sent on the tx ring
* (which is actually the same ring)
*/
- /* set tail to what the hw expects */
- ring->tail = hw_kring->rtail;
- nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
+ hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
error = hw_kring->nm_sync(hw_kring, flags);
+ if (error)
+ goto out;
- /* fourth step: now we are back the rx ring */
+ /* third step: now we are back on the rx ring */
/* claim ownership on all hw owned bufs */
- ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
- ring->tail = kring->rtail; /* restore saved value of tail, for safety */
+ kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
- /* fifth step: the user goes to sleep again, causing another rxsync */
- netmap_vp_rxsync_locked(kring, flags);
+ /* fourth step: the user goes to sleep again, causing another rxsync */
+ netmap_vp_rxsync(kring, flags);
ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
- mtx_unlock(&kring->q_lock);
+out:
+ nm_kr_put(hw_kring);
return error;
}
-/* notify method for the bridge-->host-rings path */
-static int
-netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
-{
- struct netmap_bwrap_adapter *bna = na->na_private;
- struct netmap_adapter *port_na = &bna->up.up;
- if (tx == NR_TX || ring_n != 0)
- return EINVAL;
- return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
-}
-
-
/* nm_bdg_ctl callback for the bwrap.
* Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
* On attach, it needs to provide a fake netmap_priv_d structure and
@@ -2261,7 +2215,6 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
{
struct netmap_priv_d *npriv;
struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
- struct netmap_if *nifp;
int error = 0;
if (attach) {
@@ -2275,8 +2228,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
if (npriv == NULL)
return ENOMEM;
- nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
- if (!nifp) {
+ error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
+ if (error) {
bzero(npriv, sizeof(*npriv));
free(npriv, M_DEVBUF);
return error;
@@ -2323,6 +2276,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
struct netmap_adapter *na = NULL;
struct netmap_adapter *hostna = NULL;
int error = 0;
+ enum txrx t;
/* make sure the NIC is not already in use */
if (NETMAP_OWNED_BY_ANY(hwna)) {
@@ -2336,15 +2290,17 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
}
na = &bna->up.up;
+ na->na_private = bna;
strncpy(na->name, nr_name, sizeof(na->name));
/* fill the ring data for the bwrap adapter with rx/tx meanings
* swapped. The real cross-linking will be done during register,
* when all the krings will have been created.
*/
- na->num_rx_rings = hwna->num_tx_rings;
- na->num_tx_rings = hwna->num_rx_rings;
- na->num_tx_desc = hwna->num_rx_desc;
- na->num_rx_desc = hwna->num_tx_desc;
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ nma_set_nrings(na, t, nma_get_nrings(hwna, r));
+ nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
+ }
na->nm_dtor = netmap_bwrap_dtor;
na->nm_register = netmap_bwrap_register;
// na->nm_txsync = netmap_bwrap_txsync;
@@ -2376,13 +2332,14 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
hostna = &bna->host.up;
snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
hostna->ifp = hwna->ifp;
- hostna->num_tx_rings = 1;
- hostna->num_tx_desc = hwna->num_rx_desc;
- hostna->num_rx_rings = 1;
- hostna->num_rx_desc = hwna->num_tx_desc;
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t);
+ nma_set_nrings(hostna, t, 1);
+ nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
+ }
// hostna->nm_txsync = netmap_bwrap_host_txsync;
// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
- hostna->nm_notify = netmap_bwrap_host_notify;
+ hostna->nm_notify = netmap_bwrap_notify;
hostna->nm_mem = na->nm_mem;
hostna->na_private = bna;
hostna->na_vp = &bna->up;
@@ -2416,7 +2373,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
return 0;
err_free:
- netmap_mem_private_delete(na->nm_mem);
+ netmap_mem_delete(na->nm_mem);
err_put:
hwna->na_vp = hwna->na_hostvp = NULL;
netmap_adapter_put(hwna);
@@ -2425,13 +2382,54 @@ err_put:
}
+struct nm_bridge *
+netmap_init_bridges2(u_int n)
+{
+ int i;
+ struct nm_bridge *b;
+
+ b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (b == NULL)
+ return NULL;
+ for (i = 0; i < n; i++)
+ BDG_RWINIT(&b[i]);
+ return b;
+}
void
-netmap_init_bridges(void)
+netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
{
int i;
- bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
- for (i = 0; i < NM_BRIDGES; i++)
- BDG_RWINIT(&nm_bridges[i]);
+
+ if (b == NULL)
+ return;
+
+ for (i = 0; i < n; i++)
+ BDG_RWDESTROY(&b[i]);
+ free(b, M_DEVBUF);
+}
+
+int
+netmap_init_bridges(void)
+{
+#ifdef CONFIG_NET_NS
+ return netmap_bns_register();
+#else
+ nm_bridges = netmap_init_bridges2(NM_BRIDGES);
+ if (nm_bridges == NULL)
+ return ENOMEM;
+ return 0;
+#endif
+}
+
+void
+netmap_uninit_bridges(void)
+{
+#ifdef CONFIG_NET_NS
+ netmap_bns_unregister();
+#else
+ netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
+#endif
}
#endif /* WITH_VALE */
diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c
index a67d367cfdf9..677d397a1c7a 100644
--- a/sys/dev/re/if_re.c
+++ b/sys/dev/re/if_re.c
@@ -304,6 +304,7 @@ static void re_set_linkspeed (struct rl_softc *);
#ifdef DEV_NETMAP /* see ixgbe.c for details */
#include <dev/netmap/if_re_netmap.h>
+MODULE_DEPEND(re, netmap, 1, 1, 1);
#endif /* !DEV_NETMAP */
#ifdef RE_DIAG
diff --git a/sys/net/netmap.h b/sys/net/netmap.h
index 1203bfb37fff..88b2957502ab 100644
--- a/sys/net/netmap.h
+++ b/sys/net/netmap.h
@@ -157,6 +157,11 @@ struct netmap_slot {
/*
* must be set whenever buf_idx is changed (as it might be
* necessary to recompute the physical address and mapping)
+ *
+ * It is also set by the kernel whenever the buf_idx is
+ * changed internally (e.g., by pipes). Applications may
+ * use this information to know when they can reuse the
+ * contents of previously prepared buffers.
*/
#define NS_REPORT 0x0002 /* ask the hardware to report results */
@@ -513,6 +518,9 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
/* monitor uses the NR_REG to select the rings to monitor */
#define NR_MONITOR_TX 0x100
#define NR_MONITOR_RX 0x200
+#define NR_ZCOPY_MON 0x400
+/* request exclusive access to the selected rings */
+#define NR_EXCLUSIVE 0x800
/*
diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
index aab6c358de73..130117db7a2e 100644
--- a/sys/net/netmap_user.h
+++ b/sys/net/netmap_user.h
@@ -284,6 +284,12 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
* -NN bind individual NIC ring pair
* {NN bind master side of pipe NN
* }NN bind slave side of pipe NN
+ * a suffix starting with / and the following flags,
+ * in any order:
+ * x exclusive access
+ * z zero copy monitor
+ * t monitor tx side
+ * r monitor rx side
*
* req provides the initial values of nmreq before parsing ifname.
* Remember that the ifname parsing will override the ring
@@ -351,9 +357,12 @@ nm_open(const char *ifname, const struct nmreq *req,
struct nm_desc *d = NULL;
const struct nm_desc *parent = arg;
u_int namelen;
- uint32_t nr_ringid = 0, nr_flags;
+ uint32_t nr_ringid = 0, nr_flags, nr_reg;
const char *port = NULL;
- const char *errmsg = NULL;
+#define MAXERRMSG 80
+ char errmsg[MAXERRMSG] = "";
+ enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state;
+ long num;
if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
errno = 0; /* name not recognised, not an error */
@@ -362,60 +371,112 @@ nm_open(const char *ifname, const struct nmreq *req,
if (ifname[0] == 'n')
ifname += 7;
/* scan for a separator */
- for (port = ifname; *port && !index("-*^{}", *port); port++)
+ for (port = ifname; *port && !index("-*^{}/", *port); port++)
;
namelen = port - ifname;
if (namelen >= sizeof(d->req.nr_name)) {
- errmsg = "name too long";
+ snprintf(errmsg, MAXERRMSG, "name too long");
goto fail;
}
- switch (*port) {
- default: /* '\0', no suffix */
- nr_flags = NR_REG_ALL_NIC;
- break;
- case '-': /* one NIC */
- nr_flags = NR_REG_ONE_NIC;
- nr_ringid = atoi(port + 1);
- break;
- case '*': /* NIC and SW, ignore port */
- nr_flags = NR_REG_NIC_SW;
- if (port[1]) {
- errmsg = "invalid port for nic+sw";
- goto fail;
- }
- break;
- case '^': /* only sw ring */
- nr_flags = NR_REG_SW;
- if (port[1]) {
- errmsg = "invalid port for sw ring";
- goto fail;
+ p_state = P_START;
+ nr_flags = NR_REG_ALL_NIC; /* default for no suffix */
+ while (*port) {
+ switch (p_state) {
+ case P_START:
+ switch (*port) {
+ case '^': /* only SW ring */
+ nr_flags = NR_REG_SW;
+ p_state = P_RNGSFXOK;
+ break;
+ case '*': /* NIC and SW */
+ nr_flags = NR_REG_NIC_SW;
+ p_state = P_RNGSFXOK;
+ break;
+ case '-': /* one NIC ring pair */
+ nr_flags = NR_REG_ONE_NIC;
+ p_state = P_GETNUM;
+ break;
+ case '{': /* pipe (master endpoint) */
+ nr_flags = NR_REG_PIPE_MASTER;
+ p_state = P_GETNUM;
+ break;
+ case '}': /* pipe (slave endpoint) */
+ nr_flags = NR_REG_PIPE_SLAVE;
+ p_state = P_GETNUM;
+ break;
+ case '/': /* start of flags */
+ p_state = P_FLAGS;
+ break;
+ default:
+ snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port);
+ goto fail;
+ }
+ port++;
+ break;
+ case P_RNGSFXOK:
+ switch (*port) {
+ case '/':
+ p_state = P_FLAGS;
+ break;
+ default:
+ snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port);
+ goto fail;
+ }
+ port++;
+ break;
+ case P_GETNUM:
+ num = strtol(port, (char **)&port, 10);
+ if (num < 0 || num >= NETMAP_RING_MASK) {
+ snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)",
+ num, NETMAP_RING_MASK);
+ goto fail;
+ }
+ nr_ringid = num & NETMAP_RING_MASK;
+ p_state = P_RNGSFXOK;
+ break;
+ case P_FLAGS:
+ case P_FLAGSOK:
+ switch (*port) {
+ case 'x':
+ nr_flags |= NR_EXCLUSIVE;
+ break;
+ case 'z':
+ nr_flags |= NR_ZCOPY_MON;
+ break;
+ case 't':
+ nr_flags |= NR_MONITOR_TX;
+ break;
+ case 'r':
+ nr_flags |= NR_MONITOR_RX;
+ break;
+ default:
+ snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port);
+ goto fail;
+ }
+ port++;
+ p_state = P_FLAGSOK;
+ break;
}
- break;
- case '{':
- nr_flags = NR_REG_PIPE_MASTER;
- nr_ringid = atoi(port + 1);
- break;
- case '}':
- nr_flags = NR_REG_PIPE_SLAVE;
- nr_ringid = atoi(port + 1);
- break;
}
-
- if (nr_ringid >= NETMAP_RING_MASK) {
- errmsg = "invalid ringid";
+ if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) {
+ snprintf(errmsg, MAXERRMSG, "unexpected end of port name");
goto fail;
}
-
+ ND("flags: %s %s %s %s",
+ (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "",
+ (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "",
+ (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "",
+ (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : "");
d = (struct nm_desc *)calloc(1, sizeof(*d));
if (d == NULL) {
- errmsg = "nm_desc alloc failure";
+ snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure");
errno = ENOMEM;
return NULL;
}
d->self = d; /* set this early so nm_close() works */
d->fd = open("/dev/netmap", O_RDWR);
if (d->fd < 0) {
- errmsg = "cannot open /dev/netmap";
+ snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno));
goto fail;
}
@@ -464,7 +525,7 @@ nm_open(const char *ifname, const struct nmreq *req,
d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
if (ioctl(d->fd, NIOCREGIF, &d->req)) {
- errmsg = "NIOCREGIF failed";
+ snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno));
goto fail;
}
@@ -479,7 +540,7 @@ nm_open(const char *ifname, const struct nmreq *req,
d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
d->fd, 0);
if (d->mem == MAP_FAILED) {
- errmsg = "mmap failed";
+ snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
goto fail;
}
d->done_mmap = 1;
@@ -495,20 +556,22 @@ nm_open(const char *ifname, const struct nmreq *req,
(char *)d->mem + d->memsize;
}
- if (d->req.nr_flags == NR_REG_SW) { /* host stack */
+ nr_reg = d->req.nr_flags & NR_REG_MASK;
+
+ if (nr_reg == NR_REG_SW) { /* host stack */
d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
- } else if (d->req.nr_flags == NR_REG_ALL_NIC) { /* only nic */
+ } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings - 1;
d->last_rx_ring = d->req.nr_rx_rings - 1;
- } else if (d->req.nr_flags == NR_REG_NIC_SW) {
+ } else if (nr_reg == NR_REG_NIC_SW) {
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings;
d->last_rx_ring = d->req.nr_rx_rings;
- } else if (d->req.nr_flags == NR_REG_ONE_NIC) {
+ } else if (nr_reg == NR_REG_ONE_NIC) {
/* XXX check validity */
d->first_tx_ring = d->last_tx_ring =
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK;
@@ -541,7 +604,7 @@ nm_open(const char *ifname, const struct nmreq *req,
fail:
nm_close(d);
- if (errmsg)
+ if (errmsg[0])
D("%s %s", errmsg, ifname);
if (errno == 0)
errno = EINVAL;