Diffstat (limited to 'sys/dev')
-rw-r--r--  sys/dev/e1000/if_em.c                2
-rw-r--r--  sys/dev/e1000/if_igb.c               4
-rw-r--r--  sys/dev/e1000/if_lem.c               2
-rw-r--r--  sys/dev/ixgbe/ixgbe.c                2
-rw-r--r--  sys/dev/netmap/if_em_netmap.h       52
-rw-r--r--  sys/dev/netmap/if_igb_netmap.h      52
-rw-r--r--  sys/dev/netmap/if_lem_netmap.h      57
-rw-r--r--  sys/dev/netmap/if_re_netmap.h       84
-rw-r--r--  sys/dev/netmap/ixgbe_netmap.h       74
-rw-r--r--  sys/dev/netmap/netmap.c            835
-rw-r--r--  sys/dev/netmap/netmap_freebsd.c     26
-rw-r--r--  sys/dev/netmap/netmap_generic.c   1008
-rw-r--r--  sys/dev/netmap/netmap_kern.h       490
-rw-r--r--  sys/dev/netmap/netmap_mbq.c         15
-rw-r--r--  sys/dev/netmap/netmap_mbq.h          2
-rw-r--r--  sys/dev/netmap/netmap_mem2.c        20
-rw-r--r--  sys/dev/netmap/netmap_mem2.h         2
-rw-r--r--  sys/dev/netmap/netmap_vale.c       437
18 files changed, 1660 insertions(+), 1504 deletions(-)
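
The driver hunks below stop reading kring->nr_hwavail directly and instead call nm_kr_rxspace() and nm_kr_txempty(), helpers whose definitions belong to the netmap_kern.h changes listed in the diffstat but not shown in this excerpt. The standalone sketch that follows only illustrates the semantics those calls are assumed to have under the new head/cur/tail bookkeeping; the sketch_* names and the reduced struct are illustrative, not the committed identifiers.

    #include <stdint.h>

    struct sketch_kring {              /* only the fields the sketch needs */
        uint32_t nr_hwcur, nr_hwtail, rcur, nkr_num_slots;
    };

    /* RX slots holding packets that the kernel has received but
     * userspace has not yet released (hwcur .. hwtail, modular). */
    static inline uint32_t
    sketch_kr_rxspace(const struct sketch_kring *k)
    {
        int space = (int)k->nr_hwtail - (int)k->nr_hwcur;

        if (space < 0)
            space += k->nkr_num_slots;
        return (uint32_t)space;
    }

    /* No free TX slots left for the application: time to reclaim
     * completed transmissions. */
    static inline int
    sketch_kr_txempty(const struct sketch_kring *k)
    {
        return k->rcur == k->nr_hwtail;
    }
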
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 580407a529fd..428612a4a695 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -4352,7 +4352,7 @@ em_initialize_receive_unit(struct adapter *adapter)
* preserve the rx buffers passed to userspace.
*/
if (ifp->if_capenable & IFCAP_NETMAP)
- rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
+ rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
#endif /* DEV_NETMAP */
E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
}
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index 57e4f893ab35..2134e29625cc 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -4630,13 +4630,13 @@ igb_initialize_receive_units(struct adapter *adapter)
* an init() while a netmap client is active must
* preserve the rx buffers passed to userspace.
* In this driver it means we adjust RDT to
- * somthing different from next_to_refresh
+ * something different from next_to_refresh
* (which is not used in netmap mode).
*/
if (ifp->if_capenable & IFCAP_NETMAP) {
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[i];
- int t = rxr->next_to_refresh - kring->nr_hwavail;
+ int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
if (t >= adapter->num_rx_desc)
t -= adapter->num_rx_desc;
diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c
index a3da50c176ed..8014a0f9fde7 100644
--- a/sys/dev/e1000/if_lem.c
+++ b/sys/dev/e1000/if_lem.c
@@ -3367,7 +3367,7 @@ lem_initialize_receive_unit(struct adapter *adapter)
#ifdef DEV_NETMAP
/* preserve buffers already made available to clients */
if (ifp->if_capenable & IFCAP_NETMAP)
- rctl -= NA(adapter->ifp)->rx_rings[0].nr_hwavail;
+ rctl -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[0]);
#endif /* DEV_NETMAP */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl);
diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c
index 740f7709e5b2..6dfec02cc8d9 100644
--- a/sys/dev/ixgbe/ixgbe.c
+++ b/sys/dev/ixgbe/ixgbe.c
@@ -1245,7 +1245,7 @@ ixgbe_init_locked(struct adapter *adapter)
if (ifp->if_capenable & IFCAP_NETMAP) {
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[i];
- int t = na->num_rx_desc - 1 - kring->nr_hwavail;
+ int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
} else
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
index dbbee4222407..17b4c4fd2e14 100644
--- a/sys/dev/netmap/if_em_netmap.h
+++ b/sys/dev/netmap/if_em_netmap.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -120,9 +120,9 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, new_slots;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_txsync_prologue(kring, &new_slots);
+ u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
@@ -130,9 +130,6 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
- if (cur > lim) /* error checking in nm_txsync_prologue() */
- return netmap_ring_reinit(kring);
-
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
@@ -141,9 +138,9 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) { /* we have new packets to send */
+ if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
@@ -175,9 +172,7 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwcur = cur; /* the saved ring->cur */
- /* decrease avail by # of packets sent minus previous ones */
- kring->nr_hwavail -= new_slots;
+ kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
@@ -190,26 +185,20 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/*
* Second part: reclaim buffers for completed transmissions.
*/
- if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
- int delta;
-
+ if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
- delta = nic_i - txr->next_to_clean;
- if (delta) {
- /* some completed, increment hwavail. */
- if (delta < 0)
- delta += kring->nkr_num_slots;
+ if (nic_i != txr->next_to_clean) {
txr->next_to_clean = nic_i;
- kring->nr_hwavail += delta;
+ kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
- nm_txsync_finalize(kring, cur);
+ nm_txsync_finalize(kring);
return 0;
}
@@ -226,16 +215,16 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, resvd;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
+ u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
- if (cur > lim)
+ if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@@ -251,7 +240,7 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
- for (n = 0; ; n++) {
+ for (n = 0; ; n++) { // XXX no need to count
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->status);
@@ -268,7 +257,7 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
}
if (n) { /* update the state variables */
rxr->next_to_check = nic_i;
- kring->nr_hwavail += n;
+ kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
@@ -277,9 +266,9 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) {
+ if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
@@ -302,8 +291,7 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwavail -= n;
- kring->nr_hwcur = cur;
+ kring->nr_hwcur = head;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
@@ -311,12 +299,12 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
- nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ nic_i = nm_prev(nic_i, lim);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
- ring->avail = kring->nr_hwavail - resvd;
+ nm_rxsync_finalize(kring);
return 0;
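
The reclaim branch just above (and its counterparts in the igb, lem, re and ixgbe hunks further down) now derives nr_hwtail straight from the NIC's next-to-clean index: convert it to the netmap index space, then step back one slot so the ring always keeps one slot empty. Below is a minimal arithmetic sketch of that step, under the assumption that nkr_hwofs has already been normalized into [0..lim]; the sketch_* names are illustrative only.

    #include <stdint.h>

    /* nm_prev() as used in the hunk above: one step back, with wraparound. */
    static inline uint32_t
    sketch_nm_prev(uint32_t i, uint32_t lim)
    {
        return (i == 0) ? lim : i - 1;
    }

    /* Equivalent of nm_prev(netmap_idx_n2k(kring, nic_i), lim) on plain
     * integers: translate the NIC index, then step back one slot. */
    static inline uint32_t
    sketch_new_hwtail(uint32_t nic_next_to_clean, uint32_t hwofs, uint32_t lim)
    {
        uint32_t nm_i = nic_next_to_clean + hwofs;  /* netmap_idx_n2k() */

        if (nm_i > lim)
            nm_i -= lim + 1;
        return sketch_nm_prev(nm_i, lim);
    }
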
diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
index b91d0baba06f..e1929f0918e2 100644
--- a/sys/dev/netmap/if_igb_netmap.h
+++ b/sys/dev/netmap/if_igb_netmap.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -88,9 +88,9 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, new_slots;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_txsync_prologue(kring, &new_slots);
+ u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
@@ -101,9 +101,6 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
u32 olinfo_status =
(adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
- if (cur > lim) /* error checking in nm_txsync_prologue() */
- return netmap_ring_reinit(kring);
-
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
@@ -112,9 +109,9 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) { /* we have new packets to send */
+ if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
@@ -155,9 +152,7 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwcur = cur; /* the saved ring->cur */
- /* decrease avail by # of packets sent minus previous ones */
- kring->nr_hwavail -= new_slots;
+ kring->nr_hwcur = head;
/* Set the watchdog XXX ? */
txr->queue_status = IGB_QUEUE_WORKING;
@@ -174,26 +169,18 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/*
* Second part: reclaim buffers for completed transmissions.
*/
- if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
- int delta;
-
+ if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
- delta = nic_i - txr->next_to_clean;
- if (delta) {
- /* some completed, increment hwavail. */
- if (delta < 0)
- delta += kring->nkr_num_slots;
- txr->next_to_clean = nic_i;
- kring->nr_hwavail += delta;
- }
+ txr->next_to_clean = nic_i;
+ kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
- nm_txsync_finalize(kring, cur);
+ nm_txsync_finalize(kring);
return 0;
}
@@ -210,16 +197,16 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, resvd;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
+ u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
- if (cur > lim)
+ if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@@ -250,7 +237,7 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
}
if (n) { /* update the state variables */
rxr->next_to_check = nic_i;
- kring->nr_hwavail += n;
+ kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
@@ -259,9 +246,9 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) {
+ if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
@@ -284,8 +271,7 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwavail -= n;
- kring->nr_hwcur = cur;
+ kring->nr_hwcur = head;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
@@ -293,12 +279,12 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
- nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ nic_i = nm_prev(nic_i, lim);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
- ring->avail = kring->nr_hwavail - resvd;
+ nm_rxsync_finalize(kring);
return 0;
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
index 8ad3b7a2a352..4fce5c988d09 100644
--- a/sys/dev/netmap/if_lem_netmap.h
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -91,18 +91,14 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, new_slots;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_txsync_prologue(kring, &new_slots);
+ u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
- if (cur > lim) /* error checking in nm_txsync_prologue() */
- return netmap_ring_reinit(kring);
-
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
@@ -111,9 +107,9 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) { /* we have new packets to send */
+ if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
@@ -145,9 +141,7 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwcur = cur; /* the saved ring->cur */
- /* decrease avail by # of packets sent minus previous ones */
- kring->nr_hwavail -= new_slots;
+ kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
@@ -160,26 +154,19 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/*
* Second part: reclaim buffers for completed transmissions.
*/
- if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
- int delta;
-
+ if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
+ kring->last_reclaim = ticks;
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
- delta = nic_i - adapter->next_tx_to_clean;
- if (delta) {
- /* some completed, increment hwavail. */
- if (delta < 0)
- delta += kring->nkr_num_slots;
- adapter->next_tx_to_clean = nic_i;
- kring->nr_hwavail += delta;
- }
+ adapter->next_tx_to_clean = nic_i;
+ kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
- nm_txsync_finalize(kring, cur);
+ nm_txsync_finalize(kring);
return 0;
}
@@ -196,15 +183,15 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, resvd;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
+ u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
- if (cur > lim)
+ if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@@ -241,9 +228,14 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
+ ND("%d new packets at nic %d nm %d tail %d",
+ n,
+ adapter->next_rx_desc_to_check,
+ netmap_idx_n2k(kring, adapter->next_rx_desc_to_check),
+ kring->nr_hwtail);
adapter->next_rx_desc_to_check = nic_i;
// ifp->if_ipackets += n;
- kring->nr_hwavail += n;
+ kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
@@ -252,9 +244,9 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) {
+ if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
@@ -277,20 +269,19 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwavail -= n;
- kring->nr_hwcur = cur;
+ kring->nr_hwcur = head;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
- nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ nic_i = nm_prev(nic_i, lim);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}
/* tell userspace that there might be new packets */
- ring->avail = kring->nr_hwavail - resvd;
+ nm_rxsync_finalize(kring);
return 0;
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
index 2c7ba060cffd..10abe4f49f83 100644
--- a/sys/dev/netmap/if_re_netmap.h
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -72,17 +72,14 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, new_slots;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_txsync_prologue(kring, &new_slots);
+ u_int const head = kring->rhead;
/* device-specific */
struct rl_softc *sc = ifp->if_softc;
struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
- if (cur > lim) /* error checking in nm_txsync_prologue() */
- return netmap_ring_reinit(kring);
-
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ?
@@ -91,11 +88,11 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) { /* we have new packets to send */
+ if (nm_i != head) { /* we have new packets to send */
nic_i = sc->rl_ldata.rl_tx_prodidx;
// XXX or netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
@@ -132,9 +129,7 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i = nm_next(nic_i, lim);
}
sc->rl_ldata.rl_tx_prodidx = nic_i;
- /* decrease avail by # of packets sent minus previous ones */
- kring->nr_hwcur = cur; /* the saved ring->cur */
- kring->nr_hwavail -= new_slots;
+ kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
@@ -148,7 +143,7 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/*
* Second part: reclaim buffers for completed transmissions.
*/
- if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
+ if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
nic_i = sc->rl_ldata.rl_tx_considx;
for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx;
n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) {
@@ -160,11 +155,11 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
if (n > 0) {
sc->rl_ldata.rl_tx_considx = nic_i;
sc->rl_ldata.rl_tx_free += n;
- kring->nr_hwavail += n;
+ kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
- nm_txsync_finalize(kring, cur);
+ nm_txsync_finalize(kring);
return 0;
}
@@ -181,16 +176,16 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, resvd;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
+ u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct rl_softc *sc = ifp->if_softc;
struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
- if (cur > lim)
+ if (head > lim)
return netmap_ring_reinit(kring);
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
@@ -202,16 +197,17 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*
* This device uses all the buffers in the ring, so we need
* another termination condition in addition to RL_RDESC_STAT_OWN
- * cleared (all buffers could have it cleared. The easiest one
- * is to limit the amount of data reported up to 'lim'
+ * cleared (all buffers could have it cleared). The easiest one
+ * is to stop right before nm_hwcur.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
+ uint32_t stop_i = nm_prev(kring->nr_hwcur, lim);
nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
nm_i = netmap_idx_n2k(kring, nic_i);
- for (n = kring->nr_hwavail; n < lim ; n++) {
+ while (nm_i != stop_i) {
struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i];
uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
uint32_t total_len;
@@ -226,14 +222,12 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);
+ // sc->rl_ifp->if_ipackets++;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- if (n != kring->nr_hwavail) {
- sc->rl_ldata.rl_rx_prodidx = nic_i;
- sc->rl_ifp->if_ipackets += n - kring->nr_hwavail;
- kring->nr_hwavail = n;
- }
+ sc->rl_ldata.rl_rx_prodidx = nic_i;
+ kring->nr_hwtail = nm_i;
kring->nr_kflags &= ~NKR_PENDINTR;
}
@@ -241,9 +235,9 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) {
+ if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
@@ -272,8 +266,7 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwavail -= n;
- kring->nr_hwcur = cur;
+ kring->nr_hwcur = head;
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list_map,
@@ -281,7 +274,7 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
}
/* tell userspace that there might be new packets */
- ring->avail = kring->nr_hwavail - resvd;
+ nm_rxsync_finalize(kring);
return 0;
@@ -336,36 +329,35 @@ re_netmap_rx_init(struct rl_softc *sc)
struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
struct rl_desc *desc = sc->rl_ldata.rl_rx_list;
uint32_t cmdstat;
- int i, n, max_avail;
+ uint32_t nic_i, max_avail;
+ uint32_t const n = sc->rl_ldata.rl_rx_desc_cnt;
if (!slot)
return;
- n = sc->rl_ldata.rl_rx_desc_cnt;
/*
- * Userspace owned hwavail packets before the reset,
- * so the NIC that last hwavail descriptors of the ring
- * are still owned by the driver (and keep one empty).
+ * Do not release the slots owned by userspace,
+ * and also keep one empty.
*/
- max_avail = n - 1 - na->rx_rings[0].nr_hwavail;
- for (i = 0; i < n; i++) {
+ max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
+ for (nic_i = 0; nic_i < n; nic_i++) {
void *addr;
uint64_t paddr;
- int l = netmap_idx_n2k(&na->rx_rings[0], i);
+ uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i);
- addr = PNMB(slot + l, &paddr);
+ addr = PNMB(slot + nm_i, &paddr);
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
- sc->rl_ldata.rl_rx_desc[i].rx_dmamap, addr);
+ sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, addr);
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
- sc->rl_ldata.rl_rx_desc[i].rx_dmamap, BUS_DMASYNC_PREREAD);
- desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
- desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+ sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, BUS_DMASYNC_PREREAD);
+ desc[nic_i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
+ desc[nic_i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
cmdstat = NETMAP_BUF_SIZE;
- if (i == n - 1) /* mark the end of ring */
+ if (nic_i == n - 1) /* mark the end of ring */
cmdstat |= RL_RDESC_CMD_EOR;
- if (i < max_avail)
+ if (nic_i < max_avail)
cmdstat |= RL_RDESC_CMD_OWN;
- desc[i].rl_cmdstat = htole32(cmdstat);
+ desc[nic_i].rl_cmdstat = htole32(cmdstat);
}
}
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index 4dea6639d325..a617cc4c2429 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -141,14 +141,13 @@ ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
/*
* Reconcile kernel and user view of the transmit ring.
*
- * Userspace wants to send packets up to the one before ring->cur,
+ * All information is in the kring.
+ * Userspace wants to send packets up to the one before kring->rhead,
* kernel knows kring->nr_hwcur is the first unsent packet.
*
* Here we push packets out (as many as possible), and possibly
* reclaim buffers from previously completed transmission.
*
- * ring->avail is not used on input, but it is updated on return.
- *
* The caller (netmap) guarantees that there is only one instance
* running at any time. Any interference with other driver
* methods should be handled by the individual drivers.
@@ -161,9 +160,9 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, new_slots;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_txsync_prologue(kring, &new_slots);
+ u_int const head = kring->rhead;
/*
* interrupts on every tx packet are expensive so request
* them every half ring, or where NS_REPORT is set
@@ -175,9 +174,6 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
int reclaim_tx;
- if (cur > lim) /* error checking in nm_txsync_prologue() */
- return netmap_ring_reinit(kring);
-
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
@@ -199,7 +195,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
/*
- * If we have packets to send (kring->nr_hwcur != ring->cur)
+ * If we have packets to send (kring->nr_hwcur != kring->rhead)
* iterate over the netmap ring, fetch length and update
* the corresponding slot in the NIC ring. Some drivers also
* need to update the buffer's physical address in the NIC slot
@@ -217,13 +213,13 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) { /* we have new packets to send */
+ if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
__builtin_prefetch(&ring->slot[nm_i]);
__builtin_prefetch(&txr->tx_buffers[nic_i]);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
@@ -262,9 +258,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwcur = cur; /* the saved ring->cur */
- /* decrease avail by # of packets sent minus previous ones */
- kring->nr_hwavail -= new_slots;
+ kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
@@ -281,7 +275,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
if (flags & NAF_FORCE_RECLAIM) {
reclaim_tx = 1; /* forced reclaim */
- } else if (kring->nr_hwavail > 0) {
+ } else if (!nm_kr_txempty(kring)) {
reclaim_tx = 0; /* have buffers, no reclaim */
} else {
/*
@@ -321,21 +315,13 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i -= kring->nkr_num_slots;
}
if (nic_i != txr->next_to_clean) {
- n = (nic_i + lim + 1) - txr->next_to_clean;
- if (n > lim)
- n -= lim + 1;
/* some tx completed, increment avail */
txr->next_to_clean = nic_i;
- kring->nr_hwavail += n;
- if (kring->nr_hwavail > lim) {
- RD(5, "bad hwavail %d",
- kring->nr_hwavail);
- return netmap_ring_reinit(kring);
- }
+ kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
- nm_txsync_finalize(kring, cur);
+ nm_txsync_finalize(kring);
return 0;
}
@@ -347,14 +333,9 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* The caller guarantees a single invocations, but races against
* the rest of the driver should be handled here.
*
- * When called, userspace has released buffers up to
- * ring->cur - ring->reserved (last one excluded).
- *
- * The last interrupt reported kring->nr_hwavail slots available
- * after kring->nr_hwcur.
- * We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, make the descriptors available for the next reads,
- * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
+ * On call, kring->rhead is the first packet that userspace wants
+ * to keep, and kring->rcur is the wakeup point.
+ * The kernel has previously reported packets up to kring->rtail.
*
* If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
* of whether or not we received an interrupt.
@@ -367,16 +348,16 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
- u_int n, resvd;
+ u_int n;
u_int const lim = kring->nkr_num_slots - 1;
- u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
+ u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
- if (cur > lim)
+ if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@@ -391,8 +372,8 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* and they may differ in case if_init() has been called while
* in netmap mode. For the receive ring we have
*
- * nm_i = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
* nic_i = rxr->next_to_check;
+ * nm_i = kring->nr_hwtail (previous)
* and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
@@ -402,7 +383,7 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
int crclen = ix_crcstrip ? 0 : 4;
uint16_t slot_flags = kring->nkr_slot_flags;
- nic_i = rxr->next_to_check;
+ nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
@@ -425,23 +406,23 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
ix_rx_miss_bufs += n;
}
rxr->next_to_check = nic_i;
- kring->nr_hwavail += n;
+ kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
- * (kring->nr_hwcur to ring->cur - ring->reserved excluded),
+ * (kring->nr_hwcur to kring->rhead excluded),
* and make the buffers available for reception.
* As usual nm_i is the index in the netmap ring,
* nic_i is the index in the NIC ring, and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*/
nm_i = kring->nr_hwcur;
- if (nm_i != cur) {
+ if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
- for (n = 0; nm_i != cur; n++) {
+ for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
@@ -464,8 +445,7 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwavail -= n;
- kring->nr_hwcur = cur;
+ kring->nr_hwcur = head;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
@@ -473,12 +453,12 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
- nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ nic_i = nm_prev(nic_i, lim);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
- ring->avail = kring->nr_hwavail - resvd;
+ nm_rxsync_finalize(kring);
return 0;
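
The netmap.c changes that follow rebuild nm_txsync_prologue() and nm_rxsync_prologue() around one ordering invariant, hwcur <= head <= cur <= tail <= hwtail taken modulo the ring size (for TX the comparison is against the rhead/rtail values saved on the previous round). The committed code enumerates the wrap-around cases explicitly; the sketch below compresses the same check into forward ring distances, assuming every index is already below the ring size, and is not the committed code.

    #include <stdint.h>

    /* forward distance from 'from' to 'to' on a ring of num_slots slots */
    static inline uint32_t
    sketch_ring_dist(uint32_t from, uint32_t to, uint32_t num_slots)
    {
        return (to >= from) ? to - from : to + num_slots - from;
    }

    /* head must lie in [hwcur..hwtail] and cur in [head..hwtail],
     * walking forward around the ring */
    static inline int
    sketch_rx_prologue_ok(uint32_t hwcur, uint32_t hwtail,
        uint32_t head, uint32_t cur, uint32_t num_slots)
    {
        return sketch_ring_dist(hwcur, head, num_slots) <=
                   sketch_ring_dist(hwcur, hwtail, num_slots) &&
               sketch_ring_dist(head, cur, num_slots) <=
                   sketch_ring_dist(head, hwtail, num_slots);
    }
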
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 478d9374937f..358d4693dcb3 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -151,7 +151,6 @@ ports attached to the switch)
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
-#include <sys/jail.h>
/* reduce conditional code */
@@ -226,9 +225,6 @@ enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */
NETMAP_ADMODE_NATIVE, /* either native or none */
NETMAP_ADMODE_GENERIC, /* force generic */
NETMAP_ADMODE_LAST };
-#define NETMAP_ADMODE_NATIVE 1 /* Force native netmap adapter. */
-#define NETMAP_ADMODE_GENERIC 2 /* Force generic netmap adapter. */
-#define NETMAP_ADMODE_BEST 0 /* Priority to native netmap adapter. */
static int netmap_admode = NETMAP_ADMODE_BEST;
int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */
@@ -252,6 +248,10 @@ nm_kr_get(struct netmap_kring *kr)
}
+/*
+ * mark the ring as stopped, and run through the locks
+ * to make sure other users get to see it.
+ */
void
netmap_disable_ring(struct netmap_kring *kr)
{
@@ -380,7 +380,6 @@ nm_dump_buf(char *p, int len, int lim, char *dst)
}
-
/*
* Fetch configuration from the device, to cope with dynamic
* reconfigurations after loading the module.
@@ -432,6 +431,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail
u_int i, len, ndesc;
struct netmap_kring *kring;
+ // XXX additional space for extra rings ?
len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
@@ -441,19 +441,23 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail
}
na->rx_rings = na->tx_rings + ntx;
+ /*
+ * All fields in krings are 0 except the one initialized below.
+ * but better be explicit on important kring fields.
+ */
ndesc = na->num_tx_desc;
for (i = 0; i < ntx; i++) { /* Transmit rings */
kring = &na->tx_rings[i];
bzero(kring, sizeof(*kring));
kring->na = na;
+ kring->ring_id = i;
kring->nkr_num_slots = ndesc;
/*
- * IMPORTANT:
- * Always keep one slot empty, so we can detect new
- * transmissions comparing cur and nr_hwcur (they are
- * the same only if there are no new transmissions).
+ * IMPORTANT: Always keep one slot empty.
*/
- kring->nr_hwavail = ndesc - 1;
+ kring->rhead = kring->rcur = kring->nr_hwcur = 0;
+ kring->rtail = kring->nr_hwtail = ndesc - 1;
+ snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i);
mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
init_waitqueue_head(&kring->si);
}
@@ -463,7 +467,11 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail
kring = &na->rx_rings[i];
bzero(kring, sizeof(*kring));
kring->na = na;
+ kring->ring_id = i;
kring->nkr_num_slots = ndesc;
+ kring->rhead = kring->rcur = kring->nr_hwcur = 0;
+ kring->rtail = kring->nr_hwtail = 0;
+ snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i);
mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
init_waitqueue_head(&kring->si);
}
@@ -473,10 +481,10 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail
na->tailroom = na->rx_rings + nrx;
return 0;
-
}
+/* XXX check boundaries */
void
netmap_krings_delete(struct netmap_adapter *na)
{
@@ -493,6 +501,23 @@ netmap_krings_delete(struct netmap_adapter *na)
}
+/*
+ * Destructor for NIC ports. They also have an mbuf queue
+ * on the rings connected to the host so we need to purge
+ * them first.
+ */
+static void
+netmap_hw_krings_delete(struct netmap_adapter *na)
+{
+ struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;
+
+ ND("destroy sw mbq with len %d", mbq_len(q));
+ mbq_purge(q);
+ mbq_safe_destroy(q);
+ netmap_krings_delete(na);
+}
+
+
static struct netmap_if*
netmap_if_new(const char *ifname, struct netmap_adapter *na)
{
@@ -721,6 +746,7 @@ netmap_dtor(void *data)
/*
* pass a chain of buffers to the host stack as coming from 'dst'
+ * We do not need to lock because the queue is private.
*/
static void
netmap_send_up(struct ifnet *dst, struct mbq *q)
@@ -739,39 +765,30 @@ netmap_send_up(struct ifnet *dst, struct mbq *q)
/*
* put a copy of the buffers marked NS_FORWARD into an mbuf chain.
- * Run from hwcur to cur - reserved
+ * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
+ * and pass them up. Drop remaining packets in the unlikely event
+ * of an mbuf shortage.
*/
static void
netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
{
- /* Take packets from hwcur to cur-reserved and pass them up.
- * In case of no buffers we give up. At the end of the loop,
- * the queue is drained in all cases.
- * XXX handle reserved
- */
- u_int lim = kring->nkr_num_slots - 1;
- struct mbuf *m;
- u_int k = kring->ring->cur, n = kring->ring->reserved;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const head = kring->ring->head;
+ u_int n;
struct netmap_adapter *na = kring->na;
- /* compute the final position, ring->cur - ring->reserved */
- if (n > 0) {
- if (k < n)
- k += kring->nkr_num_slots;
- k += n;
- }
- for (n = kring->nr_hwcur; n != k;) {
+ for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
+ struct mbuf *m;
struct netmap_slot *slot = &kring->ring->slot[n];
- n = nm_next(n, lim);
if ((slot->flags & NS_FORWARD) == 0 && !force)
continue;
if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) {
- D("bad pkt at %d len %d", n, slot->len);
+ RD(5, "bad pkt at %d len %d", n, slot->len);
continue;
}
slot->flags &= ~NS_FORWARD; // XXX needed ?
- /* XXX adapt to the case of a multisegment packet */
+ /* XXX TODO: adapt to the case of a multisegment packet */
m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL);
if (m == NULL)
@@ -782,69 +799,54 @@ netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
/*
- * The host ring has packets from nr_hwcur to (cur - reserved)
- * to be sent down to the NIC.
- * We need to use the queue lock on the source (host RX ring)
- * to protect against netmap_transmit.
- * If the user is well behaved we do not need to acquire locks
- * on the destination(s),
- * so we only need to make sure that there are no panics because
- * of user errors.
- * XXX verify
- *
- * We scan the tx rings, which have just been
- * flushed so nr_hwcur == cur. Pushing packets down means
- * increment cur and decrement avail.
- * XXX to be verified
+ * Send to the NIC rings packets marked NS_FORWARD between
+ * kring->nr_hwcur and kring->rhead
+ * Called under kring->rx_queue.lock on the sw rx ring,
*/
-static void
+static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
- struct netmap_kring *k1 = &na->tx_rings[0];
- u_int i, howmany, src_lim, dst_lim;
-
- /* XXX we should also check that the carrier is on */
- if (kring->nkr_stopped)
- return;
+ struct netmap_slot *rxslot = kring->ring->slot;
+ u_int i, rxcur = kring->nr_hwcur;
+ u_int const head = kring->rhead;
+ u_int const src_lim = kring->nkr_num_slots - 1;
+ u_int sent = 0;
+
+ /* scan rings to find space, then fill as much as possible */
+ for (i = 0; i < na->num_tx_rings; i++) {
+ struct netmap_kring *kdst = &na->tx_rings[i];
+ struct netmap_ring *rdst = kdst->ring;
+ u_int const dst_lim = kdst->nkr_num_slots - 1;
+
+ /* XXX do we trust ring or kring->rcur,rtail ? */
+ for (; rxcur != head && !nm_ring_empty(rdst);
+ rxcur = nm_next(rxcur, src_lim) ) {
+ struct netmap_slot *src, *dst, tmp;
+ u_int dst_cur = rdst->cur;
- mtx_lock(&kring->q_lock);
+ src = &rxslot[rxcur];
+ if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
+ continue;
- if (kring->nkr_stopped)
- goto out;
+ sent++;
- howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */
+ dst = &rdst->slot[dst_cur];
- src_lim = kring->nkr_num_slots - 1;
- for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
- ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
- dst_lim = k1->nkr_num_slots - 1;
- while (howmany > 0 && k1->ring->avail > 0) {
- struct netmap_slot *src, *dst, tmp;
- src = &kring->ring->slot[kring->nr_hwcur];
- dst = &k1->ring->slot[k1->ring->cur];
tmp = *src;
+
src->buf_idx = dst->buf_idx;
src->flags = NS_BUF_CHANGED;
dst->buf_idx = tmp.buf_idx;
dst->len = tmp.len;
dst->flags = NS_BUF_CHANGED;
- ND("out len %d buf %d from %d to %d",
- dst->len, dst->buf_idx,
- kring->nr_hwcur, k1->ring->cur);
-
- kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim);
- howmany--;
- kring->nr_hwavail--;
- k1->ring->cur = nm_next(k1->ring->cur, dst_lim);
- k1->ring->avail--;
+
+ rdst->cur = nm_next(dst_cur, dst_lim);
}
- kring->ring->cur = kring->nr_hwcur; // XXX
- k1++; // XXX why?
+ /* if (sent) XXX txsync ? */
}
-out:
- mtx_unlock(&kring->q_lock);
+ return sent;
}
@@ -859,7 +861,8 @@ netmap_txsync_to_host(struct netmap_adapter *na)
{
struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
struct netmap_ring *ring = kring->ring;
- u_int k, lim = kring->nkr_num_slots - 1;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const head = nm_txsync_prologue(kring);
struct mbq q;
int error;
@@ -869,22 +872,27 @@ netmap_txsync_to_host(struct netmap_adapter *na)
D("ring %p busy (user error)", kring);
return;
}
- k = ring->cur;
- if (k > lim) {
+ if (head > lim) {
D("invalid ring index in stack TX kring %p", kring);
netmap_ring_reinit(kring);
nm_kr_put(kring);
return;
}
- /* Take packets from hwcur to cur and pass them up.
+ /* Take packets from hwcur to head and pass them up.
+ * force head = cur since netmap_grab_packets() stops at head
* In case of no buffers we give up. At the end of the loop,
* the queue is drained in all cases.
*/
mbq_init(&q);
- netmap_grab_packets(kring, &q, 1);
- kring->nr_hwcur = k;
- kring->nr_hwavail = ring->avail = lim;
+ ring->cur = head;
+ netmap_grab_packets(kring, &q, 1 /* force */);
+ ND("have %d pkts in queue", mbq_len(&q));
+ kring->nr_hwcur = head;
+ kring->nr_hwtail = head + lim;
+ if (kring->nr_hwtail > lim)
+ kring->nr_hwtail -= lim + 1;
+ nm_txsync_finalize(kring);
nm_kr_put(kring);
netmap_send_up(na->ifp, &q);
@@ -893,60 +901,89 @@ netmap_txsync_to_host(struct netmap_adapter *na)
/*
* rxsync backend for packets coming from the host stack.
- * They have been put in the queue by netmap_transmit() so we
- * need to protect access to the kring using a lock.
+ * They have been put in kring->rx_queue by netmap_transmit().
+ * We protect access to the kring using kring->rx_queue.lock
*
* This routine also does the selrecord if called from the poll handler
* (we know because td != NULL).
*
* NOTE: on linux, selrecord() is defined as a macro and uses pwait
* as an additional hidden argument.
+ * returns the number of packets delivered to tx queues in
+ * transparent mode, or a negative value if error
*/
-static void
+int
netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
{
struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
struct netmap_ring *ring = kring->ring;
- u_int j, n, lim = kring->nkr_num_slots;
- u_int k = ring->cur, resvd = ring->reserved;
+ u_int nm_i, n;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const head = nm_rxsync_prologue(kring);
+ int ret = 0;
+ struct mbq *q = &kring->rx_queue;
(void)pwait; /* disable unused warnings */
- if (kring->nkr_stopped) /* check a first time without lock */
- return;
+ if (head > lim) {
+ netmap_ring_reinit(kring);
+ return EINVAL;
+ }
- mtx_lock(&kring->q_lock);
+ if (kring->nkr_stopped) /* check a first time without lock */
+ return EBUSY;
- if (kring->nkr_stopped) /* check again with lock held */
- goto unlock_out;
+ mtx_lock(&q->lock);
- if (k >= lim) {
- netmap_ring_reinit(kring);
+ if (kring->nkr_stopped) { /* check again with lock held */
+ ret = EBUSY;
goto unlock_out;
}
- /* new packets are already set in nr_hwavail */
- /* skip past packets that userspace has released */
- j = kring->nr_hwcur;
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
+
+ /* First part: import newly received packets */
+ n = mbq_len(q);
+ if (n) { /* grab packets from the queue */
+ struct mbuf *m;
+ uint32_t stop_i;
+
+ nm_i = kring->nr_hwtail;
+ stop_i = nm_prev(nm_i, lim);
+ while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
+ int len = MBUF_LEN(m);
+ struct netmap_slot *slot = &ring->slot[nm_i];
+
+ m_copydata(m, 0, len, BDG_NMB(na, slot));
+ ND("nm %d len %d", nm_i, len);
+ if (netmap_verbose)
+ D("%s", nm_dump_buf(BDG_NMB(na, slot),len, 128, NULL));
+
+ slot->len = len;
+ slot->flags = kring->nkr_slot_flags;
+ nm_i = nm_next(nm_i, lim);
}
- k = (k >= resvd) ? k - resvd : k + lim - resvd;
+ kring->nr_hwtail = nm_i;
}
- if (j != k) {
- n = k >= j ? k - j : k + lim - j;
- kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
+
+ /*
+ * Second part: skip past packets that userspace has released.
+ */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != head) { /* something was released */
+ if (netmap_fwd || kring->ring->flags & NR_FORWARD)
+ ret = netmap_sw_to_nic(na);
+ kring->nr_hwcur = head;
}
- k = ring->avail = kring->nr_hwavail - resvd;
- if (k == 0 && td)
+
+ nm_rxsync_finalize(kring);
+
+ /* access copies of cur,tail in the kring */
+ if (kring->rcur == kring->rtail && td) /* no bufs available */
selrecord(td, &kring->si);
- if (k && (netmap_verbose & NM_VERB_HOST))
- D("%d pkts from stack", k);
+
unlock_out:
- mtx_unlock(&kring->q_lock);
+ mtx_unlock(&q->lock);
+ return ret;
}
@@ -1042,7 +1079,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
// XXX add a refcount ?
netmap_adapter_get(prev_na);
}
- D("Created generic NA %p (prev %p)", gna, gna->prev);
+ ND("Created generic NA %p (prev %p)", gna, gna->prev);
return 0;
}
@@ -1113,154 +1150,167 @@ out:
/*
* validate parameters on entry for *_txsync()
* Returns ring->cur if ok, or something >= kring->nkr_num_slots
- * in case of error. The extra argument is a pointer to
- * 'new_bufs'. XXX this may be deprecated at some point.
+ * in case of error.
*
- * Below is a correct configuration on input. ring->cur
- * must be in the region covered by kring->hwavail,
- * and ring->avail and kring->avail should end at the same slot.
+ * rhead, rcur and rtail=hwtail are stored from previous round.
+ * hwcur is the next packet to send to the ring.
*
- * +-hwcur
- * |
- * v<--hwres-->|<-----hwavail---->
- * ------+------------------------------+-------- ring
- * |
- * |<---avail--->
- * +--cur
+ * We want
+ * hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
*
+ * hwcur, rhead, rtail and hwtail are reliable
*/
u_int
-nm_txsync_prologue(struct netmap_kring *kring, u_int *new_slots)
+nm_txsync_prologue(struct netmap_kring *kring)
{
struct netmap_ring *ring = kring->ring;
+ u_int head = ring->head; /* read only once */
u_int cur = ring->cur; /* read only once */
- u_int avail = ring->avail; /* read only once */
u_int n = kring->nkr_num_slots;
- u_int kstart, kend, a;
-#if 1 /* kernel sanity checks */
- if (kring->nr_hwcur >= n ||
- kring->nr_hwreserved >= n || kring->nr_hwavail >= n ||
- kring->nr_hwreserved + kring->nr_hwavail >= n)
+ ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
+ kring->name,
+ kring->nr_hwcur, kring->nr_hwtail,
+ ring->head, ring->cur, ring->tail);
+#if 1 /* kernel sanity checks; but we can trust the kring. */
+ if (kring->nr_hwcur >= n || kring->rhead >= n ||
+ kring->rtail >= n || kring->nr_hwtail >= n)
goto error;
#endif /* kernel sanity checks */
- kstart = kring->nr_hwcur + kring->nr_hwreserved;
- if (kstart >= n)
- kstart -= n;
- kend = kstart + kring->nr_hwavail;
- /* user sanity checks. a is the expected avail */
- if (cur < kstart) {
- /* too low, but maybe wraparound */
- if (cur + n > kend)
+ /*
+ * user sanity checks. We only use 'cur',
+ * A, B, ... are possible positions for cur:
+ *
+ * 0 A cur B tail C n-1
+ * 0 D tail E cur F n-1
+ *
+ * B, F, D are valid. A, C, E are wrong
+ */
+ if (kring->rtail >= kring->rhead) {
+ /* want rhead <= head <= rtail */
+ if (head < kring->rhead || head > kring->rtail)
goto error;
- *new_slots = cur + n - kstart;
- a = kend - cur - n;
- } else {
- if (cur > kend)
+ /* and also head <= cur <= rtail */
+ if (cur < head || cur > kring->rtail)
+ goto error;
+ } else { /* here rtail < rhead */
+ /* we need head outside rtail .. rhead */
+ if (head > kring->rtail && head < kring->rhead)
goto error;
- *new_slots = cur - kstart;
- a = kend - cur;
+
+ /* two cases now: head <= rtail or head >= rhead */
+ if (head <= kring->rtail) {
+ /* want head <= cur <= rtail */
+ if (cur < head || cur > kring->rtail)
+ goto error;
+ } else { /* head >= rhead */
+ /* cur must be outside rtail..head */
+ if (cur > kring->rtail && cur < head)
+ goto error;
+ }
}
- if (a != avail) {
- RD(5, "wrong but fixable avail have %d need %d",
- avail, a);
- ring->avail = avail = a;
+ if (ring->tail != kring->rtail) {
+ RD(5, "tail overwritten was %d need %d",
+ ring->tail, kring->rtail);
+ ring->tail = kring->rtail;
}
- return cur;
+ kring->rhead = head;
+ kring->rcur = cur;
+ return head;
error:
- RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d",
+ RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d",
+ kring->name,
kring->nr_hwcur,
- kring->nr_hwreserved, kring->nr_hwavail,
- cur, avail);
+ kring->rcur, kring->nr_hwtail,
+ cur, ring->tail);
return n;
}
/*
* validate parameters on entry for *_rxsync()
- * Returns ring->cur - ring->reserved if ok,
- * or something >= kring->nkr_num_slots
- * in case of error. The extra argument is a pointer to
- * 'resvd'. XXX this may be deprecated at some point.
+ * Returns ring->head if ok, kring->nkr_num_slots on error.
*
- * Below is a correct configuration on input. ring->cur and
- * ring->reserved must be in the region covered by kring->hwavail,
- * and ring->avail and kring->avail should end at the same slot.
+ * For a valid configuration,
+ * hwcur <= head <= cur <= tail <= hwtail
*
- * +-hwcur
- * |
- * v<-------hwavail---------->
- * ---------+--------------------------+-------- ring
- * |<--res-->|
- * |<---avail--->
- * +--cur
+ * We only consider head and cur.
+ * hwcur and hwtail are reliable.
*
*/
u_int
-nm_rxsync_prologue(struct netmap_kring *kring, u_int *resvd)
+nm_rxsync_prologue(struct netmap_kring *kring)
{
struct netmap_ring *ring = kring->ring;
- u_int cur = ring->cur; /* read only once */
- u_int avail = ring->avail; /* read only once */
- u_int res = ring->reserved; /* read only once */
- u_int n = kring->nkr_num_slots;
- u_int kend = kring->nr_hwcur + kring->nr_hwavail;
- u_int a;
+ uint32_t const n = kring->nkr_num_slots;
+ uint32_t head, cur;
+ ND("%s kc %d kt %d h %d c %d t %d",
+ kring->name,
+ kring->nr_hwcur, kring->nr_hwtail,
+ ring->head, ring->cur, ring->tail);
+ /*
+ * Before storing the new values, we should check they do not
+ * move backwards. However:
+ * - head is not an issue because the previous value is hwcur;
+ * - cur could in principle go back, however it does not matter
+ * because we are processing a brand new rxsync()
+ */
+ cur = kring->rcur = ring->cur; /* read only once */
+ head = kring->rhead = ring->head; /* read only once */
#if 1 /* kernel sanity checks */
- if (kring->nr_hwcur >= n || kring->nr_hwavail >= n)
+ if (kring->nr_hwcur >= n || kring->nr_hwtail >= n)
goto error;
#endif /* kernel sanity checks */
/* user sanity checks */
- if (res >= n)
- goto error;
- /* check that cur is valid, a is the expected value of avail */
- if (cur < kring->nr_hwcur) {
- /* too low, but maybe wraparound */
- if (cur + n > kend)
+ if (kring->nr_hwtail >= kring->nr_hwcur) {
+ /* want hwcur <= rhead <= hwtail */
+ if (head < kring->nr_hwcur || head > kring->nr_hwtail)
goto error;
- a = kend - (cur + n);
- } else {
- if (cur > kend)
+ /* and also rhead <= rcur <= hwtail */
+ if (cur < head || cur > kring->nr_hwtail)
goto error;
- a = kend - cur;
- }
- if (a != avail) {
- RD(5, "wrong but fixable avail have %d need %d",
- avail, a);
- ring->avail = avail = a;
- }
- if (res != 0) {
- /* then repeat the check for cur + res */
- cur = (cur >= res) ? cur - res : n + cur - res;
- if (cur < kring->nr_hwcur) {
- /* too low, but maybe wraparound */
- if (cur + n > kend)
- goto error;
- } else if (cur > kend) {
+ } else {
+ /* we need rhead outside hwtail..hwcur */
+ if (head < kring->nr_hwcur && head > kring->nr_hwtail)
goto error;
+ /* two cases now: head <= hwtail or head >= hwcur */
+ if (head <= kring->nr_hwtail) {
+ /* want head <= cur <= hwtail */
+ if (cur < head || cur > kring->nr_hwtail)
+ goto error;
+ } else {
+ /* cur must be outside hwtail..head */
+ if (cur < head && cur > kring->nr_hwtail)
+ goto error;
}
}
- *resvd = res;
- return cur;
+ if (ring->tail != kring->rtail) {
+ RD(5, "%s tail overwritten was %d need %d",
+ kring->name,
+ ring->tail, kring->rtail);
+ ring->tail = kring->rtail;
+ }
+ return head;
error:
- RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d res %d",
+ RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d",
kring->nr_hwcur,
- kring->nr_hwreserved, kring->nr_hwavail,
- ring->cur, avail, res);
+ kring->rcur, kring->nr_hwtail,
+ kring->rhead, kring->rcur, ring->tail);
return n;
}
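
The two branches above are just a circular-interval membership test. A minimal sketch of the same check, assuming a hypothetical helper name nm_in_interval() (not part of the patch):

    static inline int
    nm_in_interval(uint32_t lo, uint32_t x, uint32_t hi)
    {
            /* is x inside the circular interval [lo, hi] ? */
            return (lo <= hi) ? (x >= lo && x <= hi)
                              : (x >= lo || x <= hi);
    }

With it, the prologue requirement reduces to
nm_in_interval(hwcur, head, hwtail) && nm_in_interval(head, cur, hwtail).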
+
/*
* Error routine called when txsync/rxsync detects an error.
- * Can't do much more than resetting cur = hwcur, avail = hwavail.
+ * Can't do much more than resetting head = cur = hwcur, tail = hwtail
* Return 1 on reinit.
*
* This routine is only called by the upper half of the kernel.
* It only reads hwcur (which is changed only by the upper half, too)
- * and hwavail (which may be changed by the lower half, but only on
+ * and hwtail (which may be changed by the lower half, but only on
* a tx ring and only to increase it, so any error will be recovered
* on the next call). For the above, we don't strictly need to call
* it under lock.
@@ -1274,36 +1324,38 @@ netmap_ring_reinit(struct netmap_kring *kring)
// XXX KASSERT nm_kr_tryget
RD(10, "called for %s", NM_IFPNAME(kring->na->ifp));
+ // XXX probably wrong to trust userspace
+ kring->rhead = ring->head;
+ kring->rcur = ring->cur;
+ kring->rtail = ring->tail;
+
if (ring->cur > lim)
errors++;
+ if (ring->head > lim)
+ errors++;
+ if (ring->tail > lim)
+ errors++;
for (i = 0; i <= lim; i++) {
u_int idx = ring->slot[i].buf_idx;
u_int len = ring->slot[i].len;
if (idx < 2 || idx >= netmap_total_buffers) {
- if (!errors++)
- D("bad buffer at slot %d idx %d len %d ", i, idx, len);
+ RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
ring->slot[i].buf_idx = 0;
ring->slot[i].len = 0;
} else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) {
ring->slot[i].len = 0;
- if (!errors++)
- D("bad len %d at slot %d idx %d",
- len, i, idx);
+ RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
}
}
if (errors) {
- int pos = kring - kring->na->tx_rings;
- int n = kring->na->num_tx_rings + 1;
-
RD(10, "total %d errors", errors);
- errors++;
- RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
- NM_IFPNAME(kring->na->ifp),
- pos < n ? "TX" : "RX", pos < n ? pos : pos - n,
+ RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
+ kring->name,
ring->cur, kring->nr_hwcur,
- ring->avail, kring->nr_hwavail);
- ring->cur = kring->nr_hwcur;
- ring->avail = kring->nr_hwavail;
+ ring->tail, kring->nr_hwtail);
+ ring->head = kring->rhead = kring->nr_hwcur;
+ ring->cur = kring->rcur = kring->nr_hwcur;
+ ring->tail = kring->rtail = kring->nr_hwtail;
}
return (errors ? 1 : 0);
}
@@ -1436,7 +1488,6 @@ out:
* - NIOCGINFO
* - SIOCGIFADDR just for convenience
* - NIOCREGIF
- * - NIOCUNREGIF
* - NIOCTXSYNC
* - NIOCRXSYNC
*
@@ -1472,6 +1523,17 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
} while (0)
#endif /* linux */
+ if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
+ /* truncate name */
+ nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
+ if (nmr->nr_version != NETMAP_API) {
+ D("API mismatch for %s got %d need %d",
+ nmr->nr_name,
+ nmr->nr_version, NETMAP_API);
+ nmr->nr_version = NETMAP_API;
+ return EINVAL;
+ }
+ }
CURVNET_SET(TD_TO_VNET(td));
error = devfs_get_cdevpriv((void **)&priv);
@@ -1482,16 +1544,8 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
return (error == ENOENT ? ENXIO : error);
}
- nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */
switch (cmd) {
case NIOCGINFO: /* return capabilities etc */
- if (nmr->nr_version != NETMAP_API) {
- D("API mismatch got %d have %d",
- nmr->nr_version, NETMAP_API);
- nmr->nr_version = NETMAP_API;
- error = EINVAL;
- break;
- }
if (nmr->nr_cmd == NETMAP_BDG_LIST) {
error = netmap_bdg_ctl(nmr, NULL);
break;
@@ -1531,11 +1585,6 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
break;
case NIOCREGIF:
- if (nmr->nr_version != NETMAP_API) {
- nmr->nr_version = NETMAP_API;
- error = EINVAL;
- break;
- }
/* possibly attach/detach NIC and VALE switch */
i = nmr->nr_cmd;
if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
@@ -1593,12 +1642,6 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
NMG_UNLOCK();
break;
- case NIOCUNREGIF:
- // XXX we have no data here ?
- D("deprecated, data is %p", nmr);
- error = EINVAL;
- break;
-
case NIOCTXSYNC:
case NIOCRXSYNC:
nifp = priv->np_nifp;
@@ -1649,7 +1692,11 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
D("pre txsync ring %d cur %d hwcur %d",
i, kring->ring->cur,
kring->nr_hwcur);
- na->nm_txsync(na, i, NAF_FORCE_RECLAIM);
+ if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
+ netmap_ring_reinit(kring);
+ } else {
+ na->nm_txsync(na, i, NAF_FORCE_RECLAIM);
+ }
if (netmap_verbose & NM_VERB_TXSYNC)
D("post txsync ring %d cur %d hwcur %d",
i, kring->ring->cur,
@@ -1726,8 +1773,8 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
struct ifnet *ifp;
struct netmap_kring *kring;
u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
- u_int lim_tx, lim_rx, host_forwarded = 0;
- struct mbq q;
+ u_int lim_tx, lim_rx;
+ struct mbq q; /* packets from hw queues to host stack */
void *pwait = dev; /* linux compatibility */
/*
@@ -1735,7 +1782,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* txsync and rxsync if we decide to do a selrecord().
* retry_tx (and retry_rx, later) prevent looping forever.
*/
- int retry_tx = 1;
+ int retry_tx = 1, retry_rx = 1;
(void)pwait;
mbq_init(&q);
@@ -1769,6 +1816,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
lim_rx = na->num_rx_rings;
if (priv->np_qfirst == NETMAP_SW_RING) {
+ // XXX locking ?
/* handle the host stack ring */
if (priv->np_txpoll || want_tx) {
/* push any packets up, then we are always ready */
@@ -1777,29 +1825,15 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
}
if (want_rx) {
kring = &na->rx_rings[lim_rx];
- if (kring->ring->avail == 0)
+ /* XXX replace with rxprologue etc. */
+ if (nm_ring_empty(kring->ring))
netmap_rxsync_from_host(na, td, dev);
- if (kring->ring->avail > 0) {
+ if (!nm_ring_empty(kring->ring))
revents |= want_rx;
- }
}
return (revents);
}
- /*
- * If we are in transparent mode, check also the host rx ring
- * XXX Transparent mode at the moment requires to bind all
- * rings to a single file descriptor.
- */
- kring = &na->rx_rings[lim_rx];
- if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
- && want_rx
- && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
- if (kring->ring->avail == 0)
- netmap_rxsync_from_host(na, td, dev);
- if (kring->ring->avail > 0)
- revents |= want_rx;
- }
/*
* check_all_{tx|rx} are set if the card has more than one queue AND
@@ -1825,81 +1859,71 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* We start with a lock free round which is cheap if we have
* slots available. If this fails, then lock and call the sync
* routines.
- * XXX rather than ring->avail >0 should check that
- * ring->cur has not reached hwcur+hwavail
*/
for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
kring = &na->rx_rings[i];
- if (kring->ring->avail > 0) {
+ /* XXX compare ring->cur and kring->tail */
+ if (!nm_ring_empty(kring->ring)) {
revents |= want_rx;
want_rx = 0; /* also breaks the loop */
}
}
for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
kring = &na->tx_rings[i];
- if (kring->ring->avail > 0) {
+ /* XXX compare ring->cur and kring->tail */
+ if (!nm_ring_empty(kring->ring)) {
revents |= want_tx;
want_tx = 0; /* also breaks the loop */
}
}
/*
- * If we to push packets out (priv->np_txpoll) or want_tx is
- * still set, we do need to run the txsync calls (on all rings,
- * to avoid that the tx rings stall).
+ * If we want to push packets out (priv->np_txpoll) or
+ * want_tx is still set, we must issue txsync calls
+ * (on all rings, to avoid that the tx rings stall).
* XXX should also check cur != hwcur on the tx rings.
* Fortunately, normal tx mode has np_txpoll set.
*/
if (priv->np_txpoll || want_tx) {
- /* If we really want to be woken up (want_tx),
- * do a selrecord, either on the global or on
- * the private structure. Then issue the txsync
- * so there is no race in the selrecord/selwait
+ /*
+ * The first round checks if anyone is ready, if not
+ * do a selrecord and another round to handle races.
+ * want_tx goes to 0 if any space is found, and is
+ * used to skip rings with no pending transmissions.
*/
flush_tx:
for (i = priv->np_qfirst; i < lim_tx; i++) {
+ int found = 0;
+
kring = &na->tx_rings[i];
- /*
- * Skip this ring if want_tx == 0
- * (we have already done a successful sync on
- * a previous ring) AND kring->cur == kring->hwcur
- * (there are no pending transmissions for this ring).
- */
if (!want_tx && kring->ring->cur == kring->nr_hwcur)
continue;
- /* make sure only one user thread is doing this */
+ /* only one thread does txsync */
if (nm_kr_tryget(kring)) {
- ND("ring %p busy is %d",
- kring, (int)kring->nr_busy);
- revents |= POLLERR;
- goto out;
+ D("%p lost race on txring %d, ok", priv, i);
+ continue;
}
-
- if (netmap_verbose & NM_VERB_TXSYNC)
- D("send %d on %s %d",
- kring->ring->cur, NM_IFPNAME(ifp), i);
- if (na->nm_txsync(na, i, 0))
+ if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
+ netmap_ring_reinit(kring);
revents |= POLLERR;
+ } else {
+ if (na->nm_txsync(na, i, 0))
+ revents |= POLLERR;
+ }
- /* Check avail and call selrecord only if
- * called with POLLOUT and run out of bufs.
- * XXX Note, we cannot trust much ring->avail
- * as it is exposed to userspace (even though
- * just updated by txsync). We should really
- * check kring->nr_hwavail or better have
- * txsync set a flag telling if we need
- * to do a selrecord().
+ /*
+ * If we found new slots, notify potential
+ * listeners on the same ring.
+ * Since we just did a txsync, look at the copies
+ * of cur,tail in the kring.
*/
- if (want_tx) {
- if (kring->ring->avail > 0) {
- /* stop at the first ring. We don't risk
- * starvation.
- */
- revents |= want_tx;
- want_tx = 0;
- }
- }
+ found = kring->rcur != kring->rtail;
nm_kr_put(kring);
+ if (found) { /* notify other listeners */
+ revents |= want_tx;
+ want_tx = 0;
+ na->nm_notify(na, i, NR_TX, NAF_GLOBAL_NOTIFY);
+ }
}
if (want_tx && retry_tx) {
selrecord(td, check_all_tx ?
@@ -1910,21 +1934,27 @@ flush_tx:
}
/*
- * now if want_rx is still set we need to lock and rxsync.
+ * If want_rx is still set scan receive rings.
* Do it on all rings because otherwise we starve.
*/
if (want_rx) {
- int retry_rx = 1;
+ int send_down = 0; /* transparent mode */
+ /* two rounds here for race avoidance */
do_retry_rx:
for (i = priv->np_qfirst; i < lim_rx; i++) {
+ int found = 0;
+
kring = &na->rx_rings[i];
if (nm_kr_tryget(kring)) {
- revents |= POLLERR;
- goto out;
+ D("%p lost race on rxring %d, ok", priv, i);
+ continue;
}
- /* XXX NR_FORWARD should only be read on
+ /*
+ * transparent mode support: collect packets
+ * from the rxring(s).
+ * XXX NR_FORWARD should only be read on
* physical or NIC ports
*/
if (netmap_fwd ||kring->ring->flags & NR_FORWARD) {
@@ -1939,49 +1969,65 @@ do_retry_rx:
kring->ring->flags & NR_TIMESTAMP) {
microtime(&kring->ring->ts);
}
-
- if (kring->ring->avail > 0) {
+ /* after an rxsync we can use kring->rcur, rtail */
+ found = kring->rcur != kring->rtail;
+ nm_kr_put(kring);
+ if (found) {
revents |= want_rx;
retry_rx = 0;
+ na->nm_notify(na, i, NR_RX, NAF_GLOBAL_NOTIFY);
}
- nm_kr_put(kring);
}
- if (retry_rx) {
- retry_rx = 0;
+
+ /* transparent mode XXX only during first pass ? */
+ kring = &na->rx_rings[lim_rx];
+ if (check_all_rx
+ && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
+ /* XXX fix to use kring fields */
+ if (nm_ring_empty(kring->ring))
+ send_down = netmap_rxsync_from_host(na, td, dev);
+ if (!nm_ring_empty(kring->ring))
+ revents |= want_rx;
+ }
+
+ if (retry_rx)
selrecord(td, check_all_rx ?
&na->rx_si : &na->rx_rings[priv->np_qfirst].si);
- goto do_retry_rx;
+ if (send_down > 0 || retry_rx) {
+ retry_rx = 0;
+ if (send_down)
+ goto flush_tx; /* and retry_rx */
+ else
+ goto do_retry_rx;
}
}
- /* forward host to the netmap ring.
- * I am accessing nr_hwavail without lock, but netmap_transmit
- * can only increment it, so the operation is safe.
+ /*
+ * Transparent mode: marked bufs on rx rings between
+ * kring->nr_hwcur and ring->head
+ * are passed to the other endpoint.
+ *
+ * In this mode we also scan the sw rxring, which in
+ * turn passes packets up.
+ *
+ * XXX Transparent mode at the moment requires to bind all
+ * rings to a single file descriptor.
*/
- kring = &na->rx_rings[lim_rx];
- if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
- && (netmap_fwd || kring->ring->flags & NR_FORWARD)
- && kring->nr_hwavail > 0 && !host_forwarded) {
- netmap_sw_to_nic(na);
- host_forwarded = 1; /* prevent another pass */
- want_rx = 0;
- goto flush_tx;
- }
if (q.head)
netmap_send_up(na->ifp, &q);
-out:
-
return (revents);
}
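
The two rounds above (scan the rings, selrecord(), scan again) are the usual way to avoid a lost wakeup between the readiness test and the registration on the selinfo. A rough sketch of the pattern, not the actual code:

    /*
     * retry = 1;
     * again:
     *      for each ring: if (ready) { revents |= want; want = 0; }
     *      if (want && retry) {
     *              selrecord(td, si);      // register before the final check
     *              retry = 0;
     *              goto again;             // a wakeup may have raced with us
     *      }
     */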
-/*------- driver support routines ------*/
+
+/*-------------------- driver support routines -------------------*/
static int netmap_hw_krings_create(struct netmap_adapter *);
static int
-netmap_notify(struct netmap_adapter *na, u_int n_ring, enum txrx tx, int flags)
+netmap_notify(struct netmap_adapter *na, u_int n_ring,
+ enum txrx tx, int flags)
{
struct netmap_kring *kring;
@@ -2012,10 +2058,18 @@ netmap_attach_common(struct netmap_adapter *na)
return EINVAL;
}
WNA(ifp) = na;
+
+ /* the following is only needed for na that use the host port.
+ * XXX do we have something similar for linux ?
+ */
+#ifdef __FreeBSD__
+ na->if_input = ifp->if_input; /* for netmap_send_up */
+#endif /* __FreeBSD__ */
+
NETMAP_SET_CAPABLE(ifp);
if (na->nm_krings_create == NULL) {
na->nm_krings_create = netmap_hw_krings_create;
- na->nm_krings_delete = netmap_krings_delete;
+ na->nm_krings_delete = netmap_hw_krings_delete;
}
if (na->nm_notify == NULL)
na->nm_notify = netmap_notify;
@@ -2051,12 +2105,8 @@ netmap_detach_common(struct netmap_adapter *na)
* of hardware rings):
* krings 0..N-1 are for the hardware queues.
* kring N is for the host stack queue
- * kring N+1 is only used for the selinfo for all queues.
+ * kring N+1 is only used for the selinfo for all queues. // XXX still true ?
* Return 0 on success, ENOMEM otherwise.
- *
- * By default the receive and transmit adapter ring counts are both initialized
- * to num_queues. na->num_tx_rings can be set for cards with different tx/rx
- * setups.
*/
int
netmap_attach(struct netmap_adapter *arg)
@@ -2132,8 +2182,14 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
int
netmap_hw_krings_create(struct netmap_adapter *na)
{
- return netmap_krings_create(na,
+ int ret = netmap_krings_create(na,
na->num_tx_rings + 1, na->num_rx_rings + 1, 0);
+ if (ret == 0) {
+ /* initialize the mbq for the sw rx ring */
+ mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
+ ND("initialized sw rx queue %d", na->num_rx_rings);
+ }
+ return ret;
}
@@ -2162,6 +2218,10 @@ netmap_detach(struct ifnet *ifp)
/*
* Intercept packets from the network stack and pass them
* to netmap as incoming packets on the 'software' ring.
+ *
+ * We only store packets in a bounded mbq and then copy them
+ * in the relevant rxsync routine.
+ *
* We rely on the OS to make sure that the ifp and na do not go
* away (typically the caller checks for IFF_DRV_RUNNING or the like).
* In nm_register() or whenever there is a reinitialization,
@@ -2172,63 +2232,60 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct netmap_adapter *na = NA(ifp);
struct netmap_kring *kring;
- u_int i, len = MBUF_LEN(m);
- u_int error = EBUSY, lim;
- struct netmap_slot *slot;
+ u_int len = MBUF_LEN(m);
+ u_int error = ENOBUFS;
+ struct mbq *q;
+ int space;
// XXX [Linux] we do not need this lock
// if we follow the down/configure/up protocol -gl
// mtx_lock(&na->core_lock);
+
if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) {
- /* interface not in netmap mode anymore */
+ D("%s not in netmap mode anymore", NM_IFPNAME(ifp));
error = ENXIO;
goto done;
}
kring = &na->rx_rings[na->num_rx_rings];
- lim = kring->nkr_num_slots - 1;
- if (netmap_verbose & NM_VERB_HOST)
- D("%s packet %d len %d from the stack", NM_IFPNAME(ifp),
- kring->nr_hwcur + kring->nr_hwavail, len);
+ q = &kring->rx_queue;
+
// XXX reconsider long packets if we handle fragments
if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp),
len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
goto done;
}
- /* protect against other instances of netmap_transmit,
- * and userspace invocations of rxsync().
+
+ /* protect against rxsync_from_host(), netmap_sw_to_nic()
+ * and maybe other instances of netmap_transmit (the latter
+ * not possible on Linux).
+ * Also avoid overflowing the queue.
*/
- // XXX [Linux] there can be no other instances of netmap_transmit
- // on this same ring, but we still need this lock to protect
- // concurrent access from netmap_sw_to_nic() -gl
- mtx_lock(&kring->q_lock);
- if (kring->nr_hwavail >= lim) {
- if (netmap_verbose)
- D("stack ring %s full\n", NM_IFPNAME(ifp));
+ mtx_lock(&q->lock);
+
+ space = kring->nr_hwtail - kring->nr_hwcur;
+ if (space < 0)
+ space += kring->nkr_num_slots;
+ if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
+ RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
+ NM_IFPNAME(ifp), kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
+ len, m);
} else {
- /* compute the insert position */
- i = nm_kr_rxpos(kring);
- slot = &kring->ring->slot[i];
- m_copydata(m, 0, (int)len, BDG_NMB(na, slot));
- slot->len = len;
- slot->flags = kring->nkr_slot_flags;
- kring->nr_hwavail++;
- if (netmap_verbose & NM_VERB_HOST)
- D("wake up host ring %s %d", NM_IFPNAME(na->ifp), na->num_rx_rings);
- na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
+ mbq_enqueue(q, m);
+ ND(10, "%s %d bufs in queue len %d m %p",
+ NM_IFPNAME(ifp), mbq_len(q), len, m);
+ /* notify outside the lock */
+ m = NULL;
error = 0;
}
- mtx_unlock(&kring->q_lock);
+ mtx_unlock(&q->lock);
done:
- // mtx_unlock(&na->core_lock);
-
- /* release the mbuf in either cases of success or failure. As an
- * alternative, put the mbuf in a free list and free the list
- * only when really necessary.
- */
- m_freem(m);
+ if (m)
+ m_freem(m);
+ /* unconditionally wake up listeners */
+ na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
return (error);
}
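
The space computation above is circular arithmetic over the ring: the number of slots between hwcur and hwtail, modulo the ring size. The nm_kr_rxspace() helper called by the drivers earlier in this patch presumably reduces to the same thing; a sketch, under the field names shown here (the helper name below is hypothetical):

    static inline uint32_t
    kr_rxspace_sketch(struct netmap_kring *k)
    {
            int space = k->nr_hwtail - k->nr_hwcur;

            if (space < 0)
                    space += k->nkr_num_slots;
            return space;   /* slots in hwcur..hwtail, not yet released */
    }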
@@ -2267,27 +2324,32 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
if (n >= na->num_tx_rings)
return NULL;
kring = na->tx_rings + n;
+ // XXX check whether we should use hwcur or rcur
new_hwofs = kring->nr_hwcur - new_cur;
} else {
if (n >= na->num_rx_rings)
return NULL;
kring = na->rx_rings + n;
- new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
+ new_hwofs = kring->nr_hwtail - new_cur;
}
lim = kring->nkr_num_slots - 1;
if (new_hwofs > lim)
new_hwofs -= lim + 1;
/* Always set the new offset value and realign the ring. */
- D("%s hwofs %d -> %d, hwavail %d -> %d",
- tx == NR_TX ? "TX" : "RX",
+ if (netmap_verbose)
+ D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
+ NM_IFPNAME(na->ifp),
+ tx == NR_TX ? "TX" : "RX", n,
kring->nkr_hwofs, new_hwofs,
- kring->nr_hwavail,
- tx == NR_TX ? lim : kring->nr_hwavail);
+ kring->nr_hwtail,
+ tx == NR_TX ? lim : kring->nr_hwtail);
kring->nkr_hwofs = new_hwofs;
- if (tx == NR_TX)
- kring->nr_hwavail = lim;
- kring->nr_hwreserved = 0;
+ if (tx == NR_TX) {
+ kring->nr_hwtail = kring->nr_hwcur + lim;
+ if (kring->nr_hwtail > lim)
+ kring->nr_hwtail -= lim + 1;
+ }
#if 0 // def linux
/* XXX check that the mappings are correct */
@@ -2351,6 +2413,7 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
}
}
+
/*
* Default functions to handle rx/tx interrupts from a physical device.
* "work_done" is non-null on the RX path, NULL for the TX path.
@@ -2397,6 +2460,7 @@ netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
static struct cdev *netmap_dev; /* /dev/netmap character device. */
extern struct cdevsw netmap_cdevsw;
+
void
netmap_fini(void)
{
@@ -2408,6 +2472,7 @@ netmap_fini(void)
printf("netmap: unloaded module.\n");
}
+
int
netmap_init(void)
{
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index c2814146d2ef..6716168526dc 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -86,21 +86,31 @@ netmap_catch_rx(struct netmap_adapter *na, int intercept)
return 0;
}
+
/*
* Intercept the packet steering routine in the tx path,
* so that we can decide which queue is used for an mbuf.
* Second argument is non-zero to intercept, 0 to restore.
*
+ * XXX do we also need to redirect if_transmit ?
+ *
* XXX see if FreeBSD has such a mechanism
*/
void
-netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable)
+netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
{
+ struct netmap_adapter *na = &gna->up.up;
+ struct ifnet *ifp = na->ifp;
+
if (enable) {
+ na->if_transmit = ifp->if_transmit;
+ ifp->if_transmit = netmap_transmit;
} else {
+ ifp->if_transmit = na->if_transmit;
}
}
+
/* Transmit routine used by generic_netmap_txsync(). Returns 0 on success
* and non-zero on error (which may be packet drops or other errors).
* addr and len identify the netmap buffer, m is the (preallocated)
@@ -126,16 +136,16 @@ generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
// copy data to the mbuf
m_copyback(m, 0, len, addr);
-
// inc refcount. We are alone, so we can skip the atomic
atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
m->m_flags |= M_FLOWID;
m->m_pkthdr.flowid = ring_nr;
m->m_pkthdr.rcvif = ifp; /* used for tx notification */
- ret = ifp->if_transmit(ifp, m);
+ ret = NA(ifp)->if_transmit(ifp, m);
return ret;
}
+
/*
* The following two functions are empty until we have a generic
* way to extract the info from the ifp
@@ -147,6 +157,7 @@ generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
return 0;
}
+
void
generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
@@ -155,6 +166,7 @@ generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
*rxq = 1;
}
+
void netmap_mitigation_init(struct netmap_generic_adapter *na)
{
ND("called");
@@ -167,22 +179,26 @@ void netmap_mitigation_start(struct netmap_generic_adapter *na)
ND("called");
}
+
void netmap_mitigation_restart(struct netmap_generic_adapter *na)
{
ND("called");
}
+
int netmap_mitigation_active(struct netmap_generic_adapter *na)
{
ND("called");
return 0;
}
+
void netmap_mitigation_cleanup(struct netmap_generic_adapter *na)
{
ND("called");
}
+
/*
* In order to track whether pages are still mapped, we hook into
* the standard cdev_pager and intercept the constructor and
@@ -194,6 +210,7 @@ struct netmap_vm_handle_t {
struct netmap_priv_d *priv;
};
+
static int
netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_ooffset_t foff, struct ucred *cred, u_short *color)
@@ -218,6 +235,7 @@ netmap_dev_pager_dtor(void *handle)
dev_rel(dev);
}
+
static int
netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
int prot, vm_page_t *mres)
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index 2c42db3f8862..109a734cac9f 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -82,7 +82,7 @@ __FBSDID("$FreeBSD$");
#include <dev/netmap/netmap_mem2.h>
#define rtnl_lock() D("rtnl_lock called");
-#define rtnl_unlock() D("rtnl_lock called");
+#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
@@ -101,9 +101,9 @@ __FBSDID("$FreeBSD$");
* (or reinstall the buffer ?)
*/
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
- (m)->m_ext.ext_free = (void *)fn; \
- (m)->m_ext.ext_type = EXT_EXTREF; \
- } while (0)
+ (m)->m_ext.ext_free = (void *)fn; \
+ (m)->m_ext.ext_type = EXT_EXTREF; \
+} while (0)
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
@@ -137,43 +137,43 @@ __FBSDID("$FreeBSD$");
#ifdef RATE
#define IFRATE(x) x
struct rate_stats {
- unsigned long txpkt;
- unsigned long txsync;
- unsigned long txirq;
- unsigned long rxpkt;
- unsigned long rxirq;
- unsigned long rxsync;
+ unsigned long txpkt;
+ unsigned long txsync;
+ unsigned long txirq;
+ unsigned long rxpkt;
+ unsigned long rxirq;
+ unsigned long rxsync;
};
struct rate_context {
- unsigned refcount;
- struct timer_list timer;
- struct rate_stats new;
- struct rate_stats old;
+ unsigned refcount;
+ struct timer_list timer;
+ struct rate_stats new;
+ struct rate_stats old;
};
#define RATE_PRINTK(_NAME_) \
- printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
+ printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD 2
static void rate_callback(unsigned long arg)
{
- struct rate_context * ctx = (struct rate_context *)arg;
- struct rate_stats cur = ctx->new;
- int r;
-
- RATE_PRINTK(txpkt);
- RATE_PRINTK(txsync);
- RATE_PRINTK(txirq);
- RATE_PRINTK(rxpkt);
- RATE_PRINTK(rxsync);
- RATE_PRINTK(rxirq);
- printk("\n");
-
- ctx->old = cur;
- r = mod_timer(&ctx->timer, jiffies +
- msecs_to_jiffies(RATE_PERIOD * 1000));
- if (unlikely(r))
- D("[v1000] Error: mod_timer()");
+ struct rate_context * ctx = (struct rate_context *)arg;
+ struct rate_stats cur = ctx->new;
+ int r;
+
+ RATE_PRINTK(txpkt);
+ RATE_PRINTK(txsync);
+ RATE_PRINTK(txirq);
+ RATE_PRINTK(rxpkt);
+ RATE_PRINTK(rxsync);
+ RATE_PRINTK(rxirq);
+ printk("\n");
+
+ ctx->old = cur;
+ r = mod_timer(&ctx->timer, jiffies +
+ msecs_to_jiffies(RATE_PERIOD * 1000));
+ if (unlikely(r))
+ D("[v1000] Error: mod_timer()");
}
static struct rate_context rate_ctx;
@@ -197,150 +197,150 @@ netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
return;
- netmap_common_irq(ifp, q, work_done);
+ netmap_common_irq(ifp, q, work_done);
}
/* Enable/disable netmap mode for a generic network interface. */
-int generic_netmap_register(struct netmap_adapter *na, int enable)
+static int
+generic_netmap_register(struct netmap_adapter *na, int enable)
{
- struct ifnet *ifp = na->ifp;
- struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
- struct mbuf *m;
- int error;
- int i, r;
+ struct ifnet *ifp = na->ifp;
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct mbuf *m;
+ int error;
+ int i, r;
- if (!na)
- return EINVAL;
+ if (!na)
+ return EINVAL;
#ifdef REG_RESET
- error = ifp->netdev_ops->ndo_stop(ifp);
- if (error) {
- return error;
- }
+ error = ifp->netdev_ops->ndo_stop(ifp);
+ if (error) {
+ return error;
+ }
#endif /* REG_RESET */
- if (enable) { /* Enable netmap mode. */
- /* Initialize the rx queue, as generic_rx_handler() can
- * be called as soon as netmap_catch_rx() returns.
- */
- for (r=0; r<na->num_rx_rings; r++) {
- mbq_safe_init(&na->rx_rings[r].rx_queue);
- na->rx_rings[r].nr_ntc = 0;
- }
-
- /* Init the mitigation timer. */
- netmap_mitigation_init(gna);
-
- /*
- * Preallocate packet buffers for the tx rings.
- */
- for (r=0; r<na->num_tx_rings; r++) {
- na->tx_rings[r].nr_ntc = 0;
- na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- if (!na->tx_rings[r].tx_pool) {
- D("tx_pool allocation failed");
- error = ENOMEM;
- goto free_tx_pool;
- }
- for (i=0; i<na->num_tx_desc; i++) {
- m = netmap_get_mbuf(GENERIC_BUF_SIZE);
- if (!m) {
- D("tx_pool[%d] allocation failed", i);
- error = ENOMEM;
- goto free_mbufs;
- }
- na->tx_rings[r].tx_pool[i] = m;
- }
- }
- rtnl_lock();
- /* Prepare to intercept incoming traffic. */
- error = netmap_catch_rx(na, 1);
- if (error) {
- D("netdev_rx_handler_register() failed");
- goto register_handler;
- }
- ifp->if_capenable |= IFCAP_NETMAP;
-
- /* Make netmap control the packet steering. */
- netmap_catch_packet_steering(gna, 1);
-
- rtnl_unlock();
+ if (enable) { /* Enable netmap mode. */
+ /* Initialize the rx queue, as generic_rx_handler() can
+ * be called as soon as netmap_catch_rx() returns.
+ */
+ for (r=0; r<na->num_rx_rings; r++) {
+ mbq_safe_init(&na->rx_rings[r].rx_queue);
+ }
+
+ /* Init the mitigation timer. */
+ netmap_mitigation_init(gna);
+
+ /*
+ * Preallocate packet buffers for the tx rings.
+ */
+ for (r=0; r<na->num_tx_rings; r++)
+ na->tx_rings[r].tx_pool = NULL;
+ for (r=0; r<na->num_tx_rings; r++) {
+ na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!na->tx_rings[r].tx_pool) {
+ D("tx_pool allocation failed");
+ error = ENOMEM;
+ goto free_tx_pools;
+ }
+ for (i=0; i<na->num_tx_desc; i++)
+ na->tx_rings[r].tx_pool[i] = NULL;
+ for (i=0; i<na->num_tx_desc; i++) {
+ m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+ if (!m) {
+ D("tx_pool[%d] allocation failed", i);
+ error = ENOMEM;
+ goto free_tx_pools;
+ }
+ na->tx_rings[r].tx_pool[i] = m;
+ }
+ }
+ rtnl_lock();
+ /* Prepare to intercept incoming traffic. */
+ error = netmap_catch_rx(na, 1);
+ if (error) {
+ D("netdev_rx_handler_register() failed");
+ goto register_handler;
+ }
+ ifp->if_capenable |= IFCAP_NETMAP;
+
+ /* Make netmap control the packet steering. */
+ netmap_catch_tx(gna, 1);
+
+ rtnl_unlock();
#ifdef RATE
- if (rate_ctx.refcount == 0) {
- D("setup_timer()");
- memset(&rate_ctx, 0, sizeof(rate_ctx));
- setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
- if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
- D("Error: mod_timer()");
- }
- }
- rate_ctx.refcount++;
+ if (rate_ctx.refcount == 0) {
+ D("setup_timer()");
+ memset(&rate_ctx, 0, sizeof(rate_ctx));
+ setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
+ if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
+ D("Error: mod_timer()");
+ }
+ }
+ rate_ctx.refcount++;
#endif /* RATE */
- } else { /* Disable netmap mode. */
- rtnl_lock();
+ } else { /* Disable netmap mode. */
+ rtnl_lock();
- ifp->if_capenable &= ~IFCAP_NETMAP;
+ ifp->if_capenable &= ~IFCAP_NETMAP;
- /* Release packet steering control. */
- netmap_catch_packet_steering(gna, 0);
+ /* Release packet steering control. */
+ netmap_catch_tx(gna, 0);
- /* Do not intercept packets on the rx path. */
- netmap_catch_rx(na, 0);
+ /* Do not intercept packets on the rx path. */
+ netmap_catch_rx(na, 0);
- rtnl_unlock();
+ rtnl_unlock();
- /* Free the mbufs going to the netmap rings */
- for (r=0; r<na->num_rx_rings; r++) {
- mbq_safe_purge(&na->rx_rings[r].rx_queue);
- mbq_safe_destroy(&na->rx_rings[r].rx_queue);
- }
+ /* Free the mbufs going to the netmap rings */
+ for (r=0; r<na->num_rx_rings; r++) {
+ mbq_safe_purge(&na->rx_rings[r].rx_queue);
+ mbq_safe_destroy(&na->rx_rings[r].rx_queue);
+ }
- netmap_mitigation_cleanup(gna);
+ netmap_mitigation_cleanup(gna);
- for (r=0; r<na->num_tx_rings; r++) {
- for (i=0; i<na->num_tx_desc; i++) {
- m_freem(na->tx_rings[r].tx_pool[i]);
- }
- free(na->tx_rings[r].tx_pool, M_DEVBUF);
- }
+ for (r=0; r<na->num_tx_rings; r++) {
+ for (i=0; i<na->num_tx_desc; i++) {
+ m_freem(na->tx_rings[r].tx_pool[i]);
+ }
+ free(na->tx_rings[r].tx_pool, M_DEVBUF);
+ }
#ifdef RATE
- if (--rate_ctx.refcount == 0) {
- D("del_timer()");
- del_timer(&rate_ctx.timer);
- }
+ if (--rate_ctx.refcount == 0) {
+ D("del_timer()");
+ del_timer(&rate_ctx.timer);
+ }
#endif
- }
+ }
#ifdef REG_RESET
- error = ifp->netdev_ops->ndo_open(ifp);
- if (error) {
- goto alloc_tx_pool;
- }
+ error = ifp->netdev_ops->ndo_open(ifp);
+ if (error) {
+ goto alloc_tx_pool;
+ }
#endif
- return 0;
+ return 0;
register_handler:
- rtnl_unlock();
-free_tx_pool:
- r--;
- i = na->num_tx_desc; /* Useless, but just to stay safe. */
-free_mbufs:
- i--;
- for (; r>=0; r--) {
- for (; i>=0; i--) {
- m_freem(na->tx_rings[r].tx_pool[i]);
- }
- free(na->tx_rings[r].tx_pool, M_DEVBUF);
- i = na->num_tx_desc - 1;
- }
-
- return error;
+ rtnl_unlock();
+free_tx_pools:
+ for (r=0; r<na->num_tx_rings; r++) {
+ if (na->tx_rings[r].tx_pool == NULL)
+ continue;
+ for (i=0; i<na->num_tx_desc; i++)
+ if (na->tx_rings[r].tx_pool[i])
+ m_freem(na->tx_rings[r].tx_pool[i]);
+ free(na->tx_rings[r].tx_pool, M_DEVBUF);
+ }
+
+ return error;
}
/*
@@ -351,93 +351,88 @@ free_mbufs:
static void
generic_mbuf_destructor(struct mbuf *m)
{
- if (netmap_verbose)
- D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
- netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
+ if (netmap_verbose)
+ D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
+ netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
- m->m_ext.ext_type = EXT_PACKET;
- m->m_ext.ext_free = NULL;
- if (*(m->m_ext.ref_cnt) == 0)
- *(m->m_ext.ref_cnt) = 1;
- uma_zfree(zone_pack, m);
+ m->m_ext.ext_type = EXT_PACKET;
+ m->m_ext.ext_free = NULL;
+ if (*(m->m_ext.ref_cnt) == 0)
+ *(m->m_ext.ref_cnt) = 1;
+ uma_zfree(zone_pack, m);
#endif /* __FreeBSD__ */
- IFRATE(rate_ctx.new.txirq++);
+ IFRATE(rate_ctx.new.txirq++);
}
-/* Record completed transmissions and update hwavail.
+/* Record completed transmissions and update hwtail.
*
- * nr_ntc is the oldest tx buffer not yet completed
- * (same as nr_hwavail + nr_hwcur + 1),
+ * The oldest tx buffer not yet completed is at nr_hwtail + 1,
* nr_hwcur is the first unsent buffer.
- * When cleaning, we try to recover buffers between nr_ntc and nr_hwcur.
*/
-static int
+static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
- u_int num_slots = kring->nkr_num_slots;
- u_int ntc = kring->nr_ntc;
- u_int hwcur = kring->nr_hwcur;
- u_int n = 0;
- struct mbuf **tx_pool = kring->tx_pool;
-
- while (ntc != hwcur) { /* buffers not completed */
- struct mbuf *m = tx_pool[ntc];
-
- if (unlikely(m == NULL)) {
- /* try to replenish the entry */
- tx_pool[ntc] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
- if (unlikely(m == NULL)) {
- D("mbuf allocation failed, XXX error");
- // XXX how do we proceed ? break ?
- return -ENOMEM;
- }
- } else if (GET_MBUF_REFCNT(m) != 1) {
- break; /* This mbuf is still busy: its refcnt is 2. */
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int nm_i = nm_next(kring->nr_hwtail, lim);
+ u_int hwcur = kring->nr_hwcur;
+ u_int n = 0;
+ struct mbuf **tx_pool = kring->tx_pool;
+
+ while (nm_i != hwcur) { /* buffers not completed */
+ struct mbuf *m = tx_pool[nm_i];
+
+ if (unlikely(m == NULL)) {
+ /* this is done, try to replenish the entry */
+ tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+ if (unlikely(m == NULL)) {
+ D("mbuf allocation failed, XXX error");
+ // XXX how do we proceed ? break ?
+ return -ENOMEM;
+ }
+ } else if (GET_MBUF_REFCNT(m) != 1) {
+ break; /* This mbuf is still busy: its refcnt is 2. */
+ }
+ n++;
+ nm_i = nm_next(nm_i, lim);
}
- if (unlikely(++ntc == num_slots)) {
- ntc = 0;
- }
- n++;
- }
- kring->nr_ntc = ntc;
- kring->nr_hwavail += n;
- ND("tx completed [%d] -> hwavail %d", n, kring->nr_hwavail);
-
- return n;
+ kring->nr_hwtail = nm_prev(nm_i, lim);
+ ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
+
+ return n;
}
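
nm_next() and nm_prev() are the circular index helpers the rewritten code relies on; their definitions live in netmap_kern.h (outside this hunk) and are presumably along these lines:

    static inline uint32_t
    nm_next(uint32_t i, uint32_t lim)       /* lim == num_slots - 1 */
    {
            return (i == lim) ? 0 : i + 1;
    }

    static inline uint32_t
    nm_prev(uint32_t i, uint32_t lim)
    {
            return (i == 0) ? lim : i - 1;
    }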
/*
- * We have pending packets in the driver between nr_ntc and j.
+ * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
* Compute a position in the middle, to be used to generate
* a notification.
*/
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
- u_int n = kring->nkr_num_slots;
- u_int ntc = kring->nr_ntc;
- u_int e;
-
- if (hwcur >= ntc) {
- e = (hwcur + ntc) / 2;
- } else { /* wrap around */
- e = (hwcur + n + ntc) / 2;
- if (e >= n) {
- e -= n;
- }
- }
-
- if (unlikely(e >= n)) {
- D("This cannot happen");
- e = 0;
- }
-
- return e;
+ u_int n = kring->nkr_num_slots;
+ u_int ntc = nm_next(kring->nr_hwtail, n-1);
+ u_int e;
+
+ if (hwcur >= ntc) {
+ e = (hwcur + ntc) / 2;
+ } else { /* wrap around */
+ e = (hwcur + n + ntc) / 2;
+ if (e >= n) {
+ e -= n;
+ }
+ }
+
+ if (unlikely(e >= n)) {
+ D("This cannot happen");
+ e = 0;
+ }
+
+ return e;
}
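
A worked example (illustration only): with num_slots = 256, nr_hwtail = 200 (so ntc = 201) and hwcur = 40, the pending region covers slots 201..255 and 0..39 (95 slots). Since hwcur < ntc, e = (40 + 256 + 201) / 2 = 248, roughly halfway through that region.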
/*
- * We have pending packets in the driver between nr_ntc and hwcur.
+ * We have pending packets in the driver between nr_hwtail+1 and hwcur.
* Schedule a notification approximately in the middle of the two.
* There is a race but this is only called within txsync which does
* a double check.
@@ -445,28 +440,28 @@ generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
- struct mbuf *m;
- u_int e;
-
- if (kring->nr_ntc == hwcur) {
- return;
- }
- e = generic_tx_event_middle(kring, hwcur);
-
- m = kring->tx_pool[e];
- if (m == NULL) {
- /* This can happen if there is already an event on the netmap
- slot 'e': There is nothing to do. */
- return;
- }
- ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
- kring->tx_pool[e] = NULL;
- SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
-
- // XXX wmb() ?
- /* Decrement the refcount an free it if we have the last one. */
- m_freem(m);
- smp_mb();
+ struct mbuf *m;
+ u_int e;
+
+ if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
+ return; /* all buffers are free */
+ }
+ e = generic_tx_event_middle(kring, hwcur);
+
+ m = kring->tx_pool[e];
+ if (m == NULL) {
+ /* This can happen if there is already an event on the netmap
+ slot 'e': There is nothing to do. */
+ return;
+ }
+ ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
+ kring->tx_pool[e] = NULL;
+ SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
+
+ // XXX wmb() ?
+ /* Decrement the refcount and free it if we have the last one. */
+ m_freem(m);
+ smp_mb();
}
@@ -480,133 +475,108 @@ generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct ifnet *ifp = na->ifp;
- struct netmap_kring *kring = &na->tx_rings[ring_nr];
- struct netmap_ring *ring = kring->ring;
- u_int j, k, num_slots = kring->nkr_num_slots;
- int new_slots, ntx;
-
- IFRATE(rate_ctx.new.txsync++);
-
- // TODO: handle the case of mbuf allocation failure
- /* first, reclaim completed buffers */
- generic_netmap_tx_clean(kring);
-
- /* Take a copy of ring->cur now, and never read it again. */
- k = ring->cur;
- if (unlikely(k >= num_slots)) {
- return netmap_ring_reinit(kring);
- }
-
- rmb();
- j = kring->nr_hwcur;
- /*
- * 'new_slots' counts how many new slots have been added:
- * everything from hwcur to cur, excluding reserved ones, if any.
- * nr_hwreserved start from hwcur and counts how many slots were
- * not sent to the NIC from the previous round.
- */
- new_slots = k - j - kring->nr_hwreserved;
- if (new_slots < 0) {
- new_slots += num_slots;
- }
- ntx = 0;
- if (j != k) {
- /* Process new packets to send:
- * j is the current index in the netmap ring.
+ struct ifnet *ifp = na->ifp;
+ struct netmap_kring *kring = &na->tx_rings[ring_nr];
+ struct netmap_ring *ring = kring->ring;
+ u_int nm_i; /* index into the netmap ring */ // j
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const head = kring->rhead;
+
+ IFRATE(rate_ctx.new.txsync++);
+
+ // TODO: handle the case of mbuf allocation failure
+
+ rmb();
+
+ /*
+ * First part: process new packets to send.
*/
- while (j != k) {
- struct netmap_slot *slot = &ring->slot[j]; /* Current slot in the netmap ring */
- void *addr = NMB(slot);
- u_int len = slot->len;
- struct mbuf *m;
- int tx_ret;
-
- if (unlikely(addr == netmap_buffer_base || len > NETMAP_BUF_SIZE)) {
- return netmap_ring_reinit(kring);
- }
- /* Tale a mbuf from the tx pool and copy in the user packet. */
- m = kring->tx_pool[j];
- if (unlikely(!m)) {
- RD(5, "This should never happen");
- kring->tx_pool[j] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
- if (unlikely(m == NULL)) {
- D("mbuf allocation failed");
- break;
- }
- }
- /* XXX we should ask notifications when NS_REPORT is set,
- * or roughly every half frame. We can optimize this
- * by lazily requesting notifications only when a
- * transmission fails. Probably the best way is to
- * break on failures and set notifications when
- * ring->avail == 0 || j != k
- */
- tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
- if (unlikely(tx_ret)) {
- RD(5, "start_xmit failed: err %d [%u,%u,%u,%u]",
- tx_ret, kring->nr_ntc, j, k, kring->nr_hwavail);
- /*
- * No room for this mbuf in the device driver.
- * Request a notification FOR A PREVIOUS MBUF,
- * then call generic_netmap_tx_clean(kring) to do the
- * double check and see if we can free more buffers.
- * If there is space continue, else break;
- * NOTE: the double check is necessary if the problem
- * occurs in the txsync call after selrecord().
- * Also, we need some way to tell the caller that not
- * all buffers were queued onto the device (this was
- * not a problem with native netmap driver where space
- * is preallocated). The bridge has a similar problem
- * and we solve it there by dropping the excess packets.
- */
- generic_set_tx_event(kring, j);
- if (generic_netmap_tx_clean(kring)) { /* space now available */
- continue;
- } else {
- break;
- }
- }
- slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
- if (unlikely(++j == num_slots))
- j = 0;
- ntx++;
- }
-
- /* Update hwcur to the next slot to transmit. */
- kring->nr_hwcur = j;
-
- /*
- * Report all new slots as unavailable, even those not sent.
- * We account for them with with hwreserved, so that
- * nr_hwreserved =:= cur - nr_hwcur
+ nm_i = kring->nr_hwcur;
+ if (nm_i != head) { /* we have new packets to send */
+ while (nm_i != head) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ u_int len = slot->len;
+ void *addr = NMB(slot);
+
+ /* device-specific */
+ struct mbuf *m;
+ int tx_ret;
+
+ NM_CHECK_ADDR_LEN(addr, len);
+
+ /* Take an mbuf from the tx pool and copy in the user packet. */
+ m = kring->tx_pool[nm_i];
+ if (unlikely(!m)) {
+ RD(5, "This should never happen");
+ kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+ if (unlikely(m == NULL)) {
+ D("mbuf allocation failed");
+ break;
+ }
+ }
+ /* XXX we should ask notifications when NS_REPORT is set,
+ * or roughly every half frame. We can optimize this
+ * by lazily requesting notifications only when a
+ * transmission fails. Probably the best way is to
+ * break on failures and set notifications when
+ * ring->cur == ring->tail || nm_i != cur
+ */
+ tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
+ if (unlikely(tx_ret)) {
+ RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
+ tx_ret, nm_i, head, kring->nr_hwtail);
+ /*
+ * No room for this mbuf in the device driver.
+ * Request a notification FOR A PREVIOUS MBUF,
+ * then call generic_netmap_tx_clean(kring) to do the
+ * double check and see if we can free more buffers.
+ * If there is space continue, else break;
+ * NOTE: the double check is necessary if the problem
+ * occurs in the txsync call after selrecord().
+ * Also, we need some way to tell the caller that not
+ * all buffers were queued onto the device (this was
+ * not a problem with native netmap driver where space
+ * is preallocated). The bridge has a similar problem
+ * and we solve it there by dropping the excess packets.
+ */
+ generic_set_tx_event(kring, nm_i);
+ if (generic_netmap_tx_clean(kring)) { /* space now available */
+ continue;
+ } else {
+ break;
+ }
+ }
+ slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+ nm_i = nm_next(nm_i, lim);
+ }
+
+ /* Update hwcur to the next slot to transmit. */
+ kring->nr_hwcur = nm_i; /* not head, we could break early */
+
+ IFRATE(rate_ctx.new.txpkt += ntx);
+ }
+
+ /*
+ * Second, reclaim completed buffers
*/
- kring->nr_hwavail -= new_slots;
- kring->nr_hwreserved = k - j;
- if (kring->nr_hwreserved < 0) {
- kring->nr_hwreserved += num_slots;
- }
-
- IFRATE(rate_ctx.new.txpkt += ntx);
-
- if (!kring->nr_hwavail) {
- /* No more available slots? Set a notification event
- * on a netmap slot that will be cleaned in the future.
- * No doublecheck is performed, since txsync() will be
- * called twice by netmap_poll().
- */
- generic_set_tx_event(kring, j);
- }
- ND("tx #%d, hwavail = %d", n, kring->nr_hwavail);
- }
-
- /* Synchronize the user's view to the kernel view. */
- ring->avail = kring->nr_hwavail;
- ring->reserved = kring->nr_hwreserved;
-
- return 0;
+ if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
+ /* No more available slots? Set a notification event
+ * on a netmap slot that will be cleaned in the future.
+ * No doublecheck is performed, since txsync() will be
+ * called twice by netmap_poll().
+ */
+ generic_set_tx_event(kring, nm_i);
+ }
+ ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);
+
+ generic_netmap_tx_clean(kring);
+
+ nm_txsync_finalize(kring);
+
+ return 0;
}
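
nm_txsync_finalize() (defined in netmap_kern.h, outside this hunk) exports the kernel's view back to the user ring under the new head/cur/tail scheme. A plausible minimal sketch, which may differ from the real helper:

    static inline void
    txsync_finalize_sketch(struct netmap_kring *kring)     /* hypothetical name */
    {
            /* tell userspace which slots have been reclaimed */
            kring->ring->tail = kring->rtail = kring->nr_hwtail;
    }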
+
/*
* This handler is registered (through netmap_catch_rx())
* within the attached network interface
@@ -615,38 +585,38 @@ generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Stolen packets are put in a queue where the
* generic_netmap_rxsync() callback can extract them.
*/
-void generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
+void
+generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
- struct netmap_adapter *na = NA(ifp);
- struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
- u_int work_done;
- u_int rr = 0; // receive ring number
-
- ND("called");
- /* limit the size of the queue */
- if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
- m_freem(m);
- } else {
- mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
- }
-
- if (netmap_generic_mit < 32768) {
- /* no rx mitigation, pass notification up */
- netmap_generic_irq(na->ifp, rr, &work_done);
- IFRATE(rate_ctx.new.rxirq++);
- } else {
- /* same as send combining, filter notification if there is a
- * pending timer, otherwise pass it up and start a timer.
- */
- if (likely(netmap_mitigation_active(gna))) {
- /* Record that there is some pending work. */
- gna->mit_pending = 1;
- } else {
- netmap_generic_irq(na->ifp, rr, &work_done);
- IFRATE(rate_ctx.new.rxirq++);
- netmap_mitigation_start(gna);
- }
- }
+ struct netmap_adapter *na = NA(ifp);
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ u_int work_done;
+ u_int rr = 0; // receive ring number
+
+ /* limit the size of the queue */
+ if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
+ m_freem(m);
+ } else {
+ mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
+ }
+
+ if (netmap_generic_mit < 32768) {
+ /* no rx mitigation, pass notification up */
+ netmap_generic_irq(na->ifp, rr, &work_done);
+ IFRATE(rate_ctx.new.rxirq++);
+ } else {
+ /* same as send combining, filter notification if there is a
+ * pending timer, otherwise pass it up and start a timer.
+ */
+ if (likely(netmap_mitigation_active(gna))) {
+ /* Record that there is some pending work. */
+ gna->mit_pending = 1;
+ } else {
+ netmap_generic_irq(na->ifp, rr, &work_done);
+ IFRATE(rate_ctx.new.rxirq++);
+ netmap_mitigation_start(gna);
+ }
+ }
}
/*
@@ -658,105 +628,99 @@ void generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct netmap_kring *kring = &na->rx_rings[ring_nr];
- struct netmap_ring *ring = kring->ring;
- u_int j, n, lim = kring->nkr_num_slots - 1;
- int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
- u_int k, resvd = ring->reserved;
-
- if (ring->cur > lim)
- return netmap_ring_reinit(kring);
-
- /* Import newly received packets into the netmap ring. */
- if (netmap_no_pendintr || force_update) {
- uint16_t slot_flags = kring->nkr_slot_flags;
- struct mbuf *m;
-
- n = 0;
- j = kring->nr_ntc; /* first empty slot in the receive ring */
- /* extract buffers from the rx queue, stop at most one
- * slot before nr_hwcur (index k)
+ struct netmap_kring *kring = &na->rx_rings[ring_nr];
+ struct netmap_ring *ring = kring->ring;
+ u_int nm_i; /* index into the netmap ring */ //j,
+ u_int n;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const head = nm_rxsync_prologue(kring);
+ int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+
+ if (head > lim)
+ return netmap_ring_reinit(kring);
+
+ /*
+ * First part: import newly received packets.
+ */
+ if (netmap_no_pendintr || force_update) {
+ /* extract buffers from the rx queue, stop at most one
+ * slot before nr_hwcur (stop_i)
+ */
+ uint16_t slot_flags = kring->nkr_slot_flags;
+ u_int stop_i = nm_prev(kring->nr_hwcur, lim);
+
+ nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
+ for (n = 0; nm_i != stop_i; n++) {
+ int len;
+ void *addr = NMB(&ring->slot[nm_i]);
+ struct mbuf *m;
+
+ /* we only check the address here on generic rx rings */
+ if (addr == netmap_buffer_base) { /* Bad buffer */
+ return netmap_ring_reinit(kring);
+ }
+ /*
+ * Call the locked version of the function.
+ * XXX Ideally we could grab a batch of mbufs at once
+ * and save some locking overhead.
+ */
+ m = mbq_safe_dequeue(&kring->rx_queue);
+ if (!m) /* no more data */
+ break;
+ len = MBUF_LEN(m);
+ m_copydata(m, 0, len, addr);
+ ring->slot[nm_i].len = len;
+ ring->slot[nm_i].flags = slot_flags;
+ m_freem(m);
+ nm_i = nm_next(nm_i, lim);
+ n++;
+ }
+ if (n) {
+ kring->nr_hwtail = nm_i;
+ IFRATE(rate_ctx.new.rxpkt += n);
+ }
+ kring->nr_kflags &= ~NKR_PENDINTR;
+ }
+
+ // XXX should we invert the order ?
+ /*
+ * Second part: skip past packets that userspace has released.
*/
- k = (kring->nr_hwcur) ? kring->nr_hwcur-1 : lim;
- while (j != k) {
- int len;
- void *addr = NMB(&ring->slot[j]);
-
- if (addr == netmap_buffer_base) { /* Bad buffer */
- return netmap_ring_reinit(kring);
- }
- /*
- * Call the locked version of the function.
- * XXX Ideally we could grab a batch of mbufs at once,
- * by changing rx_queue into a ring.
- */
- m = mbq_safe_dequeue(&kring->rx_queue);
- if (!m)
- break;
- len = MBUF_LEN(m);
- m_copydata(m, 0, len, addr);
- ring->slot[j].len = len;
- ring->slot[j].flags = slot_flags;
- m_freem(m);
- if (unlikely(j++ == lim))
- j = 0;
- n++;
- }
- if (n) {
- kring->nr_ntc = j;
- kring->nr_hwavail += n;
- IFRATE(rate_ctx.new.rxpkt += n);
- }
- kring->nr_kflags &= ~NKR_PENDINTR;
- }
-
- // XXX should we invert the order ?
- /* Skip past packets that userspace has released */
- j = kring->nr_hwcur;
- k = ring->cur;
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
- if (j != k) {
- /* Userspace has released some packets. */
- for (n = 0; j != k; n++) {
- struct netmap_slot *slot = &ring->slot[j];
-
- slot->flags &= ~NS_BUF_CHANGED;
- if (unlikely(j++ == lim))
- j = 0;
- }
- kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
- }
- /* Tell userspace that there are new packets. */
- ring->avail = kring->nr_hwavail - resvd;
- IFRATE(rate_ctx.new.rxsync++);
-
- return 0;
+ nm_i = kring->nr_hwcur;
+ if (nm_i != head) {
+ /* Userspace has released some packets. */
+ for (n = 0; nm_i != head; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+
+ slot->flags &= ~NS_BUF_CHANGED;
+ nm_i = nm_next(nm_i, lim);
+ }
+ kring->nr_hwcur = head;
+ }
+ /* tell userspace that there might be new packets. */
+ nm_rxsync_finalize(kring);
+ IFRATE(rate_ctx.new.rxsync++);
+
+ return 0;
}
static void
generic_netmap_dtor(struct netmap_adapter *na)
{
- struct ifnet *ifp = na->ifp;
- struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
- struct netmap_adapter *prev_na = gna->prev;
-
- if (prev_na != NULL) {
- D("Released generic NA %p", gna);
- if_rele(na->ifp);
- netmap_adapter_put(prev_na);
- }
- if (ifp != NULL) {
- WNA(ifp) = prev_na;
- D("Restored native NA %p", prev_na);
- na->ifp = NULL;
- }
+ struct ifnet *ifp = na->ifp;
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
+ struct netmap_adapter *prev_na = gna->prev;
+
+ if (prev_na != NULL) {
+ D("Released generic NA %p", gna);
+ if_rele(na->ifp);
+ netmap_adapter_put(prev_na);
+ }
+ if (ifp != NULL) {
+ WNA(ifp) = prev_na;
+ D("Restored native NA %p", prev_na);
+ na->ifp = NULL;
+ }
}
/*
@@ -773,46 +737,46 @@ generic_netmap_dtor(struct netmap_adapter *na)
int
generic_netmap_attach(struct ifnet *ifp)
{
- struct netmap_adapter *na;
- struct netmap_generic_adapter *gna;
- int retval;
- u_int num_tx_desc, num_rx_desc;
-
- num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
-
- generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
- ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
-
- gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
- if (gna == NULL) {
- D("no memory on attach, give up");
- return ENOMEM;
- }
- na = (struct netmap_adapter *)gna;
- na->ifp = ifp;
- na->num_tx_desc = num_tx_desc;
- na->num_rx_desc = num_rx_desc;
- na->nm_register = &generic_netmap_register;
- na->nm_txsync = &generic_netmap_txsync;
- na->nm_rxsync = &generic_netmap_rxsync;
- na->nm_dtor = &generic_netmap_dtor;
- /* when using generic, IFCAP_NETMAP is set so we force
- * NAF_SKIP_INTR to use the regular interrupt handler
- */
- na->na_flags = NAF_SKIP_INTR;
-
- ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
- ifp->num_tx_queues, ifp->real_num_tx_queues,
- ifp->tx_queue_len);
- ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
- ifp->num_rx_queues, ifp->real_num_rx_queues);
-
- generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
-
- retval = netmap_attach_common(na);
- if (retval) {
- free(gna, M_DEVBUF);
- }
-
- return retval;
+ struct netmap_adapter *na;
+ struct netmap_generic_adapter *gna;
+ int retval;
+ u_int num_tx_desc, num_rx_desc;
+
+ num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
+
+ generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
+ ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
+
+ gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (gna == NULL) {
+ D("no memory on attach, give up");
+ return ENOMEM;
+ }
+ na = (struct netmap_adapter *)gna;
+ na->ifp = ifp;
+ na->num_tx_desc = num_tx_desc;
+ na->num_rx_desc = num_rx_desc;
+ na->nm_register = &generic_netmap_register;
+ na->nm_txsync = &generic_netmap_txsync;
+ na->nm_rxsync = &generic_netmap_rxsync;
+ na->nm_dtor = &generic_netmap_dtor;
+ /* when using generic, IFCAP_NETMAP is set so we force
+ * NAF_SKIP_INTR to use the regular interrupt handler
+ */
+ na->na_flags = NAF_SKIP_INTR;
+
+ ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
+ ifp->num_tx_queues, ifp->real_num_tx_queues,
+ ifp->tx_queue_len);
+ ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
+ ifp->num_rx_queues, ifp->real_num_rx_queues);
+
+ generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
+
+ retval = netmap_attach_common(na);
+ if (retval) {
+ free(gna, M_DEVBUF);
+ }
+
+ return retval;
}
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 9381cd4cedd3..74a46297ff3d 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -1,6 +1,6 @@
/*
- * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
- * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -53,7 +53,7 @@
#define NM_SELINFO_T struct selinfo
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
#define MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
-#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m)
+#define NM_SEND_UP(ifp, m) ((NA(ifp))->if_input)(ifp, m)
#define NM_ATOMIC_T volatile int // XXX ?
/* atomic operations */
@@ -76,7 +76,11 @@ struct hrtimer {
#define NM_SELINFO_T wait_queue_head_t
#define MBUF_LEN(m) ((m)->len)
#define MBUF_IFP(m) ((m)->dev)
-#define NM_SEND_UP(ifp, m) netif_rx(m)
+#define NM_SEND_UP(ifp, m) \
+ do { \
+ m->priority = NM_MAGIC_PRIORITY; \
+ netif_rx(m); \
+ } while (0)
#define NM_ATOMIC_T volatile long unsigned int
@@ -125,9 +129,9 @@ struct hrtimer {
do { \
struct timeval __xxts; \
microtime(&__xxts); \
- printf("%03d.%06d %s [%d] " format "\n", \
+ printf("%03d.%06d [%4d] %-25s " format "\n", \
(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
- __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ __LINE__, __FUNCTION__, ##__VA_ARGS__); \
} while (0)
/* rate limited, lps indicates how many per second */
@@ -158,15 +162,23 @@ extern NMG_LOCK_T netmap_global_lock;
* a ring across system calls.
*
* nr_hwcur index of the next buffer to refill.
- * It corresponds to ring->cur - ring->reserved
+ * It corresponds to ring->head
+ * at the time the system call returns.
*
- * nr_hwavail the number of slots "owned" by userspace.
- * nr_hwavail =:= ring->avail + ring->reserved
+ * nr_hwtail index of the first buffer owned by the kernel.
+ * On RX, hwcur->hwtail are receive buffers
+ * not yet released. hwcur is advanced following
+ * ring->head, hwtail is advanced on incoming packets,
+ * and a wakeup is generated when hwtail passes ring->cur
+ * On TX, slots hwcur->rcur have been filled by the sender
+ * but not yet passed to the NIC; rcur->hwtail are available
+ * for new transmissions, and hwtail->hwcur-1 are pending
+ * transmissions not yet acknowledged.
*
* The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
* This is so that, on a reset, buffers owned by userspace are not
* modified by the kernel. In particular:
- * RX rings: the next empty buffer (hwcur + hwavail + hwofs) coincides with
+ * RX rings: the next empty buffer (hwtail + hwofs) coincides with
* the next empty buffer as known by the hardware (next_to_check or so).
* TX rings: hwcur + hwofs coincides with next_to_send
*
@@ -184,44 +196,76 @@ extern NMG_LOCK_T netmap_global_lock;
* from nr_hwlease, advances it, then does the
* copy outside the lock.
* In RX rings (used for VALE ports),
- * nkr_hwcur + nkr_hwavail <= nkr_hwlease < nkr_hwcur+N-1
+ * nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
* In TX rings (used for NIC or host stack ports)
- * nkr_hwcur <= nkr_hwlease < nkr_hwcur+ nkr_hwavail
+ * nkr_hwcur <= nkr_hwlease < nkr_hwtail
* nkr_leases array of nkr_num_slots where writers can report
* completion of their block. NR_NOSLOT (~0) indicates
* that the writer has not finished yet
* nkr_lease_idx index of next free slot in nr_leases, to be assigned
*
* The kring is manipulated by txsync/rxsync and generic netmap function.
- * q_lock is used to arbitrate access to the kring from within the netmap
- * code, and this and other protections guarantee that there is never
- * more than 1 concurrent call to txsync or rxsync. So we are free
- * to manipulate the kring from within txsync/rxsync without any extra
- * locks.
+ *
+ * Concurrent rxsync or txsync on the same ring are prevented
+ * by nm_kr_lock(), which in turn uses nr_busy. This is all we need
+ * for NIC rings, and for TX rings attached to the host stack.
+ *
+ * RX rings attached to the host stack use an mbq (rx_queue) on both
+ * rxsync_from_host() and netmap_transmit(). The mbq is protected
+ * by its internal lock.
+ *
+ * RX rings attached to the VALE switch are accessed by both sender
+ * and receiver. They are protected through the q_lock on the RX ring.
*/
struct netmap_kring {
- struct netmap_ring *ring;
- uint32_t nr_hwcur;
- uint32_t nr_hwavail;
- uint32_t nr_kflags; /* private driver flags */
- int32_t nr_hwreserved;
-#define NKR_PENDINTR 0x1 // Pending interrupt.
- uint32_t nkr_num_slots;
- int32_t nkr_hwofs; /* offset between NIC and netmap ring */
+ struct netmap_ring *ring;
+
+ uint32_t nr_hwcur;
+ uint32_t nr_hwtail;
+
+ /*
+ * Copies of values in user rings, so we do not need to look
+ * at the ring (which could be modified). These are set in the
+ * *sync_prologue()/finalize() routines.
+ */
+ uint32_t rhead;
+ uint32_t rcur;
+ uint32_t rtail;
+
+ uint32_t nr_kflags; /* private driver flags */
+#define NKR_PENDINTR 0x1 // Pending interrupt.
+ uint32_t nkr_num_slots;
+
+ /*
+ * On a NIC reset, the NIC ring indexes may be reset but the
+ * indexes in the netmap rings remain the same. nkr_hwofs
+ * keeps track of the offset between the two.
+ */
+ int32_t nkr_hwofs;
uint16_t nkr_slot_flags; /* initial value for flags */
+
+ /* last_reclaim is an opaque marker to help reduce the frequency
+ * of operations such as reclaiming tx buffers. A possible use
+ * is to set it to ticks and do the reclaim only once per tick.
+ */
+ uint64_t last_reclaim;
+
+
+ NM_SELINFO_T si; /* poll/select wait queue */
+ NM_LOCK_T q_lock; /* protects kring and ring. */
+ NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */
+
struct netmap_adapter *na;
- struct nm_bdg_fwd *nkr_ft;
- uint32_t *nkr_leases;
-#define NR_NOSLOT ((uint32_t)~0)
- uint32_t nkr_hwlease;
- uint32_t nkr_lease_idx;
- NM_SELINFO_T si; /* poll/select wait queue */
- NM_LOCK_T q_lock; /* protects kring and ring. */
- NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */
+ /* The following fields are for VALE switch support */
+ struct nm_bdg_fwd *nkr_ft;
+ uint32_t *nkr_leases;
+#define NR_NOSLOT ((uint32_t)~0) /* used in nkr_*lease* */
+ uint32_t nkr_hwlease;
+ uint32_t nkr_lease_idx;
- volatile int nkr_stopped;
+ volatile int nkr_stopped; // XXX what for ?
/* support for adapters without native netmap support.
* On tx rings we preallocate an array of tx buffers
@@ -230,8 +274,11 @@ struct netmap_kring {
* XXX who writes to the rx queue ?
*/
struct mbuf **tx_pool;
- u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
- struct mbq rx_queue; /* A queue for intercepted rx mbufs. */
+ // u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
+ struct mbq rx_queue; /* intercepted rx mbufs. */
+
+ uint32_t ring_id; /* debugging */
+ char name[64]; /* diagnostic */
} __attribute__((__aligned__(64)));
@@ -243,6 +290,15 @@ nm_next(uint32_t i, uint32_t lim)
return unlikely (i == lim) ? 0 : i + 1;
}
+
+/* return the previous index, with wraparound */
+static inline uint32_t
+nm_prev(uint32_t i, uint32_t lim)
+{
+ return unlikely (i == 0) ? lim : i - 1;
+}
+
+
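As a small worked example of the hwcur/hwtail conventions described above (a sketch only; this helper is not part of the patch), the slots currently owned by userspace on an RX ring are hwcur..hwtail, counted and walked with the same wraparound rules as nm_next()/nm_prev():

	/* Sketch, not in the patch: count the slots between hwcur and hwtail. */
	static inline u_int
	example_kr_used(struct netmap_kring *k)
	{
		int n = k->nr_hwtail - k->nr_hwcur;

		if (n < 0)		/* the range wraps around */
			n += k->nkr_num_slots;
		return n;		/* 0 .. nkr_num_slots - 1 */
	}

	/* walking the same range, e.g. to inspect received slots:
	 *
	 *	u_int i, lim = k->nkr_num_slots - 1;
	 *	for (i = k->nr_hwcur; i != k->nr_hwtail; i = nm_next(i, lim))
	 *		... k->ring->slot[i] ...
	 */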
/*
*
* Here is the layout for the Rx and Tx rings.
@@ -253,36 +309,36 @@ nm_next(uint32_t i, uint32_t lim)
| | | |
|XXX free slot XXX| |XXX free slot XXX|
+-----------------+ +-----------------+
- | |<-hwcur | |<-hwcur
- | reserved h | | (ready |
- +----------- w -+ | to be |
- cur->| a | | sent) h |
- | v | +---------- w |
- | a | cur->| (being a |
- | i | | prepared) v |
- | avail l | | a |
- +-----------------+ + a ------ i +
- | | ... | v l |<-hwlease
- | (being | ... | a | ...
- | prepared) | ... | i | ...
- +-----------------+ ... | l | ...
- | |<-hwlease +-----------------+
+head->| owned by user |<-hwcur | not sent to nic |<-hwcur
+ | | | yet |
+ +-----------------+ | |
+ cur->| available to | | |
+ | user, not read | +-----------------+
+ | yet | cur->| (being |
+ | | | prepared) |
| | | |
+ +-----------------+ + ------ +
+tail->| |<-hwtail | |<-hwlease
+ | (being | ... | | ...
+ | prepared) | ... | | ...
+ +-----------------+ ... | | ...
+ | |<-hwlease +-----------------+
+ | | tail->| |<-hwtail
| | | |
| | | |
| | | |
+-----------------+ +-----------------+
- * The cur/avail (user view) and hwcur/hwavail (kernel view)
+ * The cur/tail (user view) and hwcur/hwtail (kernel view)
* are used in the normal operation of the card.
*
* When a ring is the output of a switch port (Rx ring for
* a VALE port, Tx ring for the host stack or NIC), slots
* are reserved in blocks through 'hwlease' which points
* to the next unused slot.
- * On an Rx ring, hwlease is always after hwavail,
- * and completions cause avail to advance.
- * On a Tx ring, hwlease is always between cur and hwavail,
+ * On an Rx ring, hwlease is always after hwtail,
+ * and completions cause hwtail to advance.
+ * On a Tx ring, hwlease is always between cur and hwtail,
* and completions cause cur to advance.
*
* nm_kr_space() returns the maximum number of slots that
@@ -294,7 +350,6 @@ nm_next(uint32_t i, uint32_t lim)
-
enum txrx { NR_RX = 0, NR_TX = 1 };
/*
@@ -349,6 +404,7 @@ struct netmap_adapter {
*/
struct netmap_kring *tx_rings; /* array of TX rings. */
struct netmap_kring *rx_rings; /* array of RX rings. */
+
void *tailroom; /* space below the rings array */
/* (used for leases) */
@@ -360,11 +416,38 @@ struct netmap_adapter {
*/
int (*if_transmit)(struct ifnet *, struct mbuf *);
+ /* copy of if_input for netmap_send_up() */
+ void (*if_input)(struct ifnet *, struct mbuf *);
+
/* references to the ifnet and device routines, used by
* the generic netmap functions.
*/
struct ifnet *ifp; /* adapter is ifp->if_softc */
+ /*---- callbacks for this netmap adapter -----*/
+ /*
+ * nm_dtor() is the cleanup routine called when destroying
+ * the adapter.
+ *
+ * nm_register() is called on NIOCREGIF and close() to enter
+ * or exit netmap mode on the NIC
+ *
+ * nm_txsync() pushes packets to the underlying hw/switch
+ *
+ * nm_rxsync() collects packets from the underlying hw/switch
+ *
+ * nm_config() returns configuration information from the OS
+ *
+ * nm_krings_create() XXX
+ *
+ * nm_krings_delete() XXX
+ *
+ * nm_notify() is used to act after data have become available.
+ * For hw devices this is typically a selwakeup(),
+ * but for NIC/host ports attached to a switch (or vice-versa)
+ * we also need to invoke the 'txsync' code downstream.
+ */
+
/* private cleanup */
void (*nm_dtor)(struct netmap_adapter *);
@@ -403,6 +486,7 @@ struct netmap_adapter {
void *na_private;
};
+
/*
* If the NIC is owned by the kernel
* (i.e., bridge), neither another bridge nor user can use it;
@@ -433,13 +517,15 @@ struct netmap_vp_adapter { /* VALE software port */
u_int offset; /* Offset of ethernet header for each packet. */
};
+
struct netmap_hw_adapter { /* physical device */
struct netmap_adapter up;
struct net_device_ops nm_ndo; // XXX linux only
};
-struct netmap_generic_adapter { /* non-native device */
+
+struct netmap_generic_adapter { /* emulated device */
struct netmap_hw_adapter up;
/* Pointer to a previously used netmap adapter. */
@@ -455,16 +541,20 @@ struct netmap_generic_adapter { /* non-native device */
struct hrtimer mit_timer;
int mit_pending;
+#ifdef linux
+ netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
+#endif
};
#ifdef WITH_VALE
-/* bridge wrapper for non VALE ports. It is used to connect real devices to the bridge.
+/*
+ * Bridge wrapper for non VALE ports attached to a VALE switch.
*
- * The real device must already have its own netmap adapter (hwna). The
- * bridge wrapper and the hwna adapter share the same set of netmap rings and
- * buffers, but they have two separate sets of krings descriptors, with tx/rx
- * meanings swapped:
+ * The real device must already have its own netmap adapter (hwna).
+ * The bridge wrapper and the hwna adapter share the same set of
+ * netmap rings and buffers, but they have two separate sets of
+ * krings descriptors, with tx/rx meanings swapped:
*
* netmap
* bwrap krings rings krings hwna
@@ -478,23 +568,28 @@ struct netmap_generic_adapter { /* non-native device */
* | | +------+ +-----+ +------+ | |
* +------+ +------+
*
- * - packets coming from the bridge go to the brwap rx rings, which are also the
- * hwna tx rings. The bwrap notify callback will then complete the hwna tx
- * (see netmap_bwrap_notify).
- * - packets coming from the outside go to the hwna rx rings, which are also the
- * bwrap tx rings. The (overwritten) hwna notify method will then complete
- * the bridge tx (see netmap_bwrap_intr_notify).
+ * - packets coming from the bridge go to the bwrap rx rings,
+ * which are also the hwna tx rings. The bwrap notify callback
+ * will then complete the hwna tx (see netmap_bwrap_notify).
*
- * The bridge wrapper may optionally connect the hwna 'host' rings to the
- * bridge. This is done by using a second port in the bridge and connecting it
- * to the 'host' netmap_vp_adapter contained in the netmap_bwrap_adapter.
- * The brwap host adapter cross-links the hwna host rings in the same way as shown above.
+ * - packets coming from the outside go to the hwna rx rings,
+ * which are also the bwrap tx rings. The (overwritten) hwna
+ * notify method will then complete the bridge tx
+ * (see netmap_bwrap_intr_notify).
*
- * - packets coming from the bridge and directed to host stack are handled by the
- * bwrap host notify callback (see netmap_bwrap_host_notify)
- * - packets coming from the host stack are still handled by the overwritten
- * hwna notify callback (netmap_bwrap_intr_notify), but are diverted to the
- * host adapter depending on the ring number.
+ * The bridge wrapper may optionally connect the hwna 'host' rings
+ * to the bridge. This is done by using a second port in the
+ * bridge and connecting it to the 'host' netmap_vp_adapter
+ * contained in the netmap_bwrap_adapter. The bwrap host adapter
+ * cross-links the hwna host rings in the same way as shown above.
+ *
+ * - packets coming from the bridge and directed to the host stack
+ * are handled by the bwrap host notify callback
+ * (see netmap_bwrap_host_notify)
+ *
+ * - packets coming from the host stack are still handled by the
+ * overwritten hwna notify callback (netmap_bwrap_intr_notify),
+ * but are diverted to the host adapter depending on the ring number.
*
*/
struct netmap_bwrap_adapter {
@@ -505,103 +600,39 @@ struct netmap_bwrap_adapter {
/* backup of the hwna notify callback */
int (*save_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
- /* When we attach a physical interface to the bridge, we
+
+ /*
+ * When we attach a physical interface to the bridge, we
* allow the controlling process to terminate, so we need
* a place to store the netmap_priv_d data structure.
- * This is only done when physical interfaces are attached to a bridge.
+ * This is only done when physical interfaces
+ * are attached to a bridge.
*/
struct netmap_priv_d *na_kpriv;
};
-/*
- * Available space in the ring. Only used in VALE code
- */
-static inline uint32_t
-nm_kr_space(struct netmap_kring *k, int is_rx)
-{
- int space;
-
- if (is_rx) {
- int busy = k->nkr_hwlease - k->nr_hwcur + k->nr_hwreserved;
- if (busy < 0)
- busy += k->nkr_num_slots;
- space = k->nkr_num_slots - 1 - busy;
- } else {
- space = k->nr_hwcur + k->nr_hwavail - k->nkr_hwlease;
- if (space < 0)
- space += k->nkr_num_slots;
- }
-#if 0
- // sanity check
- if (k->nkr_hwlease >= k->nkr_num_slots ||
- k->nr_hwcur >= k->nkr_num_slots ||
- k->nr_hwavail >= k->nkr_num_slots ||
- busy < 0 ||
- busy >= k->nkr_num_slots) {
- D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
- k->nkr_lease_idx, k->nkr_num_slots);
- }
-#endif
- return space;
-}
-
-
+#endif /* WITH_VALE */
-/* make a lease on the kring for N positions. return the
- * lease index
- */
+/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
-nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
+nm_kr_rxspace(struct netmap_kring *k)
{
- uint32_t lim = k->nkr_num_slots - 1;
- uint32_t lease_idx = k->nkr_lease_idx;
-
- k->nkr_leases[lease_idx] = NR_NOSLOT;
- k->nkr_lease_idx = nm_next(lease_idx, lim);
+ int space = k->nr_hwtail - k->nr_hwcur;
+ if (space < 0)
+ space += k->nkr_num_slots;
+ ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);
- if (n > nm_kr_space(k, is_rx)) {
- D("invalid request for %d slots", n);
- panic("x");
- }
- /* XXX verify that there are n slots */
- k->nkr_hwlease += n;
- if (k->nkr_hwlease > lim)
- k->nkr_hwlease -= lim + 1;
-
- if (k->nkr_hwlease >= k->nkr_num_slots ||
- k->nr_hwcur >= k->nkr_num_slots ||
- k->nr_hwavail >= k->nkr_num_slots ||
- k->nkr_lease_idx >= k->nkr_num_slots) {
- D("invalid kring %s, cur %d avail %d lease %d lease_idx %d lim %d",
- k->na->ifp->if_xname,
- k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
- k->nkr_lease_idx, k->nkr_num_slots);
- }
- return lease_idx;
+ return space;
}
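A hedged usage sketch for nm_kr_rxspace(): during a driver (re)initialization it gives the number of receive slots still owned by netmap clients, so only the remaining slots may be handed back to the NIC. The refill helper and variable names below are hypothetical:

	/* Sketch only: preserve client-owned rx buffers across a reinit.
	 * nic_refill_rx(), adapter and ring_nr are hypothetical.
	 */
	struct netmap_kring *kring = &NA(ifp)->rx_rings[ring_nr];
	u_int keep = nm_kr_rxspace(kring);		/* still owned by clients */
	u_int refill = kring->nkr_num_slots - 1 - keep;	/* safe to give to the NIC */

	nic_refill_rx(adapter, ring_nr, refill);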
-#endif /* WITH_VALE */
-/* return update position */
-static inline uint32_t
-nm_kr_rxpos(struct netmap_kring *k)
+/* True if no space in the tx ring; only valid after txsync_prologue */
+static inline int
+nm_kr_txempty(struct netmap_kring *kring)
{
- uint32_t pos = k->nr_hwcur + k->nr_hwavail;
- if (pos >= k->nkr_num_slots)
- pos -= k->nkr_num_slots;
-#if 0
- if (pos >= k->nkr_num_slots ||
- k->nkr_hwlease >= k->nkr_num_slots ||
- k->nr_hwcur >= k->nkr_num_slots ||
- k->nr_hwavail >= k->nkr_num_slots ||
- k->nkr_lease_idx >= k->nkr_num_slots) {
- D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
- k->nkr_lease_idx, k->nkr_num_slots);
- }
-#endif
- return pos;
+ return kring->rcur == kring->nr_hwtail;
}
@@ -613,11 +644,13 @@ nm_kr_rxpos(struct netmap_kring *k)
#define NM_KR_BUSY 1
#define NM_KR_STOPPED 2
+
static __inline void nm_kr_put(struct netmap_kring *kr)
{
NM_ATOMIC_CLEAR(&kr->nr_busy);
}
+
static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
/* check a first time without taking the lock
@@ -640,7 +673,7 @@ static __inline int nm_kr_tryget(struct netmap_kring *kr)
/*
- * The following are support routines used by individual drivers to
+ * The following functions are used by individual drivers to
* support netmap operation.
*
* netmap_attach() initializes a struct netmap_adapter, allocating the
@@ -666,7 +699,17 @@ struct netmap_slot *netmap_reset(struct netmap_adapter *na,
enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
-/* set/clear native flags. XXX maybe also if_transmit ? */
+/* default functions to handle rx/tx interrupts */
+int netmap_rx_irq(struct ifnet *, u_int, u_int *);
+#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
+void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
+
+void netmap_disable_all_rings(struct ifnet *);
+void netmap_enable_all_rings(struct ifnet *);
+void netmap_disable_ring(struct netmap_kring *kr);
+
+
+/* set/clear native flags and if_transmit/netdev_ops */
static inline void
nm_set_native_flags(struct netmap_adapter *na)
{
@@ -685,6 +728,7 @@ nm_set_native_flags(struct netmap_adapter *na)
#endif
}
+
static inline void
nm_clear_native_flags(struct netmap_adapter *na)
{
@@ -701,36 +745,58 @@ nm_clear_native_flags(struct netmap_adapter *na)
#endif
}
+
/*
- * validates parameters in the ring/kring, returns a value for cur,
- * and the 'new_slots' value in the argument.
- * If any error, returns cur > lim to force a reinit.
+ * validates parameters in the ring/kring, returns a value for head
+ * If any error, returns ring_size to force a reinit.
*/
-u_int nm_txsync_prologue(struct netmap_kring *, u_int *);
+uint32_t nm_txsync_prologue(struct netmap_kring *);
+
/*
- * validates parameters in the ring/kring, returns a value for cur,
+ * validates parameters in the ring/kring, returns a value for head,
* and the 'reserved' value in the argument.
- * If any error, returns cur > lim to force a reinit.
+ * If any error, returns ring_size to force a reinit.
+ */
+uint32_t nm_rxsync_prologue(struct netmap_kring *);
+
+
+/*
+ * update kring and ring at the end of txsync.
*/
-u_int nm_rxsync_prologue(struct netmap_kring *, u_int *);
+static inline void
+nm_txsync_finalize(struct netmap_kring *kring)
+{
+ /* update ring head/tail to what the kernel knows */
+ kring->ring->tail = kring->rtail = kring->nr_hwtail;
+ kring->ring->head = kring->rhead = kring->nr_hwcur;
+
+ /* note, head/rhead/hwcur might be behind cur/rcur
+ * if no carrier
+ */
+ ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
+ kring->name, kring->nr_hwcur, kring->nr_hwtail,
+ kring->rhead, kring->rcur, kring->rtail);
+}
+
/*
- * update kring and ring at the end of txsync
+ * update kring and ring at the end of rxsync
*/
static inline void
-nm_txsync_finalize(struct netmap_kring *kring, u_int cur)
+nm_rxsync_finalize(struct netmap_kring *kring)
{
- /* recompute hwreserved */
- kring->nr_hwreserved = cur - kring->nr_hwcur;
- if (kring->nr_hwreserved < 0)
- kring->nr_hwreserved += kring->nkr_num_slots;
-
- /* update avail and reserved to what the kernel knows */
- kring->ring->avail = kring->nr_hwavail;
- kring->ring->reserved = kring->nr_hwreserved;
+ /* tell userspace that there might be new packets */
+ ND("head %d cur %d tail %d -> %d", kring->ring->head, kring->ring->cur,
+ kring->ring->tail, kring->nr_hwtail);
+ kring->ring->tail = kring->rtail = kring->nr_hwtail;
+ /* make a copy of the state for next round */
+ kring->rhead = kring->ring->head;
+ kring->rcur = kring->ring->cur;
}
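Putting the new helpers together, the expected shape of a driver nm_txsync() under the head/cur/tail scheme is roughly the following. This is a sketch under stated assumptions: nic_push_slot() and nic_reclaim_completed() are hypothetical hardware helpers, and real drivers add DMA syncs and reclaim throttling (e.g. via last_reclaim):

	/* Sketch of a driver txsync using the prologue/finalize API. */
	static int
	example_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
	{
		struct netmap_kring *kring = &na->tx_rings[ring_nr];
		u_int const lim = kring->nkr_num_slots - 1;
		u_int const head = nm_txsync_prologue(kring);
		u_int nm_i;

		(void)flags;			/* NAF_* flags unused in this sketch */
		if (head > lim)			/* malformed user ring */
			return netmap_ring_reinit(kring);

		/* first part: push slots hwcur..head to the NIC */
		for (nm_i = kring->nr_hwcur; nm_i != head;
		     nm_i = nm_next(nm_i, lim))
			nic_push_slot(na, ring_nr, &kring->ring->slot[nm_i]);
		kring->nr_hwcur = head;

		/* second part: reclaim completed transmissions, advancing
		 * hwtail (can be skipped while !nm_kr_txempty(kring))
		 */
		kring->nr_hwtail = nic_reclaim_completed(na, ring_nr);

		nm_txsync_finalize(kring);	/* publish head/tail to userspace */
		return 0;
	}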
+
/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define NM_CHECK_ADDR_LEN(_a, _l) do { \
@@ -755,6 +821,8 @@ nm_txsync_finalize(struct netmap_kring *kring, u_int cur)
int netmap_update_config(struct netmap_adapter *na);
int netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom);
void netmap_krings_delete(struct netmap_adapter *na);
+int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
+
struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
@@ -766,10 +834,13 @@ u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
+
#ifdef WITH_VALE
/*
- * The following bridge-related interfaces are used by other kernel modules
- * In the version that only supports unicast or broadcast, the lookup
+ * The following bridge-related functions are used by other
+ * kernel modules.
+ *
+ * VALE only supports unicast or broadcast. The lookup
* function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
* XXX in practice "unknown" might be handled same as broadcast.
@@ -799,8 +870,6 @@ int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
/* Various prototypes */
int netmap_poll(struct cdev *dev, int events, struct thread *td);
-
-
int netmap_init(void);
void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
@@ -811,7 +880,8 @@ int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct t
/* netmap_adapter creation/destruction */
#define NM_IFPNAME(ifp) ((ifp) ? (ifp)->if_xname : "zombie")
-#define NM_DEBUG_PUTGET 1
+
+// #define NM_DEBUG_PUTGET 1
#ifdef NM_DEBUG_PUTGET
@@ -844,12 +914,15 @@ int netmap_adapter_put(struct netmap_adapter *na);
#endif /* !NM_DEBUG_PUTGET */
+/*
+ * module variables
+ */
extern u_int netmap_buf_size;
#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove
-extern int netmap_mitigate;
+extern int netmap_mitigate; // XXX not really used
extern int netmap_no_pendintr;
-extern u_int netmap_total_buffers;
-extern char *netmap_buffer_base;
+extern u_int netmap_total_buffers; // global allocator
+extern char *netmap_buffer_base; // global allocator
extern int netmap_verbose; // XXX debugging
enum { /* verbose flags */
NM_VERB_ON = 1, /* generic verbose */
@@ -908,7 +981,7 @@ extern int netmap_generic_ringsize;
#ifdef __FreeBSD__
-/* Callback invoked by the dma machinery after a successfull dmamap_load */
+/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
__unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
{
@@ -1053,31 +1126,27 @@ BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot)
lut[0].vaddr : lut[i].vaddr;
}
-/* default functions to handle rx/tx interrupts */
-int netmap_rx_irq(struct ifnet *, u_int, u_int *);
-#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
-void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
void netmap_txsync_to_host(struct netmap_adapter *na);
-void netmap_disable_all_rings(struct ifnet *);
-void netmap_enable_all_rings(struct ifnet *);
-void netmap_disable_ring(struct netmap_kring *kr);
-/* Structure associated to each thread which registered an interface.
+/*
+ * Structure associated to each thread which registered an interface.
*
* The first 4 fields of this structure are written by NIOCREGIF and
* read by poll() and NIOC?XSYNC.
- * There is low contention among writers (actually, a correct user program
- * should have no contention among writers) and among writers and readers,
- * so we use a single global lock to protect the structure initialization.
- * Since initialization involves the allocation of memory, we reuse the memory
- * allocator lock.
+ *
+ * There is low contention among writers (a correct user program
+ * should have none) and among writers and readers, so we use a
+ * single global lock to protect the structure initialization;
+ * since initialization involves the allocation of memory,
+ * we reuse the memory allocator lock.
+ *
* Read access to the structure is lock free. Readers must check that
* np_nifp is not NULL before using the other fields.
- * If np_nifp is NULL initialization has not been performed, so they should
- * return an error to userlevel.
+ * If np_nifp is NULL initialization has not been performed,
+ * so they should return an error to userspace.
*
* The ref_done field is used to regulate access to the refcount in the
* memory allocator. The refcount must be incremented at most once for
@@ -1091,38 +1160,29 @@ struct netmap_priv_d {
struct netmap_if * volatile np_nifp; /* netmap if descriptor. */
struct netmap_adapter *np_na;
- int np_ringid; /* from the ioctl */
- u_int np_qfirst, np_qlast; /* range of rings to scan */
- uint16_t np_txpoll;
+ int np_ringid; /* from the ioctl */
+ u_int np_qfirst, np_qlast; /* range of rings to scan */
+ uint16_t np_txpoll;
struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
/* np_refcount is only used on FreeBSD */
- int np_refcount; /* use with NMG_LOCK held */
+ int np_refcount; /* use with NMG_LOCK held */
};
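The lock-free read rule described above translates, in the syscall paths, into a check like the following sketch (the memory barrier is an assumption about the required ordering, not a quote of the actual code):

	/* Sketch only: reader side of the np_nifp rule. */
	if (priv->np_nifp == NULL)	/* initialization not complete */
		return ENXIO;		/* report an error to userspace */
	mb();	/* assumed barrier: later reads must see initialized fields */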
/*
* generic netmap emulation for devices that do not have
* native netmap support.
- * XXX generic_netmap_register() is only exported to implement
- * nma_is_generic().
*/
-int generic_netmap_register(struct netmap_adapter *na, int enable);
int generic_netmap_attach(struct ifnet *ifp);
int netmap_catch_rx(struct netmap_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);;
-void netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable);
+void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
-static __inline int
-nma_is_generic(struct netmap_adapter *na)
-{
- return na->nm_register == generic_netmap_register;
-}
-
/*
* netmap_mitigation API. This is used by the generic adapter
* to reduce the number of interrupt requests/selwakeup
@@ -1134,6 +1194,4 @@ void netmap_mitigation_restart(struct netmap_generic_adapter *na);
int netmap_mitigation_active(struct netmap_generic_adapter *na);
void netmap_mitigation_cleanup(struct netmap_generic_adapter *na);
-// int generic_timer_handler(struct hrtimer *t);
-
#endif /* _NET_NETMAP_KERN_H_ */
diff --git a/sys/dev/netmap/netmap_mbq.c b/sys/dev/netmap/netmap_mbq.c
index c8e581b69fe5..2606b13d48dc 100644
--- a/sys/dev/netmap/netmap_mbq.c
+++ b/sys/dev/netmap/netmap_mbq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Vincenzo Maffione. All rights reserved.
+ * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -47,17 +47,20 @@ static inline void __mbq_init(struct mbq *q)
q->count = 0;
}
+
void mbq_safe_init(struct mbq *q)
{
mtx_init(&q->lock, "mbq", NULL, MTX_SPIN);
__mbq_init(q);
}
+
void mbq_init(struct mbq *q)
{
__mbq_init(q);
}
+
static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
{
m->m_nextpkt = NULL;
@@ -70,6 +73,7 @@ static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
q->count++;
}
+
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
{
mtx_lock(&q->lock);
@@ -77,11 +81,13 @@ void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
mtx_unlock(&q->lock);
}
+
void mbq_enqueue(struct mbq *q, struct mbuf *m)
{
__mbq_enqueue(q, m);
}
+
static inline struct mbuf *__mbq_dequeue(struct mbq *q)
{
struct mbuf *ret = NULL;
@@ -99,6 +105,7 @@ static inline struct mbuf *__mbq_dequeue(struct mbq *q)
return ret;
}
+
struct mbuf *mbq_safe_dequeue(struct mbq *q)
{
struct mbuf *ret;
@@ -110,11 +117,13 @@ struct mbuf *mbq_safe_dequeue(struct mbq *q)
return ret;
}
+
struct mbuf *mbq_dequeue(struct mbq *q)
{
return __mbq_dequeue(q);
}
+
/* XXX seems pointless to have a generic purge */
static void __mbq_purge(struct mbq *q, int safe)
{
@@ -130,16 +139,19 @@ static void __mbq_purge(struct mbq *q, int safe)
}
}
+
void mbq_purge(struct mbq *q)
{
__mbq_purge(q, 0);
}
+
void mbq_safe_purge(struct mbq *q)
{
__mbq_purge(q, 1);
}
+
void mbq_safe_destroy(struct mbq *q)
{
mtx_destroy(&q->lock);
@@ -149,4 +161,3 @@ void mbq_safe_destroy(struct mbq *q)
void mbq_destroy(struct mbq *q)
{
}
-
diff --git a/sys/dev/netmap/netmap_mbq.h b/sys/dev/netmap/netmap_mbq.h
index ad023b617a5d..d273d8a8fa23 100644
--- a/sys/dev/netmap/netmap_mbq.h
+++ b/sys/dev/netmap/netmap_mbq.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Vincenzo Maffione. All rights reserved.
+ * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index f28f2c04751a..b25f79cef3a4 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2013 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
+ * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -506,7 +506,7 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
p->r_objsize = objsize;
#define MAX_CLUSTSIZE (1<<17)
-#define LINE_ROUND 64
+#define LINE_ROUND NM_CACHE_ALIGN // 64
if (objsize >= MAX_CLUSTSIZE) {
/* we could do it but there is no point */
D("unsupported allocation for %d bytes", objsize);
@@ -960,13 +960,15 @@ netmap_mem_rings_create(struct netmap_adapter *na)
ND("txring[%d] at %p ofs %d", i, ring);
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
- *(ssize_t *)(uintptr_t)&ring->buf_ofs =
+ *(int64_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
- ring->avail = kring->nr_hwavail;
- ring->cur = kring->nr_hwcur;
+ /* copy values from kring */
+ ring->head = kring->rhead;
+ ring->cur = kring->rcur;
+ ring->tail = kring->rtail;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
ND("initializing slots for txring");
@@ -989,13 +991,15 @@ netmap_mem_rings_create(struct netmap_adapter *na)
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
- *(ssize_t *)(uintptr_t)&ring->buf_ofs =
+ *(int64_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
- ring->cur = kring->nr_hwcur;
- ring->avail = kring->nr_hwavail;
+ /* copy values from kring */
+ ring->head = kring->rhead;
+ ring->cur = kring->rcur;
+ ring->tail = kring->rtail;
*(int *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
ND("initializing slots for rxring[%d]", i);
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index f492f9814b79..8e6c58cbc4ee 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2013 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
+ * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index 32d6422de120..f988b84e78b2 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -251,44 +251,6 @@ struct nm_bridge nm_bridges[NM_BRIDGES];
/*
- * A few function to tell which kind of port are we using.
- * XXX should we hold a lock ?
- *
- * nma_is_vp() virtual port
- * nma_is_host() port connected to the host stack
- * nma_is_hw() port connected to a NIC
- * nma_is_generic() generic netmap adapter XXX stop this madness
- */
-static __inline int
-nma_is_vp(struct netmap_adapter *na)
-{
- return na->nm_register == bdg_netmap_reg;
-}
-
-
-static __inline int
-nma_is_host(struct netmap_adapter *na)
-{
- return na->nm_register == NULL;
-}
-
-
-static __inline int
-nma_is_hw(struct netmap_adapter *na)
-{
- /* In case of sw adapter, nm_register is NULL */
- return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
-}
-
-static __inline int
-nma_is_bwrap(struct netmap_adapter *na)
-{
- return na->nm_register == netmap_bwrap_register;
-}
-
-
-
-/*
* this is a slightly optimized copy routine which rounds
* to multiple of 64 bytes and is often faster than dealing
* with other odd sizes. We assume there is enough room
@@ -318,7 +280,6 @@ pkt_copy(void *_src, void *_dst, int l)
}
-
/*
* locate a bridge among the existing ones.
* MUST BE CALLED WITH NMG_LOCK()
@@ -393,8 +354,8 @@ nm_free_bdgfwd(struct netmap_adapter *na)
struct netmap_kring *kring;
NMG_LOCK_ASSERT();
- nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
- kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
+ nrings = na->num_tx_rings;
+ kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
if (kring[i].nkr_ft) {
free(kring[i].nkr_ft, M_DEVBUF);
@@ -502,6 +463,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
}
}
+
static void
netmap_adapter_vp_dtor(struct netmap_adapter *na)
{
@@ -520,6 +482,16 @@ netmap_adapter_vp_dtor(struct netmap_adapter *na)
na->ifp = NULL;
}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ */
int
netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
@@ -688,18 +660,12 @@ nm_bdg_attach(struct nmreq *nmr)
return ENOMEM;
NMG_LOCK();
/* XXX probably netmap_get_bdg_na() */
- error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
+ error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
if (error) /* no device, or another bridge or user owns the device */
goto unlock_exit;
- /* netmap_get_na() sets na_bdg if this is a physical interface
- * that we can attach to a switch.
- */
- if (!nma_is_bwrap(na)) {
- /* got reference to a virtual port or direct access to a NIC.
- * perhaps specified no bridge prefix or wrong NIC name
- */
+ if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
- goto unref_exit;
+ goto unlock_exit;
}
if (na->active_fds > 0) { /* already registered */
@@ -727,6 +693,7 @@ unlock_exit:
return error;
}
+
static int
nm_bdg_detach(struct nmreq *nmr)
{
@@ -736,17 +703,15 @@ nm_bdg_detach(struct nmreq *nmr)
int last_instance;
NMG_LOCK();
- error = netmap_get_na(nmr, &na, 0 /* don't create */);
+ error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
if (error) { /* no device, or another bridge or user owns the device */
goto unlock_exit;
}
- if (!nma_is_bwrap(na)) {
- /* got reference to a virtual port or direct access to a NIC.
- * perhaps specified no bridge's prefix or wrong NIC's name
- */
+ if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
- goto unref_exit;
+ goto unlock_exit;
}
+
bna = (struct netmap_bwrap_adapter *)na;
if (na->active_fds == 0) { /* not registered */
@@ -890,12 +855,13 @@ netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
case NETMAP_BDG_OFFSET:
NMG_LOCK();
error = netmap_get_bdg_na(nmr, &na, 0);
- if (!error) {
+ if (na && !error) {
vpna = (struct netmap_vp_adapter *)na;
if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
vpna->offset = nmr->nr_arg1;
D("Using offset %d for %p", vpna->offset, vpna);
+ netmap_adapter_put(na);
}
NMG_UNLOCK();
break;
@@ -947,6 +913,7 @@ netmap_vp_krings_create(struct netmap_adapter *na)
return 0;
}
+
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
@@ -1027,10 +994,6 @@ nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
}
-/*
- *---- support for virtual bridge -----
- */
-
/* ----- FreeBSD if_bridge hash function ------- */
/*
@@ -1052,6 +1015,7 @@ do { \
c -= a; c -= b; c ^= (b >> 15); \
} while (/*CONSTCOND*/0)
+
static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
@@ -1144,6 +1108,77 @@ netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
/*
+ * Available space in the ring. Only used in VALE code
+ * and only with is_rx = 1
+ */
+static inline uint32_t
+nm_kr_space(struct netmap_kring *k, int is_rx)
+{
+ int space;
+
+ if (is_rx) {
+ int busy = k->nkr_hwlease - k->nr_hwcur;
+ if (busy < 0)
+ busy += k->nkr_num_slots;
+ space = k->nkr_num_slots - 1 - busy;
+ } else {
+ /* XXX never used in this branch */
+ space = k->nr_hwtail - k->nkr_hwlease;
+ if (space < 0)
+ space += k->nkr_num_slots;
+ }
+#if 0
+ // sanity check
+ if (k->nkr_hwlease >= k->nkr_num_slots ||
+ k->nr_hwcur >= k->nkr_num_slots ||
+ k->nr_tail >= k->nkr_num_slots ||
+ busy < 0 ||
+ busy >= k->nkr_num_slots) {
+ D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
+ k->nkr_lease_idx, k->nkr_num_slots);
+ }
+#endif
+ return space;
+}
+
+
+
+
+/* make a lease on the kring for N positions. return the
+ * lease index
+ * XXX only used in VALE code and with is_rx = 1
+ */
+static inline uint32_t
+nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
+{
+ uint32_t lim = k->nkr_num_slots - 1;
+ uint32_t lease_idx = k->nkr_lease_idx;
+
+ k->nkr_leases[lease_idx] = NR_NOSLOT;
+ k->nkr_lease_idx = nm_next(lease_idx, lim);
+
+ if (n > nm_kr_space(k, is_rx)) {
+ D("invalid request for %d slots", n);
+ panic("x");
+ }
+ /* XXX verify that there are n slots */
+ k->nkr_hwlease += n;
+ if (k->nkr_hwlease > lim)
+ k->nkr_hwlease -= lim + 1;
+
+ if (k->nkr_hwlease >= k->nkr_num_slots ||
+ k->nr_hwcur >= k->nkr_num_slots ||
+ k->nr_hwtail >= k->nkr_num_slots ||
+ k->nkr_lease_idx >= k->nkr_num_slots) {
+ D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
+ k->na->ifp->if_xname,
+ k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
+ k->nkr_lease_idx, k->nkr_num_slots);
+ }
+ return lease_idx;
+}
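For reference, the lease protocol implemented by nm_kr_space()/nm_kr_lease() is used by the forwarding path roughly as in the sketch below (simplified from nm_bdg_flush(); the function name and the slot-filling step are placeholders):

	/* Sketch, not part of the patch: a sender reserves 'howmany' slots
	 * on a destination rx kring, copies outside the lock, then reports
	 * completion through its lease entry. 'howmany' must not exceed
	 * nm_kr_space(kring, 1).
	 */
	static void
	example_bdg_send(struct netmap_kring *kring, u_int howmany)
	{
		u_int const lim = kring->nkr_num_slots - 1;
		u_int lease_idx, j;

		mtx_lock(&kring->q_lock);
		j = kring->nkr_hwlease;		/* first slot we will fill */
		lease_idx = nm_kr_lease(kring, howmany, 1 /* is_rx */);
		mtx_unlock(&kring->q_lock);

		/* copy packets into the reserved slots, outside the lock */
		for (; howmany > 0; howmany--, j = nm_next(j, lim)) {
			struct netmap_slot *slot = &kring->ring->slot[j];

			(void)slot;		/* fill the slot here */
		}

		mtx_lock(&kring->q_lock);
		kring->nkr_leases[lease_idx] = j;	/* report completion */
		/* when every earlier lease is also complete, nr_hwtail is
		 * advanced and the receiver notified; see nm_bdg_flush()
		 */
		mtx_unlock(&kring->q_lock);
	}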
+
+/*
* This flush routine supports only unicast and broadcast but a large
* number of ports, and lets us replace the learn and dispatch functions.
*/
@@ -1357,28 +1392,30 @@ retry:
dst = BDG_NMB(&dst_na->up, slot);
if (unlikely(fix_mismatch)) {
- if (na->offset > dst_na->offset) {
- src += na->offset - dst_na->offset;
- copy_len -= na->offset - dst_na->offset;
- dst_len = copy_len;
- } else {
- bzero(dst, dst_na->offset - na->offset);
- dst_len += dst_na->offset - na->offset;
- dst += dst_na->offset - na->offset;
- }
- /* fix the first fragment only */
- fix_mismatch = 0;
- /* completely skip an header only fragment */
- if (copy_len == 0) {
- ft_p++;
- continue;
- }
+ /* We are processing the first fragment
+ * and there is a mismatch between source
+ * and destination offsets. Create a zeroed
+ * header for the destination, independently
+ * of the source header length and content.
+ */
+ src += na->offset;
+ copy_len -= na->offset;
+ bzero(dst, dst_na->offset);
+ dst += dst_na->offset;
+ dst_len = dst_na->offset + copy_len;
+ /* fix the first fragment only */
+ fix_mismatch = 0;
+ /* Here it could be copy_len == dst_len == 0,
+ * and so a zero length fragment is passed.
+ */
}
+
+ ND("send [%d] %d(%d) bytes at %s:%d",
+ i, (int)copy_len, (int)dst_len,
+ NM_IFPNAME(dst_ifp), j);
/* round to a multiple of 64 */
copy_len = (copy_len + 63) & ~63;
- ND("send %d %d bytes at %s:%d",
- i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
if (ft_p->ft_flags & NS_INDIRECT) {
if (copyin(src, dst, copy_len)) {
// invalid user pointer, pretend len is 0
@@ -1426,7 +1463,7 @@ retry:
}
p[lease_idx] = j; /* report I am done */
- update_pos = nm_kr_rxpos(kring);
+ update_pos = kring->nr_hwtail;
if (my_start == update_pos) {
/* all slots before my_start have been reported,
@@ -1443,15 +1480,7 @@ retry:
* means there are new buffers to report
*/
if (likely(j != my_start)) {
- uint32_t old_avail = kring->nr_hwavail;
-
- kring->nr_hwavail = (j >= kring->nr_hwcur) ?
- j - kring->nr_hwcur :
- j + lim + 1 - kring->nr_hwcur;
- if (kring->nr_hwavail < old_avail) {
- D("avail shrink %d -> %d",
- old_avail, kring->nr_hwavail);
- }
+ kring->nr_hwtail = j;
dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
still_locked = 0;
mtx_unlock(&kring->q_lock);
@@ -1471,35 +1500,32 @@ cleanup:
return 0;
}
+
static int
netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
{
struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
- struct netmap_ring *ring = kring->ring;
- u_int j, k, lim = kring->nkr_num_slots - 1;
-
- k = ring->cur;
- if (k > lim)
- return netmap_ring_reinit(kring);
+ u_int done;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = kring->rcur;
if (bridge_batch <= 0) { /* testing only */
- j = k; // used all
+ done = cur; // used all
goto done;
}
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
- j = nm_bdg_preflush(na, ring_nr, kring, k);
- if (j != k)
- D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
- /* k-j modulo ring size is the number of slots processed */
- if (k < j)
- k += kring->nkr_num_slots;
- kring->nr_hwavail = lim - (k - j);
-
+ done = nm_bdg_preflush(na, ring_nr, kring, cur);
done:
- kring->nr_hwcur = j;
- ring->avail = kring->nr_hwavail;
+ if (done != cur)
+ D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
+ /*
+ * packets between 'done' and 'cur' are left unsent.
+ */
+ kring->nr_hwcur = done;
+ kring->nr_hwtail = nm_prev(done, lim);
+ nm_txsync_finalize(kring);
if (netmap_verbose)
D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
return 0;
@@ -1518,46 +1544,30 @@ bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
return netmap_vp_txsync(vpna, ring_nr, flags);
}
-
-/*
- * user process reading from a VALE switch.
- * Already protected against concurrent calls from userspace,
- * but we must acquire the queue's lock to protect against
- * writers on the same queue.
- */
static int
-bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+netmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, lim = kring->nkr_num_slots - 1;
- u_int k = ring->cur, resvd = ring->reserved;
+ u_int nm_i, lim = kring->nkr_num_slots - 1;
+ u_int head = nm_rxsync_prologue(kring);
int n;
- mtx_lock(&kring->q_lock);
- if (k > lim) {
+ if (head > lim) {
D("ouch dangerous reset!!!");
n = netmap_ring_reinit(kring);
goto done;
}
- /* skip past packets that userspace has released */
- j = kring->nr_hwcur; /* netmap ring index */
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
+ /* First part, import newly received packets. */
+ /* actually nothing to do here, they are already in the kring */
- if (j != k) { /* userspace has released some packets. */
- n = k - j;
- if (n < 0)
- n += kring->nkr_num_slots;
- ND("userspace releases %d packets", n);
- for (n = 0; likely(j != k); n++) {
- struct netmap_slot *slot = &ring->slot[j];
+ /* Second part, skip past packets that userspace has released. */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != head) {
+ /* consistency check, but nothing really important here */
+ for (n = 0; likely(nm_i != head); n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
void *addr = BDG_NMB(na, slot);
if (addr == netmap_buffer_base) { /* bad buf */
@@ -1565,19 +1575,37 @@ bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
slot->buf_idx);
}
slot->flags &= ~NS_BUF_CHANGED;
- j = nm_next(j, lim);
+ nm_i = nm_next(nm_i, lim);
}
- kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
+ kring->nr_hwcur = head;
}
+
/* tell userspace that there are new packets */
- ring->avail = kring->nr_hwavail - resvd;
+ nm_rxsync_finalize(kring);
n = 0;
done:
+ return n;
+}
+
+/*
+ * user process reading from a VALE switch.
+ * Already protected against concurrent calls from userspace,
+ * but we must acquire the queue's lock to protect against
+ * writers on the same queue.
+ */
+static int
+bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+{
+ struct netmap_kring *kring = &na->rx_rings[ring_nr];
+ int n;
+
+ mtx_lock(&kring->q_lock);
+ n = netmap_vp_rxsync(na, ring_nr, flags);
mtx_unlock(&kring->q_lock);
return n;
}
+
static int
bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
{
@@ -1627,6 +1655,7 @@ bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
return 0;
}
+
static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
@@ -1652,16 +1681,22 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
}
+
/*
- * Pass packets from nic to the bridge.
+ * Intr callback for NICs connected to a bridge.
+ * Simply ignore tx interrupts (maybe we could try to recover space ?)
+ * and pass received packets from nic to the bridge.
+ *
* XXX TODO check locking: this is called from the interrupt
* handler so we should make sure that the interface is not
* disconnected while passing down an interrupt.
*
- * Note, no user process can access this NIC so we can ignore
- * the info in the 'ring'.
- */
-/* callback that overwrites the hwna notify callback.
+ * Note, no user process can access this NIC or the host stack.
+ * The only part of the ring that is significant are the slots,
+ * and head/cur/tail are set from the kring as needed
+ * (part as a receive ring, part as a transmit ring).
+ *
+ * callback that overwrites the hwna notify callback.
* Packets come from the outside or from the host stack and are put on an hwna rx ring.
* The bridge wrapper then sends the packets through the bridge.
*/
@@ -1677,21 +1712,24 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
struct netmap_vp_adapter *vpna = &bna->up;
int error = 0;
- ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
+ if (netmap_verbose)
+ D("%s %s%d 0x%x", NM_IFPNAME(ifp),
+ (tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
if (flags & NAF_DISABLE_NOTIFY) {
kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
- if (kring->nkr_stopped)
- netmap_disable_ring(bkring);
+ if (kring[ring_nr].nkr_stopped)
+ netmap_disable_ring(&bkring[ring_nr]);
else
- bkring->nkr_stopped = 0;
+ bkring[ring_nr].nkr_stopped = 0;
return 0;
}
if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
return 0;
+ /* we only care about receive interrupts */
if (tx == NR_TX)
return 0;
@@ -1707,7 +1745,24 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
goto put_out;
}
+ /* Here we expect ring->head = ring->cur = ring->tail
+ * because everything has been released from the previous round.
+ * However the ring is shared and we might have info from
+ * the wrong side (the tx ring). Hence we overwrite with
+ * the info from the rx kring.
+ */
+ if (netmap_verbose)
+ D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp),
+ ring->head, ring->cur, ring->tail,
+ kring->rhead, kring->rcur, kring->rtail);
+
+ ring->head = kring->rhead;
+ ring->cur = kring->rcur;
+ ring->tail = kring->rtail;
+
+ /* simulate a user wakeup on the rx ring */
if (is_host_ring) {
+ netmap_rxsync_from_host(na, NULL, NULL);
vpna = hostna;
ring_nr = 0;
} else {
@@ -1718,23 +1773,46 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
if (error)
goto put_out;
}
- if (kring->nr_hwavail == 0 && netmap_verbose) {
+ if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
D("how strange, interrupt with no packets on %s",
NM_IFPNAME(ifp));
goto put_out;
}
- /* XXX avail ? */
- ring->cur = nm_kr_rxpos(kring);
+
+ /* new packets are ring->cur to ring->tail, and the bkring
+ * had hwcur == ring->cur. So advance ring->cur to ring->tail
+ * to push all packets out.
+ */
+ ring->head = ring->cur = ring->tail;
+
+ /* also set tail to what the bwrap expects */
+ bkring = &vpna->up.tx_rings[ring_nr];
+ ring->tail = bkring->nr_hwtail; // rtail too ?
+
+ /* pass packets to the switch */
+ nm_txsync_prologue(bkring); // XXX error checking ?
netmap_vp_txsync(vpna, ring_nr, flags);
- if (!is_host_ring)
+ /* mark all buffers as released on this ring */
+ ring->head = ring->cur = kring->nr_hwtail;
+ ring->tail = kring->rtail;
+ /* another call to actually release the buffers */
+ if (!is_host_ring) {
error = na->nm_rxsync(na, ring_nr, 0);
+ } else {
+ /* mark all packets as released, as in the
+ * second part of netmap_rxsync_from_host()
+ */
+ kring->nr_hwcur = kring->nr_hwtail;
+ nm_rxsync_finalize(kring);
+ }
put_out:
nm_kr_put(kring);
return error;
}
+
static int
netmap_bwrap_register(struct netmap_adapter *na, int onoff)
{
@@ -1744,7 +1822,7 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
struct netmap_vp_adapter *hostna = &bna->host;
int error;
- ND("%s %d", NM_IFPNAME(ifp), onoff);
+ ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");
if (onoff) {
int i;
@@ -1788,6 +1866,7 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
return 0;
}
+
static int
netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
u_int *rxr, u_int *rxd)
@@ -1807,6 +1886,7 @@ netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
return 0;
}
+
static int
netmap_bwrap_krings_create(struct netmap_adapter *na)
{
@@ -1834,6 +1914,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
return 0;
}
+
static void
netmap_bwrap_krings_delete(struct netmap_adapter *na)
{
@@ -1847,6 +1928,7 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na)
netmap_vp_krings_delete(na);
}
+
/* notify method for the bridge-->hwna direction */
static int
netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
@@ -1856,7 +1938,7 @@ netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int f
struct netmap_adapter *hwna = bna->hwna;
struct netmap_kring *kring, *hw_kring;
struct netmap_ring *ring;
- u_int lim, k;
+ u_int lim;
int error = 0;
if (tx == NR_TX)
@@ -1865,35 +1947,49 @@ netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int f
kring = &na->rx_rings[ring_n];
hw_kring = &hwna->tx_rings[ring_n];
ring = kring->ring;
-
lim = kring->nkr_num_slots - 1;
- k = nm_kr_rxpos(kring);
if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
return 0;
- ring->cur = k;
- ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
+ /* first step: simulate a user wakeup on the rx ring */
+ netmap_vp_rxsync(na, ring_n, flags);
+ ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
NM_IFPNAME(na->ifp), ring_n,
- kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
- ring->cur, ring->avail, ring->reserved,
- hw_kring->nr_hwcur, hw_kring->nr_hwavail);
+ kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+ ring->head, ring->cur, ring->tail,
+ hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+ /* second step: the simulated user consumes all new packets */
+ ring->head = ring->cur = ring->tail;
+
+ /* third step: the new packets are sent on the tx ring
+ * (which is actually the same ring)
+ */
+ /* set tail to what the hw expects */
+ ring->tail = hw_kring->rtail;
if (ring_n == na->num_rx_rings) {
netmap_txsync_to_host(hwna);
} else {
+ nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
error = hwna->nm_txsync(hwna, ring_n, flags);
}
- kring->nr_hwcur = ring->cur;
- kring->nr_hwavail = 0;
- kring->nr_hwreserved = lim - ring->avail;
- ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
+
+ /* fourth step: now we are back on the rx ring */
+ /* claim ownership on all hw owned bufs */
+ ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
+ ring->tail = kring->rtail; /* restore saved value of tail, for safety */
+
+ /* fifth step: the user goes to sleep again, causing another rxsync */
+ netmap_vp_rxsync(na, ring_n, flags);
+ ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
NM_IFPNAME(na->ifp), ring_n,
- kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
- ring->cur, ring->avail, ring->reserved,
- hw_kring->nr_hwcur, hw_kring->nr_hwavail);
+ kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+ ring->head, ring->cur, ring->tail,
+ hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
return error;
}
+
static int
netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
@@ -1904,6 +2000,7 @@ netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx,
return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
}
+
/* attach a bridge wrapper to the 'real' device */
static int
netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
@@ -1957,7 +2054,8 @@ netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
hostna->nm_mem = na->nm_mem;
hostna->na_private = bna;
- D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
+ ND("%s<->%s txr %d txd %d rxr %d rxd %d",
+ fake->if_xname, real->if_xname,
na->num_tx_rings, na->num_tx_desc,
na->num_rx_rings, na->num_rx_desc);
@@ -1970,6 +2068,7 @@ netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
return 0;
}
+
void
netmap_init_bridges(void)
{