Diffstat (limited to 'sys/dev')
-rw-r--r--  sys/dev/e1000/if_em.c            |    2
-rw-r--r--  sys/dev/e1000/if_igb.c           |    4
-rw-r--r--  sys/dev/e1000/if_lem.c           |    2
-rw-r--r--  sys/dev/ixgbe/ixgbe.c            |    2
-rw-r--r--  sys/dev/netmap/if_em_netmap.h    |   52
-rw-r--r--  sys/dev/netmap/if_igb_netmap.h   |   52
-rw-r--r--  sys/dev/netmap/if_lem_netmap.h   |   57
-rw-r--r--  sys/dev/netmap/if_re_netmap.h    |   84
-rw-r--r--  sys/dev/netmap/ixgbe_netmap.h    |   74
-rw-r--r--  sys/dev/netmap/netmap.c          |  835
-rw-r--r--  sys/dev/netmap/netmap_freebsd.c  |   26
-rw-r--r--  sys/dev/netmap/netmap_generic.c  | 1008
-rw-r--r--  sys/dev/netmap/netmap_kern.h     |  490
-rw-r--r--  sys/dev/netmap/netmap_mbq.c      |   15
-rw-r--r--  sys/dev/netmap/netmap_mbq.h      |    2
-rw-r--r--  sys/dev/netmap/netmap_mem2.c     |   20
-rw-r--r--  sys/dev/netmap/netmap_mem2.h     |    2
-rw-r--r--  sys/dev/netmap/netmap_vale.c     |  437
18 files changed, 1660 insertions, 1504 deletions
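
The changeset retires the old kring accounting (kring->nr_hwavail, ring->avail, ring->reserved) in favor of the head/cur/tail model: nr_hwcur and nr_hwtail delimit the slots owned by the kernel, while userspace advances ring->head and ring->cur. The driver hunks below lean on a few small index helpers; the sketch that follows reconstructs their semantics from how they are used in this diff (the authoritative definitions are in sys/dev/netmap/netmap_kern.h, so treat the exact bodies as illustrative):

/*
 * Ring-index helpers used throughout this patch.
 * Bodies reconstructed from their uses in the hunks below.
 */
static inline uint32_t
nm_next(uint32_t i, uint32_t lim)	/* lim == nkr_num_slots - 1 */
{
	return (i == lim) ? 0 : i + 1;
}

static inline uint32_t
nm_prev(uint32_t i, uint32_t lim)
{
	return (i == 0) ? lim : i - 1;
}

/* rx slots still held for userspace: nr_hwcur .. nr_hwtail */
static inline int
nm_kr_rxspace(struct netmap_kring *k)
{
	int space = k->nr_hwtail - k->nr_hwcur;

	if (space < 0)
		space += k->nkr_num_slots;
	return space;
}

/* a tx kring is empty when completed slots have caught up with rcur */
static inline int
nm_kr_txempty(struct netmap_kring *kring)
{
	return kring->rcur == kring->nr_hwtail;
}

Under this reading, rewriting the RDT programming as rdt -= nm_kr_rxspace(...) in the e1000/ixgbe hunks is a direct translation of the old rdt -= kring->nr_hwavail: both count the rx buffers that must be preserved for userspace across an init().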
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 580407a529fd..428612a4a695 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -4352,7 +4352,7 @@ em_initialize_receive_unit(struct adapter *adapter) * preserve the rx buffers passed to userspace. */ if (ifp->if_capenable & IFCAP_NETMAP) - rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail; + rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]); #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 57e4f893ab35..2134e29625cc 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -4630,13 +4630,13 @@ igb_initialize_receive_units(struct adapter *adapter) * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. * In this driver it means we adjust RDT to - * somthing different from next_to_refresh + * something different from next_to_refresh * (which is not used in netmap mode). */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; - int t = rxr->next_to_refresh - kring->nr_hwavail; + int t = rxr->next_to_refresh - nm_kr_rxspace(kring); if (t >= adapter->num_rx_desc) t -= adapter->num_rx_desc; diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c index a3da50c176ed..8014a0f9fde7 100644 --- a/sys/dev/e1000/if_lem.c +++ b/sys/dev/e1000/if_lem.c @@ -3367,7 +3367,7 @@ lem_initialize_receive_unit(struct adapter *adapter) #ifdef DEV_NETMAP /* preserve buffers already made available to clients */ if (ifp->if_capenable & IFCAP_NETMAP) - rctl -= NA(adapter->ifp)->rx_rings[0].nr_hwavail; + rctl -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[0]); #endif /* DEV_NETMAP */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl); diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index 740f7709e5b2..6dfec02cc8d9 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -1245,7 +1245,7 @@ ixgbe_init_locked(struct adapter *adapter) if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; - int t = na->num_rx_desc - 1 - kring->nr_hwavail; + int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t); } else diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index dbbee4222407..17b4c4fd2e14 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -120,9 +120,9 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, new_slots; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_txsync_prologue(kring, &new_slots); + u_int const head = kring->rhead; /* generate an interrupt approximately every half ring */ u_int report_frequency = kring->nkr_num_slots >> 1; @@ -130,9 +130,6 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = &adapter->tx_rings[ring_nr]; - if (cur > lim) /* error checking in nm_txsync_prologue() */ - return netmap_ring_reinit(kring); - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); @@ -141,9 +138,9 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { /* we have new packets to send */ + if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; @@ -175,9 +172,7 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwcur = cur; /* the saved ring->cur */ - /* decrease avail by # of packets sent minus previous ones */ - kring->nr_hwavail -= new_slots; + kring->nr_hwcur = head; /* synchronize the NIC ring */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, @@ -190,26 +185,20 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) /* * Second part: reclaim buffers for completed transmissions. */ - if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) { - int delta; - + if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { /* record completed transmissions using TDH */ nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr)); if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ D("TDH wrap %d", nic_i); nic_i -= kring->nkr_num_slots; } - delta = nic_i - txr->next_to_clean; - if (delta) { - /* some completed, increment hwavail. 
*/ - if (delta < 0) - delta += kring->nkr_num_slots; + if (nic_i != txr->next_to_clean) { txr->next_to_clean = nic_i; - kring->nr_hwavail += delta; + kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } - nm_txsync_finalize(kring, cur); + nm_txsync_finalize(kring); return 0; } @@ -226,16 +215,16 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, resvd; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */ + u_int const head = nm_rxsync_prologue(kring); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ struct adapter *adapter = ifp->if_softc; struct rx_ring *rxr = &adapter->rx_rings[ring_nr]; - if (cur > lim) + if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ @@ -251,7 +240,7 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) nic_i = rxr->next_to_check; nm_i = netmap_idx_n2k(kring, nic_i); - for (n = 0; ; n++) { + for (n = 0; ; n++) { // XXX no need to count struct e1000_rx_desc *curr = &rxr->rx_base[nic_i]; uint32_t staterr = le32toh(curr->status); @@ -268,7 +257,7 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) } if (n) { /* update the state variables */ rxr->next_to_check = nic_i; - kring->nr_hwavail += n; + kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } @@ -277,9 +266,9 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * Second part: skip past packets that userspace has released. */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { + if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; void *addr = PNMB(slot, &paddr); @@ -302,8 +291,7 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwavail -= n; - kring->nr_hwcur = cur; + kring->nr_hwcur = head; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); @@ -311,12 +299,12 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ - nic_i = (nic_i == 0) ? lim : nic_i - 1; + nic_i = nm_prev(nic_i, lim); E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i); } /* tell userspace that there might be new packets */ - ring->avail = kring->nr_hwavail - resvd; + nm_rxsync_finalize(kring); return 0; diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h index b91d0baba06f..e1929f0918e2 100644 --- a/sys/dev/netmap/if_igb_netmap.h +++ b/sys/dev/netmap/if_igb_netmap.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Universita` di Pisa. All rights reserved. + * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -88,9 +88,9 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, new_slots; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_txsync_prologue(kring, &new_slots); + u_int const head = kring->rhead; /* generate an interrupt approximately every half ring */ u_int report_frequency = kring->nkr_num_slots >> 1; @@ -101,9 +101,6 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) u32 olinfo_status = (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0; - if (cur > lim) /* error checking in nm_txsync_prologue() */ - return netmap_ring_reinit(kring); - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); @@ -112,9 +109,9 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { /* we have new packets to send */ + if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; @@ -155,9 +152,7 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwcur = cur; /* the saved ring->cur */ - /* decrease avail by # of packets sent minus previous ones */ - kring->nr_hwavail -= new_slots; + kring->nr_hwcur = head; /* Set the watchdog XXX ? */ txr->queue_status = IGB_QUEUE_WORKING; @@ -174,26 +169,18 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) /* * Second part: reclaim buffers for completed transmissions. */ - if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) { - int delta; - + if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { /* record completed transmissions using TDH */ nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr)); if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ D("TDH wrap %d", nic_i); nic_i -= kring->nkr_num_slots; } - delta = nic_i - txr->next_to_clean; - if (delta) { - /* some completed, increment hwavail. 
*/ - if (delta < 0) - delta += kring->nkr_num_slots; - txr->next_to_clean = nic_i; - kring->nr_hwavail += delta; - } + txr->next_to_clean = nic_i; + kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } - nm_txsync_finalize(kring, cur); + nm_txsync_finalize(kring); return 0; } @@ -210,16 +197,16 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, resvd; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */ + u_int const head = nm_rxsync_prologue(kring); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ struct adapter *adapter = ifp->if_softc; struct rx_ring *rxr = &adapter->rx_rings[ring_nr]; - if (cur > lim) + if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ @@ -250,7 +237,7 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) } if (n) { /* update the state variables */ rxr->next_to_check = nic_i; - kring->nr_hwavail += n; + kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } @@ -259,9 +246,9 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * Second part: skip past packets that userspace has released. */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { + if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; void *addr = PNMB(slot, &paddr); @@ -284,8 +271,7 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwavail -= n; - kring->nr_hwcur = cur; + kring->nr_hwcur = head; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); @@ -293,12 +279,12 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ - nic_i = (nic_i == 0) ? lim : nic_i - 1; + nic_i = nm_prev(nic_i, lim); E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i); } /* tell userspace that there might be new packets */ - ring->avail = kring->nr_hwavail - resvd; + nm_rxsync_finalize(kring); return 0; diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h index 8ad3b7a2a352..4fce5c988d09 100644 --- a/sys/dev/netmap/if_lem_netmap.h +++ b/sys/dev/netmap/if_lem_netmap.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -91,18 +91,14 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, new_slots; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_txsync_prologue(kring, &new_slots); + u_int const head = kring->rhead; /* generate an interrupt approximately every half ring */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ struct adapter *adapter = ifp->if_softc; - if (cur > lim) /* error checking in nm_txsync_prologue() */ - return netmap_ring_reinit(kring); - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_POSTREAD); @@ -111,9 +107,9 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { /* we have new packets to send */ + if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + while (nm_i != head) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; @@ -145,9 +141,7 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwcur = cur; /* the saved ring->cur */ - /* decrease avail by # of packets sent minus previous ones */ - kring->nr_hwavail -= new_slots; + kring->nr_hwcur = head; /* synchronize the NIC ring */ bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, @@ -160,26 +154,19 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) /* * Second part: reclaim buffers for completed transmissions. */ - if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) { - int delta; - + if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { + kring->last_reclaim = ticks; /* record completed transmissions using TDH */ nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0)); if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ D("TDH wrap %d", nic_i); nic_i -= kring->nkr_num_slots; } - delta = nic_i - adapter->next_tx_to_clean; - if (delta) { - /* some completed, increment hwavail. 
*/ - if (delta < 0) - delta += kring->nkr_num_slots; - adapter->next_tx_to_clean = nic_i; - kring->nr_hwavail += delta; - } + adapter->next_tx_to_clean = nic_i; + kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } - nm_txsync_finalize(kring, cur); + nm_txsync_finalize(kring); return 0; } @@ -196,15 +183,15 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, resvd; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */ + u_int const head = nm_rxsync_prologue(kring); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ struct adapter *adapter = ifp->if_softc; - if (cur > lim) + if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ @@ -241,9 +228,14 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ + ND("%d new packets at nic %d nm %d tail %d", + n, + adapter->next_rx_desc_to_check, + netmap_idx_n2k(kring, adapter->next_rx_desc_to_check), + kring->nr_hwtail); adapter->next_rx_desc_to_check = nic_i; // ifp->if_ipackets += n; - kring->nr_hwavail += n; + kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } @@ -252,9 +244,9 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * Second part: skip past packets that userspace has released. */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { + if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; void *addr = PNMB(slot, &paddr); @@ -277,20 +269,19 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwavail -= n; - kring->nr_hwcur = cur; + kring->nr_hwcur = head; bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ - nic_i = (nic_i == 0) ? lim : nic_i - 1; + nic_i = nm_prev(nic_i, lim); E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i); } /* tell userspace that there might be new packets */ - ring->avail = kring->nr_hwavail - resvd; + nm_rxsync_finalize(kring); return 0; diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index 2c7ba060cffd..10abe4f49f83 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -72,17 +72,14 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, new_slots; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_txsync_prologue(kring, &new_slots); + u_int const head = kring->rhead; /* device-specific */ struct rl_softc *sc = ifp->if_softc; struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc; - if (cur > lim) /* error checking in nm_txsync_prologue() */ - return netmap_ring_reinit(kring); - bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ? @@ -91,11 +88,11 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) * First part: process new packets to send. */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { /* we have new packets to send */ + if (nm_i != head) { /* we have new packets to send */ nic_i = sc->rl_ldata.rl_tx_prodidx; // XXX or netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; @@ -132,9 +129,7 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) nic_i = nm_next(nic_i, lim); } sc->rl_ldata.rl_tx_prodidx = nic_i; - /* decrease avail by # of packets sent minus previous ones */ - kring->nr_hwcur = cur; /* the saved ring->cur */ - kring->nr_hwavail -= new_slots; + kring->nr_hwcur = head; /* synchronize the NIC ring */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, @@ -148,7 +143,7 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) /* * Second part: reclaim buffers for completed transmissions. */ - if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) { + if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { nic_i = sc->rl_ldata.rl_tx_considx; for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx; n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) { @@ -160,11 +155,11 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) if (n > 0) { sc->rl_ldata.rl_tx_considx = nic_i; sc->rl_ldata.rl_tx_free += n; - kring->nr_hwavail += n; + kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } - nm_txsync_finalize(kring, cur); + nm_txsync_finalize(kring); return 0; } @@ -181,16 +176,16 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, resvd; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */ + u_int const head = nm_rxsync_prologue(kring); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ struct rl_softc *sc = ifp->if_softc; struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc; - if (cur > lim) + if (head > lim) return netmap_ring_reinit(kring); bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, @@ -202,16 +197,17 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * * This device uses all the buffers in the ring, so we need * another termination condition in addition to RL_RDESC_STAT_OWN - * cleared (all buffers could have it cleared. 
The easiest one - * is to limit the amount of data reported up to 'lim' + * cleared (all buffers could have it cleared). The easiest one + * is to stop right before nm_hwcur. */ if (netmap_no_pendintr || force_update) { uint16_t slot_flags = kring->nkr_slot_flags; + uint32_t stop_i = nm_prev(kring->nr_hwcur, lim); nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */ nm_i = netmap_idx_n2k(kring, nic_i); - for (n = kring->nr_hwavail; n < lim ; n++) { + while (nm_i != stop_i) { struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i]; uint32_t rxstat = le32toh(cur_rx->rl_cmdstat); uint32_t total_len; @@ -226,14 +222,12 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) /* sync was in re_newbuf() */ bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD); + // sc->rl_ifp->if_ipackets++; nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - if (n != kring->nr_hwavail) { - sc->rl_ldata.rl_rx_prodidx = nic_i; - sc->rl_ifp->if_ipackets += n - kring->nr_hwavail; - kring->nr_hwavail = n; - } + sc->rl_ldata.rl_rx_prodidx = nic_i; + kring->nr_hwtail = nm_i; kring->nr_kflags &= ~NKR_PENDINTR; } @@ -241,9 +235,9 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * Second part: skip past packets that userspace has released. */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { + if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; void *addr = PNMB(slot, &paddr); @@ -272,8 +266,7 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwavail -= n; - kring->nr_hwcur = cur; + kring->nr_hwcur = head; bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, @@ -281,7 +274,7 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) } /* tell userspace that there might be new packets */ - ring->avail = kring->nr_hwavail - resvd; + nm_rxsync_finalize(kring); return 0; @@ -336,36 +329,35 @@ re_netmap_rx_init(struct rl_softc *sc) struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0); struct rl_desc *desc = sc->rl_ldata.rl_rx_list; uint32_t cmdstat; - int i, n, max_avail; + uint32_t nic_i, max_avail; + uint32_t const n = sc->rl_ldata.rl_rx_desc_cnt; if (!slot) return; - n = sc->rl_ldata.rl_rx_desc_cnt; /* - * Userspace owned hwavail packets before the reset, - * so the NIC that last hwavail descriptors of the ring - * are still owned by the driver (and keep one empty). + * Do not release the slots owned by userspace, + * and also keep one empty. 
*/ - max_avail = n - 1 - na->rx_rings[0].nr_hwavail; - for (i = 0; i < n; i++) { + max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]); + for (nic_i = 0; nic_i < n; nic_i++) { void *addr; uint64_t paddr; - int l = netmap_idx_n2k(&na->rx_rings[0], i); + uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i); - addr = PNMB(slot + l, &paddr); + addr = PNMB(slot + nm_i, &paddr); netmap_reload_map(sc->rl_ldata.rl_rx_mtag, - sc->rl_ldata.rl_rx_desc[i].rx_dmamap, addr); + sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, addr); bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, - sc->rl_ldata.rl_rx_desc[i].rx_dmamap, BUS_DMASYNC_PREREAD); - desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); - desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); + sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, BUS_DMASYNC_PREREAD); + desc[nic_i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); + desc[nic_i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); cmdstat = NETMAP_BUF_SIZE; - if (i == n - 1) /* mark the end of ring */ + if (nic_i == n - 1) /* mark the end of ring */ cmdstat |= RL_RDESC_CMD_EOR; - if (i < max_avail) + if (nic_i < max_avail) cmdstat |= RL_RDESC_CMD_OWN; - desc[i].rl_cmdstat = htole32(cmdstat); + desc[nic_i].rl_cmdstat = htole32(cmdstat); } } diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index 4dea6639d325..a617cc4c2429 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -141,14 +141,13 @@ ixgbe_netmap_reg(struct netmap_adapter *na, int onoff) /* * Reconcile kernel and user view of the transmit ring. * - * Userspace wants to send packets up to the one before ring->cur, + * All information is in the kring. + * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * - * ring->avail is not used on input, but it is updated on return. - * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. 
@@ -161,9 +160,9 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, new_slots; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_txsync_prologue(kring, &new_slots); + u_int const head = kring->rhead; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set @@ -175,9 +174,6 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct tx_ring *txr = &adapter->tx_rings[ring_nr]; int reclaim_tx; - if (cur > lim) /* error checking in nm_txsync_prologue() */ - return netmap_ring_reinit(kring); - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); @@ -199,7 +195,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) */ /* - * If we have packets to send (kring->nr_hwcur != ring->cur) + * If we have packets to send (kring->nr_hwcur != kring->rhead) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot @@ -217,13 +213,13 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { /* we have new packets to send */ + if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txr->tx_buffers[nic_i]); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; @@ -262,9 +258,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwcur = cur; /* the saved ring->cur */ - /* decrease avail by # of packets sent minus previous ones */ - kring->nr_hwavail -= new_slots; + kring->nr_hwcur = head; /* synchronize the NIC ring */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, @@ -281,7 +275,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) */ if (flags & NAF_FORCE_RECLAIM) { reclaim_tx = 1; /* forced reclaim */ - } else if (kring->nr_hwavail > 0) { + } else if (!nm_kr_txempty(kring)) { reclaim_tx = 0; /* have buffers, no reclaim */ } else { /* @@ -321,21 +315,13 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) nic_i -= kring->nkr_num_slots; } if (nic_i != txr->next_to_clean) { - n = (nic_i + lim + 1) - txr->next_to_clean; - if (n > lim) - n -= lim + 1; /* some tx completed, increment avail */ txr->next_to_clean = nic_i; - kring->nr_hwavail += n; - if (kring->nr_hwavail > lim) { - RD(5, "bad hwavail %d", - kring->nr_hwavail); - return netmap_ring_reinit(kring); - } + kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } - nm_txsync_finalize(kring, cur); + nm_txsync_finalize(kring); return 0; } @@ -347,14 +333,9 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * - * When called, userspace has released buffers up to - * ring->cur - ring->reserved (last one excluded). - * - * The last interrupt reported kring->nr_hwavail slots available - * after kring->nr_hwcur. 
- * We must subtract the newly consumed slots (cur - nr_hwcur) - * from nr_hwavail, make the descriptors available for the next reads, - * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail. + * On call, kring->rhead is the first packet that userspace wants + * to keep, and kring->rcur is the wakeup point. + * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. @@ -367,16 +348,16 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ - u_int n, resvd; + u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */ + u_int const head = nm_rxsync_prologue(kring); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ struct adapter *adapter = ifp->if_softc; struct rx_ring *rxr = &adapter->rx_rings[ring_nr]; - if (cur > lim) + if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ @@ -391,8 +372,8 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * - * nm_i = (kring->nr_hwcur + kring->nr_hwavail) % ring_size * nic_i = rxr->next_to_check; + * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * @@ -402,7 +383,7 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) int crclen = ix_crcstrip ? 0 : 4; uint16_t slot_flags = kring->nkr_slot_flags; - nic_i = rxr->next_to_check; + nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail) nm_i = netmap_idx_n2k(kring, nic_i); for (n = 0; ; n++) { @@ -425,23 +406,23 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) ix_rx_miss_bufs += n; } rxr->next_to_check = nic_i; - kring->nr_hwavail += n; + kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } /* * Second part: skip past packets that userspace has released. - * (kring->nr_hwcur to ring->cur - ring->reserved excluded), + * (kring->nr_hwcur to kring->rhead excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ nm_i = kring->nr_hwcur; - if (nm_i != cur) { + if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != cur; n++) { + for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; void *addr = PNMB(slot, &paddr); @@ -464,8 +445,7 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } - kring->nr_hwavail -= n; - kring->nr_hwcur = cur; + kring->nr_hwcur = head; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); @@ -473,12 +453,12 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ - nic_i = (nic_i == 0) ? 
lim : nic_i - 1; + nic_i = nm_prev(nic_i, lim); IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i); } /* tell userspace that there might be new packets */ - ring->avail = kring->nr_hwavail - resvd; + nm_rxsync_finalize(kring); return 0; diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 478d9374937f..358d4693dcb3 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -151,7 +151,6 @@ ports attached to the switch) #include <machine/bus.h> /* bus_dmamap_* */ #include <sys/endian.h> #include <sys/refcount.h> -#include <sys/jail.h> /* reduce conditional code */ @@ -226,9 +225,6 @@ enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */ NETMAP_ADMODE_NATIVE, /* either native or none */ NETMAP_ADMODE_GENERIC, /* force generic */ NETMAP_ADMODE_LAST }; -#define NETMAP_ADMODE_NATIVE 1 /* Force native netmap adapter. */ -#define NETMAP_ADMODE_GENERIC 2 /* Force generic netmap adapter. */ -#define NETMAP_ADMODE_BEST 0 /* Priority to native netmap adapter. */ static int netmap_admode = NETMAP_ADMODE_BEST; int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */ @@ -252,6 +248,10 @@ nm_kr_get(struct netmap_kring *kr) } +/* + * mark the ring as stopped, and run through the locks + * to make sure other users get to see it. + */ void netmap_disable_ring(struct netmap_kring *kr) { @@ -380,7 +380,6 @@ nm_dump_buf(char *p, int len, int lim, char *dst) } - /* * Fetch configuration from the device, to cope with dynamic * reconfigurations after loading the module. @@ -432,6 +431,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail u_int i, len, ndesc; struct netmap_kring *kring; + // XXX additional space for extra rings ? len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom; na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO); @@ -441,19 +441,23 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail } na->rx_rings = na->tx_rings + ntx; + /* + * All fields in krings are 0 except the one initialized below. + * but better be explicit on important kring fields. + */ ndesc = na->num_tx_desc; for (i = 0; i < ntx; i++) { /* Transmit rings */ kring = &na->tx_rings[i]; bzero(kring, sizeof(*kring)); kring->na = na; + kring->ring_id = i; kring->nkr_num_slots = ndesc; /* - * IMPORTANT: - * Always keep one slot empty, so we can detect new - * transmissions comparing cur and nr_hwcur (they are - * the same only if there are no new transmissions). + * IMPORTANT: Always keep one slot empty. 
*/ - kring->nr_hwavail = ndesc - 1; + kring->rhead = kring->rcur = kring->nr_hwcur = 0; + kring->rtail = kring->nr_hwtail = ndesc - 1; + snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i); mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF); init_waitqueue_head(&kring->si); } @@ -463,7 +467,11 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail kring = &na->rx_rings[i]; bzero(kring, sizeof(*kring)); kring->na = na; + kring->ring_id = i; kring->nkr_num_slots = ndesc; + kring->rhead = kring->rcur = kring->nr_hwcur = 0; + kring->rtail = kring->nr_hwtail = 0; + snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i); mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF); init_waitqueue_head(&kring->si); } @@ -473,10 +481,10 @@ netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tail na->tailroom = na->rx_rings + nrx; return 0; - } +/* XXX check boundaries */ void netmap_krings_delete(struct netmap_adapter *na) { @@ -493,6 +501,23 @@ netmap_krings_delete(struct netmap_adapter *na) } +/* + * Destructor for NIC ports. They also have an mbuf queue + * on the rings connected to the host so we need to purge + * them first. + */ +static void +netmap_hw_krings_delete(struct netmap_adapter *na) +{ + struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue; + + ND("destroy sw mbq with len %d", mbq_len(q)); + mbq_purge(q); + mbq_safe_destroy(q); + netmap_krings_delete(na); +} + + static struct netmap_if* netmap_if_new(const char *ifname, struct netmap_adapter *na) { @@ -721,6 +746,7 @@ netmap_dtor(void *data) /* * pass a chain of buffers to the host stack as coming from 'dst' + * We do not need to lock because the queue is private. */ static void netmap_send_up(struct ifnet *dst, struct mbq *q) @@ -739,39 +765,30 @@ netmap_send_up(struct ifnet *dst, struct mbq *q) /* * put a copy of the buffers marked NS_FORWARD into an mbuf chain. - * Run from hwcur to cur - reserved + * Take packets from hwcur to ring->head marked NS_FORWARD (or forced) + * and pass them up. Drop remaining packets in the unlikely event + * of an mbuf shortage. */ static void netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) { - /* Take packets from hwcur to cur-reserved and pass them up. - * In case of no buffers we give up. At the end of the loop, - * the queue is drained in all cases. - * XXX handle reserved - */ - u_int lim = kring->nkr_num_slots - 1; - struct mbuf *m; - u_int k = kring->ring->cur, n = kring->ring->reserved; + u_int const lim = kring->nkr_num_slots - 1; + u_int const head = kring->ring->head; + u_int n; struct netmap_adapter *na = kring->na; - /* compute the final position, ring->cur - ring->reserved */ - if (n > 0) { - if (k < n) - k += kring->nkr_num_slots; - k += n; - } - for (n = kring->nr_hwcur; n != k;) { + for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) { + struct mbuf *m; struct netmap_slot *slot = &kring->ring->slot[n]; - n = nm_next(n, lim); if ((slot->flags & NS_FORWARD) == 0 && !force) continue; if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { - D("bad pkt at %d len %d", n, slot->len); + RD(5, "bad pkt at %d len %d", n, slot->len); continue; } slot->flags &= ~NS_FORWARD; // XXX needed ? 
- /* XXX adapt to the case of a multisegment packet */ + /* XXX TODO: adapt to the case of a multisegment packet */ m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL); if (m == NULL) @@ -782,69 +799,54 @@ netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) /* - * The host ring has packets from nr_hwcur to (cur - reserved) - * to be sent down to the NIC. - * We need to use the queue lock on the source (host RX ring) - * to protect against netmap_transmit. - * If the user is well behaved we do not need to acquire locks - * on the destination(s), - * so we only need to make sure that there are no panics because - * of user errors. - * XXX verify - * - * We scan the tx rings, which have just been - * flushed so nr_hwcur == cur. Pushing packets down means - * increment cur and decrement avail. - * XXX to be verified + * Send to the NIC rings packets marked NS_FORWARD between + * kring->nr_hwcur and kring->rhead + * Called under kring->rx_queue.lock on the sw rx ring, */ -static void +static u_int netmap_sw_to_nic(struct netmap_adapter *na) { struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; - struct netmap_kring *k1 = &na->tx_rings[0]; - u_int i, howmany, src_lim, dst_lim; - - /* XXX we should also check that the carrier is on */ - if (kring->nkr_stopped) - return; + struct netmap_slot *rxslot = kring->ring->slot; + u_int i, rxcur = kring->nr_hwcur; + u_int const head = kring->rhead; + u_int const src_lim = kring->nkr_num_slots - 1; + u_int sent = 0; + + /* scan rings to find space, then fill as much as possible */ + for (i = 0; i < na->num_tx_rings; i++) { + struct netmap_kring *kdst = &na->tx_rings[i]; + struct netmap_ring *rdst = kdst->ring; + u_int const dst_lim = kdst->nkr_num_slots - 1; + + /* XXX do we trust ring or kring->rcur,rtail ? */ + for (; rxcur != head && !nm_ring_empty(rdst); + rxcur = nm_next(rxcur, src_lim) ) { + struct netmap_slot *src, *dst, tmp; + u_int dst_cur = rdst->cur; - mtx_lock(&kring->q_lock); + src = &rxslot[rxcur]; + if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd) + continue; - if (kring->nkr_stopped) - goto out; + sent++; - howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */ + dst = &rdst->slot[dst_cur]; - src_lim = kring->nkr_num_slots - 1; - for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) { - ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail); - dst_lim = k1->nkr_num_slots - 1; - while (howmany > 0 && k1->ring->avail > 0) { - struct netmap_slot *src, *dst, tmp; - src = &kring->ring->slot[kring->nr_hwcur]; - dst = &k1->ring->slot[k1->ring->cur]; tmp = *src; + src->buf_idx = dst->buf_idx; src->flags = NS_BUF_CHANGED; dst->buf_idx = tmp.buf_idx; dst->len = tmp.len; dst->flags = NS_BUF_CHANGED; - ND("out len %d buf %d from %d to %d", - dst->len, dst->buf_idx, - kring->nr_hwcur, k1->ring->cur); - - kring->nr_hwcur = nm_next(kring->nr_hwcur, src_lim); - howmany--; - kring->nr_hwavail--; - k1->ring->cur = nm_next(k1->ring->cur, dst_lim); - k1->ring->avail--; + + rdst->cur = nm_next(dst_cur, dst_lim); } - kring->ring->cur = kring->nr_hwcur; // XXX - k1++; // XXX why? + /* if (sent) XXX txsync ? 
*/ } -out: - mtx_unlock(&kring->q_lock); + return sent; } @@ -859,7 +861,8 @@ netmap_txsync_to_host(struct netmap_adapter *na) { struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings]; struct netmap_ring *ring = kring->ring; - u_int k, lim = kring->nkr_num_slots - 1; + u_int const lim = kring->nkr_num_slots - 1; + u_int const head = nm_txsync_prologue(kring); struct mbq q; int error; @@ -869,22 +872,27 @@ netmap_txsync_to_host(struct netmap_adapter *na) D("ring %p busy (user error)", kring); return; } - k = ring->cur; - if (k > lim) { + if (head > lim) { D("invalid ring index in stack TX kring %p", kring); netmap_ring_reinit(kring); nm_kr_put(kring); return; } - /* Take packets from hwcur to cur and pass them up. + /* Take packets from hwcur to head and pass them up. + * force head = cur since netmap_grab_packets() stops at head * In case of no buffers we give up. At the end of the loop, * the queue is drained in all cases. */ mbq_init(&q); - netmap_grab_packets(kring, &q, 1); - kring->nr_hwcur = k; - kring->nr_hwavail = ring->avail = lim; + ring->cur = head; + netmap_grab_packets(kring, &q, 1 /* force */); + ND("have %d pkts in queue", mbq_len(&q)); + kring->nr_hwcur = head; + kring->nr_hwtail = head + lim; + if (kring->nr_hwtail > lim) + kring->nr_hwtail -= lim + 1; + nm_txsync_finalize(kring); nm_kr_put(kring); netmap_send_up(na->ifp, &q); @@ -893,60 +901,89 @@ netmap_txsync_to_host(struct netmap_adapter *na) /* * rxsync backend for packets coming from the host stack. - * They have been put in the queue by netmap_transmit() so we - * need to protect access to the kring using a lock. + * They have been put in kring->rx_queue by netmap_transmit(). + * We protect access to the kring using kring->rx_queue.lock * * This routine also does the selrecord if called from the poll handler * (we know because td != NULL). * * NOTE: on linux, selrecord() is defined as a macro and uses pwait * as an additional hidden argument. + * returns the number of packets delivered to tx queues in + * transparent mode, or a negative value if error */ -static void +int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait) { struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; struct netmap_ring *ring = kring->ring; - u_int j, n, lim = kring->nkr_num_slots; - u_int k = ring->cur, resvd = ring->reserved; + u_int nm_i, n; + u_int const lim = kring->nkr_num_slots - 1; + u_int const head = nm_rxsync_prologue(kring); + int ret = 0; + struct mbq *q = &kring->rx_queue; (void)pwait; /* disable unused warnings */ - if (kring->nkr_stopped) /* check a first time without lock */ - return; + if (head > lim) { + netmap_ring_reinit(kring); + return EINVAL; + } - mtx_lock(&kring->q_lock); + if (kring->nkr_stopped) /* check a first time without lock */ + return EBUSY; - if (kring->nkr_stopped) /* check again with lock held */ - goto unlock_out; + mtx_lock(&q->lock); - if (k >= lim) { - netmap_ring_reinit(kring); + if (kring->nkr_stopped) { /* check again with lock held */ + ret = EBUSY; goto unlock_out; } - /* new packets are already set in nr_hwavail */ - /* skip past packets that userspace has released */ - j = kring->nr_hwcur; - if (resvd > 0) { - if (resvd + ring->avail >= lim + 1) { - D("XXX invalid reserve/avail %d %d", resvd, ring->avail); - ring->reserved = resvd = 0; // XXX panic... 
+ + /* First part: import newly received packets */ + n = mbq_len(q); + if (n) { /* grab packets from the queue */ + struct mbuf *m; + uint32_t stop_i; + + nm_i = kring->nr_hwtail; + stop_i = nm_prev(nm_i, lim); + while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) { + int len = MBUF_LEN(m); + struct netmap_slot *slot = &ring->slot[nm_i]; + + m_copydata(m, 0, len, BDG_NMB(na, slot)); + ND("nm %d len %d", nm_i, len); + if (netmap_verbose) + D("%s", nm_dump_buf(BDG_NMB(na, slot),len, 128, NULL)); + + slot->len = len; + slot->flags = kring->nkr_slot_flags; + nm_i = nm_next(nm_i, lim); } - k = (k >= resvd) ? k - resvd : k + lim - resvd; + kring->nr_hwtail = nm_i; } - if (j != k) { - n = k >= j ? k - j : k + lim - j; - kring->nr_hwavail -= n; - kring->nr_hwcur = k; + + /* + * Second part: skip past packets that userspace has released. + */ + nm_i = kring->nr_hwcur; + if (nm_i != head) { /* something was released */ + if (netmap_fwd || kring->ring->flags & NR_FORWARD) + ret = netmap_sw_to_nic(na); + kring->nr_hwcur = head; } - k = ring->avail = kring->nr_hwavail - resvd; - if (k == 0 && td) + + nm_rxsync_finalize(kring); + + /* access copies of cur,tail in the kring */ + if (kring->rcur == kring->rtail && td) /* no bufs available */ selrecord(td, &kring->si); - if (k && (netmap_verbose & NM_VERB_HOST)) - D("%d pkts from stack", k); + unlock_out: - mtx_unlock(&kring->q_lock); + mtx_unlock(&q->lock); + return ret; } @@ -1042,7 +1079,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) // XXX add a refcount ? netmap_adapter_get(prev_na); } - D("Created generic NA %p (prev %p)", gna, gna->prev); + ND("Created generic NA %p (prev %p)", gna, gna->prev); return 0; } @@ -1113,154 +1150,167 @@ out: /* * validate parameters on entry for *_txsync() * Returns ring->cur if ok, or something >= kring->nkr_num_slots - * in case of error. The extra argument is a pointer to - * 'new_bufs'. XXX this may be deprecated at some point. + * in case of error. * - * Below is a correct configuration on input. ring->cur - * must be in the region covered by kring->hwavail, - * and ring->avail and kring->avail should end at the same slot. + * rhead, rcur and rtail=hwtail are stored from previous round. + * hwcur is the next packet to send to the ring. * - * +-hwcur - * | - * v<--hwres-->|<-----hwavail----> - * ------+------------------------------+-------- ring - * | - * |<---avail---> - * +--cur + * We want + * hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail * + * hwcur, rhead, rtail and hwtail are reliable */ u_int -nm_txsync_prologue(struct netmap_kring *kring, u_int *new_slots) +nm_txsync_prologue(struct netmap_kring *kring) { struct netmap_ring *ring = kring->ring; + u_int head = ring->head; /* read only once */ u_int cur = ring->cur; /* read only once */ - u_int avail = ring->avail; /* read only once */ u_int n = kring->nkr_num_slots; - u_int kstart, kend, a; -#if 1 /* kernel sanity checks */ - if (kring->nr_hwcur >= n || - kring->nr_hwreserved >= n || kring->nr_hwavail >= n || - kring->nr_hwreserved + kring->nr_hwavail >= n) + ND(5, "%s kcur %d ktail %d head %d cur %d tail %d", + kring->name, + kring->nr_hwcur, kring->nr_hwtail, + ring->head, ring->cur, ring->tail); +#if 1 /* kernel sanity checks; but we can trust the kring. 
*/ + if (kring->nr_hwcur >= n || kring->rhead >= n || + kring->rtail >= n || kring->nr_hwtail >= n) goto error; #endif /* kernel sanity checks */ - kstart = kring->nr_hwcur + kring->nr_hwreserved; - if (kstart >= n) - kstart -= n; - kend = kstart + kring->nr_hwavail; - /* user sanity checks. a is the expected avail */ - if (cur < kstart) { - /* too low, but maybe wraparound */ - if (cur + n > kend) + /* + * user sanity checks. We only use 'cur', + * A, B, ... are possible positions for cur: + * + * 0 A cur B tail C n-1 + * 0 D tail E cur F n-1 + * + * B, F, D are valid. A, C, E are wrong + */ + if (kring->rtail >= kring->rhead) { + /* want rhead <= head <= rtail */ + if (head < kring->rhead || head > kring->rtail) goto error; - *new_slots = cur + n - kstart; - a = kend - cur - n; - } else { - if (cur > kend) + /* and also head <= cur <= rtail */ + if (cur < head || cur > kring->rtail) + goto error; + } else { /* here rtail < rhead */ + /* we need head outside rtail .. rhead */ + if (head > kring->rtail && head < kring->rhead) goto error; - *new_slots = cur - kstart; - a = kend - cur; + + /* two cases now: head <= rtail or head >= rhead */ + if (head <= kring->rtail) { + /* want head <= cur <= rtail */ + if (cur < head || cur > kring->rtail) + goto error; + } else { /* head >= rhead */ + /* cur must be outside rtail..head */ + if (cur > kring->rtail && cur < head) + goto error; + } } - if (a != avail) { - RD(5, "wrong but fixable avail have %d need %d", - avail, a); - ring->avail = avail = a; + if (ring->tail != kring->rtail) { + RD(5, "tail overwritten was %d need %d", + ring->tail, kring->rtail); + ring->tail = kring->rtail; } - return cur; + kring->rhead = head; + kring->rcur = cur; + return head; error: - RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d", + RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d", + kring->name, kring->nr_hwcur, - kring->nr_hwreserved, kring->nr_hwavail, - cur, avail); + kring->rcur, kring->nr_hwtail, + cur, ring->tail); return n; } /* * validate parameters on entry for *_rxsync() - * Returns ring->cur - ring->reserved if ok, - * or something >= kring->nkr_num_slots - * in case of error. The extra argument is a pointer to - * 'resvd'. XXX this may be deprecated at some point. + * Returns ring->head if ok, kring->nkr_num_slots on error. * - * Below is a correct configuration on input. ring->cur and - * ring->reserved must be in the region covered by kring->hwavail, - * and ring->avail and kring->avail should end at the same slot. + * For a valid configuration, + * hwcur <= head <= cur <= tail <= hwtail * - * +-hwcur - * | - * v<-------hwavail----------> - * ---------+--------------------------+-------- ring - * |<--res-->| - * |<---avail---> - * +--cur + * We only consider head and cur. + * hwcur and hwtail are reliable. * */ u_int -nm_rxsync_prologue(struct netmap_kring *kring, u_int *resvd) +nm_rxsync_prologue(struct netmap_kring *kring) { struct netmap_ring *ring = kring->ring; - u_int cur = ring->cur; /* read only once */ - u_int avail = ring->avail; /* read only once */ - u_int res = ring->reserved; /* read only once */ - u_int n = kring->nkr_num_slots; - u_int kend = kring->nr_hwcur + kring->nr_hwavail; - u_int a; + uint32_t const n = kring->nkr_num_slots; + uint32_t head, cur; + ND("%s kc %d kt %d h %d c %d t %d", + kring->name, + kring->nr_hwcur, kring->nr_hwtail, + ring->head, ring->cur, ring->tail); + /* + * Before storing the new values, we should check they do not + * move backwards. 
However: + * - head is not an issue because the previous value is hwcur; + * - cur could in principle go back, however it does not matter + * because we are processing a brand new rxsync() + */ + cur = kring->rcur = ring->cur; /* read only once */ + head = kring->rhead = ring->head; /* read only once */ #if 1 /* kernel sanity checks */ - if (kring->nr_hwcur >= n || kring->nr_hwavail >= n) + if (kring->nr_hwcur >= n || kring->nr_hwtail >= n) goto error; #endif /* kernel sanity checks */ /* user sanity checks */ - if (res >= n) - goto error; - /* check that cur is valid, a is the expected value of avail */ - if (cur < kring->nr_hwcur) { - /* too low, but maybe wraparound */ - if (cur + n > kend) + if (kring->nr_hwtail >= kring->nr_hwcur) { + /* want hwcur <= rhead <= hwtail */ + if (head < kring->nr_hwcur || head > kring->nr_hwtail) goto error; - a = kend - (cur + n); - } else { - if (cur > kend) + /* and also rhead <= rcur <= hwtail */ + if (cur < head || cur > kring->nr_hwtail) goto error; - a = kend - cur; - } - if (a != avail) { - RD(5, "wrong but fixable avail have %d need %d", - avail, a); - ring->avail = avail = a; - } - if (res != 0) { - /* then repeat the check for cur + res */ - cur = (cur >= res) ? cur - res : n + cur - res; - if (cur < kring->nr_hwcur) { - /* too low, but maybe wraparound */ - if (cur + n > kend) - goto error; - } else if (cur > kend) { + } else { + /* we need rhead outside hwtail..hwcur */ + if (head < kring->nr_hwcur && head > kring->nr_hwtail) goto error; + /* two cases now: head <= hwtail or head >= hwcur */ + if (head <= kring->nr_hwtail) { + /* want head <= cur <= hwtail */ + if (cur < head || cur > kring->nr_hwtail) + goto error; + } else { + /* cur must be outside hwtail..head */ + if (cur < head && cur > kring->nr_hwtail) + goto error; } } - *resvd = res; - return cur; + if (ring->tail != kring->rtail) { + RD(5, "%s tail overwritten was %d need %d", + kring->name, + ring->tail, kring->rtail); + ring->tail = kring->rtail; + } + return head; error: - RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d res %d", + RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d", kring->nr_hwcur, - kring->nr_hwreserved, kring->nr_hwavail, - ring->cur, avail, res); + kring->rcur, kring->nr_hwtail, + kring->rhead, kring->rcur, ring->tail); return n; } + /* * Error routine called when txsync/rxsync detects an error. - * Can't do much more than resetting cur = hwcur, avail = hwavail. + * Can't do much more than resetting head =cur = hwcur, tail = hwtail * Return 1 on reinit. * * This routine is only called by the upper half of the kernel. * It only reads hwcur (which is changed only by the upper half, too) - * and hwavail (which may be changed by the lower half, but only on + * and hwtail (which may be changed by the lower half, but only on * a tx ring and only to increase it, so any error will be recovered * on the next call). For the above, we don't strictly need to call * it under lock. 
@@ -1274,36 +1324,38 @@ netmap_ring_reinit(struct netmap_kring *kring) // XXX KASSERT nm_kr_tryget RD(10, "called for %s", NM_IFPNAME(kring->na->ifp)); + // XXX probably wrong to trust userspace + kring->rhead = ring->head; + kring->rcur = ring->cur; + kring->rtail = ring->tail; + if (ring->cur > lim) errors++; + if (ring->head > lim) + errors++; + if (ring->tail > lim) + errors++; for (i = 0; i <= lim; i++) { u_int idx = ring->slot[i].buf_idx; u_int len = ring->slot[i].len; if (idx < 2 || idx >= netmap_total_buffers) { - if (!errors++) - D("bad buffer at slot %d idx %d len %d ", i, idx, len); + RD(5, "bad index at slot %d idx %d len %d ", i, idx, len); ring->slot[i].buf_idx = 0; ring->slot[i].len = 0; } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) { ring->slot[i].len = 0; - if (!errors++) - D("bad len %d at slot %d idx %d", - len, i, idx); + RD(5, "bad len at slot %d idx %d len %d", i, idx, len); } } if (errors) { - int pos = kring - kring->na->tx_rings; - int n = kring->na->num_tx_rings + 1; - RD(10, "total %d errors", errors); - errors++; - RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d", - NM_IFPNAME(kring->na->ifp), - pos < n ? "TX" : "RX", pos < n ? pos : pos - n, + RD(10, "%s reinit, cur %d -> %d tail %d -> %d", + kring->name, ring->cur, kring->nr_hwcur, - ring->avail, kring->nr_hwavail); - ring->cur = kring->nr_hwcur; - ring->avail = kring->nr_hwavail; + ring->tail, kring->nr_hwtail); + ring->head = kring->rhead = kring->nr_hwcur; + ring->cur = kring->rcur = kring->nr_hwcur; + ring->tail = kring->rtail = kring->nr_hwtail; } return (errors ? 1 : 0); } @@ -1436,7 +1488,6 @@ out: * - NIOCGINFO * - SIOCGIFADDR just for convenience * - NIOCREGIF - * - NIOCUNREGIF * - NIOCTXSYNC * - NIOCRXSYNC * @@ -1472,6 +1523,17 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, } while (0) #endif /* linux */ + if (cmd == NIOCGINFO || cmd == NIOCREGIF) { + /* truncate name */ + nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; + if (nmr->nr_version != NETMAP_API) { + D("API mismatch for %s got %d need %d", + nmr->nr_name, + nmr->nr_version, NETMAP_API); + nmr->nr_version = NETMAP_API; + return EINVAL; + } + } CURVNET_SET(TD_TO_VNET(td)); error = devfs_get_cdevpriv((void **)&priv); @@ -1482,16 +1544,8 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, return (error == ENOENT ? ENXIO : error); } - nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */ switch (cmd) { case NIOCGINFO: /* return capabilities etc */ - if (nmr->nr_version != NETMAP_API) { - D("API mismatch got %d have %d", - nmr->nr_version, NETMAP_API); - nmr->nr_version = NETMAP_API; - error = EINVAL; - break; - } if (nmr->nr_cmd == NETMAP_BDG_LIST) { error = netmap_bdg_ctl(nmr, NULL); break; @@ -1531,11 +1585,6 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, break; case NIOCREGIF: - if (nmr->nr_version != NETMAP_API) { - nmr->nr_version = NETMAP_API; - error = EINVAL; - break; - } /* possibly attach/detach NIC and VALE switch */ i = nmr->nr_cmd; if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH @@ -1593,12 +1642,6 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, NMG_UNLOCK(); break; - case NIOCUNREGIF: - // XXX we have no data here ? 
- D("deprecated, data is %p", nmr); - error = EINVAL; - break; - case NIOCTXSYNC: case NIOCRXSYNC: nifp = priv->np_nifp; @@ -1649,7 +1692,11 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, D("pre txsync ring %d cur %d hwcur %d", i, kring->ring->cur, kring->nr_hwcur); - na->nm_txsync(na, i, NAF_FORCE_RECLAIM); + if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) { + netmap_ring_reinit(kring); + } else { + na->nm_txsync(na, i, NAF_FORCE_RECLAIM); + } if (netmap_verbose & NM_VERB_TXSYNC) D("post txsync ring %d cur %d hwcur %d", i, kring->ring->cur, @@ -1726,8 +1773,8 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) struct ifnet *ifp; struct netmap_kring *kring; u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0; - u_int lim_tx, lim_rx, host_forwarded = 0; - struct mbq q; + u_int lim_tx, lim_rx; + struct mbq q; /* packets from hw queues to host stack */ void *pwait = dev; /* linux compatibility */ /* @@ -1735,7 +1782,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) * txsync and rxsync if we decide to do a selrecord(). * retry_tx (and retry_rx, later) prevent looping forever. */ - int retry_tx = 1; + int retry_tx = 1, retry_rx = 1; (void)pwait; mbq_init(&q); @@ -1769,6 +1816,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) lim_rx = na->num_rx_rings; if (priv->np_qfirst == NETMAP_SW_RING) { + // XXX locking ? /* handle the host stack ring */ if (priv->np_txpoll || want_tx) { /* push any packets up, then we are always ready */ @@ -1777,29 +1825,15 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) } if (want_rx) { kring = &na->rx_rings[lim_rx]; - if (kring->ring->avail == 0) + /* XXX replace with rxprologue etc. */ + if (nm_ring_empty(kring->ring)) netmap_rxsync_from_host(na, td, dev); - if (kring->ring->avail > 0) { + if (!nm_ring_empty(kring->ring)) revents |= want_rx; - } } return (revents); } - /* - * If we are in transparent mode, check also the host rx ring - * XXX Transparent mode at the moment requires to bind all - * rings to a single file descriptor. - */ - kring = &na->rx_rings[lim_rx]; - if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all - && want_rx - && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) { - if (kring->ring->avail == 0) - netmap_rxsync_from_host(na, td, dev); - if (kring->ring->avail > 0) - revents |= want_rx; - } /* * check_all_{tx|rx} are set if the card has more than one queue AND @@ -1825,81 +1859,71 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) * We start with a lock free round which is cheap if we have * slots available. If this fails, then lock and call the sync * routines. - * XXX rather than ring->avail >0 should check that - * ring->cur has not reached hwcur+hwavail */ for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) { kring = &na->rx_rings[i]; - if (kring->ring->avail > 0) { + /* XXX compare ring->cur and kring->tail */ + if (!nm_ring_empty(kring->ring)) { revents |= want_rx; want_rx = 0; /* also breaks the loop */ } } for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) { kring = &na->tx_rings[i]; - if (kring->ring->avail > 0) { + /* XXX compare ring->cur and kring->tail */ + if (!nm_ring_empty(kring->ring)) { revents |= want_tx; want_tx = 0; /* also breaks the loop */ } } /* - * If we to push packets out (priv->np_txpoll) or want_tx is - * still set, we do need to run the txsync calls (on all rings, - * to avoid that the tx rings stall). 
+	 * If we want to push packets out (priv->np_txpoll) or
+	 * want_tx is still set, we must issue txsync calls
+	 * (on all rings, to avoid that the tx rings stall).
 	 * XXX should also check cur != hwcur on the tx rings.
 	 * Fortunately, normal tx mode has np_txpoll set.
 	 */
 	if (priv->np_txpoll || want_tx) {
-		/* If we really want to be woken up (want_tx),
-		 * do a selrecord, either on the global or on
-		 * the private structure. Then issue the txsync
-		 * so there is no race in the selrecord/selwait
+		/*
+		 * The first round checks if anyone is ready, if not
+		 * do a selrecord and another round to handle races.
+		 * want_tx goes to 0 if any space is found, and is
+		 * used to skip rings with no pending transmissions.
 		 */
 flush_tx:
 		for (i = priv->np_qfirst; i < lim_tx; i++) {
+			int found = 0;
+
 			kring = &na->tx_rings[i];
-			/*
-			 * Skip this ring if want_tx == 0
-			 * (we have already done a successful sync on
-			 * a previous ring) AND kring->cur == kring->hwcur
-			 * (there are no pending transmissions for this ring).
-			 */
 			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
 				continue;
-			/* make sure only one user thread is doing this */
+			/* only one thread does txsync */
 			if (nm_kr_tryget(kring)) {
-				ND("ring %p busy is %d",
-				    kring, (int)kring->nr_busy);
-				revents |= POLLERR;
-				goto out;
+				D("%p lost race on txring %d, ok", priv, i);
+				continue;
 			}
-
-			if (netmap_verbose & NM_VERB_TXSYNC)
-				D("send %d on %s %d",
-					kring->ring->cur, NM_IFPNAME(ifp), i);
-			if (na->nm_txsync(na, i, 0))
+			if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
+				netmap_ring_reinit(kring);
 				revents |= POLLERR;
+			} else {
+				if (na->nm_txsync(na, i, 0))
+					revents |= POLLERR;
+			}
 
-			/* Check avail and call selrecord only if
-			 * called with POLLOUT and run out of bufs.
-			 * XXX Note, we cannot trust much ring->avail
-			 * as it is exposed to userspace (even though
-			 * just updated by txsync). We should really
-			 * check kring->nr_hwavail or better have
-			 * txsync set a flag telling if we need
-			 * to do a selrecord().
+			/*
+			 * If we found new slots, notify potential
+			 * listeners on the same ring.
+			 * Since we just did a txsync, look at the copies
+			 * of cur,tail in the kring.
 			 */
-			if (want_tx) {
-				if (kring->ring->avail > 0) {
-					/* stop at the first ring. We don't risk
-					 * starvation.
-					 */
-					revents |= want_tx;
-					want_tx = 0;
-				}
-			}
+			found = kring->rcur != kring->rtail;
 			nm_kr_put(kring);
+			if (found) { /* notify other listeners */
+				revents |= want_tx;
+				want_tx = 0;
+				na->nm_notify(na, i, NR_TX, NAF_GLOBAL_NOTIFY);
+			}
 		}
 		if (want_tx && retry_tx) {
 			selrecord(td, check_all_tx ?
@@ -1910,21 +1934,27 @@ flush_tx:
 	}
 
 	/*
-	 * now if want_rx is still set we need to lock and rxsync.
+	 * If want_rx is still set scan receive rings.
 	 * Do it on all rings because otherwise we starve.
 	 */
 	if (want_rx) {
-		int retry_rx = 1;
+		int send_down = 0; /* transparent mode */
+		/* two rounds here for race avoidance */
do_retry_rx:
 		for (i = priv->np_qfirst; i < lim_rx; i++) {
+			int found = 0;
+
 			kring = &na->rx_rings[i];
 			if (nm_kr_tryget(kring)) {
-				revents |= POLLERR;
-				goto out;
+				D("%p lost race on rxring %d, ok", priv, i);
+				continue;
 			}
 
-			/* XXX NR_FORWARD should only be read on
+			/*
+			 * transparent mode support: collect packets
+			 * from the rxring(s).
+ * XXX NR_FORWARD should only be read on * physical or NIC ports */ if (netmap_fwd ||kring->ring->flags & NR_FORWARD) { @@ -1939,49 +1969,65 @@ do_retry_rx: kring->ring->flags & NR_TIMESTAMP) { microtime(&kring->ring->ts); } - - if (kring->ring->avail > 0) { + /* after an rxsync we can use kring->rcur, rtail */ + found = kring->rcur != kring->rtail; + nm_kr_put(kring); + if (found) { revents |= want_rx; retry_rx = 0; + na->nm_notify(na, i, NR_RX, NAF_GLOBAL_NOTIFY); } - nm_kr_put(kring); } - if (retry_rx) { - retry_rx = 0; + + /* transparent mode XXX only during first pass ? */ + kring = &na->rx_rings[lim_rx]; + if (check_all_rx + && (netmap_fwd || kring->ring->flags & NR_FORWARD)) { + /* XXX fix to use kring fields */ + if (nm_ring_empty(kring->ring)) + send_down = netmap_rxsync_from_host(na, td, dev); + if (!nm_ring_empty(kring->ring)) + revents |= want_rx; + } + + if (retry_rx) selrecord(td, check_all_rx ? &na->rx_si : &na->rx_rings[priv->np_qfirst].si); - goto do_retry_rx; + if (send_down > 0 || retry_rx) { + retry_rx = 0; + if (send_down) + goto flush_tx; /* and retry_rx */ + else + goto do_retry_rx; } } - /* forward host to the netmap ring. - * I am accessing nr_hwavail without lock, but netmap_transmit - * can only increment it, so the operation is safe. + /* + * Transparent mode: marked bufs on rx rings between + * kring->nr_hwcur and ring->head + * are passed to the other endpoint. + * + * In this mode we also scan the sw rxring, which in + * turn passes packets up. + * + * XXX Transparent mode at the moment requires to bind all + * rings to a single file descriptor. */ - kring = &na->rx_rings[lim_rx]; - if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all - && (netmap_fwd || kring->ring->flags & NR_FORWARD) - && kring->nr_hwavail > 0 && !host_forwarded) { - netmap_sw_to_nic(na); - host_forwarded = 1; /* prevent another pass */ - want_rx = 0; - goto flush_tx; - } if (q.head) netmap_send_up(na->ifp, &q); -out: - return (revents); } -/*------- driver support routines ------*/ + +/*-------------------- driver support routines -------------------*/ static int netmap_hw_krings_create(struct netmap_adapter *); static int -netmap_notify(struct netmap_adapter *na, u_int n_ring, enum txrx tx, int flags) +netmap_notify(struct netmap_adapter *na, u_int n_ring, + enum txrx tx, int flags) { struct netmap_kring *kring; @@ -2012,10 +2058,18 @@ netmap_attach_common(struct netmap_adapter *na) return EINVAL; } WNA(ifp) = na; + + /* the following is only needed for na that use the host port. + * XXX do we have something similar for linux ? + */ +#ifdef __FreeBSD__ + na->if_input = ifp->if_input; /* for netmap_send_up */ +#endif /* __FreeBSD__ */ + NETMAP_SET_CAPABLE(ifp); if (na->nm_krings_create == NULL) { na->nm_krings_create = netmap_hw_krings_create; - na->nm_krings_delete = netmap_krings_delete; + na->nm_krings_delete = netmap_hw_krings_delete; } if (na->nm_notify == NULL) na->nm_notify = netmap_notify; @@ -2051,12 +2105,8 @@ netmap_detach_common(struct netmap_adapter *na) * of hardware rings): * krings 0..N-1 are for the hardware queues. * kring N is for the host stack queue - * kring N+1 is only used for the selinfo for all queues. + * kring N+1 is only used for the selinfo for all queues. // XXX still true ? * Return 0 on success, ENOMEM otherwise. - * - * By default the receive and transmit adapter ring counts are both initialized - * to num_queues. na->num_tx_rings can be set for cards with different tx/rx - * setups. 
*/ int netmap_attach(struct netmap_adapter *arg) @@ -2132,8 +2182,14 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na) int netmap_hw_krings_create(struct netmap_adapter *na) { - return netmap_krings_create(na, + int ret = netmap_krings_create(na, na->num_tx_rings + 1, na->num_rx_rings + 1, 0); + if (ret == 0) { + /* initialize the mbq for the sw rx ring */ + mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue); + ND("initialized sw rx queue %d", na->num_rx_rings); + } + return ret; } @@ -2162,6 +2218,10 @@ netmap_detach(struct ifnet *ifp) /* * Intercept packets from the network stack and pass them * to netmap as incoming packets on the 'software' ring. + * + * We only store packets in a bounded mbq and then copy them + * in the relevant rxsync routine. + * * We rely on the OS to make sure that the ifp and na do not go * away (typically the caller checks for IFF_DRV_RUNNING or the like). * In nm_register() or whenever there is a reinitialization, @@ -2172,63 +2232,60 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) { struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring; - u_int i, len = MBUF_LEN(m); - u_int error = EBUSY, lim; - struct netmap_slot *slot; + u_int len = MBUF_LEN(m); + u_int error = ENOBUFS; + struct mbq *q; + int space; // XXX [Linux] we do not need this lock // if we follow the down/configure/up protocol -gl // mtx_lock(&na->core_lock); + if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) { - /* interface not in netmap mode anymore */ + D("%s not in netmap mode anymore", NM_IFPNAME(ifp)); error = ENXIO; goto done; } kring = &na->rx_rings[na->num_rx_rings]; - lim = kring->nkr_num_slots - 1; - if (netmap_verbose & NM_VERB_HOST) - D("%s packet %d len %d from the stack", NM_IFPNAME(ifp), - kring->nr_hwcur + kring->nr_hwavail, len); + q = &kring->rx_queue; + // XXX reconsider long packets if we handle fragments if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */ D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp), len, NETMAP_BDG_BUF_SIZE(na->nm_mem)); goto done; } - /* protect against other instances of netmap_transmit, - * and userspace invocations of rxsync(). + + /* protect against rxsync_from_host(), netmap_sw_to_nic() + * and maybe other instances of netmap_transmit (the latter + * not possible on Linux). + * Also avoid overflowing the queue. 
*/ - // XXX [Linux] there can be no other instances of netmap_transmit - // on this same ring, but we still need this lock to protect - // concurrent access from netmap_sw_to_nic() -gl - mtx_lock(&kring->q_lock); - if (kring->nr_hwavail >= lim) { - if (netmap_verbose) - D("stack ring %s full\n", NM_IFPNAME(ifp)); + mtx_lock(&q->lock); + + space = kring->nr_hwtail - kring->nr_hwcur; + if (space < 0) + space += kring->nkr_num_slots; + if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX + RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p", + NM_IFPNAME(ifp), kring->nr_hwcur, kring->nr_hwtail, mbq_len(q), + len, m); } else { - /* compute the insert position */ - i = nm_kr_rxpos(kring); - slot = &kring->ring->slot[i]; - m_copydata(m, 0, (int)len, BDG_NMB(na, slot)); - slot->len = len; - slot->flags = kring->nkr_slot_flags; - kring->nr_hwavail++; - if (netmap_verbose & NM_VERB_HOST) - D("wake up host ring %s %d", NM_IFPNAME(na->ifp), na->num_rx_rings); - na->nm_notify(na, na->num_rx_rings, NR_RX, 0); + mbq_enqueue(q, m); + ND(10, "%s %d bufs in queue len %d m %p", + NM_IFPNAME(ifp), mbq_len(q), len, m); + /* notify outside the lock */ + m = NULL; error = 0; } - mtx_unlock(&kring->q_lock); + mtx_unlock(&q->lock); done: - // mtx_unlock(&na->core_lock); - - /* release the mbuf in either cases of success or failure. As an - * alternative, put the mbuf in a free list and free the list - * only when really necessary. - */ - m_freem(m); + if (m) + m_freem(m); + /* unconditionally wake up listeners */ + na->nm_notify(na, na->num_rx_rings, NR_RX, 0); return (error); } @@ -2267,27 +2324,32 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, if (n >= na->num_tx_rings) return NULL; kring = na->tx_rings + n; + // XXX check whether we should use hwcur or rcur new_hwofs = kring->nr_hwcur - new_cur; } else { if (n >= na->num_rx_rings) return NULL; kring = na->rx_rings + n; - new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur; + new_hwofs = kring->nr_hwtail - new_cur; } lim = kring->nkr_num_slots - 1; if (new_hwofs > lim) new_hwofs -= lim + 1; /* Always set the new offset value and realign the ring. */ - D("%s hwofs %d -> %d, hwavail %d -> %d", - tx == NR_TX ? "TX" : "RX", + if (netmap_verbose) + D("%s %s%d hwofs %d -> %d, hwtail %d -> %d", + NM_IFPNAME(na->ifp), + tx == NR_TX ? "TX" : "RX", n, kring->nkr_hwofs, new_hwofs, - kring->nr_hwavail, - tx == NR_TX ? lim : kring->nr_hwavail); + kring->nr_hwtail, + tx == NR_TX ? lim : kring->nr_hwtail); kring->nkr_hwofs = new_hwofs; - if (tx == NR_TX) - kring->nr_hwavail = lim; - kring->nr_hwreserved = 0; + if (tx == NR_TX) { + kring->nr_hwtail = kring->nr_hwcur + lim; + if (kring->nr_hwtail > lim) + kring->nr_hwtail -= lim + 1; + } #if 0 // def linux /* XXX check that the mappings are correct */ @@ -2351,6 +2413,7 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done) } } + /* * Default functions to handle rx/tx interrupts from a physical device. * "work_done" is non-null on the RX path, NULL for the TX path. @@ -2397,6 +2460,7 @@ netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done) static struct cdev *netmap_dev; /* /dev/netmap character device. 
*/ extern struct cdevsw netmap_cdevsw; + void netmap_fini(void) { @@ -2408,6 +2472,7 @@ netmap_fini(void) printf("netmap: unloaded module.\n"); } + int netmap_init(void) { diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index c2814146d2ef..6716168526dc 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Universita` di Pisa. All rights reserved. + * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -86,21 +86,31 @@ netmap_catch_rx(struct netmap_adapter *na, int intercept) return 0; } + /* * Intercept the packet steering routine in the tx path, * so that we can decide which queue is used for an mbuf. * Second argument is non-zero to intercept, 0 to restore. * + * actually we also need to redirect the if_transmit ? + * * XXX see if FreeBSD has such a mechanism */ void -netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable) +netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) { + struct netmap_adapter *na = &gna->up.up; + struct ifnet *ifp = na->ifp; + if (enable) { + na->if_transmit = ifp->if_transmit; + ifp->if_transmit = netmap_transmit; } else { + ifp->if_transmit = na->if_transmit; } } + /* Transmit routine used by generic_netmap_txsync(). Returns 0 on success * and non-zero on error (which may be packet drops or other errors). * addr and len identify the netmap buffer, m is the (preallocated) @@ -126,16 +136,16 @@ generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, // copy data to the mbuf m_copyback(m, 0, len, addr); - // inc refcount. We are alone, so we can skip the atomic atomic_fetchadd_int(m->m_ext.ref_cnt, 1); m->m_flags |= M_FLOWID; m->m_pkthdr.flowid = ring_nr; m->m_pkthdr.rcvif = ifp; /* used for tx notification */ - ret = ifp->if_transmit(ifp, m); + ret = NA(ifp)->if_transmit(ifp, m); return ret; } + /* * The following two functions are empty until we have a generic * way to extract the info from the ifp @@ -147,6 +157,7 @@ generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) return 0; } + void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) { @@ -155,6 +166,7 @@ generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) *rxq = 1; } + void netmap_mitigation_init(struct netmap_generic_adapter *na) { ND("called"); @@ -167,22 +179,26 @@ void netmap_mitigation_start(struct netmap_generic_adapter *na) ND("called"); } + void netmap_mitigation_restart(struct netmap_generic_adapter *na) { ND("called"); } + int netmap_mitigation_active(struct netmap_generic_adapter *na) { ND("called"); return 0; } + void netmap_mitigation_cleanup(struct netmap_generic_adapter *na) { ND("called"); } + /* * In order to track whether pages are still mapped, we hook into * the standard cdev_pager and intercept the constructor and @@ -194,6 +210,7 @@ struct netmap_vm_handle_t { struct netmap_priv_d *priv; }; + static int netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) @@ -218,6 +235,7 @@ netmap_dev_pager_dtor(void *handle) dev_rel(dev); } + static int netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index 2c42db3f8862..109a734cac9f 100644 --- 
a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Universita` di Pisa. All rights reserved. + * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -82,7 +82,7 @@ __FBSDID("$FreeBSD$"); #include <dev/netmap/netmap_mem2.h> #define rtnl_lock() D("rtnl_lock called"); -#define rtnl_unlock() D("rtnl_lock called"); +#define rtnl_unlock() D("rtnl_unlock called"); #define MBUF_TXQ(m) ((m)->m_pkthdr.flowid) #define smp_mb() @@ -101,9 +101,9 @@ __FBSDID("$FreeBSD$"); * (or reinstall the buffer ?) */ #define SET_MBUF_DESTRUCTOR(m, fn) do { \ - (m)->m_ext.ext_free = (void *)fn; \ - (m)->m_ext.ext_type = EXT_EXTREF; \ - } while (0) + (m)->m_ext.ext_free = (void *)fn; \ + (m)->m_ext.ext_type = EXT_EXTREF; \ +} while (0) #define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1) @@ -137,43 +137,43 @@ __FBSDID("$FreeBSD$"); #ifdef RATE #define IFRATE(x) x struct rate_stats { - unsigned long txpkt; - unsigned long txsync; - unsigned long txirq; - unsigned long rxpkt; - unsigned long rxirq; - unsigned long rxsync; + unsigned long txpkt; + unsigned long txsync; + unsigned long txirq; + unsigned long rxpkt; + unsigned long rxirq; + unsigned long rxsync; }; struct rate_context { - unsigned refcount; - struct timer_list timer; - struct rate_stats new; - struct rate_stats old; + unsigned refcount; + struct timer_list timer; + struct rate_stats new; + struct rate_stats old; }; #define RATE_PRINTK(_NAME_) \ - printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD); + printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD); #define RATE_PERIOD 2 static void rate_callback(unsigned long arg) { - struct rate_context * ctx = (struct rate_context *)arg; - struct rate_stats cur = ctx->new; - int r; - - RATE_PRINTK(txpkt); - RATE_PRINTK(txsync); - RATE_PRINTK(txirq); - RATE_PRINTK(rxpkt); - RATE_PRINTK(rxsync); - RATE_PRINTK(rxirq); - printk("\n"); - - ctx->old = cur; - r = mod_timer(&ctx->timer, jiffies + - msecs_to_jiffies(RATE_PERIOD * 1000)); - if (unlikely(r)) - D("[v1000] Error: mod_timer()"); + struct rate_context * ctx = (struct rate_context *)arg; + struct rate_stats cur = ctx->new; + int r; + + RATE_PRINTK(txpkt); + RATE_PRINTK(txsync); + RATE_PRINTK(txirq); + RATE_PRINTK(rxpkt); + RATE_PRINTK(rxsync); + RATE_PRINTK(rxirq); + printk("\n"); + + ctx->old = cur; + r = mod_timer(&ctx->timer, jiffies + + msecs_to_jiffies(RATE_PERIOD * 1000)); + if (unlikely(r)) + D("[v1000] Error: mod_timer()"); } static struct rate_context rate_ctx; @@ -197,150 +197,150 @@ netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done) if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP))) return; - netmap_common_irq(ifp, q, work_done); + netmap_common_irq(ifp, q, work_done); } /* Enable/disable netmap mode for a generic network interface. 
*/ -int generic_netmap_register(struct netmap_adapter *na, int enable) +static int +generic_netmap_register(struct netmap_adapter *na, int enable) { - struct ifnet *ifp = na->ifp; - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; - struct mbuf *m; - int error; - int i, r; + struct ifnet *ifp = na->ifp; + struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; + struct mbuf *m; + int error; + int i, r; - if (!na) - return EINVAL; + if (!na) + return EINVAL; #ifdef REG_RESET - error = ifp->netdev_ops->ndo_stop(ifp); - if (error) { - return error; - } + error = ifp->netdev_ops->ndo_stop(ifp); + if (error) { + return error; + } #endif /* REG_RESET */ - if (enable) { /* Enable netmap mode. */ - /* Initialize the rx queue, as generic_rx_handler() can - * be called as soon as netmap_catch_rx() returns. - */ - for (r=0; r<na->num_rx_rings; r++) { - mbq_safe_init(&na->rx_rings[r].rx_queue); - na->rx_rings[r].nr_ntc = 0; - } - - /* Init the mitigation timer. */ - netmap_mitigation_init(gna); - - /* - * Preallocate packet buffers for the tx rings. - */ - for (r=0; r<na->num_tx_rings; r++) { - na->tx_rings[r].nr_ntc = 0; - na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (!na->tx_rings[r].tx_pool) { - D("tx_pool allocation failed"); - error = ENOMEM; - goto free_tx_pool; - } - for (i=0; i<na->num_tx_desc; i++) { - m = netmap_get_mbuf(GENERIC_BUF_SIZE); - if (!m) { - D("tx_pool[%d] allocation failed", i); - error = ENOMEM; - goto free_mbufs; - } - na->tx_rings[r].tx_pool[i] = m; - } - } - rtnl_lock(); - /* Prepare to intercept incoming traffic. */ - error = netmap_catch_rx(na, 1); - if (error) { - D("netdev_rx_handler_register() failed"); - goto register_handler; - } - ifp->if_capenable |= IFCAP_NETMAP; - - /* Make netmap control the packet steering. */ - netmap_catch_packet_steering(gna, 1); - - rtnl_unlock(); + if (enable) { /* Enable netmap mode. */ + /* Initialize the rx queue, as generic_rx_handler() can + * be called as soon as netmap_catch_rx() returns. + */ + for (r=0; r<na->num_rx_rings; r++) { + mbq_safe_init(&na->rx_rings[r].rx_queue); + } + + /* Init the mitigation timer. */ + netmap_mitigation_init(gna); + + /* + * Preallocate packet buffers for the tx rings. + */ + for (r=0; r<na->num_tx_rings; r++) + na->tx_rings[r].tx_pool = NULL; + for (r=0; r<na->num_tx_rings; r++) { + na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (!na->tx_rings[r].tx_pool) { + D("tx_pool allocation failed"); + error = ENOMEM; + goto free_tx_pools; + } + for (i=0; i<na->num_tx_desc; i++) + na->tx_rings[r].tx_pool[i] = NULL; + for (i=0; i<na->num_tx_desc; i++) { + m = netmap_get_mbuf(GENERIC_BUF_SIZE); + if (!m) { + D("tx_pool[%d] allocation failed", i); + error = ENOMEM; + goto free_tx_pools; + } + na->tx_rings[r].tx_pool[i] = m; + } + } + rtnl_lock(); + /* Prepare to intercept incoming traffic. */ + error = netmap_catch_rx(na, 1); + if (error) { + D("netdev_rx_handler_register() failed"); + goto register_handler; + } + ifp->if_capenable |= IFCAP_NETMAP; + + /* Make netmap control the packet steering. 
*/ + netmap_catch_tx(gna, 1); + + rtnl_unlock(); #ifdef RATE - if (rate_ctx.refcount == 0) { - D("setup_timer()"); - memset(&rate_ctx, 0, sizeof(rate_ctx)); - setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx); - if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) { - D("Error: mod_timer()"); - } - } - rate_ctx.refcount++; + if (rate_ctx.refcount == 0) { + D("setup_timer()"); + memset(&rate_ctx, 0, sizeof(rate_ctx)); + setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx); + if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) { + D("Error: mod_timer()"); + } + } + rate_ctx.refcount++; #endif /* RATE */ - } else { /* Disable netmap mode. */ - rtnl_lock(); + } else { /* Disable netmap mode. */ + rtnl_lock(); - ifp->if_capenable &= ~IFCAP_NETMAP; + ifp->if_capenable &= ~IFCAP_NETMAP; - /* Release packet steering control. */ - netmap_catch_packet_steering(gna, 0); + /* Release packet steering control. */ + netmap_catch_tx(gna, 0); - /* Do not intercept packets on the rx path. */ - netmap_catch_rx(na, 0); + /* Do not intercept packets on the rx path. */ + netmap_catch_rx(na, 0); - rtnl_unlock(); + rtnl_unlock(); - /* Free the mbufs going to the netmap rings */ - for (r=0; r<na->num_rx_rings; r++) { - mbq_safe_purge(&na->rx_rings[r].rx_queue); - mbq_safe_destroy(&na->rx_rings[r].rx_queue); - } + /* Free the mbufs going to the netmap rings */ + for (r=0; r<na->num_rx_rings; r++) { + mbq_safe_purge(&na->rx_rings[r].rx_queue); + mbq_safe_destroy(&na->rx_rings[r].rx_queue); + } - netmap_mitigation_cleanup(gna); + netmap_mitigation_cleanup(gna); - for (r=0; r<na->num_tx_rings; r++) { - for (i=0; i<na->num_tx_desc; i++) { - m_freem(na->tx_rings[r].tx_pool[i]); - } - free(na->tx_rings[r].tx_pool, M_DEVBUF); - } + for (r=0; r<na->num_tx_rings; r++) { + for (i=0; i<na->num_tx_desc; i++) { + m_freem(na->tx_rings[r].tx_pool[i]); + } + free(na->tx_rings[r].tx_pool, M_DEVBUF); + } #ifdef RATE - if (--rate_ctx.refcount == 0) { - D("del_timer()"); - del_timer(&rate_ctx.timer); - } + if (--rate_ctx.refcount == 0) { + D("del_timer()"); + del_timer(&rate_ctx.timer); + } #endif - } + } #ifdef REG_RESET - error = ifp->netdev_ops->ndo_open(ifp); - if (error) { - goto alloc_tx_pool; - } + error = ifp->netdev_ops->ndo_open(ifp); + if (error) { + goto alloc_tx_pool; + } #endif - return 0; + return 0; register_handler: - rtnl_unlock(); -free_tx_pool: - r--; - i = na->num_tx_desc; /* Useless, but just to stay safe. 
*/ -free_mbufs: - i--; - for (; r>=0; r--) { - for (; i>=0; i--) { - m_freem(na->tx_rings[r].tx_pool[i]); - } - free(na->tx_rings[r].tx_pool, M_DEVBUF); - i = na->num_tx_desc - 1; - } - - return error; + rtnl_unlock(); +free_tx_pools: + for (r=0; r<na->num_tx_rings; r++) { + if (na->tx_rings[r].tx_pool == NULL) + continue; + for (i=0; i<na->num_tx_desc; i++) + if (na->tx_rings[r].tx_pool[i]) + m_freem(na->tx_rings[r].tx_pool[i]); + free(na->tx_rings[r].tx_pool, M_DEVBUF); + } + + return error; } /* @@ -351,93 +351,88 @@ free_mbufs: static void generic_mbuf_destructor(struct mbuf *m) { - if (netmap_verbose) - D("Tx irq (%p) queue %d", m, MBUF_TXQ(m)); - netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL); + if (netmap_verbose) + D("Tx irq (%p) queue %d", m, MBUF_TXQ(m)); + netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL); #ifdef __FreeBSD__ - m->m_ext.ext_type = EXT_PACKET; - m->m_ext.ext_free = NULL; - if (*(m->m_ext.ref_cnt) == 0) - *(m->m_ext.ref_cnt) = 1; - uma_zfree(zone_pack, m); + m->m_ext.ext_type = EXT_PACKET; + m->m_ext.ext_free = NULL; + if (*(m->m_ext.ref_cnt) == 0) + *(m->m_ext.ref_cnt) = 1; + uma_zfree(zone_pack, m); #endif /* __FreeBSD__ */ - IFRATE(rate_ctx.new.txirq++); + IFRATE(rate_ctx.new.txirq++); } -/* Record completed transmissions and update hwavail. +/* Record completed transmissions and update hwtail. * - * nr_ntc is the oldest tx buffer not yet completed - * (same as nr_hwavail + nr_hwcur + 1), + * The oldest tx buffer not yet completed is at nr_hwtail + 1, * nr_hwcur is the first unsent buffer. - * When cleaning, we try to recover buffers between nr_ntc and nr_hwcur. */ -static int +static u_int generic_netmap_tx_clean(struct netmap_kring *kring) { - u_int num_slots = kring->nkr_num_slots; - u_int ntc = kring->nr_ntc; - u_int hwcur = kring->nr_hwcur; - u_int n = 0; - struct mbuf **tx_pool = kring->tx_pool; - - while (ntc != hwcur) { /* buffers not completed */ - struct mbuf *m = tx_pool[ntc]; - - if (unlikely(m == NULL)) { - /* try to replenish the entry */ - tx_pool[ntc] = m = netmap_get_mbuf(GENERIC_BUF_SIZE); - if (unlikely(m == NULL)) { - D("mbuf allocation failed, XXX error"); - // XXX how do we proceed ? break ? - return -ENOMEM; - } - } else if (GET_MBUF_REFCNT(m) != 1) { - break; /* This mbuf is still busy: its refcnt is 2. */ + u_int const lim = kring->nkr_num_slots - 1; + u_int nm_i = nm_next(kring->nr_hwtail, lim); + u_int hwcur = kring->nr_hwcur; + u_int n = 0; + struct mbuf **tx_pool = kring->tx_pool; + + while (nm_i != hwcur) { /* buffers not completed */ + struct mbuf *m = tx_pool[nm_i]; + + if (unlikely(m == NULL)) { + /* this is done, try to replenish the entry */ + tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE); + if (unlikely(m == NULL)) { + D("mbuf allocation failed, XXX error"); + // XXX how do we proceed ? break ? + return -ENOMEM; + } + } else if (GET_MBUF_REFCNT(m) != 1) { + break; /* This mbuf is still busy: its refcnt is 2. */ + } + n++; + nm_i = nm_next(nm_i, lim); } - if (unlikely(++ntc == num_slots)) { - ntc = 0; - } - n++; - } - kring->nr_ntc = ntc; - kring->nr_hwavail += n; - ND("tx completed [%d] -> hwavail %d", n, kring->nr_hwavail); - - return n; + kring->nr_hwtail = nm_prev(nm_i, lim); + ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail); + + return n; } /* - * We have pending packets in the driver between nr_ntc and j. + * We have pending packets in the driver between nr_hwtail +1 and hwcur. * Compute a position in the middle, to be used to generate * a notification. 
 */
 static inline u_int
 generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
 {
-    u_int n = kring->nkr_num_slots;
-    u_int ntc = kring->nr_ntc;
-    u_int e;
-
-    if (hwcur >= ntc) {
-        e = (hwcur + ntc) / 2;
-    } else { /* wrap around */
-        e = (hwcur + n + ntc) / 2;
-        if (e >= n) {
-            e -= n;
-        }
-    }
-
-    if (unlikely(e >= n)) {
-        D("This cannot happen");
-        e = 0;
-    }
-
-    return e;
+	u_int n = kring->nkr_num_slots;
+	u_int ntc = nm_next(kring->nr_hwtail, n-1);
+	u_int e;
+
+	if (hwcur >= ntc) {
+		e = (hwcur + ntc) / 2;
+	} else { /* wrap around */
+		e = (hwcur + n + ntc) / 2;
+		if (e >= n) {
+			e -= n;
+		}
+	}
+
+	if (unlikely(e >= n)) {
+		D("This cannot happen");
+		e = 0;
+	}
+
+	return e;
 }

 /*
- * We have pending packets in the driver between nr_ntc and hwcur.
+ * We have pending packets in the driver between nr_hwtail+1 and hwcur.
  * Schedule a notification approximately in the middle of the two.
  * There is a race but this is only called within txsync which does
  * a double check.
@@ -445,28 +440,28 @@ generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
 static void
 generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
 {
-    struct mbuf *m;
-    u_int e;
-
-    if (kring->nr_ntc == hwcur) {
-        return;
-    }
-    e = generic_tx_event_middle(kring, hwcur);
-
-    m = kring->tx_pool[e];
-    if (m == NULL) {
-        /* This can happen if there is already an event on the netmap
-           slot 'e': There is nothing to do. */
-        return;
-    }
-    ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
-    kring->tx_pool[e] = NULL;
-    SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
-
-    // XXX wmb() ?
-    /* Decrement the refcount an free it if we have the last one. */
-    m_freem(m);
-    smp_mb();
+	struct mbuf *m;
+	u_int e;
+
+	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
+		return; /* all buffers are free */
+	}
+	e = generic_tx_event_middle(kring, hwcur);
+
+	m = kring->tx_pool[e];
+	if (m == NULL) {
+		/* This can happen if there is already an event on the netmap
+		   slot 'e': There is nothing to do. */
+		return;
+	}
+	ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
+	kring->tx_pool[e] = NULL;
+	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
+
+	// XXX wmb() ?
+	/* Decrement the refcount and free it if we have the last one. */
+	m_freem(m);
+	smp_mb();
 }

@@ -480,133 +475,108 @@ generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
 static int
 generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
 {
-    struct ifnet *ifp = na->ifp;
-    struct netmap_kring *kring = &na->tx_rings[ring_nr];
-    struct netmap_ring *ring = kring->ring;
-    u_int j, k, num_slots = kring->nkr_num_slots;
-    int new_slots, ntx;
-
-    IFRATE(rate_ctx.new.txsync++);
-
-    // TODO: handle the case of mbuf allocation failure
-    /* first, reclaim completed buffers */
-    generic_netmap_tx_clean(kring);
-
-    /* Take a copy of ring->cur now, and never read it again. */
-    k = ring->cur;
-    if (unlikely(k >= num_slots)) {
-        return netmap_ring_reinit(kring);
-    }
-
-    rmb();
-    j = kring->nr_hwcur;
-    /*
-     * 'new_slots' counts how many new slots have been added:
-     * everything from hwcur to cur, excluding reserved ones, if any.
-     * nr_hwreserved start from hwcur and counts how many slots were
-     * not sent to the NIC from the previous round.
-     */
-    new_slots = k - j - kring->nr_hwreserved;
-    if (new_slots < 0) {
-        new_slots += num_slots;
-    }
-    ntx = 0;
-    if (j != k) {
-        /* Process new packets to send:
-         * j is the current index in the netmap ring.
+	struct ifnet *ifp = na->ifp;
+	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;	/* index into the netmap ring */ // j
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+
+	IFRATE(rate_ctx.new.txsync++);
+
+	// TODO: handle the case of mbuf allocation failure
+
+	rmb();
+
+	/*
+	 * First part: process new packets to send.
 	 */
-    while (j != k) {
-        struct netmap_slot *slot = &ring->slot[j]; /* Current slot in the netmap ring */
-        void *addr = NMB(slot);
-        u_int len = slot->len;
-        struct mbuf *m;
-        int tx_ret;
-
-        if (unlikely(addr == netmap_buffer_base || len > NETMAP_BUF_SIZE)) {
-            return netmap_ring_reinit(kring);
-        }
-        /* Tale a mbuf from the tx pool and copy in the user packet. */
-        m = kring->tx_pool[j];
-        if (unlikely(!m)) {
-            RD(5, "This should never happen");
-            kring->tx_pool[j] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
-            if (unlikely(m == NULL)) {
-                D("mbuf allocation failed");
-                break;
-            }
-        }
-        /* XXX we should ask notifications when NS_REPORT is set,
-         * or roughly every half frame. We can optimize this
-         * by lazily requesting notifications only when a
-         * transmission fails. Probably the best way is to
-         * break on failures and set notifications when
-         * ring->avail == 0 || j != k
-         */
-        tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
-        if (unlikely(tx_ret)) {
-            RD(5, "start_xmit failed: err %d [%u,%u,%u,%u]",
-                tx_ret, kring->nr_ntc, j, k, kring->nr_hwavail);
-            /*
-             * No room for this mbuf in the device driver.
-             * Request a notification FOR A PREVIOUS MBUF,
-             * then call generic_netmap_tx_clean(kring) to do the
-             * double check and see if we can free more buffers.
-             * If there is space continue, else break;
-             * NOTE: the double check is necessary if the problem
-             * occurs in the txsync call after selrecord().
-             * Also, we need some way to tell the caller that not
-             * all buffers were queued onto the device (this was
-             * not a problem with native netmap driver where space
-             * is preallocated). The bridge has a similar problem
-             * and we solve it there by dropping the excess packets.
-             */
-            generic_set_tx_event(kring, j);
-            if (generic_netmap_tx_clean(kring)) { /* space now available */
-                continue;
-            } else {
-                break;
-            }
-        }
-        slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
-        if (unlikely(++j == num_slots))
-            j = 0;
-        ntx++;
-    }
-
-    /* Update hwcur to the next slot to transmit. */
-    kring->nr_hwcur = j;
-
-    /*
-     * Report all new slots as unavailable, even those not sent.
-     * We account for them with with hwreserved, so that
-     * nr_hwreserved =:= cur - nr_hwcur
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {	/* we have new packets to send */
+		while (nm_i != head) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
+			void *addr = NMB(slot);
+
+			/* device-specific */
+			struct mbuf *m;
+			int tx_ret;
+
+			NM_CHECK_ADDR_LEN(addr, len);
+
+			/* Take an mbuf from the tx pool and copy in the user packet. */
+			m = kring->tx_pool[nm_i];
+			if (unlikely(!m)) {
+				RD(5, "This should never happen");
+				kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+				if (unlikely(m == NULL)) {
+					D("mbuf allocation failed");
+					break;
+				}
+			}
+			/* XXX we should ask notifications when NS_REPORT is set,
+			 * or roughly every half frame. We can optimize this
+			 * by lazily requesting notifications only when a
+			 * transmission fails.
Probably the best way is to + * break on failures and set notifications when + * ring->cur == ring->tail || nm_i != cur + */ + tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr); + if (unlikely(tx_ret)) { + RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]", + tx_ret, nm_i, head, kring->nr_hwtail); + /* + * No room for this mbuf in the device driver. + * Request a notification FOR A PREVIOUS MBUF, + * then call generic_netmap_tx_clean(kring) to do the + * double check and see if we can free more buffers. + * If there is space continue, else break; + * NOTE: the double check is necessary if the problem + * occurs in the txsync call after selrecord(). + * Also, we need some way to tell the caller that not + * all buffers were queued onto the device (this was + * not a problem with native netmap driver where space + * is preallocated). The bridge has a similar problem + * and we solve it there by dropping the excess packets. + */ + generic_set_tx_event(kring, nm_i); + if (generic_netmap_tx_clean(kring)) { /* space now available */ + continue; + } else { + break; + } + } + slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); + nm_i = nm_next(nm_i, lim); + } + + /* Update hwcur to the next slot to transmit. */ + kring->nr_hwcur = nm_i; /* not head, we could break early */ + + IFRATE(rate_ctx.new.txpkt += ntx); + } + + /* + * Second, reclaim completed buffers */ - kring->nr_hwavail -= new_slots; - kring->nr_hwreserved = k - j; - if (kring->nr_hwreserved < 0) { - kring->nr_hwreserved += num_slots; - } - - IFRATE(rate_ctx.new.txpkt += ntx); - - if (!kring->nr_hwavail) { - /* No more available slots? Set a notification event - * on a netmap slot that will be cleaned in the future. - * No doublecheck is performed, since txsync() will be - * called twice by netmap_poll(). - */ - generic_set_tx_event(kring, j); - } - ND("tx #%d, hwavail = %d", n, kring->nr_hwavail); - } - - /* Synchronize the user's view to the kernel view. */ - ring->avail = kring->nr_hwavail; - ring->reserved = kring->nr_hwreserved; - - return 0; + if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { + /* No more available slots? Set a notification event + * on a netmap slot that will be cleaned in the future. + * No doublecheck is performed, since txsync() will be + * called twice by netmap_poll(). + */ + generic_set_tx_event(kring, nm_i); + } + ND("tx #%d, hwtail = %d", n, kring->nr_hwtail); + + generic_netmap_tx_clean(kring); + + nm_txsync_finalize(kring); + + return 0; } + /* * This handler is registered (through netmap_catch_rx()) * within the attached network interface @@ -615,38 +585,38 @@ generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) * Stolen packets are put in a queue where the * generic_netmap_rxsync() callback can extract them. 
*/ -void generic_rx_handler(struct ifnet *ifp, struct mbuf *m) +void +generic_rx_handler(struct ifnet *ifp, struct mbuf *m) { - struct netmap_adapter *na = NA(ifp); - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; - u_int work_done; - u_int rr = 0; // receive ring number - - ND("called"); - /* limit the size of the queue */ - if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) { - m_freem(m); - } else { - mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m); - } - - if (netmap_generic_mit < 32768) { - /* no rx mitigation, pass notification up */ - netmap_generic_irq(na->ifp, rr, &work_done); - IFRATE(rate_ctx.new.rxirq++); - } else { - /* same as send combining, filter notification if there is a - * pending timer, otherwise pass it up and start a timer. - */ - if (likely(netmap_mitigation_active(gna))) { - /* Record that there is some pending work. */ - gna->mit_pending = 1; - } else { - netmap_generic_irq(na->ifp, rr, &work_done); - IFRATE(rate_ctx.new.rxirq++); - netmap_mitigation_start(gna); - } - } + struct netmap_adapter *na = NA(ifp); + struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; + u_int work_done; + u_int rr = 0; // receive ring number + + /* limit the size of the queue */ + if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) { + m_freem(m); + } else { + mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m); + } + + if (netmap_generic_mit < 32768) { + /* no rx mitigation, pass notification up */ + netmap_generic_irq(na->ifp, rr, &work_done); + IFRATE(rate_ctx.new.rxirq++); + } else { + /* same as send combining, filter notification if there is a + * pending timer, otherwise pass it up and start a timer. + */ + if (likely(netmap_mitigation_active(gna))) { + /* Record that there is some pending work. */ + gna->mit_pending = 1; + } else { + netmap_generic_irq(na->ifp, rr, &work_done); + IFRATE(rate_ctx.new.rxirq++); + netmap_mitigation_start(gna); + } + } } /* @@ -658,105 +628,99 @@ void generic_rx_handler(struct ifnet *ifp, struct mbuf *m) static int generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) { - struct netmap_kring *kring = &na->rx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int j, n, lim = kring->nkr_num_slots - 1; - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - u_int k, resvd = ring->reserved; - - if (ring->cur > lim) - return netmap_ring_reinit(kring); - - /* Import newly received packets into the netmap ring. */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - struct mbuf *m; - - n = 0; - j = kring->nr_ntc; /* first empty slot in the receive ring */ - /* extract buffers from the rx queue, stop at most one - * slot before nr_hwcur (index k) + struct netmap_kring *kring = &na->rx_rings[ring_nr]; + struct netmap_ring *ring = kring->ring; + u_int nm_i; /* index into the netmap ring */ //j, + u_int n; + u_int const lim = kring->nkr_num_slots - 1; + u_int const head = nm_rxsync_prologue(kring); + int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; + + if (head > lim) + return netmap_ring_reinit(kring); + + /* + * First part: import newly received packets. 
+ */ + if (netmap_no_pendintr || force_update) { + /* extract buffers from the rx queue, stop at most one + * slot before nr_hwcur (stop_i) + */ + uint16_t slot_flags = kring->nkr_slot_flags; + u_int stop_i = nm_prev(kring->nr_hwcur, lim); + + nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */ + for (n = 0; nm_i != stop_i; n++) { + int len; + void *addr = NMB(&ring->slot[nm_i]); + struct mbuf *m; + + /* we only check the address here on generic rx rings */ + if (addr == netmap_buffer_base) { /* Bad buffer */ + return netmap_ring_reinit(kring); + } + /* + * Call the locked version of the function. + * XXX Ideally we could grab a batch of mbufs at once + * and save some locking overhead. + */ + m = mbq_safe_dequeue(&kring->rx_queue); + if (!m) /* no more data */ + break; + len = MBUF_LEN(m); + m_copydata(m, 0, len, addr); + ring->slot[nm_i].len = len; + ring->slot[nm_i].flags = slot_flags; + m_freem(m); + nm_i = nm_next(nm_i, lim); + n++; + } + if (n) { + kring->nr_hwtail = nm_i; + IFRATE(rate_ctx.new.rxpkt += n); + } + kring->nr_kflags &= ~NKR_PENDINTR; + } + + // XXX should we invert the order ? + /* + * Second part: skip past packets that userspace has released. */ - k = (kring->nr_hwcur) ? kring->nr_hwcur-1 : lim; - while (j != k) { - int len; - void *addr = NMB(&ring->slot[j]); - - if (addr == netmap_buffer_base) { /* Bad buffer */ - return netmap_ring_reinit(kring); - } - /* - * Call the locked version of the function. - * XXX Ideally we could grab a batch of mbufs at once, - * by changing rx_queue into a ring. - */ - m = mbq_safe_dequeue(&kring->rx_queue); - if (!m) - break; - len = MBUF_LEN(m); - m_copydata(m, 0, len, addr); - ring->slot[j].len = len; - ring->slot[j].flags = slot_flags; - m_freem(m); - if (unlikely(j++ == lim)) - j = 0; - n++; - } - if (n) { - kring->nr_ntc = j; - kring->nr_hwavail += n; - IFRATE(rate_ctx.new.rxpkt += n); - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - // XXX should we invert the order ? - /* Skip past packets that userspace has released */ - j = kring->nr_hwcur; - k = ring->cur; - if (resvd > 0) { - if (resvd + ring->avail >= lim + 1) { - D("XXX invalid reserve/avail %d %d", resvd, ring->avail); - ring->reserved = resvd = 0; // XXX panic... - } - k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd; - } - if (j != k) { - /* Userspace has released some packets. */ - for (n = 0; j != k; n++) { - struct netmap_slot *slot = &ring->slot[j]; - - slot->flags &= ~NS_BUF_CHANGED; - if (unlikely(j++ == lim)) - j = 0; - } - kring->nr_hwavail -= n; - kring->nr_hwcur = k; - } - /* Tell userspace that there are new packets. */ - ring->avail = kring->nr_hwavail - resvd; - IFRATE(rate_ctx.new.rxsync++); - - return 0; + nm_i = kring->nr_hwcur; + if (nm_i != head) { + /* Userspace has released some packets. */ + for (n = 0; nm_i != head; n++) { + struct netmap_slot *slot = &ring->slot[nm_i]; + + slot->flags &= ~NS_BUF_CHANGED; + nm_i = nm_next(nm_i, lim); + } + kring->nr_hwcur = head; + } + /* tell userspace that there might be new packets. 
*/ + nm_rxsync_finalize(kring); + IFRATE(rate_ctx.new.rxsync++); + + return 0; } static void generic_netmap_dtor(struct netmap_adapter *na) { - struct ifnet *ifp = na->ifp; - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na; - struct netmap_adapter *prev_na = gna->prev; - - if (prev_na != NULL) { - D("Released generic NA %p", gna); - if_rele(na->ifp); - netmap_adapter_put(prev_na); - } - if (ifp != NULL) { - WNA(ifp) = prev_na; - D("Restored native NA %p", prev_na); - na->ifp = NULL; - } + struct ifnet *ifp = na->ifp; + struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na; + struct netmap_adapter *prev_na = gna->prev; + + if (prev_na != NULL) { + D("Released generic NA %p", gna); + if_rele(na->ifp); + netmap_adapter_put(prev_na); + } + if (ifp != NULL) { + WNA(ifp) = prev_na; + D("Restored native NA %p", prev_na); + na->ifp = NULL; + } } /* @@ -773,46 +737,46 @@ generic_netmap_dtor(struct netmap_adapter *na) int generic_netmap_attach(struct ifnet *ifp) { - struct netmap_adapter *na; - struct netmap_generic_adapter *gna; - int retval; - u_int num_tx_desc, num_rx_desc; - - num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ - - generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); - ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); - - gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO); - if (gna == NULL) { - D("no memory on attach, give up"); - return ENOMEM; - } - na = (struct netmap_adapter *)gna; - na->ifp = ifp; - na->num_tx_desc = num_tx_desc; - na->num_rx_desc = num_rx_desc; - na->nm_register = &generic_netmap_register; - na->nm_txsync = &generic_netmap_txsync; - na->nm_rxsync = &generic_netmap_rxsync; - na->nm_dtor = &generic_netmap_dtor; - /* when using generic, IFCAP_NETMAP is set so we force - * NAF_SKIP_INTR to use the regular interrupt handler - */ - na->na_flags = NAF_SKIP_INTR; - - ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", - ifp->num_tx_queues, ifp->real_num_tx_queues, - ifp->tx_queue_len); - ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)", - ifp->num_rx_queues, ifp->real_num_rx_queues); - - generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings); - - retval = netmap_attach_common(na); - if (retval) { - free(gna, M_DEVBUF); - } - - return retval; + struct netmap_adapter *na; + struct netmap_generic_adapter *gna; + int retval; + u_int num_tx_desc, num_rx_desc; + + num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ + + generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); + ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); + + gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO); + if (gna == NULL) { + D("no memory on attach, give up"); + return ENOMEM; + } + na = (struct netmap_adapter *)gna; + na->ifp = ifp; + na->num_tx_desc = num_tx_desc; + na->num_rx_desc = num_rx_desc; + na->nm_register = &generic_netmap_register; + na->nm_txsync = &generic_netmap_txsync; + na->nm_rxsync = &generic_netmap_rxsync; + na->nm_dtor = &generic_netmap_dtor; + /* when using generic, IFCAP_NETMAP is set so we force + * NAF_SKIP_INTR to use the regular interrupt handler + */ + na->na_flags = NAF_SKIP_INTR; + + ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", + ifp->num_tx_queues, ifp->real_num_tx_queues, + ifp->tx_queue_len); + ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)", + ifp->num_rx_queues, ifp->real_num_rx_queues); + + generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings); + + retval = 
netmap_attach_common(na); + if (retval) { + free(gna, M_DEVBUF); + } + + return retval; } diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 9381cd4cedd3..74a46297ff3d 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -1,6 +1,6 @@ /* - * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. - * Copyright (C) 2013 Universita` di Pisa. All rights reserved. + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -53,7 +53,7 @@ #define NM_SELINFO_T struct selinfo #define MBUF_LEN(m) ((m)->m_pkthdr.len) #define MBUF_IFP(m) ((m)->m_pkthdr.rcvif) -#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m) +#define NM_SEND_UP(ifp, m) ((NA(ifp))->if_input)(ifp, m) #define NM_ATOMIC_T volatile int // XXX ? /* atomic operations */ @@ -76,7 +76,11 @@ struct hrtimer { #define NM_SELINFO_T wait_queue_head_t #define MBUF_LEN(m) ((m)->len) #define MBUF_IFP(m) ((m)->dev) -#define NM_SEND_UP(ifp, m) netif_rx(m) +#define NM_SEND_UP(ifp, m) \ + do { \ + m->priority = NM_MAGIC_PRIORITY; \ + netif_rx(m); \ + } while (0) #define NM_ATOMIC_T volatile long unsigned int @@ -125,9 +129,9 @@ struct hrtimer { do { \ struct timeval __xxts; \ microtime(&__xxts); \ - printf("%03d.%06d %s [%d] " format "\n", \ + printf("%03d.%06d [%4d] %-25s " format "\n", \ (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \ - __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + __LINE__, __FUNCTION__, ##__VA_ARGS__); \ } while (0) /* rate limited, lps indicates how many per second */ @@ -158,15 +162,23 @@ extern NMG_LOCK_T netmap_global_lock; * a ring across system calls. * * nr_hwcur index of the next buffer to refill. - * It corresponds to ring->cur - ring->reserved + * It corresponds to ring->head + * at the time the system call returns. * - * nr_hwavail the number of slots "owned" by userspace. - * nr_hwavail =:= ring->avail + ring->reserved + * nr_hwtail index of the first buffer owned by the kernel. + * On RX, hwcur->hwtail are receive buffers + * not yet released. hwcur is advanced following + * ring->head, hwtail is advanced on incoming packets, + * and a wakeup is generated when hwtail passes ring->cur + * On TX, hwcur->rcur have been filled by the sender + * but not sent yet to the NIC; rcur->hwtail are available + * for new transmissions, and hwtail->hwcur-1 are pending + * transmissions not yet acknowledged. * * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots. * This is so that, on a reset, buffers owned by userspace are not * modified by the kernel. In particular: - * RX rings: the next empty buffer (hwcur + hwavail + hwofs) coincides with + * RX rings: the next empty buffer (hwtail + hwofs) coincides with * the next empty buffer as known by the hardware (next_to_check or so). * TX rings: hwcur + hwofs coincides with next_to_send * @@ -184,44 +196,76 @@ extern NMG_LOCK_T netmap_global_lock; * from nr_hwlease, advances it, then does the * copy outside the lock. 
 * In RX rings (used for VALE ports),
- *	nkr_hwcur + nkr_hwavail <= nkr_hwlease < nkr_hwcur+N-1
+ *	nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
 * In TX rings (used for NIC or host stack ports)
- *	nkr_hwcur <= nkr_hwlease < nkr_hwcur+ nkr_hwavail
+ *	nkr_hwcur <= nkr_hwlease < nkr_hwtail
 * nkr_leases	array of nkr_num_slots where writers can report
 *		completion of their block. NR_NOSLOT (~0) indicates
 *		that the writer has not finished yet
 * nkr_lease_idx	index of next free slot in nr_leases, to be assigned
 *
 * The kring is manipulated by txsync/rxsync and generic netmap function.
- * q_lock is used to arbitrate access to the kring from within the netmap
- * code, and this and other protections guarantee that there is never
- * more than 1 concurrent call to txsync or rxsync. So we are free
- * to manipulate the kring from within txsync/rxsync without any extra
- * locks.
+ *
+ * Concurrent rxsync or txsync on the same ring are prevented
+ * by nm_kr_lock() which in turn uses nr_busy. This is all we need
+ * for NIC rings, and for TX rings attached to the host stack.
+ *
+ * RX rings attached to the host stack use an mbq (rx_queue) on both
+ * rxsync_from_host() and netmap_transmit(). The mbq is protected
+ * by its internal lock.
+ *
+ * RX rings attached to the VALE switch are accessed by both sender
+ * and receiver. They are protected through the q_lock on the RX ring.
 */
struct netmap_kring {
-	struct netmap_ring *ring;
-	uint32_t nr_hwcur;
-	uint32_t nr_hwavail;
-	uint32_t nr_kflags;	/* private driver flags */
-	int32_t nr_hwreserved;
-#define NKR_PENDINTR	0x1	// Pending interrupt.
-	uint32_t nkr_num_slots;
-	int32_t	nkr_hwofs;	/* offset between NIC and netmap ring */
+	struct netmap_ring *ring;
+
+	uint32_t nr_hwcur;
+	uint32_t nr_hwtail;
+
+	/*
+	 * Copies of values in user rings, so we do not need to look
+	 * at the ring (which could be modified). These are set in the
+	 * *sync_prologue()/finalize() routines.
+	 */
+	uint32_t rhead;
+	uint32_t rcur;
+	uint32_t rtail;
+
+	uint32_t nr_kflags;	/* private driver flags */
+#define NKR_PENDINTR	0x1	// Pending interrupt.
+	uint32_t nkr_num_slots;
+
+	/*
+	 * On a NIC reset, the NIC ring indexes may be reset but the
+	 * indexes in the netmap rings remain the same. nkr_hwofs
+	 * keeps track of the offset between the two.
+	 */
+	int32_t	nkr_hwofs;
	uint16_t nkr_slot_flags;	/* initial value for flags */
+
+	/* last_reclaim is an opaque marker to help reduce the frequency
+	 * of operations such as reclaiming tx buffers. A possible use
+	 * is to set it to ticks and do the reclaim only once per tick.
+	 */
+	uint64_t last_reclaim;
+
+
+	NM_SELINFO_T si;	/* poll/select wait queue */
+	NM_LOCK_T q_lock;	/* protects kring and ring. */
+	NM_ATOMIC_T nr_busy;	/* prevent concurrent syscalls */
+
	struct netmap_adapter *na;
-	struct nm_bdg_fwd *nkr_ft;
-	uint32_t *nkr_leases;
-#define NR_NOSLOT ((uint32_t)~0)
-	uint32_t nkr_hwlease;
-	uint32_t nkr_lease_idx;
-	NM_SELINFO_T si;	/* poll/select wait queue */
-	NM_LOCK_T q_lock;	/* protects kring and ring. */
-	NM_ATOMIC_T nr_busy;	/* prevent concurrent syscalls */
+	/* The following fields are for VALE switch support */
+	struct nm_bdg_fwd *nkr_ft;
+	uint32_t *nkr_leases;
+#define NR_NOSLOT ((uint32_t)~0)	/* used in nkr_*lease* */
+	uint32_t nkr_hwlease;
+	uint32_t nkr_lease_idx;

-	volatile int nkr_stopped;
+	volatile int nkr_stopped;	// XXX what for ?

	/* support for adapters without native netmap support.
* On tx rings we preallocate an array of tx buffers @@ -230,8 +274,11 @@ struct netmap_kring { * XXX who writes to the rx queue ? */ struct mbuf **tx_pool; - u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */ - struct mbq rx_queue; /* A queue for intercepted rx mbufs. */ + // u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */ + struct mbq rx_queue; /* intercepted rx mbufs. */ + + uint32_t ring_id; /* debugging */ + char name[64]; /* diagnostic */ } __attribute__((__aligned__(64))); @@ -243,6 +290,15 @@ nm_next(uint32_t i, uint32_t lim) return unlikely (i == lim) ? 0 : i + 1; } + +/* return the previous index, with wraparound */ +static inline uint32_t +nm_prev(uint32_t i, uint32_t lim) +{ + return unlikely (i == 0) ? lim : i - 1; +} + + /* * * Here is the layout for the Rx and Tx rings. @@ -253,36 +309,36 @@ nm_next(uint32_t i, uint32_t lim) | | | | |XXX free slot XXX| |XXX free slot XXX| +-----------------+ +-----------------+ - | |<-hwcur | |<-hwcur - | reserved h | | (ready | - +----------- w -+ | to be | - cur->| a | | sent) h | - | v | +---------- w | - | a | cur->| (being a | - | i | | prepared) v | - | avail l | | a | - +-----------------+ + a ------ i + - | | ... | v l |<-hwlease - | (being | ... | a | ... - | prepared) | ... | i | ... - +-----------------+ ... | l | ... - | |<-hwlease +-----------------+ +head->| owned by user |<-hwcur | not sent to nic |<-hwcur + | | | yet | + +-----------------+ | | + cur->| available to | | | + | user, not read | +-----------------+ + | yet | cur->| (being | + | | | prepared) | | | | | + +-----------------+ + ------ + +tail->| |<-hwtail | |<-hwlease + | (being | ... | | ... + | prepared) | ... | | ... + +-----------------+ ... | | ... + | |<-hwlease +-----------------+ + | | tail->| |<-hwtail | | | | | | | | | | | | +-----------------+ +-----------------+ - * The cur/avail (user view) and hwcur/hwavail (kernel view) + * The cur/tail (user view) and hwcur/hwtail (kernel view) * are used in the normal operation of the card. * * When a ring is the output of a switch port (Rx ring for * a VALE port, Tx ring for the host stack or NIC), slots * are reserved in blocks through 'hwlease' which points * to the next unused slot. - * On an Rx ring, hwlease is always after hwavail, - * and completions cause avail to advance. - * On a Tx ring, hwlease is always between cur and hwavail, + * On an Rx ring, hwlease is always after hwtail, + * and completions cause hwtail to advance. + * On a Tx ring, hwlease is always between cur and hwtail, * and completions cause cur to advance. * * nm_kr_space() returns the maximum number of slots that @@ -294,7 +350,6 @@ nm_next(uint32_t i, uint32_t lim) - enum txrx { NR_RX = 0, NR_TX = 1 }; /* @@ -349,6 +404,7 @@ struct netmap_adapter { */ struct netmap_kring *tx_rings; /* array of TX rings. */ struct netmap_kring *rx_rings; /* array of RX rings. */ + void *tailroom; /* space below the rings array */ /* (used for leases) */ @@ -360,11 +416,38 @@ struct netmap_adapter { */ int (*if_transmit)(struct ifnet *, struct mbuf *); + /* copy of if_input for netmap_send_up() */ + void (*if_input)(struct ifnet *, struct mbuf *); + /* references to the ifnet and device routines, used by * the generic netmap functions. */ struct ifnet *ifp; /* adapter is ifp->if_softc */ + /*---- callbacks for this netmap adapter -----*/ + /* + * nm_dtor() is the cleanup routine called when destroying + * the adapter. 
+ *
+ * nm_register() is called on NIOCREGIF and close() to enter
+ * or exit netmap mode on the NIC
+ *
+ * nm_txsync() pushes packets to the underlying hw/switch
+ *
+ * nm_rxsync() collects packets from the underlying hw/switch
+ *
+ * nm_config() returns configuration information from the OS
+ *
+ * nm_krings_create() XXX
+ *
+ * nm_krings_delete() XXX
+ *
+ * nm_notify() is used to act after data have become available.
+ * For hw devices this is typically a selwakeup(),
+ * but for NIC/host ports attached to a switch (or vice-versa)
+ * we also need to invoke the 'txsync' code downstream.
+ */
+
	/* private cleanup */
	void (*nm_dtor)(struct netmap_adapter *);
@@ -403,6 +486,7 @@ struct netmap_adapter {
	void *na_private;
 };
+
 /*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
@@ -433,13 +517,15 @@ struct netmap_vp_adapter {	/* VALE software port */
	u_int offset;	/* Offset of ethernet header for each packet. */
 };
+
 struct netmap_hw_adapter {	/* physical device */
	struct netmap_adapter up;
	struct net_device_ops nm_ndo;	// XXX linux only
 };
-struct netmap_generic_adapter {	/* non-native device */
+
+struct netmap_generic_adapter {	/* emulated device */
	struct netmap_hw_adapter up;
	/* Pointer to a previously used netmap adapter. */
@@ -455,16 +541,20 @@ struct netmap_generic_adapter {	/* non-native device */
	struct hrtimer mit_timer;
	int mit_pending;
+#ifdef linux
+	netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
+#endif
 };
 #ifdef WITH_VALE
-/* bridge wrapper for non VALE ports. It is used to connect real devices to the bridge.
+/*
+ * Bridge wrapper for non VALE ports attached to a VALE switch.
 *
- * The real device must already have its own netmap adapter (hwna). The
- * bridge wrapper and the hwna adapter share the same set of netmap rings and
- * buffers, but they have two separate sets of krings descriptors, with tx/rx
- * meanings swapped:
+ * The real device must already have its own netmap adapter (hwna).
+ * The bridge wrapper and the hwna adapter share the same set of
+ * netmap rings and buffers, but they have two separate sets of
+ * krings descriptors, with tx/rx meanings swapped:
 *
 *                                  netmap
 *           bwrap     krings       rings      krings      hwna
@@ -478,23 +568,28 @@ struct netmap_generic_adapter {	/* non-native device */
 * | |       +------+   +-----+    +------+     | |
 * +------+                                     +------+
 *
- * - packets coming from the bridge go to the brwap rx rings, which are also the
- * hwna tx rings. The bwrap notify callback will then complete the hwna tx
- * (see netmap_bwrap_notify).
- * - packets coming from the outside go to the hwna rx rings, which are also the
- * bwrap tx rings. The (overwritten) hwna notify method will then complete
- * the bridge tx (see netmap_bwrap_intr_notify).
+ * - packets coming from the bridge go to the bwrap rx rings,
+ *   which are also the hwna tx rings. The bwrap notify callback
+ *   will then complete the hwna tx (see netmap_bwrap_notify).
 *
- * The bridge wrapper may optionally connect the hwna 'host' rings to the
- * bridge. This is done by using a second port in the bridge and connecting it
- * to the 'host' netmap_vp_adapter contained in the netmap_bwrap_adapter.
- * The brwap host adapter cross-links the hwna host rings in the same way as shown above.
+ * - packets coming from the outside go to the hwna rx rings,
+ *   which are also the bwrap tx rings. The (overwritten) hwna
+ *   notify method will then complete the bridge tx
+ *   (see netmap_bwrap_intr_notify).
 *
- * - packets coming from the bridge and directed to host stack are handled by the
- * bwrap host notify callback (see netmap_bwrap_host_notify)
- * - packets coming from the host stack are still handled by the overwritten
- * hwna notify callback (netmap_bwrap_intr_notify), but are diverted to the
- * host adapter depending on the ring number.
+ * The bridge wrapper may optionally connect the hwna 'host' rings
+ * to the bridge. This is done by using a second port in the
+ * bridge and connecting it to the 'host' netmap_vp_adapter
+ * contained in the netmap_bwrap_adapter. The bwrap host adapter
+ * cross-links the hwna host rings in the same way as shown above.
+ *
+ * - packets coming from the bridge and directed to the host stack
+ *   are handled by the bwrap host notify callback
+ *   (see netmap_bwrap_host_notify)
+ *
+ * - packets coming from the host stack are still handled by the
+ *   overwritten hwna notify callback (netmap_bwrap_intr_notify),
+ *   but are diverted to the host adapter depending on the ring number.
 *
 */
 struct netmap_bwrap_adapter {
@@ -505,103 +600,39 @@ struct netmap_bwrap_adapter {
	/* backup of the hwna notify callback */
	int (*save_notify)(struct netmap_adapter *, u_int ring, enum txrx, int flags);
-	/* When we attach a physical interface to the bridge, we
+
+	/*
+	 * When we attach a physical interface to the bridge, we
	 * allow the controlling process to terminate, so we need
	 * a place to store the netmap_priv_d data structure.
-	 * This is only done when physical interfaces are attached to a bridge.
+	 * This is only done when physical interfaces
+	 * are attached to a bridge.
	 */
	struct netmap_priv_d *na_kpriv;
 };
-/*
- * Available space in the ring. Only used in VALE code
- */
-static inline uint32_t
-nm_kr_space(struct netmap_kring *k, int is_rx)
-{
-	int space;
-
-	if (is_rx) {
-		int busy = k->nkr_hwlease - k->nr_hwcur + k->nr_hwreserved;
-		if (busy < 0)
-			busy += k->nkr_num_slots;
-		space = k->nkr_num_slots - 1 - busy;
-	} else {
-		space = k->nr_hwcur + k->nr_hwavail - k->nkr_hwlease;
-		if (space < 0)
-			space += k->nkr_num_slots;
-	}
-#if 0
-	// sanity check
-	if (k->nkr_hwlease >= k->nkr_num_slots ||
-		k->nr_hwcur >= k->nkr_num_slots ||
-		k->nr_hwavail >= k->nkr_num_slots ||
-		busy < 0 ||
-		busy >= k->nkr_num_slots) {
-		D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
-			k->nkr_lease_idx, k->nkr_num_slots);
-	}
-#endif
-	return space;
-}
-
-
+#endif /* WITH_VALE */

-/* make a lease on the kring for N positions.
return the - * lease index - */ +/* return slots reserved to rx clients; used in drivers */ static inline uint32_t -nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) +nm_kr_rxspace(struct netmap_kring *k) { - uint32_t lim = k->nkr_num_slots - 1; - uint32_t lease_idx = k->nkr_lease_idx; - - k->nkr_leases[lease_idx] = NR_NOSLOT; - k->nkr_lease_idx = nm_next(lease_idx, lim); + int space = k->nr_hwtail - k->nr_hwcur; + if (space < 0) + space += k->nkr_num_slots; + ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail); - if (n > nm_kr_space(k, is_rx)) { - D("invalid request for %d slots", n); - panic("x"); - } - /* XXX verify that there are n slots */ - k->nkr_hwlease += n; - if (k->nkr_hwlease > lim) - k->nkr_hwlease -= lim + 1; - - if (k->nkr_hwlease >= k->nkr_num_slots || - k->nr_hwcur >= k->nkr_num_slots || - k->nr_hwavail >= k->nkr_num_slots || - k->nkr_lease_idx >= k->nkr_num_slots) { - D("invalid kring %s, cur %d avail %d lease %d lease_idx %d lim %d", - k->na->ifp->if_xname, - k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease, - k->nkr_lease_idx, k->nkr_num_slots); - } - return lease_idx; + return space; } -#endif /* WITH_VALE */ -/* return update position */ -static inline uint32_t -nm_kr_rxpos(struct netmap_kring *k) +/* True if no space in the tx ring. only valid after txsync_prologue */ +static inline int +nm_kr_txempty(struct netmap_kring *kring) { - uint32_t pos = k->nr_hwcur + k->nr_hwavail; - if (pos >= k->nkr_num_slots) - pos -= k->nkr_num_slots; -#if 0 - if (pos >= k->nkr_num_slots || - k->nkr_hwlease >= k->nkr_num_slots || - k->nr_hwcur >= k->nkr_num_slots || - k->nr_hwavail >= k->nkr_num_slots || - k->nkr_lease_idx >= k->nkr_num_slots) { - D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease, - k->nkr_lease_idx, k->nkr_num_slots); - } -#endif - return pos; + return kring->rcur == kring->nr_hwtail; } @@ -613,11 +644,13 @@ nm_kr_rxpos(struct netmap_kring *k) #define NM_KR_BUSY 1 #define NM_KR_STOPPED 2 + static __inline void nm_kr_put(struct netmap_kring *kr) { NM_ATOMIC_CLEAR(&kr->nr_busy); } + static __inline int nm_kr_tryget(struct netmap_kring *kr) { /* check a first time without taking the lock @@ -640,7 +673,7 @@ static __inline int nm_kr_tryget(struct netmap_kring *kr) /* - * The following are support routines used by individual drivers to + * The following functions are used by individual drivers to * support netmap operation. * * netmap_attach() initializes a struct netmap_adapter, allocating the @@ -666,7 +699,17 @@ struct netmap_slot *netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, u_int new_cur); int netmap_ring_reinit(struct netmap_kring *); -/* set/clear native flags. XXX maybe also if_transmit ? 
 */
+/* default functions to handle rx/tx interrupts */
+int netmap_rx_irq(struct ifnet *, u_int, u_int *);
+#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
+void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
+
+void netmap_disable_all_rings(struct ifnet *);
+void netmap_enable_all_rings(struct ifnet *);
+void netmap_disable_ring(struct netmap_kring *kr);
+
+
+/* set/clear native flags and if_transmit/netdev_ops */
 static inline void
 nm_set_native_flags(struct netmap_adapter *na)
 {
@@ -685,6 +728,7 @@ nm_set_native_flags(struct netmap_adapter *na)
 #endif
 }
+
 static inline void
 nm_clear_native_flags(struct netmap_adapter *na)
 {
@@ -701,36 +745,58 @@ nm_clear_native_flags(struct netmap_adapter *na)
 #endif
 }
+
 /*
- * validates parameters in the ring/kring, returns a value for cur,
- * and the 'new_slots' value in the argument.
- * If any error, returns cur > lim to force a reinit.
+ * validates parameters in the ring/kring, returns a value for head
+ * If any error, returns ring_size to force a reinit.
 */
-u_int nm_txsync_prologue(struct netmap_kring *, u_int *);
+uint32_t nm_txsync_prologue(struct netmap_kring *);
+
 /*
- * validates parameters in the ring/kring, returns a value for cur,
+ * validates parameters in the ring/kring, returns a value for head,
- * and the 'reserved' value in the argument.
- * If any error, returns cur > lim to force a reinit.
+ * If any error, returns ring_size to force a reinit.
+ */
+uint32_t nm_rxsync_prologue(struct netmap_kring *);
+
+
+/*
+ * update kring and ring at the end of txsync.
 */
-u_int nm_rxsync_prologue(struct netmap_kring *, u_int *);
+static inline void
+nm_txsync_finalize(struct netmap_kring *kring)
+{
+	/* update ring head/tail to what the kernel knows */
+	kring->ring->tail = kring->rtail = kring->nr_hwtail;
+	kring->ring->head = kring->rhead = kring->nr_hwcur;
+
+	/* note, head/rhead/hwcur might be behind cur/rcur
+	 * if no carrier
+	 */
+	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
		kring->name, kring->nr_hwcur, kring->nr_hwtail,
		kring->rhead, kring->rcur, kring->rtail);
+}
+
 /*
- * update kring and ring at the end of txsync
+ * update kring and ring at the end of rxsync
 */
 static inline void
-nm_txsync_finalize(struct netmap_kring *kring, u_int cur)
+nm_rxsync_finalize(struct netmap_kring *kring)
 {
-	/* recompute hwreserved */
-	kring->nr_hwreserved = cur - kring->nr_hwcur;
-	if (kring->nr_hwreserved < 0)
-		kring->nr_hwreserved += kring->nkr_num_slots;
-
-	/* update avail and reserved to what the kernel knows */
-	kring->ring->avail = kring->nr_hwavail;
-	kring->ring->reserved = kring->nr_hwreserved;
+	/* tell userspace that there might be new packets */
+	//struct netmap_ring *ring = kring->ring;
+	ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail,
		kring->nr_hwtail);
+	kring->ring->tail = kring->rtail = kring->nr_hwtail;
+	/* make a copy of the state for next round */
+	kring->rhead = kring->ring->head;
+	kring->rcur = kring->ring->cur;
 }
+
 /* check/fix address and len in tx rings */
 #if 1 /* debug version */
 #define	NM_CHECK_ADDR_LEN(_a, _l)	do {				\
@@ -755,6 +821,8 @@ nm_txsync_finalize(struct netmap_kring *kring, u_int cur)
 int netmap_update_config(struct netmap_adapter *na);
 int netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom);
 void netmap_krings_delete(struct netmap_adapter *na);
+int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
+
 struct netmap_if *
 netmap_do_regif(struct netmap_priv_d *priv,
struct netmap_adapter *na, @@ -766,10 +834,13 @@ u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg); int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create); int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na); + #ifdef WITH_VALE /* - * The following bridge-related interfaces are used by other kernel modules - * In the version that only supports unicast or broadcast, the lookup + * The following bridge-related functions are used by other + * kernel modules. + * + * VALE only supports unicast or broadcast. The lookup * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown. * XXX in practice "unknown" might be handled same as broadcast. @@ -799,8 +870,6 @@ int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func); /* Various prototypes */ int netmap_poll(struct cdev *dev, int events, struct thread *td); - - int netmap_init(void); void netmap_fini(void); int netmap_get_memory(struct netmap_priv_d* p); @@ -811,7 +880,8 @@ int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct t /* netmap_adapter creation/destruction */ #define NM_IFPNAME(ifp) ((ifp) ? (ifp)->if_xname : "zombie") -#define NM_DEBUG_PUTGET 1 + +// #define NM_DEBUG_PUTGET 1 #ifdef NM_DEBUG_PUTGET @@ -844,12 +914,15 @@ int netmap_adapter_put(struct netmap_adapter *na); #endif /* !NM_DEBUG_PUTGET */ +/* + * module variables + */ extern u_int netmap_buf_size; #define NETMAP_BUF_SIZE netmap_buf_size // XXX remove -extern int netmap_mitigate; +extern int netmap_mitigate; // XXX not really used extern int netmap_no_pendintr; -extern u_int netmap_total_buffers; -extern char *netmap_buffer_base; +extern u_int netmap_total_buffers; // global allocator +extern char *netmap_buffer_base; // global allocator extern int netmap_verbose; // XXX debugging enum { /* verbose flags */ NM_VERB_ON = 1, /* generic verbose */ @@ -908,7 +981,7 @@ extern int netmap_generic_ringsize; #ifdef __FreeBSD__ -/* Callback invoked by the dma machinery after a successfull dmamap_load */ +/* Callback invoked by the dma machinery after a successful dmamap_load */ static void netmap_dmamap_cb(__unused void *arg, __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error) { @@ -1053,31 +1126,27 @@ BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot) lut[0].vaddr : lut[i].vaddr; } -/* default functions to handle rx/tx interrupts */ -int netmap_rx_irq(struct ifnet *, u_int, u_int *); -#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL) -void netmap_common_irq(struct ifnet *, u_int, u_int *work_done); void netmap_txsync_to_host(struct netmap_adapter *na); -void netmap_disable_all_rings(struct ifnet *); -void netmap_enable_all_rings(struct ifnet *); -void netmap_disable_ring(struct netmap_kring *kr); -/* Structure associated to each thread which registered an interface. +/* + * Structure associated to each thread which registered an interface. * * The first 4 fields of this structure are written by NIOCREGIF and * read by poll() and NIOC?XSYNC. - * There is low contention among writers (actually, a correct user program - * should have no contention among writers) and among writers and readers, - * so we use a single global lock to protect the structure initialization. - * Since initialization involves the allocation of memory, we reuse the memory - * allocator lock. 
+ *
+ * There is low contention among writers (a correct user program
+ * should have none) and among writers and readers, so we use a
+ * single global lock to protect the structure initialization;
+ * since initialization involves the allocation of memory,
+ * we reuse the memory allocator lock.
+ *
 * Read access to the structure is lock free. Readers must check that
 * np_nifp is not NULL before using the other fields.
- * If np_nifp is NULL initialization has not been performed, so they should
- * return an error to userlevel.
+ * If np_nifp is NULL initialization has not been performed,
+ * so they should return an error to userspace.
 *
 * The ref_done field is used to regulate access to the refcount in the
 * memory allocator. The refcount must be incremented at most once for
@@ -1091,38 +1160,29 @@ struct netmap_priv_d {
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */
	struct netmap_adapter	*np_na;
-	int	np_ringid;	/* from the ioctl */
-	u_int	np_qfirst, np_qlast;	/* range of rings to scan */
-	uint16_t	np_txpoll;
+	int		np_ringid;	/* from the ioctl */
+	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
+	uint16_t	np_txpoll;
	struct netmap_mem_d	*np_mref;	/* use with NMG_LOCK held */
	/* np_refcount is only used on FreeBSD */
-	int	np_refcount;	/* use with NMG_LOCK held */
+	int		np_refcount;	/* use with NMG_LOCK held */
 };
 /*
 * generic netmap emulation for devices that do not have
 * native netmap support.
- * XXX generic_netmap_register() is only exported to implement
- * nma_is_generic().
 */
-int generic_netmap_register(struct netmap_adapter *na, int enable);
 int generic_netmap_attach(struct ifnet *ifp);
 int netmap_catch_rx(struct netmap_adapter *na, int intercept);
 void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
-void netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable);
+void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
 int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
 int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
 void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
-static __inline int
-nma_is_generic(struct netmap_adapter *na)
-{
-	return na->nm_register == generic_netmap_register;
-}
-
 /*
 * netmap_mitigation API. This is used by the generic adapter
 * to reduce the number of interrupt requests/selwakeup
@@ -1134,6 +1194,4 @@ void netmap_mitigation_restart(struct netmap_generic_adapter *na);
 int netmap_mitigation_active(struct netmap_generic_adapter *na);
 void netmap_mitigation_cleanup(struct netmap_generic_adapter *na);
-// int generic_timer_handler(struct hrtimer *t);
-
 #endif /* _NET_NETMAP_KERN_H_ */
diff --git a/sys/dev/netmap/netmap_mbq.c b/sys/dev/netmap/netmap_mbq.c
index c8e581b69fe5..2606b13d48dc 100644
--- a/sys/dev/netmap/netmap_mbq.c
+++ b/sys/dev/netmap/netmap_mbq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Vincenzo Maffione. All rights reserved.
+ * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -47,17 +47,20 @@ static inline void __mbq_init(struct mbq *q) q->count = 0; } + void mbq_safe_init(struct mbq *q) { mtx_init(&q->lock, "mbq", NULL, MTX_SPIN); __mbq_init(q); } + void mbq_init(struct mbq *q) { __mbq_init(q); } + static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m) { m->m_nextpkt = NULL; @@ -70,6 +73,7 @@ static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m) q->count++; } + void mbq_safe_enqueue(struct mbq *q, struct mbuf *m) { mtx_lock(&q->lock); @@ -77,11 +81,13 @@ void mbq_safe_enqueue(struct mbq *q, struct mbuf *m) mtx_unlock(&q->lock); } + void mbq_enqueue(struct mbq *q, struct mbuf *m) { __mbq_enqueue(q, m); } + static inline struct mbuf *__mbq_dequeue(struct mbq *q) { struct mbuf *ret = NULL; @@ -99,6 +105,7 @@ static inline struct mbuf *__mbq_dequeue(struct mbq *q) return ret; } + struct mbuf *mbq_safe_dequeue(struct mbq *q) { struct mbuf *ret; @@ -110,11 +117,13 @@ struct mbuf *mbq_safe_dequeue(struct mbq *q) return ret; } + struct mbuf *mbq_dequeue(struct mbq *q) { return __mbq_dequeue(q); } + /* XXX seems pointless to have a generic purge */ static void __mbq_purge(struct mbq *q, int safe) { @@ -130,16 +139,19 @@ static void __mbq_purge(struct mbq *q, int safe) } } + void mbq_purge(struct mbq *q) { __mbq_purge(q, 0); } + void mbq_safe_purge(struct mbq *q) { __mbq_purge(q, 1); } + void mbq_safe_destroy(struct mbq *q) { mtx_destroy(&q->lock); @@ -149,4 +161,3 @@ void mbq_safe_destroy(struct mbq *q) void mbq_destroy(struct mbq *q) { } - diff --git a/sys/dev/netmap/netmap_mbq.h b/sys/dev/netmap/netmap_mbq.h index ad023b617a5d..d273d8a8fa23 100644 --- a/sys/dev/netmap/netmap_mbq.h +++ b/sys/dev/netmap/netmap_mbq.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Vincenzo Maffione. All rights reserved. + * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index f28f2c04751a..b25f79cef3a4 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2013 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. + * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -506,7 +506,7 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj p->r_objsize = objsize; #define MAX_CLUSTSIZE (1<<17) -#define LINE_ROUND 64 +#define LINE_ROUND NM_CACHE_ALIGN // 64 if (objsize >= MAX_CLUSTSIZE) { /* we could do it but there is no point */ D("unsupported allocation for %d bytes", objsize); @@ -960,13 +960,15 @@ netmap_mem_rings_create(struct netmap_adapter *na) ND("txring[%d] at %p ofs %d", i, ring); kring->ring = ring; *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; - *(ssize_t *)(uintptr_t)&ring->buf_ofs = + *(int64_t *)(uintptr_t)&ring->buf_ofs = (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - netmap_ring_offset(na->nm_mem, ring); - ring->avail = kring->nr_hwavail; - ring->cur = kring->nr_hwcur; + /* copy values from kring */ + ring->head = kring->rhead; + ring->cur = kring->rcur; + ring->tail = kring->rtail; *(uint16_t *)(uintptr_t)&ring->nr_buf_size = NETMAP_BDG_BUF_SIZE(na->nm_mem); ND("initializing slots for txring"); @@ -989,13 +991,15 @@ netmap_mem_rings_create(struct netmap_adapter *na) kring->ring = ring; *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; - *(ssize_t *)(uintptr_t)&ring->buf_ofs = + *(int64_t *)(uintptr_t)&ring->buf_ofs = (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - netmap_ring_offset(na->nm_mem, ring); - ring->cur = kring->nr_hwcur; - ring->avail = kring->nr_hwavail; + /* copy values from kring */ + ring->head = kring->rhead; + ring->cur = kring->rcur; + ring->tail = kring->rtail; *(int *)(uintptr_t)&ring->nr_buf_size = NETMAP_BDG_BUF_SIZE(na->nm_mem); ND("initializing slots for rxring[%d]", i); diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index f492f9814b79..8e6c58cbc4ee 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2013 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. + * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index 32d6422de120..f988b84e78b2 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Universita` di Pisa. All rights reserved. + * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -251,44 +251,6 @@ struct nm_bridge nm_bridges[NM_BRIDGES]; /* - * A few function to tell which kind of port are we using. - * XXX should we hold a lock ? 
- * - * nma_is_vp() virtual port - * nma_is_host() port connected to the host stack - * nma_is_hw() port connected to a NIC - * nma_is_generic() generic netmap adapter XXX stop this madness - */ -static __inline int -nma_is_vp(struct netmap_adapter *na) -{ - return na->nm_register == bdg_netmap_reg; -} - - -static __inline int -nma_is_host(struct netmap_adapter *na) -{ - return na->nm_register == NULL; -} - - -static __inline int -nma_is_hw(struct netmap_adapter *na) -{ - /* In case of sw adapter, nm_register is NULL */ - return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na); -} - -static __inline int -nma_is_bwrap(struct netmap_adapter *na) -{ - return na->nm_register == netmap_bwrap_register; -} - - - -/* * this is a slightly optimized copy routine which rounds * to multiple of 64 bytes and is often faster than dealing * with other odd sizes. We assume there is enough room @@ -318,7 +280,6 @@ pkt_copy(void *_src, void *_dst, int l) } - /* * locate a bridge among the existing ones. * MUST BE CALLED WITH NMG_LOCK() @@ -393,8 +354,8 @@ nm_free_bdgfwd(struct netmap_adapter *na) struct netmap_kring *kring; NMG_LOCK_ASSERT(); - nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings; - kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings; + nrings = na->num_tx_rings; + kring = na->tx_rings; for (i = 0; i < nrings; i++) { if (kring[i].nkr_ft) { free(kring[i].nkr_ft, M_DEVBUF); @@ -502,6 +463,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) } } + static void netmap_adapter_vp_dtor(struct netmap_adapter *na) { @@ -520,6 +482,16 @@ netmap_adapter_vp_dtor(struct netmap_adapter *na) na->ifp = NULL; } + +/* Try to get a reference to a netmap adapter attached to a VALE switch. + * If the adapter is found (or is created), this function returns 0, a + * non NULL pointer is returned into *na, and the caller holds a + * reference to the adapter. + * If an adapter is not found, then no reference is grabbed and the + * function returns an error code, or 0 if there is just a VALE prefix + * mismatch. Therefore the caller holds a reference when + * (*na != NULL && return == 0). + */ int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) { @@ -688,18 +660,12 @@ nm_bdg_attach(struct nmreq *nmr) return ENOMEM; NMG_LOCK(); /* XXX probably netmap_get_bdg_na() */ - error = netmap_get_na(nmr, &na, 1 /* create if not exists */); + error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); if (error) /* no device, or another bridge or user owns the device */ goto unlock_exit; - /* netmap_get_na() sets na_bdg if this is a physical interface - * that we can attach to a switch. - */ - if (!nma_is_bwrap(na)) { - /* got reference to a virtual port or direct access to a NIC. - * perhaps specified no bridge prefix or wrong NIC name - */ + if (na == NULL) { /* VALE prefix missing */ error = EINVAL; - goto unref_exit; + goto unlock_exit; } if (na->active_fds > 0) { /* already registered */ @@ -727,6 +693,7 @@ unlock_exit: return error; } + static int nm_bdg_detach(struct nmreq *nmr) { @@ -736,17 +703,15 @@ nm_bdg_detach(struct nmreq *nmr) int last_instance; NMG_LOCK(); - error = netmap_get_na(nmr, &na, 0 /* don't create */); + error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); if (error) { /* no device, or another bridge or user owns the device */ goto unlock_exit; } - if (!nma_is_bwrap(na)) { - /* got reference to a virtual port or direct access to a NIC. 
- * perhaps specified no bridge's prefix or wrong NIC's name - */ + if (na == NULL) { /* VALE prefix missing */ error = EINVAL; - goto unref_exit; + goto unlock_exit; } + bna = (struct netmap_bwrap_adapter *)na; if (na->active_fds == 0) { /* not registered */ @@ -890,12 +855,13 @@ netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func) case NETMAP_BDG_OFFSET: NMG_LOCK(); error = netmap_get_bdg_na(nmr, &na, 0); - if (!error) { + if (na && !error) { vpna = (struct netmap_vp_adapter *)na; if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET) nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET; vpna->offset = nmr->nr_arg1; D("Using offset %d for %p", vpna->offset, vpna); + netmap_adapter_put(na); } NMG_UNLOCK(); break; @@ -947,6 +913,7 @@ netmap_vp_krings_create(struct netmap_adapter *na) return 0; } + static void netmap_vp_krings_delete(struct netmap_adapter *na) { @@ -1027,10 +994,6 @@ nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr, } -/* - *---- support for virtual bridge ----- - */ - /* ----- FreeBSD if_bridge hash function ------- */ /* @@ -1052,6 +1015,7 @@ do { \ c -= a; c -= b; c ^= (b >> 15); \ } while (/*CONSTCOND*/0) + static __inline uint32_t nm_bridge_rthash(const uint8_t *addr) { @@ -1144,6 +1108,77 @@ netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring, /* + * Available space in the ring. Only used in VALE code + * and only with is_rx = 1 + */ +static inline uint32_t +nm_kr_space(struct netmap_kring *k, int is_rx) +{ + int space; + + if (is_rx) { + int busy = k->nkr_hwlease - k->nr_hwcur; + if (busy < 0) + busy += k->nkr_num_slots; + space = k->nkr_num_slots - 1 - busy; + } else { + /* XXX never used in this branch */ + space = k->nr_hwtail - k->nkr_hwlease; + if (space < 0) + space += k->nkr_num_slots; + } +#if 0 + // sanity check + if (k->nkr_hwlease >= k->nkr_num_slots || + k->nr_hwcur >= k->nkr_num_slots || + k->nr_tail >= k->nkr_num_slots || + busy < 0 || + busy >= k->nkr_num_slots) { + D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, + k->nkr_lease_idx, k->nkr_num_slots); + } +#endif + return space; +} + + + + +/* make a lease on the kring for N positions. return the + * lease index + * XXX only used in VALE code and with is_rx = 1 + */ +static inline uint32_t +nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) +{ + uint32_t lim = k->nkr_num_slots - 1; + uint32_t lease_idx = k->nkr_lease_idx; + + k->nkr_leases[lease_idx] = NR_NOSLOT; + k->nkr_lease_idx = nm_next(lease_idx, lim); + + if (n > nm_kr_space(k, is_rx)) { + D("invalid request for %d slots", n); + panic("x"); + } + /* XXX verify that there are n slots */ + k->nkr_hwlease += n; + if (k->nkr_hwlease > lim) + k->nkr_hwlease -= lim + 1; + + if (k->nkr_hwlease >= k->nkr_num_slots || + k->nr_hwcur >= k->nkr_num_slots || + k->nr_hwtail >= k->nkr_num_slots || + k->nkr_lease_idx >= k->nkr_num_slots) { + D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", + k->na->ifp->if_xname, + k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, + k->nkr_lease_idx, k->nkr_num_slots); + } + return lease_idx; +} + +/* * This flush routine supports only unicast and broadcast but a large * number of ports, and lets us replace the learn and dispatch functions. 
*/ @@ -1357,28 +1392,30 @@ retry: dst = BDG_NMB(&dst_na->up, slot); if (unlikely(fix_mismatch)) { - if (na->offset > dst_na->offset) { - src += na->offset - dst_na->offset; - copy_len -= na->offset - dst_na->offset; - dst_len = copy_len; - } else { - bzero(dst, dst_na->offset - na->offset); - dst_len += dst_na->offset - na->offset; - dst += dst_na->offset - na->offset; - } - /* fix the first fragment only */ - fix_mismatch = 0; - /* completely skip an header only fragment */ - if (copy_len == 0) { - ft_p++; - continue; - } + /* We are processing the first fragment + * and there is a mismatch between source + * and destination offsets. Create a zeroed + * header for the destination, independently + * of the source header length and content. + */ + src += na->offset; + copy_len -= na->offset; + bzero(dst, dst_na->offset); + dst += dst_na->offset; + dst_len = dst_na->offset + copy_len; + /* fix the first fragment only */ + fix_mismatch = 0; + /* Here it could be copy_len == dst_len == 0, + * and so a zero length fragment is passed. + */ } + + ND("send [%d] %d(%d) bytes at %s:%d", + i, (int)copy_len, (int)dst_len, + NM_IFPNAME(dst_ifp), j); /* round to a multiple of 64 */ copy_len = (copy_len + 63) & ~63; - ND("send %d %d bytes at %s:%d", - i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j); if (ft_p->ft_flags & NS_INDIRECT) { if (copyin(src, dst, copy_len)) { // invalid user pointer, pretend len is 0 @@ -1426,7 +1463,7 @@ retry: } p[lease_idx] = j; /* report I am done */ - update_pos = nm_kr_rxpos(kring); + update_pos = kring->nr_hwtail; if (my_start == update_pos) { /* all slots before my_start have been reported, @@ -1443,15 +1480,7 @@ retry: * means there are new buffers to report */ if (likely(j != my_start)) { - uint32_t old_avail = kring->nr_hwavail; - - kring->nr_hwavail = (j >= kring->nr_hwcur) ? - j - kring->nr_hwcur : - j + lim + 1 - kring->nr_hwcur; - if (kring->nr_hwavail < old_avail) { - D("avail shrink %d -> %d", - old_avail, kring->nr_hwavail); - } + kring->nr_hwtail = j; dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); still_locked = 0; mtx_unlock(&kring->q_lock); @@ -1471,35 +1500,32 @@ cleanup: return 0; } + static int netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags) { struct netmap_kring *kring = &na->up.tx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int j, k, lim = kring->nkr_num_slots - 1; - - k = ring->cur; - if (k > lim) - return netmap_ring_reinit(kring); + u_int done; + u_int const lim = kring->nkr_num_slots - 1; + u_int const cur = kring->rcur; if (bridge_batch <= 0) { /* testing only */ - j = k; // used all + done = cur; // used all goto done; } if (bridge_batch > NM_BDG_BATCH) bridge_batch = NM_BDG_BATCH; - j = nm_bdg_preflush(na, ring_nr, kring, k); - if (j != k) - D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail); - /* k-j modulo ring size is the number of slots processed */ - if (k < j) - k += kring->nkr_num_slots; - kring->nr_hwavail = lim - (k - j); - + done = nm_bdg_preflush(na, ring_nr, kring, cur); done: - kring->nr_hwcur = j; - ring->avail = kring->nr_hwavail; + if (done != cur) + D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail); + /* + * packets between 'done' and 'cur' are left unsent. 
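+	 * They will be attempted again on the next txsync, since
+	 * nr_hwcur is only advanced to 'done'.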
+ */ + kring->nr_hwcur = done; + kring->nr_hwtail = nm_prev(done, lim); + nm_txsync_finalize(kring); if (netmap_verbose) D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags); return 0; @@ -1518,46 +1544,30 @@ bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) return netmap_vp_txsync(vpna, ring_nr, flags); } - -/* - * user process reading from a VALE switch. - * Already protected against concurrent calls from userspace, - * but we must acquire the queue's lock to protect against - * writers on the same queue. - */ static int -bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) +netmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) { struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - u_int j, lim = kring->nkr_num_slots - 1; - u_int k = ring->cur, resvd = ring->reserved; + u_int nm_i, lim = kring->nkr_num_slots - 1; + u_int head = nm_rxsync_prologue(kring); int n; - mtx_lock(&kring->q_lock); - if (k > lim) { + if (head > lim) { D("ouch dangerous reset!!!"); n = netmap_ring_reinit(kring); goto done; } - /* skip past packets that userspace has released */ - j = kring->nr_hwcur; /* netmap ring index */ - if (resvd > 0) { - if (resvd + ring->avail >= lim + 1) { - D("XXX invalid reserve/avail %d %d", resvd, ring->avail); - ring->reserved = resvd = 0; // XXX panic... - } - k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd; - } + /* First part, import newly received packets. */ + /* actually nothing to do here, they are already in the kring */ - if (j != k) { /* userspace has released some packets. */ - n = k - j; - if (n < 0) - n += kring->nkr_num_slots; - ND("userspace releases %d packets", n); - for (n = 0; likely(j != k); n++) { - struct netmap_slot *slot = &ring->slot[j]; + /* Second part, skip past packets that userspace has released. */ + nm_i = kring->nr_hwcur; + if (nm_i != head) { + /* consistency check, but nothing really important here */ + for (n = 0; likely(nm_i != head); n++) { + struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = BDG_NMB(na, slot); if (addr == netmap_buffer_base) { /* bad buf */ @@ -1565,19 +1575,37 @@ bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) slot->buf_idx); } slot->flags &= ~NS_BUF_CHANGED; - j = nm_next(j, lim); + nm_i = nm_next(nm_i, lim); } - kring->nr_hwavail -= n; - kring->nr_hwcur = k; + kring->nr_hwcur = head; } + /* tell userspace that there are new packets */ - ring->avail = kring->nr_hwavail - resvd; + nm_rxsync_finalize(kring); n = 0; done: + return n; +} + +/* + * user process reading from a VALE switch. + * Already protected against concurrent calls from userspace, + * but we must acquire the queue's lock to protect against + * writers on the same queue. + */ +static int +bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) +{ + struct netmap_kring *kring = &na->rx_rings[ring_nr]; + int n; + + mtx_lock(&kring->q_lock); + n = netmap_vp_rxsync(na, ring_nr, flags); mtx_unlock(&kring->q_lock); return n; } + static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp) { @@ -1627,6 +1655,7 @@ bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp) return 0; } + static void netmap_bwrap_dtor(struct netmap_adapter *na) { @@ -1652,16 +1681,22 @@ netmap_bwrap_dtor(struct netmap_adapter *na) } + /* - * Pass packets from nic to the bridge. + * Intr callback for NICs connected to a bridge. + * Simply ignore tx interrupts (maybe we could try to recover space ?) 
+ * and pass received packets from nic to the bridge. + * * XXX TODO check locking: this is called from the interrupt * handler so we should make sure that the interface is not * disconnected while passing down an interrupt. * - * Note, no user process can access this NIC so we can ignore - * the info in the 'ring'. - */ -/* callback that overwrites the hwna notify callback. + * Note, no user process can access this NIC or the host stack. + * The only part of the ring that is significant are the slots, + * and head/cur/tail are set from the kring as needed + * (part as a receive ring, part as a transmit ring). + * + * callback that overwrites the hwna notify callback. * Packets come from the outside or from the host stack and are put on an hwna rx ring. * The bridge wrapper then sends the packets through the bridge. */ @@ -1677,21 +1712,24 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, struct netmap_vp_adapter *vpna = &bna->up; int error = 0; - ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags); + if (netmap_verbose) + D("%s %s%d 0x%x", NM_IFPNAME(ifp), + (tx == NR_TX ? "TX" : "RX"), ring_nr, flags); if (flags & NAF_DISABLE_NOTIFY) { kring = tx == NR_TX ? na->tx_rings : na->rx_rings; bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings; - if (kring->nkr_stopped) - netmap_disable_ring(bkring); + if (kring[ring_nr].nkr_stopped) + netmap_disable_ring(&bkring[ring_nr]); else - bkring->nkr_stopped = 0; + bkring[ring_nr].nkr_stopped = 0; return 0; } if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP)) return 0; + /* we only care about receive interrupts */ if (tx == NR_TX) return 0; @@ -1707,7 +1745,24 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, goto put_out; } + /* Here we expect ring->head = ring->cur = ring->tail + * because everything has been released from the previous round. + * However the ring is shared and we might have info from + * the wrong side (the tx ring). Hence we overwrite with + * the info from the rx kring. + */ + if (netmap_verbose) + D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp), + ring->head, ring->cur, ring->tail, + kring->rhead, kring->rcur, kring->rtail); + + ring->head = kring->rhead; + ring->cur = kring->rcur; + ring->tail = kring->rtail; + + /* simulate a user wakeup on the rx ring */ if (is_host_ring) { + netmap_rxsync_from_host(na, NULL, NULL); vpna = hostna; ring_nr = 0; } else { @@ -1718,23 +1773,46 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, if (error) goto put_out; } - if (kring->nr_hwavail == 0 && netmap_verbose) { + if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { D("how strange, interrupt with no packets on %s", NM_IFPNAME(ifp)); goto put_out; } - /* XXX avail ? */ - ring->cur = nm_kr_rxpos(kring); + + /* new packets are ring->cur to ring->tail, and the bkring + * had hwcur == ring->cur. So advance ring->cur to ring->tail + * to push all packets out. + */ + ring->head = ring->cur = ring->tail; + + /* also set tail to what the bwrap expects */ + bkring = &vpna->up.tx_rings[ring_nr]; + ring->tail = bkring->nr_hwtail; // rtail too ? + + /* pass packets to the switch */ + nm_txsync_prologue(bkring); // XXX error checking ? 
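+	/* the prologue above has refreshed the bkring view of the ring
+	 * (rhead/rcur/rtail); netmap_vp_txsync() below pushes those
+	 * slots through the switch.
+	 */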
	netmap_vp_txsync(vpna, ring_nr, flags);
-	if (!is_host_ring)
+	/* mark all buffers as released on this ring */
+	ring->head = ring->cur = kring->nr_hwtail;
+	ring->tail = kring->rtail;
+	/* another call to actually release the buffers */
+	if (!is_host_ring) {
		error = na->nm_rxsync(na, ring_nr, 0);
+	} else {
+		/* mark all packets as released, as in the
+		 * second part of netmap_rxsync_from_host()
+		 */
+		kring->nr_hwcur = kring->nr_hwtail;
+		nm_rxsync_finalize(kring);
+	}
 put_out:
	nm_kr_put(kring);
	return error;
 }
+
 static int
 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
 {
@@ -1744,7 +1822,7 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
	struct netmap_vp_adapter *hostna = &bna->host;
	int error;
-	ND("%s %d", NM_IFPNAME(ifp), onoff);
+	ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");
	if (onoff) {
		int i;
@@ -1788,6 +1866,7 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
	return 0;
 }
+
 static int
 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
				u_int *rxr, u_int *rxd)
@@ -1807,6 +1886,7 @@ netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
	return 0;
 }
+
 static int
 netmap_bwrap_krings_create(struct netmap_adapter *na)
 {
@@ -1834,6 +1914,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
	return 0;
 }
+
 static void
 netmap_bwrap_krings_delete(struct netmap_adapter *na)
 {
@@ -1847,6 +1928,7 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na)
	netmap_vp_krings_delete(na);
 }
+
 /* notify method for the bridge-->hwna direction */
 static int
 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
@@ -1856,7 +1938,7 @@ netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int f
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_kring *kring, *hw_kring;
	struct netmap_ring *ring;
-	u_int lim, k;
+	u_int lim;
	int error = 0;
	if (tx == NR_TX)
@@ -1865,35 +1947,49 @@ netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int f
	kring = &na->rx_rings[ring_n];
	hw_kring = &hwna->tx_rings[ring_n];
	ring = kring->ring;
	lim = kring->nkr_num_slots - 1;
-	k = nm_kr_rxpos(kring);
	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
		return 0;
-	ring->cur = k;
-	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
+	/* first step: simulate a user wakeup on the rx ring */
+	netmap_vp_rxsync(na, ring_n, flags);
+	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		NM_IFPNAME(na->ifp), ring_n,
-		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
-		ring->cur, ring->avail, ring->reserved,
-		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		ring->head, ring->cur, ring->tail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+	/* second step: the simulated user consumes all new packets */
+	ring->head = ring->cur = ring->tail;
+
+	/* third step: the new packets are sent on the tx ring
+	 * (which is actually the same ring)
+	 */
+	/* set tail to what the hw expects */
+	ring->tail = hw_kring->rtail;
	if (ring_n == na->num_rx_rings) {
		netmap_txsync_to_host(hwna);
	} else {
+		nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
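+		/* as above, the prologue validates head/cur/tail before
+		 * the driver txsync pushes the frames out on the hardware ring
+		 */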
		error = hwna->nm_txsync(hwna, ring_n, flags);
	}
-	kring->nr_hwcur = ring->cur;
-	kring->nr_hwavail = 0;
-	kring->nr_hwreserved = lim - ring->avail;
-	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
+
+	/* fourth step: now we are back on the rx ring */
+	/* claim ownership on all hw owned bufs */
+	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
+	ring->tail = kring->rtail; /* restore saved value of tail, for safety */
+
+	/* fifth step: the user goes to sleep again, causing another rxsync */
+	netmap_vp_rxsync(na, ring_n, flags);
+	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		NM_IFPNAME(na->ifp), ring_n,
-		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
-		ring->cur, ring->avail, ring->reserved,
-		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		ring->head, ring->cur, ring->tail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	return error;
 }
+
 static int
 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
 {
@@ -1904,6 +2000,7 @@ netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx,
	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
 }
+
 /* attach a bridge wrapper to the 'real' device */
 static int
 netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
@@ -1957,7 +2054,8 @@ netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
	hostna->nm_mem = na->nm_mem;
	hostna->na_private = bna;
-	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
+	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
+		fake->if_xname, real->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);
@@ -1970,6 +2068,7 @@ netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
	return 0;
 }
+
 void
 netmap_init_bridges(void)
 {
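To make the new head/cur/tail API concrete, the following is a minimal illustrative sketch of the txsync shape that netmap_kern.h now expects from a driver. It is not code from this changeset; my_hw_program_slot() and my_hw_reclaim() are hypothetical placeholders for the NIC-specific steps.

static int
example_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_txsync_prologue(kring);

	if (head > lim)	/* the prologue returns ring_size on error */
		return netmap_ring_reinit(kring);

	/* first part: push out the slots between hwcur and head */
	for (nm_i = kring->nr_hwcur; nm_i != head; nm_i = nm_next(nm_i, lim)) {
		struct netmap_slot *slot = &ring->slot[nm_i];

		my_hw_program_slot(na, ring_nr, nm_i, slot); /* hypothetical NIC work */
		slot->flags &= ~NS_BUF_CHANGED;
	}
	kring->nr_hwcur = head;

	/* second part: reclaim completed buffers by advancing nr_hwtail
	 * (my_hw_reclaim() is a hypothetical completion scan)
	 */
	kring->nr_hwtail = my_hw_reclaim(na, ring_nr);

	nm_txsync_finalize(kring); /* publish hwcur/hwtail to the user ring */
	return 0;
}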
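The lease mechanism used by the VALE flush path above can likewise be summarized in a sketch. This is an assumption-laden outline of a VALE-style sender reserving a block on a destination rx kring; the completion scan that actually advances nr_hwtail is simplified away, so it is not code from the tree.

static void
example_lease_send(struct netmap_kring *kring, u_int want)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int j, lease_idx;

	mtx_lock(&kring->q_lock);
	if (want > nm_kr_space(kring, 1))	/* free slots on the rx ring */
		want = nm_kr_space(kring, 1);	/* settle for a partial block */
	j = kring->nkr_hwlease;			/* first slot of our block */
	lease_idx = nm_kr_lease(kring, want, 1);	/* reserve 'want' slots */
	mtx_unlock(&kring->q_lock);

	/* fill slots j .. j+want-1 (mod ring size) outside the lock;
	 * other senders may be filling later blocks of the same ring
	 */
	for (; want > 0; want--) {
		/* ... copy one packet into kring->ring->slot[j] ... */
		j = nm_next(j, lim);
	}

	/* report completion; once all earlier leases are complete,
	 * nr_hwtail advances past our block and the receiver is notified
	 */
	kring->nkr_leases[lease_idx] = j;
}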