aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/e1000/if_em.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/e1000/if_em.c')
-rw-r--r--sys/dev/e1000/if_em.c702
1 files changed, 473 insertions, 229 deletions
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index ab39c4355782..8032345d09ae 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,8 @@
******************************************************************************/
/*$FreeBSD$*/
+#include "opt_em.h"
+#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -41,6 +43,10 @@
#include <sys/param.h>
#include <sys/systm.h>
+#ifdef DDB
+#include <sys/types.h>
+#include <ddb/ddb.h>
+#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
@@ -52,6 +58,7 @@
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -208,7 +215,7 @@ static int em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int em_mq_start(if_t, struct mbuf *);
static int em_mq_start_locked(if_t,
- struct tx_ring *, struct mbuf *);
+ struct tx_ring *);
static void em_qflush(if_t);
#else
static void em_start(if_t);
@@ -299,6 +306,10 @@ static void em_handle_tx(void *context, int pending);
static void em_handle_rx(void *context, int pending);
static void em_handle_link(void *context, int pending);
+#ifdef EM_MULTIQUEUE
+static void em_enable_vectors_82574(struct adapter *);
+#endif
+
static void em_set_sysctl_value(struct adapter *, const char *,
const char *, int *, int);
static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
@@ -388,6 +399,19 @@ static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
"Enable MSI-X interrupts");
+#ifdef EM_MULTIQUEUE
+static int em_num_queues = 1;
+SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
+ "82574 only: Number of queues to configure, 0 indicates autoconfigure");
+#endif
+
+/*
+** Global variable to store last used CPU when binding queues
+** to CPUs in em_allocate_msix. Initialized to -1 (unset); set to CPU_FIRST()
+** on first use and advanced with CPU_NEXT() each time a queue is bound.
+*/
+static int em_last_bind_cpu = -1;
+
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
@@ -420,10 +444,10 @@ static int
em_probe(device_t dev)
{
char adapter_name[60];
- u16 pci_vendor_id = 0;
- u16 pci_device_id = 0;
- u16 pci_subvendor_id = 0;
- u16 pci_subdevice_id = 0;
+ uint16_t pci_vendor_id = 0;
+ uint16_t pci_device_id = 0;
+ uint16_t pci_subvendor_id = 0;
+ uint16_t pci_subdevice_id = 0;
em_vendor_info_t *ent;
INIT_DEBUGOUT("em_probe: begin");
@@ -550,6 +574,11 @@ em_attach(device_t dev)
goto err_pci;
}
+ /*
+ * Setup MSI/X or MSI if PCI Express
+ */
+ adapter->msix = em_setup_msix(adapter);
+
e1000_get_bus_info(hw);
/* Set up some sysctls for the tunable interrupt delays */
@@ -880,7 +909,7 @@ em_resume(device_t dev)
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
@@ -894,107 +923,7 @@ em_resume(device_t dev)
}
-#ifdef EM_MULTIQUEUE
-/*********************************************************************
- * Multiqueue Transmit routines
- *
- * em_mq_start is called by the stack to initiate a transmit.
- * however, if busy the driver can queue the request rather
- * than do an immediate send. It is this that is an advantage
- * in this driver, rather than also having multiple tx queues.
- **********************************************************************/
-static int
-em_mq_start_locked(if_t ifp, struct tx_ring *txr, struct mbuf *m)
-{
- struct adapter *adapter = txr->adapter;
- struct mbuf *next;
- int err = 0, enq = 0;
-
- if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING || adapter->link_active == 0) {
- if (m != NULL)
- err = drbr_enqueue(ifp, txr->br, m);
- return (err);
- }
-
- enq = 0;
- if (m != NULL) {
- err = drbr_enqueue(ifp, txr->br, m);
- if (err)
- return (err);
- }
-
- /* Process the queue */
- while ((next = drbr_peek(ifp, txr->br)) != NULL) {
- if ((err = em_xmit(txr, &next)) != 0) {
- if (next == NULL)
- drbr_advance(ifp, txr->br);
- else
- drbr_putback(ifp, txr->br, next);
- break;
- }
- drbr_advance(ifp, txr->br);
- enq++;
- if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
- if (next->m_flags & M_MCAST)
- if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
- if_etherbpfmtap(ifp, next);
- if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
- break;
- }
-
- if (enq > 0) {
- /* Set the watchdog */
- txr->queue_status = EM_QUEUE_WORKING;
- txr->watchdog_time = ticks;
- }
-
- if (txr->tx_avail < EM_MAX_SCATTER)
- em_txeof(txr);
- if (txr->tx_avail < EM_MAX_SCATTER)
- if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
- return (err);
-}
-
-/*
-** Multiqueue capable stack interface
-*/
-static int
-em_mq_start(if_t ifp, struct mbuf *m)
-{
- struct adapter *adapter = if_getsoftc(ifp);
- struct tx_ring *txr = adapter->tx_rings;
- int error;
-
- if (EM_TX_TRYLOCK(txr)) {
- error = em_mq_start_locked(ifp, txr, m);
- EM_TX_UNLOCK(txr);
- } else
- error = drbr_enqueue(ifp, txr->br, m);
-
- return (error);
-}
-
-/*
-** Flush all ring buffers
-*/
-static void
-em_qflush(if_t ifp)
-{
- struct adapter *adapter = if_getsoftc(ifp);
- struct tx_ring *txr = adapter->tx_rings;
- struct mbuf *m;
-
- for (int i = 0; i < adapter->num_queues; i++, txr++) {
- EM_TX_LOCK(txr);
- while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
- m_freem(m);
- EM_TX_UNLOCK(txr);
- }
- if_qflush(ifp);
-}
-#else /* !EM_MULTIQUEUE */
-
+#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
@@ -1032,12 +961,13 @@ em_start_locked(if_t ifp, struct tx_ring *txr)
break;
}
+ /* Mark the queue as having work */
+ if (txr->busy == EM_TX_IDLE)
+ txr->busy = EM_TX_BUSY;
+
/* Send a copy of the frame to the BPF listener */
- if_etherbpfmtap(ifp, m_head);
+ ETHER_BPF_MTAP(ifp, m_head);
- /* Set timeout in case hardware has problems transmitting. */
- txr->watchdog_time = ticks;
- txr->queue_status = EM_QUEUE_WORKING;
}
return;
@@ -1056,6 +986,115 @@ em_start(if_t ifp)
}
return;
}
+#else /* EM_MULTIQUEUE */
+/*********************************************************************
+ * Multiqueue Transmit routines
+ *
+ * em_mq_start is called by the stack to initiate a transmit.
+ * however, if busy the driver can queue the request rather
+ * than do an immediate send. It is this that is an advantage
+ * in this driver, rather than also having multiple tx queues.
+ **********************************************************************/
+/*
+** Multiqueue capable stack interface.
+**
+** Selects a TX ring for the mbuf: when the mbuf carries a hash type the
+** ring index is its flowid modulo the number of queues, otherwise the
+** current CPU number modulo the number of queues.  The mbuf is enqueued
+** on that ring's buf_ring; the ring is then drained immediately if its
+** TX lock can be taken without blocking, or the ring's TX task is
+** scheduled instead.
+**
+** Returns 0 once the mbuf has been enqueued, or the error reported by
+** drbr_enqueue() when it could not be.
+*/
+static int
+em_mq_start(if_t ifp, struct mbuf *m)
+{
+	struct adapter	*adapter = if_getsoftc(ifp);
+	struct tx_ring	*txr;
+	unsigned int	i;
+	int		error;	/* signed: carries the drbr_enqueue() result */
+
+	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+		i = m->m_pkthdr.flowid % adapter->num_queues;
+	else
+		i = curcpu % adapter->num_queues;
+
+	txr = &adapter->tx_rings[i];
+
+	error = drbr_enqueue(ifp, txr->br, m);
+	if (error)
+		return (error);
+
+	/* Drain now if the lock is free, otherwise hand off to the task. */
+	if (EM_TX_TRYLOCK(txr)) {
+		em_mq_start_locked(ifp, txr);
+		EM_TX_UNLOCK(txr);
+	} else
+		taskqueue_enqueue(txr->tq, &txr->tx_task);
+
+	return (0);
+}
+
+/*
+ * Drain a TX ring's buf_ring onto the hardware via em_xmit().
+ *
+ * Callers are expected to hold the ring's TX lock (see the
+ * EM_TX_LOCK_ASSERT below).
+ *
+ * Returns ENETDOWN without touching the ring when the interface is not
+ * running or the link is not active; otherwise returns the result of the
+ * last em_xmit() call (0 when nothing failed or the ring was empty).
+ */
+static int
+em_mq_start_locked(if_t ifp, struct tx_ring *txr)
+{
+ struct adapter *adapter = txr->adapter;
+ struct mbuf *next;
+ int err = 0, enq = 0;
+
+ EM_TX_LOCK_ASSERT(txr);
+
+ /* Nothing can be sent while the interface is down or has no link */
+ if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
+ adapter->link_active == 0) {
+ return (ENETDOWN);
+ }
+
+ /* Process the queue */
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
+ if ((err = em_xmit(txr, &next)) != 0) {
+ if (next == NULL) {
+ /* It was freed, move forward */
+ drbr_advance(ifp, txr->br);
+ } else {
+ /*
+ * Still have one left, it may not be
+ * the same since the transmit function
+ * may have changed it.
+ */
+ drbr_putback(ifp, txr->br, next);
+ }
+ /* Stop on the first transmit failure; err is returned below */
+ break;
+ }
+ /* Sent: consume the peeked entry, then account for and tap it */
+ drbr_advance(ifp, txr->br);
+ enq++;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
+ if (next->m_flags & M_MCAST)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+ ETHER_BPF_MTAP(ifp, next);
+ /* Stop draining if the interface stopped running meanwhile */
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
+ break;
+ }
+
+ /* Mark the queue as having work */
+ if ((enq > 0) && (txr->busy == EM_TX_IDLE))
+ txr->busy = EM_TX_BUSY;
+
+ /*
+ * Low on descriptors: try a cleanup pass first, and only if that
+ * still leaves fewer than EM_MAX_SCATTER free set IFF_DRV_OACTIVE.
+ */
+ if (txr->tx_avail < EM_MAX_SCATTER)
+ em_txeof(txr);
+ if (txr->tx_avail < EM_MAX_SCATTER) {
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
+ }
+ return (err);
+}
+
+/*
+** Flush all ring buffers: free every mbuf still queued on each TX
+** ring's buf_ring (taking that ring's TX lock while draining it), then
+** call if_qflush() on the interface.
+*/
+static void
+em_qflush(if_t ifp)
+{
+	struct adapter *sc = if_getsoftc(ifp);
+
+	for (int q = 0; q < sc->num_queues; q++) {
+		struct tx_ring *ring = &sc->tx_rings[q];
+		struct mbuf *pkt;
+
+		EM_TX_LOCK(ring);
+		for (;;) {
+			pkt = buf_ring_dequeue_sc(ring->br);
+			if (pkt == NULL)
+				break;
+			m_freem(pkt);
+		}
+		EM_TX_UNLOCK(ring);
+	}
+	if_qflush(ifp);
+}
#endif /* EM_MULTIQUEUE */
/*********************************************************************
@@ -1451,7 +1490,7 @@ em_poll(if_t ifp, enum poll_cmd cmd, int count)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
@@ -1518,14 +1557,14 @@ em_handle_que(void *context, int pending)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
-
if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
+
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
@@ -1559,11 +1598,12 @@ em_msix_tx(void *arg)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
+
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
EM_TX_UNLOCK(txr);
@@ -1589,9 +1629,10 @@ em_msix_rx(void *arg)
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
return;
}
@@ -1618,6 +1659,16 @@ em_msix_link(void *arg)
} else
E1000_WRITE_REG(&adapter->hw, E1000_IMS,
EM_MSIX_LINK | E1000_IMS_LSC);
+ /*
+ ** Because we must read the ICR for this interrupt
+ ** it may clear other causes using autoclear, for
+ ** this reason we simply create a soft interrupt
+ ** for all these vectors.
+ */
+ if (reg_icr) {
+ E1000_WRITE_REG(&adapter->hw,
+ E1000_ICS, adapter->ims);
+ }
return;
}
@@ -1631,9 +1682,10 @@ em_handle_rx(void *context, int pending)
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
}
static void
@@ -1647,7 +1699,7 @@ em_handle_tx(void *context, int pending)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
@@ -1677,7 +1729,7 @@ em_handle_link(void *context, int pending)
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (if_sendq_empty(ifp))
em_start_locked(ifp, txr);
@@ -2102,8 +2154,6 @@ retry:
*/
tx_buffer = &txr->tx_buffers[first];
tx_buffer->next_eop = last;
- /* Update the watchdog time early and often */
- txr->watchdog_time = ticks;
/*
* Advance the Transmit Descriptor Tail (TDT), this tells the E1000
@@ -2223,7 +2273,7 @@ em_local_timer(void *arg)
if_t ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
- u32 trigger;
+ u32 trigger = 0;
EM_CORE_LOCK_ASSERT(adapter);
@@ -2236,9 +2286,11 @@ em_local_timer(void *arg)
e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
/* Mask to use in the irq trigger */
- if (adapter->msix_mem)
- trigger = rxr->ims;
- else
+ if (adapter->msix_mem) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++)
+ trigger |= rxr->ims;
+ rxr = adapter->rx_rings;
+ } else
trigger = E1000_ICS_RXDMT0;
/*
@@ -2247,15 +2299,15 @@ em_local_timer(void *arg)
** and the HUNG state will be static if set.
*/
for (int i = 0; i < adapter->num_queues; i++, txr++) {
- if ((txr->queue_status == EM_QUEUE_HUNG) &&
- (adapter->pause_frames == 0))
+ if (txr->busy == EM_TX_HUNG)
goto hung;
+ if (txr->busy >= EM_TX_MAXTRIES)
+ txr->busy = EM_TX_HUNG;
/* Schedule a TX tasklet if needed */
if (txr->tx_avail <= EM_MAX_SCATTER)
taskqueue_enqueue(txr->tq, &txr->tx_task);
}
- adapter->pause_frames = 0;
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
/* Trigger an RX interrupt to guarantee mbuf refresh */
@@ -2264,17 +2316,11 @@ em_local_timer(void *arg)
return;
hung:
/* Looks like we're hung */
- device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
- device_printf(adapter->dev,
- "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
- E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
- device_printf(adapter->dev,"TX(%d) desc avail = %d,"
- "Next TX to Clean = %d\n",
- txr->me, txr->tx_avail, txr->next_to_clean);
+ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
+ txr->me);
+ em_print_debug_info(adapter);
if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
adapter->watchdog_events++;
- adapter->pause_frames = 0;
em_init_locked(adapter);
}
@@ -2324,7 +2370,7 @@ em_update_link_status(struct adapter *adapter)
(hw->mac.type == e1000_82572))) {
int tarc0;
tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
- tarc0 &= ~SPEED_MODE_BIT;
+ tarc0 &= ~TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
}
if (bootverbose)
@@ -2343,9 +2389,9 @@ em_update_link_status(struct adapter *adapter)
if (bootverbose)
device_printf(dev, "Link is Down\n");
adapter->link_active = 0;
- /* Link down, disable watchdog */
+ /* Link down, disable hang detection */
for (int i = 0; i < adapter->num_queues; i++, txr++)
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
if_link_state_change(ifp, LINK_STATE_DOWN);
}
}
@@ -2376,10 +2422,10 @@ em_stop(void *arg)
/* Tell the stack that the interface is no longer active */
if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
- /* Unarm watchdog timer. */
+ /* Disarm Hang Detection. */
for (int i = 0; i < adapter->num_queues; i++, txr++) {
EM_TX_LOCK(txr);
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
EM_TX_UNLOCK(txr);
}
@@ -2440,14 +2486,6 @@ em_allocate_pci_resources(struct adapter *adapter)
rman_get_bushandle(adapter->memory);
adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
- /* Default to a single queue */
- adapter->num_queues = 1;
-
- /*
- * Setup MSI/X or MSI if PCI Express
- */
- adapter->msix = em_setup_msix(adapter);
-
adapter->hw.back = &adapter->osdep;
return (0);
@@ -2522,13 +2560,14 @@ em_allocate_msix(struct adapter *adapter)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
int error, rid, vector = 0;
+ int cpu_id = 0;
/* Make sure all interrupts are disabled */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* First set up ring resources */
- for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
/* RX ring */
rid = vector + 1;
@@ -2548,14 +2587,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
+ bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
#endif
- rxr->msix = vector++; /* NOTE increment vector for TX */
+ rxr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, rxr->res, cpu_id);
+
TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
taskqueue_thread_enqueue, &rxr->tq);
- taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 20 and 21
@@ -2563,8 +2608,13 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
rxr->ims = 1 << (20 + i);
+ adapter->ims |= rxr->ims;
adapter->ivars |= (8 | rxr->msix) << (i * 4);
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
+ }
+
+ for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
/* TX ring */
rid = vector + 1;
txr->res = bus_alloc_resource_any(dev,
@@ -2582,14 +2632,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
+ bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
#endif
- txr->msix = vector++; /* Increment vector for next pass */
+ txr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, txr->res, cpu_id);
+
TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
taskqueue_thread_enqueue, &txr->tq);
- taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 22 and 23
@@ -2597,13 +2653,16 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
txr->ims = 1 << (22 + i);
+ adapter->ims |= txr->ims;
adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
+
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
}
/* Link interrupt */
- ++rid;
+ rid = vector + 1;
adapter->res = bus_alloc_resource_any(dev,
- SYS_RES_IRQ, &rid, RF_ACTIVE);
+ SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
if (!adapter->res) {
device_printf(dev,"Unable to allocate "
"bus resource: Link interrupt [%d]\n", rid);
@@ -2619,7 +2678,7 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, adapter->res, adapter->tag, "link");
+ bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
adapter->linkvec = vector;
adapter->ivars |= (8 | vector) << 16;
@@ -2643,9 +2702,8 @@ em_free_pci_resources(struct adapter *adapter)
*/
for (int i = 0; i < adapter->num_queues; i++) {
txr = &adapter->tx_rings[i];
- rxr = &adapter->rx_rings[i];
/* an early abort? */
- if ((txr == NULL) || (rxr == NULL))
+ if (txr == NULL)
break;
rid = txr->msix +1;
if (txr->tag != NULL) {
@@ -2655,6 +2713,11 @@ em_free_pci_resources(struct adapter *adapter)
if (txr->res != NULL)
bus_release_resource(dev, SYS_RES_IRQ,
rid, txr->res);
+
+ rxr = &adapter->rx_rings[i];
+ /* an early abort? */
+ if (rxr == NULL)
+ break;
rid = rxr->msix +1;
if (rxr->tag != NULL) {
bus_teardown_intr(dev, rxr->res, rxr->tag);
@@ -2704,14 +2767,19 @@ em_setup_msix(struct adapter *adapter)
device_t dev = adapter->dev;
int val;
+ /* Nearly always going to use one queue */
+ adapter->num_queues = 1;
+
/*
- ** Setup MSI/X for Hartwell: tests have shown
- ** use of two queues to be unstable, and to
- ** provide no great gain anyway, so we simply
- ** seperate the interrupts and use a single queue.
+ ** Try using MSI-X for Hartwell adapters
*/
if ((adapter->hw.mac.type == e1000_82574) &&
(em_enable_msix == TRUE)) {
+#ifdef EM_MULTIQUEUE
+ adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
+ if (adapter->num_queues > 1)
+ em_enable_vectors_82574(adapter);
+#endif
/* Map the MSIX BAR */
int rid = PCIR_BAR(EM_MSIX_BAR);
adapter->msix_mem = bus_alloc_resource_any(dev,
@@ -2723,16 +2791,34 @@ em_setup_msix(struct adapter *adapter)
goto msi;
}
val = pci_msix_count(dev);
- /* We only need/want 3 vectors */
- if (val >= 3)
- val = 3;
- else {
- device_printf(adapter->dev,
- "MSIX: insufficient vectors, using MSI\n");
- goto msi;
+
+#ifdef EM_MULTIQUEUE
+ /* We need 5 vectors in the multiqueue case */
+ if (adapter->num_queues > 1 ) {
+ if (val >= 5)
+ val = 5;
+ else {
+ adapter->num_queues = 1;
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors for >1 queue, "
+ "using single queue...\n");
+ goto msix_one;
+ }
+ } else {
+msix_one:
+#endif
+ if (val >= 3)
+ val = 3;
+ else {
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors, using MSI\n");
+ goto msi;
+ }
+#ifdef EM_MULTIQUEUE
}
+#endif
- if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
+ if ((pci_alloc_msix(dev, &val) == 0)) {
device_printf(adapter->dev,
"Using MSIX interrupts "
"with %d vectors\n", val);
@@ -2753,7 +2839,7 @@ msi:
}
val = 1;
if (pci_alloc_msi(dev, &val) == 0) {
- device_printf(adapter->dev,"Using an MSI interrupt\n");
+ device_printf(adapter->dev, "Using an MSI interrupt\n");
return (val);
}
/* Should only happen due to manual configuration */
@@ -3358,7 +3444,7 @@ em_setup_transmit_ring(struct tx_ring *txr)
/* Set number of descriptors available */
txr->tx_avail = adapter->num_tx_desc;
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
/* Clear checksum offload context. */
txr->last_hw_offload = 0;
@@ -3398,7 +3484,7 @@ em_initialize_transmit_unit(struct adapter *adapter)
{
struct tx_ring *txr = adapter->tx_rings;
struct e1000_hw *hw = &adapter->hw;
- u32 tctl, tarc, tipg = 0;
+ u32 tctl, txdctl = 0, tarc, tipg = 0;
INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
@@ -3419,7 +3505,16 @@ em_initialize_transmit_unit(struct adapter *adapter)
E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
+ txdctl = 0; /* clear txdctl */
+ txdctl |= 0x1f; /* PTHRESH */
+ txdctl |= 1 << 8; /* HTHRESH */
+ txdctl |= 1 << 16;/* WTHRESH */
+ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
+ txdctl |= E1000_TXDCTL_GRAN;
+ txdctl |= 1 << 25; /* LWTHRESH */
+
+ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
}
/* Set the default values for the Tx Inter Packet Gap timer */
@@ -3450,15 +3545,25 @@ em_initialize_transmit_unit(struct adapter *adapter)
if ((adapter->hw.mac.type == e1000_82571) ||
(adapter->hw.mac.type == e1000_82572)) {
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
- tarc |= SPEED_MODE_BIT;
+ tarc |= TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
} else if (adapter->hw.mac.type == e1000_80003es2lan) {
+ /* errata: program both queues to unweighted RR */
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else if (adapter->hw.mac.type == e1000_82574) {
+ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
+ tarc |= TARC_ERRATA_BIT;
+ if ( adapter->num_queues > 1) {
+ tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
}
adapter->txd_cmd = E1000_TXD_CMD_IFCS;
@@ -3802,9 +3907,9 @@ em_txeof(struct tx_ring *txr)
return;
#endif /* DEV_NETMAP */
- /* No work, make sure watchdog is off */
+ /* No work, make sure hang detection is disabled */
if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
return;
}
@@ -3847,7 +3952,6 @@ em_txeof(struct tx_ring *txr)
tx_buffer->m_head = NULL;
}
tx_buffer->next_eop = -1;
- txr->watchdog_time = ticks;
if (++first == adapter->num_tx_desc)
first = 0;
@@ -3872,14 +3976,16 @@ em_txeof(struct tx_ring *txr)
txr->next_to_clean = first;
/*
- ** Watchdog calculation, we know there's
- ** work outstanding or the first return
- ** would have been taken, so none processed
- ** for too long indicates a hang. local timer
- ** will examine this and do a reset if needed.
+ ** Hang detection: we know there's work outstanding
+ ** or the entry return would have been taken, so no
+ ** descriptor processed here indicates a potential hang.
+ ** The local timer will examine this and do a reset if needed.
*/
- if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
- txr->queue_status = EM_QUEUE_HUNG;
+ if (processed == 0) {
+ if (txr->busy != EM_TX_HUNG)
+ ++txr->busy;
+ } else /* At least one descriptor was cleaned */
+ txr->busy = EM_TX_BUSY; /* note this clears HUNG */
/*
* If we have a minimum free, clear IFF_DRV_OACTIVE
@@ -3888,13 +3994,13 @@ em_txeof(struct tx_ring *txr)
* TX lock which, with a single queue, guarantees
* sanity.
*/
- if (txr->tx_avail >= EM_MAX_SCATTER)
+ if (txr->tx_avail >= EM_MAX_SCATTER) {
if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
+ }
- /* Disable watchdog if all clean */
- if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = EM_QUEUE_IDLE;
- }
+ /* Disable hang detection if all clean */
+ if (txr->tx_avail == adapter->num_tx_desc)
+ txr->busy = EM_TX_IDLE;
}
@@ -4262,6 +4368,9 @@ em_initialize_receive_unit(struct adapter *adapter)
E1000_WRITE_REG(&adapter->hw, E1000_RADV,
adapter->rx_abs_int_delay.value);
+
+ E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
+ adapter->rx_int_delay.value);
/*
* Set the interrupt throttling rate. Value is calculated
* as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
@@ -4273,20 +4382,65 @@ em_initialize_receive_unit(struct adapter *adapter)
** using the EITR register (82574 only)
*/
if (hw->mac.type == e1000_82574) {
+ u32 rfctl;
for (int i = 0; i < 4; i++)
E1000_WRITE_REG(hw, E1000_EITR_82574(i),
DEFAULT_ITR);
/* Disable accelerated acknowledge */
- E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
+ rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+ rfctl |= E1000_RFCTL_ACK_DIS;
+ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
}
rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
- if (if_getcapenable(ifp) & IFCAP_RXCSUM)
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
+#ifdef EM_MULTIQUEUE
+ rxcsum |= E1000_RXCSUM_TUOFL |
+ E1000_RXCSUM_IPOFL |
+ E1000_RXCSUM_PCSD;
+#else
rxcsum |= E1000_RXCSUM_TUOFL;
- else
+#endif
+ } else
rxcsum &= ~E1000_RXCSUM_TUOFL;
+
E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
+#ifdef EM_MULTIQUEUE
+ if (adapter->num_queues > 1) {
+ uint32_t rss_key[10];
+ uint32_t reta;
+ int i;
+
+ /*
+ * Configure RSS key
+ */
+ arc4rand(rss_key, sizeof(rss_key), 0);
+ for (i = 0; i < 10; ++i)
+ E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
+
+ /*
+ * Configure RSS redirect table in following fashion:
+ * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
+ */
+ reta = 0;
+ for (i = 0; i < 4; ++i) {
+ uint32_t q;
+ q = (i % adapter->num_queues) << 7;
+ reta |= q << (8 * i);
+ }
+ for (i = 0; i < 32; ++i)
+ E1000_WRITE_REG(hw, E1000_RETA(i), reta);
+
+ E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
+ E1000_MRQC_RSS_FIELD_IPV4_TCP |
+ E1000_MRQC_RSS_FIELD_IPV4 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
+ E1000_MRQC_RSS_FIELD_IPV6_EX |
+ E1000_MRQC_RSS_FIELD_IPV6 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP);
+ }
+#endif
/*
** XXX TEMPORARY WORKAROUND: on some systems with 82573
** long latencies are observed, like Lenovo X60. This
@@ -4321,13 +4475,30 @@ em_initialize_receive_unit(struct adapter *adapter)
E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
}
- /* Set PTHRESH for improved jumbo performance */
+ /*
+ * Set PTHRESH for improved jumbo performance
+ * According to 10.2.5.11 of Intel 82574 Datasheet,
+ * RXDCTL(1) is written whenever RXDCTL(0) is written.
+ * Only write to RXDCTL(1) if there is a need for different
+ * settings.
+ */
if (((adapter->hw.mac.type == e1000_ich9lan) ||
(adapter->hw.mac.type == e1000_pch2lan) ||
(adapter->hw.mac.type == e1000_ich10lan)) &&
(if_getmtu(ifp) > ETHERMTU)) {
u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
+ } else if ((adapter->hw.mac.type == e1000_82574) &&
+ (if_getmtu(ifp) > ETHERMTU)) {
+ for (int i = 0; i < adapter->num_queues; i++) {
+ u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+
+ rxdctl |= 0x20; /* PTHRESH */
+ rxdctl |= 4 << 8; /* HTHRESH */
+ rxdctl |= 4 << 16;/* WTHRESH */
+ rxdctl |= 1 << 24; /* Switch to granularity */
+ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+ }
}
if (adapter->hw.mac.type >= e1000_pch2lan) {
@@ -4394,6 +4565,11 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
EM_RX_LOCK(rxr);
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+
#ifdef DEV_NETMAP
if (netmap_rx_irq(ifp, rxr->me, &processed)) {
EM_RX_UNLOCK(rxr);
@@ -4406,9 +4582,6 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
break;
- bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
cur = &rxr->rx_base[i];
status = cur->status;
mp = sendmp = NULL;
@@ -4474,6 +4647,10 @@ skip:
rxr->fmp = rxr->lmp = NULL;
}
next_desc:
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
/* Zero out the receive descriptors status. */
cur->status = 0;
++rxdone; /* cumulative for POLL */
@@ -5130,12 +5307,7 @@ em_update_stats_counters(struct adapter *adapter)
adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
- /*
- ** For watchdog management we need to know if we have been
- ** paused during the last interval, so capture that here.
- */
- adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
- adapter->stats.xoffrxc += adapter->pause_frames;
+ adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
@@ -5300,10 +5472,10 @@ em_add_hw_stats(struct adapter *adapter)
CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
"Flow Control Low Watermark");
- for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
- snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
- CTLFLAG_RD, NULL, "Queue Name");
+ CTLFLAG_RD, NULL, "TX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
@@ -5322,7 +5494,12 @@ em_add_hw_stats(struct adapter *adapter)
SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
CTLFLAG_RD, &txr->no_desc_avail,
"Queue No Descriptor Available");
-
+
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
+ queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "RX Queue Name");
+ queue_list = SYSCTL_CHILDREN(queue_node);
+
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
CTLTYPE_UINT | CTLFLAG_RD, adapter,
E1000_RDH(rxr->me),
@@ -5756,19 +5933,86 @@ em_print_debug_info(struct adapter *adapter)
else
printf("and ACTIVE\n");
- device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
- device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
- device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
- device_printf(dev, "TX descriptors avail = %d\n",
- txr->tx_avail);
- device_printf(dev, "Tx Descriptors avail failure = %ld\n",
- txr->no_desc_avail);
- device_printf(dev, "RX discarded packets = %ld\n",
- rxr->rx_discarded);
- device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
- device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ device_printf(dev, "TX Queue %d ------\n", i);
+ device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
+ device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
+ device_printf(dev, "TX descriptors avail = %d\n",
+ txr->tx_avail);
+ device_printf(dev, "Tx Descriptors avail failure = %ld\n",
+ txr->no_desc_avail);
+ device_printf(dev, "RX Queue %d ------\n", i);
+ device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
+ device_printf(dev, "RX discarded packets = %ld\n",
+ rxr->rx_discarded);
+ device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
+ device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ }
+}
+
+#ifdef EM_MULTIQUEUE
+/*
+ * 82574 only:
+ * Write a new value to the EEPROM increasing the number of MSIX
+ * vectors from 3 to 5, for proper multiqueue support.
+ *
+ * The EM_NVM_PCIE_CTRL word is rewritten only when it was read back
+ * successfully and its MSI-X count field is not already 4.  A failed
+ * read, write or checksum update is reported and leaves the remaining
+ * steps undone, so a garbage word is never written to the EEPROM.
+ * NOTE(review): the field value 4 presumably encodes "5 vectors"
+ * (count - 1), matching the message below -- confirm against the
+ * 82574 datasheet.
+ */
+static void
+em_enable_vectors_82574(struct adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	device_t dev = adapter->dev;
+	u16 edata = 0;
+
+	/* Never modify and write back a word that could not be read. */
+	if (e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata) != 0) {
+		device_printf(dev, "Unable to read NVM PCIe control word, "
+		    "leaving MSIX vector count unchanged\n");
+		return;
+	}
+	printf("Current cap: %#06x\n", edata);
+	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
+		device_printf(dev, "Writing to eeprom: increasing "
+		    "reported MSIX vectors from 3 to 5...\n");
+		edata &= ~(EM_NVM_MSIX_N_MASK);
+		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
+		if (e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata) != 0) {
+			device_printf(dev, "Writing to eeprom: failed\n");
+			return;
+		}
+		if (e1000_update_nvm_checksum(hw) != 0) {
+			device_printf(dev,
+			    "Writing to eeprom: checksum update failed\n");
+			return;
+		}
+		device_printf(dev, "Writing to eeprom: done\n");
+	}
+}
+#endif
+
+#ifdef DDB
+/*
+ * DDB command "em_reset_dev": call em_init_locked() on every device in
+ * the "em" devclass whose driver is em_driver.
+ */
+DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
+{
+	devclass_t dc;
+	int max_em;
+
+	dc = devclass_find("em");
+	if (dc == NULL)
+		return;
+	max_em = devclass_get_maxunit(dc);
+
+	/*
+	 * devclass_get_maxunit() yields one more than the highest unit
+	 * number in use, so walk [0, max_em).  Unit numbers can be sparse,
+	 * so skip slots for which no device exists.
+	 */
+	for (int index = 0; index < max_em; index++) {
+		device_t dev;
+
+		dev = devclass_get_device(dc, index);
+		if (dev == NULL)
+			continue;
+		if (device_get_driver(dev) == &em_driver) {
+			struct adapter *adapter = device_get_softc(dev);
+			em_init_locked(adapter);
+		}
+	}
+}
+/*
+ * DDB command "em_dump_queue": call em_print_debug_info() on every
+ * device in the "em" devclass whose driver is em_driver.
+ */
+DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
+{
+	devclass_t dc;
+	int max_em;
+
+	dc = devclass_find("em");
+	if (dc == NULL)
+		return;
+	max_em = devclass_get_maxunit(dc);
+
+	/*
+	 * devclass_get_maxunit() yields one more than the highest unit
+	 * number in use, so walk [0, max_em).  Unit numbers can be sparse,
+	 * so skip slots for which no device exists.
+	 */
+	for (int index = 0; index < max_em; index++) {
+		device_t dev;
+
+		dev = devclass_get_device(dc, index);
+		if (dev == NULL)
+			continue;
+		if (device_get_driver(dev) == &em_driver)
+			em_print_debug_info(device_get_softc(dev));
+	}
+
}
+#endif