author     Andrew Gallatin <gallatin@FreeBSD.org>   2008-01-15 20:34:49 +0000
committer  Andrew Gallatin <gallatin@FreeBSD.org>   2008-01-15 20:34:49 +0000
commit     1e413cf93298b5b97441a21d9a50fdcd0ee9945e (patch)
tree       01abb766903631b9fe718b5563f563b63e330fdc /sys/dev/mxge/if_mxge.c
parent     707dd4784955cf6abcaeb8f2296ec3a46c3e2d03 (diff)
Add optional support to mxge for MSI-X interrupts and multiple receive
queues (which we call slices). The NIC will steer traffic into up to
hw.mxge.max_slices different receive rings based on a configurable hash
type (hw.mxge.rss_hash_type). The driver currently defaults to a single
slice, so the default behavior is unchanged. Transmit from non-zero
slices is also currently disabled.
Notes:
    svn path=/head/; revision=175365
Diffstat (limited to 'sys/dev/mxge/if_mxge.c')
 -rw-r--r--  sys/dev/mxge/if_mxge.c | 1568
 1 file changed, 1101 insertions(+), 467 deletions(-)
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c
index b3fc7e0cede4..c79a9a411ef1 100644
--- a/sys/dev/mxge/if_mxge.c
+++ b/sys/dev/mxge/if_mxge.c
@@ -1,6 +1,6 @@
/******************************************************************************
-Copyright (c) 2006-2007, Myricom Inc.
+Copyright (c) 2006-2008, Myricom Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
+#include <sys/smp.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
@@ -82,6 +83,7 @@ __FBSDID("$FreeBSD$");
#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
+/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
/* tunable params */
@@ -93,8 +95,13 @@ static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
+static int mxge_max_slices = 1;
+static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
+static int mxge_always_promisc = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
+static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
+static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
@@ -126,7 +133,7 @@ DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);
-static int mxge_load_firmware(mxge_softc_t *sc);
+static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
@@ -195,17 +202,26 @@ mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
{
int err;
device_t dev = sc->dev;
+ bus_size_t boundary, maxsegsize;
+
+ if (bytes > 4096 && alignment == 4096) {
+ boundary = 0;
+ maxsegsize = bytes;
+ } else {
+ boundary = 4096;
+ maxsegsize = 4096;
+ }
/* allocate DMAable memory tags */
err = bus_dma_tag_create(sc->parent_dmat, /* parent */
alignment, /* alignment */
- 4096, /* boundary */
+ boundary, /* boundary */
BUS_SPACE_MAXADDR, /* low */
BUS_SPACE_MAXADDR, /* high */
NULL, NULL, /* filter */
bytes, /* maxsize */
1, /* num segs */
- 4096, /* maxsegsize */
+ maxsegsize, /* maxsegsize */
BUS_DMA_COHERENT, /* flags */
NULL, NULL, /* lock */
&dma->dmat); /* tag */
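The boundary/maxsegsize choice above can be exercised on its own; a minimal userland sketch, with names and sample sizes that are ours rather than the driver's:

/* mirrors the segment-parameter choice made in mxge_dma_alloc() above */
#include <assert.h>
#include <stddef.h>

static void
pick_seg_params(size_t bytes, size_t alignment,
    size_t *boundary, size_t *maxsegsize)
{
	if (bytes > 4096 && alignment == 4096) {
		/* large page-aligned ring: one segment, no 4KB boundary */
		*boundary = 0;
		*maxsegsize = bytes;
	} else {
		*boundary = 4096;
		*maxsegsize = 4096;
	}
}

int
main(void)
{
	size_t b, m;

	pick_seg_params(16384, 4096, &b, &m);	/* e.g. a 16KB ring */
	assert(b == 0 && m == 16384);
	pick_seg_params(64, 64, &b, &m);	/* small DMA block */
	assert(b == 4096 && m == 4096);
	return (0);
}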
@@ -453,7 +469,7 @@ mxge_dma_test(mxge_softc_t *sc, int test_type)
* transfers took to complete.
*/
- len = sc->tx.boundary;
+ len = sc->tx_boundary;
cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
@@ -509,9 +525,9 @@ abort:
* already been enabled, then it must use a firmware image which works
* around unaligned completion packets (ethp_z8e.dat), and it should
* also ensure that it never gives the device a Read-DMA which is
- * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
+ * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
* enabled, then the driver should use the aligned (eth_z8e.dat)
- * firmware image, and set tx.boundary to 4KB.
+ * firmware image, and set tx_boundary to 4KB.
*/
static int
@@ -521,7 +537,7 @@ mxge_firmware_probe(mxge_softc_t *sc)
int reg, status;
uint16_t pectl;
- sc->tx.boundary = 4096;
+ sc->tx_boundary = 4096;
/*
* Verify the max read request size was set to 4KB
* before trying the test with 4KB.
@@ -531,7 +547,7 @@ mxge_firmware_probe(mxge_softc_t *sc)
if ((pectl & (5 << 12)) != (5 << 12)) {
device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
pectl);
- sc->tx.boundary = 2048;
+ sc->tx_boundary = 2048;
}
}
@@ -540,7 +556,7 @@ mxge_firmware_probe(mxge_softc_t *sc)
* completions) in order to see if it works on this host.
*/
sc->fw_name = mxge_fw_aligned;
- status = mxge_load_firmware(sc);
+ status = mxge_load_firmware(sc, 1);
if (status != 0) {
return status;
}
@@ -601,12 +617,12 @@ mxge_select_firmware(mxge_softc_t *sc)
abort:
if (aligned) {
sc->fw_name = mxge_fw_aligned;
- sc->tx.boundary = 4096;
+ sc->tx_boundary = 4096;
} else {
sc->fw_name = mxge_fw_unaligned;
- sc->tx.boundary = 2048;
+ sc->tx_boundary = 2048;
}
- return (mxge_load_firmware(sc));
+ return (mxge_load_firmware(sc, 0));
}
union qualhack
@@ -923,7 +939,7 @@ mxge_adopt_running_firmware(mxge_softc_t *sc)
static int
-mxge_load_firmware(mxge_softc_t *sc)
+mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
volatile uint32_t *confirm;
volatile char *submit;
@@ -936,6 +952,8 @@ mxge_load_firmware(mxge_softc_t *sc)
size = sc->sram_size;
status = mxge_load_firmware_helper(sc, &size);
if (status) {
+ if (!adopt)
+ return status;
/* Try to use the currently running firmware, if
it is new enough */
status = mxge_adopt_running_firmware(sc);
@@ -946,7 +964,7 @@ mxge_load_firmware(mxge_softc_t *sc)
}
device_printf(sc->dev,
"Successfully adopted running firmware\n");
- if (sc->tx.boundary == 4096) {
+ if (sc->tx_boundary == 4096) {
device_printf(sc->dev,
"Using firmware currently running on NIC"
". For optimal\n");
@@ -955,7 +973,7 @@ mxge_load_firmware(mxge_softc_t *sc)
"firmware\n");
}
sc->fw_name = mxge_fw_unaligned;
- sc->tx.boundary = 2048;
+ sc->tx_boundary = 2048;
return 0;
}
/* clear confirmation addr */
@@ -1049,6 +1067,9 @@ mxge_change_promisc(mxge_softc_t *sc, int promisc)
mxge_cmd_t cmd;
int status;
+ if (mxge_always_promisc)
+ promisc = 1;
+
if (promisc)
status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
&cmd);
@@ -1153,10 +1174,11 @@ mxge_max_mtu(mxge_softc_t *sc)
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
-
+ struct mxge_slice_state *ss;
+ mxge_rx_done_t *rx_done;
+ volatile uint32_t *irq_claim;
mxge_cmd_t cmd;
- size_t bytes;
- int status;
+ int slice, status;
/* try to send a reset command to the card to see if it
is alive */
@@ -1169,15 +1191,59 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup)
mxge_dummy_rdma(sc, 1);
+
+ /* set the intrq size */
+ cmd.data0 = sc->rx_ring_size;
+ status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
+
+ /*
+ * Even though we already know how many slices are supported
+ * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
+ * has magic side effects, and must be called after a reset.
+ * It must be called prior to calling any RSS related cmds,
+ * including assigning an interrupt queue for anything but
+ * slice 0. It must also be called *after*
+ * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
+ * the firmware to compute offsets.
+ */
+
+ if (sc->num_slices > 1) {
+ /* ask the maximum number of slices it supports */
+ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
+ &cmd);
+ if (status != 0) {
+ device_printf(sc->dev,
+ "failed to get number of slices\n");
+ return status;
+ }
+ /*
+ * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
+ * to setting up the interrupt queue DMA
+ */
+ cmd.data0 = sc->num_slices;
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
+ &cmd);
+ if (status != 0) {
+ device_printf(sc->dev,
+ "failed to set number of slices\n");
+ return status;
+ }
+ }
+
+
if (interrupts_setup) {
/* Now exchange information about interrupts */
- bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
- memset(sc->rx_done.entry, 0, bytes);
- cmd.data0 = (uint32_t)bytes;
- status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
- cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
- cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
- status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);
+ for (slice = 0; slice < sc->num_slices; slice++) {
+ rx_done = &sc->ss[slice].rx_done;
+ memset(rx_done->entry, 0, sc->rx_ring_size);
+ cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
+ cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
+ cmd.data2 = slice;
+ status |= mxge_send_cmd(sc,
+ MXGEFW_CMD_SET_INTRQ_DMA,
+ &cmd);
+ }
}
status |= mxge_send_cmd(sc,
@@ -1187,7 +1253,7 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup)
sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
- sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
+ irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
@@ -1205,23 +1271,30 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup)
/* run a DMA benchmark */
(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
- /* reset mcp/driver shared state back to 0 */
- sc->rx_done.idx = 0;
- sc->rx_done.cnt = 0;
- sc->tx.req = 0;
- sc->tx.done = 0;
- sc->tx.pkt_done = 0;
- sc->tx.wake = 0;
- sc->tx_defrag = 0;
- sc->tx.stall = 0;
- sc->rx_big.cnt = 0;
- sc->rx_small.cnt = 0;
+ for (slice = 0; slice < sc->num_slices; slice++) {
+ ss = &sc->ss[slice];
+
+ ss->irq_claim = irq_claim + (2 * slice);
+ /* reset mcp/driver shared state back to 0 */
+ ss->rx_done.idx = 0;
+ ss->rx_done.cnt = 0;
+ ss->tx.req = 0;
+ ss->tx.done = 0;
+ ss->tx.pkt_done = 0;
+ ss->tx.wake = 0;
+ ss->tx.defrag = 0;
+ ss->tx.stall = 0;
+ ss->rx_big.cnt = 0;
+ ss->rx_small.cnt = 0;
+ ss->lro_bad_csum = 0;
+ ss->lro_queued = 0;
+ ss->lro_flushed = 0;
+ if (ss->fw_stats != NULL) {
+ ss->fw_stats->valid = 0;
+ ss->fw_stats->send_done_count = 0;
+ }
+ }
sc->rdma_tags_available = 15;
- sc->fw_stats->valid = 0;
- sc->fw_stats->send_done_count = 0;
- sc->lro_bad_csum = 0;
- sc->lro_queued = 0;
- sc->lro_flushed = 0;
status = mxge_update_mac_address(sc);
mxge_change_promisc(sc, 0);
mxge_change_pause(sc, sc->pause);
@@ -1340,15 +1413,38 @@ mxge_handle_be32(SYSCTL_HANDLER_ARGS)
}
static void
+mxge_rem_sysctls(mxge_softc_t *sc)
+{
+ struct mxge_slice_state *ss;
+ int slice;
+
+ if (sc->slice_sysctl_tree == NULL)
+ return;
+
+ for (slice = 0; slice < sc->num_slices; slice++) {
+ ss = &sc->ss[slice];
+ if (ss == NULL || ss->sysctl_tree == NULL)
+ continue;
+ sysctl_ctx_free(&ss->sysctl_ctx);
+ ss->sysctl_tree = NULL;
+ }
+ sysctl_ctx_free(&sc->slice_sysctl_ctx);
+ sc->slice_sysctl_tree = NULL;
+}
+
+static void
mxge_add_sysctls(mxge_softc_t *sc)
{
struct sysctl_ctx_list *ctx;
struct sysctl_oid_list *children;
mcp_irq_data_t *fw;
+ struct mxge_slice_state *ss;
+ int slice;
+ char slice_num[8];
ctx = device_get_sysctl_ctx(sc->dev);
children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
- fw = sc->fw_stats;
+ fw = sc->ss[0].fw_stats;
/* random information */
SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
@@ -1369,7 +1465,7 @@ mxge_add_sysctls(mxge_softc_t *sc)
0, "tx_boundary");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"tx_boundary",
- CTLFLAG_RD, &sc->tx.boundary,
+ CTLFLAG_RD, &sc->tx_boundary,
0, "tx_boundary");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"write_combine",
@@ -1482,40 +1578,6 @@ mxge_add_sysctls(mxge_softc_t *sc)
0, mxge_handle_be32,
"I", "dropped_unicast_filtered");
- /* host counters exported for debugging */
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "rx_small_cnt",
- CTLFLAG_RD, &sc->rx_small.cnt,
- 0, "rx_small_cnt");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "rx_big_cnt",
- CTLFLAG_RD, &sc->rx_big.cnt,
- 0, "rx_small_cnt");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "tx_req",
- CTLFLAG_RD, &sc->tx.req,
- 0, "tx_req");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "tx_done",
- CTLFLAG_RD, &sc->tx.done,
- 0, "tx_done");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "tx_pkt_done",
- CTLFLAG_RD, &sc->tx.pkt_done,
- 0, "tx_done");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "tx_stall",
- CTLFLAG_RD, &sc->tx.stall,
- 0, "tx_stall");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "tx_wake",
- CTLFLAG_RD, &sc->tx.wake,
- 0, "tx_wake");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "tx_defrag",
- CTLFLAG_RD, &sc->tx_defrag,
- 0, "tx_defrag");
-
/* verbose printing? */
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"verbose",
@@ -1529,21 +1591,76 @@ mxge_add_sysctls(mxge_softc_t *sc)
0, mxge_change_lro,
"I", "number of lro merge queues");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "lro_flushed", CTLFLAG_RD, &sc->lro_flushed,
- 0, "number of lro merge queues flushed");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "lro_queued", CTLFLAG_RD, &sc->lro_queued,
- 0, "number of frames appended to lro merge queues");
+ /* add counters exported for debugging from all slices */
+ sysctl_ctx_init(&sc->slice_sysctl_ctx);
+ sc->slice_sysctl_tree =
+ SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
+ "slice", CTLFLAG_RD, 0, "");
+
+ for (slice = 0; slice < sc->num_slices; slice++) {
+ ss = &sc->ss[slice];
+ sysctl_ctx_init(&ss->sysctl_ctx);
+ ctx = &ss->sysctl_ctx;
+ children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
+ sprintf(slice_num, "%d", slice);
+ ss->sysctl_tree =
+ SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
+ CTLFLAG_RD, 0, "");
+ children = SYSCTL_CHILDREN(ss->sysctl_tree);
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "rx_small_cnt",
+ CTLFLAG_RD, &ss->rx_small.cnt,
+ 0, "rx_small_cnt");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "rx_big_cnt",
+ CTLFLAG_RD, &ss->rx_big.cnt,
+ 0, "rx_small_cnt");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_req",
+ CTLFLAG_RD, &ss->tx.req,
+ 0, "tx_req");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
+ 0, "number of lro merge queues flushed");
+
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "lro_queued", CTLFLAG_RD, &ss->lro_queued,
+ 0, "number of frames appended to lro merge"
+ "queues");
+
+ /* only transmit from slice 0 for now */
+ if (slice > 0)
+ continue;
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_done",
+ CTLFLAG_RD, &ss->tx.done,
+ 0, "tx_done");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_pkt_done",
+ CTLFLAG_RD, &ss->tx.pkt_done,
+ 0, "tx_done");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_stall",
+ CTLFLAG_RD, &ss->tx.stall,
+ 0, "tx_stall");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_wake",
+ CTLFLAG_RD, &ss->tx.wake,
+ 0, "tx_wake");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_defrag",
+ CTLFLAG_RD, &ss->tx.defrag,
+ 0, "tx_defrag");
+ }
}
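Once a multi-slice configuration attaches, the per-slice nodes created above can be read with sysctl(8); for example (device unit and slice number hypothetical):

	sysctl dev.mxge.0.slice.0.rx_small_cnt
	sysctl dev.mxge.0.slice.0.lro_queued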
/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
backwards one at a time and handle ring wraps */
static inline void
-mxge_submit_req_backwards(mxge_tx_buf_t *tx,
+mxge_submit_req_backwards(mxge_tx_ring_t *tx,
mcp_kreq_ether_send_t *src, int cnt)
{
int idx, starting_slot;
@@ -1565,7 +1682,7 @@ mxge_submit_req_backwards(mxge_tx_buf_t *tx,
*/
static inline void
-mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
+mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
int cnt)
{
int idx, i;
@@ -1614,10 +1731,10 @@ mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
}
static void
-mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt,
- int ip_off)
+mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
+ int busdma_seg_cnt, int ip_off)
{
- mxge_tx_buf_t *tx;
+ mxge_tx_ring_t *tx;
mcp_kreq_ether_send_t *req;
bus_dma_segment_t *seg;
struct ip *ip;
@@ -1628,7 +1745,7 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt,
uint16_t pseudo_hdr_offset, cksum_offset, mss;
uint8_t flags, flags_next;
static int once;
-
+
mss = m->m_pkthdr.tso_segsz;
/* negative cum_len signifies to the
@@ -1641,15 +1758,15 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt,
it to a scratch buffer if not */
if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
m_copydata(m, 0, ip_off + sizeof (*ip),
- sc->scratch);
- ip = (struct ip *)(sc->scratch + ip_off);
+ ss->scratch);
+ ip = (struct ip *)(ss->scratch + ip_off);
} else {
ip = (struct ip *)(mtod(m, char *) + ip_off);
}
if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
+ sizeof (*tcp))) {
m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
- + sizeof (*tcp), sc->scratch);
+ + sizeof (*tcp), ss->scratch);
ip = (struct ip *)(mtod(m, char *) + ip_off);
}
@@ -1666,7 +1783,7 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt,
* the checksum by parsing the header. */
pseudo_hdr_offset = htobe16(mss);
- tx = &sc->tx;
+ tx = &ss->tx;
req = tx->req_list;
seg = tx->seg_list;
cnt = 0;
@@ -1761,7 +1878,7 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt,
drop:
bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
m_freem(m);
- sc->ifp->if_oerrors++;
+ ss->sc->ifp->if_oerrors++;
if (!once) {
printf("tx->max_desc exceeded via TSO!\n");
printf("mss = %d, %ld, %d!\n", mss,
@@ -1805,22 +1922,23 @@ mxge_vlan_tag_insert(struct mbuf *m)
}
static void
-mxge_encap(mxge_softc_t *sc, struct mbuf *m)
+mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
+ mxge_softc_t *sc;
mcp_kreq_ether_send_t *req;
bus_dma_segment_t *seg;
struct mbuf *m_tmp;
struct ifnet *ifp;
- mxge_tx_buf_t *tx;
+ mxge_tx_ring_t *tx;
struct ip *ip;
int cnt, cum_len, err, i, idx, odd_flag, ip_off;
uint16_t pseudo_hdr_offset;
uint8_t flags, cksum_offset;
-
+ sc = ss->sc;
ifp = sc->ifp;
- tx = &sc->tx;
+ tx = &ss->tx;
ip_off = sizeof (struct ether_header);
if (m->m_flags & M_VLANTAG) {
@@ -1842,7 +1960,7 @@ mxge_encap(mxge_softc_t *sc, struct mbuf *m)
if (m_tmp == NULL) {
goto drop;
}
- sc->tx_defrag++;
+ ss->tx.defrag++;
m = m_tmp;
err = bus_dmamap_load_mbuf_sg(tx->dmat,
tx->info[idx].map,
@@ -1861,7 +1979,7 @@ mxge_encap(mxge_softc_t *sc, struct mbuf *m)
/* TSO is different enough, we handle it in another routine */
if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
- mxge_encap_tso(sc, m, cnt, ip_off);
+ mxge_encap_tso(ss, m, cnt, ip_off);
return;
}
@@ -1876,8 +1994,8 @@ mxge_encap(mxge_softc_t *sc, struct mbuf *m)
it to a scratch buffer if not */
if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
m_copydata(m, 0, ip_off + sizeof (*ip),
- sc->scratch);
- ip = (struct ip *)(sc->scratch + ip_off);
+ ss->scratch);
+ ip = (struct ip *)(ss->scratch + ip_off);
} else {
ip = (struct ip *)(mtod(m, char *) + ip_off);
}
@@ -1963,14 +2081,16 @@ drop:
static inline void
-mxge_start_locked(mxge_softc_t *sc)
+mxge_start_locked(struct mxge_slice_state *ss)
{
+ mxge_softc_t *sc;
struct mbuf *m;
struct ifnet *ifp;
- mxge_tx_buf_t *tx;
+ mxge_tx_ring_t *tx;
+ sc = ss->sc;
ifp = sc->ifp;
- tx = &sc->tx;
+ tx = &ss->tx;
while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
if (m == NULL) {
@@ -1980,7 +2100,7 @@ mxge_start_locked(mxge_softc_t *sc)
BPF_MTAP(ifp, m);
/* give it to the nic */
- mxge_encap(sc, m);
+ mxge_encap(ss, m);
}
/* ran out of transmit slots */
if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
@@ -1993,11 +2113,13 @@ static void
mxge_start(struct ifnet *ifp)
{
mxge_softc_t *sc = ifp->if_softc;
+ struct mxge_slice_state *ss;
-
- mtx_lock(&sc->tx_mtx);
- mxge_start_locked(sc);
- mtx_unlock(&sc->tx_mtx);
+ /* only use the first slice for now */
+ ss = &sc->ss[0];
+ mtx_lock(&ss->tx.mtx);
+ mxge_start_locked(ss);
+ mtx_unlock(&ss->tx.mtx);
}
/*
@@ -2025,11 +2147,11 @@ mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
}
static int
-mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
+mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
bus_dma_segment_t seg;
struct mbuf *m;
- mxge_rx_buf_t *rx = &sc->rx_small;
+ mxge_rx_ring_t *rx = &ss->rx_small;
int cnt, err;
m = m_gethdr(M_DONTWAIT, MT_DATA);
@@ -2058,11 +2180,11 @@ done:
}
static int
-mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
+mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
bus_dma_segment_t seg[3];
struct mbuf *m;
- mxge_rx_buf_t *rx = &sc->rx_big;
+ mxge_rx_ring_t *rx = &ss->rx_big;
int cnt, err, i;
if (rx->cl_size == MCLBYTES)
@@ -2180,24 +2302,26 @@ mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
static inline void
-mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum)
+mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
+ mxge_softc_t *sc;
struct ifnet *ifp;
struct mbuf *m;
struct ether_header *eh;
- mxge_rx_buf_t *rx;
+ mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
uint16_t tcpudp_csum;
+ sc = ss->sc;
ifp = sc->ifp;
- rx = &sc->rx_big;
+ rx = &ss->rx_big;
idx = rx->cnt & rx->mask;
rx->cnt += rx->nbufs;
/* save a pointer to the received mbuf */
m = rx->info[idx].m;
/* try to replace the received mbuf */
- if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
+ if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
/* drop the frame -- the old mbuf is re-cycled */
ifp->if_ierrors++;
return;
@@ -2218,14 +2342,14 @@ mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum)
m->m_pkthdr.rcvif = ifp;
m->m_len = m->m_pkthdr.len = len;
- ifp->if_ipackets++;
+ ss->ipackets++;
eh = mtod(m, struct ether_header *);
if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
- if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
+ if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
return;
/* otherwise, it was a UDP frame, or a TCP frame which
we could not do LRO on. Tell the stack that the
@@ -2238,24 +2362,26 @@ mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum)
}
static inline void
-mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
+mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
+ mxge_softc_t *sc;
struct ifnet *ifp;
struct ether_header *eh;
struct mbuf *m;
- mxge_rx_buf_t *rx;
+ mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
uint16_t tcpudp_csum;
+ sc = ss->sc;
ifp = sc->ifp;
- rx = &sc->rx_small;
+ rx = &ss->rx_small;
idx = rx->cnt & rx->mask;
rx->cnt++;
/* save a pointer to the received mbuf */
m = rx->info[idx].m;
/* try to replace the received mbuf */
- if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
+ if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
/* drop the frame -- the old mbuf is re-cycled */
ifp->if_ierrors++;
return;
@@ -2276,14 +2402,14 @@ mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
m->m_pkthdr.rcvif = ifp;
m->m_len = m->m_pkthdr.len = len;
- ifp->if_ipackets++;
+ ss->ipackets++;
eh = mtod(m, struct ether_header *);
if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
- if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
+ if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
return;
/* otherwise, it was a UDP frame, or a TCP frame which
we could not do LRO on. Tell the stack that the
@@ -2291,15 +2417,14 @@ mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
m->m_pkthdr.csum_data = 0xffff;
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
}
-
/* pass the frame up the stack */
(*ifp->if_input)(ifp, m);
}
static inline void
-mxge_clean_rx_done(mxge_softc_t *sc)
+mxge_clean_rx_done(struct mxge_slice_state *ss)
{
- mxge_rx_done_t *rx_done = &sc->rx_done;
+ mxge_rx_done_t *rx_done = &ss->rx_done;
struct lro_entry *lro;
int limit = 0;
uint16_t length;
@@ -2311,9 +2436,9 @@ mxge_clean_rx_done(mxge_softc_t *sc)
rx_done->entry[rx_done->idx].length = 0;
checksum = rx_done->entry[rx_done->idx].checksum;
if (length <= (MHLEN - MXGEFW_PAD))
- mxge_rx_done_small(sc, length, checksum);
+ mxge_rx_done_small(ss, length, checksum);
else
- mxge_rx_done_big(sc, length, checksum);
+ mxge_rx_done_big(ss, length, checksum);
rx_done->cnt++;
rx_done->idx = rx_done->cnt & rx_done->mask;
@@ -2321,25 +2446,25 @@ mxge_clean_rx_done(mxge_softc_t *sc)
if (__predict_false(++limit > rx_done->mask / 2))
break;
}
- while(!SLIST_EMPTY(&sc->lro_active)) {
- lro = SLIST_FIRST(&sc->lro_active);
- SLIST_REMOVE_HEAD(&sc->lro_active, next);
- mxge_lro_flush(sc, lro);
+ while (!SLIST_EMPTY(&ss->lro_active)) {
+ lro = SLIST_FIRST(&ss->lro_active);
+ SLIST_REMOVE_HEAD(&ss->lro_active, next);
+ mxge_lro_flush(ss, lro);
}
}
static inline void
-mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
+mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
struct ifnet *ifp;
- mxge_tx_buf_t *tx;
+ mxge_tx_ring_t *tx;
struct mbuf *m;
bus_dmamap_t map;
int idx;
- tx = &sc->tx;
- ifp = sc->ifp;
+ tx = &ss->tx;
+ ifp = ss->sc->ifp;
while (tx->pkt_done != mcp_idx) {
idx = tx->done & tx->mask;
tx->done++;
@@ -2364,11 +2489,11 @@ mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
tx->req - tx->done < (tx->mask + 1)/4) {
- mtx_lock(&sc->tx_mtx);
+ mtx_lock(&ss->tx.mtx);
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- sc->tx.wake++;
- mxge_start_locked(sc);
- mtx_unlock(&sc->tx_mtx);
+ ss->tx.wake++;
+ mxge_start_locked(ss);
+ mtx_unlock(&ss->tx.mtx);
}
}
@@ -2511,14 +2636,23 @@ mxge_media_probe(mxge_softc_t *sc)
static void
mxge_intr(void *arg)
{
- mxge_softc_t *sc = arg;
- mcp_irq_data_t *stats = sc->fw_stats;
- mxge_tx_buf_t *tx = &sc->tx;
- mxge_rx_done_t *rx_done = &sc->rx_done;
+ struct mxge_slice_state *ss = arg;
+ mxge_softc_t *sc = ss->sc;
+ mcp_irq_data_t *stats = ss->fw_stats;
+ mxge_tx_ring_t *tx = &ss->tx;
+ mxge_rx_done_t *rx_done = &ss->rx_done;
uint32_t send_done_count;
uint8_t valid;
+ /* an interrupt on a non-zero slice is implicitly valid
+ since MSI-X irqs are not shared */
+ if (ss != sc->ss) {
+ mxge_clean_rx_done(ss);
+ *ss->irq_claim = be32toh(3);
+ return;
+ }
+
/* make sure the DMA has finished */
if (!stats->valid) {
return;
@@ -2541,8 +2675,8 @@ mxge_intr(void *arg)
send_done_count = be32toh(stats->send_done_count);
while ((send_done_count != tx->pkt_done) ||
(rx_done->entry[rx_done->idx].length != 0)) {
- mxge_tx_done(sc, (int)send_done_count);
- mxge_clean_rx_done(sc);
+ mxge_tx_done(ss, (int)send_done_count);
+ mxge_clean_rx_done(ss);
send_done_count = be32toh(stats->send_done_count);
}
} while (*((volatile uint8_t *) &stats->valid));
@@ -2562,9 +2696,9 @@ mxge_intr(void *arg)
sc->need_media_probe = 1;
}
if (sc->rdma_tags_available !=
- be32toh(sc->fw_stats->rdma_tags_available)) {
+ be32toh(stats->rdma_tags_available)) {
sc->rdma_tags_available =
- be32toh(sc->fw_stats->rdma_tags_available);
+ be32toh(stats->rdma_tags_available);
device_printf(sc->dev, "RDMA timed out! %d tags "
"left\n", sc->rdma_tags_available);
}
@@ -2578,8 +2712,8 @@ mxge_intr(void *arg)
/* check to see if we have rx token to pass back */
if (valid & 0x1)
- *sc->irq_claim = be32toh(3);
- *(sc->irq_claim + 1) = be32toh(3);
+ *ss->irq_claim = be32toh(3);
+ *(ss->irq_claim + 1) = be32toh(3);
}
static void
@@ -2590,196 +2724,173 @@ mxge_init(void *arg)
static void
-mxge_free_mbufs(mxge_softc_t *sc)
+mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
+ struct lro_entry *lro_entry;
int i;
- for (i = 0; i <= sc->rx_big.mask; i++) {
- if (sc->rx_big.info[i].m == NULL)
+ while (!SLIST_EMPTY(&ss->lro_free)) {
+ lro_entry = SLIST_FIRST(&ss->lro_free);
+ SLIST_REMOVE_HEAD(&ss->lro_free, next);
+ free(lro_entry, M_DEVBUF);
+ }
+
+ for (i = 0; i <= ss->rx_big.mask; i++) {
+ if (ss->rx_big.info[i].m == NULL)
continue;
- bus_dmamap_unload(sc->rx_big.dmat,
- sc->rx_big.info[i].map);
- m_freem(sc->rx_big.info[i].m);
- sc->rx_big.info[i].m = NULL;
+ bus_dmamap_unload(ss->rx_big.dmat,
+ ss->rx_big.info[i].map);
+ m_freem(ss->rx_big.info[i].m);
+ ss->rx_big.info[i].m = NULL;
}
- for (i = 0; i <= sc->rx_small.mask; i++) {
- if (sc->rx_small.info[i].m == NULL)
+ for (i = 0; i <= ss->rx_small.mask; i++) {
+ if (ss->rx_small.info[i].m == NULL)
continue;
- bus_dmamap_unload(sc->rx_small.dmat,
- sc->rx_small.info[i].map);
- m_freem(sc->rx_small.info[i].m);
- sc->rx_small.info[i].m = NULL;
+ bus_dmamap_unload(ss->rx_small.dmat,
+ ss->rx_small.info[i].map);
+ m_freem(ss->rx_small.info[i].m);
+ ss->rx_small.info[i].m = NULL;
}
- for (i = 0; i <= sc->tx.mask; i++) {
- sc->tx.info[i].flag = 0;
- if (sc->tx.info[i].m == NULL)
+ /* transmit ring used only on the first slice */
+ if (ss->tx.info == NULL)
+ return;
+
+ for (i = 0; i <= ss->tx.mask; i++) {
+ ss->tx.info[i].flag = 0;
+ if (ss->tx.info[i].m == NULL)
continue;
- bus_dmamap_unload(sc->tx.dmat,
- sc->tx.info[i].map);
- m_freem(sc->tx.info[i].m);
- sc->tx.info[i].m = NULL;
+ bus_dmamap_unload(ss->tx.dmat,
+ ss->tx.info[i].map);
+ m_freem(ss->tx.info[i].m);
+ ss->tx.info[i].m = NULL;
}
}
static void
-mxge_free_rings(mxge_softc_t *sc)
+mxge_free_mbufs(mxge_softc_t *sc)
+{
+ int slice;
+
+ for (slice = 0; slice < sc->num_slices; slice++)
+ mxge_free_slice_mbufs(&sc->ss[slice]);
+}
+
+static void
+mxge_free_slice_rings(struct mxge_slice_state *ss)
{
int i;
- if (sc->rx_done.entry != NULL)
- mxge_dma_free(&sc->rx_done.dma);
- sc->rx_done.entry = NULL;
- if (sc->tx.req_bytes != NULL)
- free(sc->tx.req_bytes, M_DEVBUF);
- if (sc->tx.seg_list != NULL)
- free(sc->tx.seg_list, M_DEVBUF);
- if (sc->rx_small.shadow != NULL)
- free(sc->rx_small.shadow, M_DEVBUF);
- if (sc->rx_big.shadow != NULL)
- free(sc->rx_big.shadow, M_DEVBUF);
- if (sc->tx.info != NULL) {
- if (sc->tx.dmat != NULL) {
- for (i = 0; i <= sc->tx.mask; i++) {
- bus_dmamap_destroy(sc->tx.dmat,
- sc->tx.info[i].map);
+
+ if (ss->rx_done.entry != NULL)
+ mxge_dma_free(&ss->rx_done.dma);
+ ss->rx_done.entry = NULL;
+
+ if (ss->tx.req_bytes != NULL)
+ free(ss->tx.req_bytes, M_DEVBUF);
+ ss->tx.req_bytes = NULL;
+
+ if (ss->tx.seg_list != NULL)
+ free(ss->tx.seg_list, M_DEVBUF);
+ ss->tx.seg_list = NULL;
+
+ if (ss->rx_small.shadow != NULL)
+ free(ss->rx_small.shadow, M_DEVBUF);
+ ss->rx_small.shadow = NULL;
+
+ if (ss->rx_big.shadow != NULL)
+ free(ss->rx_big.shadow, M_DEVBUF);
+ ss->rx_big.shadow = NULL;
+
+ if (ss->tx.info != NULL) {
+ if (ss->tx.dmat != NULL) {
+ for (i = 0; i <= ss->tx.mask; i++) {
+ bus_dmamap_destroy(ss->tx.dmat,
+ ss->tx.info[i].map);
}
- bus_dma_tag_destroy(sc->tx.dmat);
+ bus_dma_tag_destroy(ss->tx.dmat);
}
- free(sc->tx.info, M_DEVBUF);
+ free(ss->tx.info, M_DEVBUF);
}
- if (sc->rx_small.info != NULL) {
- if (sc->rx_small.dmat != NULL) {
- for (i = 0; i <= sc->rx_small.mask; i++) {
- bus_dmamap_destroy(sc->rx_small.dmat,
- sc->rx_small.info[i].map);
+ ss->tx.info = NULL;
+
+ if (ss->rx_small.info != NULL) {
+ if (ss->rx_small.dmat != NULL) {
+ for (i = 0; i <= ss->rx_small.mask; i++) {
+ bus_dmamap_destroy(ss->rx_small.dmat,
+ ss->rx_small.info[i].map);
}
- bus_dmamap_destroy(sc->rx_small.dmat,
- sc->rx_small.extra_map);
- bus_dma_tag_destroy(sc->rx_small.dmat);
+ bus_dmamap_destroy(ss->rx_small.dmat,
+ ss->rx_small.extra_map);
+ bus_dma_tag_destroy(ss->rx_small.dmat);
}
- free(sc->rx_small.info, M_DEVBUF);
+ free(ss->rx_small.info, M_DEVBUF);
}
- if (sc->rx_big.info != NULL) {
- if (sc->rx_big.dmat != NULL) {
- for (i = 0; i <= sc->rx_big.mask; i++) {
- bus_dmamap_destroy(sc->rx_big.dmat,
- sc->rx_big.info[i].map);
+ ss->rx_small.info = NULL;
+
+ if (ss->rx_big.info != NULL) {
+ if (ss->rx_big.dmat != NULL) {
+ for (i = 0; i <= ss->rx_big.mask; i++) {
+ bus_dmamap_destroy(ss->rx_big.dmat,
+ ss->rx_big.info[i].map);
}
- bus_dmamap_destroy(sc->rx_big.dmat,
- sc->rx_big.extra_map);
- bus_dma_tag_destroy(sc->rx_big.dmat);
+ bus_dmamap_destroy(ss->rx_big.dmat,
+ ss->rx_big.extra_map);
+ bus_dma_tag_destroy(ss->rx_big.dmat);
}
- free(sc->rx_big.info, M_DEVBUF);
+ free(ss->rx_big.info, M_DEVBUF);
}
+ ss->rx_big.info = NULL;
}
-static int
-mxge_alloc_rings(mxge_softc_t *sc)
+static void
+mxge_free_rings(mxge_softc_t *sc)
{
- mxge_cmd_t cmd;
- int tx_ring_size, rx_ring_size;
- int tx_ring_entries, rx_ring_entries;
- int i, err;
- unsigned long bytes;
-
- /* get ring sizes */
- err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
- tx_ring_size = cmd.data0;
- err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
- if (err != 0) {
- device_printf(sc->dev, "Cannot determine ring sizes\n");
- goto abort_with_nothing;
- }
+ int slice;
- rx_ring_size = cmd.data0;
-
- tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
- rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
- IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
- sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
- IFQ_SET_READY(&sc->ifp->if_snd);
+ for (slice = 0; slice < sc->num_slices; slice++)
+ mxge_free_slice_rings(&sc->ss[slice]);
+}
- sc->tx.mask = tx_ring_entries - 1;
- sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
- sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;
- sc->rx_done.mask = (2 * rx_ring_entries) - 1;
+static int
+mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
+ int tx_ring_entries)
+{
+ mxge_softc_t *sc = ss->sc;
+ size_t bytes;
+ int err, i;
err = ENOMEM;
- /* allocate interrupt queues */
- bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
- err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
- if (err != 0)
- goto abort_with_nothing;
- sc->rx_done.entry = sc->rx_done.dma.addr;
- bzero(sc->rx_done.entry, bytes);
+ /* allocate per-slice receive resources */
- /* allocate the tx request copy block */
- bytes = 8 +
- sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4);
- sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
- if (sc->tx.req_bytes == NULL)
- goto abort_with_alloc;
- /* ensure req_list entries are aligned to 8 bytes */
- sc->tx.req_list = (mcp_kreq_ether_send_t *)
- ((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);
-
- /* allocate the tx busdma segment list */
- bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc;
- sc->tx.seg_list = (bus_dma_segment_t *)
- malloc(bytes, M_DEVBUF, M_WAITOK);
- if (sc->tx.seg_list == NULL)
- goto abort_with_alloc;
+ ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
+ ss->rx_done.mask = (2 * rx_ring_entries) - 1;
/* allocate the rx shadow rings */
- bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
- sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
- if (sc->rx_small.shadow == NULL)
- goto abort_with_alloc;
-
- bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
- sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
- if (sc->rx_big.shadow == NULL)
- goto abort_with_alloc;
-
- /* allocate the host info rings */
- bytes = tx_ring_entries * sizeof (*sc->tx.info);
- sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
- if (sc->tx.info == NULL)
- goto abort_with_alloc;
-
- bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
- sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
- if (sc->rx_small.info == NULL)
- goto abort_with_alloc;
-
- bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
- sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
- if (sc->rx_big.info == NULL)
- goto abort_with_alloc;
-
- /* allocate the busdma resources */
- err = bus_dma_tag_create(sc->parent_dmat, /* parent */
- 1, /* alignment */
- sc->tx.boundary, /* boundary */
- BUS_SPACE_MAXADDR, /* low */
- BUS_SPACE_MAXADDR, /* high */
- NULL, NULL, /* filter */
- 65536 + 256, /* maxsize */
- sc->tx.max_desc - 2, /* num segs */
- sc->tx.boundary, /* maxsegsize */
- BUS_DMA_ALLOCNOW, /* flags */
- NULL, NULL, /* lock */
- &sc->tx.dmat); /* tag */
-
- if (err != 0) {
- device_printf(sc->dev, "Err %d allocating tx dmat\n",
- err);
- goto abort_with_alloc;
- }
-
+ bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
+ ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (ss->rx_small.shadow == NULL)
+ return err;
+
+ bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
+ ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (ss->rx_big.shadow == NULL)
+ return err;
+
+ /* allocate the rx host info rings */
+ bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
+ ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (ss->rx_small.info == NULL)
+ return err;
+
+ bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
+ ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (ss->rx_big.info == NULL)
+ return err;
+
+ /* allocate the rx busdma resources */
err = bus_dma_tag_create(sc->parent_dmat, /* parent */
1, /* alignment */
4096, /* boundary */
@@ -2791,11 +2902,11 @@ mxge_alloc_rings(mxge_softc_t *sc)
MHLEN, /* maxsegsize */
BUS_DMA_ALLOCNOW, /* flags */
NULL, NULL, /* lock */
- &sc->rx_small.dmat); /* tag */
+ &ss->rx_small.dmat); /* tag */
if (err != 0) {
device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
err);
- goto abort_with_alloc;
+ return err;
}
err = bus_dma_tag_create(sc->parent_dmat, /* parent */
@@ -2809,66 +2920,152 @@ mxge_alloc_rings(mxge_softc_t *sc)
4096, /* maxsegsize */
BUS_DMA_ALLOCNOW, /* flags */
NULL, NULL, /* lock */
- &sc->rx_big.dmat); /* tag */
+ &ss->rx_big.dmat); /* tag */
if (err != 0) {
device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
err);
- goto abort_with_alloc;
- }
-
- /* now use these tags to setup dmamaps for each slot
- in each ring */
- for (i = 0; i <= sc->tx.mask; i++) {
- err = bus_dmamap_create(sc->tx.dmat, 0,
- &sc->tx.info[i].map);
- if (err != 0) {
- device_printf(sc->dev, "Err %d tx dmamap\n",
- err);
- goto abort_with_alloc;
- }
+ return err;
}
- for (i = 0; i <= sc->rx_small.mask; i++) {
- err = bus_dmamap_create(sc->rx_small.dmat, 0,
- &sc->rx_small.info[i].map);
+ for (i = 0; i <= ss->rx_small.mask; i++) {
+ err = bus_dmamap_create(ss->rx_small.dmat, 0,
+ &ss->rx_small.info[i].map);
if (err != 0) {
device_printf(sc->dev, "Err %d rx_small dmamap\n",
err);
- goto abort_with_alloc;
+ return err;
}
}
- err = bus_dmamap_create(sc->rx_small.dmat, 0,
- &sc->rx_small.extra_map);
+ err = bus_dmamap_create(ss->rx_small.dmat, 0,
+ &ss->rx_small.extra_map);
if (err != 0) {
device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
err);
- goto abort_with_alloc;
+ return err;
}
- for (i = 0; i <= sc->rx_big.mask; i++) {
- err = bus_dmamap_create(sc->rx_big.dmat, 0,
- &sc->rx_big.info[i].map);
+ for (i = 0; i <= ss->rx_big.mask; i++) {
+ err = bus_dmamap_create(ss->rx_big.dmat, 0,
+ &ss->rx_big.info[i].map);
if (err != 0) {
device_printf(sc->dev, "Err %d rx_big dmamap\n",
- err);
- goto abort_with_alloc;
+ err);
+ return err;
}
}
- err = bus_dmamap_create(sc->rx_big.dmat, 0,
- &sc->rx_big.extra_map);
+ err = bus_dmamap_create(ss->rx_big.dmat, 0,
+ &ss->rx_big.extra_map);
if (err != 0) {
device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
err);
- goto abort_with_alloc;
+ return err;
+ }
+
+ /* now allocate TX resources */
+
+ /* only use a single TX ring for now */
+ if (ss != ss->sc->ss)
+ return 0;
+
+ ss->tx.mask = tx_ring_entries - 1;
+ ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
+
+
+ /* allocate the tx request copy block */
+ bytes = 8 +
+ sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
+ ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
+ if (ss->tx.req_bytes == NULL)
+ return err;
+ /* ensure req_list entries are aligned to 8 bytes */
+ ss->tx.req_list = (mcp_kreq_ether_send_t *)
+ ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
+
+ /* allocate the tx busdma segment list */
+ bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
+ ss->tx.seg_list = (bus_dma_segment_t *)
+ malloc(bytes, M_DEVBUF, M_WAITOK);
+ if (ss->tx.seg_list == NULL)
+ return err;
+
+ /* allocate the tx host info ring */
+ bytes = tx_ring_entries * sizeof (*ss->tx.info);
+ ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (ss->tx.info == NULL)
+ return err;
+
+ /* allocate the tx busdma resources */
+ err = bus_dma_tag_create(sc->parent_dmat, /* parent */
+ 1, /* alignment */
+ sc->tx_boundary, /* boundary */
+ BUS_SPACE_MAXADDR, /* low */
+ BUS_SPACE_MAXADDR, /* high */
+ NULL, NULL, /* filter */
+ 65536 + 256, /* maxsize */
+ ss->tx.max_desc - 2, /* num segs */
+ sc->tx_boundary, /* maxsegsz */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, NULL, /* lock */
+ &ss->tx.dmat); /* tag */
+
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d allocating tx dmat\n",
+ err);
+ return err;
+ }
+
+ /* now use these tags to setup dmamaps for each slot
+ in the ring */
+ for (i = 0; i <= ss->tx.mask; i++) {
+ err = bus_dmamap_create(ss->tx.dmat, 0,
+ &ss->tx.info[i].map);
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d tx dmamap\n",
+ err);
+ return err;
+ }
}
return 0;
-abort_with_alloc:
- mxge_free_rings(sc);
+}
-abort_with_nothing:
+static int
+mxge_alloc_rings(mxge_softc_t *sc)
+{
+ mxge_cmd_t cmd;
+ int tx_ring_size;
+ int tx_ring_entries, rx_ring_entries;
+ int err, slice;
+
+ /* get ring sizes */
+ err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
+ tx_ring_size = cmd.data0;
+ if (err != 0) {
+ device_printf(sc->dev, "Cannot determine tx ring sizes\n");
+ goto abort;
+ }
+
+ tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
+ rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
+ IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
+ sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
+ IFQ_SET_READY(&sc->ifp->if_snd);
+
+ for (slice = 0; slice < sc->num_slices; slice++) {
+ err = mxge_alloc_slice_rings(&sc->ss[slice],
+ rx_ring_entries,
+ tx_ring_entries);
+ if (err != 0)
+ goto abort;
+ }
+ return 0;
+
+abort:
+ mxge_free_rings(sc);
return err;
+
}
+
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
@@ -2898,62 +3095,50 @@ mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
*nbufs = 4;
}
-static int
-mxge_open(mxge_softc_t *sc)
+static int
+mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
+ mxge_softc_t *sc;
mxge_cmd_t cmd;
- int i, err, big_bytes;
bus_dmamap_t map;
- bus_addr_t bus;
struct lro_entry *lro_entry;
+ int err, i, slice;
- SLIST_INIT(&sc->lro_free);
- SLIST_INIT(&sc->lro_active);
+
+ sc = ss->sc;
+ slice = ss - sc->ss;
+
+ SLIST_INIT(&ss->lro_free);
+ SLIST_INIT(&ss->lro_active);
for (i = 0; i < sc->lro_cnt; i++) {
lro_entry = (struct lro_entry *)
- malloc(sizeof (*lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO);
+ malloc(sizeof (*lro_entry), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
if (lro_entry == NULL) {
sc->lro_cnt = i;
break;
}
- SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next);
- }
-
- /* Copy the MAC address in case it was overridden */
- bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
-
- err = mxge_reset(sc, 1);
- if (err != 0) {
- device_printf(sc->dev, "failed to reset\n");
- return EIO;
- }
-
- mxge_choose_params(sc->ifp->if_mtu, &big_bytes,
- &sc->rx_big.cl_size, &sc->rx_big.nbufs);
-
- cmd.data0 = sc->rx_big.nbufs;
- err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
- &cmd);
- /* error is only meaningful if we're trying to set
- MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
- if (err && sc->rx_big.nbufs > 1) {
- device_printf(sc->dev,
- "Failed to set alway-use-n to %d\n",
- sc->rx_big.nbufs);
- return EIO;
+ SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
}
/* get the lanai pointers to the send and receive rings */
- err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
- sc->tx.lanai =
- (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
+ err = 0;
+ /* We currently only send from the first slice */
+ if (slice == 0) {
+ cmd.data0 = slice;
+ err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
+ ss->tx.lanai =
+ (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
+ }
+ cmd.data0 = slice;
err |= mxge_send_cmd(sc,
- MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
- sc->rx_small.lanai =
+ MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
+ ss->rx_small.lanai =
(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
+ cmd.data0 = slice;
err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
- sc->rx_big.lanai =
+ ss->rx_big.lanai =
(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
if (err != 0) {
@@ -2963,29 +3148,92 @@ mxge_open(mxge_softc_t *sc)
}
/* stock receive rings */
- for (i = 0; i <= sc->rx_small.mask; i++) {
- map = sc->rx_small.info[i].map;
- err = mxge_get_buf_small(sc, map, i);
+ for (i = 0; i <= ss->rx_small.mask; i++) {
+ map = ss->rx_small.info[i].map;
+ err = mxge_get_buf_small(ss, map, i);
if (err) {
device_printf(sc->dev, "alloced %d/%d smalls\n",
- i, sc->rx_small.mask + 1);
- goto abort;
+ i, ss->rx_small.mask + 1);
+ return ENOMEM;
}
}
- for (i = 0; i <= sc->rx_big.mask; i++) {
- sc->rx_big.shadow[i].addr_low = 0xffffffff;
- sc->rx_big.shadow[i].addr_high = 0xffffffff;
+ for (i = 0; i <= ss->rx_big.mask; i++) {
+ ss->rx_big.shadow[i].addr_low = 0xffffffff;
+ ss->rx_big.shadow[i].addr_high = 0xffffffff;
}
- for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) {
- map = sc->rx_big.info[i].map;
- err = mxge_get_buf_big(sc, map, i);
+ ss->rx_big.nbufs = nbufs;
+ ss->rx_big.cl_size = cl_size;
+ for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
+ map = ss->rx_big.info[i].map;
+ err = mxge_get_buf_big(ss, map, i);
if (err) {
device_printf(sc->dev, "alloced %d/%d bigs\n",
- i, sc->rx_big.mask + 1);
- goto abort;
+ i, ss->rx_big.mask + 1);
+ return ENOMEM;
}
}
+ return 0;
+}
+
+static int
+mxge_open(mxge_softc_t *sc)
+{
+ mxge_cmd_t cmd;
+ int err, big_bytes, nbufs, slice, cl_size, i;
+ bus_addr_t bus;
+ volatile uint8_t *itable;
+
+ /* Copy the MAC address in case it was overridden */
+ bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
+
+ err = mxge_reset(sc, 1);
+ if (err != 0) {
+ device_printf(sc->dev, "failed to reset\n");
+ return EIO;
+ }
+
+ if (sc->num_slices > 1) {
+ /* setup the indirection table */
+ cmd.data0 = sc->num_slices;
+ err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
+ &cmd);
+
+ err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
+ &cmd);
+ if (err != 0) {
+ device_printf(sc->dev,
+ "failed to setup rss tables\n");
+ return err;
+ }
+ /* just enable an identity mapping */
+ itable = sc->sram + cmd.data0;
+ for (i = 0; i < sc->num_slices; i++)
+ itable[i] = (uint8_t)i;
+
+ cmd.data0 = 1;
+ cmd.data1 = mxge_rss_hash_type;
+ err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
+ if (err != 0) {
+ device_printf(sc->dev, "failed to enable slices\n");
+ return err;
+ }
+ }
+
+
+ mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
+
+ cmd.data0 = nbufs;
+ err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
+ &cmd);
+ /* error is only meaningful if we're trying to set
+ MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
+ if (err && nbufs > 1) {
+ device_printf(sc->dev,
+ "Failed to set alway-use-n to %d\n",
+ nbufs);
+ return EIO;
+ }
/* Give the firmware the mtu and the big and small buffer
sizes. The firmware wants the big buf size to be a power
of two. Luckily, FreeBSD's clusters are powers of two */
@@ -3003,13 +3251,13 @@ mxge_open(mxge_softc_t *sc)
}
/* Now give him the pointer to the stats block */
- cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
- cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
+ cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
+ cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
cmd.data2 = sizeof(struct mcp_irq_data);
err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
if (err != 0) {
- bus = sc->fw_stats_dma.bus_addr;
+ bus = sc->ss->fw_stats_dma.bus_addr;
bus += offsetof(struct mcp_irq_data, send_done_count);
cmd.data0 = MXGE_LOWPART_TO_U32(bus);
cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
@@ -3027,6 +3275,15 @@ mxge_open(mxge_softc_t *sc)
goto abort;
}
+ for (slice = 0; slice < sc->num_slices; slice++) {
+ err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
+ if (err != 0) {
+ device_printf(sc->dev, "couldn't open slice %d\n",
+ slice);
+ goto abort;
+ }
+ }
+
/* Finally, start the firmware running */
err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
if (err) {
@@ -3048,7 +3305,6 @@ abort:
static int
mxge_close(mxge_softc_t *sc)
{
- struct lro_entry *lro_entry;
mxge_cmd_t cmd;
int err, old_down_cnt;
@@ -3063,16 +3319,13 @@ mxge_close(mxge_softc_t *sc)
/* wait for down irq */
DELAY(10 * sc->intr_coal_delay);
}
+ mb();
if (old_down_cnt == sc->down_cnt) {
device_printf(sc->dev, "never got down irq\n");
}
mxge_free_mbufs(sc);
- while (!SLIST_EMPTY(&sc->lro_free)) {
- lro_entry = SLIST_FIRST(&sc->lro_free);
- SLIST_REMOVE_HEAD(&sc->lro_free, next);
- }
return 0;
}
@@ -3166,10 +3419,10 @@ mxge_watchdog_reset(mxge_softc_t *sc)
} else {
device_printf(sc->dev, "NIC did not reboot, ring state:\n");
device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
- sc->tx.req, sc->tx.done);
+ sc->ss->tx.req, sc->ss->tx.done);
device_printf(sc->dev, "pkt_done=%d fw=%d\n",
- sc->tx.pkt_done,
- be32toh(sc->fw_stats->send_done_count));
+ sc->ss->tx.pkt_done,
+ be32toh(sc->ss->fw_stats->send_done_count));
}
if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
@@ -3190,8 +3443,8 @@ abort:
static void
mxge_watchdog(mxge_softc_t *sc)
{
- mxge_tx_buf_t *tx = &sc->tx;
- uint32_t rx_pause = be32toh(sc->fw_stats->dropped_pause);
+ mxge_tx_ring_t *tx = &sc->ss->tx;
+ uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
/* see if we have outstanding transmits, which
have been pending for more than mxge_ticks */
@@ -3215,6 +3468,20 @@ mxge_watchdog(mxge_softc_t *sc)
}
static void
+mxge_update_stats(mxge_softc_t *sc)
+{
+ struct mxge_slice_state *ss;
+ u_long ipackets = 0;
+ int slice;
+
+ for(slice = 0; slice < sc->num_slices; slice++) {
+ ss = &sc->ss[slice];
+ ipackets += ss->ipackets;
+ }
+ sc->ifp->if_ipackets = ipackets;
+
+}
+static void
mxge_tick(void *arg)
{
mxge_softc_t *sc = arg;
@@ -3227,8 +3494,15 @@ mxge_tick(void *arg)
return;
}
+ /* aggregate stats from different slices */
+ mxge_update_stats(sc);
+
callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
- mxge_watchdog(sc);
+ if (!sc->watchdog_countdown) {
+ mxge_watchdog(sc);
+ sc->watchdog_countdown = 4;
+ }
+ sc->watchdog_countdown--;
}
static int
@@ -3314,8 +3588,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
} else {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- mxge_close(sc);
callout_stop(&sc->co_hdl);
+ mxge_close(sc);
}
}
mtx_unlock(&sc->driver_mtx);
@@ -3389,7 +3663,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{
-
+
+ TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
&mxge_flow_control);
TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
@@ -3404,6 +3679,8 @@ mxge_fetch_tunables(mxge_softc_t *sc)
&mxge_verbose);
TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
+ TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
+ TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
if (sc->lro_cnt != 0)
mxge_lro_cnt = sc->lro_cnt;
@@ -3412,30 +3689,420 @@ mxge_fetch_tunables(mxge_softc_t *sc)
if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
mxge_intr_coal_delay = 30;
if (mxge_ticks == 0)
- mxge_ticks = hz;
+ mxge_ticks = hz / 2;
sc->pause = mxge_flow_control;
+ if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
+ || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) {
+ mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
+ }
+}
+
+
+static void
+mxge_free_slices(mxge_softc_t *sc)
+{
+ struct mxge_slice_state *ss;
+ int i;
+
+
+ if (sc->ss == NULL)
+ return;
+
+ for (i = 0; i < sc->num_slices; i++) {
+ ss = &sc->ss[i];
+ if (ss->fw_stats != NULL) {
+ mxge_dma_free(&ss->fw_stats_dma);
+ ss->fw_stats = NULL;
+ mtx_destroy(&ss->tx.mtx);
+ }
+ if (ss->rx_done.entry != NULL) {
+ mxge_dma_free(&ss->rx_done.dma);
+ ss->rx_done.entry = NULL;
+ }
+ }
+ free(sc->ss, M_DEVBUF);
+ sc->ss = NULL;
+}
+
+static int
+mxge_alloc_slices(mxge_softc_t *sc)
+{
+ mxge_cmd_t cmd;
+ struct mxge_slice_state *ss;
+ size_t bytes;
+ int err, i, max_intr_slots;
+
+ err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
+ if (err != 0) {
+ device_printf(sc->dev, "Cannot determine rx ring size\n");
+ return err;
+ }
+ sc->rx_ring_size = cmd.data0;
+ max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
+
+ bytes = sizeof (*sc->ss) * sc->num_slices;
+ sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (sc->ss == NULL)
+ return (ENOMEM);
+ for (i = 0; i < sc->num_slices; i++) {
+ ss = &sc->ss[i];
+
+ ss->sc = sc;
+
+ /* allocate per-slice rx interrupt queues */
+
+ bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
+ err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
+ if (err != 0)
+ goto abort;
+ ss->rx_done.entry = ss->rx_done.dma.addr;
+ bzero(ss->rx_done.entry, bytes);
+
+ /*
+ * allocate the per-slice firmware stats; stats
+ * (including tx) are used only on the first
+ * slice for now
+ */
+ if (i > 0)
+ continue;
+
+ bytes = sizeof (*ss->fw_stats);
+ err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
+ sizeof (*ss->fw_stats), 64);
+ if (err != 0)
+ goto abort;
+ ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
+ snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
+ "%s:tx(%d)", device_get_nameunit(sc->dev), i);
+ mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
+ }
+
+ return (0);
+
+abort:
+ mxge_free_slices(sc);
+ return (ENOMEM);
+}
+
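A quick standalone check of the max_intr_slots arithmetic above; the two-32-bit-word layout of mcp_dma_addr_t is an assumption taken from mxge_mcp.h, and the ring size is illustrative:

#include <stdio.h>
#include <stdint.h>

struct mcp_dma_addr {	/* assumed to match mcp_dma_addr_t: 8 bytes */
	uint32_t high;
	uint32_t low;
};

int
main(void)
{
	size_t rx_ring_size = 1024;		/* illustrative firmware reply */
	size_t entries = rx_ring_size / sizeof(struct mcp_dma_addr);
	size_t max_intr_slots = 2 * entries;	/* as computed above */

	printf("%zu rx entries -> %zu intr slots\n", entries, max_intr_slots);
	return (0);
}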
+static void
+mxge_slice_probe(mxge_softc_t *sc)
+{
+ mxge_cmd_t cmd;
+ char *old_fw;
+ int msix_cnt, status, max_intr_slots;
+
+ sc->num_slices = 1;
+ /*
+ * don't enable multiple slices if they are disabled by the
+ * max_slices tunable, or if this is not an SMP system
+ */
+
+ if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
+ return;
+
+ /* see how many MSI-X interrupts are available */
+ msix_cnt = pci_msix_count(sc->dev);
+ if (msix_cnt < 2)
+ return;
+
+ /* now load the slice-aware firmware and see what it supports */
+ old_fw = sc->fw_name;
+ if (old_fw == mxge_fw_aligned)
+ sc->fw_name = mxge_fw_rss_aligned;
+ else
+ sc->fw_name = mxge_fw_rss_unaligned;
+ status = mxge_load_firmware(sc, 0);
+ if (status != 0) {
+ device_printf(sc->dev, "Falling back to a single slice\n");
+ return;
+ }
+
+ /* try to send a reset command to the card to see if it
+ is alive */
+ memset(&cmd, 0, sizeof (cmd));
+ status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
+ if (status != 0) {
+ device_printf(sc->dev, "failed reset\n");
+ goto abort_with_fw;
+ }
+
+ /* get rx ring size */
+ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
+ if (status != 0) {
+ device_printf(sc->dev, "Cannot determine rx ring size\n");
+ goto abort_with_fw;
+ }
+ max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
+
+ /* tell it the size of the interrupt queues */
+ cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
+ status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
+ if (status != 0) {
+ device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
+ goto abort_with_fw;
+ }
+
+ /* ask the maximum number of slices it supports */
+ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
+ if (status != 0) {
+ device_printf(sc->dev,
+ "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
+ goto abort_with_fw;
+ }
+ sc->num_slices = cmd.data0;
+ if (sc->num_slices > msix_cnt)
+ sc->num_slices = msix_cnt;
+
+ if (mxge_max_slices == -1) {
+ /* cap to number of CPUs in system */
+ if (sc->num_slices > mp_ncpus)
+ sc->num_slices = mp_ncpus;
+ } else {
+ if (sc->num_slices > mxge_max_slices)
+ sc->num_slices = mxge_max_slices;
+ }
+ /* make sure it is a power of two */
+ while (sc->num_slices & (sc->num_slices - 1))
+ sc->num_slices--;
+
+ if (mxge_verbose)
+ device_printf(sc->dev, "using %d slices\n",
+ sc->num_slices);
+
+ return;
+
+abort_with_fw:
+ sc->fw_name = old_fw;
+ (void) mxge_load_firmware(sc, 0);
+}
+
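The slice-count negotiation above condenses to the following standalone sketch; the function name and the sample inputs are ours:

#include <stdio.h>

static int
choose_num_slices(int fw_max, int msix_cnt, int ncpus, int tunable)
{
	int n = fw_max;

	if (n > msix_cnt)		/* one MSI-X vector per slice */
		n = msix_cnt;
	if (tunable == -1) {
		if (n > ncpus)		/* cap to CPUs in the system */
			n = ncpus;
	} else if (n > tunable)
		n = tunable;
	while (n & (n - 1))		/* round down to a power of two */
		n--;
	return (n);
}

int
main(void)
{
	/* e.g. firmware offers 8 queues, 5 MSI-X vectors, 6 CPUs */
	printf("%d slices\n", choose_num_slices(8, 5, 6, -1));	/* prints 4 */
	return (0);
}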
+static int
+mxge_add_msix_irqs(mxge_softc_t *sc)
+{
+ size_t bytes;
+ int count, err, i, rid;
+
+ rid = PCIR_BAR(2);
+ sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
+ &rid, RF_ACTIVE);
+
+ if (sc->msix_table_res == NULL) {
+ device_printf(sc->dev, "couldn't alloc MSIX table res\n");
+ return ENXIO;
+ }
+
+ count = sc->num_slices;
+ err = pci_alloc_msix(sc->dev, &count);
+ if (err != 0) {
+ device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
+ "err = %d \n", sc->num_slices, err);
+ goto abort_with_msix_table;
+ }
+ if (count < sc->num_slices) {
+ device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
+ count, sc->num_slices);
+ device_printf(sc->dev,
+ "Try setting hw.mxge.max_slices to %d\n",
+ count);
+ err = ENOSPC;
+ goto abort_with_msix;
+ }
+ bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
+ sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (sc->msix_irq_res == NULL) {
+ err = ENOMEM;
+ goto abort_with_msix;
+ }
+
+ for (i = 0; i < sc->num_slices; i++) {
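+		/* IRQ resource ids for MSI-X messages are 1-based */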
+ rid = i + 1;
+ sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
+ SYS_RES_IRQ,
+ &rid, RF_ACTIVE);
+ if (sc->msix_irq_res[i] == NULL) {
+ device_printf(sc->dev, "couldn't allocate IRQ res"
+ " for message %d\n", i);
+ err = ENXIO;
+ goto abort_with_res;
+ }
+ }
+
+	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
+	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (sc->msix_ih == NULL) {
+		/* M_NOWAIT allocations can fail; don't deref NULL below */
+		err = ENOMEM;
+		goto abort_with_res;
+	}
+
+ for (i = 0; i < sc->num_slices; i++) {
+ err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
+ INTR_TYPE_NET | INTR_MPSAFE,
+ NULL, mxge_intr, &sc->ss[i],
+ &sc->msix_ih[i]);
+ if (err != 0) {
+ device_printf(sc->dev, "couldn't setup intr for "
+ "message %d\n", i);
+ goto abort_with_intr;
+ }
+ }
+
+ if (mxge_verbose) {
+ device_printf(sc->dev, "using %d msix IRQs:",
+ sc->num_slices);
+ for (i = 0; i < sc->num_slices; i++)
+ printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
+ printf("\n");
+ }
+ return (0);
+
+abort_with_intr:
+ for (i = 0; i < sc->num_slices; i++) {
+ if (sc->msix_ih[i] != NULL) {
+ bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
+ sc->msix_ih[i]);
+ sc->msix_ih[i] = NULL;
+ }
+ }
+ free(sc->msix_ih, M_DEVBUF);
+
+abort_with_res:
+ for (i = 0; i < sc->num_slices; i++) {
+ rid = i + 1;
+ if (sc->msix_irq_res[i] != NULL)
+ bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
+ sc->msix_irq_res[i]);
+ sc->msix_irq_res[i] = NULL;
+ }
+ free(sc->msix_irq_res, M_DEVBUF);
+
+abort_with_msix:
+ pci_release_msi(sc->dev);
+
+abort_with_msix_table:
+ bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
+ sc->msix_table_res);
+
+	return (err);
+}
+
+static int
+mxge_add_single_irq(mxge_softc_t *sc)
+{
+ int count, err, rid;
+
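+	/* prefer a single MSI message (rid 1); fall back to INTx (rid 0) */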
+ count = pci_msi_count(sc->dev);
+ if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
+ rid = 1;
+ sc->msi_enabled = 1;
+ } else {
+ rid = 0;
+ }
+	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
+					     RF_SHAREABLE | RF_ACTIVE);
+ if (sc->irq_res == NULL) {
+ device_printf(sc->dev, "could not alloc interrupt\n");
+		return (ENXIO);
+ }
+ if (mxge_verbose)
+ device_printf(sc->dev, "using %s irq %ld\n",
+ sc->msi_enabled ? "MSI" : "INTx",
+ rman_get_start(sc->irq_res));
+ err = bus_setup_intr(sc->dev, sc->irq_res,
+ INTR_TYPE_NET | INTR_MPSAFE,
+ NULL, mxge_intr, &sc->ss[0], &sc->ih);
+ if (err != 0) {
+ bus_release_resource(sc->dev, SYS_RES_IRQ,
+ sc->msi_enabled ? 1 : 0, sc->irq_res);
+ if (sc->msi_enabled)
+ pci_release_msi(sc->dev);
+ }
+	return (err);
+}
+
+static void
+mxge_rem_msix_irqs(mxge_softc_t *sc)
+{
+ int i, rid;
+
+ for (i = 0; i < sc->num_slices; i++) {
+ if (sc->msix_ih[i] != NULL) {
+ bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
+ sc->msix_ih[i]);
+ sc->msix_ih[i] = NULL;
+ }
+ }
+ free(sc->msix_ih, M_DEVBUF);
+
+ for (i = 0; i < sc->num_slices; i++) {
+ rid = i + 1;
+ if (sc->msix_irq_res[i] != NULL)
+ bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
+ sc->msix_irq_res[i]);
+ sc->msix_irq_res[i] = NULL;
+ }
+ free(sc->msix_irq_res, M_DEVBUF);
+
+ bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
+ sc->msix_table_res);
+ pci_release_msi(sc->dev);
+}
+
+static void
+mxge_rem_single_irq(mxge_softc_t *sc)
+{
+ bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
+ bus_release_resource(sc->dev, SYS_RES_IRQ,
+ sc->msi_enabled ? 1 : 0, sc->irq_res);
+ if (sc->msi_enabled)
+ pci_release_msi(sc->dev);
+}
+
+static void
+mxge_rem_irq(mxge_softc_t *sc)
+{
+ if (sc->num_slices > 1)
+ mxge_rem_msix_irqs(sc);
+ else
+ mxge_rem_single_irq(sc);
+}
+
+static int
+mxge_add_irq(mxge_softc_t *sc)
+{
+ int err;
+
+ if (sc->num_slices > 1)
+ err = mxge_add_msix_irqs(sc);
+ else
+ err = mxge_add_single_irq(sc);
+
+	/*
+	 * XXX this block is intentionally disabled by the "0 &&";
+	 * it appears to be a leftover debugging hook that exercised
+	 * MSI-X teardown and re-setup
+	 */
+	if (0 && err == 0 && sc->num_slices > 1) {
+ mxge_rem_msix_irqs(sc);
+ err = mxge_add_msix_irqs(sc);
+ }
+	return (err);
}
+
static int
mxge_attach(device_t dev)
{
mxge_softc_t *sc = device_get_softc(dev);
struct ifnet *ifp;
- int count, rid, err;
+ int err, rid;
sc->dev = dev;
mxge_fetch_tunables(sc);
err = bus_dma_tag_create(NULL, /* parent */
1, /* alignment */
- 4096, /* boundary */
+ 0, /* boundary */
BUS_SPACE_MAXADDR, /* low */
BUS_SPACE_MAXADDR, /* high */
NULL, NULL, /* filter */
65536 + 256, /* maxsize */
MXGE_MAX_SEND_DESC, /* num segs */
- 4096, /* maxsegsize */
+ 65536, /* maxsegsize */
0, /* flags */
NULL, NULL, /* lock */
&sc->parent_dmat); /* tag */
@@ -3452,12 +4119,11 @@ mxge_attach(device_t dev)
err = ENOSPC;
goto abort_with_parent_dmat;
}
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+
snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
device_get_nameunit(dev));
mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
- snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx",
- device_get_nameunit(dev));
- mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF);
snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
"%s:drv", device_get_nameunit(dev));
mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
@@ -3510,57 +4176,37 @@ mxge_attach(device_t dev)
if (err != 0)
goto abort_with_cmd_dma;
- err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
- sizeof (*sc->fw_stats), 64);
- if (err != 0)
- goto abort_with_zeropad_dma;
- sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;
-
err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
if (err != 0)
- goto abort_with_fw_stats;
+ goto abort_with_zeropad_dma;
- /* Add our ithread */
- count = pci_msi_count(dev);
- if (count == 1 && pci_alloc_msi(dev, &count) == 0) {
- rid = 1;
- sc->msi_enabled = 1;
- } else {
- rid = 0;
- }
- sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
- 1, RF_SHAREABLE | RF_ACTIVE);
- if (sc->irq_res == NULL) {
- device_printf(dev, "could not alloc interrupt\n");
- goto abort_with_dmabench;
- }
- if (mxge_verbose)
- device_printf(dev, "using %s irq %ld\n",
- sc->msi_enabled ? "MSI" : "INTx",
- rman_get_start(sc->irq_res));
/* select & load the firmware */
err = mxge_select_firmware(sc);
if (err != 0)
- goto abort_with_irq_res;
+ goto abort_with_dmabench;
sc->intr_coal_delay = mxge_intr_coal_delay;
+
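+	/*
+	 * determine the slice count and allocate per-slice state before
+	 * the reset below, which presumably sizes per-slice resources
+	 */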
+ mxge_slice_probe(sc);
+ err = mxge_alloc_slices(sc);
+ if (err != 0)
+ goto abort_with_dmabench;
+
err = mxge_reset(sc, 0);
if (err != 0)
- goto abort_with_irq_res;
+ goto abort_with_slices;
err = mxge_alloc_rings(sc);
if (err != 0) {
device_printf(sc->dev, "failed to allocate rings\n");
- goto abort_with_irq_res;
+		goto abort_with_slices;
}
- err = bus_setup_intr(sc->dev, sc->irq_res,
- INTR_TYPE_NET | INTR_MPSAFE,
- NULL, mxge_intr, sc, &sc->ih);
+ err = mxge_add_irq(sc);
if (err != 0) {
+ device_printf(sc->dev, "failed to add irq\n");
goto abort_with_rings;
}
- /* hook into the network stack */
- if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+
ifp->if_baudrate = 100000000;
ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
@@ -3598,15 +4244,10 @@ mxge_attach(device_t dev)
abort_with_rings:
mxge_free_rings(sc);
-abort_with_irq_res:
- bus_release_resource(dev, SYS_RES_IRQ,
- sc->msi_enabled ? 1 : 0, sc->irq_res);
- if (sc->msi_enabled)
- pci_release_msi(dev);
+abort_with_slices:
+ mxge_free_slices(sc);
abort_with_dmabench:
mxge_dma_free(&sc->dmabench_dma);
-abort_with_fw_stats:
- mxge_dma_free(&sc->fw_stats_dma);
abort_with_zeropad_dma:
mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
@@ -3616,7 +4257,6 @@ abort_with_mem_res:
abort_with_lock:
pci_disable_busmaster(dev);
mtx_destroy(&sc->cmd_mtx);
- mtx_destroy(&sc->tx_mtx);
mtx_destroy(&sc->driver_mtx);
if_free(ifp);
abort_with_parent_dmat:
@@ -3637,29 +4277,23 @@ mxge_detach(device_t dev)
return EBUSY;
}
mtx_lock(&sc->driver_mtx);
+ callout_stop(&sc->co_hdl);
if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
mxge_close(sc);
- callout_stop(&sc->co_hdl);
mtx_unlock(&sc->driver_mtx);
ether_ifdetach(sc->ifp);
ifmedia_removeall(&sc->media);
mxge_dummy_rdma(sc, 0);
- bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
+ mxge_rem_sysctls(sc);
+ mxge_rem_irq(sc);
mxge_free_rings(sc);
- bus_release_resource(dev, SYS_RES_IRQ,
- sc->msi_enabled ? 1 : 0, sc->irq_res);
- if (sc->msi_enabled)
- pci_release_msi(dev);
-
- sc->rx_done.entry = NULL;
- mxge_dma_free(&sc->fw_stats_dma);
+ mxge_free_slices(sc);
mxge_dma_free(&sc->dmabench_dma);
mxge_dma_free(&sc->zeropad_dma);
mxge_dma_free(&sc->cmd_dma);
bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
pci_disable_busmaster(dev);
mtx_destroy(&sc->cmd_mtx);
- mtx_destroy(&sc->tx_mtx);
mtx_destroy(&sc->driver_mtx);
if_free(sc->ifp);
bus_dma_tag_destroy(sc->parent_dmat);