diff options
author | Andrew Gallatin <gallatin@FreeBSD.org> | 2008-01-15 20:34:49 +0000 |
---|---|---|
committer | Andrew Gallatin <gallatin@FreeBSD.org> | 2008-01-15 20:34:49 +0000 |
commit | 1e413cf93298b5b97441a21d9a50fdcd0ee9945e (patch) | |
tree | 01abb766903631b9fe718b5563f563b63e330fdc /sys/dev/mxge/if_mxge.c | |
parent | 707dd4784955cf6abcaeb8f2296ec3a46c3e2d03 (diff) | |
download | src-1e413cf93298b5b97441a21d9a50fdcd0ee9945e.tar.gz src-1e413cf93298b5b97441a21d9a50fdcd0ee9945e.zip |
Add optional support to mxge for MSI-X interrupts and multiple receive
queues (which we call slices). The NIC will steer traffic into up to
hw.mxge.max_slices different receive rings based on a configurable
hash type (hw.mxge.rss_hash_type).
Currently the driver defaults to using a single slice, so the default
behavior is unchanged. Also, transmit from non-zero slices is
disabled currently.
Notes:
svn path=/head/; revision=175365
Diffstat (limited to 'sys/dev/mxge/if_mxge.c')
-rw-r--r-- | sys/dev/mxge/if_mxge.c | 1568 |
1 file changed, 1101 insertions(+), 467 deletions(-)
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c index b3fc7e0cede4..c79a9a411ef1 100644 --- a/sys/dev/mxge/if_mxge.c +++ b/sys/dev/mxge/if_mxge.c @@ -1,6 +1,6 @@ /****************************************************************************** -Copyright (c) 2006-2007, Myricom Inc. +Copyright (c) 2006-2008, Myricom Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> #include <sys/bus.h> #include <sys/rman.h> +#include <sys/smp.h> #include <dev/pci/pcireg.h> #include <dev/pci/pcivar.h> @@ -82,6 +83,7 @@ __FBSDID("$FreeBSD$"); #include <dev/mxge/mxge_mcp.h> #include <dev/mxge/mcp_gen_header.h> +/*#define MXGE_FAKE_IFP*/ #include <dev/mxge/if_mxge_var.h> /* tunable params */ @@ -93,8 +95,13 @@ static int mxge_flow_control = 1; static int mxge_verbose = 0; static int mxge_lro_cnt = 8; static int mxge_ticks; +static int mxge_max_slices = 1; +static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; +static int mxge_always_promisc = 0; static char *mxge_fw_unaligned = "mxge_ethp_z8e"; static char *mxge_fw_aligned = "mxge_eth_z8e"; +static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; +static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; static int mxge_probe(device_t dev); static int mxge_attach(device_t dev); @@ -126,7 +133,7 @@ DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); MODULE_DEPEND(mxge, firmware, 1, 1, 1); MODULE_DEPEND(mxge, zlib, 1, 1, 1); -static int mxge_load_firmware(mxge_softc_t *sc); +static int mxge_load_firmware(mxge_softc_t *sc, int adopt); static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); static int mxge_close(mxge_softc_t *sc); static int mxge_open(mxge_softc_t *sc); @@ -195,17 +202,26 @@ mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, { int err; device_t dev = sc->dev; + bus_size_t boundary, maxsegsize; + + if (bytes > 4096 && alignment == 4096) { + boundary = 0; 
+ maxsegsize = bytes; + } else { + boundary = 4096; + maxsegsize = 4096; + } /* allocate DMAable memory tags */ err = bus_dma_tag_create(sc->parent_dmat, /* parent */ alignment, /* alignment */ - 4096, /* boundary */ + boundary, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ bytes, /* maxsize */ 1, /* num segs */ - 4096, /* maxsegsize */ + maxsegsize, /* maxsegsize */ BUS_DMA_COHERENT, /* flags */ NULL, NULL, /* lock */ &dma->dmat); /* tag */ @@ -453,7 +469,7 @@ mxge_dma_test(mxge_softc_t *sc, int test_type) * transfers took to complete. */ - len = sc->tx.boundary; + len = sc->tx_boundary; cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); @@ -509,9 +525,9 @@ abort: * already been enabled, then it must use a firmware image which works * around unaligned completion packets (ethp_z8e.dat), and it should * also ensure that it never gives the device a Read-DMA which is - * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is + * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is * enabled, then the driver should use the aligned (eth_z8e.dat) - * firmware image, and set tx.boundary to 4KB. + * firmware image, and set tx_boundary to 4KB. */ static int @@ -521,7 +537,7 @@ mxge_firmware_probe(mxge_softc_t *sc) int reg, status; uint16_t pectl; - sc->tx.boundary = 4096; + sc->tx_boundary = 4096; /* * Verify the max read request size was set to 4KB * before trying the test with 4KB. @@ -531,7 +547,7 @@ mxge_firmware_probe(mxge_softc_t *sc) if ((pectl & (5 << 12)) != (5 << 12)) { device_printf(dev, "Max Read Req. size != 4k (0x%x\n", pectl); - sc->tx.boundary = 2048; + sc->tx_boundary = 2048; } } @@ -540,7 +556,7 @@ mxge_firmware_probe(mxge_softc_t *sc) * completions) in order to see if it works on this host. 
*/ sc->fw_name = mxge_fw_aligned; - status = mxge_load_firmware(sc); + status = mxge_load_firmware(sc, 1); if (status != 0) { return status; } @@ -601,12 +617,12 @@ mxge_select_firmware(mxge_softc_t *sc) abort: if (aligned) { sc->fw_name = mxge_fw_aligned; - sc->tx.boundary = 4096; + sc->tx_boundary = 4096; } else { sc->fw_name = mxge_fw_unaligned; - sc->tx.boundary = 2048; + sc->tx_boundary = 2048; } - return (mxge_load_firmware(sc)); + return (mxge_load_firmware(sc, 0)); } union qualhack @@ -923,7 +939,7 @@ mxge_adopt_running_firmware(mxge_softc_t *sc) static int -mxge_load_firmware(mxge_softc_t *sc) +mxge_load_firmware(mxge_softc_t *sc, int adopt) { volatile uint32_t *confirm; volatile char *submit; @@ -936,6 +952,8 @@ mxge_load_firmware(mxge_softc_t *sc) size = sc->sram_size; status = mxge_load_firmware_helper(sc, &size); if (status) { + if (!adopt) + return status; /* Try to use the currently running firmware, if it is new enough */ status = mxge_adopt_running_firmware(sc); @@ -946,7 +964,7 @@ mxge_load_firmware(mxge_softc_t *sc) } device_printf(sc->dev, "Successfully adopted running firmware\n"); - if (sc->tx.boundary == 4096) { + if (sc->tx_boundary == 4096) { device_printf(sc->dev, "Using firmware currently running on NIC" ". 
For optimal\n"); @@ -955,7 +973,7 @@ mxge_load_firmware(mxge_softc_t *sc) "firmware\n"); } sc->fw_name = mxge_fw_unaligned; - sc->tx.boundary = 2048; + sc->tx_boundary = 2048; return 0; } /* clear confirmation addr */ @@ -1049,6 +1067,9 @@ mxge_change_promisc(mxge_softc_t *sc, int promisc) mxge_cmd_t cmd; int status; + if (mxge_always_promisc) + promisc = 1; + if (promisc) status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); @@ -1153,10 +1174,11 @@ mxge_max_mtu(mxge_softc_t *sc) static int mxge_reset(mxge_softc_t *sc, int interrupts_setup) { - + struct mxge_slice_state *ss; + mxge_rx_done_t *rx_done; + volatile uint32_t *irq_claim; mxge_cmd_t cmd; - size_t bytes; - int status; + int slice, status; /* try to send a reset command to the card to see if it is alive */ @@ -1169,15 +1191,59 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup) mxge_dummy_rdma(sc, 1); + + /* set the intrq size */ + cmd.data0 = sc->rx_ring_size; + status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); + + /* + * Even though we already know how many slices are supported + * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES + * has magic side effects, and must be called after a reset. + * It must be called prior to calling any RSS related cmds, + * including assigning an interrupt queue for anything but + * slice 0. It must also be called *after* + * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by + * the firmware to compute offsets. 
+ */ + + if (sc->num_slices > 1) { + /* ask the maximum number of slices it supports */ + status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, + &cmd); + if (status != 0) { + device_printf(sc->dev, + "failed to get number of slices\n"); + return status; + } + /* + * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior + * to setting up the interrupt queue DMA + */ + cmd.data0 = sc->num_slices; + cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; + status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, + &cmd); + if (status != 0) { + device_printf(sc->dev, + "failed to set number of slices\n"); + return status; + } + } + + if (interrupts_setup) { /* Now exchange information about interrupts */ - bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); - memset(sc->rx_done.entry, 0, bytes); - cmd.data0 = (uint32_t)bytes; - status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); - cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr); - cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr); - status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); + for (slice = 0; slice < sc->num_slices; slice++) { + rx_done = &sc->ss[slice].rx_done; + memset(rx_done->entry, 0, sc->rx_ring_size); + cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); + cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); + cmd.data2 = slice; + status |= mxge_send_cmd(sc, + MXGEFW_CMD_SET_INTRQ_DMA, + &cmd); + } } status |= mxge_send_cmd(sc, @@ -1187,7 +1253,7 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup) sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); - sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); + irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, @@ -1205,23 +1271,30 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup) /* run a DMA benchmark */ (void) mxge_dma_test(sc, 
MXGEFW_DMA_TEST); - /* reset mcp/driver shared state back to 0 */ - sc->rx_done.idx = 0; - sc->rx_done.cnt = 0; - sc->tx.req = 0; - sc->tx.done = 0; - sc->tx.pkt_done = 0; - sc->tx.wake = 0; - sc->tx_defrag = 0; - sc->tx.stall = 0; - sc->rx_big.cnt = 0; - sc->rx_small.cnt = 0; + for (slice = 0; slice < sc->num_slices; slice++) { + ss = &sc->ss[slice]; + + ss->irq_claim = irq_claim + (2 * slice); + /* reset mcp/driver shared state back to 0 */ + ss->rx_done.idx = 0; + ss->rx_done.cnt = 0; + ss->tx.req = 0; + ss->tx.done = 0; + ss->tx.pkt_done = 0; + ss->tx.wake = 0; + ss->tx.defrag = 0; + ss->tx.stall = 0; + ss->rx_big.cnt = 0; + ss->rx_small.cnt = 0; + ss->lro_bad_csum = 0; + ss->lro_queued = 0; + ss->lro_flushed = 0; + if (ss->fw_stats != NULL) { + ss->fw_stats->valid = 0; + ss->fw_stats->send_done_count = 0; + } + } sc->rdma_tags_available = 15; - sc->fw_stats->valid = 0; - sc->fw_stats->send_done_count = 0; - sc->lro_bad_csum = 0; - sc->lro_queued = 0; - sc->lro_flushed = 0; status = mxge_update_mac_address(sc); mxge_change_promisc(sc, 0); mxge_change_pause(sc, sc->pause); @@ -1340,15 +1413,38 @@ mxge_handle_be32(SYSCTL_HANDLER_ARGS) } static void +mxge_rem_sysctls(mxge_softc_t *sc) +{ + struct mxge_slice_state *ss; + int slice; + + if (sc->slice_sysctl_tree == NULL) + return; + + for (slice = 0; slice < sc->num_slices; slice++) { + ss = &sc->ss[slice]; + if (ss == NULL || ss->sysctl_tree == NULL) + continue; + sysctl_ctx_free(&ss->sysctl_ctx); + ss->sysctl_tree = NULL; + } + sysctl_ctx_free(&sc->slice_sysctl_ctx); + sc->slice_sysctl_tree = NULL; +} + +static void mxge_add_sysctls(mxge_softc_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; mcp_irq_data_t *fw; + struct mxge_slice_state *ss; + int slice; + char slice_num[8]; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); - fw = sc->fw_stats; + fw = sc->ss[0].fw_stats; /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, @@ 
-1369,7 +1465,7 @@ mxge_add_sysctls(mxge_softc_t *sc) 0, "tx_boundary"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", - CTLFLAG_RD, &sc->tx.boundary, + CTLFLAG_RD, &sc->tx_boundary, 0, "tx_boundary"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", @@ -1482,40 +1578,6 @@ mxge_add_sysctls(mxge_softc_t *sc) 0, mxge_handle_be32, "I", "dropped_unicast_filtered"); - /* host counters exported for debugging */ - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "rx_small_cnt", - CTLFLAG_RD, &sc->rx_small.cnt, - 0, "rx_small_cnt"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "rx_big_cnt", - CTLFLAG_RD, &sc->rx_big.cnt, - 0, "rx_small_cnt"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "tx_req", - CTLFLAG_RD, &sc->tx.req, - 0, "tx_req"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "tx_done", - CTLFLAG_RD, &sc->tx.done, - 0, "tx_done"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "tx_pkt_done", - CTLFLAG_RD, &sc->tx.pkt_done, - 0, "tx_done"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "tx_stall", - CTLFLAG_RD, &sc->tx.stall, - 0, "tx_stall"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "tx_wake", - CTLFLAG_RD, &sc->tx.wake, - 0, "tx_wake"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "tx_defrag", - CTLFLAG_RD, &sc->tx_defrag, - 0, "tx_defrag"); - /* verbose printing? 
*/ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "verbose", @@ -1529,21 +1591,76 @@ mxge_add_sysctls(mxge_softc_t *sc) 0, mxge_change_lro, "I", "number of lro merge queues"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "lro_flushed", CTLFLAG_RD, &sc->lro_flushed, - 0, "number of lro merge queues flushed"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "lro_queued", CTLFLAG_RD, &sc->lro_queued, - 0, "number of frames appended to lro merge queues"); + /* add counters exported for debugging from all slices */ + sysctl_ctx_init(&sc->slice_sysctl_ctx); + sc->slice_sysctl_tree = + SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, + "slice", CTLFLAG_RD, 0, ""); + + for (slice = 0; slice < sc->num_slices; slice++) { + ss = &sc->ss[slice]; + sysctl_ctx_init(&ss->sysctl_ctx); + ctx = &ss->sysctl_ctx; + children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); + sprintf(slice_num, "%d", slice); + ss->sysctl_tree = + SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, + CTLFLAG_RD, 0, ""); + children = SYSCTL_CHILDREN(ss->sysctl_tree); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "rx_small_cnt", + CTLFLAG_RD, &ss->rx_small.cnt, + 0, "rx_small_cnt"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "rx_big_cnt", + CTLFLAG_RD, &ss->rx_big.cnt, + 0, "rx_small_cnt"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tx_req", + CTLFLAG_RD, &ss->tx.req, + 0, "tx_req"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, + 0, "number of lro merge queues flushed"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "lro_queued", CTLFLAG_RD, &ss->lro_queued, + 0, "number of frames appended to lro merge" + "queues"); + + /* only transmit from slice 0 for now */ + if (slice > 0) + continue; + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tx_done", + CTLFLAG_RD, &ss->tx.done, + 0, "tx_done"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tx_pkt_done", + CTLFLAG_RD, &ss->tx.pkt_done, + 0, "tx_done"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tx_stall", + CTLFLAG_RD, 
&ss->tx.stall, + 0, "tx_stall"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tx_wake", + CTLFLAG_RD, &ss->tx.wake, + 0, "tx_wake"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tx_defrag", + CTLFLAG_RD, &ss->tx.defrag, + 0, "tx_defrag"); + } } /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy backwards one at a time and handle ring wraps */ static inline void -mxge_submit_req_backwards(mxge_tx_buf_t *tx, +mxge_submit_req_backwards(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt) { int idx, starting_slot; @@ -1565,7 +1682,7 @@ mxge_submit_req_backwards(mxge_tx_buf_t *tx, */ static inline void -mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, +mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt) { int idx, i; @@ -1614,10 +1731,10 @@ mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, } static void -mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, - int ip_off) +mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, + int busdma_seg_cnt, int ip_off) { - mxge_tx_buf_t *tx; + mxge_tx_ring_t *tx; mcp_kreq_ether_send_t *req; bus_dma_segment_t *seg; struct ip *ip; @@ -1628,7 +1745,7 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, uint16_t pseudo_hdr_offset, cksum_offset, mss; uint8_t flags, flags_next; static int once; - + mss = m->m_pkthdr.tso_segsz; /* negative cum_len signifies to the @@ -1641,15 +1758,15 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, it to a scratch buffer if not */ if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { m_copydata(m, 0, ip_off + sizeof (*ip), - sc->scratch); - ip = (struct ip *)(sc->scratch + ip_off); + ss->scratch); + ip = (struct ip *)(ss->scratch + ip_off); } else { ip = (struct ip *)(mtod(m, char *) + ip_off); } if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) + sizeof (*tcp))) { m_copydata(m, 0, ip_off + (ip->ip_hl << 2) - + sizeof (*tcp), sc->scratch); + + sizeof (*tcp), 
ss->scratch); ip = (struct ip *)(mtod(m, char *) + ip_off); } @@ -1666,7 +1783,7 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, * the checksum by parsing the header. */ pseudo_hdr_offset = htobe16(mss); - tx = &sc->tx; + tx = &ss->tx; req = tx->req_list; seg = tx->seg_list; cnt = 0; @@ -1761,7 +1878,7 @@ mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, drop: bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); m_freem(m); - sc->ifp->if_oerrors++; + ss->sc->ifp->if_oerrors++; if (!once) { printf("tx->max_desc exceeded via TSO!\n"); printf("mss = %d, %ld, %d!\n", mss, @@ -1805,22 +1922,23 @@ mxge_vlan_tag_insert(struct mbuf *m) } static void -mxge_encap(mxge_softc_t *sc, struct mbuf *m) +mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) { + mxge_softc_t *sc; mcp_kreq_ether_send_t *req; bus_dma_segment_t *seg; struct mbuf *m_tmp; struct ifnet *ifp; - mxge_tx_buf_t *tx; + mxge_tx_ring_t *tx; struct ip *ip; int cnt, cum_len, err, i, idx, odd_flag, ip_off; uint16_t pseudo_hdr_offset; uint8_t flags, cksum_offset; - + sc = ss->sc; ifp = sc->ifp; - tx = &sc->tx; + tx = &ss->tx; ip_off = sizeof (struct ether_header); if (m->m_flags & M_VLANTAG) { @@ -1842,7 +1960,7 @@ mxge_encap(mxge_softc_t *sc, struct mbuf *m) if (m_tmp == NULL) { goto drop; } - sc->tx_defrag++; + ss->tx.defrag++; m = m_tmp; err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, @@ -1861,7 +1979,7 @@ mxge_encap(mxge_softc_t *sc, struct mbuf *m) /* TSO is different enough, we handle it in another routine */ if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { - mxge_encap_tso(sc, m, cnt, ip_off); + mxge_encap_tso(ss, m, cnt, ip_off); return; } @@ -1876,8 +1994,8 @@ mxge_encap(mxge_softc_t *sc, struct mbuf *m) it to a scratch buffer if not */ if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { m_copydata(m, 0, ip_off + sizeof (*ip), - sc->scratch); - ip = (struct ip *)(sc->scratch + ip_off); + ss->scratch); + ip = (struct ip *)(ss->scratch + 
ip_off); } else { ip = (struct ip *)(mtod(m, char *) + ip_off); } @@ -1963,14 +2081,16 @@ drop: static inline void -mxge_start_locked(mxge_softc_t *sc) +mxge_start_locked(struct mxge_slice_state *ss) { + mxge_softc_t *sc; struct mbuf *m; struct ifnet *ifp; - mxge_tx_buf_t *tx; + mxge_tx_ring_t *tx; + sc = ss->sc; ifp = sc->ifp; - tx = &sc->tx; + tx = &ss->tx; while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { @@ -1980,7 +2100,7 @@ mxge_start_locked(mxge_softc_t *sc) BPF_MTAP(ifp, m); /* give it to the nic */ - mxge_encap(sc, m); + mxge_encap(ss, m); } /* ran out of transmit slots */ if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { @@ -1993,11 +2113,13 @@ static void mxge_start(struct ifnet *ifp) { mxge_softc_t *sc = ifp->if_softc; + struct mxge_slice_state *ss; - - mtx_lock(&sc->tx_mtx); - mxge_start_locked(sc); - mtx_unlock(&sc->tx_mtx); + /* only use the first slice for now */ + ss = &sc->ss[0]; + mtx_lock(&ss->tx.mtx); + mxge_start_locked(ss); + mtx_unlock(&ss->tx.mtx); } /* @@ -2025,11 +2147,11 @@ mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, } static int -mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx) +mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) { bus_dma_segment_t seg; struct mbuf *m; - mxge_rx_buf_t *rx = &sc->rx_small; + mxge_rx_ring_t *rx = &ss->rx_small; int cnt, err; m = m_gethdr(M_DONTWAIT, MT_DATA); @@ -2058,11 +2180,11 @@ done: } static int -mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx) +mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) { bus_dma_segment_t seg[3]; struct mbuf *m; - mxge_rx_buf_t *rx = &sc->rx_big; + mxge_rx_ring_t *rx = &ss->rx_big; int cnt, err, i; if (rx->cl_size == MCLBYTES) @@ -2180,24 +2302,26 @@ mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) static inline void -mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum) +mxge_rx_done_big(struct mxge_slice_state 
*ss, uint32_t len, uint32_t csum) { + mxge_softc_t *sc; struct ifnet *ifp; struct mbuf *m; struct ether_header *eh; - mxge_rx_buf_t *rx; + mxge_rx_ring_t *rx; bus_dmamap_t old_map; int idx; uint16_t tcpudp_csum; + sc = ss->sc; ifp = sc->ifp; - rx = &sc->rx_big; + rx = &ss->rx_big; idx = rx->cnt & rx->mask; rx->cnt += rx->nbufs; /* save a pointer to the received mbuf */ m = rx->info[idx].m; /* try to replace the received mbuf */ - if (mxge_get_buf_big(sc, rx->extra_map, idx)) { + if (mxge_get_buf_big(ss, rx->extra_map, idx)) { /* drop the frame -- the old mbuf is re-cycled */ ifp->if_ierrors++; return; @@ -2218,14 +2342,14 @@ mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum) m->m_pkthdr.rcvif = ifp; m->m_len = m->m_pkthdr.len = len; - ifp->if_ipackets++; + ss->ipackets++; eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { mxge_vlan_tag_remove(m, &csum); } /* if the checksum is valid, mark it in the mbuf header */ if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { - if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) + if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) return; /* otherwise, it was a UDP frame, or a TCP frame which we could not do LRO on. 
Tell the stack that the @@ -2238,24 +2362,26 @@ mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum) } static inline void -mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum) +mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) { + mxge_softc_t *sc; struct ifnet *ifp; struct ether_header *eh; struct mbuf *m; - mxge_rx_buf_t *rx; + mxge_rx_ring_t *rx; bus_dmamap_t old_map; int idx; uint16_t tcpudp_csum; + sc = ss->sc; ifp = sc->ifp; - rx = &sc->rx_small; + rx = &ss->rx_small; idx = rx->cnt & rx->mask; rx->cnt++; /* save a pointer to the received mbuf */ m = rx->info[idx].m; /* try to replace the received mbuf */ - if (mxge_get_buf_small(sc, rx->extra_map, idx)) { + if (mxge_get_buf_small(ss, rx->extra_map, idx)) { /* drop the frame -- the old mbuf is re-cycled */ ifp->if_ierrors++; return; @@ -2276,14 +2402,14 @@ mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum) m->m_pkthdr.rcvif = ifp; m->m_len = m->m_pkthdr.len = len; - ifp->if_ipackets++; + ss->ipackets++; eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { mxge_vlan_tag_remove(m, &csum); } /* if the checksum is valid, mark it in the mbuf header */ if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { - if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) + if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) return; /* otherwise, it was a UDP frame, or a TCP frame which we could not do LRO on. 
Tell the stack that the @@ -2291,15 +2417,14 @@ mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum) m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; } - /* pass the frame up the stack */ (*ifp->if_input)(ifp, m); } static inline void -mxge_clean_rx_done(mxge_softc_t *sc) +mxge_clean_rx_done(struct mxge_slice_state *ss) { - mxge_rx_done_t *rx_done = &sc->rx_done; + mxge_rx_done_t *rx_done = &ss->rx_done; struct lro_entry *lro; int limit = 0; uint16_t length; @@ -2311,9 +2436,9 @@ mxge_clean_rx_done(mxge_softc_t *sc) rx_done->entry[rx_done->idx].length = 0; checksum = rx_done->entry[rx_done->idx].checksum; if (length <= (MHLEN - MXGEFW_PAD)) - mxge_rx_done_small(sc, length, checksum); + mxge_rx_done_small(ss, length, checksum); else - mxge_rx_done_big(sc, length, checksum); + mxge_rx_done_big(ss, length, checksum); rx_done->cnt++; rx_done->idx = rx_done->cnt & rx_done->mask; @@ -2321,25 +2446,25 @@ mxge_clean_rx_done(mxge_softc_t *sc) if (__predict_false(++limit > rx_done->mask / 2)) break; } - while(!SLIST_EMPTY(&sc->lro_active)) { - lro = SLIST_FIRST(&sc->lro_active); - SLIST_REMOVE_HEAD(&sc->lro_active, next); - mxge_lro_flush(sc, lro); + while (!SLIST_EMPTY(&ss->lro_active)) { + lro = SLIST_FIRST(&ss->lro_active); + SLIST_REMOVE_HEAD(&ss->lro_active, next); + mxge_lro_flush(ss, lro); } } static inline void -mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx) +mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) { struct ifnet *ifp; - mxge_tx_buf_t *tx; + mxge_tx_ring_t *tx; struct mbuf *m; bus_dmamap_t map; int idx; - tx = &sc->tx; - ifp = sc->ifp; + tx = &ss->tx; + ifp = ss->sc->ifp; while (tx->pkt_done != mcp_idx) { idx = tx->done & tx->mask; tx->done++; @@ -2364,11 +2489,11 @@ mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx) if (ifp->if_drv_flags & IFF_DRV_OACTIVE && tx->req - tx->done < (tx->mask + 1)/4) { - mtx_lock(&sc->tx_mtx); + mtx_lock(&ss->tx.mtx); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - 
sc->tx.wake++; - mxge_start_locked(sc); - mtx_unlock(&sc->tx_mtx); + ss->tx.wake++; + mxge_start_locked(ss); + mtx_unlock(&ss->tx.mtx); } } @@ -2511,14 +2636,23 @@ mxge_media_probe(mxge_softc_t *sc) static void mxge_intr(void *arg) { - mxge_softc_t *sc = arg; - mcp_irq_data_t *stats = sc->fw_stats; - mxge_tx_buf_t *tx = &sc->tx; - mxge_rx_done_t *rx_done = &sc->rx_done; + struct mxge_slice_state *ss = arg; + mxge_softc_t *sc = ss->sc; + mcp_irq_data_t *stats = ss->fw_stats; + mxge_tx_ring_t *tx = &ss->tx; + mxge_rx_done_t *rx_done = &ss->rx_done; uint32_t send_done_count; uint8_t valid; + /* an interrupt on a non-zero slice is implicitly valid + since MSI-X irqs are not shared */ + if (ss != sc->ss) { + mxge_clean_rx_done(ss); + *ss->irq_claim = be32toh(3); + return; + } + /* make sure the DMA has finished */ if (!stats->valid) { return; @@ -2541,8 +2675,8 @@ mxge_intr(void *arg) send_done_count = be32toh(stats->send_done_count); while ((send_done_count != tx->pkt_done) || (rx_done->entry[rx_done->idx].length != 0)) { - mxge_tx_done(sc, (int)send_done_count); - mxge_clean_rx_done(sc); + mxge_tx_done(ss, (int)send_done_count); + mxge_clean_rx_done(ss); send_done_count = be32toh(stats->send_done_count); } } while (*((volatile uint8_t *) &stats->valid)); @@ -2562,9 +2696,9 @@ mxge_intr(void *arg) sc->need_media_probe = 1; } if (sc->rdma_tags_available != - be32toh(sc->fw_stats->rdma_tags_available)) { + be32toh(stats->rdma_tags_available)) { sc->rdma_tags_available = - be32toh(sc->fw_stats->rdma_tags_available); + be32toh(stats->rdma_tags_available); device_printf(sc->dev, "RDMA timed out! 
%d tags " "left\n", sc->rdma_tags_available); } @@ -2578,8 +2712,8 @@ mxge_intr(void *arg) /* check to see if we have rx token to pass back */ if (valid & 0x1) - *sc->irq_claim = be32toh(3); - *(sc->irq_claim + 1) = be32toh(3); + *ss->irq_claim = be32toh(3); + *(ss->irq_claim + 1) = be32toh(3); } static void @@ -2590,196 +2724,173 @@ mxge_init(void *arg) static void -mxge_free_mbufs(mxge_softc_t *sc) +mxge_free_slice_mbufs(struct mxge_slice_state *ss) { + struct lro_entry *lro_entry; int i; - for (i = 0; i <= sc->rx_big.mask; i++) { - if (sc->rx_big.info[i].m == NULL) + while (!SLIST_EMPTY(&ss->lro_free)) { + lro_entry = SLIST_FIRST(&ss->lro_free); + SLIST_REMOVE_HEAD(&ss->lro_free, next); + free(lro_entry, M_DEVBUF); + } + + for (i = 0; i <= ss->rx_big.mask; i++) { + if (ss->rx_big.info[i].m == NULL) continue; - bus_dmamap_unload(sc->rx_big.dmat, - sc->rx_big.info[i].map); - m_freem(sc->rx_big.info[i].m); - sc->rx_big.info[i].m = NULL; + bus_dmamap_unload(ss->rx_big.dmat, + ss->rx_big.info[i].map); + m_freem(ss->rx_big.info[i].m); + ss->rx_big.info[i].m = NULL; } - for (i = 0; i <= sc->rx_small.mask; i++) { - if (sc->rx_small.info[i].m == NULL) + for (i = 0; i <= ss->rx_small.mask; i++) { + if (ss->rx_small.info[i].m == NULL) continue; - bus_dmamap_unload(sc->rx_small.dmat, - sc->rx_small.info[i].map); - m_freem(sc->rx_small.info[i].m); - sc->rx_small.info[i].m = NULL; + bus_dmamap_unload(ss->rx_small.dmat, + ss->rx_small.info[i].map); + m_freem(ss->rx_small.info[i].m); + ss->rx_small.info[i].m = NULL; } - for (i = 0; i <= sc->tx.mask; i++) { - sc->tx.info[i].flag = 0; - if (sc->tx.info[i].m == NULL) + /* transmit ring used only on the first slice */ + if (ss->tx.info == NULL) + return; + + for (i = 0; i <= ss->tx.mask; i++) { + ss->tx.info[i].flag = 0; + if (ss->tx.info[i].m == NULL) continue; - bus_dmamap_unload(sc->tx.dmat, - sc->tx.info[i].map); - m_freem(sc->tx.info[i].m); - sc->tx.info[i].m = NULL; + bus_dmamap_unload(ss->tx.dmat, + ss->tx.info[i].map); + 
m_freem(ss->tx.info[i].m); + ss->tx.info[i].m = NULL; } } static void -mxge_free_rings(mxge_softc_t *sc) +mxge_free_mbufs(mxge_softc_t *sc) +{ + int slice; + + for (slice = 0; slice < sc->num_slices; slice++) + mxge_free_slice_mbufs(&sc->ss[slice]); +} + +static void +mxge_free_slice_rings(struct mxge_slice_state *ss) { int i; - if (sc->rx_done.entry != NULL) - mxge_dma_free(&sc->rx_done.dma); - sc->rx_done.entry = NULL; - if (sc->tx.req_bytes != NULL) - free(sc->tx.req_bytes, M_DEVBUF); - if (sc->tx.seg_list != NULL) - free(sc->tx.seg_list, M_DEVBUF); - if (sc->rx_small.shadow != NULL) - free(sc->rx_small.shadow, M_DEVBUF); - if (sc->rx_big.shadow != NULL) - free(sc->rx_big.shadow, M_DEVBUF); - if (sc->tx.info != NULL) { - if (sc->tx.dmat != NULL) { - for (i = 0; i <= sc->tx.mask; i++) { - bus_dmamap_destroy(sc->tx.dmat, - sc->tx.info[i].map); + + if (ss->rx_done.entry != NULL) + mxge_dma_free(&ss->rx_done.dma); + ss->rx_done.entry = NULL; + + if (ss->tx.req_bytes != NULL) + free(ss->tx.req_bytes, M_DEVBUF); + ss->tx.req_bytes = NULL; + + if (ss->tx.seg_list != NULL) + free(ss->tx.seg_list, M_DEVBUF); + ss->tx.seg_list = NULL; + + if (ss->rx_small.shadow != NULL) + free(ss->rx_small.shadow, M_DEVBUF); + ss->rx_small.shadow = NULL; + + if (ss->rx_big.shadow != NULL) + free(ss->rx_big.shadow, M_DEVBUF); + ss->rx_big.shadow = NULL; + + if (ss->tx.info != NULL) { + if (ss->tx.dmat != NULL) { + for (i = 0; i <= ss->tx.mask; i++) { + bus_dmamap_destroy(ss->tx.dmat, + ss->tx.info[i].map); } - bus_dma_tag_destroy(sc->tx.dmat); + bus_dma_tag_destroy(ss->tx.dmat); } - free(sc->tx.info, M_DEVBUF); + free(ss->tx.info, M_DEVBUF); } - if (sc->rx_small.info != NULL) { - if (sc->rx_small.dmat != NULL) { - for (i = 0; i <= sc->rx_small.mask; i++) { - bus_dmamap_destroy(sc->rx_small.dmat, - sc->rx_small.info[i].map); + ss->tx.info = NULL; + + if (ss->rx_small.info != NULL) { + if (ss->rx_small.dmat != NULL) { + for (i = 0; i <= ss->rx_small.mask; i++) { + 
bus_dmamap_destroy(ss->rx_small.dmat, + ss->rx_small.info[i].map); } - bus_dmamap_destroy(sc->rx_small.dmat, - sc->rx_small.extra_map); - bus_dma_tag_destroy(sc->rx_small.dmat); + bus_dmamap_destroy(ss->rx_small.dmat, + ss->rx_small.extra_map); + bus_dma_tag_destroy(ss->rx_small.dmat); } - free(sc->rx_small.info, M_DEVBUF); + free(ss->rx_small.info, M_DEVBUF); } - if (sc->rx_big.info != NULL) { - if (sc->rx_big.dmat != NULL) { - for (i = 0; i <= sc->rx_big.mask; i++) { - bus_dmamap_destroy(sc->rx_big.dmat, - sc->rx_big.info[i].map); + ss->rx_small.info = NULL; + + if (ss->rx_big.info != NULL) { + if (ss->rx_big.dmat != NULL) { + for (i = 0; i <= ss->rx_big.mask; i++) { + bus_dmamap_destroy(ss->rx_big.dmat, + ss->rx_big.info[i].map); } - bus_dmamap_destroy(sc->rx_big.dmat, - sc->rx_big.extra_map); - bus_dma_tag_destroy(sc->rx_big.dmat); + bus_dmamap_destroy(ss->rx_big.dmat, + ss->rx_big.extra_map); + bus_dma_tag_destroy(ss->rx_big.dmat); } - free(sc->rx_big.info, M_DEVBUF); + free(ss->rx_big.info, M_DEVBUF); } + ss->rx_big.info = NULL; } -static int -mxge_alloc_rings(mxge_softc_t *sc) +static void +mxge_free_rings(mxge_softc_t *sc) { - mxge_cmd_t cmd; - int tx_ring_size, rx_ring_size; - int tx_ring_entries, rx_ring_entries; - int i, err; - unsigned long bytes; - - /* get ring sizes */ - err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); - tx_ring_size = cmd.data0; - err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); - if (err != 0) { - device_printf(sc->dev, "Cannot determine ring sizes\n"); - goto abort_with_nothing; - } + int slice; - rx_ring_size = cmd.data0; - - tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); - rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t); - IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); - sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; - IFQ_SET_READY(&sc->ifp->if_snd); + for (slice = 0; slice < sc->num_slices; slice++) + mxge_free_slice_rings(&sc->ss[slice]); +} - 
sc->tx.mask = tx_ring_entries - 1; - sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); - sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1; - sc->rx_done.mask = (2 * rx_ring_entries) - 1; +static int +mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, + int tx_ring_entries) +{ + mxge_softc_t *sc = ss->sc; + size_t bytes; + int err, i; err = ENOMEM; - /* allocate interrupt queues */ - bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); - err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096); - if (err != 0) - goto abort_with_nothing; - sc->rx_done.entry = sc->rx_done.dma.addr; - bzero(sc->rx_done.entry, bytes); + /* allocate per-slice receive resources */ - /* allocate the tx request copy block */ - bytes = 8 + - sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4); - sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); - if (sc->tx.req_bytes == NULL) - goto abort_with_alloc; - /* ensure req_list entries are aligned to 8 bytes */ - sc->tx.req_list = (mcp_kreq_ether_send_t *) - ((unsigned long)(sc->tx.req_bytes + 7) & ~7UL); - - /* allocate the tx busdma segment list */ - bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc; - sc->tx.seg_list = (bus_dma_segment_t *) - malloc(bytes, M_DEVBUF, M_WAITOK); - if (sc->tx.seg_list == NULL) - goto abort_with_alloc; + ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; + ss->rx_done.mask = (2 * rx_ring_entries) - 1; /* allocate the rx shadow rings */ - bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow); - sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); - if (sc->rx_small.shadow == NULL) - goto abort_with_alloc; - - bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow); - sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); - if (sc->rx_big.shadow == NULL) - goto abort_with_alloc; - - /* allocate the host info rings */ - bytes = tx_ring_entries * sizeof (*sc->tx.info); - sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); - if 
(sc->tx.info == NULL) - goto abort_with_alloc; - - bytes = rx_ring_entries * sizeof (*sc->rx_small.info); - sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); - if (sc->rx_small.info == NULL) - goto abort_with_alloc; - - bytes = rx_ring_entries * sizeof (*sc->rx_big.info); - sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); - if (sc->rx_big.info == NULL) - goto abort_with_alloc; - - /* allocate the busdma resources */ - err = bus_dma_tag_create(sc->parent_dmat, /* parent */ - 1, /* alignment */ - sc->tx.boundary, /* boundary */ - BUS_SPACE_MAXADDR, /* low */ - BUS_SPACE_MAXADDR, /* high */ - NULL, NULL, /* filter */ - 65536 + 256, /* maxsize */ - sc->tx.max_desc - 2, /* num segs */ - sc->tx.boundary, /* maxsegsize */ - BUS_DMA_ALLOCNOW, /* flags */ - NULL, NULL, /* lock */ - &sc->tx.dmat); /* tag */ - - if (err != 0) { - device_printf(sc->dev, "Err %d allocating tx dmat\n", - err); - goto abort_with_alloc; - } - + bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); + ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); + if (ss->rx_small.shadow == NULL) + return err;; + + bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); + ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); + if (ss->rx_big.shadow == NULL) + return err;; + + /* allocate the rx host info rings */ + bytes = rx_ring_entries * sizeof (*ss->rx_small.info); + ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); + if (ss->rx_small.info == NULL) + return err;; + + bytes = rx_ring_entries * sizeof (*ss->rx_big.info); + ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); + if (ss->rx_big.info == NULL) + return err;; + + /* allocate the rx busdma resources */ err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 1, /* alignment */ 4096, /* boundary */ @@ -2791,11 +2902,11 @@ mxge_alloc_rings(mxge_softc_t *sc) MHLEN, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lock */ - &sc->rx_small.dmat); /* tag */ + &ss->rx_small.dmat); 
/* tag */ if (err != 0) { device_printf(sc->dev, "Err %d allocating rx_small dmat\n", err); - goto abort_with_alloc; + return err;; } err = bus_dma_tag_create(sc->parent_dmat, /* parent */ @@ -2809,66 +2920,152 @@ mxge_alloc_rings(mxge_softc_t *sc) 4096, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lock */ - &sc->rx_big.dmat); /* tag */ + &ss->rx_big.dmat); /* tag */ if (err != 0) { device_printf(sc->dev, "Err %d allocating rx_big dmat\n", err); - goto abort_with_alloc; - } - - /* now use these tags to setup dmamaps for each slot - in each ring */ - for (i = 0; i <= sc->tx.mask; i++) { - err = bus_dmamap_create(sc->tx.dmat, 0, - &sc->tx.info[i].map); - if (err != 0) { - device_printf(sc->dev, "Err %d tx dmamap\n", - err); - goto abort_with_alloc; - } + return err;; } - for (i = 0; i <= sc->rx_small.mask; i++) { - err = bus_dmamap_create(sc->rx_small.dmat, 0, - &sc->rx_small.info[i].map); + for (i = 0; i <= ss->rx_small.mask; i++) { + err = bus_dmamap_create(ss->rx_small.dmat, 0, + &ss->rx_small.info[i].map); if (err != 0) { device_printf(sc->dev, "Err %d rx_small dmamap\n", err); - goto abort_with_alloc; + return err;; } } - err = bus_dmamap_create(sc->rx_small.dmat, 0, - &sc->rx_small.extra_map); + err = bus_dmamap_create(ss->rx_small.dmat, 0, + &ss->rx_small.extra_map); if (err != 0) { device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); - goto abort_with_alloc; + return err;; } - for (i = 0; i <= sc->rx_big.mask; i++) { - err = bus_dmamap_create(sc->rx_big.dmat, 0, - &sc->rx_big.info[i].map); + for (i = 0; i <= ss->rx_big.mask; i++) { + err = bus_dmamap_create(ss->rx_big.dmat, 0, + &ss->rx_big.info[i].map); if (err != 0) { device_printf(sc->dev, "Err %d rx_big dmamap\n", - err); - goto abort_with_alloc; + err); + return err;; } } - err = bus_dmamap_create(sc->rx_big.dmat, 0, - &sc->rx_big.extra_map); + err = bus_dmamap_create(ss->rx_big.dmat, 0, + &ss->rx_big.extra_map); if (err != 0) { device_printf(sc->dev, "Err %d extra rx_big 
dmamap\n", err); - goto abort_with_alloc; + return err;; + } + + /* now allocate TX resouces */ + + /* only use a single TX ring for now */ + if (ss != ss->sc->ss) + return 0; + + ss->tx.mask = tx_ring_entries - 1; + ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); + + + /* allocate the tx request copy block */ + bytes = 8 + + sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); + ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); + if (ss->tx.req_bytes == NULL) + return err;; + /* ensure req_list entries are aligned to 8 bytes */ + ss->tx.req_list = (mcp_kreq_ether_send_t *) + ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); + + /* allocate the tx busdma segment list */ + bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; + ss->tx.seg_list = (bus_dma_segment_t *) + malloc(bytes, M_DEVBUF, M_WAITOK); + if (ss->tx.seg_list == NULL) + return err;; + + /* allocate the tx host info ring */ + bytes = tx_ring_entries * sizeof (*ss->tx.info); + ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); + if (ss->tx.info == NULL) + return err;; + + /* allocate the tx busdma resources */ + err = bus_dma_tag_create(sc->parent_dmat, /* parent */ + 1, /* alignment */ + sc->tx_boundary, /* boundary */ + BUS_SPACE_MAXADDR, /* low */ + BUS_SPACE_MAXADDR, /* high */ + NULL, NULL, /* filter */ + 65536 + 256, /* maxsize */ + ss->tx.max_desc - 2, /* num segs */ + sc->tx_boundary, /* maxsegsz */ + BUS_DMA_ALLOCNOW, /* flags */ + NULL, NULL, /* lock */ + &ss->tx.dmat); /* tag */ + + if (err != 0) { + device_printf(sc->dev, "Err %d allocating tx dmat\n", + err); + return err;; + } + + /* now use these tags to setup dmamaps for each slot + in the ring */ + for (i = 0; i <= ss->tx.mask; i++) { + err = bus_dmamap_create(ss->tx.dmat, 0, + &ss->tx.info[i].map); + if (err != 0) { + device_printf(sc->dev, "Err %d tx dmamap\n", + err); + return err;; + } } return 0; -abort_with_alloc: - mxge_free_rings(sc); +} -abort_with_nothing: +static int +mxge_alloc_rings(mxge_softc_t *sc) +{ 
+ mxge_cmd_t cmd; + int tx_ring_size; + int tx_ring_entries, rx_ring_entries; + int err, slice; + + /* get ring sizes */ + err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); + tx_ring_size = cmd.data0; + if (err != 0) { + device_printf(sc->dev, "Cannot determine tx ring sizes\n"); + goto abort; + } + + tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); + rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); + IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); + sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; + IFQ_SET_READY(&sc->ifp->if_snd); + + for (slice = 0; slice < sc->num_slices; slice++) { + err = mxge_alloc_slice_rings(&sc->ss[slice], + rx_ring_entries, + tx_ring_entries); + if (err != 0) + goto abort; + } + return 0; + +abort: + mxge_free_rings(sc); return err; + } + static void mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) { @@ -2898,62 +3095,50 @@ mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) *nbufs = 4; } -static int -mxge_open(mxge_softc_t *sc) +static int +mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) { + mxge_softc_t *sc; mxge_cmd_t cmd; - int i, err, big_bytes; bus_dmamap_t map; - bus_addr_t bus; struct lro_entry *lro_entry; + int err, i, slice; - SLIST_INIT(&sc->lro_free); - SLIST_INIT(&sc->lro_active); + + sc = ss->sc; + slice = ss - sc->ss; + + SLIST_INIT(&ss->lro_free); + SLIST_INIT(&ss->lro_active); for (i = 0; i < sc->lro_cnt; i++) { lro_entry = (struct lro_entry *) - malloc(sizeof (*lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO); + malloc(sizeof (*lro_entry), M_DEVBUF, + M_NOWAIT | M_ZERO); if (lro_entry == NULL) { sc->lro_cnt = i; break; } - SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next); - } - - /* Copy the MAC address in case it was overridden */ - bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); - - err = mxge_reset(sc, 1); - if (err != 0) { - device_printf(sc->dev, "failed to reset\n"); - return EIO; - } - - 
mxge_choose_params(sc->ifp->if_mtu, &big_bytes, - &sc->rx_big.cl_size, &sc->rx_big.nbufs); - - cmd.data0 = sc->rx_big.nbufs; - err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, - &cmd); - /* error is only meaningful if we're trying to set - MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ - if (err && sc->rx_big.nbufs > 1) { - device_printf(sc->dev, - "Failed to set alway-use-n to %d\n", - sc->rx_big.nbufs); - return EIO; + SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); } /* get the lanai pointers to the send and receive rings */ - err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); - sc->tx.lanai = - (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); + err = 0; + /* We currently only send from the first slice */ + if (slice == 0) { + cmd.data0 = slice; + err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); + ss->tx.lanai = + (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); + } + cmd.data0 = slice; err |= mxge_send_cmd(sc, - MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); - sc->rx_small.lanai = + MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); + ss->rx_small.lanai = (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); + cmd.data0 = slice; err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); - sc->rx_big.lanai = + ss->rx_big.lanai = (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); if (err != 0) { @@ -2963,29 +3148,92 @@ mxge_open(mxge_softc_t *sc) } /* stock receive rings */ - for (i = 0; i <= sc->rx_small.mask; i++) { - map = sc->rx_small.info[i].map; - err = mxge_get_buf_small(sc, map, i); + for (i = 0; i <= ss->rx_small.mask; i++) { + map = ss->rx_small.info[i].map; + err = mxge_get_buf_small(ss, map, i); if (err) { device_printf(sc->dev, "alloced %d/%d smalls\n", - i, sc->rx_small.mask + 1); - goto abort; + i, ss->rx_small.mask + 1); + return ENOMEM; } } - for (i = 0; i <= sc->rx_big.mask; i++) { - sc->rx_big.shadow[i].addr_low = 0xffffffff; - sc->rx_big.shadow[i].addr_high = 0xffffffff; + for (i = 0; i <= 
ss->rx_big.mask; i++) { + ss->rx_big.shadow[i].addr_low = 0xffffffff; + ss->rx_big.shadow[i].addr_high = 0xffffffff; } - for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) { - map = sc->rx_big.info[i].map; - err = mxge_get_buf_big(sc, map, i); + ss->rx_big.nbufs = nbufs; + ss->rx_big.cl_size = cl_size; + for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { + map = ss->rx_big.info[i].map; + err = mxge_get_buf_big(ss, map, i); if (err) { device_printf(sc->dev, "alloced %d/%d bigs\n", - i, sc->rx_big.mask + 1); - goto abort; + i, ss->rx_big.mask + 1); + return ENOMEM; } } + return 0; +} + +static int +mxge_open(mxge_softc_t *sc) +{ + mxge_cmd_t cmd; + int err, big_bytes, nbufs, slice, cl_size, i; + bus_addr_t bus; + volatile uint8_t *itable; + + /* Copy the MAC address in case it was overridden */ + bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); + + err = mxge_reset(sc, 1); + if (err != 0) { + device_printf(sc->dev, "failed to reset\n"); + return EIO; + } + + if (sc->num_slices > 1) { + /* setup the indirection table */ + cmd.data0 = sc->num_slices; + err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, + &cmd); + + err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, + &cmd); + if (err != 0) { + device_printf(sc->dev, + "failed to setup rss tables\n"); + return err; + } + /* just enable an identity mapping */ + itable = sc->sram + cmd.data0; + for (i = 0; i < sc->num_slices; i++) + itable[i] = (uint8_t)i; + + cmd.data0 = 1; + cmd.data1 = mxge_rss_hash_type; + err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); + if (err != 0) { + device_printf(sc->dev, "failed to enable slices\n"); + return err; + } + } + + + mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); + + cmd.data0 = nbufs; + err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, + &cmd); + /* error is only meaningful if we're trying to set + MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ + if (err && nbufs > 1) { + device_printf(sc->dev, + "Failed to set 
alway-use-n to %d\n", + nbufs); + return EIO; + } /* Give the firmware the mtu and the big and small buffer sizes. The firmware wants the big buf size to be a power of two. Luckily, FreeBSD's clusters are powers of two */ @@ -3003,13 +3251,13 @@ mxge_open(mxge_softc_t *sc) } /* Now give him the pointer to the stats block */ - cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr); - cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr); + cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); + cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); cmd.data2 = sizeof(struct mcp_irq_data); err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); if (err != 0) { - bus = sc->fw_stats_dma.bus_addr; + bus = sc->ss->fw_stats_dma.bus_addr; bus += offsetof(struct mcp_irq_data, send_done_count); cmd.data0 = MXGE_LOWPART_TO_U32(bus); cmd.data1 = MXGE_HIGHPART_TO_U32(bus); @@ -3027,6 +3275,15 @@ mxge_open(mxge_softc_t *sc) goto abort; } + for (slice = 0; slice < sc->num_slices; slice++) { + err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); + if (err != 0) { + device_printf(sc->dev, "couldn't open slice %d\n", + slice); + goto abort; + } + } + /* Finally, start the firmware running */ err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); if (err) { @@ -3048,7 +3305,6 @@ abort: static int mxge_close(mxge_softc_t *sc) { - struct lro_entry *lro_entry; mxge_cmd_t cmd; int err, old_down_cnt; @@ -3063,16 +3319,13 @@ mxge_close(mxge_softc_t *sc) /* wait for down irq */ DELAY(10 * sc->intr_coal_delay); } + mb(); if (old_down_cnt == sc->down_cnt) { device_printf(sc->dev, "never got down irq\n"); } mxge_free_mbufs(sc); - while (!SLIST_EMPTY(&sc->lro_free)) { - lro_entry = SLIST_FIRST(&sc->lro_free); - SLIST_REMOVE_HEAD(&sc->lro_free, next); - } return 0; } @@ -3166,10 +3419,10 @@ mxge_watchdog_reset(mxge_softc_t *sc) } else { device_printf(sc->dev, "NIC did not reboot, ring state:\n"); device_printf(sc->dev, "tx.req=%d tx.done=%d\n", - sc->tx.req, 
sc->tx.done); + sc->ss->tx.req, sc->ss->tx.done); device_printf(sc->dev, "pkt_done=%d fw=%d\n", - sc->tx.pkt_done, - be32toh(sc->fw_stats->send_done_count)); + sc->ss->tx.pkt_done, + be32toh(sc->ss->fw_stats->send_done_count)); } if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { @@ -3190,8 +3443,8 @@ abort: static void mxge_watchdog(mxge_softc_t *sc) { - mxge_tx_buf_t *tx = &sc->tx; - uint32_t rx_pause = be32toh(sc->fw_stats->dropped_pause); + mxge_tx_ring_t *tx = &sc->ss->tx; + uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); /* see if we have outstanding transmits, which have been pending for more than mxge_ticks */ @@ -3215,6 +3468,20 @@ mxge_watchdog(mxge_softc_t *sc) } static void +mxge_update_stats(mxge_softc_t *sc) +{ + struct mxge_slice_state *ss; + u_long ipackets = 0; + int slice; + + for(slice = 0; slice < sc->num_slices; slice++) { + ss = &sc->ss[slice]; + ipackets += ss->ipackets; + } + sc->ifp->if_ipackets = ipackets; + +} +static void mxge_tick(void *arg) { mxge_softc_t *sc = arg; @@ -3227,8 +3494,15 @@ mxge_tick(void *arg) return; } + /* aggregate stats from different slices */ + mxge_update_stats(sc); + callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); - mxge_watchdog(sc); + if (!sc->watchdog_countdown) { + mxge_watchdog(sc); + sc->watchdog_countdown = 4; + } + sc->watchdog_countdown--; } static int @@ -3314,8 +3588,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); callout_stop(&sc->co_hdl); + mxge_close(sc); } } mtx_unlock(&sc->driver_mtx); @@ -3389,7 +3663,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) static void mxge_fetch_tunables(mxge_softc_t *sc) { - + + TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", &mxge_flow_control); TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", @@ -3404,6 +3679,8 @@ mxge_fetch_tunables(mxge_softc_t *sc) &mxge_verbose); 
TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); + TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); + TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); if (sc->lro_cnt != 0) mxge_lro_cnt = sc->lro_cnt; @@ -3412,30 +3689,420 @@ mxge_fetch_tunables(mxge_softc_t *sc) if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) mxge_intr_coal_delay = 30; if (mxge_ticks == 0) - mxge_ticks = hz; + mxge_ticks = hz / 2; sc->pause = mxge_flow_control; + if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 + || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) { + mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; + } +} + + +static void +mxge_free_slices(mxge_softc_t *sc) +{ + struct mxge_slice_state *ss; + int i; + + + if (sc->ss == NULL) + return; + + for (i = 0; i < sc->num_slices; i++) { + ss = &sc->ss[i]; + if (ss->fw_stats != NULL) { + mxge_dma_free(&ss->fw_stats_dma); + ss->fw_stats = NULL; + mtx_destroy(&ss->tx.mtx); + } + if (ss->rx_done.entry != NULL) { + mxge_dma_free(&ss->rx_done.dma); + ss->rx_done.entry = NULL; + } + } + free(sc->ss, M_DEVBUF); + sc->ss = NULL; +} + +static int +mxge_alloc_slices(mxge_softc_t *sc) +{ + mxge_cmd_t cmd; + struct mxge_slice_state *ss; + size_t bytes; + int err, i, max_intr_slots; + + err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); + if (err != 0) { + device_printf(sc->dev, "Cannot determine rx ring size\n"); + return err; + } + sc->rx_ring_size = cmd.data0; + max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); + + bytes = sizeof (*sc->ss) * sc->num_slices; + sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc->ss == NULL) + return (ENOMEM); + for (i = 0; i < sc->num_slices; i++) { + ss = &sc->ss[i]; + + ss->sc = sc; + + /* allocate per-slice rx interrupt queues */ + + bytes = max_intr_slots * sizeof (*ss->rx_done.entry); + err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); + if (err != 0) + goto 
abort; + ss->rx_done.entry = ss->rx_done.dma.addr; + bzero(ss->rx_done.entry, bytes); + + /* + * allocate the per-slice firmware stats; stats + * (including tx) are used used only on the first + * slice for now + */ + if (i > 0) + continue; + + bytes = sizeof (*ss->fw_stats); + err = mxge_dma_alloc(sc, &ss->fw_stats_dma, + sizeof (*ss->fw_stats), 64); + if (err != 0) + goto abort; + ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; + snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), + "%s:tx(%d)", device_get_nameunit(sc->dev), i); + mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); + } + + return (0); + +abort: + mxge_free_slices(sc); + return (ENOMEM); +} + +static void +mxge_slice_probe(mxge_softc_t *sc) +{ + mxge_cmd_t cmd; + char *old_fw; + int msix_cnt, status, max_intr_slots; + + sc->num_slices = 1; + /* + * don't enable multiple slices if they are not enabled, + * or if this is not an SMP system + */ + + if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) + return; + + /* see how many MSI-X interrupts are available */ + msix_cnt = pci_msix_count(sc->dev); + if (msix_cnt < 2) + return; + + /* now load the slice aware firmware see what it supports */ + old_fw = sc->fw_name; + if (old_fw == mxge_fw_aligned) + sc->fw_name = mxge_fw_rss_aligned; + else + sc->fw_name = mxge_fw_rss_unaligned; + status = mxge_load_firmware(sc, 0); + if (status != 0) { + device_printf(sc->dev, "Falling back to a single slice\n"); + return; + } + + /* try to send a reset command to the card to see if it + is alive */ + memset(&cmd, 0, sizeof (cmd)); + status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); + if (status != 0) { + device_printf(sc->dev, "failed reset\n"); + goto abort_with_fw; + } + + /* get rx ring size */ + status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); + if (status != 0) { + device_printf(sc->dev, "Cannot determine rx ring size\n"); + goto abort_with_fw; + } + max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); + + /* 
tell it the size of the interrupt queues */ + cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); + status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); + if (status != 0) { + device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); + goto abort_with_fw; + } + + /* ask the maximum number of slices it supports */ + status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); + if (status != 0) { + device_printf(sc->dev, + "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); + goto abort_with_fw; + } + sc->num_slices = cmd.data0; + if (sc->num_slices > msix_cnt) + sc->num_slices = msix_cnt; + + if (mxge_max_slices == -1) { + /* cap to number of CPUs in system */ + if (sc->num_slices > mp_ncpus) + sc->num_slices = mp_ncpus; + } else { + if (sc->num_slices > mxge_max_slices) + sc->num_slices = mxge_max_slices; + } + /* make sure it is a power of two */ + while (sc->num_slices & (sc->num_slices - 1)) + sc->num_slices--; + + if (mxge_verbose) + device_printf(sc->dev, "using %d slices\n", + sc->num_slices); + + return; + +abort_with_fw: + sc->fw_name = old_fw; + (void) mxge_load_firmware(sc, 0); +} + +static int +mxge_add_msix_irqs(mxge_softc_t *sc) +{ + size_t bytes; + int count, err, i, rid; + + rid = PCIR_BAR(2); + sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + + if (sc->msix_table_res == NULL) { + device_printf(sc->dev, "couldn't alloc MSIX table res\n"); + return ENXIO; + } + + count = sc->num_slices; + err = pci_alloc_msix(sc->dev, &count); + if (err != 0) { + device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" + "err = %d \n", sc->num_slices, err); + goto abort_with_msix_table; + } + if (count < sc->num_slices) { + device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", + count, sc->num_slices); + device_printf(sc->dev, + "Try setting hw.mxge.max_slices to %d\n", + count); + err = ENOSPC; + goto abort_with_msix; + } + bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; + sc->msix_irq_res = 
malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); + if (sc->msix_irq_res == NULL) { + err = ENOMEM; + goto abort_with_msix; + } + + for (i = 0; i < sc->num_slices; i++) { + rid = i + 1; + sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, + SYS_RES_IRQ, + &rid, RF_ACTIVE); + if (sc->msix_irq_res[i] == NULL) { + device_printf(sc->dev, "couldn't allocate IRQ res" + " for message %d\n", i); + err = ENXIO; + goto abort_with_res; + } + } + + bytes = sizeof (*sc->msix_ih) * sc->num_slices; + sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); + + for (i = 0; i < sc->num_slices; i++) { + err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], + INTR_TYPE_NET | INTR_MPSAFE, + NULL, mxge_intr, &sc->ss[i], + &sc->msix_ih[i]); + if (err != 0) { + device_printf(sc->dev, "couldn't setup intr for " + "message %d\n", i); + goto abort_with_intr; + } + } + + if (mxge_verbose) { + device_printf(sc->dev, "using %d msix IRQs:", + sc->num_slices); + for (i = 0; i < sc->num_slices; i++) + printf(" %ld", rman_get_start(sc->msix_irq_res[i])); + printf("\n"); + } + return (0); + +abort_with_intr: + for (i = 0; i < sc->num_slices; i++) { + if (sc->msix_ih[i] != NULL) { + bus_teardown_intr(sc->dev, sc->msix_irq_res[i], + sc->msix_ih[i]); + sc->msix_ih[i] = NULL; + } + } + free(sc->msix_ih, M_DEVBUF); + + +abort_with_res: + for (i = 0; i < sc->num_slices; i++) { + rid = i + 1; + if (sc->msix_irq_res[i] != NULL) + bus_release_resource(sc->dev, SYS_RES_IRQ, rid, + sc->msix_irq_res[i]); + sc->msix_irq_res[i] = NULL; + } + free(sc->msix_irq_res, M_DEVBUF); + + +abort_with_msix: + pci_release_msi(sc->dev); + +abort_with_msix_table: + bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), + sc->msix_table_res); + + return err; +} + +static int +mxge_add_single_irq(mxge_softc_t *sc) +{ + int count, err, rid; + + count = pci_msi_count(sc->dev); + if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { + rid = 1; + sc->msi_enabled = 1; + } else { + rid = 0; + } + sc->irq_res = 
bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, + 1, RF_SHAREABLE | RF_ACTIVE); + if (sc->irq_res == NULL) { + device_printf(sc->dev, "could not alloc interrupt\n"); + return ENXIO; + } + if (mxge_verbose) + device_printf(sc->dev, "using %s irq %ld\n", + sc->msi_enabled ? "MSI" : "INTx", + rman_get_start(sc->irq_res)); + err = bus_setup_intr(sc->dev, sc->irq_res, + INTR_TYPE_NET | INTR_MPSAFE, + NULL, mxge_intr, &sc->ss[0], &sc->ih); + if (err != 0) { + bus_release_resource(sc->dev, SYS_RES_IRQ, + sc->msi_enabled ? 1 : 0, sc->irq_res); + if (sc->msi_enabled) + pci_release_msi(sc->dev); + } + return err; +} + +static void +mxge_rem_msix_irqs(mxge_softc_t *sc) +{ + int i, rid; + + for (i = 0; i < sc->num_slices; i++) { + if (sc->msix_ih[i] != NULL) { + bus_teardown_intr(sc->dev, sc->msix_irq_res[i], + sc->msix_ih[i]); + sc->msix_ih[i] = NULL; + } + } + free(sc->msix_ih, M_DEVBUF); + + for (i = 0; i < sc->num_slices; i++) { + rid = i + 1; + if (sc->msix_irq_res[i] != NULL) + bus_release_resource(sc->dev, SYS_RES_IRQ, rid, + sc->msix_irq_res[i]); + sc->msix_irq_res[i] = NULL; + } + free(sc->msix_irq_res, M_DEVBUF); + + bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), + sc->msix_table_res); + pci_release_msi(sc->dev); + return; +} + +static void +mxge_rem_single_irq(mxge_softc_t *sc) +{ + bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); + bus_release_resource(sc->dev, SYS_RES_IRQ, + sc->msi_enabled ? 
1 : 0, sc->irq_res); + if (sc->msi_enabled) + pci_release_msi(sc->dev); +} + +static void +mxge_rem_irq(mxge_softc_t *sc) +{ + if (sc->num_slices > 1) + mxge_rem_msix_irqs(sc); + else + mxge_rem_single_irq(sc); +} + +static int +mxge_add_irq(mxge_softc_t *sc) +{ + int err; + + if (sc->num_slices > 1) + err = mxge_add_msix_irqs(sc); + else + err = mxge_add_single_irq(sc); + + if (0 && err == 0 && sc->num_slices > 1) { + mxge_rem_msix_irqs(sc); + err = mxge_add_msix_irqs(sc); + } + return err; } + static int mxge_attach(device_t dev) { mxge_softc_t *sc = device_get_softc(dev); struct ifnet *ifp; - int count, rid, err; + int err, rid; sc->dev = dev; mxge_fetch_tunables(sc); err = bus_dma_tag_create(NULL, /* parent */ 1, /* alignment */ - 4096, /* boundary */ + 0, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ 65536 + 256, /* maxsize */ MXGE_MAX_SEND_DESC, /* num segs */ - 4096, /* maxsegsize */ + 65536, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock */ &sc->parent_dmat); /* tag */ @@ -3452,12 +4119,11 @@ mxge_attach(device_t dev) err = ENOSPC; goto abort_with_parent_dmat; } + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", device_get_nameunit(dev)); mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); - snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx", - device_get_nameunit(dev)); - mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF); snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), "%s:drv", device_get_nameunit(dev)); mtx_init(&sc->driver_mtx, sc->driver_mtx_name, @@ -3510,57 +4176,37 @@ mxge_attach(device_t dev) if (err != 0) goto abort_with_cmd_dma; - err = mxge_dma_alloc(sc, &sc->fw_stats_dma, - sizeof (*sc->fw_stats), 64); - if (err != 0) - goto abort_with_zeropad_dma; - sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr; - err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); if (err != 0) - goto 
abort_with_fw_stats; + goto abort_with_zeropad_dma; - /* Add our ithread */ - count = pci_msi_count(dev); - if (count == 1 && pci_alloc_msi(dev, &count) == 0) { - rid = 1; - sc->msi_enabled = 1; - } else { - rid = 0; - } - sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, - 1, RF_SHAREABLE | RF_ACTIVE); - if (sc->irq_res == NULL) { - device_printf(dev, "could not alloc interrupt\n"); - goto abort_with_dmabench; - } - if (mxge_verbose) - device_printf(dev, "using %s irq %ld\n", - sc->msi_enabled ? "MSI" : "INTx", - rman_get_start(sc->irq_res)); /* select & load the firmware */ err = mxge_select_firmware(sc); if (err != 0) - goto abort_with_irq_res; + goto abort_with_dmabench; sc->intr_coal_delay = mxge_intr_coal_delay; + + mxge_slice_probe(sc); + err = mxge_alloc_slices(sc); + if (err != 0) + goto abort_with_dmabench; + err = mxge_reset(sc, 0); if (err != 0) - goto abort_with_irq_res; + goto abort_with_slices; err = mxge_alloc_rings(sc); if (err != 0) { device_printf(sc->dev, "failed to allocate rings\n"); - goto abort_with_irq_res; + goto abort_with_dmabench; } - err = bus_setup_intr(sc->dev, sc->irq_res, - INTR_TYPE_NET | INTR_MPSAFE, - NULL, mxge_intr, sc, &sc->ih); + err = mxge_add_irq(sc); if (err != 0) { + device_printf(sc->dev, "failed to add irq\n"); goto abort_with_rings; } - /* hook into the network stack */ - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + ifp->if_baudrate = 100000000; ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | @@ -3598,15 +4244,10 @@ mxge_attach(device_t dev) abort_with_rings: mxge_free_rings(sc); -abort_with_irq_res: - bus_release_resource(dev, SYS_RES_IRQ, - sc->msi_enabled ? 
1 : 0, sc->irq_res); - if (sc->msi_enabled) - pci_release_msi(dev); +abort_with_slices: + mxge_free_slices(sc); abort_with_dmabench: mxge_dma_free(&sc->dmabench_dma); -abort_with_fw_stats: - mxge_dma_free(&sc->fw_stats_dma); abort_with_zeropad_dma: mxge_dma_free(&sc->zeropad_dma); abort_with_cmd_dma: @@ -3616,7 +4257,6 @@ abort_with_mem_res: abort_with_lock: pci_disable_busmaster(dev); mtx_destroy(&sc->cmd_mtx); - mtx_destroy(&sc->tx_mtx); mtx_destroy(&sc->driver_mtx); if_free(ifp); abort_with_parent_dmat: @@ -3637,29 +4277,23 @@ mxge_detach(device_t dev) return EBUSY; } mtx_lock(&sc->driver_mtx); + callout_stop(&sc->co_hdl); if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) mxge_close(sc); - callout_stop(&sc->co_hdl); mtx_unlock(&sc->driver_mtx); ether_ifdetach(sc->ifp); ifmedia_removeall(&sc->media); mxge_dummy_rdma(sc, 0); - bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); + mxge_rem_sysctls(sc); + mxge_rem_irq(sc); mxge_free_rings(sc); - bus_release_resource(dev, SYS_RES_IRQ, - sc->msi_enabled ? 1 : 0, sc->irq_res); - if (sc->msi_enabled) - pci_release_msi(dev); - - sc->rx_done.entry = NULL; - mxge_dma_free(&sc->fw_stats_dma); + mxge_free_slices(sc); mxge_dma_free(&sc->dmabench_dma); mxge_dma_free(&sc->zeropad_dma); mxge_dma_free(&sc->cmd_dma); bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); pci_disable_busmaster(dev); mtx_destroy(&sc->cmd_mtx); - mtx_destroy(&sc->tx_mtx); mtx_destroy(&sc->driver_mtx); if_free(sc->ifp); bus_dma_tag_destroy(sc->parent_dmat); |