Diffstat (limited to 'sys/dev/sfxge/sfxge_tx.c')
-rw-r--r-- | sys/dev/sfxge/sfxge_tx.c | 501
1 file changed, 329 insertions(+), 172 deletions(-)
diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c
index 524b09d840c0..af8efc26be46 100644
--- a/sys/dev/sfxge/sfxge_tx.c
+++ b/sys/dev/sfxge/sfxge_tx.c
@@ -1,30 +1,34 @@
 /*-
- * Copyright (c) 2010-2011 Solarflare Communications, Inc.
+ * Copyright (c) 2010-2015 Solarflare Communications Inc.
  * All rights reserved.
  *
  * This software was developed in part by Philip Paeps under contract for
  * Solarflare Communications, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
+ * modification, are permitted provided that the following conditions are met:
  *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the FreeBSD Project.
  */

 /* Theory of operation:
@@ -67,23 +71,6 @@ __FBSDID("$FreeBSD$");
 #include "sfxge.h"
 #include "sfxge_tx.h"

-/*
- * Estimate maximum number of Tx descriptors required for TSO packet.
- * With minimum MSS and maximum mbuf length we might need more (even
- * than a ring-ful of descriptors), but this should not happen in
- * practice except due to deliberate attack.  In that case we will
- * truncate the output at a packet boundary.
- */
-#define	SFXGE_TSO_MAX_DESC						\
-	(SFXGE_TSO_MAX_SEGS * 2 + SFXGE_TX_MAPPING_MAX_SEG - 1)
-
-/*
- * Set the block level to ensure there is space to generate a
- * large number of descriptors for TSO.
- */
-#define	SFXGE_TXQ_BLOCK_LEVEL(_entries)					\
-	(EFX_TXQ_LIMIT(_entries) - SFXGE_TSO_MAX_DESC)
-
 #define	SFXGE_PARAM_TX_DPL_GET_MAX	SFXGE_PARAM(tx_dpl_get_max)
 static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT;
@@ -108,6 +95,13 @@ SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
 	   &sfxge_tx_dpl_put_max, 0,
 	   "Maximum number of any packets in deferred packet put-list");

+#define	SFXGE_PARAM_TSO_FW_ASSISTED	SFXGE_PARAM(tso_fw_assisted)
+static int sfxge_tso_fw_assisted = 1;
+TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted);
+SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN,
+	   &sfxge_tso_fw_assisted, 0,
+	   "Use FW-assisted TSO if supported by NIC firmware");
+
 static const struct {
 	const char *name;
@@ -134,7 +128,38 @@ static void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
 static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
 static void sfxge_tx_qunblock(struct sfxge_txq *txq);
 static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
-			      const bus_dma_segment_t *dma_seg, int n_dma_seg);
+			      const bus_dma_segment_t *dma_seg, int n_dma_seg,
+			      int vlan_tagged);
+
+static int
+sfxge_tx_maybe_insert_tag(struct sfxge_txq *txq, struct mbuf *mbuf)
+{
+	uint16_t this_tag = ((mbuf->m_flags & M_VLANTAG) ?
+			     mbuf->m_pkthdr.ether_vtag :
+			     0);
+
+	if (this_tag == txq->hw_vlan_tci)
+		return (0);
+
+	efx_tx_qdesc_vlantci_create(txq->common,
+				    bswap16(this_tag),
+				    &txq->pend_desc[0]);
+	txq->n_pend_desc = 1;
+	txq->hw_vlan_tci = this_tag;
+	return (1);
+}
+
+static inline void
+sfxge_next_stmp(struct sfxge_txq *txq, struct sfxge_tx_mapping **pstmp)
+{
+	KASSERT((*pstmp)->flags == 0, ("stmp flags are not 0"));
+	if (__predict_false(*pstmp ==
+			    &txq->stmp[txq->ptr_mask]))
+		*pstmp = &txq->stmp[0];
+	else
+		(*pstmp)++;
+}
+

 void
 sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
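Note (not part of the patch): the new hw.sfxge.tso_fw_assisted tunable above defaults to on and, being CTLFLAG_RDTUN, can only be overridden at boot. sfxge_tx_maybe_insert_tag() emits a VLAN option descriptor only when the outgoing tag differs from the one last programmed into the hardware, and sfxge_next_stmp() walks the mapping array with the usual power-of-two ring arithmetic. A minimal standalone sketch of that masking idiom, with a hypothetical RING_ENTRIES standing in for the queue size:

    #include <stdint.h>

    #define RING_ENTRIES 1024u              /* ring size, must be a power of two */
    #define RING_MASK    (RING_ENTRIES - 1) /* plays the role of txq->ptr_mask */

    /* Advance a ring index with wrap-around, as sfxge_next_stmp() does. */
    static inline unsigned int
    ring_next(unsigned int id)
    {
            return ((id + 1) & RING_MASK);
    }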
@@ -252,29 +277,30 @@ static void
 sfxge_tx_qlist_post(struct sfxge_txq *txq)
 {
 	unsigned int old_added;
+	unsigned int block_level;
 	unsigned int level;
 	int rc;

 	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

 	KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0"));
-	KASSERT(txq->n_pend_desc <= SFXGE_TSO_MAX_DESC,
+	KASSERT(txq->n_pend_desc <= txq->max_pkt_desc,
 		("txq->n_pend_desc too large"));
 	KASSERT(!txq->blocked, ("txq->blocked"));

 	old_added = txq->added;

 	/* Post the fragment list. */
-	rc = efx_tx_qpost(txq->common, txq->pend_desc, txq->n_pend_desc,
+	rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, txq->n_pend_desc,
 			  txq->reaped, &txq->added);
-	KASSERT(rc == 0, ("efx_tx_qpost() failed"));
+	KASSERT(rc == 0, ("efx_tx_qdesc_post() failed"));

-	/* If efx_tx_qpost() had to refragment, our information about
+	/* If efx_tx_qdesc_post() had to refragment, our information about
 	 * buffers to free may be associated with the wrong
 	 * descriptors.
 	 */
 	KASSERT(txq->added - old_added == txq->n_pend_desc,
-		("efx_tx_qpost() refragmented descriptors"));
+		("efx_tx_qdesc_post() refragmented descriptors"));

 	level = txq->added - txq->reaped;
 	KASSERT(level <= txq->entries, ("overfilled TX queue"));
@@ -282,14 +308,20 @@ sfxge_tx_qlist_post(struct sfxge_txq *txq)
 	/* Clear the fragment list. */
 	txq->n_pend_desc = 0;

+	/*
+	 * Set the block level to ensure there is space to generate a
+	 * large number of descriptors for TSO.
+	 */
+	block_level = EFX_TXQ_LIMIT(txq->entries) - txq->max_pkt_desc;
+
 	/* Have we reached the block level? */
-	if (level < SFXGE_TXQ_BLOCK_LEVEL(txq->entries))
+	if (level < block_level)
 		return;

 	/* Reap, and check again */
 	sfxge_tx_qreap(txq);
 	level = txq->added - txq->reaped;
-	if (level < SFXGE_TXQ_BLOCK_LEVEL(txq->entries))
+	if (level < block_level)
 		return;

 	txq->blocked = 1;
@@ -301,7 +333,7 @@ sfxge_tx_qlist_post(struct sfxge_txq *txq)
 	mb();
 	sfxge_tx_qreap(txq);
 	level = txq->added - txq->reaped;
-	if (level < SFXGE_TXQ_BLOCK_LEVEL(txq->entries)) {
+	if (level < block_level) {
 		mb();
 		txq->blocked = 0;
 	}
@@ -314,10 +346,12 @@ static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
 	bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG];
 	unsigned int id;
 	struct sfxge_tx_mapping *stmp;
-	efx_buffer_t *desc;
+	efx_desc_t *desc;
 	int n_dma_seg;
 	int rc;
 	int i;
+	int eop;
+	int vlan_tagged;

 	KASSERT(!txq->blocked, ("txq->blocked"));
@@ -354,35 +388,35 @@ static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)

 	used_map = &stmp->map;

+	vlan_tagged = sfxge_tx_maybe_insert_tag(txq, mbuf);
+	if (vlan_tagged) {
+		sfxge_next_stmp(txq, &stmp);
+	}
 	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
-		rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg);
+		rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg, vlan_tagged);
 		if (rc < 0)
 			goto reject_mapped;
-		stmp = &txq->stmp[rc];
+		stmp = &txq->stmp[(rc - 1) & txq->ptr_mask];
 	} else {
 		/* Add the mapping to the fragment list, and set flags
 		 * for the buffer.
 		 */
+		i = 0;
 		for (;;) {
-			desc = &txq->pend_desc[i];
-			desc->eb_addr = dma_seg[i].ds_addr;
-			desc->eb_size = dma_seg[i].ds_len;
-			if (i == n_dma_seg - 1) {
-				desc->eb_eop = 1;
+			desc = &txq->pend_desc[i + vlan_tagged];
+			eop = (i == n_dma_seg - 1);
+			efx_tx_qdesc_dma_create(txq->common,
+						dma_seg[i].ds_addr,
+						dma_seg[i].ds_len,
+						eop,
+						desc);
+			if (eop)
 				break;
-			}
-			desc->eb_eop = 0;
 			i++;
-
-			stmp->flags = 0;
-			if (__predict_false(stmp ==
-					    &txq->stmp[txq->ptr_mask]))
-				stmp = &txq->stmp[0];
-			else
-				stmp++;
+			sfxge_next_stmp(txq, &stmp);
 		}
-		txq->n_pend_desc = n_dma_seg;
+		txq->n_pend_desc = n_dma_seg + vlan_tagged;
 	}

 	/*
@@ -469,7 +503,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)

 		/* Push the fragments to the hardware in batches. */
 		if (txq->added - pushed >= SFXGE_TX_BATCH) {
-			efx_tx_qpush(txq->common, txq->added);
+			efx_tx_qpush(txq->common, txq->added, pushed);
 			pushed = txq->added;
 		}
 	}
@@ -489,14 +523,13 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
 	}

 	if (txq->added != pushed)
-		efx_tx_qpush(txq->common, txq->added);
+		efx_tx_qpush(txq->common, txq->added, pushed);

 	KASSERT(txq->blocked || stdp->std_get_count == 0,
 		("queue unblocked but count is non-zero"));
 }

-#define	SFXGE_TX_QDPL_PENDING(_txq)					\
-	((_txq)->dpl.std_put != 0)
+#define	SFXGE_TX_QDPL_PENDING(_txq)	((_txq)->dpl.std_put != 0)

 /*
  * Service the deferred packet list.
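Note (not part of the patch): with the compile-time SFXGE_TXQ_BLOCK_LEVEL() macro gone, the block level is now derived per queue from the new txq->max_pkt_desc field. A worked sketch with hypothetical numbers; the 16-entry headroom merely stands in for whatever EFX_TXQ_LIMIT() actually reserves:

    /* Hypothetical block-level computation mirroring sfxge_tx_qlist_post(). */
    static unsigned int
    example_block_level(void)
    {
            unsigned int entries = 4096;       /* descriptor ring size */
            unsigned int limit = entries - 16; /* stand-in for EFX_TXQ_LIMIT(entries) */
            unsigned int max_pkt_desc = 208;   /* hypothetical worst case per packet */

            /* Block once the ring could no longer absorb one worst-case packet. */
            return (limit - max_pkt_desc);     /* 4080 - 208 = 3872 */
    }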
@@ -599,7 +632,7 @@ sfxge_tx_qdpl_put_unlocked(struct sfxge_txq *txq, struct mbuf *mbuf)
  * Called from if_transmit - will try to grab the txq lock and enqueue to the
  * put list if it succeeds, otherwise try to push onto the defer list if space.
  */
-int
+static int
 sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
 {
 	int rc;
@@ -746,6 +779,10 @@ struct sfxge_tso_state {
 	ssize_t tcph_off;	/* Offset of TCP header */
 	unsigned header_len;	/* Number of bytes of header */
 	unsigned seg_size;	/* TCP segment size */
+	int fw_assisted;	/* Use FW-assisted TSO */
+	u_short packet_id;	/* IPv4 packet ID from the original packet */
+	efx_desc_t header_desc; /* Precomputed header descriptor for
+				 * FW-assisted TSO */
 };

 static const struct ip *tso_iph(const struct sfxge_tso_state *tso)
@@ -816,12 +853,16 @@ static void tso_fini(struct sfxge_txq *txq)
 	}
 }

-static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
+static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso,
+		      const bus_dma_segment_t *hdr_dma_seg,
+		      struct mbuf *mbuf)
 {
 	struct ether_header *eh = mtod(mbuf, struct ether_header *);
+	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->sc->enp);
 	const struct tcphdr *th;
 	struct tcphdr th_copy;

+	tso->fw_assisted = txq->sc->tso_fw_assisted;
 	tso->mbuf = mbuf;

 	/* Find network protocol and header */
@@ -840,12 +881,19 @@ static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
 		KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP,
 			("TSO required on non-TCP packet"));
 		tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl;
+		tso->packet_id = tso_iph(tso)->ip_id;
 	} else {
 		KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
 			("TSO required on non-IP packet"));
 		KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP,
 			("TSO required on non-TCP packet"));
 		tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr);
+		tso->packet_id = 0;
+	}
+	if (tso->fw_assisted &&
+	    __predict_false(tso->tcph_off >
+			    encp->enc_tx_tso_tcp_header_offset_limit)) {
+		tso->fw_assisted = 0;
 	}

 	KASSERT(mbuf->m_len >= tso->tcph_off,
@@ -875,6 +923,17 @@ static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
 			 th->th_flags & (TH_URG | TH_SYN)));

 	tso->out_len = mbuf->m_pkthdr.len - tso->header_len;
+
+	if (tso->fw_assisted) {
+		if (hdr_dma_seg->ds_len >= tso->header_len)
+			efx_tx_qdesc_dma_create(txq->common,
+						hdr_dma_seg->ds_addr,
+						tso->header_len,
+						B_FALSE,
+						&tso->header_desc);
+		else
+			tso->fw_assisted = 0;
+	}
 }

 /*
@@ -887,7 +946,7 @@ static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
 static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
 					  struct sfxge_tso_state *tso)
 {
-	efx_buffer_t *desc;
+	efx_desc_t *desc;
 	int n;

 	if (tso->in_len == 0 || tso->packet_space == 0)
@@ -903,9 +962,11 @@ static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
 	tso->in_len -= n;

 	desc = &txq->pend_desc[txq->n_pend_desc++];
-	desc->eb_addr = tso->dma_addr;
-	desc->eb_size = n;
-	desc->eb_eop = tso->out_len == 0 || tso->packet_space == 0;
+	efx_tx_qdesc_dma_create(txq->common,
+				tso->dma_addr,
+				n,
+				tso->out_len == 0 || tso->packet_space == 0,
+				desc);

 	tso->dma_addr += n;
 }
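Note (not part of the patch): the fields added to struct sfxge_tso_state drive the firmware-assisted path: instead of copying and patching a header for every output segment, the driver queues a TSO option descriptor, then a reused header DMA descriptor, then the payload DMA descriptors. A compilable sketch of that per-segment accounting (the function and its argument are illustrative, not driver API):

    /* Descriptors queued per output segment on the FW-assisted TSO path. */
    static unsigned int
    fwa_tso_descs_for_segment(unsigned int n_payload_frags)
    {
            unsigned int n = 0;

            n += 1;               /* TSO option descriptor (efx_tx_qdesc_tso_create) */
            n += 1;               /* header DMA descriptor copied from tso->header_desc */
            n += n_payload_frags; /* payload DMA descriptors (efx_tx_qdesc_dma_create) */
            return (n);
    }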
@@ -928,107 +989,139 @@ static void tso_map_long_header(void *dma_addr_ret,
  */
 static int tso_start_new_packet(struct sfxge_txq *txq,
 				struct sfxge_tso_state *tso,
-				unsigned int id)
+				unsigned int *idp)
 {
-	struct sfxge_tx_mapping *stmp = &txq->stmp[id];
+	unsigned int id = *idp;
 	struct tcphdr *tsoh_th;
 	unsigned ip_length;
 	caddr_t header;
 	uint64_t dma_addr;
 	bus_dmamap_t map;
-	efx_buffer_t *desc;
+	efx_desc_t *desc;
 	int rc;

-	/* Allocate a DMA-mapped header buffer. */
-	if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
-		unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
-		unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;
+	if (tso->fw_assisted) {
+		uint8_t tcp_flags = tso_tcph(tso)->th_flags;
+
+		if (tso->out_len > tso->seg_size)
+			tcp_flags &= ~(TH_FIN | TH_PUSH);
+
+		/* TSO option descriptor */
+		desc = &txq->pend_desc[txq->n_pend_desc++];
+		efx_tx_qdesc_tso_create(txq->common,
+					tso->packet_id,
+					tso->seqnum,
+					tcp_flags,
+					desc++);
+		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
+		id = (id + 1) & txq->ptr_mask;

-		header = (txq->tsoh_buffer[page_index].esm_base +
-			  buf_index * TSOH_STD_SIZE);
-		dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
-			    buf_index * TSOH_STD_SIZE);
-		map = txq->tsoh_buffer[page_index].esm_map;
+		/* Header DMA descriptor */
+		*desc = tso->header_desc;
+		txq->n_pend_desc++;
+		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
+		id = (id + 1) & txq->ptr_mask;

-		stmp->flags = 0;
+		tso->seqnum += tso->seg_size;
 	} else {
-		/* We cannot use bus_dmamem_alloc() as that may sleep */
-		header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
-		if (__predict_false(!header))
-			return (ENOMEM);
-		rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
-				     header, tso->header_len,
-				     tso_map_long_header, &dma_addr,
-				     BUS_DMA_NOWAIT);
-		if (__predict_false(dma_addr == 0)) {
-			if (rc == 0) {
-				/* Succeeded but got >1 segment */
-				bus_dmamap_unload(txq->packet_dma_tag,
-						  stmp->map);
-				rc = EINVAL;
+		/* Allocate a DMA-mapped header buffer. */
+		if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
+			unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
+			unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;
+
+			header = (txq->tsoh_buffer[page_index].esm_base +
+				  buf_index * TSOH_STD_SIZE);
+			dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
+				    buf_index * TSOH_STD_SIZE);
+			map = txq->tsoh_buffer[page_index].esm_map;
+
+			KASSERT(txq->stmp[id].flags == 0,
+				("stmp flags are not 0"));
+		} else {
+			struct sfxge_tx_mapping *stmp = &txq->stmp[id];
+
+			/* We cannot use bus_dmamem_alloc() as that may sleep */
+			header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
+			if (__predict_false(!header))
+				return (ENOMEM);
+			rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
+					     header, tso->header_len,
+					     tso_map_long_header, &dma_addr,
+					     BUS_DMA_NOWAIT);
+			if (__predict_false(dma_addr == 0)) {
+				if (rc == 0) {
+					/* Succeeded but got >1 segment */
+					bus_dmamap_unload(txq->packet_dma_tag,
+							  stmp->map);
+					rc = EINVAL;
+				}
+				free(header, M_SFXGE);
+				return (rc);
 			}
-			free(header, M_SFXGE);
-			return (rc);
-		}
-		map = stmp->map;
+			map = stmp->map;

-		txq->tso_long_headers++;
-		stmp->u.heap_buf = header;
-		stmp->flags = TX_BUF_UNMAP;
-	}
+			txq->tso_long_headers++;
+			stmp->u.heap_buf = header;
+			stmp->flags = TX_BUF_UNMAP;
+		}

-	tsoh_th = (struct tcphdr *)(header + tso->tcph_off);
+		tsoh_th = (struct tcphdr *)(header + tso->tcph_off);

-	/* Copy and update the headers. */
-	m_copydata(tso->mbuf, 0, tso->header_len, header);
+		/* Copy and update the headers. */
+		m_copydata(tso->mbuf, 0, tso->header_len, header);

-	tsoh_th->th_seq = htonl(tso->seqnum);
-	tso->seqnum += tso->seg_size;
-	if (tso->out_len > tso->seg_size) {
-		/* This packet will not finish the TSO burst. */
-		ip_length = tso->header_len - tso->nh_off + tso->seg_size;
-		tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
-	} else {
-		/* This packet will be the last in the TSO burst. */
-		ip_length = tso->header_len - tso->nh_off + tso->out_len;
-	}
+		tsoh_th->th_seq = htonl(tso->seqnum);
+		tso->seqnum += tso->seg_size;
+		if (tso->out_len > tso->seg_size) {
+			/* This packet will not finish the TSO burst. */
+			ip_length = tso->header_len - tso->nh_off + tso->seg_size;
+			tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
+		} else {
+			/* This packet will be the last in the TSO burst. */
+			ip_length = tso->header_len - tso->nh_off + tso->out_len;
+		}

-	if (tso->protocol == htons(ETHERTYPE_IP)) {
-		struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
-		tsoh_iph->ip_len = htons(ip_length);
-		/* XXX We should increment ip_id, but FreeBSD doesn't
-		 * currently allocate extra IDs for multiple segments.
-		 */
-	} else {
-		struct ip6_hdr *tsoh_iph =
-			(struct ip6_hdr *)(header + tso->nh_off);
-		tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
-	}
+		if (tso->protocol == htons(ETHERTYPE_IP)) {
+			struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
+			tsoh_iph->ip_len = htons(ip_length);
+			/* XXX We should increment ip_id, but FreeBSD doesn't
+			 * currently allocate extra IDs for multiple segments.
+			 */
+		} else {
+			struct ip6_hdr *tsoh_iph =
+				(struct ip6_hdr *)(header + tso->nh_off);
+			tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
+		}

-	/* Make the header visible to the hardware. */
-	bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);
+		/* Make the header visible to the hardware. */
+		bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);

+		/* Form a descriptor for this header. */
+		desc = &txq->pend_desc[txq->n_pend_desc++];
+		efx_tx_qdesc_dma_create(txq->common,
+					dma_addr,
+					tso->header_len,
+					0,
+					desc);
+		id = (id + 1) & txq->ptr_mask;
+	}

 	tso->packet_space = tso->seg_size;
 	txq->tso_packets++;
-
-	/* Form a descriptor for this header. */
-	desc = &txq->pend_desc[txq->n_pend_desc++];
-	desc->eb_addr = dma_addr;
-	desc->eb_size = tso->header_len;
-	desc->eb_eop = 0;
+	*idp = id;

 	return (0);
 }

 static int
 sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
-		   const bus_dma_segment_t *dma_seg, int n_dma_seg)
+		   const bus_dma_segment_t *dma_seg, int n_dma_seg,
+		   int vlan_tagged)
 {
 	struct sfxge_tso_state tso;
-	unsigned int id, next_id;
+	unsigned int id;
 	unsigned skipped = 0;

-	tso_start(&tso, mbuf);
+	tso_start(txq, &tso, dma_seg, mbuf);

 	while (dma_seg->ds_len + skipped <= tso.header_len) {
 		skipped += dma_seg->ds_len;
@@ -1039,13 +1132,15 @@ sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
 	tso.in_len = dma_seg->ds_len - (tso.header_len - skipped);
 	tso.dma_addr = dma_seg->ds_addr + (tso.header_len - skipped);

-	id = txq->added & txq->ptr_mask;
-	if (__predict_false(tso_start_new_packet(txq, &tso, id)))
+	id = (txq->added + vlan_tagged) & txq->ptr_mask;
+	if (__predict_false(tso_start_new_packet(txq, &tso, &id)))
 		return (-1);

 	while (1) {
-		id = (id + 1) & txq->ptr_mask;
 		tso_fill_packet_with_fragment(txq, &tso);
+		/* Exactly one DMA descriptor is added */
+		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
+		id = (id + 1) & txq->ptr_mask;

 		/* Move onto the next fragment? */
 		if (tso.in_len == 0) {
@@ -1064,18 +1159,17 @@ sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
 			 * the remainder of the input mbuf but do not
 			 * roll back the work we have done.
 			 */
-			if (txq->n_pend_desc + 1 /* header */ + n_dma_seg >
-			    SFXGE_TSO_MAX_DESC) {
+			if (txq->n_pend_desc + tso.fw_assisted +
+			    1 /* header */ + n_dma_seg >
+			    txq->max_pkt_desc) {
 				txq->tso_pdrop_too_many++;
 				break;
 			}
-			next_id = (id + 1) & txq->ptr_mask;
 			if (__predict_false(tso_start_new_packet(txq, &tso,
-								 next_id))) {
+								 &id))) {
 				txq->tso_pdrop_no_rsrc++;
 				break;
 			}
-			id = next_id;
 		}
 	}
@@ -1128,38 +1222,52 @@ sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index)
 	struct sfxge_evq *evq;
 	unsigned int count;

+	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
+
 	txq = sc->txq[index];
 	evq = sc->evq[txq->evq_index];

+	SFXGE_EVQ_LOCK(evq);
 	SFXGE_TXQ_LOCK(txq);

 	KASSERT(txq->init_state == SFXGE_TXQ_STARTED,
 	    ("txq->init_state != SFXGE_TXQ_STARTED"));

 	txq->init_state = SFXGE_TXQ_INITIALIZED;
-	txq->flush_state = SFXGE_FLUSH_PENDING;
-
-	/* Flush the transmit queue. */
-	efx_tx_qflush(txq->common);
-
-	SFXGE_TXQ_UNLOCK(txq);

-	count = 0;
-	do {
-		/* Spin for 100ms. */
-		DELAY(100000);
+	if (txq->flush_state != SFXGE_FLUSH_DONE) {
+		txq->flush_state = SFXGE_FLUSH_PENDING;

-		if (txq->flush_state != SFXGE_FLUSH_PENDING)
-			break;
-	} while (++count < 20);
+		SFXGE_EVQ_UNLOCK(evq);
+		SFXGE_TXQ_UNLOCK(txq);

-	SFXGE_EVQ_LOCK(evq);
-	SFXGE_TXQ_LOCK(txq);
+		/* Flush the transmit queue. */
+		if (efx_tx_qflush(txq->common) != 0) {
+			log(LOG_ERR, "%s: Flushing Tx queue %u failed\n",
+			    device_get_nameunit(sc->dev), index);
+			txq->flush_state = SFXGE_FLUSH_DONE;
+		} else {
+			count = 0;
+			do {
+				/* Spin for 100ms. */
+				DELAY(100000);
+				if (txq->flush_state != SFXGE_FLUSH_PENDING)
+					break;
+			} while (++count < 20);
+		}
+		SFXGE_EVQ_LOCK(evq);
+		SFXGE_TXQ_LOCK(txq);

-	KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
-	    ("txq->flush_state == SFXGE_FLUSH_FAILED"));
+		KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
+			("txq->flush_state == SFXGE_FLUSH_FAILED"));

-	txq->flush_state = SFXGE_FLUSH_DONE;
+		if (txq->flush_state != SFXGE_FLUSH_DONE) {
+			/* Flush timeout */
+			log(LOG_ERR, "%s: Cannot flush Tx queue %u\n",
+			    device_get_nameunit(sc->dev), index);
+			txq->flush_state = SFXGE_FLUSH_DONE;
+		}
+	}

 	txq->blocked = 0;
 	txq->pending = txq->added;
@@ -1195,8 +1303,11 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
 	efsys_mem_t *esmp;
 	uint16_t flags;
 	struct sfxge_evq *evq;
+	unsigned int desc_index;
 	int rc;

+	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
+
 	txq = sc->txq[index];
 	esmp = &txq->mem;
 	evq = sc->evq[txq->evq_index];
@@ -1231,15 +1342,19 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
 	/* Create the common code transmit queue. */
 	if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
 	    sc->txq_entries, txq->buf_base_id, flags, evq->common,
-	    &txq->common)) != 0)
+	    &txq->common, &desc_index)) != 0)
 		goto fail;

+	/* Initialise queue descriptor indexes */
+	txq->added = txq->pending = txq->completed = txq->reaped = desc_index;
+
 	SFXGE_TXQ_LOCK(txq);

 	/* Enable the transmit queue. */
 	efx_tx_qenable(txq->common);

 	txq->init_state = SFXGE_TXQ_STARTED;
+	txq->flush_state = SFXGE_FLUSH_REQUIRED;

 	SFXGE_TXQ_UNLOCK(txq);
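Note (not part of the patch): sfxge_tx_qstop() now checks the return value of efx_tx_qflush() and tolerates a flush that never completes, logging an error instead of asserting. The wait reduces to a bounded poll; a standalone sketch of its shape, with a delay callback standing in for DELAY():

    #include <stdbool.h>

    /* Poll a flush-state variable for up to 20 * 100ms = 2 seconds. */
    static bool
    wait_for_flush_done(volatile const int *state, int pending_val,
                        void (*delay_us)(unsigned int))
    {
            unsigned int count;

            for (count = 0; count < 20; count++) {
                    delay_us(100000);      /* spin for 100ms per iteration */
                    if (*state != pending_val)
                            return (true); /* event path reported completion */
            }
            return (false);                /* timed out; caller logs and moves on */
    }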
@@ -1348,9 +1463,41 @@ sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index)
 	free(txq, M_SFXGE);
 }

+/*
+ * Estimate maximum number of Tx descriptors required for TSO packet.
+ * With minimum MSS and maximum mbuf length we might need more (even
+ * than a ring-ful of descriptors), but this should not happen in
+ * practice except due to deliberate attack.  In that case we will
+ * truncate the output at a packet boundary.
+ */
+static unsigned int
+sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type)
+{
+	/* One descriptor for every input fragment */
+	unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG;
+
+	/* VLAN tagging Tx option descriptor may be required */
+	if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled)
+		max_descs++;
+
+	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) {
+		/*
+		 * Plus header and payload descriptor for each output segment.
+		 * Minus one since header fragment is already counted.
+		 */
+		max_descs += SFXGE_TSO_MAX_SEGS * 2 - 1;
+
+		/* FW assisted TSO requires one more descriptor per segment */
+		if (sc->tso_fw_assisted)
+			max_descs += SFXGE_TSO_MAX_SEGS;
+	}
+
+	return (max_descs);
+}
+
 static int
 sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
-	       enum sfxge_txq_type type, unsigned int evq_index)
+    enum sfxge_txq_type type, unsigned int evq_index)
 {
 	char name[16];
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
@@ -1392,7 +1539,7 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
 	}

 	/* Allocate pending descriptor array for batching writes. */
-	txq->pend_desc = malloc(sizeof(efx_buffer_t) * sc->txq_entries,
+	txq->pend_desc = malloc(sizeof(efx_desc_t) * sc->txq_entries,
 				M_SFXGE, M_ZERO | M_WAITOK);

 	/* Allocate and initialise mbuf DMA mapping array. */
@@ -1475,6 +1622,9 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
 	txq->evq_index = evq_index;
 	txq->txq_index = txq_index;
 	txq->init_state = SFXGE_TXQ_INITIALIZED;
+	txq->hw_vlan_tci = 0;
+
+	txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, type);

 	return (0);
@@ -1572,6 +1722,7 @@ sfxge_tx_fini(struct sfxge_softc *sc)
 int
 sfxge_tx_init(struct sfxge_softc *sc)
 {
+	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
 	struct sfxge_intr *intr;
 	int index;
 	int rc;
@@ -1583,6 +1734,12 @@ sfxge_tx_init(struct sfxge_softc *sc)

 	sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc;

+	sc->tso_fw_assisted = sfxge_tso_fw_assisted;
+	if (sc->tso_fw_assisted)
+		sc->tso_fw_assisted =
+		    (encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) &&
+		    (encp->enc_fw_assisted_tso_enabled);
+
 	sc->txqs_node = SYSCTL_ADD_NODE(
 		device_get_sysctl_ctx(sc->dev),
 		SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
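Note (not part of the patch): sfxge_tx_max_pkt_desc() budgets the worst case per packet. With hypothetical values SFXGE_TX_MAPPING_MAX_SEG = 16 and SFXGE_TSO_MAX_SEGS = 64, the budget for a checksum/TSO queue with hardware VLAN insertion and FW-assisted TSO enabled works out as:

    /* Worked example of the descriptor budget; the constants are hypothetical. */
    static unsigned int
    example_max_pkt_desc(void)
    {
            unsigned int max_descs = 16; /* one DMA descriptor per input fragment */

            max_descs += 1;              /* VLAN tag option descriptor */
            max_descs += 64 * 2 - 1;     /* TSO header + payload per segment,
                                          * minus the already-counted header */
            max_descs += 64;             /* FW-assisted TSO option descriptors */
            return (max_descs);          /* 16 + 1 + 127 + 64 = 208 */
    }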