Diffstat (limited to 'sys/dev/sfxge/sfxge_tx.c')
-rw-r--r--  sys/dev/sfxge/sfxge_tx.c  501
1 file changed, 329 insertions, 172 deletions
diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c
index 524b09d840c0..af8efc26be46 100644
--- a/sys/dev/sfxge/sfxge_tx.c
+++ b/sys/dev/sfxge/sfxge_tx.c
@@ -1,30 +1,34 @@
/*-
- * Copyright (c) 2010-2011 Solarflare Communications, Inc.
+ * Copyright (c) 2010-2015 Solarflare Communications Inc.
* All rights reserved.
*
* This software was developed in part by Philip Paeps under contract for
* Solarflare Communications, Inc.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ * modification, are permitted provided that the following conditions are met:
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the FreeBSD Project.
*/
/* Theory of operation:
@@ -67,23 +71,6 @@ __FBSDID("$FreeBSD$");
#include "sfxge.h"
#include "sfxge_tx.h"
-/*
- * Estimate maximum number of Tx descriptors required for TSO packet.
- * With minimum MSS and maximum mbuf length we might need more (even
- * than a ring-ful of descriptors), but this should not happen in
- * practice except due to deliberate attack. In that case we will
- * truncate the output at a packet boundary.
- */
-#define SFXGE_TSO_MAX_DESC \
- (SFXGE_TSO_MAX_SEGS * 2 + SFXGE_TX_MAPPING_MAX_SEG - 1)
-
-/*
- * Set the block level to ensure there is space to generate a
- * large number of descriptors for TSO.
- */
-#define SFXGE_TXQ_BLOCK_LEVEL(_entries) \
- (EFX_TXQ_LIMIT(_entries) - SFXGE_TSO_MAX_DESC)
-
#define SFXGE_PARAM_TX_DPL_GET_MAX SFXGE_PARAM(tx_dpl_get_max)
static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT;
@@ -108,6 +95,13 @@ SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
&sfxge_tx_dpl_put_max, 0,
"Maximum number of any packets in deferred packet put-list");
+#define SFXGE_PARAM_TSO_FW_ASSISTED SFXGE_PARAM(tso_fw_assisted)
+static int sfxge_tso_fw_assisted = 1;
+TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted);
+SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN,
+ &sfxge_tso_fw_assisted, 0,
+ "Use FW-assisted TSO if supported by NIC firmware");
+
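The new knob is a boot-time tunable (CTLFLAG_RDTUN), so it is normally set from loader.conf rather than changed at runtime. An illustrative line to force FW-assisted TSO off (the OID name follows from the SYSCTL_INT declaration above; the default of 1 means "use it if the firmware supports it"):

hw.sfxge.tso_fw_assisted="0"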
static const struct {
const char *name;
@@ -134,7 +128,38 @@ static void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
static void sfxge_tx_qunblock(struct sfxge_txq *txq);
static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
- const bus_dma_segment_t *dma_seg, int n_dma_seg);
+ const bus_dma_segment_t *dma_seg, int n_dma_seg,
+ int vlan_tagged);
+
+static int
+sfxge_tx_maybe_insert_tag(struct sfxge_txq *txq, struct mbuf *mbuf)
+{
+ uint16_t this_tag = ((mbuf->m_flags & M_VLANTAG) ?
+ mbuf->m_pkthdr.ether_vtag :
+ 0);
+
+ if (this_tag == txq->hw_vlan_tci)
+ return (0);
+
+ efx_tx_qdesc_vlantci_create(txq->common,
+ bswap16(this_tag),
+ &txq->pend_desc[0]);
+ txq->n_pend_desc = 1;
+ txq->hw_vlan_tci = this_tag;
+ return (1);
+}
+
+static inline void
+sfxge_next_stmp(struct sfxge_txq *txq, struct sfxge_tx_mapping **pstmp)
+{
+ KASSERT((*pstmp)->flags == 0, ("stmp flags are not 0"));
+ if (__predict_false(*pstmp ==
+ &txq->stmp[txq->ptr_mask]))
+ *pstmp = &txq->stmp[0];
+ else
+ (*pstmp)++;
+}
+
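To make the descriptor accounting behind sfxge_tx_maybe_insert_tag() concrete, a small illustration (the tag values are hypothetical):

/*
 * Illustration only.  With txq->hw_vlan_tci initially 0:
 *
 *   packet 1, VLAN tag 100  -> differs from cache -> 1 option descriptor, cache := 100
 *   packet 2, VLAN tag 100  -> matches cache      -> 0 extra descriptors
 *   packet 3, untagged (0)  -> differs from cache -> 1 option descriptor, cache := 0
 *
 * The caller adds the return value (0 or 1) to its descriptor count and,
 * when an option descriptor was emitted, advances the stmp ring with
 * sfxge_next_stmp(), as the sfxge_tx_queue_mbuf() hunk below shows.
 */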
void
sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
@@ -252,29 +277,30 @@ static void
sfxge_tx_qlist_post(struct sfxge_txq *txq)
{
unsigned int old_added;
+ unsigned int block_level;
unsigned int level;
int rc;
SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0"));
- KASSERT(txq->n_pend_desc <= SFXGE_TSO_MAX_DESC,
+ KASSERT(txq->n_pend_desc <= txq->max_pkt_desc,
("txq->n_pend_desc too large"));
KASSERT(!txq->blocked, ("txq->blocked"));
old_added = txq->added;
/* Post the fragment list. */
- rc = efx_tx_qpost(txq->common, txq->pend_desc, txq->n_pend_desc,
+ rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, txq->n_pend_desc,
txq->reaped, &txq->added);
- KASSERT(rc == 0, ("efx_tx_qpost() failed"));
+ KASSERT(rc == 0, ("efx_tx_qdesc_post() failed"));
- /* If efx_tx_qpost() had to refragment, our information about
+ /* If efx_tx_qdesc_post() had to refragment, our information about
* buffers to free may be associated with the wrong
* descriptors.
*/
KASSERT(txq->added - old_added == txq->n_pend_desc,
- ("efx_tx_qpost() refragmented descriptors"));
+ ("efx_tx_qdesc_post() refragmented descriptors"));
level = txq->added - txq->reaped;
KASSERT(level <= txq->entries, ("overfilled TX queue"));
@@ -282,14 +308,20 @@ sfxge_tx_qlist_post(struct sfxge_txq *txq)
/* Clear the fragment list. */
txq->n_pend_desc = 0;
+ /*
+ * Set the block level to ensure there is space to generate a
+ * large number of descriptors for TSO.
+ */
+ block_level = EFX_TXQ_LIMIT(txq->entries) - txq->max_pkt_desc;
+
/* Have we reached the block level? */
- if (level < SFXGE_TXQ_BLOCK_LEVEL(txq->entries))
+ if (level < block_level)
return;
/* Reap, and check again */
sfxge_tx_qreap(txq);
level = txq->added - txq->reaped;
- if (level < SFXGE_TXQ_BLOCK_LEVEL(txq->entries))
+ if (level < block_level)
return;
txq->blocked = 1;
@@ -301,7 +333,7 @@ sfxge_tx_qlist_post(struct sfxge_txq *txq)
mb();
sfxge_tx_qreap(txq);
level = txq->added - txq->reaped;
- if (level < SFXGE_TXQ_BLOCK_LEVEL(txq->entries)) {
+ if (level < block_level) {
mb();
txq->blocked = 0;
}
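As a worked example of the backpressure logic above (the numbers are illustrative, and EFX_TXQ_LIMIT() is assumed only to reserve a small fixed slice of the ring):

/*
 * Illustration only, with hypothetical values:
 *   txq->entries      = 1024
 *   txq->max_pkt_desc = 193   (worst-case descriptors for one packet)
 *
 *   block_level = EFX_TXQ_LIMIT(1024) - 193
 *
 * Once (txq->added - txq->reaped) reaches block_level the queue is marked
 * blocked, guaranteeing that the next packet, however fragmented, still
 * fits.  A reap is tried first, and the second check after clearing
 * 'blocked' (bracketed by memory barriers) closes the race against
 * completions arriving in between.
 */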
@@ -314,10 +346,12 @@ static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG];
unsigned int id;
struct sfxge_tx_mapping *stmp;
- efx_buffer_t *desc;
+ efx_desc_t *desc;
int n_dma_seg;
int rc;
int i;
+ int eop;
+ int vlan_tagged;
KASSERT(!txq->blocked, ("txq->blocked"));
@@ -354,35 +388,35 @@ static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
used_map = &stmp->map;
+ vlan_tagged = sfxge_tx_maybe_insert_tag(txq, mbuf);
+ if (vlan_tagged) {
+ sfxge_next_stmp(txq, &stmp);
+ }
if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
- rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg);
+ rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg, vlan_tagged);
if (rc < 0)
goto reject_mapped;
- stmp = &txq->stmp[rc];
+ stmp = &txq->stmp[(rc - 1) & txq->ptr_mask];
} else {
/* Add the mapping to the fragment list, and set flags
* for the buffer.
*/
+
i = 0;
for (;;) {
- desc = &txq->pend_desc[i];
- desc->eb_addr = dma_seg[i].ds_addr;
- desc->eb_size = dma_seg[i].ds_len;
- if (i == n_dma_seg - 1) {
- desc->eb_eop = 1;
+ desc = &txq->pend_desc[i + vlan_tagged];
+ eop = (i == n_dma_seg - 1);
+ efx_tx_qdesc_dma_create(txq->common,
+ dma_seg[i].ds_addr,
+ dma_seg[i].ds_len,
+ eop,
+ desc);
+ if (eop)
break;
- }
- desc->eb_eop = 0;
i++;
-
- stmp->flags = 0;
- if (__predict_false(stmp ==
- &txq->stmp[txq->ptr_mask]))
- stmp = &txq->stmp[0];
- else
- stmp++;
+ sfxge_next_stmp(txq, &stmp);
}
- txq->n_pend_desc = n_dma_seg;
+ txq->n_pend_desc = n_dma_seg + vlan_tagged;
}
/*
@@ -469,7 +503,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
/* Push the fragments to the hardware in batches. */
if (txq->added - pushed >= SFXGE_TX_BATCH) {
- efx_tx_qpush(txq->common, txq->added);
+ efx_tx_qpush(txq->common, txq->added, pushed);
pushed = txq->added;
}
}
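The doorbell batching above reduces MMIO writes; condensed, the pattern in sfxge_tx_qdpl_drain() looks like the sketch below, with next_packet() standing in for the real get-list walk:

/* Sketch of the push batching; not a verbatim copy of the function. */
unsigned int pushed = txq->added;

while ((mbuf = next_packet()) != NULL) {        /* hypothetical packet source */
        sfxge_tx_queue_mbuf(txq, mbuf);         /* appends descriptors, bumps txq->added */
        if (txq->added - pushed >= SFXGE_TX_BATCH) {
                /* Ring the doorbell for everything queued since 'pushed'. */
                efx_tx_qpush(txq->common, txq->added, pushed);
                pushed = txq->added;
        }
}
if (txq->added != pushed)                       /* final partial batch */
        efx_tx_qpush(txq->common, txq->added, pushed);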
@@ -489,14 +523,13 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
}
if (txq->added != pushed)
- efx_tx_qpush(txq->common, txq->added);
+ efx_tx_qpush(txq->common, txq->added, pushed);
KASSERT(txq->blocked || stdp->std_get_count == 0,
("queue unblocked but count is non-zero"));
}
-#define SFXGE_TX_QDPL_PENDING(_txq) \
- ((_txq)->dpl.std_put != 0)
+#define SFXGE_TX_QDPL_PENDING(_txq) ((_txq)->dpl.std_put != 0)
/*
* Service the deferred packet list.
@@ -599,7 +632,7 @@ sfxge_tx_qdpl_put_unlocked(struct sfxge_txq *txq, struct mbuf *mbuf)
* Called from if_transmit - will try to grab the txq lock and enqueue to the
* put list if it succeeds, otherwise try to push onto the defer list if space.
*/
-int
+static int
sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
{
int rc;
@@ -746,6 +779,10 @@ struct sfxge_tso_state {
ssize_t tcph_off; /* Offset of TCP header */
unsigned header_len; /* Number of bytes of header */
unsigned seg_size; /* TCP segment size */
+ int fw_assisted; /* Use FW-assisted TSO */
+ u_short packet_id; /* IPv4 packet ID from the original packet */
+ efx_desc_t header_desc; /* Precomputed header descriptor for
+ * FW-assisted TSO */
};
static const struct ip *tso_iph(const struct sfxge_tso_state *tso)
@@ -816,12 +853,16 @@ static void tso_fini(struct sfxge_txq *txq)
}
}
-static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
+static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso,
+ const bus_dma_segment_t *hdr_dma_seg,
+ struct mbuf *mbuf)
{
struct ether_header *eh = mtod(mbuf, struct ether_header *);
+ const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->sc->enp);
const struct tcphdr *th;
struct tcphdr th_copy;
+ tso->fw_assisted = txq->sc->tso_fw_assisted;
tso->mbuf = mbuf;
/* Find network protocol and header */
@@ -840,12 +881,19 @@ static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP,
("TSO required on non-TCP packet"));
tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl;
+ tso->packet_id = tso_iph(tso)->ip_id;
} else {
KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
("TSO required on non-IP packet"));
KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP,
("TSO required on non-TCP packet"));
tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr);
+ tso->packet_id = 0;
+ }
+ if (tso->fw_assisted &&
+ __predict_false(tso->tcph_off >
+ encp->enc_tx_tso_tcp_header_offset_limit)) {
+ tso->fw_assisted = 0;
}
KASSERT(mbuf->m_len >= tso->tcph_off,
@@ -875,6 +923,17 @@ static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
th->th_flags & (TH_URG | TH_SYN)));
tso->out_len = mbuf->m_pkthdr.len - tso->header_len;
+
+ if (tso->fw_assisted) {
+ if (hdr_dma_seg->ds_len >= tso->header_len)
+ efx_tx_qdesc_dma_create(txq->common,
+ hdr_dma_seg->ds_addr,
+ tso->header_len,
+ B_FALSE,
+ &tso->header_desc);
+ else
+ tso->fw_assisted = 0;
+ }
}
/*
@@ -887,7 +946,7 @@ static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
struct sfxge_tso_state *tso)
{
- efx_buffer_t *desc;
+ efx_desc_t *desc;
int n;
if (tso->in_len == 0 || tso->packet_space == 0)
@@ -903,9 +962,11 @@ static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
tso->in_len -= n;
desc = &txq->pend_desc[txq->n_pend_desc++];
- desc->eb_addr = tso->dma_addr;
- desc->eb_size = n;
- desc->eb_eop = tso->out_len == 0 || tso->packet_space == 0;
+ efx_tx_qdesc_dma_create(txq->common,
+ tso->dma_addr,
+ n,
+ tso->out_len == 0 || tso->packet_space == 0,
+ desc);
tso->dma_addr += n;
}
@@ -928,107 +989,139 @@ static void tso_map_long_header(void *dma_addr_ret,
*/
static int tso_start_new_packet(struct sfxge_txq *txq,
struct sfxge_tso_state *tso,
- unsigned int id)
+ unsigned int *idp)
{
- struct sfxge_tx_mapping *stmp = &txq->stmp[id];
+ unsigned int id = *idp;
struct tcphdr *tsoh_th;
unsigned ip_length;
caddr_t header;
uint64_t dma_addr;
bus_dmamap_t map;
- efx_buffer_t *desc;
+ efx_desc_t *desc;
int rc;
- /* Allocate a DMA-mapped header buffer. */
- if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
- unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
- unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;
+ if (tso->fw_assisted) {
+ uint8_t tcp_flags = tso_tcph(tso)->th_flags;
+
+ if (tso->out_len > tso->seg_size)
+ tcp_flags &= ~(TH_FIN | TH_PUSH);
+
+ /* TSO option descriptor */
+ desc = &txq->pend_desc[txq->n_pend_desc++];
+ efx_tx_qdesc_tso_create(txq->common,
+ tso->packet_id,
+ tso->seqnum,
+ tcp_flags,
+ desc++);
+ KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
+ id = (id + 1) & txq->ptr_mask;
- header = (txq->tsoh_buffer[page_index].esm_base +
- buf_index * TSOH_STD_SIZE);
- dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
- buf_index * TSOH_STD_SIZE);
- map = txq->tsoh_buffer[page_index].esm_map;
+ /* Header DMA descriptor */
+ *desc = tso->header_desc;
+ txq->n_pend_desc++;
+ KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
+ id = (id + 1) & txq->ptr_mask;
- stmp->flags = 0;
+ tso->seqnum += tso->seg_size;
} else {
- /* We cannot use bus_dmamem_alloc() as that may sleep */
- header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
- if (__predict_false(!header))
- return (ENOMEM);
- rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
- header, tso->header_len,
- tso_map_long_header, &dma_addr,
- BUS_DMA_NOWAIT);
- if (__predict_false(dma_addr == 0)) {
- if (rc == 0) {
- /* Succeeded but got >1 segment */
- bus_dmamap_unload(txq->packet_dma_tag,
- stmp->map);
- rc = EINVAL;
+ /* Allocate a DMA-mapped header buffer. */
+ if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
+ unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
+ unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;
+
+ header = (txq->tsoh_buffer[page_index].esm_base +
+ buf_index * TSOH_STD_SIZE);
+ dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
+ buf_index * TSOH_STD_SIZE);
+ map = txq->tsoh_buffer[page_index].esm_map;
+
+ KASSERT(txq->stmp[id].flags == 0,
+ ("stmp flags are not 0"));
+ } else {
+ struct sfxge_tx_mapping *stmp = &txq->stmp[id];
+
+ /* We cannot use bus_dmamem_alloc() as that may sleep */
+ header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
+ if (__predict_false(!header))
+ return (ENOMEM);
+ rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
+ header, tso->header_len,
+ tso_map_long_header, &dma_addr,
+ BUS_DMA_NOWAIT);
+ if (__predict_false(dma_addr == 0)) {
+ if (rc == 0) {
+ /* Succeeded but got >1 segment */
+ bus_dmamap_unload(txq->packet_dma_tag,
+ stmp->map);
+ rc = EINVAL;
+ }
+ free(header, M_SFXGE);
+ return (rc);
}
- free(header, M_SFXGE);
- return (rc);
- }
- map = stmp->map;
+ map = stmp->map;
- txq->tso_long_headers++;
- stmp->u.heap_buf = header;
- stmp->flags = TX_BUF_UNMAP;
- }
+ txq->tso_long_headers++;
+ stmp->u.heap_buf = header;
+ stmp->flags = TX_BUF_UNMAP;
+ }
- tsoh_th = (struct tcphdr *)(header + tso->tcph_off);
+ tsoh_th = (struct tcphdr *)(header + tso->tcph_off);
- /* Copy and update the headers. */
- m_copydata(tso->mbuf, 0, tso->header_len, header);
+ /* Copy and update the headers. */
+ m_copydata(tso->mbuf, 0, tso->header_len, header);
- tsoh_th->th_seq = htonl(tso->seqnum);
- tso->seqnum += tso->seg_size;
- if (tso->out_len > tso->seg_size) {
- /* This packet will not finish the TSO burst. */
- ip_length = tso->header_len - tso->nh_off + tso->seg_size;
- tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
- } else {
- /* This packet will be the last in the TSO burst. */
- ip_length = tso->header_len - tso->nh_off + tso->out_len;
- }
+ tsoh_th->th_seq = htonl(tso->seqnum);
+ tso->seqnum += tso->seg_size;
+ if (tso->out_len > tso->seg_size) {
+ /* This packet will not finish the TSO burst. */
+ ip_length = tso->header_len - tso->nh_off + tso->seg_size;
+ tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
+ } else {
+ /* This packet will be the last in the TSO burst. */
+ ip_length = tso->header_len - tso->nh_off + tso->out_len;
+ }
- if (tso->protocol == htons(ETHERTYPE_IP)) {
- struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
- tsoh_iph->ip_len = htons(ip_length);
- /* XXX We should increment ip_id, but FreeBSD doesn't
- * currently allocate extra IDs for multiple segments.
- */
- } else {
- struct ip6_hdr *tsoh_iph =
- (struct ip6_hdr *)(header + tso->nh_off);
- tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
- }
+ if (tso->protocol == htons(ETHERTYPE_IP)) {
+ struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
+ tsoh_iph->ip_len = htons(ip_length);
+ /* XXX We should increment ip_id, but FreeBSD doesn't
+ * currently allocate extra IDs for multiple segments.
+ */
+ } else {
+ struct ip6_hdr *tsoh_iph =
+ (struct ip6_hdr *)(header + tso->nh_off);
+ tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
+ }
- /* Make the header visible to the hardware. */
- bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);
+ /* Make the header visible to the hardware. */
+ bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);
+ /* Form a descriptor for this header. */
+ desc = &txq->pend_desc[txq->n_pend_desc++];
+ efx_tx_qdesc_dma_create(txq->common,
+ dma_addr,
+ tso->header_len,
+ 0,
+ desc);
+ id = (id + 1) & txq->ptr_mask;
+ }
tso->packet_space = tso->seg_size;
txq->tso_packets++;
-
- /* Form a descriptor for this header. */
- desc = &txq->pend_desc[txq->n_pend_desc++];
- desc->eb_addr = dma_addr;
- desc->eb_size = tso->header_len;
- desc->eb_eop = 0;
+ *idp = id;
return (0);
}
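Summarising the two branches of tso_start_new_packet(), the descriptors emitted at the start of each output segment are laid out roughly as follows (a sketch of what this diff implies, not an authoritative descriptor-format description):

/*
 * FW-assisted TSO, per output segment:
 *   [ TSO option descriptor ]      packet id, sequence number, TCP flags
 *   [ header DMA descriptor ]      the precomputed tso->header_desc
 *   [ payload DMA descriptor(s) ]  added by tso_fill_packet_with_fragment()
 *
 * Software TSO, per output segment:
 *   [ header DMA descriptor ]      points at a driver-built header copy with
 *                                  th_seq / ip_len / ip6_plen rewritten
 *   [ payload DMA descriptor(s) ]
 *
 * 'id' is advanced past every descriptor generated here, which is why the
 * function now takes 'idp' and passes the updated index back to the caller:
 * the stmp[] ring must stay in step with the descriptor ring.
 */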
static int
sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
- const bus_dma_segment_t *dma_seg, int n_dma_seg)
+ const bus_dma_segment_t *dma_seg, int n_dma_seg,
+ int vlan_tagged)
{
struct sfxge_tso_state tso;
- unsigned int id, next_id;
+ unsigned int id;
unsigned skipped = 0;
- tso_start(&tso, mbuf);
+ tso_start(txq, &tso, dma_seg, mbuf);
while (dma_seg->ds_len + skipped <= tso.header_len) {
skipped += dma_seg->ds_len;
@@ -1039,13 +1132,15 @@ sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
tso.in_len = dma_seg->ds_len - (tso.header_len - skipped);
tso.dma_addr = dma_seg->ds_addr + (tso.header_len - skipped);
- id = txq->added & txq->ptr_mask;
- if (__predict_false(tso_start_new_packet(txq, &tso, id)))
+ id = (txq->added + vlan_tagged) & txq->ptr_mask;
+ if (__predict_false(tso_start_new_packet(txq, &tso, &id)))
return (-1);
while (1) {
- id = (id + 1) & txq->ptr_mask;
tso_fill_packet_with_fragment(txq, &tso);
+ /* Exactly one DMA descriptor is added */
+ KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
+ id = (id + 1) & txq->ptr_mask;
/* Move onto the next fragment? */
if (tso.in_len == 0) {
@@ -1064,18 +1159,17 @@ sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
* the remainder of the input mbuf but do not
* roll back the work we have done.
*/
- if (txq->n_pend_desc + 1 /* header */ + n_dma_seg >
- SFXGE_TSO_MAX_DESC) {
+ if (txq->n_pend_desc + tso.fw_assisted +
+ 1 /* header */ + n_dma_seg >
+ txq->max_pkt_desc) {
txq->tso_pdrop_too_many++;
break;
}
- next_id = (id + 1) & txq->ptr_mask;
if (__predict_false(tso_start_new_packet(txq, &tso,
- next_id))) {
+ &id))) {
txq->tso_pdrop_no_rsrc++;
break;
}
- id = next_id;
}
}
@@ -1128,38 +1222,52 @@ sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index)
struct sfxge_evq *evq;
unsigned int count;
+ SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
+
txq = sc->txq[index];
evq = sc->evq[txq->evq_index];
+ SFXGE_EVQ_LOCK(evq);
SFXGE_TXQ_LOCK(txq);
KASSERT(txq->init_state == SFXGE_TXQ_STARTED,
("txq->init_state != SFXGE_TXQ_STARTED"));
txq->init_state = SFXGE_TXQ_INITIALIZED;
- txq->flush_state = SFXGE_FLUSH_PENDING;
-
- /* Flush the transmit queue. */
- efx_tx_qflush(txq->common);
-
- SFXGE_TXQ_UNLOCK(txq);
- count = 0;
- do {
- /* Spin for 100ms. */
- DELAY(100000);
+ if (txq->flush_state != SFXGE_FLUSH_DONE) {
+ txq->flush_state = SFXGE_FLUSH_PENDING;
- if (txq->flush_state != SFXGE_FLUSH_PENDING)
- break;
- } while (++count < 20);
+ SFXGE_EVQ_UNLOCK(evq);
+ SFXGE_TXQ_UNLOCK(txq);
- SFXGE_EVQ_LOCK(evq);
- SFXGE_TXQ_LOCK(txq);
+ /* Flush the transmit queue. */
+ if (efx_tx_qflush(txq->common) != 0) {
+ log(LOG_ERR, "%s: Flushing Tx queue %u failed\n",
+ device_get_nameunit(sc->dev), index);
+ txq->flush_state = SFXGE_FLUSH_DONE;
+ } else {
+ count = 0;
+ do {
+ /* Spin for 100ms. */
+ DELAY(100000);
+ if (txq->flush_state != SFXGE_FLUSH_PENDING)
+ break;
+ } while (++count < 20);
+ }
+ SFXGE_EVQ_LOCK(evq);
+ SFXGE_TXQ_LOCK(txq);
- KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
- ("txq->flush_state == SFXGE_FLUSH_FAILED"));
+ KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
+ ("txq->flush_state == SFXGE_FLUSH_FAILED"));
- txq->flush_state = SFXGE_FLUSH_DONE;
+ if (txq->flush_state != SFXGE_FLUSH_DONE) {
+ /* Flush timeout */
+ log(LOG_ERR, "%s: Cannot flush Tx queue %u\n",
+ device_get_nameunit(sc->dev), index);
+ txq->flush_state = SFXGE_FLUSH_DONE;
+ }
+ }
txq->blocked = 0;
txq->pending = txq->added;
@@ -1195,8 +1303,11 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
efsys_mem_t *esmp;
uint16_t flags;
struct sfxge_evq *evq;
+ unsigned int desc_index;
int rc;
+ SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
+
txq = sc->txq[index];
esmp = &txq->mem;
evq = sc->evq[txq->evq_index];
@@ -1231,15 +1342,19 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
/* Create the common code transmit queue. */
if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
sc->txq_entries, txq->buf_base_id, flags, evq->common,
- &txq->common)) != 0)
+ &txq->common, &desc_index)) != 0)
goto fail;
+ /* Initialise queue descriptor indexes */
+ txq->added = txq->pending = txq->completed = txq->reaped = desc_index;
+
SFXGE_TXQ_LOCK(txq);
/* Enable the transmit queue. */
efx_tx_qenable(txq->common);
txq->init_state = SFXGE_TXQ_STARTED;
+ txq->flush_state = SFXGE_FLUSH_REQUIRED;
SFXGE_TXQ_UNLOCK(txq);
@@ -1348,9 +1463,41 @@ sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index)
free(txq, M_SFXGE);
}
+/*
+ * Estimate maximum number of Tx descriptors required for TSO packet.
+ * With minimum MSS and maximum mbuf length we might need more (even
+ * than a ring-ful of descriptors), but this should not happen in
+ * practice except due to deliberate attack. In that case we will
+ * truncate the output at a packet boundary.
+ */
+static unsigned int
+sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type)
+{
+ /* One descriptor for every input fragment */
+ unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG;
+
+ /* VLAN tagging Tx option descriptor may be required */
+ if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled)
+ max_descs++;
+
+ if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) {
+ /*
+ * Plus header and payload descriptor for each output segment.
+ * Minus one since header fragment is already counted.
+ */
+ max_descs += SFXGE_TSO_MAX_SEGS * 2 - 1;
+
+ /* FW assisted TSO requires one more descriptor per segment */
+ if (sc->tso_fw_assisted)
+ max_descs += SFXGE_TSO_MAX_SEGS;
+ }
+
+ return (max_descs);
+}
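A worked instance of the estimate above, using illustrative values (the real SFXGE_TX_MAPPING_MAX_SEG and SFXGE_TSO_MAX_SEGS are defined in sfxge_tx.h and may differ):

/*
 * Hypothetical numbers, for illustration only:
 *   SFXGE_TX_MAPPING_MAX_SEG = 16
 *   SFXGE_TSO_MAX_SEGS       = 64
 *
 * Checksum-only queue, hardware VLAN insertion enabled:
 *   max_descs = 16 + 1                       = 17
 *
 * TSO-capable queue, software TSO:
 *   max_descs = 16 + 1 + (64 * 2 - 1)        = 144
 *
 * TSO-capable queue, FW-assisted TSO (one extra option descriptor per
 * output segment):
 *   max_descs = 16 + 1 + (64 * 2 - 1) + 64   = 208
 */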
+
static int
sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
- enum sfxge_txq_type type, unsigned int evq_index)
+ enum sfxge_txq_type type, unsigned int evq_index)
{
char name[16];
struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
@@ -1392,7 +1539,7 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
}
/* Allocate pending descriptor array for batching writes. */
- txq->pend_desc = malloc(sizeof(efx_buffer_t) * sc->txq_entries,
+ txq->pend_desc = malloc(sizeof(efx_desc_t) * sc->txq_entries,
M_SFXGE, M_ZERO | M_WAITOK);
/* Allocate and initialise mbuf DMA mapping array. */
@@ -1475,6 +1622,9 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
txq->evq_index = evq_index;
txq->txq_index = txq_index;
txq->init_state = SFXGE_TXQ_INITIALIZED;
+ txq->hw_vlan_tci = 0;
+
+ txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, type);
return (0);
@@ -1572,6 +1722,7 @@ sfxge_tx_fini(struct sfxge_softc *sc)
int
sfxge_tx_init(struct sfxge_softc *sc)
{
+ const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
struct sfxge_intr *intr;
int index;
int rc;
@@ -1583,6 +1734,12 @@ sfxge_tx_init(struct sfxge_softc *sc)
sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc;
+ sc->tso_fw_assisted = sfxge_tso_fw_assisted;
+ if (sc->tso_fw_assisted)
+ sc->tso_fw_assisted =
+ (encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) &&
+ (encp->enc_fw_assisted_tso_enabled);
+
sc->txqs_node = SYSCTL_ADD_NODE(
device_get_sysctl_ctx(sc->dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),