about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Navdeep Parhar <np@FreeBSD.org> 2020-09-18 03:01:47 +0000
committer: Navdeep Parhar <np@FreeBSD.org> 2020-09-18 03:01:47 +0000
commit: a4a4ad2dd9856f9f5a70c1774df765f83b4d7525 (patch)
tree: 222214c7eec82bf3bf2985030190bf273ea6e93b
parent: b092fd6c973dab0ffaca8144f435c91a2588aa21 (diff)
download: src-a4a4ad2dd9856f9f5a70c1774df765f83b4d7525.tar.gz
download: src-a4a4ad2dd9856f9f5a70c1774df765f83b4d7525.zip
cxgbe(4): add support for stateless offloads for VXLAN traffic.
Hardware assistance includes checksumming (tx and rx), TSO, and RSS on the
inner traffic in a VXLAN tunnel.

Relnotes: Yes
Sponsored by: Chelsio Communications
Notes
Notes: svn path=/head/; revision=365871
-rw-r--r-- share/man/man4/cxgbe.4 6
-rw-r--r-- sys/dev/cxgbe/adapter.h 10
-rw-r--r-- sys/dev/cxgbe/common/common.h 2
-rw-r--r-- sys/dev/cxgbe/common/t4_hw.c 14
-rw-r--r-- sys/dev/cxgbe/firmware/t6fw_cfg.txt 5
-rw-r--r-- sys/dev/cxgbe/t4_main.c 200
-rw-r--r-- sys/dev/cxgbe/t4_sge.c 626
7 files changed, 681 insertions, 182 deletions
diff --git a/share/man/man4/cxgbe.4 b/share/man/man4/cxgbe.4
index 14af7f9561ba..a3bb9690dec7 100644
--- a/share/man/man4/cxgbe.4
+++ b/share/man/man4/cxgbe.4
@@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd Dec 10, 2019
+.Dd September 17, 2020
.Dt CXGBE 4
.Os
.Sh NAME
@@ -61,8 +61,8 @@ driver provides support for PCI Express Ethernet adapters based on
the Chelsio Terminator 4, Terminator 5, and Terminator 6 ASICs (T4, T5, and T6).
The driver supports Jumbo Frames, Transmit/Receive checksum offload,
TCP segmentation offload (TSO), Large Receive Offload (LRO), VLAN
-tag insertion/extraction, VLAN checksum offload, VLAN TSO, and
-Receive Side Steering (RSS).
+tag insertion/extraction, VLAN checksum offload, VLAN TSO, VXLAN checksum
+offload, VXLAN TSO, and Receive Side Steering (RSS).
For further hardware information and questions related to hardware
requirements, see
.Pa http://www.chelsio.com/ .
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 3e7b736ede29..1e70d7b674db 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -119,6 +119,7 @@ enum {
TX_SGL_SEGS = 39,
TX_SGL_SEGS_TSO = 38,
TX_SGL_SEGS_EO_TSO = 30, /* XXX: lower for IPv6. */
+ TX_SGL_SEGS_VXLAN_TSO = 37,
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
};
@@ -286,6 +287,7 @@ struct port_info {
int nvi;
int up_vis;
int uld_vis;
+ bool vxlan_tcam_entry;
struct tx_sched_params *sched_params;
@@ -593,6 +595,8 @@ struct sge_txq {
uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */
uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */
uint64_t raw_wrs; /* # of raw work requests (alloc_wr_mbuf) */
+ uint64_t vxlan_tso_wrs; /* # of VXLAN TSO work requests */
+ uint64_t vxlan_txcsum;
uint64_t kern_tls_records;
uint64_t kern_tls_short;
@@ -625,6 +629,7 @@ struct sge_rxq {
uint64_t rxcsum; /* # of times hardware assisted with checksum */
uint64_t vlan_extraction;/* # of times VLAN tag was extracted */
+ uint64_t vxlan_rxcsum;
/* stats for not-that-common events */
@@ -848,6 +853,11 @@ struct adapter {
int lro_timeout;
int sc_do_rxcopy;
+ int vxlan_port;
+ u_int vxlan_refcount;
+ int rawf_base;
+ int nrawf;
+
struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */
struct task async_event_task;
struct port_info *port[MAX_NPORTS];
diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h
index d8961fa7db99..820e4e10daff 100644
--- a/sys/dev/cxgbe/common/common.h
+++ b/sys/dev/cxgbe/common/common.h
@@ -249,7 +249,7 @@ struct tp_params {
uint32_t max_rx_pdu;
uint32_t max_tx_pdu;
uint64_t hash_filter_mask;
- __be16 err_vec_mask;
+ bool rx_pkt_encap;
int8_t fcoe_shift;
int8_t port_shift;
diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c
index 004339ff3dae..79dd3ecdcbdd 100644
--- a/sys/dev/cxgbe/common/t4_hw.c
+++ b/sys/dev/cxgbe/common/t4_hw.c
@@ -9647,19 +9647,11 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
read_filter_mode_and_ingress_config(adap, sleep_ok);
- /*
- * Cache a mask of the bits that represent the error vector portion of
- * rx_pkt.err_vec. T6+ can use a compressed error vector to make room
- * for information about outer encapsulation (GENEVE/VXLAN/NVGRE).
- */
- tpp->err_vec_mask = htobe16(0xffff);
if (chip_id(adap) > CHELSIO_T5) {
v = t4_read_reg(adap, A_TP_OUT_CONFIG);
- if (v & F_CRXPKTENC) {
- tpp->err_vec_mask =
- htobe16(V_T6_COMPR_RXERR_VEC(M_T6_COMPR_RXERR_VEC));
- }
- }
+ tpp->rx_pkt_encap = v & F_CRXPKTENC;
+ } else
+ tpp->rx_pkt_encap = false;
rx_len = t4_read_reg(adap, A_TP_PMM_RX_PAGE_SIZE);
tx_len = t4_read_reg(adap, A_TP_PMM_TX_PAGE_SIZE);
diff --git a/sys/dev/cxgbe/firmware/t6fw_cfg.txt b/sys/dev/cxgbe/firmware/t6fw_cfg.txt
index 0f15a1a59ccb..6e5649642b29 100644
--- a/sys/dev/cxgbe/firmware/t6fw_cfg.txt
+++ b/sys/dev/cxgbe/firmware/t6fw_cfg.txt
@@ -146,7 +146,8 @@
nethctrl = 1024
neq = 2048
nqpcq = 8192
- nexactf = 456
+ nexactf = 454
+ nrawf = 2
cmask = all
pmask = all
ncrypto_lookaside = 16
@@ -272,7 +273,7 @@
[fini]
version = 0x1
- checksum = 0x13640470
+ checksum = 0xa92352a8
#
# $FreeBSD$
#
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 87858639dbe6..042eadfbe2ab 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
+#include <sys/eventhandler.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
@@ -1069,6 +1070,8 @@ t4_attach(device_t dev)
TASK_INIT(&sc->async_event_task, 0, t4_async_event, sc);
#endif
+ refcount_init(&sc->vxlan_refcount, 0);
+
rc = t4_map_bars_0_and_4(sc);
if (rc != 0)
goto done; /* error message displayed already */
@@ -1716,6 +1719,7 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
struct ifnet *ifp;
struct sbuf *sb;
struct pfil_head_args pa;
+ struct adapter *sc = vi->adapter;
vi->xact_addr_filt = -1;
callout_init(&vi->tick, 1);
@@ -1749,28 +1753,36 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
ifp->if_capabilities = T4_CAP;
ifp->if_capenable = T4_CAP_ENABLE;
+ ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
+ CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
+ if (chip_id(sc) >= CHELSIO_T6) {
+ ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
+ ifp->if_capenable |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
+ ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP |
+ CSUM_INNER_IP6_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
+ CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_ENCAP_VXLAN;
+ }
+
#ifdef TCP_OFFLOAD
- if (vi->nofldrxq != 0 && (vi->adapter->flags & KERN_TLS_OK) == 0)
+ if (vi->nofldrxq != 0 && (sc->flags & KERN_TLS_OK) == 0)
ifp->if_capabilities |= IFCAP_TOE;
#endif
#ifdef RATELIMIT
- if (is_ethoffload(vi->adapter) && vi->nofldtxq != 0) {
+ if (is_ethoffload(sc) && vi->nofldtxq != 0) {
ifp->if_capabilities |= IFCAP_TXRTLMT;
ifp->if_capenable |= IFCAP_TXRTLMT;
}
#endif
- ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
- CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
ifp->if_hw_tsomax = IP_MAXPACKET;
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
#ifdef RATELIMIT
- if (is_ethoffload(vi->adapter) && vi->nofldtxq != 0)
+ if (is_ethoffload(sc) && vi->nofldtxq != 0)
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
#endif
ifp->if_hw_tsomaxsegsize = 65536;
#ifdef KERN_TLS
- if (vi->adapter->flags & KERN_TLS_OK) {
+ if (sc->flags & KERN_TLS_OK) {
ifp->if_capabilities |= IFCAP_TXTLS;
ifp->if_capenable |= IFCAP_TXTLS;
}
@@ -2100,6 +2112,17 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
if (mask & IFCAP_TXTLS)
ifp->if_capenable ^= (mask & IFCAP_TXTLS);
#endif
+ if (mask & IFCAP_VXLAN_HWCSUM) {
+ ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM;
+ ifp->if_hwassist ^= CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP |
+ CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP;
+ }
+ if (mask & IFCAP_VXLAN_HWTSO) {
+ ifp->if_capenable ^= IFCAP_VXLAN_HWTSO;
+ ifp->if_hwassist ^= CSUM_INNER_IP6_TSO |
+ CSUM_INNER_IP_TSO;
+ }
#ifdef VLAN_CAPABILITIES
VLAN_CAPABILITIES(ifp);
@@ -4411,6 +4434,19 @@ get_params__post_init(struct adapter *sc)
MPASS(sc->tids.hpftid_base == 0);
MPASS(sc->tids.tid_base == sc->tids.nhpftids);
}
+
+ param[0] = FW_PARAM_PFVF(RAWF_START);
+ param[1] = FW_PARAM_PFVF(RAWF_END);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to query rawf parameters: %d.\n", rc);
+ return (rc);
+ }
+ if ((int)val[1] > (int)val[0]) {
+ sc->rawf_base = val[0];
+ sc->nrawf = val[1] - val[0] + 1;
+ }
}
/*
@@ -5142,6 +5178,7 @@ update_mac_settings(struct ifnet *ifp, int flags)
struct port_info *pi = vi->pi;
struct adapter *sc = pi->adapter;
int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
+ uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
ASSERT_SYNCHRONIZED_OP(sc);
KASSERT(flags, ("%s: not told what to update.", __func__));
@@ -5215,7 +5252,7 @@ update_mac_settings(struct ifnet *ifp, int flags)
rc = -rc;
for (j = 0; j < ctx.i; j++) {
if_printf(ifp,
- "failed to add mc address"
+ "failed to add mcast address"
" %02x:%02x:%02x:"
"%02x:%02x:%02x rc=%d\n",
ctx.mcaddr[j][0], ctx.mcaddr[j][1],
@@ -5225,12 +5262,34 @@ update_mac_settings(struct ifnet *ifp, int flags)
}
return (rc);
}
+ ctx.del = 0;
} else
NET_EPOCH_EXIT(et);
rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, ctx.hash, 0);
if (rc != 0)
- if_printf(ifp, "failed to set mc address hash: %d", rc);
+ if_printf(ifp, "failed to set mcast address hash: %d\n",
+ rc);
+ if (ctx.del == 0) {
+ /* We clobbered the VXLAN entry if there was one. */
+ pi->vxlan_tcam_entry = false;
+ }
+ }
+
+ if (IS_MAIN_VI(vi) && sc->vxlan_refcount > 0 &&
+ pi->vxlan_tcam_entry == false) {
+ rc = t4_alloc_raw_mac_filt(sc, vi->viid, match_all_mac,
+ match_all_mac, sc->rawf_base + pi->port_id, 1, pi->port_id,
+ true);
+ if (rc < 0) {
+ rc = -rc;
+ if_printf(ifp, "failed to add VXLAN TCAM entry: %d.\n",
+ rc);
+ } else {
+ MPASS(rc == sc->rawf_base + pi->port_id);
+ rc = 0;
+ pi->vxlan_tcam_entry = true;
+ }
}
return (rc);
@@ -10407,6 +10466,7 @@ clear_stats(struct adapter *sc, u_int port_id)
#endif
rxq->rxcsum = 0;
rxq->vlan_extraction = 0;
+ rxq->vxlan_rxcsum = 0;
rxq->fl.cl_allocated = 0;
rxq->fl.cl_recycled = 0;
@@ -10425,6 +10485,8 @@ clear_stats(struct adapter *sc, u_int port_id)
txq->txpkts0_pkts = 0;
txq->txpkts1_pkts = 0;
txq->raw_wrs = 0;
+ txq->vxlan_tso_wrs = 0;
+ txq->vxlan_txcsum = 0;
txq->kern_tls_records = 0;
txq->kern_tls_short = 0;
txq->kern_tls_partial = 0;
@@ -11235,6 +11297,116 @@ DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
}
#endif
+static eventhandler_tag vxlan_start_evtag;
+static eventhandler_tag vxlan_stop_evtag;
+
+struct vxlan_evargs {
+ struct ifnet *ifp;
+ uint16_t port;
+};
+
+static void
+t4_vxlan_start(struct adapter *sc, void *arg)
+{
+ struct vxlan_evargs *v = arg;
+ struct port_info *pi;
+ uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
+ int i, rc;
+
+ if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
+ return;
+ if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxst") != 0)
+ return;
+
+ if (sc->vxlan_refcount == 0) {
+ sc->vxlan_port = v->port;
+ sc->vxlan_refcount = 1;
+ t4_write_reg(sc, A_MPS_RX_VXLAN_TYPE,
+ V_VXLAN(v->port) | F_VXLAN_EN);
+ for_each_port(sc, i) {
+ pi = sc->port[i];
+ if (pi->vxlan_tcam_entry == true)
+ continue;
+ rc = t4_alloc_raw_mac_filt(sc, pi->vi[0].viid,
+ match_all_mac, match_all_mac,
+ sc->rawf_base + pi->port_id, 1, pi->port_id, true);
+ if (rc < 0) {
+ rc = -rc;
+ log(LOG_ERR,
+ "%s: failed to add VXLAN TCAM entry: %d.\n",
+ device_get_name(pi->vi[0].dev), rc);
+ } else {
+ MPASS(rc == sc->rawf_base + pi->port_id);
+ rc = 0;
+ pi->vxlan_tcam_entry = true;
+ }
+ }
+ } else if (sc->vxlan_port == v->port) {
+ sc->vxlan_refcount++;
+ } else {
+ log(LOG_ERR, "%s: VXLAN already configured on port %d; "
+ "ignoring attempt to configure it on port %d\n",
+ device_get_nameunit(sc->dev), sc->vxlan_port, v->port);
+ }
+ end_synchronized_op(sc, 0);
+}
+
+static void
+t4_vxlan_stop(struct adapter *sc, void *arg)
+{
+ struct vxlan_evargs *v = arg;
+
+ if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
+ return;
+ if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxsp") != 0)
+ return;
+
+ /*
+ * VXLANs may have been configured before the driver was loaded so we
+ * may see more stops than starts. This is not handled cleanly but at
+ * least we keep the refcount sane.
+ */
+ if (sc->vxlan_port != v->port)
+ goto done;
+ if (sc->vxlan_refcount == 0) {
+ log(LOG_ERR,
+ "%s: VXLAN operation on port %d was stopped earlier; "
+ "ignoring attempt to stop it again.\n",
+ device_get_nameunit(sc->dev), sc->vxlan_port);
+ } else if (--sc->vxlan_refcount == 0) {
+ t4_set_reg_field(sc, A_MPS_RX_VXLAN_TYPE, F_VXLAN_EN, 0);
+ }
+done:
+ end_synchronized_op(sc, 0);
+}
+
+static void
+t4_vxlan_start_handler(void *arg __unused, struct ifnet *ifp,
+ sa_family_t family, u_int port)
+{
+ struct vxlan_evargs v;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+ v.ifp = ifp;
+ v.port = port;
+
+ t4_iterate(t4_vxlan_start, &v);
+}
+
+static void
+t4_vxlan_stop_handler(void *arg __unused, struct ifnet *ifp, sa_family_t family,
+ u_int port)
+{
+ struct vxlan_evargs v;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+ v.ifp = ifp;
+ v.port = port;
+
+ t4_iterate(t4_vxlan_stop, &v);
+}
+
+
static struct sx mlu; /* mod load unload */
SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
@@ -11278,6 +11450,14 @@ mod_event(module_t mod, int cmd, void *arg)
#endif
t4_tracer_modload();
tweak_tunables();
+ vxlan_start_evtag =
+ EVENTHANDLER_REGISTER(vxlan_start,
+ t4_vxlan_start_handler, NULL,
+ EVENTHANDLER_PRI_ANY);
+ vxlan_stop_evtag =
+ EVENTHANDLER_REGISTER(vxlan_stop,
+ t4_vxlan_stop_handler, NULL,
+ EVENTHANDLER_PRI_ANY);
}
sx_xunlock(&mlu);
break;
@@ -11314,6 +11494,10 @@ mod_event(module_t mod, int cmd, void *arg)
sx_sunlock(&t4_list_lock);
if (t4_sge_extfree_refs() == 0) {
+ EVENTHANDLER_DEREGISTER(vxlan_start,
+ vxlan_start_evtag);
+ EVENTHANDLER_DEREGISTER(vxlan_stop,
+ vxlan_stop_evtag);
t4_tracer_modunload();
#ifdef KERN_TLS
t6_ktls_modunload();
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 5a938179b084..0b08fc33528d 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
+#include <net/if_vxlan.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
@@ -266,8 +267,9 @@ static int find_refill_source(struct adapter *, int, bool);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
static inline void get_pkt_gl(struct mbuf *, struct sglist *);
-static inline u_int txpkt_len16(u_int, u_int);
-static inline u_int txpkt_vm_len16(u_int, u_int);
+static inline u_int txpkt_len16(u_int, const u_int);
+static inline u_int txpkt_vm_len16(u_int, const u_int);
+static inline void calculate_mbuf_len16(struct adapter *, struct mbuf *);
static inline u_int txpkts0_len16(u_int);
static inline u_int txpkts1_len16(void);
static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
@@ -1917,12 +1919,41 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d,
#if defined(INET) || defined(INET6)
struct lro_ctrl *lro = &rxq->lro;
#endif
+ uint16_t err_vec, tnl_type, tnlhdr_len;
static const int sw_hashtype[4][2] = {
{M_HASHTYPE_NONE, M_HASHTYPE_NONE},
{M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6},
{M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6},
{M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6},
};
+ static const int sw_csum_flags[2][2] = {
+ {
+ /* IP, inner IP */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L3_CALC | CSUM_L3_VALID |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+
+ /* IP, inner IP6 */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L3_CALC | CSUM_L3_VALID |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+ },
+ {
+ /* IP6, inner IP */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+
+ /* IP6, inner IP6 */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+ },
+ };
MPASS(plen > sc->params.sge.fl_pktshift);
if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) &&
@@ -1963,23 +1994,73 @@ have_mbuf:
m0->m_pkthdr.flowid = be32toh(d->rss.hash_val);
cpl = (const void *)(&d->rss + 1);
- if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) {
- if (ifp->if_capenable & IFCAP_RXCSUM &&
- cpl->l2info & htobe32(F_RXF_IP)) {
- m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
- CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
- rxq->rxcsum++;
- } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
- cpl->l2info & htobe32(F_RXF_IP6)) {
- m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
- CSUM_PSEUDO_HDR);
+ if (sc->params.tp.rx_pkt_encap) {
+ const uint16_t ev = be16toh(cpl->err_vec);
+
+ err_vec = G_T6_COMPR_RXERR_VEC(ev);
+ tnl_type = G_T6_RX_TNL_TYPE(ev);
+ tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev);
+ } else {
+ err_vec = be16toh(cpl->err_vec);
+ tnl_type = 0;
+ tnlhdr_len = 0;
+ }
+ if (cpl->csum_calc && err_vec == 0) {
+ int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6));
+
+ /* checksum(s) calculated and found to be correct. */
+
+ MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^
+ (cpl->l2info & htobe32(F_RXF_IP6)));
+ m0->m_pkthdr.csum_data = be16toh(cpl->csum);
+ if (tnl_type == 0) {
+ if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) {
+ m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
+ CSUM_L3_VALID | CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ } else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
+ m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ }
rxq->rxcsum++;
- }
+ } else {
+ MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN);
+ if (__predict_false(cpl->ip_frag)) {
+ /*
+ * csum_data is for the inner frame (which is an
+ * IP fragment) and is not 0xffff. There is no
+ * way to pass the inner csum_data to the stack.
+ * We don't want the stack to use the inner
+ * csum_data to validate the outer frame or it
+ * will get rejected. So we fix csum_data here
+ * and let sw do the checksum of inner IP
+ * fragments.
+ *
+ * XXX: Need 32b for csum_data2 in an rx mbuf.
+ * Maybe stuff it into rcv_tstmp?
+ */
+ m0->m_pkthdr.csum_data = 0xffff;
+ if (ipv6) {
+ m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ } else {
+ m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
+ CSUM_L3_VALID | CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ }
+ } else {
+ int outer_ipv6;
- if (__predict_false(cpl->ip_frag))
- m0->m_pkthdr.csum_data = be16toh(cpl->csum);
- else
- m0->m_pkthdr.csum_data = 0xffff;
+ MPASS(m0->m_pkthdr.csum_data == 0xffff);
+
+ outer_ipv6 = tnlhdr_len >=
+ sizeof(struct ether_header) +
+ sizeof(struct ip6_hdr);
+ m0->m_pkthdr.csum_flags =
+ sw_csum_flags[outer_ipv6][ipv6];
+ }
+ rxq->vxlan_rxcsum++;
+ }
}
if (cpl->vlan_ex) {
@@ -2007,7 +2088,7 @@ have_mbuf:
m0->m_pkthdr.numa_domain = ifp->if_numa_domain;
#endif
#if defined(INET) || defined(INET6)
- if (rxq->iq.flags & IQ_LRO_ENABLED &&
+ if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 &&
(M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 ||
M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) {
if (sort_before_lro(lro)) {
@@ -2179,10 +2260,10 @@ mbuf_nsegs(struct mbuf *m)
{
M_ASSERTPKTHDR(m);
- KASSERT(m->m_pkthdr.l5hlen > 0,
+ KASSERT(m->m_pkthdr.inner_l5hlen > 0,
("%s: mbuf %p missing information on # of segments.", __func__, m));
- return (m->m_pkthdr.l5hlen);
+ return (m->m_pkthdr.inner_l5hlen);
}
static inline void
@@ -2190,7 +2271,7 @@ set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs)
{
M_ASSERTPKTHDR(m);
- m->m_pkthdr.l5hlen = nsegs;
+ m->m_pkthdr.inner_l5hlen = nsegs;
}
static inline int
@@ -2316,63 +2397,108 @@ alloc_wr_mbuf(int len, int how)
return (m);
}
-static inline int
+static inline bool
needs_hwcsum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP |
+ CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
+ CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP |
+ CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP |
- CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6));
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
+static inline bool
needs_tso(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO |
+ CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & CSUM_TSO);
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
+static inline bool
+needs_vxlan_csum(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+
+ return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN);
+}
+
+static inline bool
+needs_vxlan_tso(struct mbuf *m)
+{
+ const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO |
+ CSUM_INNER_IP6_TSO;
+
+ M_ASSERTPKTHDR(m);
+
+ return ((m->m_pkthdr.csum_flags & csum_flags) != 0 &&
+ (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN);
+}
+
+static inline bool
+needs_inner_tcp_csum(struct mbuf *m)
+{
+ const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
+
+ M_ASSERTPKTHDR(m);
+
+ return (m->m_pkthdr.csum_flags & csum_flags);
+}
+
+static inline bool
needs_l3_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP |
+ CSUM_INNER_IP_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO));
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
-needs_tcp_csum(struct mbuf *m)
+static inline bool
+needs_outer_tcp_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP |
+ CSUM_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO));
+
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
#ifdef RATELIMIT
-static inline int
-needs_l4_csum(struct mbuf *m)
+static inline bool
+needs_outer_l4_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO |
+ CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
- CSUM_TCP_IPV6 | CSUM_TSO));
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
-needs_udp_csum(struct mbuf *m)
+static inline bool
+needs_outer_udp_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6));
+
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
#endif
-static inline int
+static inline bool
needs_vlan_insertion(struct mbuf *m)
{
@@ -2513,6 +2639,23 @@ count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags)
}
/*
+ * The maximum number of segments that can fit in a WR.
+ */
+static int
+max_nsegs_allowed(struct mbuf *m)
+{
+
+ if (needs_tso(m)) {
+ if (needs_vxlan_tso(m))
+ return (TX_SGL_SEGS_VXLAN_TSO);
+ else
+ return (TX_SGL_SEGS_TSO);
+ }
+
+ return (TX_SGL_SEGS);
+}
+
+/*
* Analyze the mbuf to determine its tx needs. The mbuf passed in may change:
* a) caller can assume it's been freed if this function returns with an error.
* b) it may get defragged up if the gather list is too long for the hardware.
@@ -2570,7 +2713,7 @@ restart:
return (0);
}
#endif
- if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) {
+ if (nsegs > max_nsegs_allowed(m0)) {
if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) {
rc = EFBIG;
goto fail;
@@ -2592,18 +2735,15 @@ restart:
}
set_mbuf_nsegs(m0, nsegs);
set_mbuf_cflags(m0, cflags);
- if (sc->flags & IS_VF)
- set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0)));
- else
- set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0)));
+ calculate_mbuf_len16(sc, m0);
#ifdef RATELIMIT
/*
* Ethofld is limited to TCP and UDP for now, and only when L4 hw
- * checksumming is enabled. needs_l4_csum happens to check for all the
- * right things.
+ * checksumming is enabled. needs_outer_l4_csum happens to check for
+ * all the right things.
*/
- if (__predict_false(needs_eo(cst) && !needs_l4_csum(m0))) {
+ if (__predict_false(needs_eo(cst) && !needs_outer_l4_csum(m0))) {
m_snd_tag_rele(m0->m_pkthdr.snd_tag);
m0->m_pkthdr.snd_tag = NULL;
m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
@@ -2635,21 +2775,27 @@ restart:
switch (eh_type) {
#ifdef INET6
case ETHERTYPE_IPV6:
- {
- struct ip6_hdr *ip6 = l3hdr;
-
- MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP);
-
- m0->m_pkthdr.l3hlen = sizeof(*ip6);
+ m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
break;
- }
#endif
#ifdef INET
case ETHERTYPE_IP:
{
struct ip *ip = l3hdr;
- m0->m_pkthdr.l3hlen = ip->ip_hl * 4;
+ if (needs_vxlan_csum(m0)) {
+ /* Driver will do the outer IP hdr checksum. */
+ ip->ip_sum = 0;
+ if (needs_vxlan_tso(m0)) {
+ const uint16_t ipl = ip->ip_len;
+
+ ip->ip_len = 0;
+ ip->ip_sum = ~in_cksum_hdr(ip);
+ ip->ip_len = ipl;
+ } else
+ ip->ip_sum = in_cksum_hdr(ip);
+ }
+ m0->m_pkthdr.l3hlen = ip->ip_hl << 2;
break;
}
#endif
@@ -2659,8 +2805,59 @@ restart:
__func__, eh_type);
}
+ if (needs_vxlan_csum(m0)) {
+ m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
+ m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header);
+
+ /* Inner headers. */
+ eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen +
+ sizeof(struct udphdr) + sizeof(struct vxlan_header));
+ eh_type = ntohs(eh->ether_type);
+ if (eh_type == ETHERTYPE_VLAN) {
+ struct ether_vlan_header *evh = (void *)eh;
+
+ eh_type = ntohs(evh->evl_proto);
+ m0->m_pkthdr.inner_l2hlen = sizeof(*evh);
+ } else
+ m0->m_pkthdr.inner_l2hlen = sizeof(*eh);
+ l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen);
+
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr);
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip = l3hdr;
+
+ m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2;
+ break;
+ }
+#endif
+ default:
+ panic("%s: VXLAN hw offload requested with unknown "
+ "ethertype 0x%04x. if_cxgbe must be compiled"
+ " with the same INET/INET6 options as the kernel.",
+ __func__, eh_type);
+ }
#if defined(INET) || defined(INET6)
- if (needs_tcp_csum(m0)) {
+ if (needs_inner_tcp_csum(m0)) {
+ tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen);
+ m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4;
+ }
+#endif
+ MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP |
+ CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO |
+ CSUM_ENCAP_VXLAN;
+ }
+
+#if defined(INET) || defined(INET6)
+ if (needs_outer_tcp_csum(m0)) {
tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen);
m0->m_pkthdr.l4hlen = tcp->th_off * 4;
#ifdef RATELIMIT
@@ -2670,7 +2867,7 @@ restart:
V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1));
} else
set_mbuf_eo_tsclk_tsoff(m0, 0);
- } else if (needs_udp_csum(m0)) {
+ } else if (needs_outer_udp_csum(m0)) {
m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
#endif
}
@@ -3627,6 +3824,9 @@ alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx,
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction",
CTLFLAG_RD, &rxq->vlan_extraction,
"# of times hardware extracted 802.1Q tag");
+ SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_rxcsum",
+ CTLFLAG_RD, &rxq->vxlan_rxcsum,
+ "# of times hardware assisted with inner checksum (VXLAN) ");
add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl);
@@ -4281,6 +4481,11 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
"# of frames tx'd using type1 txpkts work requests");
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD,
&txq->raw_wrs, "# of raw work requests (non-packets)");
+ SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_tso_wrs",
+ CTLFLAG_RD, &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests");
+ SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_txcsum",
+ CTLFLAG_RD, &txq->vxlan_txcsum,
+ "# of times hardware assisted with inner checksums (VXLAN)");
#ifdef KERN_TLS
if (sc->flags & KERN_TLS_OK) {
@@ -4570,27 +4775,25 @@ get_pkt_gl(struct mbuf *m, struct sglist *gl)
KASSERT(gl->sg_nseg == mbuf_nsegs(m),
("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
mbuf_nsegs(m), gl->sg_nseg));
- KASSERT(gl->sg_nseg > 0 &&
- gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS),
+ KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m),
("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
- gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS));
+ gl->sg_nseg, max_nsegs_allowed(m)));
}
/*
* len16 for a txpkt WR with a GL. Includes the firmware work request header.
*/
static inline u_int
-txpkt_len16(u_int nsegs, u_int tso)
+txpkt_len16(u_int nsegs, const u_int extra)
{
u_int n;
MPASS(nsegs > 0);
nsegs--; /* first segment is part of ulptx_sgl */
- n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) +
+ n = extra + sizeof(struct fw_eth_tx_pkt_wr) +
+ sizeof(struct cpl_tx_pkt_core) +
sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
- if (tso)
- n += sizeof(struct cpl_tx_pkt_lso_core);
return (howmany(n, 16));
}
@@ -4600,22 +4803,43 @@ txpkt_len16(u_int nsegs, u_int tso)
* request header.
*/
static inline u_int
-txpkt_vm_len16(u_int nsegs, u_int tso)
+txpkt_vm_len16(u_int nsegs, const u_int extra)
{
u_int n;
MPASS(nsegs > 0);
nsegs--; /* first segment is part of ulptx_sgl */
- n = sizeof(struct fw_eth_tx_pkt_vm_wr) +
+ n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) +
sizeof(struct cpl_tx_pkt_core) +
sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
- if (tso)
- n += sizeof(struct cpl_tx_pkt_lso_core);
return (howmany(n, 16));
}
+static inline void
+calculate_mbuf_len16(struct adapter *sc, struct mbuf *m)
+{
+ const int lso = sizeof(struct cpl_tx_pkt_lso_core);
+ const int tnl_lso = sizeof(struct cpl_tx_tnl_lso);
+
+ if (sc->flags & IS_VF) {
+ if (needs_tso(m))
+ set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso));
+ else
+ set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0));
+ return;
+ }
+
+ if (needs_tso(m)) {
+ if (needs_vxlan_tso(m))
+ set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso));
+ else
+ set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso));
+ } else
+ set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0));
+}
+
/*
* len16 for a txpkts type 0 WR with a GL. Does not include the firmware work
* request header.
@@ -4664,51 +4888,162 @@ static inline uint64_t
csum_to_ctrl(struct adapter *sc, struct mbuf *m)
{
uint64_t ctrl;
- int csum_type;
+ int csum_type, l2hlen, l3hlen;
+ int x, y;
+ static const int csum_types[3][2] = {
+ {TX_CSUM_TCPIP, TX_CSUM_TCPIP6},
+ {TX_CSUM_UDPIP, TX_CSUM_UDPIP6},
+ {TX_CSUM_IP, 0}
+ };
M_ASSERTPKTHDR(m);
- if (needs_hwcsum(m) == 0)
+ if (!needs_hwcsum(m))
return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);
+ MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN);
+ MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip));
+
+ if (needs_vxlan_csum(m)) {
+ MPASS(m->m_pkthdr.l4hlen > 0);
+ MPASS(m->m_pkthdr.l5hlen > 0);
+ MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN);
+ MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip));
+
+ l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
+ m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen +
+ m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN;
+ l3hlen = m->m_pkthdr.inner_l3hlen;
+ } else {
+ l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN;
+ l3hlen = m->m_pkthdr.l3hlen;
+ }
+
ctrl = 0;
- if (needs_l3_csum(m) == 0)
+ if (!needs_l3_csum(m))
ctrl |= F_TXPKT_IPCSUM_DIS;
- switch (m->m_pkthdr.csum_flags &
- (CSUM_IP_TCP | CSUM_IP_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP)) {
- case CSUM_IP_TCP:
- csum_type = TX_CSUM_TCPIP;
- break;
- case CSUM_IP_UDP:
- csum_type = TX_CSUM_UDPIP;
- break;
- case CSUM_IP6_TCP:
- csum_type = TX_CSUM_TCPIP6;
- break;
- case CSUM_IP6_UDP:
- csum_type = TX_CSUM_UDPIP6;
- break;
- default:
- /* needs_hwcsum told us that at least some hwcsum is needed. */
- MPASS(ctrl == 0);
- MPASS(m->m_pkthdr.csum_flags & CSUM_IP);
- ctrl |= F_TXPKT_L4CSUM_DIS;
- csum_type = TX_CSUM_IP;
- break;
- }
- MPASS(m->m_pkthdr.l2hlen > 0);
- MPASS(m->m_pkthdr.l3hlen > 0);
- ctrl |= V_TXPKT_CSUM_TYPE(csum_type) |
- V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
+ if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP |
+ CSUM_IP6_TCP | CSUM_INNER_IP6_TCP))
+ x = 0; /* TCP */
+ else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP |
+ CSUM_IP6_UDP | CSUM_INNER_IP6_UDP))
+ x = 1; /* UDP */
+ else
+ x = 2;
+
+ if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP |
+ CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP))
+ y = 0; /* IPv4 */
+ else {
+ MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP));
+ y = 1; /* IPv6 */
+ }
+ /*
+ * needs_hwcsum returned true earlier so there must be some kind of
+ * checksum to calculate.
+ */
+ csum_type = csum_types[x][y];
+ MPASS(csum_type != 0);
+ if (csum_type == TX_CSUM_IP)
+ ctrl |= F_TXPKT_L4CSUM_DIS;
+ ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen);
if (chip_id(sc) <= CHELSIO_T5)
- ctrl |= V_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN);
+ ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen);
else
- ctrl |= V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN);
+ ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen);
return (ctrl);
}
+static inline void *
+write_lso_cpl(void *cpl, struct mbuf *m0)
+{
+ struct cpl_tx_pkt_lso_core *lso;
+ uint32_t ctrl;
+
+ KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
+ m0->m_pkthdr.l4hlen > 0,
+ ("%s: mbuf %p needs TSO but missing header lengths",
+ __func__, m0));
+
+ ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
+ F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
+ V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
+ V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
+ V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
+ if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
+ ctrl |= F_LSO_IPV6;
+
+ lso = cpl;
+ lso->lso_ctrl = htobe32(ctrl);
+ lso->ipid_ofst = htobe16(0);
+ lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
+ lso->seqno_offset = htobe32(0);
+ lso->len = htobe32(m0->m_pkthdr.len);
+
+ return (lso + 1);
+}
+
+static void *
+write_tnl_lso_cpl(void *cpl, struct mbuf *m0)
+{
+ struct cpl_tx_tnl_lso *tnl_lso = cpl;
+ uint32_t ctrl;
+
+ KASSERT(m0->m_pkthdr.inner_l2hlen > 0 &&
+ m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 &&
+ m0->m_pkthdr.inner_l5hlen > 0,
+ ("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths",
+ __func__, m0));
+ KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
+ m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0,
+ ("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths",
+ __func__, m0));
+
+ /* Outer headers. */
+ ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) |
+ F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST |
+ V_CPL_TX_TNL_LSO_ETHHDRLENOUT(
+ (m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
+ V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) |
+ F_CPL_TX_TNL_LSO_IPLENSETOUT;
+ if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
+ ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT;
+ else {
+ ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT |
+ F_CPL_TX_TNL_LSO_IPIDINCOUT;
+ }
+ tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl);
+ tnl_lso->IpIdOffsetOut = 0;
+ tnl_lso->UdpLenSetOut_to_TnlHdrLen =
+ htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT |
+ F_CPL_TX_TNL_LSO_UDPLENSETOUT |
+ V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen +
+ m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen +
+ m0->m_pkthdr.l5hlen) |
+ V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN));
+ tnl_lso->r1 = 0;
+
+ /* Inner headers. */
+ ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN(
+ (m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) |
+ V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) |
+ V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2);
+ if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr))
+ ctrl |= F_CPL_TX_TNL_LSO_IPV6;
+ tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl);
+ tnl_lso->IpIdOffset = 0;
+ tnl_lso->IpIdSplit_to_Mss =
+ htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz));
+ tnl_lso->TCPSeqOffset = 0;
+ tnl_lso->EthLenOffset_Size =
+ htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len));
+
+ return (tnl_lso + 1);
+}
+
#define VM_TX_L2HDR_LEN 16 /* ethmacdst to vlantci */
/*
@@ -4762,29 +5097,7 @@ write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0)
m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst);
if (needs_tso(m0)) {
- struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
-
- KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
- m0->m_pkthdr.l4hlen > 0,
- ("%s: mbuf %p needs TSO but missing header lengths",
- __func__, m0));
-
- ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
- F_LSO_LAST_SLICE | V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen -
- ETHER_HDR_LEN) >> 2) |
- V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
- V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
- if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
- ctrl |= F_LSO_IPV6;
-
- lso->lso_ctrl = htobe32(ctrl);
- lso->ipid_ofst = htobe16(0);
- lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
- lso->seqno_offset = htobe32(0);
- lso->len = htobe32(pktlen);
-
- cpl = (void *)(lso + 1);
-
+ cpl = write_lso_cpl(wr + 1, m0);
txq->tso_wrs++;
} else
cpl = (void *)(wr + 1);
@@ -4892,9 +5205,12 @@ write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
nsegs = mbuf_nsegs(m0);
pktlen = m0->m_pkthdr.len;
ctrl = sizeof(struct cpl_tx_pkt_core);
- if (needs_tso(m0))
- ctrl += sizeof(struct cpl_tx_pkt_lso_core);
- else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) &&
+ if (needs_tso(m0)) {
+ if (needs_vxlan_tso(m0))
+ ctrl += sizeof(struct cpl_tx_tnl_lso);
+ else
+ ctrl += sizeof(struct cpl_tx_pkt_lso_core);
+ } else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) &&
available >= 2) {
/* Immediate data. Recalculate len16 and set nsegs to 0. */
ctrl += pktlen;
@@ -4916,41 +5232,30 @@ write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
wr->r3 = 0;
if (needs_tso(m0)) {
- struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
-
- KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
- m0->m_pkthdr.l4hlen > 0,
- ("%s: mbuf %p needs TSO but missing header lengths",
- __func__, m0));
-
- ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
- F_LSO_LAST_SLICE | V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen -
- ETHER_HDR_LEN) >> 2) |
- V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
- V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
- if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
- ctrl |= F_LSO_IPV6;
-
- lso->lso_ctrl = htobe32(ctrl);
- lso->ipid_ofst = htobe16(0);
- lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
- lso->seqno_offset = htobe32(0);
- lso->len = htobe32(pktlen);
-
- cpl = (void *)(lso + 1);
-
- txq->tso_wrs++;
+ if (needs_vxlan_tso(m0)) {
+ cpl = write_tnl_lso_cpl(wr + 1, m0);
+ txq->vxlan_tso_wrs++;
+ } else {
+ cpl = write_lso_cpl(wr + 1, m0);
+ txq->tso_wrs++;
+ }
} else
cpl = (void *)(wr + 1);
/* Checksum offload */
ctrl1 = csum_to_ctrl(sc, m0);
- if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
- txq->txcsum++; /* some hardware assistance provided */
+ if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) {
+ /* some hardware assistance provided */
+ if (needs_vxlan_csum(m0))
+ txq->vxlan_txcsum++;
+ else
+ txq->txcsum++;
+ }
/* VLAN tag insertion */
if (needs_vlan_insertion(m0)) {
- ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
+ ctrl1 |= F_TXPKT_VLAN_VLD |
+ V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
txq->vlan_insertion++;
}
@@ -4962,6 +5267,8 @@ write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0,
/* SGL */
dst = (void *)(cpl + 1);
+ if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx]))
+ dst = (caddr_t)&eq->desc[0];
if (nsegs > 0) {
write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
@@ -5207,8 +5514,13 @@ write_txpkts_wr(struct adapter *sc, struct sge_txq *txq)
/* Checksum offload */
ctrl1 = csum_to_ctrl(sc, m);
- if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
- txq->txcsum++; /* some hardware assistance provided */
+ if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) {
+ /* some hardware assistance provided */
+ if (needs_vxlan_csum(m))
+ txq->vxlan_txcsum++;
+ else
+ txq->txcsum++;
+ }
/* VLAN tag insertion */
if (needs_vlan_insertion(m)) {
@@ -5967,7 +6279,7 @@ write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr,
wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) |
V_FW_WR_FLOWID(cst->etid));
wr->r3 = 0;
- if (needs_udp_csum(m0)) {
+ if (needs_outer_udp_csum(m0)) {
wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG;
wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen;
wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
@@ -5979,7 +6291,7 @@ write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr,
wr->u.udpseg.plen = htobe32(pktlen - immhdrs);
cpl = (void *)(wr + 1);
} else {
- MPASS(needs_tcp_csum(m0));
+ MPASS(needs_outer_tcp_csum(m0));
wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen;
wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
@@ -6016,7 +6328,7 @@ write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr,
}
/* Checksum offload must be requested for ethofld. */
- MPASS(needs_l4_csum(m0));
+ MPASS(needs_outer_l4_csum(m0));
ctrl1 = csum_to_ctrl(cst->adapter, m0);
/* VLAN tag insertion */