aboutsummaryrefslogtreecommitdiff
path: root/sys/net
diff options
context:
space:
mode:
author: Andrew Gallatin <gallatin@FreeBSD.org> 2020-03-09 13:44:51 +0000
committer: Andrew Gallatin <gallatin@FreeBSD.org> 2020-03-09 13:44:51 +0000
commit: 98085bae8cc0aaef80c781691eb679d93bc69c25 (patch)
tree: 57465d363fcd219ecb1cc2d093a1465046d22318 /sys/net
parent: 711b7264f4be956e57a50de29f6f10049b7242f0 (diff)
download: src-98085bae8cc0aaef80c781691eb679d93bc69c25.tar.gz
src-98085bae8cc0aaef80c781691eb679d93bc69c25.zip
make lacp's use_numa hashing aware of send tags
When I did the use_numa support, I missed the fact that there is a separate hash function for send tag nic selection. So when use_numa is enabled, ktls offload does not work properly, as it does not reliably allocate a send tag on the proper egress nic since different egress nics are selected for send-tag allocation and packet transmit.
To fix this, this change:
- refactors lacp_select_tx_port_by_hash() and lacp_select_tx_port() so that lacp_select_tx_port_by_hash() is always called by lacp_select_tx_port()
- pre-shifts flowids to convert them to hashes when calling lacp_select_tx_port_by_hash()
- adds a numa_domain field to if_snd_tag_alloc_params
- plumbs the numa domain into places where we allocate send tags
In testing with NIC TLS setup on a NUMA machine, I see thousands of output errors before the change when enabling kern.ipc.tls.ifnet.permitted=1. After the change, I see no errors, and I see the NIC sysctl counters showing active TLS offload sessions.
Reviewed by: rrs, hselasky, jhb
Sponsored by: Netflix
Notes
Notes: svn path=/head/; revision=358808
Diffstat (limited to 'sys/net')
-rw-r--r-- sys/net/ieee8023ad_lacp.c 43
-rw-r--r-- sys/net/ieee8023ad_lacp.h 4
-rw-r--r-- sys/net/if_lagg.c 11
-rw-r--r-- sys/net/if_var.h 1
4 files changed, 22 insertions, 37 deletions
diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c
index 907c7f958cdd..af887f2f0dee 100644
--- a/sys/net/ieee8023ad_lacp.c
+++ b/sys/net/ieee8023ad_lacp.c
@@ -832,13 +832,12 @@ lacp_stop(struct lagg_softc *sc)
}
struct lagg_port *
-lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
+lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_domain)
{
struct lacp_softc *lsc = LACP_SOFTC(sc);
struct lacp_portmap *pm;
struct lacp_port *lp;
struct lacp_port **map;
- uint32_t hash;
int count;
if (__predict_false(lsc->lsc_suppress_distributing)) {
@@ -854,10 +853,10 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
#ifdef NUMA
if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
- pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) {
- count = pm->pm_numa[m->m_pkthdr.numa_domain].count;
+ pm->pm_num_dom > 1 && numa_domain < MAXMEMDOM) {
+ count = pm->pm_numa[numa_domain].count;
if (count > 0) {
- map = pm->pm_numa[m->m_pkthdr.numa_domain].map;
+ map = pm->pm_numa[numa_domain].map;
} else {
/* No ports on this domain; use global hash. */
map = pm->pm_map;
@@ -869,11 +868,6 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
map = pm->pm_map;
count = pm->pm_count;
}
- if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
- M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
- hash = m->m_pkthdr.flowid >> sc->flowid_shift;
- else
- hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
hash %= count;
lp = map[hash];
@@ -884,33 +878,22 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (lp->lp_lagg);
}
-#if defined(RATELIMIT) || defined(KERN_TLS)
struct lagg_port *
-lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid)
+lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
{
struct lacp_softc *lsc = LACP_SOFTC(sc);
- struct lacp_portmap *pm;
- struct lacp_port *lp;
uint32_t hash;
+ uint8_t numa_domain;
- if (__predict_false(lsc->lsc_suppress_distributing)) {
- LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
- return (NULL);
- }
-
- pm = &lsc->lsc_pmap[lsc->lsc_activemap];
- if (pm->pm_count == 0) {
- LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
- return (NULL);
- }
-
- hash = flowid >> sc->flowid_shift;
- hash %= pm->pm_count;
- lp = pm->pm_map[hash];
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ hash = m->m_pkthdr.flowid >> sc->flowid_shift;
+ else
+ hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
- return (lp->lp_lagg);
+ numa_domain = m->m_pkthdr.numa_domain;
+ return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
}
-#endif
/*
* lacp_suppress_distributing: drop transmit packets for a while
diff --git a/sys/net/ieee8023ad_lacp.h b/sys/net/ieee8023ad_lacp.h
index b6a0860ff1e0..2262f1d74294 100644
--- a/sys/net/ieee8023ad_lacp.h
+++ b/sys/net/ieee8023ad_lacp.h
@@ -293,9 +293,7 @@ struct lacp_softc {
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-#if defined(RATELIMIT) || defined(KERN_TLS)
-struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t);
-#endif
+struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t, uint8_t);
void lacp_attach(struct lagg_softc *);
void lacp_detach(void *);
void lacp_init(struct lagg_softc *);
diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c
index 37b3a708f8a9..018e82c4a1f1 100644
--- a/sys/net/if_lagg.c
+++ b/sys/net/if_lagg.c
@@ -1609,12 +1609,13 @@ mst_to_lst(struct m_snd_tag *mst)
* contents.
*/
static struct lagg_port *
-lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
+lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
+ uint8_t numa_domain)
{
struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
- uint32_t p;
+ uint32_t hash, p;
sc = ifp->if_softc;
@@ -1634,7 +1635,8 @@ lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
flowtype == M_HASHTYPE_NONE)
return (NULL);
- return (lacp_select_tx_port_by_hash(sc, flowid));
+ hash = flowid >> sc->flowid_shift;
+ return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
default:
return (NULL);
}
@@ -1654,7 +1656,8 @@ lagg_snd_tag_alloc(struct ifnet *ifp,
sc = ifp->if_softc;
LAGG_RLOCK();
- lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
+ lp = lookup_snd_tag_port(ifp, params->hdr.flowid,
+ params->hdr.flowtype, params->hdr.numa_domain);
if (lp == NULL) {
LAGG_RUNLOCK();
return (EOPNOTSUPP);
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 864f519495e9..e37aaaaa1dca 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -198,6 +198,7 @@ struct if_snd_tag_alloc_header {
uint32_t type; /* send tag type, see IF_SND_TAG_XXX */
uint32_t flowid; /* mbuf hash value */
uint32_t flowtype; /* mbuf hash type */
+ uint8_t numa_domain; /* numa domain of associated inp */
};
struct if_snd_tag_alloc_rate_limit {