aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sbin/ifconfig/ifconfig.818
-rw-r--r--sbin/ifconfig/iflagg.c4
-rw-r--r--sys/net/ieee8023ad_lacp.c46
-rw-r--r--sys/net/ieee8023ad_lacp.h7
-rw-r--r--sys/net/if_lagg.c11
-rw-r--r--sys/net/if_lagg.h6
6 files changed, 86 insertions, 6 deletions
diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8
index 7d94bca57b20..92b319869cbd 100644
--- a/sbin/ifconfig/ifconfig.8
+++ b/sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD$
.\"
-.Dd June 27, 2018
+.Dd May 3, 2019
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -2497,6 +2497,22 @@ Use the RSS hash from the network card if available.
Set a shift parameter for RSS local hash computation.
Hash is calculated by using flowid bits in a packet header mbuf
which are shifted by the number of this parameter.
+.It Cm use_numa
+Enable selection of egress ports based on the native
+.Xr numa 4
+domain for the packets being transmitted.
+This is currently only implemented for lacp mode.
+This works only on
+.Xr numa 4
+hardware, running a kernel compiled with the
+.Xr numa 4
+option, and when interfaces from multiple
+.Xr numa 4
+domains are ports of the aggregation interface.
+.It Cm -use_numa
+Disable selection of egress ports based on the native
+.Xr numa 4
+domain for the packets being transmitted.
.It Cm lacp_fast_timeout
Enable lacp fast-timeout on the interface.
.It Cm -lacp_fast_timeout
diff --git a/sbin/ifconfig/iflagg.c b/sbin/ifconfig/iflagg.c
index 89b9ce097f35..3b9e900605b5 100644
--- a/sbin/ifconfig/iflagg.c
+++ b/sbin/ifconfig/iflagg.c
@@ -130,6 +130,8 @@ setlaggsetopt(const char *val, int d, int s, const struct afswtch *afp)
switch (ro.ro_opts) {
case LAGG_OPT_USE_FLOWID:
case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_USE_NUMA:
+ case -LAGG_OPT_USE_NUMA:
case LAGG_OPT_LACP_STRICT:
case -LAGG_OPT_LACP_STRICT:
case LAGG_OPT_LACP_TXTEST:
@@ -303,6 +305,8 @@ static struct cmd lagg_cmds[] = {
DEF_CMD_ARG("lagghash", setlagghash),
DEF_CMD("use_flowid", LAGG_OPT_USE_FLOWID, setlaggsetopt),
DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt),
+ DEF_CMD("use_numa", LAGG_OPT_USE_NUMA, setlaggsetopt),
+ DEF_CMD("-use_numa", -LAGG_OPT_USE_NUMA, setlaggsetopt),
DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt),
DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt),
DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt),
diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c
index b10e59bab680..b6f41b204f9c 100644
--- a/sys/net/ieee8023ad_lacp.c
+++ b/sys/net/ieee8023ad_lacp.c
@@ -835,7 +835,9 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
struct lacp_softc *lsc = LACP_SOFTC(sc);
struct lacp_portmap *pm;
struct lacp_port *lp;
+ struct lacp_port **map;
uint32_t hash;
+ int count;
if (__predict_false(lsc->lsc_suppress_distributing)) {
LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
@@ -848,13 +850,31 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (NULL);
}
+#ifdef NUMA
+ if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
+ pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) {
+ count = pm->pm_numa[m->m_pkthdr.numa_domain].count;
+ if (count > 0) {
+ map = pm->pm_numa[m->m_pkthdr.numa_domain].map;
+ } else {
+ /* No ports on this domain; use global hash. */
+ map = pm->pm_map;
+ count = pm->pm_count;
+ }
+ } else
+#endif
+ {
+ map = pm->pm_map;
+ count = pm->pm_count;
+ }
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
- hash %= pm->pm_count;
- lp = pm->pm_map[hash];
+
+ hash %= count;
+ lp = map[hash];
KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
("aggregated port is not distributing"));
@@ -1044,6 +1064,10 @@ lacp_update_portmap(struct lacp_softc *lsc)
uint64_t speed;
u_int newmap;
int i;
+#ifdef NUMA
+ int count;
+ uint8_t domain;
+#endif
newmap = lsc->lsc_activemap == 0 ? 1 : 0;
p = &lsc->lsc_pmap[newmap];
@@ -1054,9 +1078,25 @@ lacp_update_portmap(struct lacp_softc *lsc)
if (la != NULL && la->la_nports > 0) {
p->pm_count = la->la_nports;
i = 0;
- TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
+ TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) {
p->pm_map[i++] = lp;
+#ifdef NUMA
+ domain = lp->lp_ifp->if_numa_domain;
+ if (domain >= MAXMEMDOM)
+ continue;
+ count = p->pm_numa[domain].count;
+ p->pm_numa[domain].map[count] = lp;
+ p->pm_numa[domain].count++;
+#endif
+ }
KASSERT(i == p->pm_count, ("Invalid port count"));
+
+#ifdef NUMA
+ for (i = 0; i < MAXMEMDOM; i++) {
+ if (p->pm_numa[i].count != 0)
+ p->pm_num_dom++;
+ }
+#endif
speed = lacp_aggregator_bandwidth(la);
}
sc->sc_ifp->if_baudrate = speed;
diff --git a/sys/net/ieee8023ad_lacp.h b/sys/net/ieee8023ad_lacp.h
index 5ae48cebb62d..8d6438c1ec59 100644
--- a/sys/net/ieee8023ad_lacp.h
+++ b/sys/net/ieee8023ad_lacp.h
@@ -197,8 +197,15 @@ enum lacp_mux_state {
#define LACP_MAX_PORTS 32
+struct lacp_numa { /* per-NUMA-domain slice of the LACP port map */
+ int count; /* number of valid entries in map[] */
+ struct lacp_port *map[LACP_MAX_PORTS]; /* ports whose ifnet reports this NUMA domain */
+};
+
struct lacp_portmap {
int pm_count;
+ int pm_num_dom; /* number of pm_numa[] entries with count != 0 */
+ struct lacp_numa pm_numa[MAXMEMDOM]; /* per-domain port lists, indexed by NUMA domain */
struct lacp_port *pm_map[LACP_MAX_PORTS];
};
diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c
index 7368f3ee1b43..cf7788986299 100644
--- a/sys/net/if_lagg.c
+++ b/sys/net/if_lagg.c
@@ -264,6 +264,13 @@ SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
&VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
+/* Default (enabled) for LAGG_OPT_USE_NUMA, applied when a lagg interface is created */
+VNET_DEFINE_STATIC(int, def_use_numa) = 1;
+#define V_def_use_numa VNET(def_use_numa)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN,
+ &VNET_NAME(def_use_numa), 0,
+ "Use numa to steer flows");
+
/* Default value for flowid shift */
VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
#define V_def_flowid_shift VNET(def_flowid_shift)
@@ -491,6 +498,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
LAGG_XLOCK(sc);
if (V_def_use_flowid)
sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+ if (V_def_use_numa)
+ sc->sc_opts |= LAGG_OPT_USE_NUMA;
sc->flowid_shift = V_def_flowid_shift;
/* Hash all layers by default */
@@ -1247,6 +1256,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
switch (ro->ro_opts) {
case LAGG_OPT_USE_FLOWID:
case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_USE_NUMA:
+ case -LAGG_OPT_USE_NUMA:
case LAGG_OPT_FLOWIDSHIFT:
valid = 1;
lacp = 0;
diff --git a/sys/net/if_lagg.h b/sys/net/if_lagg.h
index f1e2d8f4b0b0..2c566c0de049 100644
--- a/sys/net/if_lagg.h
+++ b/sys/net/if_lagg.h
@@ -143,6 +143,7 @@ struct lagg_reqopts {
#define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */
/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
#define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */
+#define LAGG_OPT_USE_NUMA 0x04 /* enable use of numa (real flag, kept in sc_opts) */
#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */
#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */
#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */
@@ -158,8 +159,9 @@ struct lagg_reqopts {
#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts)
#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts)
-#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \
- "\006LACP_TXTEST\007LACP_RXTEST"
+#define LAGG_OPT_BITS "\020\001USE_FLOWID\003USE_NUMA" \
+ "\005LACP_STRICT\006LACP_TXTEST" \
+ "\007LACP_RXTEST"
#ifdef _KERNEL