author     Andrew Thompson <thompsa@FreeBSD.org>   2007-04-10 00:27:25 +0000
committer  Andrew Thompson <thompsa@FreeBSD.org>   2007-04-10 00:27:25 +0000
commit     b47888cebab427ce710262f2b515c709c4f29efb (patch)
tree       474c91da17bd5f3ae59c9c4351d9aafe27da1c11
parent     60dd8da7752f1d9182a9bc363e7aeb938a97db5c (diff)
Add the trunk(4) driver for providing link aggregation, failover and fault
tolerance.  This driver allows aggregation of multiple network interfaces as
one virtual interface using a number of different protocols/algorithms.

    failover     - Sends traffic through the secondary port if the master
                   becomes inactive.
    fec          - Supports Cisco Fast EtherChannel.
    lacp         - Supports the IEEE 802.3ad Link Aggregation Control
                   Protocol (LACP) and the Marker Protocol.
    loadbalance  - Static load balancing using an outgoing hash.
    roundrobin   - Distributes outgoing traffic using a round-robin
                   scheduler through all active ports.

This code was obtained from OpenBSD; it also includes 802.3ad LACP support
from agr(4) in NetBSD.
Notes:
    svn path=/head/; revision=168561
-rw-r--r--  sbin/ifconfig/iftrunk.c      153
-rw-r--r--  share/man/man4/trunk.4       172
-rw-r--r--  sys/net/ieee8023ad_lacp.c   1763
-rw-r--r--  sys/net/ieee8023ad_lacp.h    289
-rw-r--r--  sys/net/if.c                   6
-rw-r--r--  sys/net/if_ethersubr.c        14
-rw-r--r--  sys/net/if_trunk.c          1590
-rw-r--r--  sys/net/if_trunk.h           209
-rw-r--r--  sys/net/if_var.h               1
-rw-r--r--  sys/sys/priv.h                 1
10 files changed, 4198 insertions(+), 0 deletions(-)
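
To make the port-selection policies named in the commit message above a little
more concrete, here is a minimal userland C sketch of the two simplest ones,
failover and roundrobin.  It is not code from this commit; struct port and the
pick_*() helpers are hypothetical stand-ins for the driver's port list in
sys/net/if_trunk.c (listed above), which additionally deals with mbufs and
locking but implements selection logic of roughly this shape.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for a trunk member port: a name and a link state. */
struct port {
    const char *name;
    int active;                 /* non-zero when the link is up */
};

/* failover: use the master (first) port while it is up, otherwise the
 * next active port in the list. */
static const struct port *
pick_failover(const struct port *p, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++)
        if (p[i].active)
            return (&p[i]);
    return (NULL);
}

/* roundrobin: rotate through the active ports, one per outgoing packet. */
static const struct port *
pick_roundrobin(const struct port *p, size_t n, unsigned *cursor)
{
    size_t tries;

    for (tries = 0; tries < n; tries++) {
        const struct port *cand = &p[(*cursor)++ % n];

        if (cand->active)
            return (cand);
    }
    return (NULL);
}

int
main(void)
{
    struct port ports[] = { { "em0", 0 }, { "ath0", 1 } };
    unsigned cursor = 0;

    printf("failover   -> %s\n", pick_failover(ports, 2)->name);
    printf("roundrobin -> %s\n", pick_roundrobin(ports, 2, &cursor)->name);
    return (0);
}
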
diff --git a/sbin/ifconfig/iftrunk.c b/sbin/ifconfig/iftrunk.c
new file mode 100644
index 000000000000..cd0c02d1375d
--- /dev/null
+++ b/sbin/ifconfig/iftrunk.c
@@ -0,0 +1,153 @@
+/*-
+ */
+
+#ifndef lint
+static const char rcsid[] =
+ "$FreeBSD$";
+#endif /* not lint */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_trunk.h>
+#include <net/route.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <err.h>
+#include <errno.h>
+
+#include "ifconfig.h"
+
+static void
+settrunkport(const char *val, int d, int s, const struct afswtch *afp)
+{
+ struct trunk_reqport rp;
+
+ bzero(&rp, sizeof(rp));
+ strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname));
+ strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname));
+
+ if (ioctl(s, SIOCSTRUNKPORT, &rp))
+ err(1, "SIOCSTRUNKPORT");
+}
+
+static void
+unsettrunkport(const char *val, int d, int s, const struct afswtch *afp)
+{
+ struct trunk_reqport rp;
+
+ bzero(&rp, sizeof(rp));
+ strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname));
+ strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname));
+
+ if (ioctl(s, SIOCSTRUNKDELPORT, &rp))
+ err(1, "SIOCSTRUNKDELPORT");
+}
+
+static void
+settrunkproto(const char *val, int d, int s, const struct afswtch *afp)
+{
+ struct trunk_protos tpr[] = TRUNK_PROTOS;
+ struct trunk_reqall ra;
+ int i;
+
+ bzero(&ra, sizeof(ra));
+ ra.ra_proto = TRUNK_PROTO_MAX;
+
+ for (i = 0; i < (sizeof(tpr) / sizeof(tpr[0])); i++) {
+ if (strcmp(val, tpr[i].tpr_name) == 0) {
+ ra.ra_proto = tpr[i].tpr_proto;
+ break;
+ }
+ }
+ if (ra.ra_proto == TRUNK_PROTO_MAX)
+ errx(1, "Invalid trunk protocol: %s", val);
+
+ strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname));
+ if (ioctl(s, SIOCSTRUNK, &ra) != 0)
+ err(1, "SIOCSTRUNK");
+}
+
+static void
+trunk_status(int s)
+{
+ struct trunk_protos tpr[] = TRUNK_PROTOS;
+ struct trunk_reqport rp, rpbuf[TRUNK_MAX_PORTS];
+ struct trunk_reqall ra;
+ const char *proto = "<unknown>";
+ int i, isport = 0;
+
+ bzero(&rp, sizeof(rp));
+ bzero(&ra, sizeof(ra));
+
+ strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname));
+ strlcpy(rp.rp_portname, name, sizeof(rp.rp_portname));
+
+ if (ioctl(s, SIOCGTRUNKPORT, &rp) == 0)
+ isport = 1;
+
+ strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname));
+ ra.ra_size = sizeof(rpbuf);
+ ra.ra_port = rpbuf;
+
+ if (ioctl(s, SIOCGTRUNK, &ra) == 0) {
+ for (i = 0; i < (sizeof(tpr) / sizeof(tpr[0])); i++) {
+ if (ra.ra_proto == tpr[i].tpr_proto) {
+ proto = tpr[i].tpr_name;
+ break;
+ }
+ }
+
+ printf("\ttrunk: trunkproto %s", proto);
+ if (isport)
+ printf(" trunkdev %s", rp.rp_ifname);
+ putchar('\n');
+
+ for (i = 0; i < ra.ra_ports; i++) {
+ printf("\t\ttrunkport %s ", rpbuf[i].rp_portname);
+ printb("", rpbuf[i].rp_flags, TRUNK_PORT_BITS);
+ putchar('\n');
+ }
+
+ if (0 /* XXX */) {
+ printf("\tsupported trunk protocols:\n");
+ for (i = 0; i < (sizeof(tpr) / sizeof(tpr[0])); i++)
+ printf("\t\ttrunkproto %s\n", tpr[i].tpr_name);
+ }
+ } else if (isport)
+ printf("\ttrunk: trunkdev %s\n", rp.rp_ifname);
+}
+
+static struct cmd trunk_cmds[] = {
+ DEF_CMD_ARG("trunkport", settrunkport),
+ DEF_CMD_ARG("-trunkport", unsettrunkport),
+ DEF_CMD_ARG("trunkproto", settrunkproto),
+};
+static struct afswtch af_trunk = {
+ .af_name = "af_trunk",
+ .af_af = AF_UNSPEC,
+ .af_other_status = trunk_status,
+};
+
+static __constructor void
+trunk_ctor(void)
+{
+#define N(a) (sizeof(a) / sizeof(a[0]))
+ int i;
+
+ for (i = 0; i < N(trunk_cmds); i++)
+ cmd_register(&trunk_cmds[i]);
+ af_register(&af_trunk);
+#undef N
+}
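
trunk_status() above shows the ioctl sequence ifconfig uses to report a trunk.
The following standalone sketch (not part of this commit) performs the same
SIOCGTRUNK query directly, reusing the trunk_reqall/trunk_reqport structures
from <net/if_trunk.h> added by this change; the interface name trunk0 is an
assumption, and the protocol is printed as a raw number rather than through
the TRUNK_PROTOS name table.

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_trunk.h>

#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
    struct trunk_reqport rpbuf[TRUNK_MAX_PORTS];
    struct trunk_reqall ra;
    int i, s;

    /* Any dgram socket will do as an ioctl handle, as in ifconfig. */
    if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
        err(1, "socket");

    memset(&ra, 0, sizeof(ra));
    strlcpy(ra.ra_ifname, "trunk0", sizeof(ra.ra_ifname));
    ra.ra_size = sizeof(rpbuf);
    ra.ra_port = rpbuf;

    if (ioctl(s, SIOCGTRUNK, &ra) == -1)
        err(1, "SIOCGTRUNK");

    printf("trunk0: protocol %d, %d port(s)\n",
        (int)ra.ra_proto, (int)ra.ra_ports);
    for (i = 0; i < ra.ra_ports; i++)
        printf("\t%s\n", rpbuf[i].rp_portname);

    close(s);
    return (0);
}
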
diff --git a/share/man/man4/trunk.4 b/share/man/man4/trunk.4
new file mode 100644
index 000000000000..7cbd331cb645
--- /dev/null
+++ b/share/man/man4/trunk.4
@@ -0,0 +1,172 @@
+.\" $OpenBSD: trunk.4,v 1.18 2006/06/09 13:53:34 jmc Exp $
+.\"
+.\" Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 6, 2007
+.Dt TRUNK 4
+.Os
+.Sh NAME
+.Nm trunk
+.Nd link aggregation and link failover interface
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device trunk"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+if_trunk_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+interface allows aggregation of multiple network interfaces as one virtual
+.Nm
+interface for the purpose of providing fault-tolerance and high-speed links.
+.Pp
+A
+.Nm
+interface can be created using the
+.Ic ifconfig trunk Ns Ar N Ic create
+command.
+It can use different link aggregation protocols specified
+using the
+.Ic trunkproto Ar proto
+option.
+Child interfaces can be added using the
+.Ic trunkport Ar child-iface
+option and removed using the
+.Ic -trunkport Ar child-iface
+option.
+.Pp
+The driver currently supports the trunk protocols
+.Ic failover
+(the default),
+.Ic fec ,
+.Ic lacp ,
+.Ic loadbalance ,
+.Ic roundrobin ,
+and
+.Ic none .
+The protocols determine which ports are used for outgoing traffic
+and whether a specific port accepts incoming traffic.
+The interface link state is used to determine whether the port is active.
+.Bl -tag -width loadbalance
+.It Ic failover
+Sends and receives traffic only through the master port.
+If the master port becomes unavailable,
+the next active port is used.
+The first interface added is the master port;
+any interfaces added after that are used as failover devices.
+.It Ic fec
+Supports Cisco EtherChannel.
+This is a static setup and does not negotiate aggregation with the peer or
+exchange frames to monitor the link.
+.It Ic lacp
+Supports the IEEE 802.3ad Link Aggregation Control Protocol (LACP) and the
+Marker Protocol.
+LACP will negotiate a set of aggregatable links with the peer into one or
+more Link Aggregated Groups.
+Each LAG is composed of ports of the same speed, set to full-duplex operation.
+The traffic will be balanced across the ports in the LAG with the greatest
+total speed; in most cases there will only be one LAG which contains all ports.
+In the event of changes in physical connectivity, Link Aggregation will quickly
+converge to a new configuration.
+.It Ic loadbalance
+Balances outgoing traffic across the active ports based on hashed
+protocol header information and accepts incoming traffic from
+any active port.
+This is a static setup and does not negotiate aggregation with the peer or
+exchange frames to monitor the link.
+The hash includes the Ethernet source and destination address, and, if
+available, the VLAN tag, and the IP source and destination address.
+.It Ic roundrobin
+Distributes outgoing traffic using a round-robin scheduler
+through all active ports and accepts incoming traffic from
+any active port.
+.It Ic none
+This protocol is intended to do nothing: it disables any traffic without
+disabling the
+.Nm
+interface itself.
+.El
+.Pp
+Each
+.Nm
+interface is created at runtime using interface cloning.
+This is
+most easily done with the
+.Xr ifconfig 8
+.Cm create
+command or using the
+.Va cloned_interfaces
+variable in
+.Xr rc.conf 5 .
+.Sh EXAMPLES
+Create an 802.3ad trunk using LACP with two
+.Xr bge 4
+Gigabit Ethernet interfaces:
+.Bd -literal -offset indent
+# ifconfig bge0 up
+# ifconfig bge1 up
+# ifconfig trunk0 trunkproto lacp trunkport bge0 trunkport bge1 \e
+ 192.168.1.1 netmask 255.255.255.0
+.Ed
+.Pp
+The following example uses an active failover trunk to set up roaming
+between wired and wireless networks using two network devices.
+Whenever the wired master interface is unplugged, the wireless failover
+device will be used:
+.Bd -literal -offset indent
+# ifconfig em0 up
+# ifconfig ath0 ssid my_net up
+# ifconfig trunk0 trunkproto failover trunkport em0 trunkport ath0 \e
+ 192.168.1.1 netmask 255.255.255.0
+.Ed
+.Sh SEE ALSO
+.Xr ng_fec 4 ,
+.Xr ng_one2many 4 ,
+.Xr ifconfig 8
+.Sh HISTORY
+The
+.Nm
+device first appeared in
+.Fx 7.0 .
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+driver was written by
+.An Reyk Floeter Aq reyk@openbsd.org .
+The LACP implementation was written by
+.An YAMAMOTO Takashi
+for
+.Nx .
+.Sh BUGS
+There is no way to configure LACP administrative variables, including system
+and port priorities.
+The current implementation always performs active-mode LACP and uses 0x8000 as
+system and port priorities.
+.Pp
+WPA security does not currently work correctly with a wireless interface added
+to the trunk.
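
The loadbalance description in the man page above says the outgoing hash
covers the Ethernet source and destination addresses, the VLAN tag when
present, and the IP addresses.  The toy sketch below illustrates that idea
with a plain FNV-1a hash over hypothetical header fields; the real driver
hashes the live mbuf headers with a random per-trunk key (the
trunk_hashmbuf/lsc_hashkey usage visible in the LACP code that follows), so
this is only an approximation of the technique, not the driver's algorithm.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy 32-bit FNV-1a; stands in for the kernel's keyed header hash. */
static uint32_t
hash_bytes(const void *buf, size_t len, uint32_t h)
{
    const uint8_t *p = buf;

    while (len-- > 0) {
        h ^= *p++;
        h *= 0x01000193;
    }
    return (h);
}

int
main(void)
{
    /* Hypothetical flow: MAC addresses, a VLAN tag and IPv4 addresses. */
    uint8_t src_mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
    uint8_t dst_mac[6] = { 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb };
    uint16_t vlan = 100;
    uint32_t src_ip = 0xc0a80101;       /* 192.168.1.1 */
    uint32_t dst_ip = 0xc0a80102;       /* 192.168.1.2 */
    uint32_t key = 0x811c9dc5;          /* random per-trunk key in the driver */
    uint32_t h;
    int nports = 2;

    h = hash_bytes(src_mac, sizeof(src_mac), key);
    h = hash_bytes(dst_mac, sizeof(dst_mac), h);
    h = hash_bytes(&vlan, sizeof(vlan), h);
    h = hash_bytes(&src_ip, sizeof(src_ip), h);
    h = hash_bytes(&dst_ip, sizeof(dst_ip), h);

    printf("flow hashes to port %u of %d\n", h % nports, nports);
    return (0);
}
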
diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c
new file mode 100644
index 000000000000..26b87a42a2e1
--- /dev/null
+++ b/sys/net/ieee8023ad_lacp.c
@@ -0,0 +1,1763 @@
+/* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */
+
+/*-
+ * Copyright (c)2005 YAMAMOTO Takashi,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/callout.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h> /* hz */
+#include <sys/socket.h> /* for net/if.h */
+#include <sys/sockio.h>
+#include <machine/stdarg.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/ethernet.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+
+#include <net/if_trunk.h>
+#include <net/ieee8023ad_lacp.h>
+
+/*
+ * actor system priority and port priority.
+ * XXX should be configurable.
+ */
+
+#define LACP_SYSTEM_PRIO 0x8000
+#define LACP_PORT_PRIO 0x8000
+
+const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
+ { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
+
+static const struct tlv_template lacp_info_tlv_template[] = {
+ { LACP_TYPE_ACTORINFO,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
+ { LACP_TYPE_PARTNERINFO,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
+ { LACP_TYPE_COLLECTORINFO,
+ sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
+ { 0, 0 },
+};
+
+typedef void (*lacp_timer_func_t)(struct lacp_port *);
+
+static const struct tlv_template marker_info_tlv_template[] = {
+ { MARKER_TYPE_INFO, 16 },
+ { 0, 0 },
+};
+
+static const struct tlv_template marker_response_tlv_template[] = {
+ { MARKER_TYPE_RESPONSE, 16 },
+ { 0, 0 },
+};
+
+static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
+
+static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *);
+static void lacp_suppress_distributing(struct lacp_softc *,
+ struct lacp_aggregator *);
+static void lacp_transit_expire(void *);
+static void lacp_select_active_aggregator(struct lacp_softc *);
+static uint16_t lacp_compose_key(struct lacp_port *);
+static int tlv_check(const void *, size_t, const struct tlvhdr *,
+ const struct tlv_template *, boolean_t);
+static void lacp_tick(void *);
+
+static void lacp_fill_aggregator_id(struct lacp_aggregator *,
+ const struct lacp_port *);
+static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
+ const struct lacp_peerinfo *);
+static int lacp_aggregator_is_compatible(const struct lacp_aggregator *,
+ const struct lacp_port *);
+static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
+ const struct lacp_peerinfo *);
+
+static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
+ struct lacp_port *);
+static void lacp_aggregator_addref(struct lacp_softc *,
+ struct lacp_aggregator *);
+static void lacp_aggregator_delref(struct lacp_softc *,
+ struct lacp_aggregator *);
+
+/* receive machine */
+
+static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
+static void lacp_sm_rx_timer(struct lacp_port *);
+static void lacp_sm_rx_set_expired(struct lacp_port *);
+static void lacp_sm_rx_update_ntt(struct lacp_port *,
+ const struct lacpdu *);
+static void lacp_sm_rx_record_pdu(struct lacp_port *,
+ const struct lacpdu *);
+static void lacp_sm_rx_update_selected(struct lacp_port *,
+ const struct lacpdu *);
+static void lacp_sm_rx_record_default(struct lacp_port *);
+static void lacp_sm_rx_update_default_selected(struct lacp_port *);
+static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
+ const struct lacp_peerinfo *);
+
+/* mux machine */
+
+static void lacp_sm_mux(struct lacp_port *);
+static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
+static void lacp_sm_mux_timer(struct lacp_port *);
+
+/* periodic transmit machine */
+
+static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
+static void lacp_sm_ptx_tx_schedule(struct lacp_port *);
+static void lacp_sm_ptx_timer(struct lacp_port *);
+
+/* transmit machine */
+
+static void lacp_sm_tx(struct lacp_port *);
+static void lacp_sm_assert_ntt(struct lacp_port *);
+
+static void lacp_run_timers(struct lacp_port *);
+static int lacp_compare_peerinfo(const struct lacp_peerinfo *,
+ const struct lacp_peerinfo *);
+static int lacp_compare_systemid(const struct lacp_systemid *,
+ const struct lacp_systemid *);
+static void lacp_port_enable(struct lacp_port *);
+static void lacp_port_disable(struct lacp_port *);
+static void lacp_select(struct lacp_port *);
+static void lacp_unselect(struct lacp_port *);
+static void lacp_disable_collecting(struct lacp_port *);
+static void lacp_enable_collecting(struct lacp_port *);
+static void lacp_disable_distributing(struct lacp_port *);
+static void lacp_enable_distributing(struct lacp_port *);
+static int lacp_xmit_lacpdu(struct lacp_port *);
+
+#if defined(LACP_DEBUG)
+static void lacp_dump_lacpdu(const struct lacpdu *);
+static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
+ size_t);
+static const char *lacp_format_lagid(const struct lacp_peerinfo *,
+ const struct lacp_peerinfo *, char *, size_t);
+static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
+ char *, size_t);
+static const char *lacp_format_state(uint8_t, char *, size_t);
+static const char *lacp_format_mac(const uint8_t *, char *, size_t);
+static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
+ size_t);
+static const char *lacp_format_portid(const struct lacp_portid *, char *,
+ size_t);
+static void lacp_dprintf(const struct lacp_port *, const char *, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#define LACP_DPRINTF(a) lacp_dprintf a
+#else
+#define LACP_DPRINTF(a) /* nothing */
+#endif
+
+/*
+ * partner administration variables.
+ * XXX should be configurable.
+ */
+
+static const struct lacp_peerinfo lacp_partner_admin = {
+ .lip_systemid = { .lsi_prio = 0xffff },
+ .lip_portid = { .lpi_prio = 0xffff },
+#if 1
+ /* optimistic */
+ .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
+ LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
+#else
+ /* pessimistic */
+ .lip_state = 0,
+#endif
+};
+
+static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
+ [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
+ [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
+ [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
+};
+
+/*
+ * lacp_input: process lacpdu
+ */
+int
+lacp_input(struct trunk_port *tp, struct mbuf *m)
+{
+ struct lacp_port *lp = LACP_PORT(tp);
+ struct lacpdu *du;
+ int error = 0;
+
+ TRUNK_LOCK_ASSERT(tp->tp_trunk);
+
+ if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
+ goto bad;
+ }
+
+ if (m->m_pkthdr.len != sizeof(*du)) {
+ goto bad;
+ }
+
+ if ((m->m_flags & M_MCAST) == 0) {
+ goto bad;
+ }
+
+ if (m->m_len < sizeof(*du)) {
+ m = m_pullup(m, sizeof(*du));
+ if (m == NULL) {
+ return (ENOMEM);
+ }
+ }
+
+ du = mtod(m, struct lacpdu *);
+
+ if (memcmp(&du->ldu_eh.ether_dhost,
+ &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
+ goto bad;
+ }
+
+ /* XXX
+ KASSERT(du->ldu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_LACP,
+ ("a very bad kassert!"));
+ */
+
+ /*
+ * ignore the version for compatibility with
+ * the future protocol revisions.
+ */
+
+#if 0
+ if (du->ldu_sph.sph_version != 1) {
+ goto bad;
+ }
+#endif
+
+ /*
+ * ignore tlv types for compatibility with
+ * the future protocol revisions.
+ */
+
+ if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
+ lacp_info_tlv_template, FALSE)) {
+ goto bad;
+ }
+
+#if defined(LACP_DEBUG)
+ LACP_DPRINTF((lp, "lacpdu receive\n"));
+ lacp_dump_lacpdu(du);
+#endif /* defined(LACP_DEBUG) */
+ lacp_sm_rx(lp, du);
+
+ m_freem(m);
+
+ return (error);
+
+bad:
+ m_freem(m);
+ return (EINVAL);
+}
+
+static void
+lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
+{
+ struct trunk_port *tp = lp->lp_trunk;
+ struct trunk_softc *tr = tp->tp_trunk;
+
+ info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
+ memcpy(&info->lip_systemid.lsi_mac,
+ IF_LLADDR(tr->tr_ifp), ETHER_ADDR_LEN);
+ info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
+ info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
+ info->lip_state = lp->lp_state;
+}
+
+static int
+lacp_xmit_lacpdu(struct lacp_port *lp)
+{
+ struct trunk_port *tp = lp->lp_trunk;
+ struct mbuf *m;
+ struct lacpdu *du;
+ int error;
+
+ TRUNK_LOCK_ASSERT(tp->tp_trunk);
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ return (ENOMEM);
+ }
+ m->m_len = m->m_pkthdr.len = sizeof(*du);
+
+ du = mtod(m, struct lacpdu *);
+ memset(du, 0, sizeof(*du));
+
+ memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
+ ETHER_ADDR_LEN);
+ memcpy(&du->ldu_eh.ether_shost, tp->tp_lladdr, ETHER_ADDR_LEN);
+ du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
+
+ du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
+ du->ldu_sph.sph_version = 1;
+
+ TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
+ du->ldu_actor = lp->lp_actor;
+
+ TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
+ sizeof(du->ldu_partner));
+ du->ldu_partner = lp->lp_partner;
+
+ TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
+ sizeof(du->ldu_collector));
+ du->ldu_collector.lci_maxdelay = 0;
+
+#if defined(LACP_DEBUG)
+ LACP_DPRINTF((lp, "lacpdu transmit\n"));
+ lacp_dump_lacpdu(du);
+#endif /* defined(LACP_DEBUG) */
+
+ m->m_flags |= M_MCAST;
+
+ /*
+ * XXX should use higher priority queue.
+ * otherwise network congestion can break aggregation.
+ */
+
+ error = trunk_enqueue(lp->lp_ifp, m);
+ return (error);
+}
+
+void
+lacp_linkstate(struct trunk_port *tp)
+{
+ struct lacp_port *lp = LACP_PORT(tp);
+ struct ifnet *ifp = tp->tp_ifp;
+ struct ifmediareq ifmr;
+ int error = 0;
+ u_int media;
+ uint8_t old_state;
+ uint16_t old_key;
+
+ TRUNK_LOCK_ASSERT(tp->tp_trunk);
+
+ bzero((char *)&ifmr, sizeof(ifmr));
+ error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
+ if (error != 0)
+ return;
+
+ media = ifmr.ifm_active;
+ LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x\n", lp->lp_media, media));
+ old_state = lp->lp_state;
+ old_key = lp->lp_key;
+
+ lp->lp_media = media;
+ if ((media & IFM_HDX) != 0 || ifp->if_link_state == LINK_STATE_DOWN) {
+ lacp_port_disable(lp);
+ } else {
+ lacp_port_enable(lp);
+ }
+ lp->lp_key = lacp_compose_key(lp);
+
+ if (old_state != lp->lp_state || old_key != lp->lp_key) {
+ LACP_DPRINTF((lp, "-> UNSELECTED\n"));
+ lp->lp_selected = LACP_UNSELECTED;
+ }
+}
+
+static void
+lacp_tick(void *arg)
+{
+ struct lacp_softc *lsc = arg;
+ struct lacp_port *lp;
+
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
+ if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
+ continue;
+
+ lacp_run_timers(lp);
+
+ lacp_select(lp);
+ lacp_sm_mux(lp);
+ lacp_sm_tx(lp);
+ lacp_sm_ptx_tx_schedule(lp);
+ }
+ callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
+}
+
+int
+lacp_port_create(struct trunk_port *tp)
+{
+ struct trunk_softc *tr = tp->tp_trunk;
+ struct lacp_softc *lsc = LACP_SOFTC(tr);
+ struct lacp_port *lp;
+ struct ifnet *ifp = tp->tp_ifp;
+ struct sockaddr_dl sdl;
+ struct ifmultiaddr *rifma = NULL;
+ int error;
+
+ boolean_t active = TRUE; /* XXX should be configurable */
+ boolean_t fast = FALSE; /* XXX should be configurable */
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_index = ifp->if_index;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+
+ bcopy(&ethermulticastaddr_slowprotocols,
+ LLADDR(&sdl), ETHER_ADDR_LEN);
+ error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
+ if (error) {
+ printf("%s: ADDMULTI failed on %s\n", __func__, tp->tp_ifname);
+ return (error);
+ }
+
+ lp = malloc(sizeof(struct lacp_port),
+ M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (lp == NULL)
+ return (ENOMEM);
+
+ tp->tp_psc = (caddr_t)lp;
+ lp->lp_ifp = ifp;
+ lp->lp_trunk = tp;
+ lp->lp_lsc = lsc;
+
+ LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
+
+ lacp_fill_actorinfo(lp, &lp->lp_actor);
+ lp->lp_state =
+ (active ? LACP_STATE_ACTIVITY : 0) |
+ (fast ? LACP_STATE_TIMEOUT : 0);
+ lp->lp_aggregator = NULL;
+ lacp_linkstate(tp);
+ lacp_sm_rx_set_expired(lp);
+
+ return (0);
+}
+
+void
+lacp_port_destroy(struct trunk_port *tp)
+{
+ struct lacp_port *lp = LACP_PORT(tp);
+ struct ifnet *ifp = tp->tp_ifp;
+ struct sockaddr_dl sdl;
+ int i, error;
+
+ TRUNK_LOCK_ASSERT(tp->tp_trunk);
+
+ for (i = 0; i < LACP_NTIMER; i++) {
+ LACP_TIMER_DISARM(lp, i);
+ }
+
+ lacp_disable_collecting(lp);
+ lacp_disable_distributing(lp);
+ lacp_unselect(lp);
+
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_index = ifp->if_index;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+
+ bcopy(&ethermulticastaddr_slowprotocols,
+ LLADDR(&sdl), ETHER_ADDR_LEN);
+ error = if_delmulti(ifp, (struct sockaddr *)&sdl);
+ if (error)
+ printf("%s: DELMULTI failed on %s\n", __func__, tp->tp_ifname);
+
+ LIST_REMOVE(lp, lp_next);
+ free(lp, M_DEVBUF);
+}
+
+int
+lacp_port_isactive(struct trunk_port *tp)
+{
+ struct lacp_port *lp = LACP_PORT(tp);
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lacp_aggregator *la = lp->lp_aggregator;
+
+ /* This port is joined to the active aggregator */
+ if (la != NULL && la == lsc->lsc_active_aggregator)
+ return (1);
+
+ return (0);
+}
+
+static void
+lacp_disable_collecting(struct lacp_port *lp)
+{
+ struct trunk_port *tp = lp->lp_trunk;
+
+ LACP_DPRINTF((lp, "collecting disabled\n"));
+
+ lp->lp_state &= ~LACP_STATE_COLLECTING;
+ tp->tp_flags &= ~TRUNK_PORT_COLLECTING;
+}
+
+static void
+lacp_enable_collecting(struct lacp_port *lp)
+{
+ struct trunk_port *tp = lp->lp_trunk;
+
+ LACP_DPRINTF((lp, "collecting enabled\n"));
+
+ lp->lp_state |= LACP_STATE_COLLECTING;
+ tp->tp_flags |= TRUNK_PORT_COLLECTING;
+}
+
+static void
+lacp_disable_distributing(struct lacp_port *lp)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct trunk_port *tp = lp->lp_trunk;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+ TRUNK_LOCK_ASSERT(tp->tp_trunk);
+
+ if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
+ return;
+ }
+
+ KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
+ KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
+ KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
+
+ LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
+ "nports %d -> %d\n",
+ lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+ la->la_nports, la->la_nports - 1));
+
+ TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
+ la->la_nports--;
+
+ lacp_suppress_distributing(lsc, la);
+
+ lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
+ tp->tp_flags &= ~TRUNK_PORT_DISTRIBUTING;
+
+ if (lsc->lsc_active_aggregator == la) {
+ lacp_select_active_aggregator(lsc);
+ }
+}
+
+static void
+lacp_enable_distributing(struct lacp_port *lp)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct trunk_port *tp = lp->lp_trunk;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+ TRUNK_LOCK_ASSERT(tp->tp_trunk);
+
+ if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
+ return;
+ }
+
+ LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
+ "nports %d -> %d\n",
+ lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+ la->la_nports, la->la_nports + 1));
+
+ KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
+ TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
+ la->la_nports++;
+
+ lacp_suppress_distributing(lsc, la);
+
+ lp->lp_state |= LACP_STATE_DISTRIBUTING;
+ tp->tp_flags |= TRUNK_PORT_DISTRIBUTING;
+
+ if (lsc->lsc_active_aggregator != la) {
+ lacp_select_active_aggregator(lsc);
+ }
+}
+
+static void
+lacp_transit_expire(void *vp)
+{
+ struct lacp_softc *lsc = vp;
+
+ LACP_DPRINTF((NULL, "%s\n", __func__));
+ lsc->lsc_suppress_distributing = FALSE;
+}
+
+int
+lacp_attach(struct trunk_softc *tr)
+{
+ struct lacp_softc *lsc;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ lsc = malloc(sizeof(struct lacp_softc),
+ M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (lsc == NULL)
+ return (ENOMEM);
+
+ tr->tr_psc = (caddr_t)lsc;
+ lsc->lsc_trunk = tr;
+
+ lsc->lsc_hashkey = arc4random();
+ lsc->lsc_active_aggregator = NULL;
+ TAILQ_INIT(&lsc->lsc_aggregators);
+ LIST_INIT(&lsc->lsc_ports);
+
+ callout_init_mtx(&lsc->lsc_transit_callout, &tr->tr_mtx, 0);
+ callout_init_mtx(&lsc->lsc_callout, &tr->tr_mtx, 0);
+
+ /* if the trunk is already up then do the same */
+ if (tr->tr_ifp->if_drv_flags & IFF_DRV_RUNNING)
+ lacp_init(tr);
+
+ return (0);
+}
+
+int
+lacp_detach(struct trunk_softc *tr)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(tr);
+
+ KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
+ ("aggregators still active"));
+ KASSERT(lsc->lsc_active_aggregator == NULL,
+ ("aggregator still attached"));
+
+ tr->tr_psc = NULL;
+ callout_drain(&lsc->lsc_transit_callout);
+ callout_drain(&lsc->lsc_callout);
+
+ free(lsc, M_DEVBUF);
+ return (0);
+}
+
+void
+lacp_init(struct trunk_softc *tr)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(tr);
+
+ callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
+}
+
+void
+lacp_stop(struct trunk_softc *tr)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(tr);
+
+ callout_stop(&lsc->lsc_transit_callout);
+ callout_stop(&lsc->lsc_callout);
+}
+
+struct trunk_port *
+lacp_select_tx_port(struct trunk_softc *tr, struct mbuf *m)
+{
+ struct lacp_softc *lsc = LACP_SOFTC(tr);
+ struct lacp_aggregator *la;
+ struct lacp_port *lp;
+ uint32_t hash;
+ int nports;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ if (__predict_false(lsc->lsc_suppress_distributing)) {
+ LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
+ return (NULL);
+ }
+
+ la = lsc->lsc_active_aggregator;
+ if (__predict_false(la == NULL)) {
+ LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
+ return (NULL);
+ }
+
+ nports = la->la_nports;
+ KASSERT(nports > 0, ("no ports available"));
+
+ hash = trunk_hashmbuf(m, lsc->lsc_hashkey);
+ hash %= nports;
+ lp = TAILQ_FIRST(&la->la_ports);
+ while (hash--) {
+ lp = TAILQ_NEXT(lp, lp_dist_q);
+ }
+
+ KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
+ ("aggregated port is not distributing"));
+
+ return (lp->lp_trunk);
+}
+/*
+ * lacp_suppress_distributing: drop transmit packets for a while
+ * to preserve packet ordering.
+ */
+
+static void
+lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+ if (lsc->lsc_active_aggregator != la) {
+ return;
+ }
+
+ LACP_DPRINTF((NULL, "%s\n", __func__));
+ lsc->lsc_suppress_distributing = TRUE;
+ /* XXX should consider collector max delay */
+ callout_reset(&lsc->lsc_transit_callout,
+ LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
+}
+
+static int
+lacp_compare_peerinfo(const struct lacp_peerinfo *a,
+ const struct lacp_peerinfo *b)
+{
+ return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
+}
+
+static int
+lacp_compare_systemid(const struct lacp_systemid *a,
+ const struct lacp_systemid *b)
+{
+ return (memcmp(a, b, sizeof(*a)));
+}
+
+#if 0 /* unused */
+static int
+lacp_compare_portid(const struct lacp_portid *a,
+ const struct lacp_portid *b)
+{
+ return (memcmp(a, b, sizeof(*a)));
+}
+#endif
+
+static uint64_t
+lacp_aggregator_bandwidth(struct lacp_aggregator *la)
+{
+ struct lacp_port *lp;
+ uint64_t speed;
+
+ lp = TAILQ_FIRST(&la->la_ports);
+ if (lp == NULL) {
+ return (0);
+ }
+
+ speed = ifmedia_baudrate(lp->lp_media);
+ speed *= la->la_nports;
+ if (speed == 0) {
+ LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
+ lp->lp_media, la->la_nports));
+ }
+
+ return (speed);
+}
+
+/*
+ * lacp_select_active_aggregator: select an aggregator to be used to transmit
+ * packets from trunk(4) interface.
+ */
+
+static void
+lacp_select_active_aggregator(struct lacp_softc *lsc)
+{
+ struct lacp_aggregator *la;
+ struct lacp_aggregator *best_la = NULL;
+ uint64_t best_speed = 0;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif /* defined(LACP_DEBUG) */
+
+ LACP_DPRINTF((NULL, "%s:\n", __func__));
+
+ TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
+ uint64_t speed;
+
+ if (la->la_nports == 0) {
+ continue;
+ }
+
+ speed = lacp_aggregator_bandwidth(la);
+ LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
+ lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
+ speed, la->la_nports));
+ if (speed > best_speed ||
+ (speed == best_speed &&
+ la == lsc->lsc_active_aggregator)) {
+ best_la = la;
+ best_speed = speed;
+ }
+ }
+
+ KASSERT(best_la == NULL || best_la->la_nports > 0,
+ ("invalid aggregator refcnt"));
+ KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
+ ("invalid aggregator list"));
+
+#if defined(LACP_DEBUG)
+ if (lsc->lsc_active_aggregator != best_la) {
+ LACP_DPRINTF((NULL, "active aggregator changed\n"));
+ LACP_DPRINTF((NULL, "old %s\n",
+ lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
+ buf, sizeof(buf))));
+ } else {
+ LACP_DPRINTF((NULL, "active aggregator not changed\n"));
+ }
+ LACP_DPRINTF((NULL, "new %s\n",
+ lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
+#endif /* defined(LACP_DEBUG) */
+
+ if (lsc->lsc_active_aggregator != best_la) {
+ lsc->lsc_active_aggregator = best_la;
+ if (best_la) {
+ lacp_suppress_distributing(lsc, best_la);
+ }
+ }
+}
+
+static uint16_t
+lacp_compose_key(struct lacp_port *lp)
+{
+ struct trunk_port *tp = lp->lp_trunk;
+ struct trunk_softc *tr = tp->tp_trunk;
+ u_int media = lp->lp_media;
+ uint16_t key;
+
+ KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid interface type"));
+
+ if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
+
+ /*
+ * non-aggregatable links should have unique keys.
+ *
+ * XXX this isn't really unique as if_index is 16 bit.
+ */
+
+ /* bit 0..14: (some bits of) if_index of this port */
+ key = lp->lp_ifp->if_index;
+ /* bit 15: 1 */
+ key |= 0x8000;
+ } else {
+ u_int subtype = IFM_SUBTYPE(media);
+
+ KASSERT((media & IFM_HDX) == 0, ("aggregating HDX interface"));
+
+ /* bit 0..4: IFM_SUBTYPE */
+ key = subtype;
+ /* bit 5..14: (some bits of) if_index of trunk device */
+ key |= 0x7fe0 & ((tr->tr_ifp->if_index) << 5);
+ /* bit 15: 0 */
+ }
+ return (htons(key));
+}
+
+static void
+lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
+ __func__,
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf)),
+ la->la_refcnt, la->la_refcnt + 1));
+
+ KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
+ la->la_refcnt++;
+ KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
+}
+
+static void
+lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
+{
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
+ __func__,
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf)),
+ la->la_refcnt, la->la_refcnt - 1));
+
+ KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
+ la->la_refcnt--;
+ if (la->la_refcnt > 0) {
+ return;
+ }
+
+ KASSERT(la->la_refcnt == 0, ("refcount not zero"));
+ KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
+
+ TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
+
+ free(la, M_DEVBUF);
+}
+
+/*
+ * lacp_aggregator_get: allocate an aggregator.
+ */
+
+static struct lacp_aggregator *
+lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
+{
+ struct lacp_aggregator *la;
+
+ la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
+ if (la) {
+ la->la_refcnt = 1;
+ la->la_nports = 0;
+ TAILQ_INIT(&la->la_ports);
+ la->la_pending = 0;
+ TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
+ }
+
+ return (la);
+}
+
+/*
+ * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
+ */
+
+static void
+lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
+{
+ lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
+ lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
+
+ la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
+}
+
+static void
+lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
+ const struct lacp_peerinfo *lpi_port)
+{
+ memset(lpi_aggr, 0, sizeof(*lpi_aggr));
+ lpi_aggr->lip_systemid = lpi_port->lip_systemid;
+ lpi_aggr->lip_key = lpi_port->lip_key;
+}
+
+/*
+ * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
+ */
+
+static int
+lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
+ const struct lacp_port *lp)
+{
+ if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
+ !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
+ return (0);
+ }
+
+ if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) {
+ return (0);
+ }
+
+ if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) {
+ return (0);
+ }
+
+ if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) {
+ return (0);
+ }
+
+ return (1);
+}
+
+static int
+lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
+ const struct lacp_peerinfo *b)
+{
+ if (memcmp(&a->lip_systemid, &b->lip_systemid,
+ sizeof(a->lip_systemid))) {
+ return (0);
+ }
+
+ if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) {
+ return (0);
+ }
+
+ return (1);
+}
+
+static void
+lacp_port_enable(struct lacp_port *lp)
+{
+ lp->lp_state |= LACP_STATE_AGGREGATION;
+}
+
+static void
+lacp_port_disable(struct lacp_port *lp)
+{
+ lacp_set_mux(lp, LACP_MUX_DETACHED);
+
+ lp->lp_state &= ~LACP_STATE_AGGREGATION;
+ lp->lp_selected = LACP_UNSELECTED;
+ lacp_sm_rx_record_default(lp);
+ lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
+ lp->lp_state &= ~LACP_STATE_EXPIRED;
+}
+
+/*
+ * lacp_select: select an aggregator. create one if necessary.
+ */
+static void
+lacp_select(struct lacp_port *lp)
+{
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lacp_aggregator *la;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ if (lp->lp_aggregator) {
+ return;
+ }
+
+ KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+ ("timer_wait_while still active"));
+
+ LACP_DPRINTF((lp, "port lagid=%s\n",
+ lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
+ buf, sizeof(buf))));
+
+ TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
+ if (lacp_aggregator_is_compatible(la, lp)) {
+ break;
+ }
+ }
+
+ if (la == NULL) {
+ la = lacp_aggregator_get(lsc, lp);
+ if (la == NULL) {
+ LACP_DPRINTF((lp, "aggregator creation failed\n"));
+
+ /*
+ * will retry on the next tick.
+ */
+
+ return;
+ }
+ lacp_fill_aggregator_id(la, lp);
+ LACP_DPRINTF((lp, "aggregator created\n"));
+ } else {
+ LACP_DPRINTF((lp, "compatible aggregator found\n"));
+ lacp_aggregator_addref(lsc, la);
+ }
+
+ LACP_DPRINTF((lp, "aggregator lagid=%s\n",
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf))));
+
+ lp->lp_aggregator = la;
+ lp->lp_selected = LACP_SELECTED;
+}
+
+/*
+ * lacp_unselect: finish unselect/detach process.
+ */
+
+static void
+lacp_unselect(struct lacp_port *lp)
+{
+ struct lacp_softc *lsc = lp->lp_lsc;
+ struct lacp_aggregator *la = lp->lp_aggregator;
+
+ KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+ ("timer_wait_while still active"));
+
+ if (la == NULL) {
+ return;
+ }
+
+ lp->lp_aggregator = NULL;
+ lacp_aggregator_delref(lsc, la);
+}
+
+/* mux machine */
+
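+/*
+ * lacp_sm_mux: mux machine.
+ *
+ * A selected port moves DETACHED -> WAITING -> ATTACHED, advances to
+ * COLLECTING once the partner reports SYNC, and to DISTRIBUTING once the
+ * partner is also collecting; lacp_enable_distributing() then places it on
+ * the aggregator's active port list.  Loss of selection or of partner sync
+ * walks the port back down the same chain via lacp_set_mux().
+ */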
+static void
+lacp_sm_mux(struct lacp_port *lp)
+{
+ enum lacp_mux_state new_state;
+ boolean_t p_sync =
+ (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
+ boolean_t p_collecting =
+ (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
+ enum lacp_selected selected = lp->lp_selected;
+ struct lacp_aggregator *la;
+
+ /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
+
+re_eval:
+ la = lp->lp_aggregator;
+ KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
+ ("MUX not detached"));
+ new_state = lp->lp_mux_state;
+ switch (lp->lp_mux_state) {
+ case LACP_MUX_DETACHED:
+ if (selected != LACP_UNSELECTED) {
+ new_state = LACP_MUX_WAITING;
+ }
+ break;
+ case LACP_MUX_WAITING:
+ KASSERT(la->la_pending > 0 ||
+ !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
+ ("timer_wait_while still active"));
+ if (selected == LACP_SELECTED && la->la_pending == 0) {
+ new_state = LACP_MUX_ATTACHED;
+ } else if (selected == LACP_UNSELECTED) {
+ new_state = LACP_MUX_DETACHED;
+ }
+ break;
+ case LACP_MUX_ATTACHED:
+ if (selected == LACP_SELECTED && p_sync) {
+ new_state = LACP_MUX_COLLECTING;
+ } else if (selected != LACP_SELECTED) {
+ new_state = LACP_MUX_DETACHED;
+ }
+ break;
+ case LACP_MUX_COLLECTING:
+ if (selected == LACP_SELECTED && p_sync && p_collecting) {
+ new_state = LACP_MUX_DISTRIBUTING;
+ } else if (selected != LACP_SELECTED || !p_sync) {
+ new_state = LACP_MUX_ATTACHED;
+ }
+ break;
+ case LACP_MUX_DISTRIBUTING:
+ if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
+ new_state = LACP_MUX_COLLECTING;
+ }
+ break;
+ default:
+ panic("%s: unknown state", __func__);
+ }
+
+ if (lp->lp_mux_state == new_state) {
+ return;
+ }
+
+ lacp_set_mux(lp, new_state);
+ goto re_eval;
+}
+
+static void
+lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+
+ if (lp->lp_mux_state == new_state) {
+ return;
+ }
+
+ switch (new_state) {
+ case LACP_MUX_DETACHED:
+ lp->lp_state &= ~LACP_STATE_SYNC;
+ lacp_disable_distributing(lp);
+ lacp_disable_collecting(lp);
+ lacp_sm_assert_ntt(lp);
+ /* cancel timer */
+ if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
+ KASSERT(la->la_pending > 0,
+ ("timer_wait_while not active"));
+ la->la_pending--;
+ }
+ LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
+ lacp_unselect(lp);
+ break;
+ case LACP_MUX_WAITING:
+ LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
+ LACP_AGGREGATE_WAIT_TIME);
+ la->la_pending++;
+ break;
+ case LACP_MUX_ATTACHED:
+ lp->lp_state |= LACP_STATE_SYNC;
+ lacp_disable_collecting(lp);
+ lacp_sm_assert_ntt(lp);
+ break;
+ case LACP_MUX_COLLECTING:
+ lacp_enable_collecting(lp);
+ lacp_disable_distributing(lp);
+ lacp_sm_assert_ntt(lp);
+ break;
+ case LACP_MUX_DISTRIBUTING:
+ lacp_enable_distributing(lp);
+ break;
+ default:
+ panic("%s: unknown state", __func__);
+ }
+
+ LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
+
+ lp->lp_mux_state = new_state;
+}
+
+static void
+lacp_sm_mux_timer(struct lacp_port *lp)
+{
+ struct lacp_aggregator *la = lp->lp_aggregator;
+#if defined(LACP_DEBUG)
+ char buf[LACP_LAGIDSTR_MAX+1];
+#endif
+
+ KASSERT(la->la_pending > 0, ("no pending event"));
+
+ LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
+ lacp_format_lagid(&la->la_actor, &la->la_partner,
+ buf, sizeof(buf)),
+ la->la_pending, la->la_pending - 1));
+
+ la->la_pending--;
+}
+
+/* periodic transmit machine */
+
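+/*
+ * The periodic machine arms LACP_TIMER_PERIODIC with the fast or the slow
+ * interval depending on the LACP_STATE_TIMEOUT bit advertised by the
+ * partner (1 or 30 seconds in IEEE 802.3ad terms), and lacp_sm_ptx_timer()
+ * simply asserts NTT so that the transmit machine sends the next LACPDU.
+ */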
+static void
+lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
+{
+ if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
+ LACP_STATE_TIMEOUT)) {
+ return;
+ }
+
+ LACP_DPRINTF((lp, "partner timeout changed\n"));
+
+ /*
+ * FAST_PERIODIC -> SLOW_PERIODIC
+ * or
+ * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
+ *
+ * let lacp_sm_ptx_tx_schedule update the timeout.
+ */
+
+ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
+
+ /*
+ * if timeout has been shortened, assert NTT.
+ */
+
+ if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
+ lacp_sm_assert_ntt(lp);
+ }
+}
+
+static void
+lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
+{
+ int timeout;
+
+ if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
+ !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
+
+ /*
+ * NO_PERIODIC
+ */
+
+ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
+ return;
+ }
+
+ if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
+ return;
+ }
+
+ timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
+ LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
+
+ LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
+}
+
+static void
+lacp_sm_ptx_timer(struct lacp_port *lp)
+{
+ lacp_sm_assert_ntt(lp);
+}
+
+static void
+lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
+{
+ int timeout;
+
+ /*
+ * check LACP_DISABLED first
+ */
+
+ if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
+ return;
+ }
+
+ /*
+ * check loopback condition.
+ */
+
+ if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
+ &lp->lp_actor.lip_systemid)) {
+ return;
+ }
+
+ /*
+ * EXPIRED, DEFAULTED, CURRENT -> CURRENT
+ */
+
+ lacp_sm_rx_update_selected(lp, du);
+ lacp_sm_rx_update_ntt(lp, du);
+ lacp_sm_rx_record_pdu(lp, du);
+
+ timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
+ LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
+ LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
+
+ lp->lp_state &= ~LACP_STATE_EXPIRED;
+
+ /*
+ * kick the transmit machine without waiting for the next tick.
+ */
+
+ lacp_sm_tx(lp);
+}
+
+static void
+lacp_sm_rx_set_expired(struct lacp_port *lp)
+{
+ lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
+ lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
+ LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
+ lp->lp_state |= LACP_STATE_EXPIRED;
+}
+
+static void
+lacp_sm_rx_timer(struct lacp_port *lp)
+{
+ if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
+ /* CURRENT -> EXPIRED */
+ LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
+ lacp_sm_rx_set_expired(lp);
+ } else {
+ /* EXPIRED -> DEFAULTED */
+ LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
+ lacp_sm_rx_update_default_selected(lp);
+ lacp_sm_rx_record_default(lp);
+ lp->lp_state &= ~LACP_STATE_EXPIRED;
+ }
+}
+
+static void
+lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
+{
+ boolean_t active;
+ uint8_t oldpstate;
+#if defined(LACP_DEBUG)
+ char buf[LACP_STATESTR_MAX+1];
+#endif
+
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ oldpstate = lp->lp_partner.lip_state;
+
+ active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
+ || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
+ (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
+
+ lp->lp_partner = du->ldu_actor;
+ if (active &&
+ ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
+ LACP_STATE_AGGREGATION) &&
+ !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
+ || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
+ /* XXX nothing? */
+ } else {
+ lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
+ }
+
+ lp->lp_state &= ~LACP_STATE_DEFAULTED;
+
+ if (oldpstate != lp->lp_partner.lip_state) {
+ LACP_DPRINTF((lp, "old pstate %s\n",
+ lacp_format_state(oldpstate, buf, sizeof(buf))));
+ LACP_DPRINTF((lp, "new pstate %s\n",
+ lacp_format_state(lp->lp_partner.lip_state, buf,
+ sizeof(buf))));
+ }
+
+ lacp_sm_ptx_update_timeout(lp, oldpstate);
+}
+
+static void
+lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
+ !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
+ LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
+ LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
+ lacp_sm_assert_ntt(lp);
+ }
+}
+
+static void
+lacp_sm_rx_record_default(struct lacp_port *lp)
+{
+ uint8_t oldpstate;
+
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ oldpstate = lp->lp_partner.lip_state;
+ lp->lp_partner = lacp_partner_admin;
+ lp->lp_state |= LACP_STATE_DEFAULTED;
+ lacp_sm_ptx_update_timeout(lp, oldpstate);
+}
+
+static void
+lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
+ const struct lacp_peerinfo *info)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
+ !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
+ LACP_STATE_AGGREGATION)) {
+ lp->lp_selected = LACP_UNSELECTED;
+ /* mux machine will clean up lp->lp_aggregator */
+ }
+}
+
+static void
+lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
+}
+
+static void
+lacp_sm_rx_update_default_selected(struct lacp_port *lp)
+{
+ /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+}
+
+/* transmit machine */
+
+static void
+lacp_sm_tx(struct lacp_port *lp)
+{
+ int error;
+
+ if (!(lp->lp_state & LACP_STATE_AGGREGATION)
+#if 1
+ || (!(lp->lp_state & LACP_STATE_ACTIVITY)
+ && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
+#endif
+ ) {
+ lp->lp_flags &= ~LACP_PORT_NTT;
+ }
+
+ if (!(lp->lp_flags & LACP_PORT_NTT)) {
+ return;
+ }
+
+ /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
+ if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
+ (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
+ LACP_DPRINTF((lp, "rate limited pdu\n"));
+ return;
+ }
+
+ error = lacp_xmit_lacpdu(lp);
+
+ if (error == 0) {
+ lp->lp_flags &= ~LACP_PORT_NTT;
+ } else {
+ LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
+ error));
+ }
+}
+
+static void
+lacp_sm_assert_ntt(struct lacp_port *lp)
+{
+
+ lp->lp_flags |= LACP_PORT_NTT;
+}
+
+static void
+lacp_run_timers(struct lacp_port *lp)
+{
+ int i;
+
+ for (i = 0; i < LACP_NTIMER; i++) {
+ KASSERT(lp->lp_timer[i] >= 0,
+ ("invalid timer value %d", lp->lp_timer[i]));
+ if (lp->lp_timer[i] == 0) {
+ continue;
+ } else if (--lp->lp_timer[i] <= 0) {
+ if (lacp_timer_funcs[i]) {
+ (*lacp_timer_funcs[i])(lp);
+ }
+ }
+ }
+}
+
+int
+lacp_marker_input(struct trunk_port *tp, struct mbuf *m)
+{
+ struct lacp_port *lp = LACP_PORT(tp);
+ struct markerdu *mdu;
+ int error = 0;
+
+ TRUNK_LOCK_ASSERT(tp->tp_trunk);
+
+ if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
+ goto bad;
+ }
+
+ if (m->m_pkthdr.len != sizeof(*mdu)) {
+ goto bad;
+ }
+
+ if ((m->m_flags & M_MCAST) == 0) {
+ goto bad;
+ }
+
+ if (m->m_len < sizeof(*mdu)) {
+ m = m_pullup(m, sizeof(*mdu));
+ if (m == NULL) {
+ return (ENOMEM);
+ }
+ }
+
+ mdu = mtod(m, struct markerdu *);
+
+ if (memcmp(&mdu->mdu_eh.ether_dhost,
+ &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
+ goto bad;
+ }
+
+ /* XXX
+ KASSERT(mdu->mdu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_MARKER,
+ ("a very bad kassert!"));
+ */
+
+ if (mdu->mdu_sph.sph_version != 1) {
+ goto bad;
+ }
+
+ switch (mdu->mdu_tlv.tlv_type) {
+ case MARKER_TYPE_INFO:
+ if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
+ marker_info_tlv_template, TRUE)) {
+ goto bad;
+ }
+ mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
+ memcpy(&mdu->mdu_eh.ether_dhost,
+ &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
+ memcpy(&mdu->mdu_eh.ether_shost,
+ tp->tp_lladdr, ETHER_ADDR_LEN);
+ error = trunk_enqueue(lp->lp_ifp, m);
+ break;
+
+ case MARKER_TYPE_RESPONSE:
+ if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
+ marker_response_tlv_template, TRUE)) {
+ goto bad;
+ }
+ /*
+ * we are not interested in responses as
+ * we don't have a marker sender.
+ */
+ /* FALLTHROUGH */
+ default:
+ goto bad;
+ }
+
+ return (error);
+
+bad:
+ m_freem(m);
+ return (EINVAL);
+}
+
+static int
+tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
+ const struct tlv_template *tmpl, boolean_t check_type)
+{
+ while (/* CONSTCOND */ 1) {
+ if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
+ return (EINVAL);
+ }
+ if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
+ tlv->tlv_length != tmpl->tmpl_length) {
+ return (EINVAL);
+ }
+ if (tmpl->tmpl_type == 0) {
+ break;
+ }
+ tlv = (const struct tlvhdr *)
+ ((const char *)tlv + tlv->tlv_length);
+ tmpl++;
+ }
+
+ return (0);
+}
+
+#if defined(LACP_DEBUG)
+const char *
+lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
+{
+ snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
+ (int)mac[0],
+ (int)mac[1],
+ (int)mac[2],
+ (int)mac[3],
+ (int)mac[4],
+ (int)mac[5]);
+
+ return (buf);
+}
+
+const char *
+lacp_format_systemid(const struct lacp_systemid *sysid,
+ char *buf, size_t buflen)
+{
+ char macbuf[LACP_MACSTR_MAX+1];
+
+ snprintf(buf, buflen, "%04X,%s",
+ ntohs(sysid->lsi_prio),
+ lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
+
+ return (buf);
+}
+
+const char *
+lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
+{
+ snprintf(buf, buflen, "%04X,%04X",
+ ntohs(portid->lpi_prio),
+ ntohs(portid->lpi_portno));
+
+ return (buf);
+}
+
+const char *
+lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
+{
+ char sysid[LACP_SYSTEMIDSTR_MAX+1];
+ char portid[LACP_PORTIDSTR_MAX+1];
+
+ snprintf(buf, buflen, "(%s,%04X,%s)",
+ lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
+ ntohs(peer->lip_key),
+ lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
+
+ return (buf);
+}
+
+const char *
+lacp_format_lagid(const struct lacp_peerinfo *a,
+ const struct lacp_peerinfo *b, char *buf, size_t buflen)
+{
+ char astr[LACP_PARTNERSTR_MAX+1];
+ char bstr[LACP_PARTNERSTR_MAX+1];
+
+#if 0
+ /*
+ * there's a convention to display the lower-numbered peer
+ * on the left.
+ */
+
+ if (lacp_compare_peerinfo(a, b) > 0) {
+ const struct lacp_peerinfo *t;
+
+ t = a;
+ a = b;
+ b = t;
+ }
+#endif
+
+ snprintf(buf, buflen, "[%s,%s]",
+ lacp_format_partner(a, astr, sizeof(astr)),
+ lacp_format_partner(b, bstr, sizeof(bstr)));
+
+ return (buf);
+}
+
+const char *
+lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
+ char *buf, size_t buflen)
+{
+ if (la == NULL) {
+ return ("(none)");
+ }
+
+ return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
+}
+
+const char *
+lacp_format_state(uint8_t state, char *buf, size_t buflen)
+{
+ snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
+ return (buf);
+}
+
+static void
+lacp_dump_lacpdu(const struct lacpdu *du)
+{
+ char buf[LACP_PARTNERSTR_MAX+1];
+ char buf2[LACP_STATESTR_MAX+1];
+
+ printf("actor=%s\n",
+ lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
+ printf("actor.state=%s\n",
+ lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
+ printf("partner=%s\n",
+ lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
+ printf("partner.state=%s\n",
+ lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
+
+ printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
+}
+
+static void
+lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
+{
+ va_list va;
+
+ if (lp) {
+ printf("%s: ", lp->lp_ifp->if_xname);
+ }
+
+ va_start(va, fmt);
+ vprintf(fmt, va);
+ va_end(va);
+}
+#endif
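
tlv_check() above validates a received PDU against a fixed type/length
template rather than parsing arbitrary TLVs.  The standalone sketch below
(not part of this commit) shows the same walk; the structures are re-declared
locally to keep it self-contained and the buffer contents are made up.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tlvhdr {
    uint8_t tlv_type;
    uint8_t tlv_length;         /* includes the two header bytes */
};

struct tlv_template {
    uint8_t tmpl_type;
    uint8_t tmpl_length;
};

/* Walk the buffer, requiring each TLV to match the template exactly,
 * stopping at the zero-type terminator. */
static int
check_tlvs(const uint8_t *p, size_t size, const struct tlv_template *tmpl)
{
    size_t off = 0;

    for (;;) {
        struct tlvhdr tlv;

        if (off + sizeof(tlv) > size)
            return (-1);
        memcpy(&tlv, p + off, sizeof(tlv));
        if (tlv.tlv_type != tmpl->tmpl_type ||
            tlv.tlv_length != tmpl->tmpl_length)
            return (-1);
        if (tmpl->tmpl_type == 0)       /* terminator */
            return (0);
        off += tlv.tlv_length;
        tmpl++;
    }
}

int
main(void)
{
    /* Hypothetical PDU: one 4-byte info TLV followed by a terminator. */
    const uint8_t pdu[] = { 0x01, 0x04, 0xde, 0xad, 0x00, 0x00 };
    const struct tlv_template tmpl[] = {
        { 0x01, 4 },
        { 0, 0 },
    };

    printf("check_tlvs: %s\n",
        check_tlvs(pdu, sizeof(pdu), tmpl) == 0 ? "ok" : "mismatch");
    return (0);
}
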
diff --git a/sys/net/ieee8023ad_lacp.h b/sys/net/ieee8023ad_lacp.h
new file mode 100644
index 000000000000..95d3ae9693e1
--- /dev/null
+++ b/sys/net/ieee8023ad_lacp.h
@@ -0,0 +1,289 @@
+/* $NetBSD: ieee8023ad_impl.h,v 1.2 2005/12/10 23:21:39 elad Exp $ */
+
+/*-
+ * Copyright (c)2005 YAMAMOTO Takashi,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * IEEE802.3ad LACP
+ *
+ * implementation details.
+ */
+
+#define LACP_TIMER_CURRENT_WHILE 0
+#define LACP_TIMER_PERIODIC 1
+#define LACP_TIMER_WAIT_WHILE 2
+#define LACP_NTIMER 3
+
+#define LACP_TIMER_ARM(port, timer, val) \
+ (port)->lp_timer[(timer)] = (val)
+#define LACP_TIMER_DISARM(port, timer) \
+ (port)->lp_timer[(timer)] = 0
+#define LACP_TIMER_ISARMED(port, timer) \
+ ((port)->lp_timer[(timer)] > 0)
+
+/*
+ * IEEE802.3ad LACP
+ *
+ * protocol definitions.
+ */
+
+#define LACP_STATE_ACTIVITY (1<<0)
+#define LACP_STATE_TIMEOUT (1<<1)
+#define LACP_STATE_AGGREGATION (1<<2)
+#define LACP_STATE_SYNC (1<<3)
+#define LACP_STATE_COLLECTING (1<<4)
+#define LACP_STATE_DISTRIBUTING (1<<5)
+#define LACP_STATE_DEFAULTED (1<<6)
+#define LACP_STATE_EXPIRED (1<<7)
+
+#define LACP_PORT_NTT 0x00000001
+#define LACP_PORT_PROMISC 0x00000004
+#define LACP_PORT_LADDRCHANGED 0x00000008
+#define LACP_PORT_ATTACHED 0x00000010
+#define LACP_PORT_LARVAL 0x00000020
+#define LACP_PORT_DETACHING 0x00000040
+
+#define LACP_STATE_BITS \
+ "\020" \
+ "\001ACTIVITY" \
+ "\002TIMEOUT" \
+ "\003AGGREGATION" \
+ "\004SYNC" \
+ "\005COLLECTING" \
+ "\006DISTRIBUTING" \
+ "\007DEFAULTED" \
+ "\010EXPIRED"
+
+/*
+ * IEEE802.3 slow protocols
+ *
+ * protocol (on-wire) definitions.
+ *
+ * XXX should be elsewhere.
+ */
+
+#define SLOWPROTOCOLS_SUBTYPE_LACP 1
+#define SLOWPROTOCOLS_SUBTYPE_MARKER 2
+
+struct slowprothdr {
+ uint8_t sph_subtype;
+ uint8_t sph_version;
+} __packed;
+
+/*
+ * TLV on-wire structure.
+ */
+
+struct tlvhdr {
+ uint8_t tlv_type;
+ uint8_t tlv_length;
+ /* uint8_t tlv_value[]; */
+} __packed;
+
+/*
+ * ... and our implementation.
+ */
+
+#define TLV_SET(tlv, type, length) \
+ do { \
+ (tlv)->tlv_type = (type); \
+ (tlv)->tlv_length = sizeof(*tlv) + (length); \
+ } while (/*CONSTCOND*/0)
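+
+/*
+ * Illustrative use on the transmit path: the stored length covers the
+ * TLV header plus its value, so filling the actor TLV of a LACPDU would
+ * look roughly like
+ *
+ *	TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO,
+ *	    sizeof(du->ldu_actor));
+ */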
+
+struct tlv_template {
+ uint8_t tmpl_type;
+ uint8_t tmpl_length;
+};
+
+struct lacp_systemid {
+ uint16_t lsi_prio;
+ uint8_t lsi_mac[6];
+} __packed;
+
+struct lacp_portid {
+ uint16_t lpi_prio;
+ uint16_t lpi_portno;
+} __packed;
+
+struct lacp_peerinfo {
+ struct lacp_systemid lip_systemid;
+ uint16_t lip_key;
+ struct lacp_portid lip_portid;
+ uint8_t lip_state;
+ uint8_t lip_resv[3];
+} __packed;
+
+struct lacp_collectorinfo {
+ uint16_t lci_maxdelay;
+ uint8_t lci_resv[12];
+} __packed;
+
+struct lacpdu {
+ struct ether_header ldu_eh;
+ struct slowprothdr ldu_sph;
+
+ struct tlvhdr ldu_tlv_actor;
+ struct lacp_peerinfo ldu_actor;
+ struct tlvhdr ldu_tlv_partner;
+ struct lacp_peerinfo ldu_partner;
+ struct tlvhdr ldu_tlv_collector;
+ struct lacp_collectorinfo ldu_collector;
+ struct tlvhdr ldu_tlv_term;
+ uint8_t ldu_resv[50];
+} __packed;
+
+#define LACP_TRANSIT_DELAY 1000 /* in msec */
+
+enum lacp_selected {
+ LACP_UNSELECTED,
+ LACP_STANDBY, /* not used in this implementation */
+ LACP_SELECTED,
+};
+
+enum lacp_mux_state {
+ LACP_MUX_DETACHED,
+ LACP_MUX_WAITING,
+ LACP_MUX_ATTACHED,
+ LACP_MUX_COLLECTING,
+ LACP_MUX_DISTRIBUTING,
+};
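+
+/*
+ * The mux states above follow the 802.3ad mux machine in order:
+ * DETACHED -> WAITING -> ATTACHED -> COLLECTING -> DISTRIBUTING.
+ * A port accepts trunked traffic while COLLECTING and carries
+ * outgoing traffic only once it reaches DISTRIBUTING.
+ */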
+
+struct lacp_port {
+ TAILQ_ENTRY(lacp_port) lp_dist_q;
+ LIST_ENTRY(lacp_port) lp_next;
+ struct lacp_softc *lp_lsc;
+ struct trunk_port *lp_trunk;
+ struct ifnet *lp_ifp;
+ struct lacp_peerinfo lp_partner;
+ struct lacp_peerinfo lp_actor;
+#define lp_state lp_actor.lip_state
+#define lp_key lp_actor.lip_key
+ struct timeval lp_last_lacpdu;
+ int lp_lacpdu_sent;
+ enum lacp_mux_state lp_mux_state;
+ enum lacp_selected lp_selected;
+ int lp_flags;
+ u_int lp_media; /* XXX redundant */
+ int lp_timer[LACP_NTIMER];
+
+ struct lacp_aggregator *lp_aggregator;
+};
+
+struct lacp_aggregator {
+ TAILQ_ENTRY(lacp_aggregator) la_q;
+ int la_refcnt; /* num of ports which selected us */
+ int la_nports; /* num of distributing ports */
+ TAILQ_HEAD(, lacp_port) la_ports; /* distributing ports */
+ struct lacp_peerinfo la_partner;
+ struct lacp_peerinfo la_actor;
+	int			la_pending; /* number of ports with wait_while still running */
+};
+
+struct lacp_softc {
+ struct trunk_softc *lsc_trunk;
+ struct lacp_aggregator *lsc_active_aggregator;
+ TAILQ_HEAD(, lacp_aggregator) lsc_aggregators;
+ boolean_t lsc_suppress_distributing;
+ struct callout lsc_transit_callout;
+ struct callout lsc_callout;
+ LIST_HEAD(, lacp_port) lsc_ports;
+ u_int32_t lsc_hashkey;
+};
+
+#define LACP_TYPE_ACTORINFO 1
+#define LACP_TYPE_PARTNERINFO 2
+#define LACP_TYPE_COLLECTORINFO 3
+
+/* timeout values (in sec) */
+#define LACP_FAST_PERIODIC_TIME (1)
+#define LACP_SLOW_PERIODIC_TIME (30)
+#define LACP_SHORT_TIMEOUT_TIME (3 * LACP_FAST_PERIODIC_TIME)
+#define LACP_LONG_TIMEOUT_TIME (3 * LACP_SLOW_PERIODIC_TIME)
+#define LACP_CHURN_DETECTION_TIME (60)
+#define LACP_AGGREGATE_WAIT_TIME (2)
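+
+/*
+ * The short/long timeouts are three times the corresponding periodic
+ * transmission interval, matching the 802.3ad convention that partner
+ * information expires after three missed LACPDUs.
+ */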
+
+/*
+int tlv_check(const void *, size_t, const struct tlvhdr *,
+ const struct tlv_template *, boolean_t);
+*/
+
+/*
+ * IEEE802.3ad marker protocol
+ *
+ * protocol (on-wire) definitions.
+ */
+
+struct markerdu {
+ struct ether_header mdu_eh;
+ struct slowprothdr mdu_sph;
+
+ struct tlvhdr mdu_tlv;
+ uint16_t mdu_rq_port;
+ uint8_t mdu_rq_system[6];
+ uint8_t mdu_rq_xid[4];
+ uint8_t mdu_pad[2];
+
+ struct tlvhdr mdu_tlv_term;
+ uint8_t mdu_resv[90];
+} __packed;
+
+#define MARKER_TYPE_INFO 1
+#define MARKER_TYPE_RESPONSE 2
+
+#define LACP_STATE_EQ(s1, s2, mask) \
+ ((((s1) ^ (s2)) & (mask)) == 0)
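+
+/*
+ * LACP_STATE_EQ compares only the masked state bits; for example
+ *
+ *	LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
+ *	    LACP_STATE_SYNC | LACP_STATE_AGGREGATION)
+ *
+ * asks whether the partner's view of our SYNC/AGGREGATION bits matches
+ * our own (illustrative only).
+ */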
+
+#define LACP_PORT(_tp) ((struct lacp_port *)(_tp)->tp_psc)
+#define LACP_SOFTC(_tr) ((struct lacp_softc *)(_tr)->tr_psc)
+
+int lacp_input(struct trunk_port *, struct mbuf *);
+int lacp_marker_input(struct trunk_port *, struct mbuf *);
+struct trunk_port *lacp_select_tx_port(struct trunk_softc *, struct mbuf *);
+int lacp_attach(struct trunk_softc *);
+int lacp_detach(struct trunk_softc *);
+void lacp_init(struct trunk_softc *);
+void lacp_stop(struct trunk_softc *);
+int lacp_port_create(struct trunk_port *);
+void lacp_port_destroy(struct trunk_port *);
+void lacp_linkstate(struct trunk_port *);
+int lacp_port_isactive(struct trunk_port *);
+
+/* following constants don't include terminating NUL */
+#define LACP_MACSTR_MAX (2*6 + 5)
+#define LACP_SYSTEMPRIOSTR_MAX (4)
+#define LACP_SYSTEMIDSTR_MAX (LACP_SYSTEMPRIOSTR_MAX + 1 + LACP_MACSTR_MAX)
+#define LACP_PORTPRIOSTR_MAX (4)
+#define LACP_PORTNOSTR_MAX (4)
+#define LACP_PORTIDSTR_MAX (LACP_PORTPRIOSTR_MAX + 1 + LACP_PORTNOSTR_MAX)
+#define LACP_KEYSTR_MAX (4)
+#define LACP_PARTNERSTR_MAX \
+ (1 + LACP_SYSTEMIDSTR_MAX + 1 + LACP_KEYSTR_MAX + 1 \
+ + LACP_PORTIDSTR_MAX + 1)
+#define LACP_LAGIDSTR_MAX \
+ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
+#define LACP_STATESTR_MAX (255) /* XXX */
diff --git a/sys/net/if.c b/sys/net/if.c
index 33adc5007978..5eee0d4f0196 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -96,6 +96,7 @@ SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
+void (*trunk_linkstate_p)(struct ifnet *ifp, int state);
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
@@ -1378,6 +1379,10 @@ do_link_state_change(void *arg, int pending)
KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
(*bstp_linkstate_p)(ifp, link_state);
}
+ if (ifp->if_trunk) {
+ KASSERT(trunk_linkstate_p != NULL,("if_trunk not loaded!"));
+ (*trunk_linkstate_p)(ifp, link_state);
+ }
devctl_notify("IFNET", ifp->if_xname,
(link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
@@ -2593,6 +2598,7 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
case IFT_L2VLAN:
case IFT_BRIDGE:
case IFT_ARCNET:
+ case IFT_IEEE8023ADLAG:
bcopy(lladdr, LLADDR(sdl), len);
break;
default:
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 3c9cc8e769af..0ebdc569bc4f 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -113,6 +113,9 @@ int (*bridge_output_p)(struct ifnet *, struct mbuf *,
struct sockaddr *, struct rtentry *);
void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+/* if_trunk(4) support */
+struct mbuf *(*trunk_input_p)(struct ifnet *, struct mbuf *);
+
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -602,6 +605,17 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
return;
}
+ /* Handle input from a trunk(4) port */
+ if (ifp->if_type == IFT_IEEE8023ADLAG) {
+ KASSERT(trunk_input_p != NULL,
+ ("%s: if_trunk not loaded!", __func__));
+ m = (*trunk_input_p)(ifp, m);
+ if (m != NULL)
+ ifp = m->m_pkthdr.rcvif;
+ else
+ return;
+ }
+
/*
* If the hardware did not process an 802.1Q tag, do this now,
* to allow 802.1P priority frames to be passed to the main input
diff --git a/sys/net/if_trunk.c b/sys/net/if_trunk.c
new file mode 100644
index 000000000000..836b1bbf814d
--- /dev/null
+++ b/sys/net/if_trunk.c
@@ -0,0 +1,1590 @@
+/* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/hash.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/bpf.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/if_vlan_var.h>
+#include <net/if_trunk.h>
+#include <net/ieee8023ad_lacp.h>
+
+/* Special flags we should propagate to the trunk ports. */
+static struct {
+ int flag;
+ int (*func)(struct ifnet *, int);
+} trunk_pflags[] = {
+ {IFF_PROMISC, ifpromisc},
+ {IFF_ALLMULTI, if_allmulti},
+ {0, NULL}
+};
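+
+/*
+ * For example, setting IFF_PROMISC on the trunk interface acquires a
+ * promiscuous-mode reference on every member port via ifpromisc(), and
+ * clearing it releases those references again.
+ */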
+
+SLIST_HEAD(__trhead, trunk_softc) trunk_list; /* list of trunks */
+static struct mtx trunk_list_mtx;
+eventhandler_tag trunk_detach_cookie = NULL;
+
+static int trunk_clone_create(struct if_clone *, int, caddr_t);
+static void trunk_clone_destroy(struct ifnet *);
+static void trunk_lladdr(struct trunk_softc *, uint8_t *);
+static int trunk_capabilities(struct trunk_softc *);
+static void trunk_port_lladdr(struct trunk_port *, uint8_t *);
+static int trunk_port_create(struct trunk_softc *, struct ifnet *);
+static int trunk_port_destroy(struct trunk_port *, int);
+static struct mbuf *trunk_input(struct ifnet *, struct mbuf *);
+static void trunk_port_state(struct ifnet *, int);
+static int trunk_port_ioctl(struct ifnet *, u_long, caddr_t);
+static int trunk_port_output(struct ifnet *, struct mbuf *,
+ struct sockaddr *, struct rtentry *);
+static void trunk_port_ifdetach(void *arg __unused, struct ifnet *);
+static int trunk_port_checkstacking(struct trunk_softc *);
+static void trunk_port2req(struct trunk_port *, struct trunk_reqport *);
+static void trunk_init(void *);
+static void trunk_stop(struct trunk_softc *);
+static int trunk_ioctl(struct ifnet *, u_long, caddr_t);
+static int trunk_ether_setmulti(struct trunk_softc *, struct trunk_port *);
+static int trunk_ether_purgemulti(struct trunk_softc *,
+ struct trunk_port *);
+static int trunk_setflag(struct trunk_port *, int, int,
+ int (*func)(struct ifnet *, int));
+static int trunk_setflags(struct trunk_port *, int status);
+static void trunk_start(struct ifnet *);
+static int trunk_media_change(struct ifnet *);
+static void trunk_media_status(struct ifnet *, struct ifmediareq *);
+static struct trunk_port *trunk_link_active(struct trunk_softc *,
+ struct trunk_port *);
+static const void *trunk_gethdr(struct mbuf *, u_int, u_int, void *);
+
+IFC_SIMPLE_DECLARE(trunk, 0);
+
+/* Simple round robin */
+static int trunk_rr_attach(struct trunk_softc *);
+static int trunk_rr_detach(struct trunk_softc *);
+static void trunk_rr_port_destroy(struct trunk_port *);
+static int trunk_rr_start(struct trunk_softc *, struct mbuf *);
+static struct mbuf *trunk_rr_input(struct trunk_softc *, struct trunk_port *,
+ struct mbuf *);
+
+/* Active failover */
+static int trunk_fail_attach(struct trunk_softc *);
+static int trunk_fail_detach(struct trunk_softc *);
+static int trunk_fail_start(struct trunk_softc *, struct mbuf *);
+static struct mbuf *trunk_fail_input(struct trunk_softc *, struct trunk_port *,
+ struct mbuf *);
+
+/* Loadbalancing */
+static int trunk_lb_attach(struct trunk_softc *);
+static int trunk_lb_detach(struct trunk_softc *);
+static int trunk_lb_port_create(struct trunk_port *);
+static void trunk_lb_port_destroy(struct trunk_port *);
+static int trunk_lb_start(struct trunk_softc *, struct mbuf *);
+static struct mbuf *trunk_lb_input(struct trunk_softc *, struct trunk_port *,
+ struct mbuf *);
+static int trunk_lb_porttable(struct trunk_softc *, struct trunk_port *);
+
+/* 802.3ad LACP */
+static int trunk_lacp_attach(struct trunk_softc *);
+static int trunk_lacp_detach(struct trunk_softc *);
+static int trunk_lacp_start(struct trunk_softc *, struct mbuf *);
+static struct mbuf *trunk_lacp_input(struct trunk_softc *, struct trunk_port *,
+ struct mbuf *);
+static void trunk_lacp_lladdr(struct trunk_softc *);
+
+/* Trunk protocol table */
+static const struct {
+ int ti_proto;
+ int (*ti_attach)(struct trunk_softc *);
+} trunk_protos[] = {
+ { TRUNK_PROTO_ROUNDROBIN, trunk_rr_attach },
+ { TRUNK_PROTO_FAILOVER, trunk_fail_attach },
+ { TRUNK_PROTO_LOADBALANCE, trunk_lb_attach },
+ { TRUNK_PROTO_ETHERCHANNEL, trunk_lb_attach },
+ { TRUNK_PROTO_LACP, trunk_lacp_attach },
+ { TRUNK_PROTO_NONE, NULL }
+};
+
+static int
+trunk_modevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&trunk_list_mtx, "if_trunk list", NULL, MTX_DEF);
+ SLIST_INIT(&trunk_list);
+ if_clone_attach(&trunk_cloner);
+ trunk_input_p = trunk_input;
+ trunk_linkstate_p = trunk_port_state;
+ trunk_detach_cookie = EVENTHANDLER_REGISTER(
+ ifnet_departure_event, trunk_port_ifdetach, NULL,
+ EVENTHANDLER_PRI_ANY);
+ break;
+ case MOD_UNLOAD:
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+ trunk_detach_cookie);
+ if_clone_detach(&trunk_cloner);
+ trunk_input_p = NULL;
+ trunk_linkstate_p = NULL;
+ mtx_destroy(&trunk_list_mtx);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t trunk_mod = {
+ "if_trunk",
+ trunk_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_trunk, trunk_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+
+static int
+trunk_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct trunk_softc *tr;
+ struct ifnet *ifp;
+ int i, error = 0;
+ static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
+
+ tr = malloc(sizeof(*tr), M_DEVBUF, M_WAITOK|M_ZERO);
+ ifp = tr->tr_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ free(tr, M_DEVBUF);
+ return (ENOSPC);
+ }
+
+ tr->tr_proto = TRUNK_PROTO_NONE;
+ for (i = 0; trunk_protos[i].ti_proto != TRUNK_PROTO_NONE; i++) {
+ if (trunk_protos[i].ti_proto == TRUNK_PROTO_DEFAULT) {
+ tr->tr_proto = trunk_protos[i].ti_proto;
+ if ((error = trunk_protos[i].ti_attach(tr)) != 0) {
+ if_free_type(ifp, IFT_ETHER);
+ free(tr, M_DEVBUF);
+ return (error);
+ }
+ break;
+ }
+ }
+ TRUNK_LOCK_INIT(tr);
+ SLIST_INIT(&tr->tr_ports);
+
+ /* Initialise pseudo media types */
+ ifmedia_init(&tr->tr_media, 0, trunk_media_change,
+ trunk_media_status);
+ ifmedia_add(&tr->tr_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&tr->tr_media, IFM_ETHER | IFM_AUTO);
+
+ if_initname(ifp, ifc->ifc_name, unit);
+ ifp->if_type = IFT_ETHER;
+ ifp->if_softc = tr;
+ ifp->if_start = trunk_start;
+ ifp->if_init = trunk_init;
+ ifp->if_ioctl = trunk_ioctl;
+ ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
+ IFQ_SET_READY(&ifp->if_snd);
+
+ /*
+	 * Attach as an ordinary Ethernet device; child ports will be
+	 * attached as the special device type IFT_IEEE8023ADLAG.
+ */
+ ether_ifattach(ifp, eaddr);
+
+ /* Insert into the global list of trunks */
+ mtx_lock(&trunk_list_mtx);
+ SLIST_INSERT_HEAD(&trunk_list, tr, tr_entries);
+ mtx_unlock(&trunk_list_mtx);
+
+ return (0);
+}
+
+static void
+trunk_clone_destroy(struct ifnet *ifp)
+{
+ struct trunk_softc *tr = (struct trunk_softc *)ifp->if_softc;
+ struct trunk_port *tp;
+
+ TRUNK_LOCK(tr);
+
+ trunk_stop(tr);
+ ifp->if_flags &= ~IFF_UP;
+
+ /* Remove any multicast groups that we may have joined. */
+ trunk_ether_purgemulti(tr, NULL);
+
+ /* Shutdown and remove trunk ports */
+ while ((tp = SLIST_FIRST(&tr->tr_ports)) != NULL)
+ trunk_port_destroy(tp, 1);
+ /* Unhook the trunking protocol */
+ if (tr->tr_detach != NULL)
+ (*tr->tr_detach)(tr);
+
+ TRUNK_UNLOCK(tr);
+
+ ifmedia_removeall(&tr->tr_media);
+ ether_ifdetach(ifp);
+ if_free_type(ifp, IFT_ETHER);
+
+ mtx_lock(&trunk_list_mtx);
+ SLIST_REMOVE(&trunk_list, tr, trunk_softc, tr_entries);
+ mtx_unlock(&trunk_list_mtx);
+
+ TRUNK_LOCK_DESTROY(tr);
+ free(tr, M_DEVBUF);
+}
+
+static void
+trunk_lladdr(struct trunk_softc *tr, uint8_t *lladdr)
+{
+ struct ifnet *ifp = tr->tr_ifp;
+
+ if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+ return;
+
+ bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ /* Let the protocol know the MAC has changed */
+ if (tr->tr_lladdr != NULL)
+ (*tr->tr_lladdr)(tr);
+}
+
+static int
+trunk_capabilities(struct trunk_softc *tr)
+{
+ struct trunk_port *tp;
+ int cap = ~0, priv;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ /* Preserve private capabilities */
+ priv = tr->tr_capabilities & IFCAP_TRUNK_MASK;
+
+ /* Get capabilities from the trunk ports */
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries)
+ cap &= tp->tp_capabilities;
+
+ if (tr->tr_ifflags & IFF_DEBUG) {
+ printf("%s: capabilities 0x%08x\n",
+ tr->tr_ifname, cap == ~0 ? priv : (cap | priv));
+ }
+
+ return (cap == ~0 ? priv : (cap | priv));
+}
+
+static void
+trunk_port_lladdr(struct trunk_port *tp, uint8_t *lladdr)
+{
+ struct ifnet *ifp = tp->tp_ifp;
+ int error;
+
+ if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+ return;
+
+ /* Set the link layer address */
+ error = if_setlladdr(ifp, lladdr, ETHER_ADDR_LEN);
+ if (error)
+ printf("%s: setlladdr failed on %s\n", __func__, tp->tp_ifname);
+
+}
+
+static int
+trunk_port_create(struct trunk_softc *tr, struct ifnet *ifp)
+{
+ struct trunk_softc *tr_ptr;
+ struct trunk_port *tp;
+ int error = 0;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ /* Limit the maximal number of trunk ports */
+ if (tr->tr_count >= TRUNK_MAX_PORTS)
+ return (ENOSPC);
+
+ /* New trunk port has to be in an idle state */
+ if (ifp->if_drv_flags & IFF_DRV_OACTIVE)
+ return (EBUSY);
+
+	/* Check if the port is already associated with a trunk */
+ if (ifp->if_trunk != NULL)
+ return (EBUSY);
+
+ /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
+ if (ifp->if_type != IFT_ETHER)
+ return (EPROTONOSUPPORT);
+
+ if ((tp = malloc(sizeof(struct trunk_port),
+ M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+ return (ENOMEM);
+
+ /* Check if port is a stacked trunk */
+ mtx_lock(&trunk_list_mtx);
+ SLIST_FOREACH(tr_ptr, &trunk_list, tr_entries) {
+ if (ifp == tr_ptr->tr_ifp) {
+ mtx_unlock(&trunk_list_mtx);
+ free(tp, M_DEVBUF);
+ return (EINVAL);
+			/* XXX disable stacking for the moment, it's untested
+ tp->tp_flags |= TRUNK_PORT_STACK;
+ if (trunk_port_checkstacking(tr_ptr) >=
+ TRUNK_MAX_STACKING) {
+ mtx_unlock(&trunk_list_mtx);
+ free(tp, M_DEVBUF);
+ return (E2BIG);
+ }
+ */
+ }
+ }
+ mtx_unlock(&trunk_list_mtx);
+
+ /* Change the interface type */
+ tp->tp_iftype = ifp->if_type;
+ ifp->if_type = IFT_IEEE8023ADLAG;
+ ifp->if_trunk = tp;
+ tp->tp_ioctl = ifp->if_ioctl;
+ ifp->if_ioctl = trunk_port_ioctl;
+ tp->tp_output = ifp->if_output;
+ ifp->if_output = trunk_port_output;
+
+ tp->tp_ifp = ifp;
+ tp->tp_trunk = tr;
+
+ /* Save port link layer address */
+ bcopy(IF_LLADDR(ifp), tp->tp_lladdr, ETHER_ADDR_LEN);
+
+ if (SLIST_EMPTY(&tr->tr_ports)) {
+ tr->tr_primary = tp;
+ trunk_lladdr(tr, IF_LLADDR(ifp));
+ } else {
+ /* Update link layer address for this port */
+ trunk_port_lladdr(tp, IF_LLADDR(tr->tr_ifp));
+ }
+
+ /* Insert into the list of ports */
+ SLIST_INSERT_HEAD(&tr->tr_ports, tp, tp_entries);
+ tr->tr_count++;
+
+ /* Update trunk capabilities */
+ tr->tr_capabilities = trunk_capabilities(tr);
+
+ /* Add multicast addresses and interface flags to this port */
+ trunk_ether_setmulti(tr, tp);
+ trunk_setflags(tp, 1);
+
+ if (tr->tr_port_create != NULL)
+ error = (*tr->tr_port_create)(tp);
+ if (error) {
+ /* remove the port again, without calling tr_port_destroy */
+ trunk_port_destroy(tp, 0);
+ return (error);
+ }
+
+ return (error);
+}
+
+static int
+trunk_port_checkstacking(struct trunk_softc *tr)
+{
+ struct trunk_softc *tr_ptr;
+ struct trunk_port *tp;
+ int m = 0;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries) {
+ if (tp->tp_flags & TRUNK_PORT_STACK) {
+ tr_ptr = (struct trunk_softc *)tp->tp_ifp->if_softc;
+ m = MAX(m, trunk_port_checkstacking(tr_ptr));
+ }
+ }
+
+ return (m + 1);
+}
+
+static int
+trunk_port_destroy(struct trunk_port *tp, int runpd)
+{
+ struct trunk_softc *tr = tp->tp_trunk;
+ struct trunk_port *tp_ptr;
+ struct ifnet *ifp = tp->tp_ifp;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ if (runpd && tr->tr_port_destroy != NULL)
+ (*tr->tr_port_destroy)(tp);
+
+ /* Remove multicast addresses and interface flags from this port */
+ trunk_ether_purgemulti(tr, tp);
+ trunk_setflags(tp, 0);
+
+ /* Restore interface */
+ ifp->if_type = tp->tp_iftype;
+ ifp->if_ioctl = tp->tp_ioctl;
+ ifp->if_output = tp->tp_output;
+ ifp->if_trunk = NULL;
+
+ /* Finally, remove the port from the trunk */
+ SLIST_REMOVE(&tr->tr_ports, tp, trunk_port, tp_entries);
+ tr->tr_count--;
+
+ /* Update the primary interface */
+ if (tp == tr->tr_primary) {
+ uint8_t lladdr[ETHER_ADDR_LEN];
+
+ if ((tp_ptr = SLIST_FIRST(&tr->tr_ports)) == NULL) {
+ bzero(&lladdr, ETHER_ADDR_LEN);
+ } else {
+ bcopy(tp_ptr->tp_lladdr,
+ lladdr, ETHER_ADDR_LEN);
+ }
+ trunk_lladdr(tr, lladdr);
+ tr->tr_primary = tp_ptr;
+
+ /* Update link layer address for each port */
+ SLIST_FOREACH(tp_ptr, &tr->tr_ports, tp_entries)
+ trunk_port_lladdr(tp_ptr, lladdr);
+ }
+
+ /* Reset the port lladdr */
+ trunk_port_lladdr(tp, tp->tp_lladdr);
+
+ if (tp->tp_ifflags)
+ if_printf(ifp, "%s: tp_ifflags unclean\n", __func__);
+
+ free(tp, M_DEVBUF);
+
+ /* Update trunk capabilities */
+ tr->tr_capabilities = trunk_capabilities(tr);
+
+ return (0);
+}
+
+static int
+trunk_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct trunk_reqport *rp = (struct trunk_reqport *)data;
+ struct trunk_softc *tr;
+ struct trunk_port *tp = NULL;
+ int error = 0;
+
+ /* Should be checked by the caller */
+ if (ifp->if_type != IFT_IEEE8023ADLAG ||
+ (tp = ifp->if_trunk) == NULL || (tr = tp->tp_trunk) == NULL)
+ goto fallback;
+
+ switch (cmd) {
+ case SIOCGTRUNKPORT:
+ TRUNK_LOCK(tr);
+		if (rp->rp_portname[0] == '\0' ||
+		    ifunit(rp->rp_portname) != ifp) {
+			error = EINVAL;
+			TRUNK_UNLOCK(tr);
+			break;
+		}
+
+		if (tp->tp_trunk != tr) {
+			error = ENOENT;
+			TRUNK_UNLOCK(tr);
+			break;
+		}
+
+ trunk_port2req(tp, rp);
+ TRUNK_UNLOCK(tr);
+ break;
+ default:
+ goto fallback;
+ }
+
+ return (error);
+
+fallback:
+ if (tp != NULL)
+ return ((*tp->tp_ioctl)(ifp, cmd, data));
+
+ return (EINVAL);
+}
+
+static int
+trunk_port_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct rtentry *rt0)
+{
+ struct trunk_port *tp = ifp->if_trunk;
+ struct ether_header *eh;
+	short type = 0;	/* avoid using an uninitialized value below */
+
+ switch (dst->sa_family) {
+ case pseudo_AF_HDRCMPLT:
+ case AF_UNSPEC:
+ eh = (struct ether_header *)dst->sa_data;
+ type = eh->ether_type;
+ break;
+ }
+
+ /*
+	 * Only allow Ethernet types required to initiate or maintain the
+	 * link; trunked frames take a different path.
+ */
+ switch (ntohs(type)) {
+ case ETHERTYPE_PAE: /* EAPOL PAE/802.1x */
+ return ((*tp->tp_output)(ifp, m, dst, rt0));
+ }
+
+ /* drop any other frames */
+ m_freem(m);
+ return (EBUSY);
+}
+
+static void
+trunk_port_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct trunk_port *tp;
+ struct trunk_softc *tr;
+
+ if ((tp = ifp->if_trunk) == NULL)
+ return;
+
+ tr = tp->tp_trunk;
+
+ TRUNK_LOCK(tr);
+ trunk_port_destroy(tp, 1);
+ TRUNK_UNLOCK(tr);
+}
+
+static void
+trunk_port2req(struct trunk_port *tp, struct trunk_reqport *rp)
+{
+ struct trunk_softc *tr = tp->tp_trunk;
+ strlcpy(rp->rp_ifname, tr->tr_ifname, sizeof(rp->rp_ifname));
+ strlcpy(rp->rp_portname, tp->tp_ifp->if_xname, sizeof(rp->rp_portname));
+ rp->rp_prio = tp->tp_prio;
+ rp->rp_flags = tp->tp_flags;
+
+ /* Add protocol specific flags */
+ switch (tr->tr_proto) {
+ case TRUNK_PROTO_FAILOVER:
+ if (tp == tr->tr_primary)
+ tp->tp_flags |= TRUNK_PORT_MASTER;
+ /* FALLTHROUGH */
+ case TRUNK_PROTO_ROUNDROBIN:
+ case TRUNK_PROTO_LOADBALANCE:
+ case TRUNK_PROTO_ETHERCHANNEL:
+ if (TRUNK_PORTACTIVE(tp))
+ rp->rp_flags |= TRUNK_PORT_ACTIVE;
+ break;
+
+ case TRUNK_PROTO_LACP:
+ /* LACP has a different definition of active */
+ if (lacp_port_isactive(tp))
+ rp->rp_flags |= TRUNK_PORT_ACTIVE;
+ break;
+ }
+
+}
+
+static void
+trunk_init(void *xsc)
+{
+ struct trunk_softc *tr = (struct trunk_softc *)xsc;
+ struct trunk_port *tp;
+ struct ifnet *ifp = tr->tr_ifp;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return;
+
+ TRUNK_LOCK(tr);
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ /* Update the port lladdrs */
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries)
+ trunk_port_lladdr(tp, IF_LLADDR(ifp));
+
+ if (tr->tr_init != NULL)
+ (*tr->tr_init)(tr);
+
+ TRUNK_UNLOCK(tr);
+}
+
+static void
+trunk_stop(struct trunk_softc *tr)
+{
+ struct ifnet *ifp = tr->tr_ifp;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+ if (tr->tr_stop != NULL)
+ (*tr->tr_stop)(tr);
+}
+
+static int
+trunk_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct trunk_softc *tr = (struct trunk_softc *)ifp->if_softc;
+ struct trunk_reqall *ra = (struct trunk_reqall *)data;
+ struct trunk_reqport *rp = (struct trunk_reqport *)data, rpbuf;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct trunk_port *tp;
+ struct ifnet *tpif;
+ struct thread *td = curthread;
+ int i, error = 0, unlock = 1;
+
+ TRUNK_LOCK(tr);
+
+ bzero(&rpbuf, sizeof(rpbuf));
+
+ switch (cmd) {
+ case SIOCGTRUNK:
+ ra->ra_proto = tr->tr_proto;
+ ra->ra_ports = i = 0;
+ tp = SLIST_FIRST(&tr->tr_ports);
+ while (tp && ra->ra_size >=
+ i + sizeof(struct trunk_reqport)) {
+ trunk_port2req(tp, &rpbuf);
+ error = copyout(&rpbuf, (caddr_t)ra->ra_port + i,
+ sizeof(struct trunk_reqport));
+ if (error)
+ break;
+ i += sizeof(struct trunk_reqport);
+ ra->ra_ports++;
+ tp = SLIST_NEXT(tp, tp_entries);
+ }
+ break;
+ case SIOCSTRUNK:
+ error = priv_check(td, PRIV_NET_TRUNK);
+ if (error)
+ break;
+ if (ra->ra_proto >= TRUNK_PROTO_MAX) {
+ error = EPROTONOSUPPORT;
+ break;
+ }
+ if (tr->tr_proto != TRUNK_PROTO_NONE) {
+ error = tr->tr_detach(tr);
+ /* Reset protocol and pointers */
+ tr->tr_proto = TRUNK_PROTO_NONE;
+ tr->tr_detach = NULL;
+ tr->tr_start = NULL;
+ tr->tr_input = NULL;
+ tr->tr_port_create = NULL;
+ tr->tr_port_destroy = NULL;
+ tr->tr_linkstate = NULL;
+ tr->tr_init = NULL;
+ tr->tr_stop = NULL;
+ tr->tr_lladdr = NULL;
+ }
+ if (error != 0)
+ break;
+ for (i = 0; i < (sizeof(trunk_protos) /
+ sizeof(trunk_protos[0])); i++) {
+ if (trunk_protos[i].ti_proto == ra->ra_proto) {
+ if (tr->tr_ifflags & IFF_DEBUG)
+ printf("%s: using proto %u\n",
+ tr->tr_ifname,
+ trunk_protos[i].ti_proto);
+ tr->tr_proto = trunk_protos[i].ti_proto;
+ if (tr->tr_proto != TRUNK_PROTO_NONE)
+ error = trunk_protos[i].ti_attach(tr);
+ goto out;
+ }
+ }
+ error = EPROTONOSUPPORT;
+ break;
+ case SIOCGTRUNKPORT:
+ if (rp->rp_portname[0] == '\0' ||
+ (tpif = ifunit(rp->rp_portname)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+
+ if ((tp = (struct trunk_port *)tpif->if_trunk) == NULL ||
+ tp->tp_trunk != tr) {
+ error = ENOENT;
+ break;
+ }
+
+ trunk_port2req(tp, rp);
+ break;
+ case SIOCSTRUNKPORT:
+ error = priv_check(td, PRIV_NET_TRUNK);
+ if (error)
+ break;
+ if (rp->rp_portname[0] == '\0' ||
+ (tpif = ifunit(rp->rp_portname)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+ error = trunk_port_create(tr, tpif);
+ break;
+ case SIOCSTRUNKDELPORT:
+ error = priv_check(td, PRIV_NET_TRUNK);
+ if (error)
+ break;
+ if (rp->rp_portname[0] == '\0' ||
+ (tpif = ifunit(rp->rp_portname)) == NULL) {
+ error = EINVAL;
+ break;
+ }
+
+ if ((tp = (struct trunk_port *)tpif->if_trunk) == NULL ||
+ tp->tp_trunk != tr) {
+ error = ENOENT;
+ break;
+ }
+
+ error = trunk_port_destroy(tp, 1);
+ break;
+ case SIOCSIFFLAGS:
+ /* Set flags on ports too */
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries) {
+ trunk_setflags(tp, 1);
+ }
+
+ if (!(ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ /*
+ * If interface is marked down and it is running,
+ * then stop and disable it.
+ */
+ trunk_stop(tr);
+ } else if ((ifp->if_flags & IFF_UP) &&
+ !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ /*
+ * If interface is marked up and it is stopped, then
+ * start it.
+ */
+ TRUNK_UNLOCK(tr);
+ unlock = 0;
+ (*ifp->if_init)(tr);
+ }
+ break;
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ error = trunk_ether_setmulti(tr, NULL);
+ break;
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ TRUNK_UNLOCK(tr);
+ unlock = 0;
+ error = ifmedia_ioctl(ifp, ifr, &tr->tr_media, cmd);
+ break;
+ default:
+ TRUNK_UNLOCK(tr);
+ unlock = 0;
+ error = ether_ioctl(ifp, cmd, data);
+ break;
+ }
+
+out:
+ if (unlock)
+ TRUNK_UNLOCK(tr);
+ return (error);
+}
+
+static int
+trunk_ether_setmulti(struct trunk_softc *tr, struct trunk_port *tp)
+{
+ struct ifnet *trifp = tr->tr_ifp;
+ struct ifnet *ifp;
+ struct ifmultiaddr *ifma, *rifma = NULL;
+ struct trunk_port *tp2;
+ struct trunk_mc *mc;
+ struct sockaddr_dl sdl;
+ int error;
+
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+
+ /* First, remove any existing filter entries. */
+ trunk_ether_purgemulti(tr, tp);
+
+ /* Now program new ones. */
+ TAILQ_FOREACH(ifma, &trifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ mc = malloc(sizeof(struct trunk_mc), M_DEVBUF, M_NOWAIT);
+ if (mc == NULL)
+ return (ENOMEM);
+ bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+ (char *)&mc->mc_addr, ETHER_ADDR_LEN);
+ SLIST_INSERT_HEAD(&tr->tr_mc_head, mc, mc_entries);
+ bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+ LLADDR(&sdl), ETHER_ADDR_LEN);
+
+ /* do all the ports */
+ SLIST_FOREACH(tp2, &tr->tr_ports, tp_entries) {
+ /* if we are only looking for one then skip */
+ if (tp != NULL && tp2 != tp)
+ continue;
+
+ ifp = tp2->tp_ifp;
+ sdl.sdl_index = ifp->if_index;
+ error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
+ if (error)
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
+static int
+trunk_ether_purgemulti(struct trunk_softc *tr, struct trunk_port *tp)
+{
+ struct ifnet *ifp;
+ struct trunk_port *tp2;
+ struct trunk_mc *mc;
+ struct sockaddr_dl sdl;
+ int error;
+
+ bzero((char *)&sdl, sizeof(sdl));
+ sdl.sdl_len = sizeof(sdl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_type = IFT_ETHER;
+ sdl.sdl_alen = ETHER_ADDR_LEN;
+
+ while ((mc = SLIST_FIRST(&tr->tr_mc_head)) != NULL) {
+ bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
+ /* do all the ports */
+ SLIST_FOREACH(tp2, &tr->tr_ports, tp_entries) {
+ /* if we are only looking for one then skip */
+ if (tp != NULL && tp2 != tp)
+ continue;
+
+ ifp = tp2->tp_ifp;
+ sdl.sdl_index = ifp->if_index;
+ error = if_delmulti(ifp, (struct sockaddr *)&sdl);
+ if (error)
+ return (error);
+ }
+ SLIST_REMOVE(&tr->tr_mc_head, mc, trunk_mc, mc_entries);
+ free(mc, M_DEVBUF);
+ }
+ return (0);
+}
+
+/* Handle a ref counted flag that should be set on the trunk port as well */
+static int
+trunk_setflag(struct trunk_port *tp, int flag, int status,
+ int (*func)(struct ifnet *, int))
+{
+ struct trunk_softc *tr = tp->tp_trunk;
+ struct ifnet *trifp = tr->tr_ifp;
+ struct ifnet *ifp = tp->tp_ifp;
+ int error;
+
+ TRUNK_LOCK_ASSERT(tr);
+
+ status = status ? (trifp->if_flags & flag) : 0;
+ /* Now "status" contains the flag value or 0 */
+
+ /*
+	 * See if the recorded port status differs from what we want it
+	 * to be.  If it does, flip it.  We record the port status in
+	 * tp_ifflags so that we never clear a port flag that we did not
+	 * set ourselves.  In fact, we do not set or clear port flags
+	 * directly; we acquire or release references to them, which is
+	 * why the recorded flags stay in accord with the actual port
+	 * flags.
+ */
+ if (status != (tp->tp_ifflags & flag)) {
+ error = (*func)(ifp, status);
+ if (error)
+ return (error);
+ tp->tp_ifflags &= ~flag;
+ tp->tp_ifflags |= status;
+ }
+ return (0);
+}
+
+/*
+ * Handle IFF_* flags that require certain changes on the trunk port:
+ * if "status" is true, update the port flags to match those of the trunk;
+ * if "status" is false, forcibly clear the flags set on the port.
+ */
+static int
+trunk_setflags(struct trunk_port *tp, int status)
+{
+ int error, i;
+
+ for (i = 0; trunk_pflags[i].flag; i++) {
+ error = trunk_setflag(tp, trunk_pflags[i].flag,
+ status, trunk_pflags[i].func);
+ if (error)
+ return (error);
+ }
+ return (0);
+}
+
+static void
+trunk_start(struct ifnet *ifp)
+{
+ struct trunk_softc *tr = (struct trunk_softc *)ifp->if_softc;
+ struct mbuf *m;
+ int error = 0;
+
+ for (;; error = 0) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m == NULL)
+ break;
+
+ BPF_MTAP(ifp, m);
+
+ if (tr->tr_proto != TRUNK_PROTO_NONE) {
+ TRUNK_LOCK(tr);
+ error = (*tr->tr_start)(tr, m);
+ TRUNK_UNLOCK(tr);
+ } else
+ m_free(m);
+
+ if (error == 0)
+ ifp->if_opackets++;
+ else
+ ifp->if_oerrors++;
+ }
+
+ return;
+}
+
+static struct mbuf *
+trunk_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct trunk_port *tp = ifp->if_trunk;
+ struct trunk_softc *tr = tp->tp_trunk;
+ struct ifnet *trifp = tr->tr_ifp;
+
+ if ((trifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ tr->tr_proto == TRUNK_PROTO_NONE) {
+ m_freem(m);
+ return (NULL);
+ }
+
+ TRUNK_LOCK(tr);
+ BPF_MTAP(trifp, m);
+
+ m = (*tr->tr_input)(tr, tp, m);
+
+ if (m != NULL) {
+ ifp->if_ipackets++;
+ ifp->if_ibytes += m->m_pkthdr.len;
+ trifp->if_ipackets++;
+ trifp->if_ibytes += m->m_pkthdr.len;
+ }
+
+ TRUNK_UNLOCK(tr);
+ return (m);
+}
+
+static int
+trunk_media_change(struct ifnet *ifp)
+{
+ struct trunk_softc *tr = (struct trunk_softc *)ifp->if_softc;
+
+ if (tr->tr_ifflags & IFF_DEBUG)
+ printf("%s\n", __func__);
+
+ /* Ignore */
+ return (0);
+}
+
+static void
+trunk_media_status(struct ifnet *ifp, struct ifmediareq *imr)
+{
+ struct trunk_softc *tr = (struct trunk_softc *)ifp->if_softc;
+ struct trunk_port *tp;
+
+ imr->ifm_status = IFM_AVALID;
+ imr->ifm_active = IFM_ETHER | IFM_AUTO;
+
+ TRUNK_LOCK(tr);
+ tp = tr->tr_primary;
+ if (tp != NULL && tp->tp_ifp->if_flags & IFF_UP)
+ imr->ifm_status |= IFM_ACTIVE;
+ TRUNK_UNLOCK(tr);
+}
+
+static void
+trunk_port_state(struct ifnet *ifp, int state)
+{
+ struct trunk_port *tp = (struct trunk_port *)ifp->if_trunk;
+ struct trunk_softc *tr = NULL;
+
+ if (tp != NULL)
+ tr = tp->tp_trunk;
+ if (tr == NULL)
+ return;
+
+ TRUNK_LOCK(tr);
+ if (tr->tr_linkstate != NULL)
+ (*tr->tr_linkstate)(tp);
+ TRUNK_UNLOCK(tr);
+}
+
+struct trunk_port *
+trunk_link_active(struct trunk_softc *tr, struct trunk_port *tp)
+{
+ struct trunk_port *tp_next, *rval = NULL;
+ // int new_link = LINK_STATE_DOWN;
+
+ TRUNK_LOCK_ASSERT(tr);
+ /*
+	 * Search for a port which reports an active link state.
+ */
+
+ if (tp == NULL)
+ goto search;
+ if (TRUNK_PORTACTIVE(tp)) {
+ rval = tp;
+ goto found;
+ }
+ if ((tp_next = SLIST_NEXT(tp, tp_entries)) != NULL &&
+ TRUNK_PORTACTIVE(tp_next)) {
+ rval = tp_next;
+ goto found;
+ }
+
+search:
+ SLIST_FOREACH(tp_next, &tr->tr_ports, tp_entries) {
+ if (TRUNK_PORTACTIVE(tp_next)) {
+ rval = tp_next;
+ goto found;
+ }
+ }
+
+found:
+ if (rval != NULL) {
+ /*
+ * The IEEE 802.1D standard assumes that a trunk with
+ * multiple ports is always full duplex. This is valid
+ * for load sharing trunks and if at least two links
+ * are active. Unfortunately, checking the latter would
+ * be too expensive at this point.
+ XXX
+ if ((tr->tr_capabilities & IFCAP_TRUNK_FULLDUPLEX) &&
+ (tr->tr_count > 1))
+ new_link = LINK_STATE_FULL_DUPLEX;
+ else
+ new_link = rval->tp_link_state;
+ */
+ }
+
+ return (rval);
+}
+
+static const void *
+trunk_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
+{
+ if (m->m_pkthdr.len < (off + len)) {
+ return (NULL);
+ } else if (m->m_len < (off + len)) {
+ m_copydata(m, off, len, buf);
+ return (buf);
+ }
+ return (mtod(m, char *) + off);
+}
+
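+/*
+ * Compute a flow hash over the Ethernet addresses, any VLAN tag and,
+ * when present, the IPv4/IPv6 source and destination addresses.  The
+ * "key" argument seeds the hash; the loadbalance protocol passes a
+ * per-trunk random key so that a given flow always maps to the same
+ * port while different trunks spread flows differently.
+ */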
+uint32_t
+trunk_hashmbuf(struct mbuf *m, uint32_t key)
+{
+ uint16_t etype;
+ uint32_t p = 0;
+ int off;
+ struct ether_header *eh;
+ struct ether_vlan_header vlanbuf;
+ const struct ether_vlan_header *vlan;
+#ifdef INET
+ const struct ip *ip;
+ struct ip ipbuf;
+#endif
+#ifdef INET6
+ const struct ip6_hdr *ip6;
+ struct ip6_hdr ip6buf;
+#endif
+
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ goto out;
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
+ p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+
+ /* Special handling for encapsulating VLAN frames */
+ if (m->m_flags & M_VLANTAG) {
+ p = hash32_buf(&m->m_pkthdr.ether_vtag,
+ sizeof(m->m_pkthdr.ether_vtag), p);
+ } else if (etype == ETHERTYPE_VLAN) {
+ vlan = trunk_gethdr(m, off, sizeof(*vlan), &vlanbuf);
+ if (vlan == NULL)
+ goto out;
+
+ p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+
+ switch (etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ ip = trunk_gethdr(m, off, sizeof(*ip), &ipbuf);
+ if (ip == NULL)
+ goto out;
+
+ p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+ p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ ip6 = trunk_gethdr(m, off, sizeof(*ip6), &ip6buf);
+ if (ip6 == NULL)
+ goto out;
+
+ p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+ p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+ break;
+#endif
+ }
+out:
+ return (p);
+}
+
+int
+trunk_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+ int error = 0;
+
+ /* Send mbuf */
+ IFQ_ENQUEUE(&ifp->if_snd, m, error);
+ if (error)
+ return (error);
+ if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
+ (*ifp->if_start)(ifp);
+
+ ifp->if_obytes += m->m_pkthdr.len;
+ if (m->m_flags & M_MCAST)
+ ifp->if_omcasts++;
+
+ return (error);
+}
+
+/*
+ * Simple round robin trunking
+ */
+
+static int
+trunk_rr_attach(struct trunk_softc *tr)
+{
+ struct trunk_port *tp;
+
+ tr->tr_detach = trunk_rr_detach;
+ tr->tr_start = trunk_rr_start;
+ tr->tr_input = trunk_rr_input;
+ tr->tr_port_create = NULL;
+ tr->tr_port_destroy = trunk_rr_port_destroy;
+ tr->tr_capabilities = IFCAP_TRUNK_FULLDUPLEX;
+
+ tp = SLIST_FIRST(&tr->tr_ports);
+ tr->tr_psc = (caddr_t)tp;
+
+ return (0);
+}
+
+static int
+trunk_rr_detach(struct trunk_softc *tr)
+{
+ tr->tr_psc = NULL;
+ return (0);
+}
+
+static void
+trunk_rr_port_destroy(struct trunk_port *tp)
+{
+ struct trunk_softc *tr = tp->tp_trunk;
+
+ if (tp == (struct trunk_port *)tr->tr_psc)
+ tr->tr_psc = NULL;
+}
+
+static int
+trunk_rr_start(struct trunk_softc *tr, struct mbuf *m)
+{
+ struct trunk_port *tp = (struct trunk_port *)tr->tr_psc, *tp_next;
+ int error = 0;
+
+ if (tp == NULL && (tp = trunk_link_active(tr, NULL)) == NULL)
+ return (ENOENT);
+
+ /* Send mbuf */
+ error = trunk_enqueue(tp->tp_ifp, m);
+
+ /* Get next active port */
+ tp_next = trunk_link_active(tr, SLIST_NEXT(tp, tp_entries));
+ tr->tr_psc = (caddr_t)tp_next;
+
+ return (error);
+}
+
+static struct mbuf *
+trunk_rr_input(struct trunk_softc *tr, struct trunk_port *tp, struct mbuf *m)
+{
+ struct ifnet *ifp = tr->tr_ifp;
+
+ /* Just pass in the packet to our trunk device */
+ m->m_pkthdr.rcvif = ifp;
+
+ return (m);
+}
+
+/*
+ * Active failover
+ */
+
+static int
+trunk_fail_attach(struct trunk_softc *tr)
+{
+ tr->tr_detach = trunk_fail_detach;
+ tr->tr_start = trunk_fail_start;
+ tr->tr_input = trunk_fail_input;
+ tr->tr_port_create = NULL;
+ tr->tr_port_destroy = NULL;
+
+ return (0);
+}
+
+static int
+trunk_fail_detach(struct trunk_softc *tr)
+{
+ return (0);
+}
+
+static int
+trunk_fail_start(struct trunk_softc *tr, struct mbuf *m)
+{
+ struct trunk_port *tp;
+
+ /* Use the master port if active or the next available port */
+ if ((tp = trunk_link_active(tr, tr->tr_primary)) == NULL)
+ return (ENOENT);
+
+ /* Send mbuf */
+ return (trunk_enqueue(tp->tp_ifp, m));
+}
+
+static struct mbuf *
+trunk_fail_input(struct trunk_softc *tr, struct trunk_port *tp, struct mbuf *m)
+{
+ struct ifnet *ifp = tr->tr_ifp;
+ struct trunk_port *tmp_tp;
+
+ if (tp == tr->tr_primary) {
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
+ }
+
+ if (tr->tr_primary->tp_link_state == LINK_STATE_DOWN) {
+ tmp_tp = trunk_link_active(tr, NULL);
+ /*
+		 * If tmp_tp is NULL, we have received a packet while all of
+		 * our links are down.  Weird, but process it anyway.
+ */
+ if ((tmp_tp == NULL || tmp_tp == tp)) {
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
+ }
+ }
+
+ m_freem(m);
+ return (NULL);
+}
+
+/*
+ * Loadbalancing
+ */
+
+static int
+trunk_lb_attach(struct trunk_softc *tr)
+{
+ struct trunk_port *tp;
+ struct trunk_lb *lb;
+
+ if ((lb = (struct trunk_lb *)malloc(sizeof(struct trunk_lb),
+ M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
+ return (ENOMEM);
+
+ tr->tr_detach = trunk_lb_detach;
+ tr->tr_start = trunk_lb_start;
+ tr->tr_input = trunk_lb_input;
+ tr->tr_port_create = trunk_lb_port_create;
+ tr->tr_port_destroy = trunk_lb_port_destroy;
+ tr->tr_capabilities = IFCAP_TRUNK_FULLDUPLEX;
+
+ lb->lb_key = arc4random();
+ tr->tr_psc = (caddr_t)lb;
+
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries)
+ trunk_lb_port_create(tp);
+
+ return (0);
+}
+
+static int
+trunk_lb_detach(struct trunk_softc *tr)
+{
+ struct trunk_lb *lb = (struct trunk_lb *)tr->tr_psc;
+ if (lb != NULL)
+ free(lb, M_DEVBUF);
+ return (0);
+}
+
+static int
+trunk_lb_porttable(struct trunk_softc *tr, struct trunk_port *tp)
+{
+ struct trunk_lb *lb = (struct trunk_lb *)tr->tr_psc;
+ struct trunk_port *tp_next;
+ int i = 0;
+
+ bzero(&lb->lb_ports, sizeof(lb->lb_ports));
+ SLIST_FOREACH(tp_next, &tr->tr_ports, tp_entries) {
+ if (tp_next == tp)
+ continue;
+ if (i >= TRUNK_MAX_PORTS)
+ return (EINVAL);
+ if (tr->tr_ifflags & IFF_DEBUG)
+ printf("%s: port %s at index %d\n",
+ tr->tr_ifname, tp_next->tp_ifname, i);
+ lb->lb_ports[i++] = tp_next;
+ }
+
+ return (0);
+}
+
+static int
+trunk_lb_port_create(struct trunk_port *tp)
+{
+ struct trunk_softc *tr = tp->tp_trunk;
+ return (trunk_lb_porttable(tr, NULL));
+}
+
+static void
+trunk_lb_port_destroy(struct trunk_port *tp)
+{
+ struct trunk_softc *tr = tp->tp_trunk;
+ trunk_lb_porttable(tr, tp);
+}
+
+static int
+trunk_lb_start(struct trunk_softc *tr, struct mbuf *m)
+{
+ struct trunk_lb *lb = (struct trunk_lb *)tr->tr_psc;
+ struct trunk_port *tp = NULL;
+ uint32_t p = 0;
+ int idx;
+
+ p = trunk_hashmbuf(m, lb->lb_key);
+ if ((idx = p % tr->tr_count) >= TRUNK_MAX_PORTS)
+ return (EINVAL);
+ tp = lb->lb_ports[idx];
+
+ /*
+ * Check the port's link state. This will return the next active
+ * port if the link is down or the port is NULL.
+ */
+ if ((tp = trunk_link_active(tr, tp)) == NULL)
+ return (ENOENT);
+
+ /* Send mbuf */
+ return (trunk_enqueue(tp->tp_ifp, m));
+}
+
+static struct mbuf *
+trunk_lb_input(struct trunk_softc *tr, struct trunk_port *tp, struct mbuf *m)
+{
+ struct ifnet *ifp = tr->tr_ifp;
+
+ /* Just pass in the packet to our trunk device */
+ m->m_pkthdr.rcvif = ifp;
+
+ return (m);
+}
+
+/*
+ * 802.3ad LACP
+ */
+
+static int
+trunk_lacp_attach(struct trunk_softc *tr)
+{
+ struct trunk_port *tp;
+ int error;
+
+ tr->tr_detach = trunk_lacp_detach;
+ tr->tr_port_create = lacp_port_create;
+ tr->tr_port_destroy = lacp_port_destroy;
+ tr->tr_linkstate = lacp_linkstate;
+ tr->tr_start = trunk_lacp_start;
+ tr->tr_input = trunk_lacp_input;
+ tr->tr_init = lacp_init;
+ tr->tr_stop = lacp_stop;
+ tr->tr_lladdr = trunk_lacp_lladdr;
+
+ error = lacp_attach(tr);
+ if (error)
+ return (error);
+
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries)
+ lacp_port_create(tp);
+
+ return (error);
+}
+
+static int
+trunk_lacp_detach(struct trunk_softc *tr)
+{
+ struct trunk_port *tp;
+ int error;
+
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries)
+ lacp_port_destroy(tp);
+
+ /* unlocking is safe here */
+ TRUNK_UNLOCK(tr);
+ error = lacp_detach(tr);
+ TRUNK_LOCK(tr);
+
+ return (error);
+}
+
+static void
+trunk_lacp_lladdr(struct trunk_softc *tr)
+{
+ struct trunk_port *tp;
+
+ /* purge all the lacp ports */
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries)
+ lacp_port_destroy(tp);
+
+ /* add them back in */
+ SLIST_FOREACH(tp, &tr->tr_ports, tp_entries)
+ lacp_port_create(tp);
+}
+
+static int
+trunk_lacp_start(struct trunk_softc *tr, struct mbuf *m)
+{
+ struct trunk_port *tp;
+
+ tp = lacp_select_tx_port(tr, m);
+ if (tp == NULL)
+ return (EBUSY);
+
+ /* Send mbuf */
+ return (trunk_enqueue(tp->tp_ifp, m));
+}
+
+static struct mbuf *
+trunk_lacp_input(struct trunk_softc *tr, struct trunk_port *tp, struct mbuf *m)
+{
+ struct ifnet *ifp = tr->tr_ifp;
+ struct ether_header *eh;
+ u_short etype;
+ uint8_t subtype;
+
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+
+ /* Tap off LACP control messages */
+ if (etype == ETHERTYPE_SLOW) {
+ if (m->m_pkthdr.len < sizeof(*eh) + sizeof(subtype)) {
+ m_freem(m);
+ return (NULL);
+ }
+
+ m_copydata(m, sizeof(*eh), sizeof(subtype), &subtype);
+ switch (subtype) {
+ case SLOWPROTOCOLS_SUBTYPE_LACP:
+ lacp_input(tp, m);
+ break;
+
+ case SLOWPROTOCOLS_SUBTYPE_MARKER:
+ lacp_marker_input(tp, m);
+ break;
+
+ default:
+ /* Unknown LACP packet type */
+ m_freem(m);
+ break;
+ }
+ return (NULL);
+ }
+
+ /*
+ * If the port is not collecting or not in the active aggregator then
+ * free and return.
+ */
+ if ((tp->tp_flags & TRUNK_PORT_COLLECTING) == 0 ||
+ lacp_port_isactive(tp) == 0) {
+ m_freem(m);
+ return (NULL);
+ }
+
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
+}
diff --git a/sys/net/if_trunk.h b/sys/net/if_trunk.h
new file mode 100644
index 000000000000..89f9f3e77a80
--- /dev/null
+++ b/sys/net/if_trunk.h
@@ -0,0 +1,209 @@
+/* $OpenBSD: if_trunk.h,v 1.11 2007/01/31 06:20:19 reyk Exp $ */
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_TRUNK_H
+#define _NET_TRUNK_H
+
+/*
+ * Global definitions
+ */
+
+#define TRUNK_MAX_PORTS		32	/* logical limit, not a hardware one */
+#define TRUNK_MAX_NAMESIZE 32 /* name of a protocol */
+#define TRUNK_MAX_STACKING 4 /* maximum number of stacked trunks */
+
+/* Port flags */
+#define TRUNK_PORT_SLAVE 0x00000000 /* normal enslaved port */
+#define TRUNK_PORT_MASTER 0x00000001 /* primary port */
+#define TRUNK_PORT_STACK 0x00000002 /* stacked trunk port */
+#define TRUNK_PORT_ACTIVE 0x00000004 /* port is active */
+#define TRUNK_PORT_COLLECTING	0x00000008	/* port accepts incoming frames */
+#define TRUNK_PORT_DISTRIBUTING	0x00000010	/* port sends outgoing frames */
+#define TRUNK_PORT_GLOBAL 0x80000000 /* IOCTL: global flag */
+#define TRUNK_PORT_BITS "\20\01MASTER\02STACK\03ACTIVE\04COLLECTING" \
+ "\05DISTRIBUTING"
+
+/* Supported trunk PROTOs */
+#define TRUNK_PROTO_NONE 0 /* no trunk protocol defined */
+#define TRUNK_PROTO_ROUNDROBIN 1 /* simple round robin */
+#define TRUNK_PROTO_FAILOVER 2 /* active failover */
+#define TRUNK_PROTO_LOADBALANCE 3 /* loadbalance */
+#define TRUNK_PROTO_LACP 4 /* 802.3ad lacp */
+#define TRUNK_PROTO_ETHERCHANNEL 5 /* Cisco FEC */
+#define TRUNK_PROTO_MAX 6
+
+struct trunk_protos {
+ const char *tpr_name;
+ int tpr_proto;
+};
+
+#define TRUNK_PROTO_DEFAULT TRUNK_PROTO_FAILOVER
+#define TRUNK_PROTOS { \
+ { "failover", TRUNK_PROTO_FAILOVER }, \
+ { "fec", TRUNK_PROTO_ETHERCHANNEL }, \
+ { "lacp", TRUNK_PROTO_LACP }, \
+ { "loadbalance", TRUNK_PROTO_LOADBALANCE }, \
+ { "roundrobin", TRUNK_PROTO_ROUNDROBIN }, \
+ { "none", TRUNK_PROTO_NONE }, \
+ { "default", TRUNK_PROTO_DEFAULT } \
+}
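+
+/*
+ * The names above are the ones accepted when selecting a protocol from
+ * userland.  A typical (illustrative) setup with two hypothetical
+ * member ports:
+ *
+ *	ifconfig trunk0 create
+ *	ifconfig trunk0 trunkproto lacp trunkport em0 trunkport em1
+ */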
+
+/*
+ * Trunk ioctls.
+ */
+
+/* Trunk port settings */
+struct trunk_reqport {
+ char rp_ifname[IFNAMSIZ]; /* name of the trunk */
+ char rp_portname[IFNAMSIZ]; /* name of the port */
+ u_int32_t rp_prio; /* port priority */
+ u_int32_t rp_flags; /* port flags */
+};
+
+#define SIOCGTRUNKPORT _IOWR('i', 140, struct trunk_reqport)
+#define SIOCSTRUNKPORT _IOW('i', 141, struct trunk_reqport)
+#define SIOCSTRUNKDELPORT _IOW('i', 142, struct trunk_reqport)
+
+/* Trunk, ports and options */
+struct trunk_reqall {
+ char ra_ifname[IFNAMSIZ]; /* name of the trunk */
+ u_int ra_proto; /* trunk protocol */
+
+ size_t ra_size; /* size of buffer */
+ struct trunk_reqport *ra_port; /* allocated buffer */
+ int ra_ports; /* total port count */
+};
+
+#define SIOCGTRUNK _IOWR('i', 143, struct trunk_reqall)
+#define SIOCSTRUNK _IOW('i', 144, struct trunk_reqall)
+
+#ifdef _KERNEL
+/*
+ * Internal kernel part
+ */
+
+#define tp_ifname tp_ifp->if_xname /* interface name */
+#define tp_link_state tp_ifp->if_link_state /* link state */
+#define tp_capabilities tp_ifp->if_capabilities /* capabilities */
+
+#define TRUNK_PORTACTIVE(_tp) ( \
+ ((_tp)->tp_link_state == LINK_STATE_UP) && \
+ ((_tp)->tp_ifp->if_flags & IFF_UP) \
+)
+
+#define mc_enm mc_u.mcu_enm
+
+struct trunk_ifreq {
+ union {
+ struct ifreq ifreq;
+ struct {
+ char ifr_name[IFNAMSIZ];
+ struct sockaddr_storage ifr_ss;
+ } ifreq_storage;
+ } ifreq;
+};
+
+#define tr_ifflags tr_ifp->if_flags /* flags */
+#define tr_ifname tr_ifp->if_xname /* name */
+#define tr_capabilities tr_ifp->if_capabilities /* capabilities */
+
+#define IFCAP_TRUNK_MASK 0xffff0000 /* private capabilities */
+#define IFCAP_TRUNK_FULLDUPLEX 0x00010000 /* full duplex with >1 ports */
+
+/* Private data used by the loadbalancing protocol */
+#define TRUNK_LB_MAXKEYS 8
+struct trunk_lb {
+ u_int32_t lb_key;
+ struct trunk_port *lb_ports[TRUNK_MAX_PORTS];
+};
+
+struct trunk_mc {
+ union {
+ struct ether_multi *mcu_enm;
+ } mc_u;
+ struct sockaddr_storage mc_addr;
+
+ SLIST_ENTRY(trunk_mc) mc_entries;
+};
+
+struct trunk_softc {
+ struct ifnet *tr_ifp; /* virtual interface */
+ struct mtx tr_mtx;
+ int tr_proto; /* trunk protocol */
+ u_int tr_count; /* number of ports */
+ struct trunk_port *tr_primary; /* primary port */
+ struct ifmedia tr_media; /* media config */
+ caddr_t tr_psc; /* protocol data */
+
+ SLIST_HEAD(__tplhd, trunk_port) tr_ports; /* list of interfaces */
+ SLIST_ENTRY(trunk_softc) tr_entries;
+
+ SLIST_HEAD(__mclhd, trunk_mc) tr_mc_head; /* multicast addresses */
+
+ /* Trunk protocol callbacks */
+ int (*tr_detach)(struct trunk_softc *);
+ int (*tr_start)(struct trunk_softc *, struct mbuf *);
+ struct mbuf *(*tr_input)(struct trunk_softc *, struct trunk_port *,
+ struct mbuf *);
+ int (*tr_port_create)(struct trunk_port *);
+ void (*tr_port_destroy)(struct trunk_port *);
+ void (*tr_linkstate)(struct trunk_port *);
+ void (*tr_init)(struct trunk_softc *);
+ void (*tr_stop)(struct trunk_softc *);
+ void (*tr_lladdr)(struct trunk_softc *);
+};
+
+struct trunk_port {
+ struct ifnet *tp_ifp; /* physical interface */
+ struct trunk_softc *tp_trunk; /* parent trunk */
+ uint8_t tp_lladdr[ETHER_ADDR_LEN];
+
+ u_char tp_iftype; /* interface type */
+ uint32_t tp_prio; /* port priority */
+ uint32_t tp_flags; /* port flags */
+ int tp_ifflags; /* saved ifp flags */
+ void *lh_cookie; /* if state hook */
+ caddr_t tp_psc; /* protocol data */
+
+ /* Redirected callbacks */
+ int (*tp_ioctl)(struct ifnet *, u_long, caddr_t);
+ int (*tp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+
+ SLIST_ENTRY(trunk_port) tp_entries;
+};
+
+#define TRUNK_LOCK_INIT(_tr) mtx_init(&(_tr)->tr_mtx, "if_trunk", NULL, \
+ MTX_DEF)
+#define TRUNK_LOCK_DESTROY(_tr) mtx_destroy(&(_tr)->tr_mtx)
+#define TRUNK_LOCK(_tr) mtx_lock(&(_tr)->tr_mtx)
+#define TRUNK_UNLOCK(_tr) mtx_unlock(&(_tr)->tr_mtx)
+#define TRUNK_LOCKED(_tr) mtx_owned(&(_tr)->tr_mtx)
+#define TRUNK_LOCK_ASSERT(_tr) mtx_assert(&(_tr)->tr_mtx, MA_OWNED)
+
+extern struct mbuf *(*trunk_input_p)(struct ifnet *, struct mbuf *);
+extern void (*trunk_linkstate_p)(struct ifnet *, int);
+
+int trunk_enqueue(struct ifnet *, struct mbuf *);
+uint32_t trunk_hashmbuf(struct mbuf *, uint32_t);
+
+#endif /* _KERNEL */
+
+#endif /* _NET_TRUNK_H */
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index d66774ccf90e..1b4ef9347e13 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -186,6 +186,7 @@ struct ifnet {
TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
/* protected by if_addr_mtx */
void *if_pf_kif;
+ void *if_trunk; /* trunk glue */
};
typedef void if_init_f_t(void *);
diff --git a/sys/sys/priv.h b/sys/sys/priv.h
index 3c40322081da..d5b9cd3fca19 100644
--- a/sys/sys/priv.h
+++ b/sys/sys/priv.h
@@ -319,6 +319,7 @@
#define PRIV_NET_IFDESTROY 412 /* Destroy cloned interface. */
#define PRIV_NET_ADDIFADDR 413 /* Add protocol addr to interface. */
#define PRIV_NET_DELIFADDR 414 /* Delete protocol addr on interface. */
+#define PRIV_NET_TRUNK 415 /* Administer trunk. */
/*
* 802.11-related privileges.