diff options
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/if.c | 64 | ||||
-rw-r--r-- | sys/net/if_bridge.c | 56 | ||||
-rw-r--r-- | sys/net/if_bridgevar.h | 2 | ||||
-rw-r--r-- | sys/net/if_clone.h | 2 | ||||
-rw-r--r-- | sys/net/if_epair.c | 62 | ||||
-rw-r--r-- | sys/net/if_ethersubr.c | 2 | ||||
-rw-r--r-- | sys/net/if_ovpn.c | 6 | ||||
-rw-r--r-- | sys/net/if_tap.h | 2 | ||||
-rw-r--r-- | sys/net/if_tun.h | 2 | ||||
-rw-r--r-- | sys/net/if_tuntap.c | 88 | ||||
-rw-r--r-- | sys/net/if_var.h | 1 | ||||
-rw-r--r-- | sys/net/iflib.c | 120 | ||||
-rw-r--r-- | sys/net/pfvar.h | 24 |
13 files changed, 340 insertions, 91 deletions
diff --git a/sys/net/if.c b/sys/net/if.c index 79c883fd4a0a..b6a798aa0fab 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -74,7 +74,6 @@ #include <vm/uma.h> #include <net/bpf.h> -#include <net/ethernet.h> #include <net/if.h> #include <net/if_arp.h> #include <net/if_clone.h> @@ -1102,6 +1101,7 @@ if_detach_internal(struct ifnet *ifp, bool vmove) struct ifaddr *ifa; int i; struct domain *dp; + void *if_afdata[AF_MAX]; #ifdef VIMAGE bool shutdown; @@ -1225,15 +1225,30 @@ finish_vnet_shutdown: IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; + if (i != 0) { + /* + * Defer the dom_ifdetach call. + */ + _Static_assert(sizeof(if_afdata) == sizeof(ifp->if_afdata), + "array size mismatch"); + memcpy(if_afdata, ifp->if_afdata, sizeof(if_afdata)); + memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata)); + } IF_AFDATA_UNLOCK(ifp); if (i == 0) return; + /* + * XXXZL: This net epoch wait is not necessary if we have done right. + * But if we do not, at least we can make a guarantee that threads that + * enter the net epoch will see NULL address family dependent data, + * e.g. if_afdata[AF_INET6]. A clear NULL pointer dereference is much + * better than writing to freed memory. + */ + NET_EPOCH_WAIT(); SLIST_FOREACH(dp, &domains, dom_next) { - if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { - (*dp->dom_ifdetach)(ifp, - ifp->if_afdata[dp->dom_family]); - ifp->if_afdata[dp->dom_family] = NULL; - } + if (dp->dom_ifdetach != NULL && + if_afdata[dp->dom_family] != NULL) + (*dp->dom_ifdetach)(ifp, if_afdata[dp->dom_family]); } } @@ -2589,16 +2604,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) * flip. They require special handling because in-kernel * consumers may indepdently toggle them. 
*/ - if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { - if (new_flags & IFF_PPROMISC) - ifp->if_flags |= IFF_PROMISC; - else if (ifp->if_pcount == 0) - ifp->if_flags &= ~IFF_PROMISC; - if (log_promisc_mode_change) - if_printf(ifp, "permanently promiscuous mode %s\n", - ((new_flags & IFF_PPROMISC) ? - "enabled" : "disabled")); - } + if_setppromisc(ifp, new_flags & IFF_PPROMISC); if ((ifp->if_flags ^ new_flags) & IFF_PALLMULTI) { if (new_flags & IFF_PALLMULTI) ifp->if_flags |= IFF_ALLMULTI; @@ -4456,6 +4462,32 @@ if_getmtu_family(const if_t ifp, int family) return (ifp->if_mtu); } +void +if_setppromisc(if_t ifp, bool ppromisc) +{ + int new_flags; + + if (ppromisc) + new_flags = ifp->if_flags | IFF_PPROMISC; + else + new_flags = ifp->if_flags & ~IFF_PPROMISC; + if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { + if (new_flags & IFF_PPROMISC) + new_flags |= IFF_PROMISC; + /* + * Only unset IFF_PROMISC if there are no more consumers of + * promiscuity, i.e. the ifp->if_pcount refcount is 0. + */ + else if (ifp->if_pcount == 0) + new_flags &= ~IFF_PROMISC; + if (log_promisc_mode_change) + if_printf(ifp, "permanently promiscuous mode %s\n", + ((new_flags & IFF_PPROMISC) ? + "enabled" : "disabled")); + } + ifp->if_flags = new_flags; +} + /* * Methods for drivers to access interface unicast and multicast * link level addresses. 
Driver shall not know 'struct ifaddr' neither diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 66555fd1feb5..d7911a348d87 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -522,11 +522,11 @@ SYSCTL_BOOL(_net_link_bridge, OID_AUTO, log_mac_flap, "Log MAC address port flapping"); /* allow IP addresses on bridge members */ -VNET_DEFINE_STATIC(bool, member_ifaddrs) = false; +VNET_DEFINE_STATIC(bool, member_ifaddrs) = true; #define V_member_ifaddrs VNET(member_ifaddrs) SYSCTL_BOOL(_net_link_bridge, OID_AUTO, member_ifaddrs, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(member_ifaddrs), false, - "Allow layer 3 addresses on bridge members"); + "Allow layer 3 addresses on bridge members (deprecated)"); static bool bridge_member_ifaddrs(void) @@ -1448,24 +1448,30 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) /* * If member_ifaddrs is disabled, do not allow an interface with - * assigned IP addresses to be added to a bridge. + * assigned IP addresses to be added to a bridge. Skip this check + * for gif interfaces, because the IP address assigned to a gif + * interface is separate from the bridge's Ethernet segment. 
*/ - if (!V_member_ifaddrs) { + if (ifs->if_type != IFT_GIF) { struct ifaddr *ifa; CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) { -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - return (EXTERROR(EINVAL, - "Member interface may not have " - "an IPv4 address configured")); -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) + if (ifa->ifa_addr->sa_family != AF_INET && + ifa->ifa_addr->sa_family != AF_INET6) + continue; + + if (V_member_ifaddrs) { + if_printf(sc->sc_ifp, + "WARNING: Adding member interface %s which " + "has an IP address assigned is deprecated " + "and will be unsupported in a future " + "release.\n", ifs->if_xname); + break; + } else { return (EXTERROR(EINVAL, "Member interface may not have " - "an IPv6 address configured")); -#endif + "an IP address assigned")); + } } } @@ -2398,6 +2404,12 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, return (EINVAL); } + /* Do VLAN filtering. */ + if (!bridge_vfilter_out(bif, m)) { + m_freem(m); + return (0); + } + /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; @@ -2817,10 +2829,6 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; - /* Do VLAN filtering. */ - if (!bridge_vfilter_out(dbif, m)) - goto drop; - if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; @@ -3189,10 +3197,6 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; - /* Do VLAN filtering. */ - if (!bridge_vfilter_out(dbif, m)) - continue; - if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; @@ -3358,6 +3362,14 @@ bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) NET_EPOCH_ASSERT(); + /* + * If the interface is in span mode, then bif_sc will be NULL. 
+ * Since the purpose of span interfaces is to receive all frames, + * pass everything. + */ + if (dbif->bif_sc == NULL) + return (true); + /* If VLAN filtering isn't enabled, pass everything. */ if ((dbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0) return (true); diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index b0f579f688ac..5ed8c19f3128 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -159,7 +159,7 @@ struct ifbreq { uint32_t ifbr_addrexceeded; /* member if addr violations */ ether_vlanid_t ifbr_pvid; /* member if PVID */ uint16_t ifbr_vlanproto; /* member if VLAN protocol */ - uint8_t pad[32]; + uint8_t pad[28]; }; /* BRDGGIFFLAGS, BRDGSIFFLAGS */ diff --git a/sys/net/if_clone.h b/sys/net/if_clone.h index 5a74ffa1cc2f..d780e49af25f 100644 --- a/sys/net/if_clone.h +++ b/sys/net/if_clone.h @@ -153,7 +153,7 @@ int if_clone_destroy(const char *); int if_clone_list(struct if_clonereq *); void if_clone_restoregroup(struct ifnet *); -/* The below interfaces are used only by epair(4). */ +/* The below interfaces are used only by epair(4) and tun(4)/tap(4). */ void if_clone_addif(struct if_clone *, struct ifnet *); int if_clone_destroyif(struct if_clone *, struct ifnet *); diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c index a213a84e17db..fbffa8f359a0 100644 --- a/sys/net/if_epair.c +++ b/sys/net/if_epair.c @@ -67,9 +67,9 @@ #include <net/if_var.h> #include <net/if_clone.h> #include <net/if_media.h> -#include <net/if_var.h> #include <net/if_private.h> #include <net/if_types.h> +#include <net/if_vlan_var.h> #include <net/netisr.h> #ifdef RSS #include <net/rss_config.h> @@ -435,6 +435,21 @@ epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; } +/* + * Update ifp->if_hwassist according to the current value of ifp->if_capenable. 
+ */ +static void +epair_caps_changed(struct ifnet *ifp) +{ + uint64_t hwassist = 0; + + if (ifp->if_capenable & IFCAP_TXCSUM) + hwassist |= CSUM_IP_TCP | CSUM_IP_UDP; + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) + hwassist |= CSUM_IP6_TCP | CSUM_IP6_UDP; + ifp->if_hwassist = hwassist; +} + static int epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { @@ -462,6 +477,44 @@ epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = 0; break; + case SIOCGIFCAP: + ifr->ifr_reqcap = ifp->if_capabilities; + ifr->ifr_curcap = ifp->if_capenable; + error = 0; + break; + case SIOCSIFCAP: + /* + * Enable/disable capabilities as requested, besides + * IFCAP_RXCSUM(_IPV6), which always remain enabled. + * Incoming packets may have the mbuf flag CSUM_DATA_VALID set. + * Without IFCAP_RXCSUM(_IPV6), this flag would have to be + * removed, which does not seem helpful. + */ + ifp->if_capenable = ifr->ifr_reqcap | IFCAP_RXCSUM | + IFCAP_RXCSUM_IPV6; + epair_caps_changed(ifp); + /* + * If IFCAP_TXCSUM(_IPV6) has been changed, change it on the + * other epair interface as well. + * A bridge disables IFCAP_TXCSUM(_IPV6) when adding one epair + * interface if another interface in the bridge has it disabled. + * In that case this capability needs to be disabled on the + * other epair interface to avoid sending packets in the bridge + * that rely on this capability. + */ + sc = ifp->if_softc; + if ((ifp->if_capenable ^ sc->oifp->if_capenable) & + (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6)) { + sc->oifp->if_capenable &= + ~(IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); + sc->oifp->if_capenable |= ifp->if_capenable & + (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); + epair_caps_changed(sc->oifp); + } + VLAN_CAPABILITIES(ifp); + error = 0; + break; + default: /* Let the common ethernet handler process this. 
*/ error = ether_ioctl(ifp, cmd, data); @@ -573,8 +626,11 @@ epair_setup_ifp(struct epair_softc *sc, char *name, int unit) ifp->if_dname = epairname; ifp->if_dunit = unit; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_capabilities = IFCAP_VLAN_MTU; - ifp->if_capenable = IFCAP_VLAN_MTU; + ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_TXCSUM | + IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + ifp->if_capenable = IFCAP_VLAN_MTU | IFCAP_TXCSUM | + IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + epair_caps_changed(ifp); ifp->if_transmit = epair_transmit; ifp->if_qflush = epair_qflush; ifp->if_start = epair_start; diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 3ae0c01c0efc..9c157bf3d3c2 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -695,7 +695,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && - bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) + memcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } diff --git a/sys/net/if_ovpn.c b/sys/net/if_ovpn.c index fe015632f33e..1c18baac3417 100644 --- a/sys/net/if_ovpn.c +++ b/sys/net/if_ovpn.c @@ -904,9 +904,11 @@ ovpn_create_kkey_dir(struct ovpn_kkey_dir **kdirp, kdir->cipher = cipher; kdir->keylen = keylen; kdir->tx_seq = 1; - memcpy(kdir->key, key, keylen); + if (keylen != 0) + memcpy(kdir->key, key, keylen); kdir->noncelen = ivlen; - memcpy(kdir->nonce, iv, ivlen); + if (ivlen != 0) + memcpy(kdir->nonce, iv, ivlen); if (kdir->cipher != OVPN_CIPHER_ALG_NONE) { /* Crypto init */ diff --git a/sys/net/if_tap.h b/sys/net/if_tap.h index d84cd2eba6f3..8297b8d9e3d2 100644 --- a/sys/net/if_tap.h +++ b/sys/net/if_tap.h @@ -57,6 +57,8 @@ #define TAPGIFNAME TUNGIFNAME #define TAPSVNETHDR _IOW('t', 91, int) #define TAPGVNETHDR _IOR('t', 94, int) +#define TAPSTRANSIENT TUNSTRANSIENT +#define TAPGTRANSIENT TUNGTRANSIENT /* 
VMware ioctl's */ #define VMIO_SIOCSIFFLAGS _IOWINT('V', 0) diff --git a/sys/net/if_tun.h b/sys/net/if_tun.h index a8fb61db45a2..ccdc25944823 100644 --- a/sys/net/if_tun.h +++ b/sys/net/if_tun.h @@ -43,5 +43,7 @@ struct tuninfo { #define TUNSIFPID _IO('t', 95) #define TUNSIFHEAD _IOW('t', 96, int) #define TUNGIFHEAD _IOR('t', 97, int) +#define TUNSTRANSIENT _IOW('t', 98, int) +#define TUNGTRANSIENT _IOR('t', 99, int) #endif /* !_NET_IF_TUN_H_ */ diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 5e6f65c04b2f..c8dbb6aa8893 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -132,6 +132,7 @@ struct tuntap_softc { #define TUN_DYING 0x0200 #define TUN_L2 0x0400 #define TUN_VMNET 0x0800 +#define TUN_TRANSIENT 0x1000 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) #define TUN_READY (TUN_OPEN | TUN_INITED) @@ -443,6 +444,18 @@ tuntap_name2info(const char *name, int *outunit, int *outflags) return (0); } +static struct if_clone * +tuntap_cloner_from_flags(int tun_flags) +{ + + for (u_int i = 0; i < NDRV; i++) + if ((tun_flags & TUN_DRIVER_IDENT_MASK) == + tuntap_drivers[i].ident_flags) + return (V_tuntap_driver_cloners[i]); + + return (NULL); +} + /* * Get driver information from a set of flags specified. Masks the identifying * part of the flags and compares it against all of the available @@ -615,19 +628,39 @@ out: CURVNET_RESTORE(); } -static void -tun_destroy(struct tuntap_softc *tp) +static int +tun_destroy(struct tuntap_softc *tp, bool may_intr) { + int error; TUN_LOCK(tp); + + /* + * Transient tunnels may have set TUN_DYING if we're being destroyed as + * a result of the last close, which we'll allow. 
+ */ + MPASS((tp->tun_flags & (TUN_DYING | TUN_TRANSIENT)) != TUN_DYING); tp->tun_flags |= TUN_DYING; - if (tp->tun_busy != 0) - cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); - else - TUN_UNLOCK(tp); + error = 0; + while (tp->tun_busy != 0) { + if (may_intr) + error = cv_wait_sig(&tp->tun_cv, &tp->tun_mtx); + else + cv_wait(&tp->tun_cv, &tp->tun_mtx); + if (error != 0) { + tp->tun_flags &= ~TUN_DYING; + TUN_UNLOCK(tp); + return (error); + } + } + TUN_UNLOCK(tp); CURVNET_SET(TUN2IFP(tp)->if_vnet); + mtx_lock(&tunmtx); + TAILQ_REMOVE(&tunhead, tp, tun_list); + mtx_unlock(&tunmtx); + /* destroy_dev will take care of any alias. */ destroy_dev(tp->tun_dev); seldrain(&tp->tun_rsel); @@ -648,6 +681,8 @@ tun_destroy(struct tuntap_softc *tp) cv_destroy(&tp->tun_cv); free(tp, M_TUN); CURVNET_RESTORE(); + + return (0); } static int @@ -655,12 +690,7 @@ tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp, uint32_t fla { struct tuntap_softc *tp = ifp->if_softc; - mtx_lock(&tunmtx); - TAILQ_REMOVE(&tunhead, tp, tun_list); - mtx_unlock(&tunmtx); - tun_destroy(tp); - - return (0); + return (tun_destroy(tp, true)); } static void @@ -702,9 +732,9 @@ tun_uninit(const void *unused __unused) mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { - TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); - tun_destroy(tp); + /* tun_destroy() will remove it from the tailq. */ + tun_destroy(tp, false); mtx_lock(&tunmtx); } mtx_unlock(&tunmtx); @@ -1217,6 +1247,23 @@ out: tun_vnethdr_set(ifp, 0); tun_unbusy_locked(tp); + if ((tp->tun_flags & TUN_TRANSIENT) != 0) { + struct if_clone *cloner; + int error __diagused; + + /* Mark it busy so that nothing can re-open it. 
*/ + tp->tun_flags |= TUN_DYING; + TUN_UNLOCK(tp); + + CURVNET_SET_QUIET(ifp->if_home_vnet); + cloner = tuntap_cloner_from_flags(tp->tun_flags); + CURVNET_RESTORE(); + + error = if_clone_destroyif(cloner, ifp); + MPASS(error == 0 || error == EINTR || error == ERESTART); + return; + } + TUN_UNLOCK(tp); } @@ -1668,6 +1715,19 @@ tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, case TUNGDEBUG: *(int *)data = tundebug; break; + case TUNSTRANSIENT: + TUN_LOCK(tp); + if (*(int *)data) + tp->tun_flags |= TUN_TRANSIENT; + else + tp->tun_flags &= ~TUN_TRANSIENT; + TUN_UNLOCK(tp); + break; + case TUNGTRANSIENT: + TUN_LOCK(tp); + *(int *)data = (tp->tun_flags & TUN_TRANSIENT) != 0; + TUN_UNLOCK(tp); + break; case FIONBIO: break; case FIOASYNC: diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 08435e7bd5f6..f2df612b19c1 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -622,6 +622,7 @@ int if_setmtu(if_t ifp, int mtu); int if_getmtu(const if_t ifp); int if_getmtu_family(const if_t ifp, int family); void if_notifymtu(if_t ifp); +void if_setppromisc(const if_t ifp, bool ppromisc); int if_setflagbits(if_t ifp, int set, int clear); int if_setflags(if_t ifp, int flags); int if_getflags(const if_t ifp); diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 2b43f6f19051..1e6d98291c04 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -142,6 +142,7 @@ struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); static void iflib_tqg_detach(if_ctx_t ctx); +static int iflib_simple_transmit(if_t ifp, struct mbuf *m); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; @@ -198,6 +199,7 @@ struct iflib_ctx { uint8_t ifc_sysctl_use_logical_cores; uint16_t ifc_sysctl_extra_msix_vectors; bool ifc_cpus_are_physical_cores; + bool ifc_sysctl_simple_tx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -710,7 +712,7 @@ static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static 
void iflib_altq_if_start(if_t ifp); static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); #endif -static int iflib_register(if_ctx_t); +static void iflib_register(if_ctx_t); static void iflib_deregister(if_ctx_t); static void iflib_unregister_vlan_handlers(if_ctx_t ctx); static uint16_t iflib_get_mbuf_size_for(unsigned int size); @@ -725,6 +727,7 @@ static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf *iflib_fixup_rx(struct mbuf *m); #endif +static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh); static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); @@ -2624,8 +2627,10 @@ iflib_stop(if_ctx_t ctx) #endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); - /* clean any enqueued buffers */ - iflib_ifmp_purge(txq); + if (!ctx->ifc_sysctl_simple_tx) { + /* clean any enqueued buffers */ + iflib_ifmp_purge(txq); + } /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); @@ -3635,13 +3640,22 @@ defrag: * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { - txq->ift_no_desc_avail++; - bus_dmamap_unload(buf_tag, map); - DBG_COUNTER_INC(encap_txq_avail_fail); - DBG_COUNTER_INC(encap_txd_encap_fail); - if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) - GROUPTASK_ENQUEUE(&txq->ift_task); - return (ENOBUFS); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { + txq->ift_no_desc_avail++; + bus_dmamap_unload(buf_tag, map); + DBG_COUNTER_INC(encap_txq_avail_fail); + DBG_COUNTER_INC(encap_txd_encap_fail); + if (ctx->ifc_sysctl_simple_tx) { + *m_headp = m_head = iflib_remove_mbuf(txq); + m_freem(*m_headp); + DBG_COUNTER_INC(tx_frees); + *m_headp = NULL; + } + if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) + GROUPTASK_ENQUEUE(&txq->ift_task); + return (ENOBUFS); + } } /* * On Intel cards we can greatly reduce the number of TX interrupts @@ 
-4014,6 +4028,12 @@ _task_fn_tx(void *context) netmap_tx_irq(ifp, txq->ift_id)) goto skip_ifmp; #endif + if (ctx->ifc_sysctl_simple_tx) { + mtx_lock(&txq->ift_mtx); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + goto skip_ifmp; + } #ifdef ALTQ if (if_altq_is_enabled(ifp)) iflib_altq_if_start(ifp); @@ -4027,9 +4047,8 @@ _task_fn_tx(void *context) */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); -#ifdef DEV_NETMAP + skip_ifmp: -#endif if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else @@ -4285,6 +4304,10 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); DBG_COUNTER_INC(tx_frees); + if (err == ENOBUFS) + if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); + else + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } return (err); @@ -5123,15 +5146,19 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct ctx->ifc_dev = dev; ctx->ifc_softc = sc; - if ((err = iflib_register(ctx)) != 0) { - device_printf(dev, "iflib_register failed %d\n", err); - goto fail_ctx_free; - } + iflib_register(ctx); iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; - + if (ctx->ifc_sysctl_simple_tx) { +#ifndef ALTQ + if_settransmitfn(ifp, iflib_simple_transmit); + device_printf(dev, "using simple if_transmit\n"); +#else + device_printf(dev, "ALTQ prevents using simple if_transmit\n"); +#endif + } iflib_reset_qvalues(ctx); IFNET_WLOCK(); CTX_LOCK(ctx); @@ -5343,7 +5370,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct DEBUGNET_SET(ctx->ifc_ifp, iflib); - if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; @@ -5367,7 +5393,6 @@ fail_unlock: CTX_UNLOCK(ctx); IFNET_WUNLOCK(); iflib_deregister(ctx); -fail_ctx_free: device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) 
free(ctx->ifc_softc, M_IFLIB); @@ -5665,7 +5690,7 @@ _iflib_pre_assert(if_softc_ctx_t scctx) MPASS(scctx->isc_txrx->ift_rxd_flush); } -static int +static void iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; @@ -5698,6 +5723,7 @@ iflib_register(if_ctx_t ctx) if_settransmitfn(ifp, iflib_if_transmit); #endif if_setqflushfn(ifp, iflib_if_qflush); + if_setgetcounterfn(ifp, iflib_if_get_counter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, @@ -5711,7 +5737,6 @@ iflib_register(if_ctx_t ctx) ifmedia_init(ctx->ifc_mediap, IFM_IMASK, iflib_media_change, iflib_media_status); } - return (0); } static void @@ -6766,6 +6791,9 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); + SYSCTL_ADD_BOOL(ctx_list, oid_list, OID_AUTO, "simple_tx", + CTLFLAG_RDTUN, &ctx->ifc_sysctl_simple_tx, 0, + "use simple tx ring"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); @@ -7088,3 +7116,53 @@ iflib_debugnet_poll(if_t ifp, int count) return (0); } #endif /* DEBUGNET */ + + +static inline iflib_txq_t +iflib_simple_select_queue(if_ctx_t ctx, struct mbuf *m) +{ + int qidx; + + if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) + qidx = QIDX(ctx, m); + else + qidx = NTXQSETS(ctx) + FIRST_QSET(ctx) - 1; + return (&ctx->ifc_txqs[qidx]); +} + +static int +iflib_simple_transmit(if_t ifp, struct mbuf *m) +{ + if_ctx_t ctx; + iflib_txq_t txq; + int error; + int bytes_sent = 0, pkt_sent = 0, mcast_sent = 0; + + + ctx = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + txq = iflib_simple_select_queue(ctx, m); + mtx_lock(&txq->ift_mtx); + error = iflib_encap(txq, &m); + if (error == 0) 
{ + pkt_sent++; + bytes_sent += m->m_pkthdr.len; + mcast_sent += !!(m->m_flags & M_MCAST); + (void)iflib_txd_db_check(txq, true); + } else { + if (error == ENOBUFS) + if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); + else + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + } + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); + if (mcast_sent) + if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); + + return (error); +} diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index d6c13470f2eb..8b102f198de8 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -326,6 +326,7 @@ pf_counter_u64_zero(struct pf_counter_u64 *pfcu64) _Static_assert(sizeof(time_t) == 4 || sizeof(time_t) == 8, "unexpected time_t size"); SYSCTL_DECL(_net_pf); +MALLOC_DECLARE(M_PF); MALLOC_DECLARE(M_PFHASH); MALLOC_DECLARE(M_PF_RULE_ITEM); @@ -645,6 +646,7 @@ struct pf_kpool { int tblidx; u_int16_t proxy_port[2]; u_int8_t opts; + sa_family_t ipv6_nexthop_af; }; struct pf_rule_actions { @@ -859,8 +861,8 @@ struct pf_krule { u_int8_t keep_state; sa_family_t af; u_int8_t proto; - u_int8_t type; - u_int8_t code; + uint16_t type; + uint16_t code; u_int8_t flags; u_int8_t flagset; u_int8_t min_ttl; @@ -1749,6 +1751,7 @@ struct pf_kstatus { counter_u64_t lcounters[KLCNT_MAX]; /* limit counters */ struct pf_counter_u64 fcounters[FCNT_MAX]; /* state operation counters */ counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */ + counter_u64_t ncounters[NCNT_MAX]; uint32_t states; uint32_t src_nodes; uint32_t running; @@ -2389,8 +2392,6 @@ extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t, u_int16_t, u_int16_t, u_int8_t); -VNET_DECLARE(struct ifnet *, sync_ifp); -#define V_sync_ifp VNET(sync_ifp); VNET_DECLARE(struct pf_krule, pf_default_rule); #define V_pf_default_rule VNET(pf_default_rule) 
extern void pf_addrcpy(struct pf_addr *, const struct pf_addr *, @@ -2421,7 +2422,7 @@ int pf_multihome_scan_init(int, int, struct pf_pdesc *); int pf_multihome_scan_asconf(int, int, struct pf_pdesc *); u_int32_t pf_new_isn(struct pf_kstate *); -void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *, u_short *, +void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t, @@ -2438,6 +2439,7 @@ int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); void pf_normalize_init(void); void pf_normalize_cleanup(void); +uint64_t pf_normalize_get_frag_count(void); int pf_normalize_tcp(struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_kstate *); int pf_normalize_tcp_init(struct pf_pdesc *, @@ -2541,22 +2543,23 @@ struct mbuf *pf_build_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - u_int16_t, u_int16_t, u_int, int); + u_int16_t, u_int16_t, u_int, int, u_short *); void pf_send_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - u_int16_t, u_int16_t, int); + u_int16_t, u_int16_t, int, u_short *); void pf_syncookies_init(void); void pf_syncookies_cleanup(void); int pf_get_syncookies(struct pfioc_nv *); int pf_set_syncookies(struct pfioc_nv *); int pf_synflood_check(struct pf_pdesc *); -void pf_syncookie_send(struct pf_pdesc *); +void pf_syncookie_send(struct pf_pdesc *, u_short *); bool pf_syncookie_check(struct pf_pdesc *); u_int8_t pf_syncookie_validate(struct pf_pdesc *); -struct mbuf * pf_syncookie_recreate_syn(struct pf_pdesc *); +struct mbuf * pf_syncookie_recreate_syn(struct pf_pdesc *, + u_short *); VNET_DECLARE(struct pf_kstatus, 
pf_status); #define V_pf_status VNET(pf_status) @@ -2612,6 +2615,7 @@ struct pf_kruleset *pf_find_kruleset(const char *); struct pf_kruleset *pf_get_leaf_kruleset(char *, char **); struct pf_kruleset *pf_find_or_create_kruleset(const char *); void pf_rs_initialize(void); +void pf_rule_tree_free(struct pf_krule_global *); struct pf_krule *pf_krule_alloc(void); @@ -2680,7 +2684,7 @@ u_short pf_map_addr(sa_family_t, struct pf_krule *, struct pf_addr *, struct pf_kpool *); u_short pf_map_addr_sn(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, - sa_family_t *, struct pfi_kkif **nkif, + sa_family_t *, struct pfi_kkif **, struct pf_addr *, struct pf_kpool *, pf_sn_types_t); int pf_get_transaddr_af(struct pf_krule *, |