Diffstat (limited to 'sys/net')
-rw-r--r--  sys/net/if.c           |  37
-rw-r--r--  sys/net/if_bridge.c    |  34
-rw-r--r--  sys/net/if_bridgevar.h |   2
-rw-r--r--  sys/net/if_clone.h     |   2
-rw-r--r--  sys/net/if_epair.c     |   1
-rw-r--r--  sys/net/if_gif.c       |  10
-rw-r--r--  sys/net/if_pfsync.h    |   4
-rw-r--r--  sys/net/if_tap.h       |   2
-rw-r--r--  sys/net/if_tun.h       |   2
-rw-r--r--  sys/net/if_tuntap.c    |  88
-rw-r--r--  sys/net/if_var.h       |   1
-rw-r--r--  sys/net/iflib.c        | 267
-rw-r--r--  sys/net/pfvar.h        |  69
13 files changed, 267 insertions, 252 deletions
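The if.c and if_var.h hunks add an if_setppromisc() KPI so that in-kernel consumers can flip permanently promiscuous mode without open-coding the IFF_PROMISC bookkeeping that ifhwioctl() used to carry. A minimal caller-side sketch; the monitor_* names are hypothetical and only illustrate the intended use:

/*
 * Hypothetical in-kernel consumer of if_setppromisc().  The KPI keeps
 * the old ifhwioctl() semantics: IFF_PROMISC is set together with
 * IFF_PPROMISC, is only cleared again once ifp->if_pcount reaches
 * zero, and the transition is logged when log_promisc_mode_change
 * is enabled.
 */
static void
monitor_arm(if_t ifp)
{
	if_setppromisc(ifp, true);
}

static void
monitor_disarm(if_t ifp)
{
	if_setppromisc(ifp, false);
}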
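The if_tun.h, if_tap.h, and if_tuntap.c hunks add TUNSTRANSIENT/TUNGTRANSIENT (aliased as TAPSTRANSIENT/TAPGTRANSIENT) so a descriptor owner can mark a tun(4)/tap(4) device as transient, i.e. destroyed on last close rather than left behind when the consumer exits. A sketch of how a userland consumer might use it; the device path and error handling are illustrative only:

#include <sys/types.h>
#include <sys/ioctl.h>

#include <net/if_tun.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd, enable = 1, cur = 0;

	/* Opening /dev/tun clones the next free tun(4) unit. */
	fd = open("/dev/tun", O_RDWR);
	if (fd == -1)
		err(1, "open(/dev/tun)");

	/* Mark the interface transient: destroy it on last close. */
	if (ioctl(fd, TUNSTRANSIENT, &enable) == -1)
		err(1, "TUNSTRANSIENT");

	/* Read the flag back. */
	if (ioctl(fd, TUNGTRANSIENT, &cur) == -1)
		err(1, "TUNGTRANSIENT");
	printf("transient: %d\n", cur);

	/* ... use the tunnel; closing fd tears the interface down. */
	close(fd);
	return (0);
}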
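The iflib.c changes add an opt-in "simple" transmit path (iflib_simple_transmit()) that bypasses the mp_ring and reclaims completed descriptors inline under the txq lock; the patch falls back to the regular path when the kernel is built with ALTQ. The path is selected by a per-device simple_tx tunable declared CTLFLAG_RDTUN, so it must be set before the driver attaches. The node name below assumes an ix(4) NIC at unit 0 and is illustrative only:

# /boot/loader.conf -- hypothetical ix(4) unit 0; substitute the real
# driver name and unit number.
dev.ix.0.iflib.simple_tx="1"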
diff --git a/sys/net/if.c b/sys/net/if.c index 79c883fd4a0a..202be4794f6e 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -2589,16 +2589,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) * flip. They require special handling because in-kernel * consumers may indepdently toggle them. */ - if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { - if (new_flags & IFF_PPROMISC) - ifp->if_flags |= IFF_PROMISC; - else if (ifp->if_pcount == 0) - ifp->if_flags &= ~IFF_PROMISC; - if (log_promisc_mode_change) - if_printf(ifp, "permanently promiscuous mode %s\n", - ((new_flags & IFF_PPROMISC) ? - "enabled" : "disabled")); - } + if_setppromisc(ifp, new_flags & IFF_PPROMISC); if ((ifp->if_flags ^ new_flags) & IFF_PALLMULTI) { if (new_flags & IFF_PALLMULTI) ifp->if_flags |= IFF_ALLMULTI; @@ -4456,6 +4447,32 @@ if_getmtu_family(const if_t ifp, int family) return (ifp->if_mtu); } +void +if_setppromisc(if_t ifp, bool ppromisc) +{ + int new_flags; + + if (ppromisc) + new_flags = ifp->if_flags | IFF_PPROMISC; + else + new_flags = ifp->if_flags & ~IFF_PPROMISC; + if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { + if (new_flags & IFF_PPROMISC) + new_flags |= IFF_PROMISC; + /* + * Only unset IFF_PROMISC if there are no more consumers of + * promiscuity, i.e. the ifp->if_pcount refcount is 0. + */ + else if (ifp->if_pcount == 0) + new_flags &= ~IFF_PROMISC; + if (log_promisc_mode_change) + if_printf(ifp, "permanently promiscuous mode %s\n", + ((new_flags & IFF_PPROMISC) ? + "enabled" : "disabled")); + } + ifp->if_flags = new_flags; +} + /* * Methods for drivers to access interface unicast and multicast * link level addresses. Driver shall not know 'struct ifaddr' neither diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 3aed54c58e04..66555fd1feb5 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1500,8 +1500,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; bif->bif_savedcaps = ifs->if_capenable; bif->bif_vlanproto = ETHERTYPE_VLAN; - if (sc->sc_flags & IFBRF_VLANFILTER) - bif->bif_pvid = sc->sc_defpvid; + bif->bif_pvid = sc->sc_defpvid; if (sc->sc_flags & IFBRF_DEFQINQ) bif->bif_flags |= IFBIF_QINQ; @@ -1970,9 +1969,6 @@ bridge_ioctl_sifpvid(struct bridge_softc *sc, void *arg) struct ifbreq *req = arg; struct bridge_iflist *bif; - if ((sc->sc_flags & IFBRF_VLANFILTER) == 0) - return (EXTERROR(EINVAL, "VLAN filtering not enabled")); - bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); @@ -2410,12 +2406,10 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, mflags = m->m_flags; /* - * If VLAN filtering is enabled, and the native VLAN ID of the - * outgoing interface matches the VLAN ID of the frame, remove - * the VLAN header. + * If the native VLAN ID of the outgoing interface matches the + * VLAN ID of the frame, remove the VLAN tag. */ - if ((sc->sc_flags & IFBRF_VLANFILTER) && - bif->bif_pvid != DOT1Q_VID_NULL && + if (bif->bif_pvid != DOT1Q_VID_NULL && VLANTAGOF(m) == bif->bif_pvid) { m->m_flags &= ~M_VLANTAG; m->m_pkthdr.ether_vtag = 0; @@ -2441,6 +2435,12 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, } M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */ + /* + * XXXZL: gif(4) requires the af to be saved in csum_data field + * so that gif_transmit() routine can pull it back. 
+ */ + if (dst_ifp->if_type == IFT_GIF) + m->m_pkthdr.csum_data = AF_LINK; if ((err = dst_ifp->if_transmit(dst_ifp, m))) { int n; @@ -3290,9 +3290,19 @@ bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) if (vlan > DOT1Q_VID_MAX) return (false); - /* If VLAN filtering isn't enabled, pass everything. */ - if ((sbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0) + /* + * If VLAN filtering isn't enabled, pass everything, but add a tag + * if the port has a pvid configured. + */ + if ((sbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0) { + if (vlan == DOT1Q_VID_NULL && + sbif->bif_pvid != DOT1Q_VID_NULL) { + m->m_pkthdr.ether_vtag = sbif->bif_pvid; + m->m_flags |= M_VLANTAG; + } + return (true); + } /* If Q-in-Q is disabled, check for stacked tags. */ if ((sbif->bif_flags & IFBIF_QINQ) == 0) { diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index b0f579f688ac..5ed8c19f3128 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -159,7 +159,7 @@ struct ifbreq { uint32_t ifbr_addrexceeded; /* member if addr violations */ ether_vlanid_t ifbr_pvid; /* member if PVID */ uint16_t ifbr_vlanproto; /* member if VLAN protocol */ - uint8_t pad[32]; + uint8_t pad[28]; }; /* BRDGGIFFLAGS, BRDGSIFFLAGS */ diff --git a/sys/net/if_clone.h b/sys/net/if_clone.h index 5a74ffa1cc2f..d780e49af25f 100644 --- a/sys/net/if_clone.h +++ b/sys/net/if_clone.h @@ -153,7 +153,7 @@ int if_clone_destroy(const char *); int if_clone_list(struct if_clonereq *); void if_clone_restoregroup(struct ifnet *); -/* The below interfaces are used only by epair(4). */ +/* The below interfaces are used only by epair(4) and tun(4)/tap(4). */ void if_clone_addif(struct if_clone *, struct ifnet *); int if_clone_destroyif(struct if_clone *, struct ifnet *); diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c index a213a84e17db..581c2434b8fb 100644 --- a/sys/net/if_epair.c +++ b/sys/net/if_epair.c @@ -67,7 +67,6 @@ #include <net/if_var.h> #include <net/if_clone.h> #include <net/if_media.h> -#include <net/if_var.h> #include <net/if_private.h> #include <net/if_types.h> #include <net/netisr.h> diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index ef64c15074ed..272ab214a788 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -312,10 +312,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) goto err; } /* Now pull back the af that we stashed in the csum_data. */ - if (ifp->if_bridge) - af = AF_LINK; - else - af = m->m_pkthdr.csum_data; + af = m->m_pkthdr.csum_data; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->gif_fibnum); BPF_MTAP2(ifp, &af, sizeof(af), m); @@ -355,6 +352,8 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) break; #endif case AF_LINK: + KASSERT(ifp->if_bridge != NULL, + ("%s: bridge not attached", __func__)); proto = IPPROTO_ETHERIP; M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT); if (m == NULL) { @@ -405,9 +404,6 @@ gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, { uint32_t af; - KASSERT(ifp->if_bridge == NULL, - ("%s: unexpectedly called with bridge attached", __func__)); - /* BPF writes need to be handled specially. 
*/ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) memcpy(&af, dst->sa_data, sizeof(af)); diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 1efc220aa8e1..e99df0b85ccf 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -160,8 +160,8 @@ struct pfsync_ins_ack { struct pfsync_upd_c { u_int64_t id; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; u_int32_t creatorid; u_int32_t expire; u_int8_t timeout; diff --git a/sys/net/if_tap.h b/sys/net/if_tap.h index d84cd2eba6f3..8297b8d9e3d2 100644 --- a/sys/net/if_tap.h +++ b/sys/net/if_tap.h @@ -57,6 +57,8 @@ #define TAPGIFNAME TUNGIFNAME #define TAPSVNETHDR _IOW('t', 91, int) #define TAPGVNETHDR _IOR('t', 94, int) +#define TAPSTRANSIENT TUNSTRANSIENT +#define TAPGTRANSIENT TUNGTRANSIENT /* VMware ioctl's */ #define VMIO_SIOCSIFFLAGS _IOWINT('V', 0) diff --git a/sys/net/if_tun.h b/sys/net/if_tun.h index a8fb61db45a2..ccdc25944823 100644 --- a/sys/net/if_tun.h +++ b/sys/net/if_tun.h @@ -43,5 +43,7 @@ struct tuninfo { #define TUNSIFPID _IO('t', 95) #define TUNSIFHEAD _IOW('t', 96, int) #define TUNGIFHEAD _IOR('t', 97, int) +#define TUNSTRANSIENT _IOW('t', 98, int) +#define TUNGTRANSIENT _IOR('t', 99, int) #endif /* !_NET_IF_TUN_H_ */ diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 5e6f65c04b2f..c8dbb6aa8893 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -132,6 +132,7 @@ struct tuntap_softc { #define TUN_DYING 0x0200 #define TUN_L2 0x0400 #define TUN_VMNET 0x0800 +#define TUN_TRANSIENT 0x1000 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) #define TUN_READY (TUN_OPEN | TUN_INITED) @@ -443,6 +444,18 @@ tuntap_name2info(const char *name, int *outunit, int *outflags) return (0); } +static struct if_clone * +tuntap_cloner_from_flags(int tun_flags) +{ + + for (u_int i = 0; i < NDRV; i++) + if ((tun_flags & TUN_DRIVER_IDENT_MASK) == + tuntap_drivers[i].ident_flags) + return (V_tuntap_driver_cloners[i]); + + return (NULL); +} + /* * Get driver information from a set of flags specified. Masks the identifying * part of the flags and compares it against all of the available @@ -615,19 +628,39 @@ out: CURVNET_RESTORE(); } -static void -tun_destroy(struct tuntap_softc *tp) +static int +tun_destroy(struct tuntap_softc *tp, bool may_intr) { + int error; TUN_LOCK(tp); + + /* + * Transient tunnels may have set TUN_DYING if we're being destroyed as + * a result of the last close, which we'll allow. + */ + MPASS((tp->tun_flags & (TUN_DYING | TUN_TRANSIENT)) != TUN_DYING); tp->tun_flags |= TUN_DYING; - if (tp->tun_busy != 0) - cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); - else - TUN_UNLOCK(tp); + error = 0; + while (tp->tun_busy != 0) { + if (may_intr) + error = cv_wait_sig(&tp->tun_cv, &tp->tun_mtx); + else + cv_wait(&tp->tun_cv, &tp->tun_mtx); + if (error != 0) { + tp->tun_flags &= ~TUN_DYING; + TUN_UNLOCK(tp); + return (error); + } + } + TUN_UNLOCK(tp); CURVNET_SET(TUN2IFP(tp)->if_vnet); + mtx_lock(&tunmtx); + TAILQ_REMOVE(&tunhead, tp, tun_list); + mtx_unlock(&tunmtx); + /* destroy_dev will take care of any alias. 
*/ destroy_dev(tp->tun_dev); seldrain(&tp->tun_rsel); @@ -648,6 +681,8 @@ tun_destroy(struct tuntap_softc *tp) cv_destroy(&tp->tun_cv); free(tp, M_TUN); CURVNET_RESTORE(); + + return (0); } static int @@ -655,12 +690,7 @@ tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp, uint32_t fla { struct tuntap_softc *tp = ifp->if_softc; - mtx_lock(&tunmtx); - TAILQ_REMOVE(&tunhead, tp, tun_list); - mtx_unlock(&tunmtx); - tun_destroy(tp); - - return (0); + return (tun_destroy(tp, true)); } static void @@ -702,9 +732,9 @@ tun_uninit(const void *unused __unused) mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { - TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); - tun_destroy(tp); + /* tun_destroy() will remove it from the tailq. */ + tun_destroy(tp, false); mtx_lock(&tunmtx); } mtx_unlock(&tunmtx); @@ -1217,6 +1247,23 @@ out: tun_vnethdr_set(ifp, 0); tun_unbusy_locked(tp); + if ((tp->tun_flags & TUN_TRANSIENT) != 0) { + struct if_clone *cloner; + int error __diagused; + + /* Mark it busy so that nothing can re-open it. */ + tp->tun_flags |= TUN_DYING; + TUN_UNLOCK(tp); + + CURVNET_SET_QUIET(ifp->if_home_vnet); + cloner = tuntap_cloner_from_flags(tp->tun_flags); + CURVNET_RESTORE(); + + error = if_clone_destroyif(cloner, ifp); + MPASS(error == 0 || error == EINTR || error == ERESTART); + return; + } + TUN_UNLOCK(tp); } @@ -1668,6 +1715,19 @@ tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, case TUNGDEBUG: *(int *)data = tundebug; break; + case TUNSTRANSIENT: + TUN_LOCK(tp); + if (*(int *)data) + tp->tun_flags |= TUN_TRANSIENT; + else + tp->tun_flags &= ~TUN_TRANSIENT; + TUN_UNLOCK(tp); + break; + case TUNGTRANSIENT: + TUN_LOCK(tp); + *(int *)data = (tp->tun_flags & TUN_TRANSIENT) != 0; + TUN_UNLOCK(tp); + break; case FIONBIO: break; case FIOASYNC: diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 08435e7bd5f6..f2df612b19c1 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -622,6 +622,7 @@ int if_setmtu(if_t ifp, int mtu); int if_getmtu(const if_t ifp); int if_getmtu_family(const if_t ifp, int family); void if_notifymtu(if_t ifp); +void if_setppromisc(const if_t ifp, bool ppromisc); int if_setflagbits(if_t ifp, int set, int clear); int if_setflags(if_t ifp, int flags); int if_getflags(const if_t ifp); diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 2b8f0e617df3..98c59e5de988 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -70,6 +70,7 @@ #include <netinet/ip.h> #include <netinet/ip6.h> #include <netinet/tcp.h> +#include <netinet/udp.h> #include <netinet/ip_var.h> #include <netinet6/ip6_var.h> @@ -141,6 +142,7 @@ struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); static void iflib_tqg_detach(if_ctx_t ctx); +static int iflib_simple_transmit(if_t ifp, struct mbuf *m); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; @@ -197,6 +199,7 @@ struct iflib_ctx { uint8_t ifc_sysctl_use_logical_cores; uint16_t ifc_sysctl_extra_msix_vectors; bool ifc_cpus_are_physical_cores; + bool ifc_sysctl_simple_tx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -724,6 +727,7 @@ static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf *iflib_fixup_rx(struct mbuf *m); #endif +static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh); static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); @@ -2623,8 +2627,10 @@ iflib_stop(if_ctx_t ctx) #endif /* DEV_NETMAP 
*/ CALLOUT_UNLOCK(txq); - /* clean any enqueued buffers */ - iflib_ifmp_purge(txq); + if (!ctx->ifc_sysctl_simple_tx) { + /* clean any enqueued buffers */ + iflib_ifmp_purge(txq); + } /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); @@ -2889,51 +2895,6 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) return (m); } -#if defined(INET6) || defined(INET) -static void -iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) -{ - CURVNET_SET(if_getvnet(lc->ifp)); -#if defined(INET6) - *v6 = V_ip6_forwarding; -#endif -#if defined(INET) - *v4 = V_ipforwarding; -#endif - CURVNET_RESTORE(); -} - -/* - * Returns true if it's possible this packet could be LROed. - * if it returns false, it is guaranteed that tcp_lro_rx() - * would not return zero. - */ -static bool -iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) -{ - struct ether_header *eh; - - eh = mtod(m, struct ether_header *); - switch (eh->ether_type) { -#if defined(INET6) - case htons(ETHERTYPE_IPV6): - return (!v6_forwarding); -#endif -#if defined(INET) - case htons(ETHERTYPE_IP): - return (!v4_forwarding); -#endif - } - - return (false); -} -#else -static void -iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) -{ -} -#endif - static void _task_fn_rx_watchdog(void *context) { @@ -2954,19 +2915,19 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; +#if defined(INET6) || defined(INET) int lro_enabled; - bool v4_forwarding, v6_forwarding, lro_possible; +#endif uint8_t retval = 0; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ - struct mbuf *m, *mh, *mt, *mf; + struct mbuf *m, *mh, *mt; NET_EPOCH_ASSERT(); - lro_possible = v4_forwarding = v6_forwarding = false; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); @@ -2982,6 +2943,10 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) return (retval); } +#if defined(INET6) || defined(INET) + lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); +#endif + /* pfil needs the vnet to be set */ CURVNET_SET_QUIET(if_getvnet(ifp)); for (budget_left = budget; budget_left > 0 && avail > 0;) { @@ -3026,7 +2991,17 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) if (__predict_false(m == NULL)) continue; - /* imm_pkt: -- cxgb */ +#ifndef __NO_STRICT_ALIGNMENT + if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) + continue; +#endif +#if defined(INET6) || defined(INET) + if (lro_enabled) { + tcp_lro_queue_mbuf(&rxq->ifr_lc, m); + continue; + } +#endif + if (mh == NULL) mh = mt = m; else { @@ -3039,49 +3014,8 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= iflib_fl_refill_all(ctx, fl); - lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); - if (lro_enabled) - iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); - mt = mf = NULL; - while (mh != NULL) { - m = mh; - mh = mh->m_nextpkt; - m->m_nextpkt = NULL; -#ifndef __NO_STRICT_ALIGNMENT - if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) - continue; -#endif -#if defined(INET6) || defined(INET) - if (lro_enabled) { - if (!lro_possible) { - lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); - if (lro_possible && mf != NULL) { - if_input(ifp, mf); - DBG_COUNTER_INC(rx_if_input); - mt = mf = NULL; - } - } - if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC | CSUM_L4_VALID)) == - 
(CSUM_L4_CALC | CSUM_L4_VALID)) { - if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) - continue; - } - } -#endif - if (lro_possible) { - if_input(ifp, m); - DBG_COUNTER_INC(rx_if_input); - continue; - } - - if (mf == NULL) - mf = m; - if (mt != NULL) - mt->m_nextpkt = m; - mt = m; - } - if (mf != NULL) { - if_input(ifp, mf); + if (mh != NULL) { + if_input(ifp, mh); DBG_COUNTER_INC(rx_if_input); } @@ -3372,42 +3306,28 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) #ifdef INET case ETHERTYPE_IP: { - struct mbuf *n; - struct ip *ip = NULL; - struct tcphdr *th = NULL; - int minthlen; + struct ip *ip; + struct tcphdr *th; + uint8_t hlen; - minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); - if (__predict_false(m->m_len < minthlen)) { - /* - * if this code bloat is causing too much of a hit - * move it to a separate function and mark it noinline - */ - if (m->m_len == pi->ipi_ehdrlen) { - n = m->m_next; - MPASS(n); - if (n->m_len >= sizeof(*ip)) { - ip = (struct ip *)n->m_data; - if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) - th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - } else { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) - return (ENOMEM); - ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); - } - } else { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) - return (ENOMEM); - ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); - if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) - th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - } - } else { - ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); - if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) - th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); + hlen = pi->ipi_ehdrlen + sizeof(*ip); + if (__predict_false(m->m_len < hlen)) { + txq->ift_pullups++; + if (__predict_false((m = m_pullup(m, hlen)) == NULL)) + return (ENOMEM); + } + ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); + hlen = pi->ipi_ehdrlen + (ip->ip_hl << 2); + if (ip->ip_p == IPPROTO_TCP) { + hlen += sizeof(*th); + th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); + } else if (ip->ip_p == IPPROTO_UDP) { + hlen += sizeof(struct udphdr); + } + if (__predict_false(m->m_len < hlen)) { + txq->ift_pullups++; + if ((m = m_pullup(m, hlen)) == NULL) + return (ENOMEM); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; @@ -3417,12 +3337,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD4(pi)) { if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { - if (__predict_false(th == NULL)) { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) - return (ENOMEM); - th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); - } pi->ipi_tcp_hflags = tcp_get_flags(th); pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; @@ -3726,13 +3640,16 @@ defrag: * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { - txq->ift_no_desc_avail++; - bus_dmamap_unload(buf_tag, map); - DBG_COUNTER_INC(encap_txq_avail_fail); - DBG_COUNTER_INC(encap_txd_encap_fail); - if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) - GROUPTASK_ENQUEUE(&txq->ift_task); - return (ENOBUFS); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { + txq->ift_no_desc_avail++; + bus_dmamap_unload(buf_tag, map); + 
DBG_COUNTER_INC(encap_txq_avail_fail); + DBG_COUNTER_INC(encap_txd_encap_fail); + if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) + GROUPTASK_ENQUEUE(&txq->ift_task); + return (ENOBUFS); + } } /* * On Intel cards we can greatly reduce the number of TX interrupts @@ -4105,6 +4022,12 @@ _task_fn_tx(void *context) netmap_tx_irq(ifp, txq->ift_id)) goto skip_ifmp; #endif + if (ctx->ifc_sysctl_simple_tx) { + mtx_lock(&txq->ift_mtx); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + goto skip_ifmp; + } #ifdef ALTQ if (if_altq_is_enabled(ifp)) iflib_altq_if_start(ifp); @@ -4118,9 +4041,8 @@ _task_fn_tx(void *context) */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); -#ifdef DEV_NETMAP + skip_ifmp: -#endif if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else @@ -5222,7 +5144,14 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; - + if (ctx->ifc_sysctl_simple_tx) { +#ifndef ALTQ + if_settransmitfn(ifp, iflib_simple_transmit); + device_printf(dev, "using simple if_transmit\n"); +#else + device_printf(dev, "ALTQ prevents using simple if_transmit\n"); +#endif + } iflib_reset_qvalues(ctx); IFNET_WLOCK(); CTX_LOCK(ctx); @@ -6857,6 +6786,9 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); + SYSCTL_ADD_BOOL(ctx_list, oid_list, OID_AUTO, "simple_tx", + CTLFLAG_RDTUN, &ctx->ifc_sysctl_simple_tx, 0, + "use simple tx ring"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); @@ -7179,3 +7111,48 @@ iflib_debugnet_poll(if_t ifp, int count) return (0); } #endif /* DEBUGNET */ + + +static inline iflib_txq_t +iflib_simple_select_queue(if_ctx_t ctx, struct mbuf *m) +{ + int qidx; + + if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) + qidx = QIDX(ctx, m); + else + qidx = NTXQSETS(ctx) + FIRST_QSET(ctx) - 1; + return (&ctx->ifc_txqs[qidx]); +} + +static int +iflib_simple_transmit(if_t ifp, struct mbuf *m) +{ + if_ctx_t ctx; + iflib_txq_t txq; + int error; + int bytes_sent = 0, pkt_sent = 0, mcast_sent = 0; + + + ctx = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + txq = iflib_simple_select_queue(ctx, m); + mtx_lock(&txq->ift_mtx); + error = iflib_encap(txq, &m); + if (error == 0) { + pkt_sent++; + bytes_sent += m->m_pkthdr.len; + mcast_sent += !!(m->m_flags & M_MCAST); + (void)iflib_txd_db_check(txq, true); + } + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); + if (mcast_sent) + if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); + + return (error); +} diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index c397f0b67896..d6c13470f2eb 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1020,7 +1020,7 @@ struct pf_state_scrub_export { #define PF_SCRUB_FLAG_VALID 0x01 uint8_t scrub_flag; uint32_t pfss_ts_mod; /* timestamp modulation */ -}; +} __packed; struct pf_state_key_export { struct pf_addr addr[2]; @@ -1037,7 +1037,7 @@ struct pf_state_peer_export { uint8_t state; /* active state level */ uint8_t wscale; /* window scaling factor */ uint8_t dummy[6]; -}; +} __packed; _Static_assert(sizeof(struct 
pf_state_peer_export) == 32, "size incorrect"); struct pf_state_export { @@ -1179,26 +1179,6 @@ struct pf_test_ctx { * Unified state structures for pulling states out of the kernel * used by pfsync(4) and the pf(4) ioctl. */ -struct pfsync_state_scrub { - u_int16_t pfss_flags; - u_int8_t pfss_ttl; /* stashed TTL */ -#define PFSYNC_SCRUB_FLAG_VALID 0x01 - u_int8_t scrub_flag; - u_int32_t pfss_ts_mod; /* timestamp modulation */ -} __packed; - -struct pfsync_state_peer { - struct pfsync_state_scrub scrub; /* state is scrubbed */ - u_int32_t seqlo; /* Max sequence number sent */ - u_int32_t seqhi; /* Max the other end ACKd + win */ - u_int32_t seqdiff; /* Sequence number modulator */ - u_int16_t max_win; /* largest window (pre scaling) */ - u_int16_t mss; /* Maximum segment size option */ - u_int8_t state; /* active state level */ - u_int8_t wscale; /* window scaling factor */ - u_int8_t pad[6]; -} __packed; - struct pfsync_state_key { struct pf_addr addr[2]; u_int16_t port[2]; @@ -1208,8 +1188,8 @@ struct pfsync_state_1301 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; @@ -1235,8 +1215,8 @@ struct pfsync_state_1400 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; @@ -1323,39 +1303,10 @@ extern pflog_packet_t *pflog_packet_ptr; /* for copies to/from network byte order */ /* ioctl interface also uses network byte order */ -#define pf_state_peer_hton(s,d) do { \ - (d)->seqlo = htonl((s)->seqlo); \ - (d)->seqhi = htonl((s)->seqhi); \ - (d)->seqdiff = htonl((s)->seqdiff); \ - (d)->max_win = htons((s)->max_win); \ - (d)->mss = htons((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub) { \ - (d)->scrub.pfss_flags = \ - htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ - (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ - (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ - (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ - } \ -} while (0) - -#define pf_state_peer_ntoh(s,d) do { \ - (d)->seqlo = ntohl((s)->seqlo); \ - (d)->seqhi = ntohl((s)->seqhi); \ - (d)->seqdiff = ntohl((s)->seqdiff); \ - (d)->max_win = ntohs((s)->max_win); \ - (d)->mss = ntohs((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ - (d)->scrub != NULL) { \ - (d)->scrub->pfss_flags = \ - ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ - (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ - (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ - } \ -} while (0) +void pf_state_peer_hton(const struct pf_state_peer *, + struct pf_state_peer_export *); +void pf_state_peer_ntoh(const struct pf_state_peer_export *, + struct pf_state_peer *); #define pf_state_counter_hton(s,d) do { \ d[0] = htonl((s>>32)&0xffffffff); \ |