Diffstat (limited to 'sys')
 sys/dev/sdio/sdiob.c                     |   7
 sys/fs/fuse/fuse_vnops.c                 |  34
 sys/kern/kern_racct.c                    | 307
 sys/net/if_bridge.c                      |  84
 sys/net/if_bridgevar.h                   |  11
 sys/net/if_epair.c                       |  35
 sys/net/pfvar.h                          |   4
 sys/netinet/tcp_sack.c                   |  32
 sys/netinet/tcp_stacks/rack_bbr_common.c |   6
 sys/netpfil/pf/pf.c                      |  19
 sys/netpfil/pf/pf.h                      |   4
 sys/netpfil/pf/pf_ioctl.c                |   3
 sys/netpfil/pf/pf_nl.c                   | 132
 sys/netpfil/pf/pf_nl.h                   |  19
 sys/netpfil/pf/pf_norm.c                 |   2
 sys/netpfil/pf/pf_ruleset.c              |  16
 sys/netpfil/pf/pf_table.c                |   2
 sys/sys/proc.h                           |   1
 sys/sys/racct.h                          |   6
19 files changed, 484 insertions(+), 240 deletions(-)
diff --git a/sys/dev/sdio/sdiob.c b/sys/dev/sdio/sdiob.c
index 4ec2058fa2e4..cb2cc0da6b77 100644
--- a/sys/dev/sdio/sdiob.c
+++ b/sys/dev/sdio/sdiob.c
@@ -150,7 +150,7 @@ sdiob_rw_direct_sc(struct sdiob_softc *sc, uint8_t fn, uint32_t addr, bool wr,
 		sc->ccb = xpt_alloc_ccb();
 	else
 		memset(sc->ccb, 0, sizeof(*sc->ccb));
-	xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NONE);
+	xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NORMAL);
 	CAM_DEBUG(sc->ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("%s(fn=%d, addr=%#02x, wr=%d, *val=%#02x)\n", __func__,
 	    fn, addr, wr, *val));
@@ -250,7 +250,7 @@ sdiob_rw_extended_cam(struct sdiob_softc *sc, uint8_t fn, uint32_t addr,
 		sc->ccb = xpt_alloc_ccb();
 	else
 		memset(sc->ccb, 0, sizeof(*sc->ccb));
-	xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NONE);
+	xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NORMAL);
 	CAM_DEBUG(sc->ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("%s(fn=%d addr=%#0x wr=%d b_count=%u blksz=%u buf=%p incr=%d)\n",
 	    __func__, fn, addr, wr, b_count, blksz, buffer, incaddr));
@@ -977,9 +977,6 @@ sdiobdiscover(void *context, int pending)
 
 	if (sc->ccb == NULL)
 		sc->ccb = xpt_alloc_ccb();
-	else
-		memset(sc->ccb, 0, sizeof(*sc->ccb));
-	xpt_setup_ccb(&sc->ccb->ccb_h, periph->path, CAM_PRIORITY_NONE);
 
 	/*
 	 * Read CCCR and FBR of each function, get manufacturer and device IDs,
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 32872e8f3f3a..b90ce60ec664 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -1219,36 +1219,20 @@ fuse_vnop_getattr(struct vop_getattr_args *ap)
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
-	int err = 0;
-	int dataflags;
-
-	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
-
-	/* Note that we are not bailing out on a dead file system just yet. */
-	if (!(dataflags & FSESS_INITED)) {
-		if (!vnode_isvroot(vp)) {
-			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
-			return (EXTERROR(ENOTCONN, "FUSE daemon is not "
-			    "initialized"));
-		} else {
-			goto fake;
-		}
-	}
+	int err;
 
 	err = fuse_internal_getattr(vp, vap, cred, td);
 	if (err == ENOTCONN && vnode_isvroot(vp)) {
-		/* see comment in fuse_vfsop_statfs() */
-		goto fake;
-	} else {
-		return err;
+		/*
+		 * We want to seem a legitimate fs even if the daemon is dead,
+		 * so that, eg., we can still do path based unmounting after
+		 * the daemon dies.
+		 */
+		err = 0;
+		bzero(vap, sizeof(*vap));
+		vap->va_type = vnode_vtype(vp);
 	}
-
-fake:
-	bzero(vap, sizeof(*vap));
-	vap->va_type = vnode_vtype(vp);
-
-	return 0;
+	return err;
 }
 
 /*
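The fuse_vnop_getattr() change above inverts the old flow: the daemon is always asked first, and zeroed, type-only attributes are synthesized only when the root vnode reports ENOTCONN. A minimal userland sketch of observing that behavior; the mount point path is a placeholder:

	/*
	 * Hypothetical check: stat() on the root of a FUSE mount whose daemon
	 * has died.  After this change the kernel fakes the attributes for the
	 * root vnode instead of failing, so the call succeeds and the vnode
	 * type (directory) is preserved.
	 */
	#include <sys/stat.h>
	#include <stdio.h>

	int
	main(void)
	{
		struct stat sb;

		if (stat("/mnt/fuse", &sb) != 0) {	/* hypothetical mount point */
			perror("stat");
			return (1);
		}
		/* Other fields are zeroed; only the type survives. */
		printf("is directory: %d\n", S_ISDIR(sb.st_mode) != 0);
		return (0);
	}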
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
index 7ee3b9e2048a..7351e9cb6313 100644
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -96,6 +96,13 @@ static void racct_sub_cred_locked(struct ucred *cred, int resource,
 		uint64_t amount);
 static void racct_add_cred_locked(struct ucred *cred, int resource,
 		uint64_t amount);
+static int racct_set_locked(struct proc *p, int resource, uint64_t amount,
+		int force);
+static void racct_updatepcpu_locked(struct proc *p);
+static void racct_updatepcpu_racct_locked(struct racct *racct);
+static void racct_updatepcpu_containers(void);
+static void racct_settime_locked(struct proc *p, bool exit);
+static void racct_zeropcpu_locked(struct proc *p);
 
 SDT_PROVIDER_DEFINE(racct);
 SDT_PROBE_DEFINE3(racct, , rusage, add,
@@ -308,68 +315,6 @@ fixpt_t ccpu_exp[] = {
 
 #define	CCPU_EXP_MAX	110
 
-/*
- * This function is analogical to the getpcpu() function in the ps(1) command.
- * They should both calculate in the same way so that the racct %cpu
- * calculations are consistent with the values shown by the ps(1) tool.
- * The calculations are more complex in the 4BSD scheduler because of the value
- * of the ccpu variable.  In ULE it is defined to be zero which saves us some
- * work.
- */
-static uint64_t
-racct_getpcpu(struct proc *p, u_int pcpu)
-{
-	u_int swtime;
-#ifdef SCHED_4BSD
-	fixpt_t pctcpu, pctcpu_next;
-#endif
-	fixpt_t p_pctcpu;
-	struct thread *td;
-
-	ASSERT_RACCT_ENABLED();
-	KASSERT((p->p_flag & P_IDLEPROC) == 0,
-	    ("racct_getpcpu: idle process %p", p));
-
-	swtime = (ticks - p->p_swtick) / hz;
-
-	/*
-	 * For short-lived processes, the sched_pctcpu() returns small
-	 * values even for cpu intensive processes.  Therefore we use
-	 * our own estimate in this case.
-	 */
-	if (swtime < RACCT_PCPU_SECS)
-		return (pcpu);
-
-	p_pctcpu = 0;
-	FOREACH_THREAD_IN_PROC(p, td) {
-		thread_lock(td);
-#ifdef SCHED_4BSD
-		pctcpu = sched_pctcpu(td);
-		/* Count also the yet unfinished second. */
-		pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT;
-		pctcpu_next += sched_pctcpu_delta(td);
-		p_pctcpu += max(pctcpu, pctcpu_next);
-#else
-		/*
-		 * In ULE the %cpu statistics are updated on every
-		 * sched_pctcpu() call.  So special calculations to
-		 * account for the latest (unfinished) second are
-		 * not needed.
-		 */
-		p_pctcpu += sched_pctcpu(td);
-#endif
-		thread_unlock(td);
-	}
-
-#ifdef SCHED_4BSD
-	if (swtime <= CCPU_EXP_MAX)
-		return ((100 * (uint64_t)p_pctcpu * 1000000) /
-		    (FSCALE - ccpu_exp[swtime]));
-#endif
-
-	return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE);
-}
-
 static void
 racct_add_racct(struct racct *dest, const struct racct *src)
 {
@@ -499,19 +444,6 @@ racct_adjust_resource(struct racct *racct, int resource,
 		    ("%s: resource %d usage < 0", __func__, resource));
 		racct->r_resources[resource] = 0;
 	}
-
-	/*
-	 * There are some cases where the racct %cpu resource would grow
-	 * beyond 100% per core.  For example in racct_proc_exit() we add
-	 * the process %cpu usage to the ucred racct containers.  If too
-	 * many processes terminated in a short time span, the ucred %cpu
-	 * resource could grow too much.  Also, the 4BSD scheduler sometimes
-	 * returns for a thread more than 100% cpu usage.  So we set a sane
-	 * boundary here to 100% * the maximum number of CPUs.
-	 */
-	if ((resource == RACCT_PCTCPU) &&
-	    (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU))
-		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU;
 }
 
 static int
@@ -635,10 +567,44 @@ racct_add_buf(struct proc *p, const struct buf *bp, int is_write)
 	RACCT_UNLOCK();
 }
 
+static void
+racct_settime_locked(struct proc *p, bool exit)
+{
+	struct thread *td;
+	struct timeval wallclock;
+	uint64_t runtime;
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	if (exit) {
+		/*
+		 * proc_reap() has already calculated rux
+		 * and added crux to rux.
+		 */
+		runtime = cputick2usec(p->p_rux.rux_runtime -
+		    p->p_crux.rux_runtime);
+	} else {
+		PROC_STATLOCK(p);
+		FOREACH_THREAD_IN_PROC(p, td)
+			ruxagg(p, td);
+		PROC_STATUNLOCK(p);
+		runtime = cputick2usec(p->p_rux.rux_runtime);
+	}
+	microuptime(&wallclock);
+	timevalsub(&wallclock, &p->p_stats->p_start);
+
+	racct_set_locked(p, RACCT_CPU, runtime, 0);
+	racct_set_locked(p, RACCT_WALLCLOCK,
+	    (uint64_t)wallclock.tv_sec * 1000000 +
+	    wallclock.tv_usec, 0);
+}
+
 static int
 racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
 {
-	int64_t old_amount, decayed_amount, diff_proc, diff_cred;
+	int64_t old_amount, diff_proc, diff_cred;
 #ifdef RCTL
 	int error;
 #endif
@@ -655,17 +621,7 @@ racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
 	 * The diffs may be negative.
 	 */
 	diff_proc = amount - old_amount;
-	if (resource == RACCT_PCTCPU) {
-		/*
-		 * Resources in per-credential racct containers may decay.
-		 * If this is the case, we need to calculate the difference
-		 * between the new amount and the proportional value of the
-		 * old amount that has decayed in the ucred racct containers.
-		 */
-		decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
-		diff_cred = amount - decayed_amount;
-	} else
-		diff_cred = diff_proc;
+	diff_cred = diff_proc;
 #ifdef notyet
 	KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource),
 	    ("%s: usage of non-droppable resource %d dropping", __func__,
@@ -908,8 +864,6 @@ racct_proc_fork(struct proc *parent, struct proc *child)
 		goto out;
 #endif
 
-	/* Init process cpu time. */
-	child->p_prev_runtime = 0;
 	child->p_throttled = 0;
 
 	/*
@@ -964,37 +918,16 @@ racct_proc_fork_done(struct proc *child)
 void
 racct_proc_exit(struct proc *p)
 {
-	struct timeval wallclock;
-	uint64_t pct_estimate, pct, runtime;
 	int i;
 
 	if (!racct_enable)
 		return;
 
 	PROC_LOCK(p);
-	/*
-	 * We don't need to calculate rux, proc_reap() has already done this.
-	 */
-	runtime = cputick2usec(p->p_rux.rux_runtime);
-#ifdef notyet
-	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
-#else
-	if (runtime < p->p_prev_runtime)
-		runtime = p->p_prev_runtime;
-#endif
-	microuptime(&wallclock);
-	timevalsub(&wallclock, &p->p_stats->p_start);
-	if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
-		pct_estimate = (1000000 * runtime * 100) /
-		    ((uint64_t)wallclock.tv_sec * 1000000 +
-		    wallclock.tv_usec);
-	} else
-		pct_estimate = 0;
-	pct = racct_getpcpu(p, pct_estimate);
-
 	RACCT_LOCK();
-	racct_set_locked(p, RACCT_CPU, runtime, 0);
-	racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);
+
+	racct_settime_locked(p, true);
+	racct_zeropcpu_locked(p);
 
 	KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0,
 	    ("process reaped with %ju allocated for RSS\n",
@@ -1068,6 +1001,10 @@ racct_move(struct racct *dest, struct racct *src)
 	RACCT_LOCK();
 	racct_add_racct(dest, src);
 	racct_sub_racct(src, src);
+	dest->r_runtime = src->r_runtime;
+	dest->r_time = src->r_time;
+	src->r_runtime = 0;
+	timevalsub(&src->r_time, &src->r_time);
 	RACCT_UNLOCK();
 }
 
@@ -1170,8 +1107,6 @@ racct_proc_wakeup(struct proc *p)
 static void
 racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
 {
-	int64_t r_old, r_new;
-
 	ASSERT_RACCT_ENABLED();
 	RACCT_LOCK_ASSERT();
 
@@ -1181,15 +1116,6 @@ racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
 	rctl_throttle_decay(racct, RACCT_READIOPS);
 	rctl_throttle_decay(racct, RACCT_WRITEIOPS);
 #endif
-
-	r_old = racct->r_resources[RACCT_PCTCPU];
-
-	/* If there is nothing to decay, just exit. */
-	if (r_old <= 0)
-		return;
-
-	r_new = r_old * RACCT_DECAY_FACTOR / FSCALE;
-	racct->r_resources[RACCT_PCTCPU] = r_new;
 }
 
 static void
@@ -1221,15 +1147,105 @@ racct_decay(void)
 }
 
 static void
+racct_updatepcpu_racct_locked(struct racct *racct)
+{
+	struct timeval diff;
+	uint64_t elapsed;
+	uint64_t runtime;
+	uint64_t newpcpu;
+	uint64_t oldpcpu;
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+
+	/* Difference between now and previously-recorded time. */
+	microuptime(&diff);
+	timevalsub(&diff, &racct->r_time);
+	elapsed = (uint64_t)diff.tv_sec * 1000000 + diff.tv_usec;
+
+	/* Difference between current and previously-recorded runtime. */
+	runtime = racct->r_resources[RACCT_CPU] - racct->r_runtime;
+
+	newpcpu = runtime * 100 * 1000000 / elapsed;
+	oldpcpu = racct->r_resources[RACCT_PCTCPU];
+	/*
+	 * This calculation is equivalent to
+	 * (1 - 0.3) * newpcpu + 0.3 * oldpcpu
+	 * where RACCT_DECAY_FACTOR = 0.3 * FSCALE.
+	 */
+	racct->r_resources[RACCT_PCTCPU] = ((FSCALE - RACCT_DECAY_FACTOR) *
+	    newpcpu + RACCT_DECAY_FACTOR * oldpcpu) / FSCALE;
+	if (racct->r_resources[RACCT_PCTCPU] >
+	    100 * 1000000 * (uint64_t)mp_ncpus)
+		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 *
+		    (uint64_t)mp_ncpus;
+
+	/* Record current times. */
+	racct->r_runtime = racct->r_resources[RACCT_CPU];
+	timevaladd(&racct->r_time, &diff);
+}
+
+static void
+racct_zeropcpu_locked(struct proc *p)
+{
+	ASSERT_RACCT_ENABLED();
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	p->p_racct->r_resources[RACCT_PCTCPU] = 0;
+}
+
+static void
+racct_updatepcpu_locked(struct proc *p)
+{
+	ASSERT_RACCT_ENABLED();
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	racct_updatepcpu_racct_locked(p->p_racct);
+}
+
+static void
+racct_updatepcpu_pre(void)
+{
+
+	RACCT_LOCK();
+}
+
+static void
+racct_updatepcpu_post(void)
+{
+
+	RACCT_UNLOCK();
+}
+
+static void
+racct_updatepcpu_racct_callback(struct racct *racct, void *dummy1, void *dummy2)
+{
+	racct_updatepcpu_racct_locked(racct);
+}
+
+static void
+racct_updatepcpu_containers(void)
+{
+	ASSERT_RACCT_ENABLED();
+
+	ui_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+	    racct_updatepcpu_post, NULL, NULL);
+	loginclass_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+	    racct_updatepcpu_post, NULL, NULL);
+	prison_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+	    racct_updatepcpu_post, NULL, NULL);
+}
+
+static void
 racctd(void)
 {
-	struct thread *td;
 	struct proc *p;
-	struct timeval wallclock;
-	uint64_t pct, pct_estimate, runtime;
+	struct proc *idle;
 
 	ASSERT_RACCT_ENABLED();
 
+	idle = STAILQ_FIRST(&cpuhead)->pc_idlethread->td_proc;
+
 	for (;;) {
 		racct_decay();
 
@@ -1237,36 +1253,16 @@ racctd(void)
 
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
+			if (p == idle) {
+				PROC_UNLOCK(p);
+				continue;
+			}
 			if (p->p_state != PRS_NORMAL ||
 			    (p->p_flag & P_IDLEPROC) != 0) {
-				if (p->p_state == PRS_ZOMBIE)
-					racct_set(p, RACCT_PCTCPU, 0);
 				PROC_UNLOCK(p);
 				continue;
 			}
 
-			microuptime(&wallclock);
-			timevalsub(&wallclock, &p->p_stats->p_start);
-			PROC_STATLOCK(p);
-			FOREACH_THREAD_IN_PROC(p, td)
-				ruxagg(p, td);
-			runtime = cputick2usec(p->p_rux.rux_runtime);
-			PROC_STATUNLOCK(p);
-#ifdef notyet
-			KASSERT(runtime >= p->p_prev_runtime,
-			    ("runtime < p_prev_runtime"));
-#else
-			if (runtime < p->p_prev_runtime)
-				runtime = p->p_prev_runtime;
-#endif
-			p->p_prev_runtime = runtime;
-			if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
-				pct_estimate = (1000000 * runtime * 100) /
-				    ((uint64_t)wallclock.tv_sec * 1000000 +
-				    wallclock.tv_usec);
-			} else
-				pct_estimate = 0;
-			pct = racct_getpcpu(p, pct_estimate);
 			RACCT_LOCK();
 #ifdef RCTL
 			rctl_throttle_decay(p->p_racct, RACCT_READBPS);
@@ -1274,11 +1270,8 @@ racctd(void)
 			rctl_throttle_decay(p->p_racct, RACCT_READIOPS);
 			rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS);
 #endif
-			racct_set_locked(p, RACCT_PCTCPU, pct, 1);
-			racct_set_locked(p, RACCT_CPU, runtime, 0);
-			racct_set_locked(p, RACCT_WALLCLOCK,
-			    (uint64_t)wallclock.tv_sec * 1000000 +
-			    wallclock.tv_usec, 0);
+			racct_settime_locked(p, false);
+			racct_updatepcpu_locked(p);
 			RACCT_UNLOCK();
 			PROC_UNLOCK(p);
 		}
@@ -1306,6 +1299,8 @@ racctd(void)
 			PROC_UNLOCK(p);
 		}
 		sx_sunlock(&allproc_lock);
+
+		racct_updatepcpu_containers();
 		pause("-", hz);
 	}
 }
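The kern_racct.c rewrite replaces the scheduler-coupled racct_getpcpu() path with interval sampling plus an exponentially weighted moving average. A standalone sketch of the arithmetic in racct_updatepcpu_racct_locked(), assuming FSCALE is 2048 and RACCT_DECAY_FACTOR is 0.3 * FSCALE as the in-tree comment states; values are percent scaled by 10^6, as elsewhere in racct:

	#include <stdint.h>
	#include <stdio.h>

	#define	FSCALE			2048			/* assumed */
	#define	RACCT_DECAY_FACTOR	(FSCALE * 3 / 10)	/* 0.3, fixed-point */

	static uint64_t
	decay_pcpu(uint64_t oldpcpu, uint64_t runtime_us, uint64_t elapsed_us)
	{
		uint64_t newpcpu;

		/* Raw estimate over the sampling interval. */
		newpcpu = runtime_us * 100 * 1000000 / elapsed_us;
		/* EWMA: 0.7 * new + 0.3 * old. */
		return (((FSCALE - RACCT_DECAY_FACTOR) * newpcpu +
		    RACCT_DECAY_FACTOR * oldpcpu) / FSCALE);
	}

	int
	main(void)
	{
		/* 500 ms of CPU over a 1 s interval, previous estimate 10%. */
		printf("%ju\n", (uintmax_t)decay_pcpu(10 * 1000000, 500000,
		    1000000));	/* prints ~38000000, i.e. ~38% */
		return (0);
	}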
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 46e54339a171..3aed54c58e04 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -259,6 +259,7 @@ struct bridge_iflist {
 	struct epoch_context	bif_epoch_ctx;
 	ether_vlanid_t		bif_pvid;	/* port vlan id */
 	ifbvlan_set_t		bif_vlan_set;	/* if allowed tagged vlans */
+	uint16_t		bif_vlanproto;	/* vlan protocol */
 };
 
 /*
@@ -423,6 +424,7 @@ static int	bridge_ioctl_gflags(struct bridge_softc *, void *);
 static int	bridge_ioctl_sflags(struct bridge_softc *, void *);
 static int	bridge_ioctl_gdefpvid(struct bridge_softc *, void *);
 static int	bridge_ioctl_sdefpvid(struct bridge_softc *, void *);
+static int	bridge_ioctl_svlanproto(struct bridge_softc *, void *);
 static int	bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
 		    int);
 #ifdef INET
@@ -654,6 +656,9 @@ static const struct bridge_control bridge_control_table[] = {
 
 	{ bridge_ioctl_sdefpvid,	sizeof(struct ifbrparam),
 	  BC_F_COPYIN|BC_F_SUSER },
+
+	{ bridge_ioctl_svlanproto,	sizeof(struct ifbreq),
+	  BC_F_COPYIN|BC_F_SUSER },
 };
 static const int bridge_control_table_size = nitems(bridge_control_table);
 
@@ -1494,8 +1499,11 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
 	bif->bif_ifp = ifs;
 	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
 	bif->bif_savedcaps = ifs->if_capenable;
+	bif->bif_vlanproto = ETHERTYPE_VLAN;
 	if (sc->sc_flags & IFBRF_VLANFILTER)
 		bif->bif_pvid = sc->sc_defpvid;
+	if (sc->sc_flags & IFBRF_DEFQINQ)
+		bif->bif_flags |= IFBIF_QINQ;
 
 	/*
 	 * Assign the interface's MAC address to the bridge if it's the first
@@ -1577,6 +1585,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
 	req->ifbr_addrmax = bif->bif_addrmax;
 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
 	req->ifbr_pvid = bif->bif_pvid;
+	req->ifbr_vlanproto = bif->bif_vlanproto;
 
 	/* Copy STP state options as flags */
 	if (bp->bp_operedge)
@@ -2252,6 +2261,24 @@ bridge_ioctl_sdefpvid(struct bridge_softc *sc, void *arg)
 	return (0);
 }
 
+static int
+bridge_ioctl_svlanproto(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+
+	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+	if (bif == NULL)
+		return (EXTERROR(ENOENT, "Interface is not a bridge member"));
+
+	if (req->ifbr_vlanproto != ETHERTYPE_VLAN &&
+	    req->ifbr_vlanproto != ETHERTYPE_QINQ)
+		return (EXTERROR(EINVAL, "Invalid VLAN protocol"));
+
+	bif->bif_vlanproto = req->ifbr_vlanproto;
+
+	return (0);
+}
+
 /*
  * bridge_ifdetach:
  *
@@ -2395,12 +2422,15 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
 	}
 
 	/*
-	 * If underlying interface can not do VLAN tag insertion itself
-	 * then attach a packet tag that holds it.
+	 * There are two cases where we have to insert our own tag:
+	 * if the member interface doesn't support hardware tagging,
+	 * or if the tag proto is not 802.1q.
 	 */
 	if ((m->m_flags & M_VLANTAG) &&
-	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
-		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
+	    ((dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0 ||
+	     bif->bif_vlanproto != ETHERTYPE_VLAN)) {
+		m = ether_vlanencap_proto(m, m->m_pkthdr.ether_vtag,
+		    bif->bif_vlanproto);
 		if (m == NULL) {
 			if_printf(dst_ifp,
 			    "unable to prepend VLAN header\n");
@@ -2828,9 +2858,29 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 
 	NET_EPOCH_ASSERT();
 
+	/* We need the Ethernet header later, so make sure we have it now. */
+	if (m->m_len < ETHER_HDR_LEN) {
+		m = m_pullup(m, ETHER_HDR_LEN);
+		if (m == NULL) {
+			if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
+			m_freem(m);
+			return (NULL);
+		}
+	}
+
 	eh = mtod(m, struct ether_header *);
 	vlan = VLANTAGOF(m);
 
+	/*
+	 * If this frame has a VLAN tag and the receiving interface has a
+	 * vlan(4) trunk, then it is destined for vlan(4), not for us.
+	 * This means if vlan(4) and bridge(4) are configured on the same
+	 * interface, vlan(4) is preferred, which is what users typically
+	 * expect.
+	 */
+	if (vlan != DOT1Q_VID_NULL && ifp->if_vlantrunk != NULL)
+		return (m);
+
 	bif = ifp->if_bridge;
 	if (bif)
 		sc = bif->bif_sc;
@@ -3031,19 +3081,13 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 	do { GRAB_OUR_PACKETS(bifp) } while (0);
 
 	/*
-	 * Check the interface the packet arrived on.  For tagged frames,
-	 * we need to do this even if member_ifaddrs is disabled because
-	 * vlan(4) might need to handle the traffic.
+	 * If member_ifaddrs is enabled, see if the packet is destined for
+	 * one of the members' addresses.
 	 */
-	if (V_member_ifaddrs || (vlan && ifp->if_vlantrunk))
+	if (V_member_ifaddrs) {
+		/* Check the interface the packet arrived on. */
 		do { GRAB_OUR_PACKETS(ifp) } while (0);
 
-	/*
-	 * We only need to check other members interface if member_ifaddrs
-	 * is enabled; otherwise we should have never traffic destined for
-	 * a member's lladdr.
-	 */
-	if (V_member_ifaddrs) {
 		CK_LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
 			GRAB_OUR_PACKETS(bif2->bif_ifp)
 		}
@@ -3250,6 +3294,18 @@ bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m)
 	if ((sbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0)
 		return (true);
 
+	/* If Q-in-Q is disabled, check for stacked tags. */
+	if ((sbif->bif_flags & IFBIF_QINQ) == 0) {
+		struct ether_header *eh;
+		uint16_t proto;
+
+		eh = mtod(m, struct ether_header *);
+		proto = ntohs(eh->ether_type);
+
+		if (proto == ETHERTYPE_VLAN || proto == ETHERTYPE_QINQ)
+			return (false);
+	}
+
 	if (vlan == DOT1Q_VID_NULL) {
 		/*
 		 * The frame doesn't have a tag.  If the interface does not
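For reference, the new bridge_enqueue() tagging rule boils down to a single predicate. This is a userland-compilable restatement, not kernel code; the two flag values are illustrative placeholders, not the kernel's definitions:

	#include <stdbool.h>
	#include <stdint.h>

	#define	M_VLANTAG		0x1	/* placeholder value */
	#define	IFCAP_VLAN_HWTAGGING	0x2	/* placeholder value */
	#define	ETHERTYPE_VLAN		0x8100

	/* The bridge must encapsulate the frame itself when the member
	 * lacks hardware tagging or uses a non-802.1Q tag protocol. */
	static bool
	bridge_must_sw_tag(uint32_t m_flags, uint32_t if_capenable,
	    uint16_t vlanproto)
	{
		return ((m_flags & M_VLANTAG) != 0 &&
		    ((if_capenable & IFCAP_VLAN_HWTAGGING) == 0 ||
		    vlanproto != ETHERTYPE_VLAN));
	}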
diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h
index 6718c5ebcc34..b0f579f688ac 100644
--- a/sys/net/if_bridgevar.h
+++ b/sys/net/if_bridgevar.h
@@ -131,13 +131,15 @@
 #define	BRDGSFLAGS		35	/* set bridge flags (ifbrparam) */
 #define	BRDGGDEFPVID		36	/* get default pvid (ifbrparam) */
 #define	BRDGSDEFPVID		37	/* set default pvid (ifbrparam) */
+#define	BRDGSIFVLANPROTO	38	/* set if vlan protocol (ifbreq) */
 
 /* BRDGSFLAGS, Bridge flags (non-interface-specific) */
 typedef uint32_t ifbr_flags_t;
 
 #define	IFBRF_VLANFILTER	(1U<<0)	/* VLAN filtering enabled */
+#define	IFBRF_DEFQINQ		(1U<<1)	/* 802.1ad Q-in-Q allowed by default */
 
-#define	IFBRFBITS	"\020\01VLANFILTER"
+#define	IFBRFBITS	"\020\01VLANFILTER\02DEFQINQ"
 
 /*
  * Generic bridge control request.
@@ -156,6 +158,7 @@ struct ifbreq {
 	uint32_t	ifbr_addrmax;		/* member if addr max */
 	uint32_t	ifbr_addrexceeded;	/* member if addr violations */
 	ether_vlanid_t	ifbr_pvid;		/* member if PVID */
+	uint16_t	ifbr_vlanproto;		/* member if VLAN protocol */
 	uint8_t		pad[32];
 };
 
@@ -172,12 +175,11 @@ struct ifbreq {
 #define	IFBIF_BSTP_ADMEDGE	0x0200	/* member stp admin edge enabled */
 #define	IFBIF_BSTP_ADMCOST	0x0400	/* member stp admin path cost */
 #define	IFBIF_PRIVATE		0x0800	/* if is a private segment */
-/* was	IFBIF_VLANFILTER	0x1000 */
-#define	IFBIF_QINQ		0x2000	/* if allows 802.1ad Q-in-Q */
+#define	IFBIF_QINQ		0x1000	/* if allows 802.1ad Q-in-Q */
 
 #define	IFBIFBITS	"\020\001LEARNING\002DISCOVER\003STP\004SPAN" \
 			"\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \
-			"\011AUTOPTP"
+			"\011AUTOPTP\015QINQ"
 #define	IFBIFMASK	~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \
 			IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \
 			IFBIF_BSTP_ADMCOST)	/* not saved */
@@ -252,6 +254,7 @@ struct ifbrparam {
 					 * addresses */
 #define	ifbrp_flags	ifbrp_ifbrpu.ifbrpu_int32	/* bridge flags */
 #define	ifbrp_defpvid	ifbrp_ifbrpu.ifbrpu_int16	/* default pvid */
+#define	ifbrp_vlanproto	ifbrp_ifbrpu.ifbrpu_int8	/* vlan protocol */
 
 /*
  * Bridge current operational parameters structure.
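Userland reaches the new BRDGSIFVLANPROTO command the same way as the other bridge ioctls: wrapped in SIOCSDRVSPEC with a struct ifdrv, which is the mechanism ifconfig(8)'s bridge code uses. A hedged sketch; the caller is assumed to have opened a socket(AF_LOCAL, SOCK_DGRAM, 0), and error handling is left to it:

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <sys/sockio.h>
	#include <net/if.h>
	#include <net/ethernet.h>
	#include <net/if_bridgevar.h>
	#include <stdint.h>
	#include <string.h>

	static int
	set_member_vlanproto(int s, const char *br, const char *member,
	    uint16_t proto)
	{
		struct ifdrv ifd;
		struct ifbreq req;

		memset(&ifd, 0, sizeof(ifd));
		memset(&req, 0, sizeof(req));
		strlcpy(ifd.ifd_name, br, sizeof(ifd.ifd_name));
		strlcpy(req.ifbr_ifsname, member, sizeof(req.ifbr_ifsname));
		/* ETHERTYPE_VLAN or ETHERTYPE_QINQ; anything else -> EINVAL */
		req.ifbr_vlanproto = proto;
		ifd.ifd_cmd = BRDGSIFVLANPROTO;
		ifd.ifd_len = sizeof(req);
		ifd.ifd_data = &req;
		return (ioctl(s, SIOCSDRVSPEC, &ifd));
	}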
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index 7051e31565d4..4d35584925a1 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -58,6 +58,7 @@
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
+#include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 
 #include <net/bpf.h>
@@ -97,6 +98,15 @@ static unsigned int next_index = 0;
 #define	EPAIR_LOCK()		mtx_lock(&epair_n_index_mtx)
 #define	EPAIR_UNLOCK()		mtx_unlock(&epair_n_index_mtx)
 
+SYSCTL_DECL(_net_link);
+static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "Pair of virtual cross-over connected Ethernet-like interfaces");
+
+static bool use_ether_gen_addr = false;
+SYSCTL_BOOL(_net_link_epair, OID_AUTO, ether_gen_addr, CTLFLAG_RWTUN,
+    &use_ether_gen_addr, false,
+    "Generate MAC with FreeBSD OUI using ether_gen_addr(9)");
+
 struct epair_softc;
 struct epair_queue {
 	struct mtx		 mtx;
@@ -496,15 +506,29 @@ epair_clone_match(struct if_clone *ifc, const char *name)
 }
 
 static void
+epair_generate_mac_byname(struct epair_softc *sc, uint8_t eaddr[])
+{
+	struct ether_addr gen_eaddr;
+	int i;
+
+	ether_gen_addr_byname(if_name(sc->ifp), &gen_eaddr);
+	for (i = 0; i < ETHER_ADDR_LEN; i++)
+		eaddr[i] = gen_eaddr.octet[i];
+}
+
+static void
 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
 {
 	struct ifnet *ifp;
 	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
 
 	ifp = scb->ifp;
-	/* Copy epairNa etheraddr and change the last byte. */
-	memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
-	eaddr[5] = 0x0b;
+	if (!use_ether_gen_addr) {
+		/* Copy epairNa etheraddr and change the last byte. */
+		memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
+		eaddr[5] = 0x0b;
+	} else
+		epair_generate_mac_byname(scb, eaddr);
 
 	ether_ifattach(ifp, eaddr);
 
 	if_clone_addif(ifc, ifp);
@@ -719,7 +743,10 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len,
 	/* Finish initialization of interface <n>a. */
 	ifp = sca->ifp;
 	epair_setup_ifp(sca, name, unit);
-	epair_generate_mac(sca, eaddr);
+	if (!use_ether_gen_addr)
+		epair_generate_mac(sca, eaddr);
+	else
+		epair_generate_mac_byname(sca, eaddr);
 
 	ether_ifattach(ifp, eaddr);
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 8dae95c2cc2e..c397f0b67896 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -2338,6 +2338,10 @@ VNET_DECLARE(uma_zone_t, pf_udp_mapping_z);
 #define	V_pf_udp_mapping_z	VNET(pf_udp_mapping_z)
 VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
 #define	V_pf_state_scrub_z	VNET(pf_state_scrub_z)
+VNET_DECLARE(uma_zone_t, pf_anchor_z);
+#define	V_pf_anchor_z		VNET(pf_anchor_z)
+VNET_DECLARE(uma_zone_t, pf_eth_anchor_z);
+#define	V_pf_eth_anchor_z	VNET(pf_eth_anchor_z)
 
 extern void			 pf_purge_thread(void *);
 extern void			 pf_unload_vnet_purge(void);
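The new epair behavior is gated by the net.link.epair.ether_gen_addr knob declared above; since it is CTLFLAG_RWTUN it can be set from loader.conf or at runtime. A minimal sketch of flipping it programmatically before creating an epair (the sysctl name comes from the SYSCTL_BOOL above; the rest is an assumption):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdbool.h>
	#include <stdio.h>

	int
	main(void)
	{
		bool enable = true;

		/* Write-only sysctl update: oldp/oldlenp are NULL. */
		if (sysctlbyname("net.link.epair.ether_gen_addr", NULL, NULL,
		    &enable, sizeof(enable)) != 0) {
			perror("sysctlbyname");
			return (1);
		}
		return (0);
	}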
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index 4405098a8620..66275cb04bdd 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -283,7 +283,7 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
 	INP_WLOCK_ASSERT(tptoinpcb(tp));
 
 	/* Check arguments. */
-	KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));
+	KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("SEG_GT(rcv_start, rcv_end)"));
 
 	if ((rcv_start == rcv_end) &&
 	    (tp->rcv_numsacks >= 1) &&
@@ -498,8 +498,8 @@ tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
 	tp->snd_numholes--;
 	atomic_subtract_int(&V_tcp_sack_globalholes, 1);
 
-	KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes >= 0"));
-	KASSERT(V_tcp_sack_globalholes >= 0, ("tcp_sack_globalholes >= 0"));
+	KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes < 0"));
+	KASSERT(V_tcp_sack_globalholes >= 0, ("tcp_sack_globalholes < 0"));
 }
 
 /*
@@ -583,6 +583,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 	 */
 	if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
 		left_edge_delta = th_ack - tp->snd_una;
+		delivered_data += left_edge_delta;
 		sack_blocks[num_sack_blks].start = tp->snd_una;
 		sack_blocks[num_sack_blks++].end = th_ack;
 		/*
@@ -590,7 +591,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 		 * due to DSACK blocks
 		 */
 		if (SEQ_LT(tp->snd_fack, th_ack)) {
-			delivered_data += th_ack - tp->snd_una;
 			tp->snd_fack = th_ack;
 			sack_changed = SACK_CHANGE;
 		}
@@ -684,7 +684,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 			delivered_data += sblkp->end - sblkp->start;
 			tp->sackhint.hole_bytes += temp->end - temp->start;
 			KASSERT(tp->sackhint.hole_bytes >= 0,
-			    ("sackhint hole bytes >= 0"));
+			    ("sackhint hole bytes < 0"));
 			tp->snd_fack = sblkp->end;
 			sblkp--;
 			sack_changed = SACK_NEWLOSS;
@@ -783,7 +783,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 				tp->sackhint.sack_bytes_rexmit -=
 				    (SEQ_MIN(cur->rxmit, cur->end) - cur->start);
 				KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
-				    ("sackhint bytes rtx >= 0"));
+				    ("sackhint bytes rtx < 0"));
 				sack_changed = SACK_CHANGE;
 				if (SEQ_LEQ(sblkp->start, cur->start)) {
 					/* Data acks at least the beginning of hole. */
@@ -874,13 +874,13 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 
 	if (TAILQ_EMPTY(&tp->snd_holes)) {
 		KASSERT(tp->sackhint.hole_bytes == 0,
-		    ("SACK scoreboard empty, but accounting non-zero\n"));
+		    ("SACK scoreboard empty, but sackhint hole bytes != 0"));
 		tp->sackhint.sack_bytes_rexmit = 0;
 		tp->sackhint.sacked_bytes = 0;
 		tp->sackhint.lost_bytes = 0;
 	} else {
 		KASSERT(tp->sackhint.hole_bytes > 0,
-		    ("SACK scoreboard not empty, but has no bytes\n"));
+		    ("SACK scoreboard not empty, but sackhint hole bytes <= 0"));
 		tp->sackhint.delivered_data = delivered_data;
 		tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
 		KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0"));
@@ -918,9 +918,9 @@ tcp_free_sackholes(struct tcpcb *tp)
 	tp->sackhint.hole_bytes = 0;
 	tp->sackhint.lost_bytes = 0;
 
-	KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes == 0"));
+	KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes != 0"));
 	KASSERT(tp->sackhint.nexthole == NULL,
-	    ("tp->sackhint.nexthole == NULL"));
+	    ("tp->sackhint.nexthole != NULL"));
 }
 
 /*
@@ -1061,11 +1061,15 @@ tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
 			}
 		}
 	}
-	KASSERT(SEQ_LT(hole->start, hole->end), ("%s: hole.start >= hole.end", __func__));
+	KASSERT(SEQ_LT(hole->start, hole->end),
+	    ("%s: SEQ_GEQ(hole.start, hole.end)", __func__));
 	if (!(V_tcp_do_newsack)) {
-		KASSERT(SEQ_LT(hole->start, tp->snd_fack), ("%s: hole.start >= snd.fack", __func__));
-		KASSERT(SEQ_LT(hole->end, tp->snd_fack), ("%s: hole.end >= snd.fack", __func__));
-		KASSERT(SEQ_LT(hole->rxmit, tp->snd_fack), ("%s: hole.rxmit >= snd.fack", __func__));
+		KASSERT(SEQ_LT(hole->start, tp->snd_fack),
+		    ("%s: SEG_GEQ(hole.start, snd.fack)", __func__));
+		KASSERT(SEQ_LT(hole->end, tp->snd_fack),
+		    ("%s: SEG_GEQ(hole.end, snd.fack)", __func__));
+		KASSERT(SEQ_LT(hole->rxmit, tp->snd_fack),
+		    ("%s: SEQ_GEQ(hole.rxmit, snd.fack)", __func__));
 		if (SEQ_GEQ(hole->start, hole->end) ||
 		    SEQ_GEQ(hole->start, tp->snd_fack) ||
 		    SEQ_GEQ(hole->end, tp->snd_fack) ||
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index d1c4ba58bf55..fb013d3d17f0 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -509,11 +509,9 @@ void
 ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
     int32_t rstreason, int32_t tlen)
 {
-	if (tp != NULL) {
-		tcp_dropwithreset(m, th, tp, tlen, rstreason);
+	tcp_dropwithreset(m, th, tp, tlen, rstreason);
+	if (tp != NULL)
 		INP_WUNLOCK(tptoinpcb(tp));
-	} else
-		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 }
 
 void
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 4801b3e2c766..3fa7789efcfe 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -621,7 +621,7 @@ static void
 pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
 {
 
-	switch (pd->proto) {
+	switch (pd->virtual_proto) {
 	case IPPROTO_TCP: {
 		struct tcphdr *th = &pd->hdr.tcp;
 
@@ -1254,6 +1254,21 @@ pf_initialize(void)
 		    MTX_DEF | MTX_DUPOK);
 	}
 
+	/* Anchors */
+	V_pf_anchor_z = uma_zcreate("pf anchors",
+	    sizeof(struct pf_kanchor), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+	V_pf_limits[PF_LIMIT_ANCHORS].zone = V_pf_anchor_z;
+	uma_zone_set_max(V_pf_anchor_z, PF_ANCHOR_HIWAT);
+	uma_zone_set_warning(V_pf_anchor_z, "PF anchor limit reached");
+
+	V_pf_eth_anchor_z = uma_zcreate("pf Ethernet anchors",
+	    sizeof(struct pf_keth_anchor), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+	V_pf_limits[PF_LIMIT_ETH_ANCHORS].zone = V_pf_eth_anchor_z;
+	uma_zone_set_max(V_pf_eth_anchor_z, PF_ANCHOR_HIWAT);
+	uma_zone_set_warning(V_pf_eth_anchor_z, "PF Ethernet anchor limit reached");
+
 	/* ALTQ */
 	TAILQ_INIT(&V_pf_altqs[0]);
 	TAILQ_INIT(&V_pf_altqs[1]);
@@ -6376,7 +6391,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
 	KASSERT(ctx->sk != NULL, ("%s: null sk", __func__));
 	KASSERT(ctx->nk != NULL, ("%s: null nk", __func__));
 
-	switch (pd->proto) {
+	switch (pd->virtual_proto) {
 	case IPPROTO_TCP:
 		if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
 		    nk->port[pd->sidx] != pd->nsport) {
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index cfff58064922..51b3fd6390e1 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -120,7 +120,8 @@ enum	{
 enum	{ PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
 enum	{ PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
-	  PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
+	  PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_ANCHORS, PF_LIMIT_ETH_ANCHORS,
+	  PF_LIMIT_MAX };
 #define PF_POOL_IDMASK		0x0f
 enum	{ PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH,
 	  PF_POOL_ROUNDROBIN };
@@ -490,6 +491,7 @@ struct pf_osfp_ioctl {
 
 #define	PF_ANCHOR_NAME_SIZE	 64
 #define	PF_ANCHOR_MAXPATH	(MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)
+#define	PF_ANCHOR_HIWAT		 512
 #define	PF_OPTIMIZER_TABLE_PFX	"__automatic_"
 
 struct pf_rule {
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index 178ee01649c6..b6f5d74b5b42 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -331,6 +331,8 @@ pfattach_vnet(void)
 
 	V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
 	V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
+	V_pf_limits[PF_LIMIT_ANCHORS].limit = PF_ANCHOR_HIWAT;
+	V_pf_limits[PF_LIMIT_ETH_ANCHORS].limit = PF_ANCHOR_HIWAT;
 
 	RB_INIT(&V_pf_anchors);
 	pf_init_kruleset(&pf_main_ruleset);
@@ -4973,6 +4975,7 @@ DIOCCHANGEADDR_error:
 			goto fail;
 		}
 		PF_RULES_WLOCK();
+		io->pfrio_nadd = 0;
 		error = pfr_add_addrs(&io->pfrio_table, pfras,
 		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
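Note the interplay of the two pf changes above: pfr_add_addrs() now accumulates into *nadd ("+=" instead of "=", see the pf_table.c hunk further down), so the DIOCRADDADDRS ioctl zeroes io->pfrio_nadd first, while the batched netlink path can sum across calls. A toy model of the changed counter contract, not kernel code:

	#include <stdio.h>

	/* Stand-in for pfr_add_addrs(): accumulates into *nadd. */
	static int
	add_addrs(int size, int *nadd)
	{
		*nadd += size;	/* "+=" semantics after this commit */
		return (0);
	}

	int
	main(void)
	{
		int nadd = 0;	/* callers must reset, as DIOCRADDADDRS now does */

		add_addrs(3, &nadd);
		add_addrs(2, &nadd);
		printf("%d\n", nadd);	/* 5: totals accumulate across calls */
		return (0);
	}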
diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c
index c5de1e84a287..09754359ec2d 100644
--- a/sys/netpfil/pf/pf_nl.c
+++ b/sys/netpfil/pf/pf_nl.c
@@ -2082,6 +2082,123 @@ pf_handle_clear_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
 	return (error);
 }
 
+TAILQ_HEAD(pfr_addrq, pfr_addr_item);
+struct nl_parsed_table_addrs {
+	struct pfr_table table;
+	uint32_t flags;
+	struct pfr_addr addrs[256];
+	size_t addr_count;
+	int nadd;
+	int ndel;
+};
+#define	_OUT(_field)	offsetof(struct pfr_addr, _field)
+static const struct nlattr_parser nla_p_pfr_addr[] = {
+	{ .type = PFR_A_AF, .off = _OUT(pfra_af), .cb = nlattr_get_uint8 },
+	{ .type = PFR_A_NET, .off = _OUT(pfra_net), .cb = nlattr_get_uint8 },
+	{ .type = PFR_A_NOT, .off = _OUT(pfra_not), .cb = nlattr_get_bool },
+	{ .type = PFR_A_ADDR, .off = _OUT(pfra_u), .cb = nlattr_get_in6_addr },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(pfra_addr_parser, nla_p_pfr_addr);
+
+static int
+nlattr_get_pfr_addr(struct nlattr *nla, struct nl_pstate *npt, const void *arg,
+    void *target)
+{
+	struct nl_parsed_table_addrs *attrs = target;
+	struct pfr_addr addr = { 0 };
+	int error;
+
+	if (attrs->addr_count >= nitems(attrs->addrs))
+		return (E2BIG);
+
+	error = nlattr_get_nested(nla, npt, &pfra_addr_parser, &addr);
+	if (error != 0)
+		return (error);
+
+	memcpy(&attrs->addrs[attrs->addr_count], &addr, sizeof(addr));
+	attrs->addr_count++;
+
+	return (0);
+}
+
+NL_DECLARE_ATTR_PARSER(nested_table_parser, nla_p_table);
+
+#define	_OUT(_field)	offsetof(struct nl_parsed_table_addrs, _field)
+static const struct nlattr_parser nla_p_table_addr[] = {
+	{ .type = PF_TA_TABLE, .off = _OUT(table), .arg = &nested_table_parser, .cb = nlattr_get_nested },
+	{ .type = PF_TA_ADDR, .cb = nlattr_get_pfr_addr },
+	{ .type = PF_TA_FLAGS, .off = _OUT(flags), .cb = nlattr_get_uint32 },
+};
+NL_DECLARE_PARSER(table_addr_parser, struct genlmsghdr, nlf_p_empty, nla_p_table_addr);
+#undef _OUT
+
+static int
+pf_handle_table_add_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
+{
+	struct nl_parsed_table_addrs attrs = { 0 };
+	struct nl_writer *nw = npt->nw;
+	struct genlmsghdr *ghdr_new;
+	int error;
+
+	error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs);
+	if (error != 0)
+		return (error);
+
+	PF_RULES_WLOCK();
+	error = pfr_add_addrs(&attrs.table, &attrs.addrs[0],
+	    attrs.addr_count, &attrs.nadd, attrs.flags | PFR_FLAG_USERIOCTL);
+	PF_RULES_WUNLOCK();
+
+	if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr)))
+		return (ENOMEM);
+
+	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
+	ghdr_new->cmd = PFNL_CMD_TABLE_ADD_ADDR;
+	ghdr_new->version = 0;
+	ghdr_new->reserved = 0;
+
+	nlattr_add_u32(nw, PF_TA_NBR_ADDED, attrs.nadd);
+
+	if (!nlmsg_end(nw))
+		return (ENOMEM);
+
+	return (error);
+}
+
+static int
+pf_handle_table_del_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
+{
+	struct nl_parsed_table_addrs attrs = { 0 };
+	struct nl_writer *nw = npt->nw;
+	struct genlmsghdr *ghdr_new;
+	int error;
+
+	error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs);
+	if (error != 0)
+		return (error);
+
+	PF_RULES_WLOCK();
+	error = pfr_del_addrs(&attrs.table, &attrs.addrs[0],
+	    attrs.addr_count, &attrs.ndel, attrs.flags | PFR_FLAG_USERIOCTL);
+	PF_RULES_WUNLOCK();
+
+	if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr)))
+		return (ENOMEM);
+
+	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
+	ghdr_new->cmd = PFNL_CMD_TABLE_DEL_ADDR;
+	ghdr_new->version = 0;
+	ghdr_new->reserved = 0;
+
+	nlattr_add_u32(nw, PF_TA_NBR_DELETED, attrs.ndel);
+
+	if (!nlmsg_end(nw))
+		return (ENOMEM);
+
+	return (error);
+}
+
 static const struct nlhdr_parser *all_parsers[] = {
 	&state_parser,
 	&addrule_parser,
@@ -2096,6 +2213,7 @@ static const struct nlhdr_parser *all_parsers[] = {
 	&add_addr_parser,
 	&ruleset_parser,
 	&table_parser,
+	&table_addr_parser,
 };
 
 static uint16_t family_id;
@@ -2318,6 +2436,20 @@ static const struct genl_cmd pf_cmds[] = {
 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
 		.cmd_priv = PRIV_NETINET_PF,
 	},
+	{
+		.cmd_num = PFNL_CMD_TABLE_ADD_ADDR,
+		.cmd_name = "TABLE_ADD_ADDRS",
+		.cmd_cb = pf_handle_table_add_addrs,
+		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
+		.cmd_priv = PRIV_NETINET_PF,
+	},
+	{
+		.cmd_num = PFNL_CMD_TABLE_DEL_ADDR,
+		.cmd_name = "TABLE_DEL_ADDRS",
+		.cmd_cb = pf_handle_table_del_addrs,
+		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
+		.cmd_priv = PRIV_NETINET_PF,
+	},
 };
 
 void
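A userland caller for the new PFNL_CMD_TABLE_ADD_ADDR command would follow the attribute layout parsed above: one nested PF_TA_TABLE, one nested PF_TA_ADDR per address, and PF_TA_FLAGS. The sketch below uses FreeBSD's snl(3) helpers in the style of pfctl's other netlink requests; treat the exact snl calls and the PF_T_NAME table attribute as assumptions, and note that error handling and the PFR_A_ADDR payload are omitted:

	#include <sys/socket.h>

	#include <netlink/netlink.h>
	#include <netlink/netlink_generic.h>
	#include <netlink/netlink_snl.h>
	#include <netlink/netlink_snl_generic.h>

	static bool
	table_add_one_addr(struct snl_state *ss)
	{
		struct snl_writer nw;
		struct nlmsghdr *hdr;
		uint16_t family;
		int off;

		family = snl_get_genl_family(ss, "pfctl");

		snl_init_writer(ss, &nw);
		snl_create_genl_msg_request(&nw, family,
		    PFNL_CMD_TABLE_ADD_ADDR);

		off = snl_add_msg_attr_nested(&nw, PF_TA_TABLE);
		snl_add_msg_attr_string(&nw, PF_T_NAME, "mytable");	/* assumed */
		snl_end_attr_nested(&nw, off);

		off = snl_add_msg_attr_nested(&nw, PF_TA_ADDR);
		snl_add_msg_attr_u8(&nw, PFR_A_AF, AF_INET);
		snl_add_msg_attr_u8(&nw, PFR_A_NET, 32);	/* /32: one host */
		/* PFR_A_ADDR carries the in6_addr-sized address blob. */
		snl_end_attr_nested(&nw, off);

		snl_add_msg_attr_u32(&nw, PF_TA_FLAGS, 0);

		hdr = snl_finalize_msg(&nw);
		return (hdr != NULL && snl_send_message(ss, hdr));
	}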
diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h
index d263a0b22deb..87daac393821 100644
--- a/sys/netpfil/pf/pf_nl.h
+++ b/sys/netpfil/pf/pf_nl.h
@@ -67,6 +67,8 @@ enum {
 	PFNL_CMD_GET_TSTATS = 29,
 	PFNL_CMD_CLR_TSTATS = 30,
 	PFNL_CMD_CLR_ADDRS = 31,
+	PFNL_CMD_TABLE_ADD_ADDR = 32,
+	PFNL_CMD_TABLE_DEL_ADDR = 33,
 	__PFNL_CMD_MAX,
 };
 #define PFNL_CMD_MAX (__PFNL_CMD_MAX -1)
@@ -461,6 +463,23 @@ enum pf_tstats_t {
 	PF_TS_NZERO		= 9, /* u64 */
 };
 
+enum pfr_addr_t {
+	PFR_A_UNSPEC,
+	PFR_A_AF	= 1, /* uint8_t */
+	PFR_A_NET	= 2, /* uint8_t */
+	PFR_A_NOT	= 3, /* bool */
+	PFR_A_ADDR	= 4, /* in6_addr */
+};
+
+enum pf_table_addrs_t {
+	PF_TA_UNSPEC,
+	PF_TA_TABLE	= 1, /* nested, pf_table_t */
+	PF_TA_ADDR	= 2, /* nested, pfr_addr_t */
+	PF_TA_FLAGS	= 3, /* u32 */
+	PF_TA_NBR_ADDED	= 4, /* u32 */
+	PF_TA_NBR_DELETED = 5, /* u32 */
+};
+
 #ifdef _KERNEL
 
 void	pf_nl_register(void);
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index 8cea9557633c..a684d778ab42 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -118,6 +118,8 @@ VNET_DEFINE_STATIC(uma_zone_t, pf_frnode_z);
 #define	V_pf_frnode_z	VNET(pf_frnode_z)
 VNET_DEFINE_STATIC(uma_zone_t, pf_frag_z);
 #define	V_pf_frag_z	VNET(pf_frag_z)
+VNET_DEFINE(uma_zone_t, pf_anchor_z);
+VNET_DEFINE(uma_zone_t, pf_eth_anchor_z);
 
 TAILQ_HEAD(pf_fragqueue, pf_fragment);
 TAILQ_HEAD(pf_cachequeue, pf_fragment);
diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c
index 43b51f2933f4..039908a53126 100644
--- a/sys/netpfil/pf/pf_ruleset.c
+++ b/sys/netpfil/pf/pf_ruleset.c
@@ -238,7 +238,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
 	    ((parent != NULL) && (strlen(parent->path) >= PF_ANCHOR_MAXPATH)))
 		return (NULL);
 
-	anchor = rs_malloc(sizeof(*anchor));
+	anchor = uma_zalloc(V_pf_anchor_z, M_NOWAIT | M_ZERO);
 	if (anchor == NULL)
 		return (NULL);
 
@@ -259,7 +259,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
 		printf("%s: RB_INSERT1 "
 		    "'%s' '%s' collides with '%s' '%s'\n", __func__,
 		    anchor->path, anchor->name, dup->path, dup->name);
-		rs_free(anchor);
+		uma_zfree(V_pf_anchor_z, anchor);
 		return (NULL);
 	}
 
@@ -273,7 +273,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
 			    anchor->name, dup->path, dup->name);
 			RB_REMOVE(pf_kanchor_global, &V_pf_anchors,
 			    anchor);
-			rs_free(anchor);
+			uma_zfree(V_pf_anchor_z, anchor);
 			return (NULL);
 		}
 	}
@@ -350,7 +350,7 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset)
 		if ((parent = ruleset->anchor->parent) != NULL)
 			RB_REMOVE(pf_kanchor_node, &parent->children,
 			    ruleset->anchor);
-		rs_free(ruleset->anchor);
+		uma_zfree(V_pf_anchor_z, ruleset->anchor);
 		if (parent == NULL)
 			return;
 		ruleset = &parent->ruleset;
@@ -613,7 +613,7 @@ pf_find_or_create_keth_ruleset(const char *path)
 		rs_free(p);
 		return (NULL);
 	}
-	anchor = (struct pf_keth_anchor *)rs_malloc(sizeof(*anchor));
+	anchor = uma_zalloc(V_pf_eth_anchor_z, M_NOWAIT | M_ZERO);
 	if (anchor == NULL) {
 		rs_free(p);
 		return (NULL);
 	}
@@ -631,7 +631,7 @@ pf_find_or_create_keth_ruleset(const char *path)
 		printf("%s: RB_INSERT1 "
 		    "'%s' '%s' collides with '%s' '%s'\n", __func__,
 		    anchor->path, anchor->name, dup->path, dup->name);
-		rs_free(anchor);
+		uma_zfree(V_pf_eth_anchor_z, anchor);
 		rs_free(p);
 		return (NULL);
 	}
@@ -645,7 +645,7 @@ pf_find_or_create_keth_ruleset(const char *path)
 			    anchor->name, dup->path, dup->name);
 			RB_REMOVE(pf_keth_anchor_global, &V_pf_keth_anchors,
 			    anchor);
-			rs_free(anchor);
+			uma_zfree(V_pf_eth_anchor_z, anchor);
 			rs_free(p);
 			return (NULL);
 		}
 	}
@@ -754,7 +754,7 @@ pf_remove_if_empty_keth_ruleset(struct pf_keth_ruleset *ruleset)
 		if ((parent = ruleset->anchor->parent) != NULL)
 			RB_REMOVE(pf_keth_anchor_node, &parent->children,
 			    ruleset->anchor);
-		rs_free(ruleset->anchor);
+		uma_zfree(V_pf_eth_anchor_z, ruleset->anchor);
 		if (parent == NULL)
 			return;
 		ruleset = &parent->ruleset;
diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c
index ecc185f89ad7..73ec18fa7646 100644
--- a/sys/netpfil/pf/pf_table.c
+++ b/sys/netpfil/pf/pf_table.c
@@ -294,7 +294,7 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
 	else
 		pfr_destroy_kentries(&workq);
 	if (nadd != NULL)
-		*nadd = xadd;
+		*nadd += xadd;
 	pfr_destroy_ktable(tmpkt, 0);
 	return (0);
 _bad:
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index af9cafa99dd0..9140cee56885 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -763,7 +763,6 @@ struct proc {
 	LIST_HEAD(, mqueue_notifier)	p_mqnotifier; /* (c) mqueue notifiers.*/
 	struct kdtrace_proc	*p_dtrace; /* (*) DTrace-specific data. */
 	struct cv	p_pwait; /* (*) wait cv for exit/exec. */
-	uint64_t	p_prev_runtime; /* (c) Resource usage accounting. */
 	struct racct	*p_racct; /* (b) Resource accounting. */
 	int		p_throttled; /* (c) Flag for racct pcpu throttling */
 	/*
diff --git a/sys/sys/racct.h b/sys/sys/racct.h
index c6020b82c865..92b50353774e 100644
--- a/sys/sys/racct.h
+++ b/sys/sys/racct.h
@@ -141,13 +141,17 @@ extern bool racct_enable;
 
 /*
  * The 'racct' structure defines resource consumption for a particular
- * subject, such as process or jail.
+ * subject, such as process or jail.  It also contains the total
+ * cpu time and real time of the subject, recorded at the most recent
+ * time that RACCT_PCPU was updated.
  *
  * This structure must be filled with zeroes initially.
  */
 struct racct {
 	int64_t			r_resources[RACCT_MAX + 1];
 	LIST_HEAD(, rctl_rule_link)	r_rule_links;
+	uint64_t		r_runtime;
+	struct timeval		r_time;
 };
 
 SYSCTL_DECL(_kern_racct);
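Finally, the r_runtime/r_time pair added to struct racct is what makes the delta-based %CPU sampling possible: each update works on the difference since the previous snapshot. A toy model of that bookkeeping, standalone code rather than kernel code:

	#include <stdint.h>
	#include <stdio.h>

	struct sample {
		uint64_t runtime_us;	/* total CPU time consumed so far */
		uint64_t uptime_us;	/* wallclock when it was recorded */
	};

	static uint64_t
	pcpu_between(const struct sample *prev, const struct sample *now)
	{
		uint64_t cpu = now->runtime_us - prev->runtime_us;
		uint64_t wall = now->uptime_us - prev->uptime_us;

		return (cpu * 100 * 1000000 / wall);	/* percent * 10^6 */
	}

	int
	main(void)
	{
		struct sample a = { 1000000, 10000000 };
		struct sample b = { 1250000, 11000000 };

		/* 250 ms of CPU over 1 s: prints 25000000, i.e. 25%. */
		printf("%ju\n", (uintmax_t)pcpu_between(&a, &b));
		return (0);
	}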