Diffstat (limited to 'sys')
-rw-r--r--  sys/dev/sdio/sdiob.c                     |   7
-rw-r--r--  sys/fs/fuse/fuse_vnops.c                 |  34
-rw-r--r--  sys/kern/kern_racct.c                    | 307
-rw-r--r--  sys/net/if_bridge.c                      |  84
-rw-r--r--  sys/net/if_bridgevar.h                   |  11
-rw-r--r--  sys/net/if_epair.c                       |  35
-rw-r--r--  sys/net/pfvar.h                          |   4
-rw-r--r--  sys/netinet/tcp_sack.c                   |  32
-rw-r--r--  sys/netinet/tcp_stacks/rack_bbr_common.c |   6
-rw-r--r--  sys/netpfil/pf/pf.c                      |  19
-rw-r--r--  sys/netpfil/pf/pf.h                      |   4
-rw-r--r--  sys/netpfil/pf/pf_ioctl.c                |   3
-rw-r--r--  sys/netpfil/pf/pf_nl.c                   | 132
-rw-r--r--  sys/netpfil/pf/pf_nl.h                   |  19
-rw-r--r--  sys/netpfil/pf/pf_norm.c                 |   2
-rw-r--r--  sys/netpfil/pf/pf_ruleset.c              |  16
-rw-r--r--  sys/netpfil/pf/pf_table.c                |   2
-rw-r--r--  sys/sys/proc.h                           |   1
-rw-r--r--  sys/sys/racct.h                          |   6

19 files changed, 484 insertions(+), 240 deletions(-)
diff --git a/sys/dev/sdio/sdiob.c b/sys/dev/sdio/sdiob.c
index 4ec2058fa2e4..cb2cc0da6b77 100644
--- a/sys/dev/sdio/sdiob.c
+++ b/sys/dev/sdio/sdiob.c
@@ -150,7 +150,7 @@ sdiob_rw_direct_sc(struct sdiob_softc *sc, uint8_t fn, uint32_t addr, bool wr,
sc->ccb = xpt_alloc_ccb();
else
memset(sc->ccb, 0, sizeof(*sc->ccb));
- xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NONE);
+ xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NORMAL);
CAM_DEBUG(sc->ccb->ccb_h.path, CAM_DEBUG_TRACE,
("%s(fn=%d, addr=%#02x, wr=%d, *val=%#02x)\n", __func__,
fn, addr, wr, *val));
@@ -250,7 +250,7 @@ sdiob_rw_extended_cam(struct sdiob_softc *sc, uint8_t fn, uint32_t addr,
sc->ccb = xpt_alloc_ccb();
else
memset(sc->ccb, 0, sizeof(*sc->ccb));
- xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NONE);
+ xpt_setup_ccb(&sc->ccb->ccb_h, sc->periph->path, CAM_PRIORITY_NORMAL);
CAM_DEBUG(sc->ccb->ccb_h.path, CAM_DEBUG_TRACE,
("%s(fn=%d addr=%#0x wr=%d b_count=%u blksz=%u buf=%p incr=%d)\n",
__func__, fn, addr, wr, b_count, blksz, buffer, incaddr));
@@ -977,9 +977,6 @@ sdiobdiscover(void *context, int pending)
if (sc->ccb == NULL)
sc->ccb = xpt_alloc_ccb();
- else
- memset(sc->ccb, 0, sizeof(*sc->ccb));
- xpt_setup_ccb(&sc->ccb->ccb_h, periph->path, CAM_PRIORITY_NONE);
/*
* Read CCCR and FBR of each function, get manufacturer and device IDs,
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 32872e8f3f3a..b90ce60ec664 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -1219,36 +1219,20 @@ fuse_vnop_getattr(struct vop_getattr_args *ap)
struct vattr *vap = ap->a_vap;
struct ucred *cred = ap->a_cred;
struct thread *td = curthread;
-
int err = 0;
- int dataflags;
-
- dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
-
- /* Note that we are not bailing out on a dead file system just yet. */
- if (!(dataflags & FSESS_INITED)) {
- if (!vnode_isvroot(vp)) {
- fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
- return (EXTERROR(ENOTCONN, "FUSE daemon is not "
- "initialized"));
- } else {
- goto fake;
- }
- }
err = fuse_internal_getattr(vp, vap, cred, td);
if (err == ENOTCONN && vnode_isvroot(vp)) {
- /* see comment in fuse_vfsop_statfs() */
- goto fake;
- } else {
- return err;
+ /*
+ * We want to seem a legitimate fs even if the daemon is dead,
+ * so that, e.g., we can still do path-based unmounting after
+ * the daemon dies.
+ */
+ err = 0;
+ bzero(vap, sizeof(*vap));
+ vap->va_type = vnode_vtype(vp);
}
-
-fake:
- bzero(vap, sizeof(*vap));
- vap->va_type = vnode_vtype(vp);
-
- return 0;
+ return err;
}
/*
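
Note on the fuse_vnop_getattr() change: the function now always issues the GETATTR request first and only fabricates attributes for the root vnode when the daemon is unreachable (ENOTCONN), instead of bailing out early whenever the session was uninitialized. A hypothetical userland illustration of the preserved behaviour, assuming a FUSE filesystem mounted at /mnt/fuse whose daemon has died (paths invented):

#include <sys/stat.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct stat sb;

	/* Root vnode: attributes are faked (zeroed, va_type kept), no error. */
	if (stat("/mnt/fuse", &sb) == 0)
		printf("root ok, size=%jd\n", (intmax_t)sb.st_size);
	/* Any other vnode: the error is propagated, typically ENOTCONN. */
	if (stat("/mnt/fuse/file", &sb) != 0)
		perror("stat");
	return (0);
}

This keeps path-based unmounting working after the daemon dies, since umount(8) can still stat() the mount root.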
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
index 7ee3b9e2048a..7351e9cb6313 100644
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -96,6 +96,13 @@ static void racct_sub_cred_locked(struct ucred *cred, int resource,
uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
uint64_t amount);
+static int racct_set_locked(struct proc *p, int resource, uint64_t amount,
+ int force);
+static void racct_updatepcpu_locked(struct proc *p);
+static void racct_updatepcpu_racct_locked(struct racct *racct);
+static void racct_updatepcpu_containers(void);
+static void racct_settime_locked(struct proc *p, bool exit);
+static void racct_zeropcpu_locked(struct proc *p);
SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, , rusage, add,
@@ -308,68 +315,6 @@ fixpt_t ccpu_exp[] = {
#define CCPU_EXP_MAX 110
-/*
- * This function is analogical to the getpcpu() function in the ps(1) command.
- * They should both calculate in the same way so that the racct %cpu
- * calculations are consistent with the values shown by the ps(1) tool.
- * The calculations are more complex in the 4BSD scheduler because of the value
- * of the ccpu variable. In ULE it is defined to be zero which saves us some
- * work.
- */
-static uint64_t
-racct_getpcpu(struct proc *p, u_int pcpu)
-{
- u_int swtime;
-#ifdef SCHED_4BSD
- fixpt_t pctcpu, pctcpu_next;
-#endif
- fixpt_t p_pctcpu;
- struct thread *td;
-
- ASSERT_RACCT_ENABLED();
- KASSERT((p->p_flag & P_IDLEPROC) == 0,
- ("racct_getpcpu: idle process %p", p));
-
- swtime = (ticks - p->p_swtick) / hz;
-
- /*
- * For short-lived processes, the sched_pctcpu() returns small
- * values even for cpu intensive processes. Therefore we use
- * our own estimate in this case.
- */
- if (swtime < RACCT_PCPU_SECS)
- return (pcpu);
-
- p_pctcpu = 0;
- FOREACH_THREAD_IN_PROC(p, td) {
- thread_lock(td);
-#ifdef SCHED_4BSD
- pctcpu = sched_pctcpu(td);
- /* Count also the yet unfinished second. */
- pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT;
- pctcpu_next += sched_pctcpu_delta(td);
- p_pctcpu += max(pctcpu, pctcpu_next);
-#else
- /*
- * In ULE the %cpu statistics are updated on every
- * sched_pctcpu() call. So special calculations to
- * account for the latest (unfinished) second are
- * not needed.
- */
- p_pctcpu += sched_pctcpu(td);
-#endif
- thread_unlock(td);
- }
-
-#ifdef SCHED_4BSD
- if (swtime <= CCPU_EXP_MAX)
- return ((100 * (uint64_t)p_pctcpu * 1000000) /
- (FSCALE - ccpu_exp[swtime]));
-#endif
-
- return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE);
-}
-
static void
racct_add_racct(struct racct *dest, const struct racct *src)
{
@@ -499,19 +444,6 @@ racct_adjust_resource(struct racct *racct, int resource,
("%s: resource %d usage < 0", __func__, resource));
racct->r_resources[resource] = 0;
}
-
- /*
- * There are some cases where the racct %cpu resource would grow
- * beyond 100% per core. For example in racct_proc_exit() we add
- * the process %cpu usage to the ucred racct containers. If too
- * many processes terminated in a short time span, the ucred %cpu
- * resource could grow too much. Also, the 4BSD scheduler sometimes
- * returns for a thread more than 100% cpu usage. So we set a sane
- * boundary here to 100% * the maximum number of CPUs.
- */
- if ((resource == RACCT_PCTCPU) &&
- (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU))
- racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU;
}
static int
@@ -635,10 +567,44 @@ racct_add_buf(struct proc *p, const struct buf *bp, int is_write)
RACCT_UNLOCK();
}
+static void
+racct_settime_locked(struct proc *p, bool exit)
+{
+ struct thread *td;
+ struct timeval wallclock;
+ uint64_t runtime;
+
+ ASSERT_RACCT_ENABLED();
+ RACCT_LOCK_ASSERT();
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ if (exit) {
+ /*
+ * proc_reap() has already calculated rux
+ * and added crux to rux.
+ */
+ runtime = cputick2usec(p->p_rux.rux_runtime -
+ p->p_crux.rux_runtime);
+ } else {
+ PROC_STATLOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td)
+ ruxagg(p, td);
+ PROC_STATUNLOCK(p);
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+ }
+ microuptime(&wallclock);
+ timevalsub(&wallclock, &p->p_stats->p_start);
+
+ racct_set_locked(p, RACCT_CPU, runtime, 0);
+ racct_set_locked(p, RACCT_WALLCLOCK,
+ (uint64_t)wallclock.tv_sec * 1000000 +
+ wallclock.tv_usec, 0);
+}
+
static int
racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
{
- int64_t old_amount, decayed_amount, diff_proc, diff_cred;
+ int64_t old_amount, diff_proc, diff_cred;
#ifdef RCTL
int error;
#endif
@@ -655,17 +621,7 @@ racct_set_locked(struct proc *p, int resource, uint64_t amount, int force)
* The diffs may be negative.
*/
diff_proc = amount - old_amount;
- if (resource == RACCT_PCTCPU) {
- /*
- * Resources in per-credential racct containers may decay.
- * If this is the case, we need to calculate the difference
- * between the new amount and the proportional value of the
- * old amount that has decayed in the ucred racct containers.
- */
- decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
- diff_cred = amount - decayed_amount;
- } else
- diff_cred = diff_proc;
+ diff_cred = diff_proc;
#ifdef notyet
KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource),
("%s: usage of non-droppable resource %d dropping", __func__,
@@ -908,8 +864,6 @@ racct_proc_fork(struct proc *parent, struct proc *child)
goto out;
#endif
- /* Init process cpu time. */
- child->p_prev_runtime = 0;
child->p_throttled = 0;
/*
@@ -964,37 +918,16 @@ racct_proc_fork_done(struct proc *child)
void
racct_proc_exit(struct proc *p)
{
- struct timeval wallclock;
- uint64_t pct_estimate, pct, runtime;
int i;
if (!racct_enable)
return;
PROC_LOCK(p);
- /*
- * We don't need to calculate rux, proc_reap() has already done this.
- */
- runtime = cputick2usec(p->p_rux.rux_runtime);
-#ifdef notyet
- KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
-#else
- if (runtime < p->p_prev_runtime)
- runtime = p->p_prev_runtime;
-#endif
- microuptime(&wallclock);
- timevalsub(&wallclock, &p->p_stats->p_start);
- if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
- pct_estimate = (1000000 * runtime * 100) /
- ((uint64_t)wallclock.tv_sec * 1000000 +
- wallclock.tv_usec);
- } else
- pct_estimate = 0;
- pct = racct_getpcpu(p, pct_estimate);
-
RACCT_LOCK();
- racct_set_locked(p, RACCT_CPU, runtime, 0);
- racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);
+
+ racct_settime_locked(p, true);
+ racct_zeropcpu_locked(p);
KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0,
("process reaped with %ju allocated for RSS\n",
@@ -1068,6 +1001,10 @@ racct_move(struct racct *dest, struct racct *src)
RACCT_LOCK();
racct_add_racct(dest, src);
racct_sub_racct(src, src);
+ dest->r_runtime = src->r_runtime;
+ dest->r_time = src->r_time;
+ src->r_runtime = 0;
+ timevalsub(&src->r_time, &src->r_time);
RACCT_UNLOCK();
}
@@ -1170,8 +1107,6 @@ racct_proc_wakeup(struct proc *p)
static void
racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
{
- int64_t r_old, r_new;
-
ASSERT_RACCT_ENABLED();
RACCT_LOCK_ASSERT();
@@ -1181,15 +1116,6 @@ racct_decay_callback(struct racct *racct, void *dummy1, void *dummy2)
rctl_throttle_decay(racct, RACCT_READIOPS);
rctl_throttle_decay(racct, RACCT_WRITEIOPS);
#endif
-
- r_old = racct->r_resources[RACCT_PCTCPU];
-
- /* If there is nothing to decay, just exit. */
- if (r_old <= 0)
- return;
-
- r_new = r_old * RACCT_DECAY_FACTOR / FSCALE;
- racct->r_resources[RACCT_PCTCPU] = r_new;
}
static void
@@ -1221,15 +1147,105 @@ racct_decay(void)
}
static void
+racct_updatepcpu_racct_locked(struct racct *racct)
+{
+ struct timeval diff;
+ uint64_t elapsed;
+ uint64_t runtime;
+ uint64_t newpcpu;
+ uint64_t oldpcpu;
+
+ ASSERT_RACCT_ENABLED();
+ RACCT_LOCK_ASSERT();
+
+ /* Difference between now and previously-recorded time. */
+ microuptime(&diff);
+ timevalsub(&diff, &racct->r_time);
+ elapsed = (uint64_t)diff.tv_sec * 1000000 + diff.tv_usec;
+
+ /* Difference between current and previously-recorded runtime. */
+ runtime = racct->r_resources[RACCT_CPU] - racct->r_runtime;
+
+ newpcpu = runtime * 100 * 1000000 / elapsed;
+ oldpcpu = racct->r_resources[RACCT_PCTCPU];
+ /*
+ * This calculation is equivalent to
+ * (1 - 0.3) * newpcpu + 0.3 * oldpcpu
+ * where RACCT_DECAY_FACTOR = 0.3 * FSCALE.
+ */
+ racct->r_resources[RACCT_PCTCPU] = ((FSCALE - RACCT_DECAY_FACTOR) *
+ newpcpu + RACCT_DECAY_FACTOR * oldpcpu) / FSCALE;
+ if (racct->r_resources[RACCT_PCTCPU] >
+ 100 * 1000000 * (uint64_t)mp_ncpus)
+ racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 *
+ (uint64_t)mp_ncpus;
+
+ /* Record current times. */
+ racct->r_runtime = racct->r_resources[RACCT_CPU];
+ timevaladd(&racct->r_time, &diff);
+}
+
+static void
+racct_zeropcpu_locked(struct proc *p)
+{
+ ASSERT_RACCT_ENABLED();
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ p->p_racct->r_resources[RACCT_PCTCPU] = 0;
+}
+
+static void
+racct_updatepcpu_locked(struct proc *p)
+{
+ ASSERT_RACCT_ENABLED();
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ racct_updatepcpu_racct_locked(p->p_racct);
+}
+
+static void
+racct_updatepcpu_pre(void)
+{
+
+ RACCT_LOCK();
+}
+
+static void
+racct_updatepcpu_post(void)
+{
+
+ RACCT_UNLOCK();
+}
+
+static void
+racct_updatepcpu_racct_callback(struct racct *racct, void *dummy1, void *dummy2)
+{
+ racct_updatepcpu_racct_locked(racct);
+}
+
+static void
+racct_updatepcpu_containers(void)
+{
+ ASSERT_RACCT_ENABLED();
+
+ ui_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+ racct_updatepcpu_post, NULL, NULL);
+ loginclass_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+ racct_updatepcpu_post, NULL, NULL);
+ prison_racct_foreach(racct_updatepcpu_racct_callback, racct_updatepcpu_pre,
+ racct_updatepcpu_post, NULL, NULL);
+}
+
+static void
racctd(void)
{
- struct thread *td;
struct proc *p;
- struct timeval wallclock;
- uint64_t pct, pct_estimate, runtime;
+ struct proc *idle;
ASSERT_RACCT_ENABLED();
+ idle = STAILQ_FIRST(&cpuhead)->pc_idlethread->td_proc;
+
for (;;) {
racct_decay();
@@ -1237,36 +1253,16 @@ racctd(void)
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
+ if (p == idle) {
+ PROC_UNLOCK(p);
+ continue;
+ }
if (p->p_state != PRS_NORMAL ||
(p->p_flag & P_IDLEPROC) != 0) {
- if (p->p_state == PRS_ZOMBIE)
- racct_set(p, RACCT_PCTCPU, 0);
PROC_UNLOCK(p);
continue;
}
- microuptime(&wallclock);
- timevalsub(&wallclock, &p->p_stats->p_start);
- PROC_STATLOCK(p);
- FOREACH_THREAD_IN_PROC(p, td)
- ruxagg(p, td);
- runtime = cputick2usec(p->p_rux.rux_runtime);
- PROC_STATUNLOCK(p);
-#ifdef notyet
- KASSERT(runtime >= p->p_prev_runtime,
- ("runtime < p_prev_runtime"));
-#else
- if (runtime < p->p_prev_runtime)
- runtime = p->p_prev_runtime;
-#endif
- p->p_prev_runtime = runtime;
- if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
- pct_estimate = (1000000 * runtime * 100) /
- ((uint64_t)wallclock.tv_sec * 1000000 +
- wallclock.tv_usec);
- } else
- pct_estimate = 0;
- pct = racct_getpcpu(p, pct_estimate);
RACCT_LOCK();
#ifdef RCTL
rctl_throttle_decay(p->p_racct, RACCT_READBPS);
@@ -1274,11 +1270,8 @@ racctd(void)
rctl_throttle_decay(p->p_racct, RACCT_READIOPS);
rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS);
#endif
- racct_set_locked(p, RACCT_PCTCPU, pct, 1);
- racct_set_locked(p, RACCT_CPU, runtime, 0);
- racct_set_locked(p, RACCT_WALLCLOCK,
- (uint64_t)wallclock.tv_sec * 1000000 +
- wallclock.tv_usec, 0);
+ racct_settime_locked(p, false);
+ racct_updatepcpu_locked(p);
RACCT_UNLOCK();
PROC_UNLOCK(p);
}
@@ -1306,6 +1299,8 @@ racctd(void)
PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
+
+ racct_updatepcpu_containers();
pause("-", hz);
}
}
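
Note on the new %CPU accounting: racct_getpcpu() and the scheduler-derived estimate are gone; RACCT_PCTCPU is now an exponentially weighted moving average that racctd recomputes each pass from the RACCT_CPU delta divided by elapsed wall time, decayed by RACCT_DECAY_FACTOR. A minimal userland sketch of the arithmetic, assuming FSCALE = 2048 and a 0.3 decay factor as in the kernel (inputs invented):

#include <stdint.h>
#include <stdio.h>

#define FSCALE	2048				/* fixed-point scale */
#define DECAY	((uint64_t)(0.3 * FSCALE))	/* RACCT_DECAY_FACTOR */

int
main(void)
{
	uint64_t runtime = 400000;	/* usec of CPU time since last pass */
	uint64_t elapsed = 1000000;	/* usec of wall time since last pass */
	uint64_t oldpcpu = 20 * 1000000; /* previous estimate: 20% */
	uint64_t newpcpu, pcpu;

	/* Instantaneous usage over the last interval: 40%. */
	newpcpu = runtime * 100 * 1000000 / elapsed;
	/* (1 - 0.3) * newpcpu + 0.3 * oldpcpu, in FSCALE fixed point. */
	pcpu = ((FSCALE - DECAY) * newpcpu + DECAY * oldpcpu) / FSCALE;
	printf("pcpu = %ju (~34%%)\n", (uintmax_t)pcpu);
	return (0);
}

The same smoothing is applied to the per-uid, per-loginclass and per-jail containers via racct_updatepcpu_containers(), and the result is clamped to 100% * mp_ncpus rather than the old MAXCPU bound.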
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 46e54339a171..3aed54c58e04 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -259,6 +259,7 @@ struct bridge_iflist {
struct epoch_context bif_epoch_ctx;
ether_vlanid_t bif_pvid; /* port vlan id */
ifbvlan_set_t bif_vlan_set; /* if allowed tagged vlans */
+ uint16_t bif_vlanproto; /* vlan protocol */
};
/*
@@ -423,6 +424,7 @@ static int bridge_ioctl_gflags(struct bridge_softc *, void *);
static int bridge_ioctl_sflags(struct bridge_softc *, void *);
static int bridge_ioctl_gdefpvid(struct bridge_softc *, void *);
static int bridge_ioctl_sdefpvid(struct bridge_softc *, void *);
+static int bridge_ioctl_svlanproto(struct bridge_softc *, void *);
static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
int);
#ifdef INET
@@ -654,6 +656,9 @@ static const struct bridge_control bridge_control_table[] = {
{ bridge_ioctl_sdefpvid, sizeof(struct ifbrparam),
BC_F_COPYIN|BC_F_SUSER },
+
+ { bridge_ioctl_svlanproto, sizeof(struct ifbreq),
+ BC_F_COPYIN|BC_F_SUSER },
};
static const int bridge_control_table_size = nitems(bridge_control_table);
@@ -1494,8 +1499,11 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
bif->bif_ifp = ifs;
bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
bif->bif_savedcaps = ifs->if_capenable;
+ bif->bif_vlanproto = ETHERTYPE_VLAN;
if (sc->sc_flags & IFBRF_VLANFILTER)
bif->bif_pvid = sc->sc_defpvid;
+ if (sc->sc_flags & IFBRF_DEFQINQ)
+ bif->bif_flags |= IFBIF_QINQ;
/*
* Assign the interface's MAC address to the bridge if it's the first
@@ -1577,6 +1585,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
req->ifbr_addrmax = bif->bif_addrmax;
req->ifbr_addrexceeded = bif->bif_addrexceeded;
req->ifbr_pvid = bif->bif_pvid;
+ req->ifbr_vlanproto = bif->bif_vlanproto;
/* Copy STP state options as flags */
if (bp->bp_operedge)
@@ -2252,6 +2261,24 @@ bridge_ioctl_sdefpvid(struct bridge_softc *sc, void *arg)
return (0);
}
+static int
+bridge_ioctl_svlanproto(struct bridge_softc *sc, void *arg)
+{
+ struct ifbreq *req = arg;
+ struct bridge_iflist *bif;
+
+ bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+ if (bif == NULL)
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
+
+ if (req->ifbr_vlanproto != ETHERTYPE_VLAN &&
+ req->ifbr_vlanproto != ETHERTYPE_QINQ)
+ return (EXTERROR(EINVAL, "Invalid VLAN protocol"));
+
+ bif->bif_vlanproto = req->ifbr_vlanproto;
+
+ return (0);
+}
/*
* bridge_ifdetach:
*
@@ -2395,12 +2422,15 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
}
/*
- * If underlying interface can not do VLAN tag insertion itself
- * then attach a packet tag that holds it.
+ * There are two cases where we have to insert our own tag:
+ * if the member interface doesn't support hardware tagging,
+ * or if the tag proto is not 802.1q.
*/
if ((m->m_flags & M_VLANTAG) &&
- (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
- m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
+ ((dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0 ||
+ bif->bif_vlanproto != ETHERTYPE_VLAN)) {
+ m = ether_vlanencap_proto(m, m->m_pkthdr.ether_vtag,
+ bif->bif_vlanproto);
if (m == NULL) {
if_printf(dst_ifp,
"unable to prepend VLAN header\n");
@@ -2828,9 +2858,29 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
NET_EPOCH_ASSERT();
+ /* We need the Ethernet header later, so make sure we have it now. */
+ if (m->m_len < ETHER_HDR_LEN) {
+ m = m_pullup(m, ETHER_HDR_LEN);
+ if (m == NULL) {
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ return (NULL);
+ }
+ }
+
eh = mtod(m, struct ether_header *);
vlan = VLANTAGOF(m);
+ /*
+ * If this frame has a VLAN tag and the receiving interface has a
+ * vlan(4) trunk, then it is destined for vlan(4), not for us.
+ * This means if vlan(4) and bridge(4) are configured on the same
+ * interface, vlan(4) is preferred, which is what users typically
+ * expect.
+ */
+ if (vlan != DOT1Q_VID_NULL && ifp->if_vlantrunk != NULL)
+ return (m);
+
bif = ifp->if_bridge;
if (bif)
sc = bif->bif_sc;
@@ -3031,19 +3081,13 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
do { GRAB_OUR_PACKETS(bifp) } while (0);
/*
- * Check the interface the packet arrived on. For tagged frames,
- * we need to do this even if member_ifaddrs is disabled because
- * vlan(4) might need to handle the traffic.
+ * If member_ifaddrs is enabled, see if the packet is destined for
+ * one of the members' addresses.
*/
- if (V_member_ifaddrs || (vlan && ifp->if_vlantrunk))
+ if (V_member_ifaddrs) {
+ /* Check the interface the packet arrived on. */
do { GRAB_OUR_PACKETS(ifp) } while (0);
- /*
- * We only need to check other members interface if member_ifaddrs
- * is enabled; otherwise we should have never traffic destined for
- * a member's lladdr.
- */
- if (V_member_ifaddrs) {
CK_LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
GRAB_OUR_PACKETS(bif2->bif_ifp)
}
@@ -3250,6 +3294,18 @@ bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m)
if ((sbif->bif_sc->sc_flags & IFBRF_VLANFILTER) == 0)
return (true);
+ /* If Q-in-Q is disabled, check for stacked tags. */
+ if ((sbif->bif_flags & IFBIF_QINQ) == 0) {
+ struct ether_header *eh;
+ uint16_t proto;
+
+ eh = mtod(m, struct ether_header *);
+ proto = ntohs(eh->ether_type);
+
+ if (proto == ETHERTYPE_VLAN || proto == ETHERTYPE_QINQ)
+ return (false);
+ }
+
if (vlan == DOT1Q_VID_NULL) {
/*
* The frame doesn't have a tag. If the interface does not
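
Note on the new per-member VLAN protocol: BRDGSIFVLANPROTO selects whether the bridge inserts 802.1Q (ETHERTYPE_VLAN) or 802.1ad (ETHERTYPE_QINQ) tags on a member, and bridge_enqueue() now encapsulates in software whenever the protocol is not plain 802.1Q. A hedged userland sketch of issuing the command, assuming the usual bridge(4) SIOCSDRVSPEC/ifdrv plumbing that ifconfig(8) uses (interface names invented):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_bridgevar.h>
#include <string.h>

static int
set_member_vlanproto(int s, const char *br, const char *member,
    uint16_t proto)		/* ETHERTYPE_VLAN or ETHERTYPE_QINQ */
{
	struct ifbreq req;
	struct ifdrv ifd;

	memset(&req, 0, sizeof(req));
	strlcpy(req.ifbr_ifsname, member, sizeof(req.ifbr_ifsname));
	req.ifbr_vlanproto = proto;

	memset(&ifd, 0, sizeof(ifd));
	strlcpy(ifd.ifd_name, br, sizeof(ifd.ifd_name));
	ifd.ifd_cmd = BRDGSIFVLANPROTO;
	ifd.ifd_len = sizeof(req);
	ifd.ifd_data = &req;

	return (ioctl(s, SIOCSDRVSPEC, &ifd));
}

Called with an ordinary datagram socket, e.g. set_member_vlanproto(s, "bridge0", "em0", ETHERTYPE_QINQ).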
diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h
index 6718c5ebcc34..b0f579f688ac 100644
--- a/sys/net/if_bridgevar.h
+++ b/sys/net/if_bridgevar.h
@@ -131,13 +131,15 @@
#define BRDGSFLAGS 35 /* set bridge flags (ifbrparam) */
#define BRDGGDEFPVID 36 /* get default pvid (ifbrparam) */
#define BRDGSDEFPVID 37 /* set default pvid (ifbrparam) */
+#define BRDGSIFVLANPROTO 38 /* set if vlan protocol (ifbreq) */
/* BRDGSFLAGS, Bridge flags (non-interface-specific) */
typedef uint32_t ifbr_flags_t;
#define IFBRF_VLANFILTER (1U<<0) /* VLAN filtering enabled */
+#define IFBRF_DEFQINQ (1U<<1) /* 802.1ad Q-in-Q allowed by default */
-#define IFBRFBITS "\020\01VLANFILTER"
+#define IFBRFBITS "\020\01VLANFILTER\02DEFQINQ"
/*
* Generic bridge control request.
@@ -156,6 +158,7 @@ struct ifbreq {
uint32_t ifbr_addrmax; /* member if addr max */
uint32_t ifbr_addrexceeded; /* member if addr violations */
ether_vlanid_t ifbr_pvid; /* member if PVID */
+ uint16_t ifbr_vlanproto; /* member if VLAN protocol */
uint8_t pad[32];
};
@@ -172,12 +175,11 @@ struct ifbreq {
#define IFBIF_BSTP_ADMEDGE 0x0200 /* member stp admin edge enabled */
#define IFBIF_BSTP_ADMCOST 0x0400 /* member stp admin path cost */
#define IFBIF_PRIVATE 0x0800 /* if is a private segment */
-/* was IFBIF_VLANFILTER 0x1000 */
-#define IFBIF_QINQ 0x2000 /* if allows 802.1ad Q-in-Q */
+#define IFBIF_QINQ 0x1000 /* if allows 802.1ad Q-in-Q */
#define IFBIFBITS "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \
"\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \
- "\011AUTOPTP"
+ "\011AUTOPTP\015QINQ"
#define IFBIFMASK ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \
IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \
IFBIF_BSTP_ADMCOST) /* not saved */
@@ -252,6 +254,7 @@ struct ifbrparam {
* addresses */
#define ifbrp_flags ifbrp_ifbrpu.ifbrpu_int32 /* bridge flags */
#define ifbrp_defpvid ifbrp_ifbrpu.ifbrpu_int16 /* default pvid */
+#define ifbrp_vlanproto ifbrp_ifbrpu.ifbrpu_int8 /* vlan protocol */
/*
* Bridge current operational parameters structure.
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index 7051e31565d4..4d35584925a1 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -58,6 +58,7 @@
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <net/bpf.h>
@@ -97,6 +98,15 @@ static unsigned int next_index = 0;
#define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx)
#define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx)
+SYSCTL_DECL(_net_link);
+static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "Pair of virtual cross-over connected Ethernet-like interfaces");
+
+static bool use_ether_gen_addr = false;
+SYSCTL_BOOL(_net_link_epair, OID_AUTO, ether_gen_addr, CTLFLAG_RWTUN,
+ &use_ether_gen_addr, false,
+ "Generate MAC with FreeBSD OUI using ether_gen_addr(9)");
+
struct epair_softc;
struct epair_queue {
struct mtx mtx;
@@ -496,15 +506,29 @@ epair_clone_match(struct if_clone *ifc, const char *name)
}
static void
+epair_generate_mac_byname(struct epair_softc *sc, uint8_t eaddr[])
+{
+ struct ether_addr gen_eaddr;
+ int i;
+
+ ether_gen_addr_byname(if_name(sc->ifp), &gen_eaddr);
+ for (i = 0; i < ETHER_ADDR_LEN; i++)
+ eaddr[i] = gen_eaddr.octet[i];
+}
+
+static void
epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
{
struct ifnet *ifp;
uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
ifp = scb->ifp;
- /* Copy epairNa etheraddr and change the last byte. */
- memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
- eaddr[5] = 0x0b;
+ if (!use_ether_gen_addr) {
+ /* Copy epairNa etheraddr and change the last byte. */
+ memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
+ eaddr[5] = 0x0b;
+ } else
+ epair_generate_mac_byname(scb, eaddr);
ether_ifattach(ifp, eaddr);
if_clone_addif(ifc, ifp);
@@ -719,7 +743,10 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len,
/* Finish initialization of interface <n>a. */
ifp = sca->ifp;
epair_setup_ifp(sca, name, unit);
- epair_generate_mac(sca, eaddr);
+ if (!use_ether_gen_addr)
+ epair_generate_mac(sca, eaddr);
+ else
+ epair_generate_mac_byname(sca, eaddr);
ether_ifattach(ifp, eaddr);
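
Note on the epair MAC change: with the new net.link.epair.ether_gen_addr tunable enabled, both halves of the pair derive stable, FreeBSD-OUI addresses via ether_gen_addr(9)/ether_gen_addr_byname() instead of the hwaddr-copy-plus-0x0b scheme. A minimal sketch of flipping the knob from userland, equivalent to "sysctl net.link.epair.ether_gen_addr=1":

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdbool.h>
#include <err.h>

int
main(void)
{
	bool on = true;

	/* CTLFLAG_RWTUN: also settable as a loader tunable. */
	if (sysctlbyname("net.link.epair.ether_gen_addr", NULL, NULL,
	    &on, sizeof(on)) != 0)
		err(1, "sysctlbyname");
	return (0);
}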
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 8dae95c2cc2e..c397f0b67896 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -2338,6 +2338,10 @@ VNET_DECLARE(uma_zone_t, pf_udp_mapping_z);
#define V_pf_udp_mapping_z VNET(pf_udp_mapping_z)
VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
#define V_pf_state_scrub_z VNET(pf_state_scrub_z)
+VNET_DECLARE(uma_zone_t, pf_anchor_z);
+#define V_pf_anchor_z VNET(pf_anchor_z)
+VNET_DECLARE(uma_zone_t, pf_eth_anchor_z);
+#define V_pf_eth_anchor_z VNET(pf_eth_anchor_z)
extern void pf_purge_thread(void *);
extern void pf_unload_vnet_purge(void);
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index 4405098a8620..66275cb04bdd 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -283,7 +283,7 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
INP_WLOCK_ASSERT(tptoinpcb(tp));
/* Check arguments. */
- KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));
+ KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("SEG_GT(rcv_start, rcv_end)"));
if ((rcv_start == rcv_end) &&
(tp->rcv_numsacks >= 1) &&
@@ -498,8 +498,8 @@ tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
tp->snd_numholes--;
atomic_subtract_int(&V_tcp_sack_globalholes, 1);
- KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes >= 0"));
- KASSERT(V_tcp_sack_globalholes >= 0, ("tcp_sack_globalholes >= 0"));
+ KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes < 0"));
+ KASSERT(V_tcp_sack_globalholes >= 0, ("tcp_sack_globalholes < 0"));
}
/*
@@ -583,6 +583,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
*/
if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
left_edge_delta = th_ack - tp->snd_una;
+ delivered_data += left_edge_delta;
sack_blocks[num_sack_blks].start = tp->snd_una;
sack_blocks[num_sack_blks++].end = th_ack;
/*
@@ -590,7 +591,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
* due to DSACK blocks
*/
if (SEQ_LT(tp->snd_fack, th_ack)) {
- delivered_data += th_ack - tp->snd_una;
tp->snd_fack = th_ack;
sack_changed = SACK_CHANGE;
}
@@ -684,7 +684,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
delivered_data += sblkp->end - sblkp->start;
tp->sackhint.hole_bytes += temp->end - temp->start;
KASSERT(tp->sackhint.hole_bytes >= 0,
- ("sackhint hole bytes >= 0"));
+ ("sackhint hole bytes < 0"));
tp->snd_fack = sblkp->end;
sblkp--;
sack_changed = SACK_NEWLOSS;
@@ -783,7 +783,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
tp->sackhint.sack_bytes_rexmit -=
(SEQ_MIN(cur->rxmit, cur->end) - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
- ("sackhint bytes rtx >= 0"));
+ ("sackhint bytes rtx < 0"));
sack_changed = SACK_CHANGE;
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
@@ -874,13 +874,13 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
if (TAILQ_EMPTY(&tp->snd_holes)) {
KASSERT(tp->sackhint.hole_bytes == 0,
- ("SACK scoreboard empty, but accounting non-zero\n"));
+ ("SACK scoreboard empty, but sackhint hole bytes != 0"));
tp->sackhint.sack_bytes_rexmit = 0;
tp->sackhint.sacked_bytes = 0;
tp->sackhint.lost_bytes = 0;
} else {
KASSERT(tp->sackhint.hole_bytes > 0,
- ("SACK scoreboard not empty, but has no bytes\n"));
+ ("SACK scoreboard not empty, but sackhint hole bytes <= 0"));
tp->sackhint.delivered_data = delivered_data;
tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0"));
@@ -918,9 +918,9 @@ tcp_free_sackholes(struct tcpcb *tp)
tp->sackhint.hole_bytes = 0;
tp->sackhint.lost_bytes = 0;
- KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes == 0"));
+ KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes != 0"));
KASSERT(tp->sackhint.nexthole == NULL,
- ("tp->sackhint.nexthole == NULL"));
+ ("tp->sackhint.nexthole != NULL"));
}
/*
@@ -1061,11 +1061,15 @@ tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
}
}
}
- KASSERT(SEQ_LT(hole->start, hole->end), ("%s: hole.start >= hole.end", __func__));
+ KASSERT(SEQ_LT(hole->start, hole->end),
+ ("%s: SEQ_GEQ(hole.start, hole.end)", __func__));
if (!(V_tcp_do_newsack)) {
- KASSERT(SEQ_LT(hole->start, tp->snd_fack), ("%s: hole.start >= snd.fack", __func__));
- KASSERT(SEQ_LT(hole->end, tp->snd_fack), ("%s: hole.end >= snd.fack", __func__));
- KASSERT(SEQ_LT(hole->rxmit, tp->snd_fack), ("%s: hole.rxmit >= snd.fack", __func__));
+ KASSERT(SEQ_LT(hole->start, tp->snd_fack),
+ ("%s: SEG_GEQ(hole.start, snd.fack)", __func__));
+ KASSERT(SEQ_LT(hole->end, tp->snd_fack),
+ ("%s: SEG_GEQ(hole.end, snd.fack)", __func__));
+ KASSERT(SEQ_LT(hole->rxmit, tp->snd_fack),
+ ("%s: SEQ_GEQ(hole.rxmit, snd.fack)", __func__));
if (SEQ_GEQ(hole->start, hole->end) ||
SEQ_GEQ(hole->start, tp->snd_fack) ||
SEQ_GEQ(hole->end, tp->snd_fack) ||
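
Note on the tcp_sack.c changes: besides rewording the KASSERT messages so the panic string states the violated condition rather than restating the invariant, the delivered_data accounting now always counts bytes covered by a cumulative ACK, not only when snd_fack lags th_ack. A worked example with invented sequence numbers:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t snd_una = 100, th_ack = 300;		/* cum ACK: 200 bytes */
	uint32_t sack_start = 400, sack_end = 500;	/* one new SACK block */
	uint32_t delivered = 0;

	delivered += th_ack - snd_una;		/* always added, after the fix */
	delivered += sack_end - sack_start;	/* added per newly SACKed hole */
	printf("delivered_data = %u\n", delivered);	/* 300 */
	return (0);
}

Previously the 200 cumulatively ACKed bytes were only counted inside the snd_fack update branch, undercounting delivered data whenever snd_fack was already at or beyond th_ack.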
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index d1c4ba58bf55..fb013d3d17f0 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -509,11 +509,9 @@ void
ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
int32_t rstreason, int32_t tlen)
{
- if (tp != NULL) {
- tcp_dropwithreset(m, th, tp, tlen, rstreason);
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+ if (tp != NULL)
INP_WUNLOCK(tptoinpcb(tp));
- } else
- tcp_dropwithreset(m, th, NULL, tlen, rstreason);
}
void
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 4801b3e2c766..3fa7789efcfe 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -621,7 +621,7 @@ static void
pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
{
- switch (pd->proto) {
+ switch (pd->virtual_proto) {
case IPPROTO_TCP: {
struct tcphdr *th = &pd->hdr.tcp;
@@ -1254,6 +1254,21 @@ pf_initialize(void)
MTX_DEF | MTX_DUPOK);
}
+ /* Anchors */
+ V_pf_anchor_z = uma_zcreate("pf anchors",
+ sizeof(struct pf_kanchor), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ V_pf_limits[PF_LIMIT_ANCHORS].zone = V_pf_anchor_z;
+ uma_zone_set_max(V_pf_anchor_z, PF_ANCHOR_HIWAT);
+ uma_zone_set_warning(V_pf_anchor_z, "PF anchor limit reached");
+
+ V_pf_eth_anchor_z = uma_zcreate("pf Ethernet anchors",
+ sizeof(struct pf_keth_anchor), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ V_pf_limits[PF_LIMIT_ETH_ANCHORS].zone = V_pf_eth_anchor_z;
+ uma_zone_set_max(V_pf_eth_anchor_z, PF_ANCHOR_HIWAT);
+ uma_zone_set_warning(V_pf_eth_anchor_z, "PF Ethernet anchor limit reached");
+
/* ALTQ */
TAILQ_INIT(&V_pf_altqs[0]);
TAILQ_INIT(&V_pf_altqs[1]);
@@ -6376,7 +6391,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
KASSERT(ctx->sk != NULL, ("%s: null sk", __func__));
KASSERT(ctx->nk != NULL, ("%s: null nk", __func__));
- switch (pd->proto) {
+ switch (pd->virtual_proto) {
case IPPROTO_TCP:
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
nk->port[pd->sidx] != pd->nsport) {
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index cfff58064922..51b3fd6390e1 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -120,7 +120,8 @@ enum {
enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
- PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
+ PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_ANCHORS, PF_LIMIT_ETH_ANCHORS,
+ PF_LIMIT_MAX };
#define PF_POOL_IDMASK 0x0f
enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM,
PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN };
@@ -490,6 +491,7 @@ struct pf_osfp_ioctl {
#define PF_ANCHOR_NAME_SIZE 64
#define PF_ANCHOR_MAXPATH (MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)
+#define PF_ANCHOR_HIWAT 512
#define PF_OPTIMIZER_TABLE_PFX "__automatic_"
struct pf_rule {
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index 178ee01649c6..b6f5d74b5b42 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -331,6 +331,8 @@ pfattach_vnet(void)
V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
+ V_pf_limits[PF_LIMIT_ANCHORS].limit = PF_ANCHOR_HIWAT;
+ V_pf_limits[PF_LIMIT_ETH_ANCHORS].limit = PF_ANCHOR_HIWAT;
RB_INIT(&V_pf_anchors);
pf_init_kruleset(&pf_main_ruleset);
@@ -4973,6 +4975,7 @@ DIOCCHANGEADDR_error:
goto fail;
}
PF_RULES_WLOCK();
+ io->pfrio_nadd = 0;
error = pfr_add_addrs(&io->pfrio_table, pfras,
io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
PFR_FLAG_USERIOCTL);
diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c
index c5de1e84a287..09754359ec2d 100644
--- a/sys/netpfil/pf/pf_nl.c
+++ b/sys/netpfil/pf/pf_nl.c
@@ -2082,6 +2082,123 @@ pf_handle_clear_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
return (error);
}
+TAILQ_HEAD(pfr_addrq, pfr_addr_item);
+struct nl_parsed_table_addrs {
+ struct pfr_table table;
+ uint32_t flags;
+ struct pfr_addr addrs[256];
+ size_t addr_count;
+ int nadd;
+ int ndel;
+};
+#define _OUT(_field) offsetof(struct pfr_addr, _field)
+static const struct nlattr_parser nla_p_pfr_addr[] = {
+ { .type = PFR_A_AF, .off = _OUT(pfra_af), .cb = nlattr_get_uint8 },
+ { .type = PFR_A_NET, .off = _OUT(pfra_net), .cb = nlattr_get_uint8 },
+ { .type = PFR_A_NOT, .off = _OUT(pfra_not), .cb = nlattr_get_bool },
+ { .type = PFR_A_ADDR, .off = _OUT(pfra_u), .cb = nlattr_get_in6_addr },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(pfra_addr_parser, nla_p_pfr_addr);
+
+static int
+nlattr_get_pfr_addr(struct nlattr *nla, struct nl_pstate *npt, const void *arg,
+ void *target)
+{
+ struct nl_parsed_table_addrs *attrs = target;
+ struct pfr_addr addr = { 0 };
+ int error;
+
+ if (attrs->addr_count >= nitems(attrs->addrs))
+ return (E2BIG);
+
+ error = nlattr_get_nested(nla, npt, &pfra_addr_parser, &addr);
+ if (error != 0)
+ return (error);
+
+ memcpy(&attrs->addrs[attrs->addr_count], &addr, sizeof(addr));
+ attrs->addr_count++;
+
+ return (0);
+}
+
+NL_DECLARE_ATTR_PARSER(nested_table_parser, nla_p_table);
+
+#define _OUT(_field) offsetof(struct nl_parsed_table_addrs, _field)
+static const struct nlattr_parser nla_p_table_addr[] = {
+ { .type = PF_TA_TABLE, .off = _OUT(table), .arg = &nested_table_parser, .cb = nlattr_get_nested },
+ { .type = PF_TA_ADDR, .cb = nlattr_get_pfr_addr },
+ { .type = PF_TA_FLAGS, .off = _OUT(flags), .cb = nlattr_get_uint32 },
+};
+NL_DECLARE_PARSER(table_addr_parser, struct genlmsghdr, nlf_p_empty, nla_p_table_addr);
+#undef _OUT
+
+static int
+pf_handle_table_add_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
+{
+ struct nl_parsed_table_addrs attrs = { 0 };
+ struct nl_writer *nw = npt->nw;
+ struct genlmsghdr *ghdr_new;
+ int error;
+
+ error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs);
+ if (error != 0)
+ return (error);
+
+ PF_RULES_WLOCK();
+ error = pfr_add_addrs(&attrs.table, &attrs.addrs[0],
+ attrs.addr_count, &attrs.nadd, attrs.flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+
+ if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr)))
+ return (ENOMEM);
+
+ ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
+ ghdr_new->cmd = PFNL_CMD_TABLE_ADD_ADDR;
+ ghdr_new->version = 0;
+ ghdr_new->reserved = 0;
+
+ nlattr_add_u32(nw, PF_TA_NBR_ADDED, attrs.nadd);
+
+ if (!nlmsg_end(nw))
+ return (ENOMEM);
+
+ return (error);
+}
+
+static int
+pf_handle_table_del_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
+{
+ struct nl_parsed_table_addrs attrs = { 0 };
+ struct nl_writer *nw = npt->nw;
+ struct genlmsghdr *ghdr_new;
+ int error;
+
+ error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs);
+ if (error != 0)
+ return (error);
+
+ PF_RULES_WLOCK();
+ error = pfr_del_addrs(&attrs.table, &attrs.addrs[0],
+ attrs.addr_count, &attrs.ndel, attrs.flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+
+ if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr)))
+ return (ENOMEM);
+
+ ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
+ ghdr_new->cmd = PFNL_CMD_TABLE_DEL_ADDR;
+ ghdr_new->version = 0;
+ ghdr_new->reserved = 0;
+
+ nlattr_add_u32(nw, PF_TA_NBR_DELETED, attrs.ndel);
+
+ if (!nlmsg_end(nw))
+ return (ENOMEM);
+
+ return (error);
+}
+
static const struct nlhdr_parser *all_parsers[] = {
&state_parser,
&addrule_parser,
@@ -2096,6 +2213,7 @@ static const struct nlhdr_parser *all_parsers[] = {
&add_addr_parser,
&ruleset_parser,
&table_parser,
+ &table_addr_parser,
};
static uint16_t family_id;
@@ -2318,6 +2436,20 @@ static const struct genl_cmd pf_cmds[] = {
.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
.cmd_priv = PRIV_NETINET_PF,
},
+ {
+ .cmd_num = PFNL_CMD_TABLE_ADD_ADDR,
+ .cmd_name = "TABLE_ADD_ADDRS",
+ .cmd_cb = pf_handle_table_add_addrs,
+ .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
+ .cmd_priv = PRIV_NETINET_PF,
+ },
+ {
+ .cmd_num = PFNL_CMD_TABLE_DEL_ADDR,
+ .cmd_name = "TABLE_DEL_ADDRS",
+ .cmd_cb = pf_handle_table_del_addrs,
+ .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
+ .cmd_priv = PRIV_NETINET_PF,
+ },
};
void
diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h
index d263a0b22deb..87daac393821 100644
--- a/sys/netpfil/pf/pf_nl.h
+++ b/sys/netpfil/pf/pf_nl.h
@@ -67,6 +67,8 @@ enum {
PFNL_CMD_GET_TSTATS = 29,
PFNL_CMD_CLR_TSTATS = 30,
PFNL_CMD_CLR_ADDRS = 31,
+ PFNL_CMD_TABLE_ADD_ADDR = 32,
+ PFNL_CMD_TABLE_DEL_ADDR = 33,
__PFNL_CMD_MAX,
};
#define PFNL_CMD_MAX (__PFNL_CMD_MAX -1)
@@ -461,6 +463,23 @@ enum pf_tstats_t {
PF_TS_NZERO = 9, /* u64 */
};
+enum pfr_addr_t {
+ PFR_A_UNSPEC,
+ PFR_A_AF = 1, /* uint8_t */
+ PFR_A_NET = 2, /* uint8_t */
+ PFR_A_NOT = 3, /* bool */
+ PFR_A_ADDR = 4, /* in6_addr */
+};
+
+enum pf_table_addrs_t {
+ PF_TA_UNSPEC,
+ PF_TA_TABLE = 1, /* nested, pf_table_t */
+ PF_TA_ADDR = 2, /* nested, pfr_addr_t */
+ PF_TA_FLAGS = 3, /* u32 */
+ PF_TA_NBR_ADDED = 4, /* u32 */
+ PF_TA_NBR_DELETED = 5, /* u32 */
+};
+
#ifdef _KERNEL
void pf_nl_register(void);
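
Note on the new netlink commands: PFNL_CMD_TABLE_ADD_ADDR and PFNL_CMD_TABLE_DEL_ADDR carry a table plus up to 256 addresses per message (the addrs[256] bound in nl_parsed_table_addrs). A sketch of the framing implied by table_addr_parser and nla_p_pfr_addr, assuming standard genetlink conventions:

	genlmsghdr { cmd = PFNL_CMD_TABLE_ADD_ADDR }
	    PF_TA_TABLE  (nested)           -> struct pfr_table (pf_table_t attrs)
	    PF_TA_ADDR   (nested, repeated once per address, max 256)
	        PFR_A_AF    u8      address family
	        PFR_A_NET   u8      prefix length
	        PFR_A_NOT   bool    negated entry
	        PFR_A_ADDR  in6_addr
	    PF_TA_FLAGS  u32                -> PFR_FLAG_* (ORed with PFR_FLAG_USERIOCTL in-kernel)
	reply: genlmsghdr + PF_TA_NBR_ADDED u32 (or PF_TA_NBR_DELETED for deletes)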
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index 8cea9557633c..a684d778ab42 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -118,6 +118,8 @@ VNET_DEFINE_STATIC(uma_zone_t, pf_frnode_z);
#define V_pf_frnode_z VNET(pf_frnode_z)
VNET_DEFINE_STATIC(uma_zone_t, pf_frag_z);
#define V_pf_frag_z VNET(pf_frag_z)
+VNET_DEFINE(uma_zone_t, pf_anchor_z);
+VNET_DEFINE(uma_zone_t, pf_eth_anchor_z);
TAILQ_HEAD(pf_fragqueue, pf_fragment);
TAILQ_HEAD(pf_cachequeue, pf_fragment);
diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c
index 43b51f2933f4..039908a53126 100644
--- a/sys/netpfil/pf/pf_ruleset.c
+++ b/sys/netpfil/pf/pf_ruleset.c
@@ -238,7 +238,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
((parent != NULL) && (strlen(parent->path) >= PF_ANCHOR_MAXPATH)))
return (NULL);
- anchor = rs_malloc(sizeof(*anchor));
+ anchor = uma_zalloc(V_pf_anchor_z, M_NOWAIT | M_ZERO);
if (anchor == NULL)
return (NULL);
@@ -259,7 +259,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
printf("%s: RB_INSERT1 "
"'%s' '%s' collides with '%s' '%s'\n", __func__,
anchor->path, anchor->name, dup->path, dup->name);
- rs_free(anchor);
+ uma_zfree(V_pf_anchor_z, anchor);
return (NULL);
}
@@ -273,7 +273,7 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
anchor->name, dup->path, dup->name);
RB_REMOVE(pf_kanchor_global, &V_pf_anchors,
anchor);
- rs_free(anchor);
+ uma_zfree(V_pf_anchor_z, anchor);
return (NULL);
}
}
@@ -350,7 +350,7 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset)
if ((parent = ruleset->anchor->parent) != NULL)
RB_REMOVE(pf_kanchor_node, &parent->children,
ruleset->anchor);
- rs_free(ruleset->anchor);
+ uma_zfree(V_pf_anchor_z, ruleset->anchor);
if (parent == NULL)
return;
ruleset = &parent->ruleset;
@@ -613,7 +613,7 @@ pf_find_or_create_keth_ruleset(const char *path)
rs_free(p);
return (NULL);
}
- anchor = (struct pf_keth_anchor *)rs_malloc(sizeof(*anchor));
+ anchor = uma_zalloc(V_pf_eth_anchor_z, M_NOWAIT | M_ZERO);
if (anchor == NULL) {
rs_free(p);
return (NULL);
@@ -631,7 +631,7 @@ pf_find_or_create_keth_ruleset(const char *path)
printf("%s: RB_INSERT1 "
"'%s' '%s' collides with '%s' '%s'\n", __func__,
anchor->path, anchor->name, dup->path, dup->name);
- rs_free(anchor);
+ uma_zfree(V_pf_eth_anchor_z, anchor);
rs_free(p);
return (NULL);
}
@@ -645,7 +645,7 @@ pf_find_or_create_keth_ruleset(const char *path)
anchor->name, dup->path, dup->name);
RB_REMOVE(pf_keth_anchor_global, &V_pf_keth_anchors,
anchor);
- rs_free(anchor);
+ uma_zfree(V_pf_eth_anchor_z, anchor);
rs_free(p);
return (NULL);
}
@@ -754,7 +754,7 @@ pf_remove_if_empty_keth_ruleset(struct pf_keth_ruleset *ruleset)
if ((parent = ruleset->anchor->parent) != NULL)
RB_REMOVE(pf_keth_anchor_node, &parent->children,
ruleset->anchor);
- rs_free(ruleset->anchor);
+ uma_zfree(V_pf_eth_anchor_z, ruleset->anchor);
if (parent == NULL)
return;
ruleset = &parent->ruleset;
diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c
index ecc185f89ad7..73ec18fa7646 100644
--- a/sys/netpfil/pf/pf_table.c
+++ b/sys/netpfil/pf/pf_table.c
@@ -294,7 +294,7 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
else
pfr_destroy_kentries(&workq);
if (nadd != NULL)
- *nadd = xadd;
+ *nadd += xadd;
pfr_destroy_ktable(tmpkt, 0);
return (0);
_bad:
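
Note on the pfr_add_addrs() change: the function now accumulates into *nadd ("+=") instead of overwriting it, so callers must pre-zero the counter; that is exactly what the pf_ioctl.c hunk above adds for DIOCRADDADDRS. The resulting caller pattern, sketched (kernel context, not standalone):

	int nadd = 0;	/* must start at zero: pfr_add_addrs() accumulates */

	error = pfr_add_addrs(&io->pfrio_table, pfras, io->pfrio_size,
	    &nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);

Presumably this lets callers such as the netlink handlers above report a running total when a request is split across multiple calls.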
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index af9cafa99dd0..9140cee56885 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -763,7 +763,6 @@ struct proc {
LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/
struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */
struct cv p_pwait; /* (*) wait cv for exit/exec. */
- uint64_t p_prev_runtime; /* (c) Resource usage accounting. */
struct racct *p_racct; /* (b) Resource accounting. */
int p_throttled; /* (c) Flag for racct pcpu throttling */
/*
diff --git a/sys/sys/racct.h b/sys/sys/racct.h
index c6020b82c865..92b50353774e 100644
--- a/sys/sys/racct.h
+++ b/sys/sys/racct.h
@@ -141,13 +141,17 @@ extern bool racct_enable;
/*
* The 'racct' structure defines resource consumption for a particular
- * subject, such as process or jail.
+ * subject, such as process or jail. It also contains the total
+ * cpu time and real time of the subject, recorded at the most recent
+ * time that RACCT_PCPU was updated.
*
* This structure must be filled with zeroes initially.
*/
struct racct {
int64_t r_resources[RACCT_MAX + 1];
LIST_HEAD(, rctl_rule_link) r_rule_links;
+ uint64_t r_runtime;
+ struct timeval r_time;
};
SYSCTL_DECL(_kern_racct);