aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorErmal Luçi <eri@FreeBSD.org>2015-06-24 19:16:41 +0000
committerErmal Luçi <eri@FreeBSD.org>2015-06-24 19:16:41 +0000
commita5b789f65a306f8abd91a8b2eea77df2e77f90f6 (patch)
tree6c12b529ee86ec1dfc65e74eeb0c70085c50a35a /sys
parentcbc120b22cb0732f0f528581329734d98b053c84 (diff)
downloadsrc-a5b789f65a306f8abd91a8b2eea77df2e77f90f6.tar.gz
src-a5b789f65a306f8abd91a8b2eea77df2e77f90f6.zip
ALTQ FAIRQ discipline import from DragonFLY
Differential Revision: https://reviews.freebsd.org/D2847 Reviewed by: glebius, wblock(manpage) Approved by: gnn(mentor) Obtained from: pfSense Sponsored by: Netgate
Notes
Notes: svn path=/head/; revision=284777
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/NOTES1
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/options1
-rw-r--r--sys/net/altq/altq.h3
-rw-r--r--sys/net/altq/altq_fairq.c889
-rw-r--r--sys/net/altq/altq_fairq.h137
-rw-r--r--sys/net/altq/altq_subr.c30
-rw-r--r--sys/net/altq/altq_var.h7
-rw-r--r--sys/netpfil/pf/pf.c18
-rw-r--r--sys/netpfil/pf/pf_altq.h15
-rw-r--r--sys/netpfil/pf/pf_mtag.h1
11 files changed, 1102 insertions, 1 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 554a234e448f..5bbc29e97dad 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -709,6 +709,7 @@ options ALTQ_CBQ # Class Based Queueing
options ALTQ_RED # Random Early Detection
options ALTQ_RIO # RED In/Out
options ALTQ_HFSC # Hierarchical Packet Scheduler
+options ALTQ_FAIRQ # Fair Packet Scheduler
options ALTQ_CDNR # Traffic conditioner
options ALTQ_PRIQ # Priority Queueing
options ALTQ_NOPCC # Required if the TSC is unusable
diff --git a/sys/conf/files b/sys/conf/files
index 3296e7e2c7cb..2515071e2c28 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3254,6 +3254,7 @@ libkern/zlib.c optional crypto | geom_uzip | ipsec | \
net/altq/altq_cbq.c optional altq
net/altq/altq_cdnr.c optional altq
net/altq/altq_hfsc.c optional altq
+net/altq/altq_fairq.c optional altq
net/altq/altq_priq.c optional altq
net/altq/altq_red.c optional altq
net/altq/altq_rio.c optional altq
diff --git a/sys/conf/options b/sys/conf/options
index 511e57c0be5e..b75cd41be96f 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -389,6 +389,7 @@ ALTQ_CBQ opt_altq.h
ALTQ_CDNR opt_altq.h
ALTQ_DEBUG opt_altq.h
ALTQ_HFSC opt_altq.h
+ALTQ_FAIRQ opt_altq.h
ALTQ_NOPCC opt_altq.h
ALTQ_PRIQ opt_altq.h
ALTQ_RED opt_altq.h
diff --git a/sys/net/altq/altq.h b/sys/net/altq/altq.h
index cdf6546e810f..b6937b2951e2 100644
--- a/sys/net/altq/altq.h
+++ b/sys/net/altq/altq.h
@@ -63,7 +63,8 @@
#define ALTQT_BLUE 10 /* blue */
#define ALTQT_PRIQ 11 /* priority queue */
#define ALTQT_JOBS 12 /* JoBS */
-#define ALTQT_MAX 13 /* should be max discipline type + 1 */
+#define ALTQT_FAIRQ 13 /* fairq */
+#define ALTQT_MAX 14 /* should be max discipline type + 1 */
#ifdef ALTQ3_COMPAT
struct altqreq {
diff --git a/sys/net/altq/altq_fairq.c b/sys/net/altq/altq_fairq.c
new file mode 100644
index 000000000000..b513a671fe12
--- /dev/null
+++ b/sys/net/altq/altq_fairq.c
@@ -0,0 +1,889 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ * $FreeBSD$
+ */
+/*
+ * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
+ * fairq. The fairq algorithm is completely different then priq, of course,
+ * but because I used priq's skeleton I believe I should include priq's
+ * copyright.
+ *
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * FAIRQ - take traffic classified by keep state (hashed into
+ * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract
+ * the first packet from each bucket in a round-robin fashion.
+ *
+ * TODO - better overall qlimit support (right now it is per-bucket).
+ * - NOTE: red etc is per bucket, not overall.
+ * - better service curve support.
+ *
+ * EXAMPLE:
+ *
+ * altq on em0 fairq bandwidth 650Kb queue { std, bulk }
+ * queue std priority 3 bandwidth 400Kb \
+ * fairq (buckets 64, default, hogs 1Kb) qlimit 50
+ * queue bulk priority 2 bandwidth 100Kb \
+ * fairq (buckets 64, hogs 1Kb) qlimit 50
+ *
+ * pass out on em0 from any to any keep state queue std
+ * pass out on em0 inet proto tcp ..... port ... keep state queue bulk
+ */
+#include "opt_altq.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <net/pfvar.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_fairq.h>
+
+/*
+ * function prototypes
+ */
+static int fairq_clear_interface(struct fairq_if *);
+static int fairq_request(struct ifaltq *, int, void *);
+static void fairq_purge(struct fairq_if *);
+static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
+static int fairq_class_destroy(struct fairq_class *);
+static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *fairq_dequeue(struct ifaltq *, int);
+
+static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t);
+static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
+static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
+static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
+static void fairq_purgeq(struct fairq_class *);
+
+static void get_class_stats(struct fairq_classstats *, struct fairq_class *);
+static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
+
+int
+fairq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+
+ error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc,
+ fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
+
+ return (error);
+}
+
+int
+fairq_add_altq(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+
+ pif = malloc(sizeof(struct fairq_if),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ pif->pif_bandwidth = a->ifbandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = pif;
+
+ return (0);
+}
+
+int
+fairq_remove_altq(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ fairq_clear_interface(pif);
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+int
+fairq_add_queue(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ /* check parameters */
+ if (a->priority >= FAIRQ_MAXPRI)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+ if (pif->pif_classes[a->priority] != NULL)
+ return (EBUSY);
+ if (clh_to_clp(pif, a->qid) != NULL)
+ return (EBUSY);
+
+ cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
+ &a->pq_u.fairq_opts, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
+
+int
+fairq_remove_queue(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (fairq_class_destroy(cl));
+}
+
+int
+fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+ struct fairq_classstats stats;
+ int error = 0;
+
+ if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+fairq_clear_interface(struct fairq_if *pif)
+{
+ struct fairq_class *cl;
+ int pri;
+
+ /* clear out the classes */
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL)
+ fairq_class_destroy(cl);
+ }
+
+ return (0);
+}
+
+static int
+fairq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ fairq_purge(pif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+fairq_purge(struct fairq_if *pif)
+{
+ struct fairq_class *cl;
+ int pri;
+
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
+ fairq_purgeq(cl);
+ }
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ pif->pif_ifq->ifq_len = 0;
+}
+
+static struct fairq_class *
+fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
+ u_int bandwidth, struct fairq_opts *opts, int qid)
+{
+ struct fairq_class *cl;
+ int flags = opts->flags;
+ u_int nbuckets = opts->nbuckets;
+ int i;
+
+#ifndef ALTQ_RED
+ if (flags & FARF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("fairq_class_create: RED not configured for FAIRQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+ if (nbuckets == 0)
+ nbuckets = 256;
+ if (nbuckets > FAIRQ_MAX_BUCKETS)
+ nbuckets = FAIRQ_MAX_BUCKETS;
+ /* enforce power-of-2 size */
+ while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
+ ++nbuckets;
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+ if (cl->cl_head)
+ fairq_purgeq(cl);
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_destroy(cl->cl_red);
+#endif
+ } else {
+ cl = malloc(sizeof(struct fairq_class),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ cl->cl_nbuckets = nbuckets;
+ cl->cl_nbucket_mask = nbuckets - 1;
+
+ cl->cl_buckets = malloc(
+ sizeof(struct fairq_bucket) * cl->cl_nbuckets,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ cl->cl_head = NULL;
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & FARF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ cl->cl_qlimit = qlimit;
+ for (i = 0; i < cl->cl_nbuckets; ++i) {
+ qlimit(&cl->cl_buckets[i].queue) = qlimit;
+ }
+ cl->cl_bandwidth = bandwidth / 8;
+ cl->cl_qtype = Q_DROPTAIL;
+ cl->cl_flags = flags & FARF_USERFLAGS;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = qid;
+ cl->cl_hogs_m1 = opts->hogs_m1 / 8;
+ cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */
+
+#ifdef ALTQ_RED
+ if (flags & (FARF_RED|FARF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & FARF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & FARF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
+#ifdef ALTQ_RIO
+ if (flags & FARF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ cl->cl_qtype = Q_RIO;
+ } else
+#endif
+ if (flags & FARF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ cl->cl_qlimit * 10/100,
+ cl->cl_qlimit * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ cl->cl_qtype = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
+
+ return (cl);
+
+err_buckets:
+ if (cl->cl_buckets != NULL)
+ free(cl->cl_buckets, M_DEVBUF);
+err_ret:
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl != NULL)
+ free(cl, M_DEVBUF);
+ return (NULL);
+}
+
+static int
+fairq_class_destroy(struct fairq_class *cl)
+{
+ struct fairq_if *pif;
+ int pri;
+
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+
+ if (cl->cl_head)
+ fairq_purgeq(cl);
+
+ pif = cl->cl_pif;
+ pif->pif_classes[cl->cl_pri] = NULL;
+ if (pif->pif_poll_cache == cl)
+ pif->pif_poll_cache = NULL;
+ if (pif->pif_maxpri == cl->cl_pri) {
+ for (pri = cl->cl_pri; pri >= 0; pri--)
+ if (pif->pif_classes[pri] != NULL) {
+ pif->pif_maxpri = pri;
+ break;
+ }
+ if (pri < 0)
+ pif->pif_maxpri = -1;
+ }
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_destroy(cl->cl_red);
+#endif
+ }
+ free(cl->cl_buckets, M_DEVBUF);
+ free(cl, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * fairq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+ struct fairq_class *cl = NULL; /* Make compiler happy */
+ struct pf_mtag *t;
+ u_int32_t qid_hash = 0;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+
+ if ((t = pf_find_mtag(m)) != NULL) {
+ cl = clh_to_clp(pif, t->qid);
+ qid_hash = t->qid_hash;
+ }
+ if (cl == NULL) {
+ cl = pif->pif_default;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+ cl->cl_flags |= FARF_HAS_PACKETS;
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (fairq_addq(cl, m, qid_hash) != 0) {
+ /* drop occurred. mbuf was freed in fairq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ return (0);
+}
+
+/*
+ * fairq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+fairq_dequeue(struct ifaltq *ifq, int op)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+ struct fairq_class *cl;
+ struct fairq_class *best_cl;
+ struct mbuf *best_m;
+ struct mbuf *m = NULL;
+ uint64_t cur_time = read_machclk();
+ int pri;
+ int hit_limit;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq)) {
+ return (NULL);
+ }
+
+ if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
+ best_cl = pif->pif_poll_cache;
+ m = fairq_getq(best_cl, cur_time);
+ pif->pif_poll_cache = NULL;
+ if (m) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+ return (m);
+ }
+ } else {
+ best_cl = NULL;
+ best_m = NULL;
+
+ for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+ if ((cl = pif->pif_classes[pri]) == NULL)
+ continue;
+ if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
+ continue;
+ m = fairq_pollq(cl, cur_time, &hit_limit);
+ if (m == NULL) {
+ cl->cl_flags &= ~FARF_HAS_PACKETS;
+ continue;
+ }
+
+ /*
+ * Only override the best choice if we are under
+ * the BW limit.
+ */
+ if (hit_limit == 0 || best_cl == NULL) {
+ best_cl = cl;
+ best_m = m;
+ }
+
+ /*
+ * Remember the highest priority mbuf in case we
+ * do not find any lower priority mbufs.
+ */
+ if (hit_limit)
+ continue;
+ break;
+ }
+ if (op == ALTDQ_POLL) {
+ pif->pif_poll_cache = best_cl;
+ m = best_m;
+ } else if (best_cl) {
+ m = fairq_getq(best_cl, cur_time);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+ }
+ }
+ return (m);
+ }
+ return (NULL);
+}
+
+static int
+fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid)
+{
+ fairq_bucket_t *b;
+ u_int hindex;
+ uint64_t bw;
+
+ /*
+ * If the packet doesn't have any keep state put it on the end of
+ * our queue. XXX this can result in out of order delivery.
+ */
+ if (bucketid == 0) {
+ if (cl->cl_head)
+ b = cl->cl_head->prev;
+ else
+ b = &cl->cl_buckets[0];
+ } else {
+ hindex = bucketid & cl->cl_nbucket_mask;
+ b = &cl->cl_buckets[hindex];
+ }
+
+ /*
+ * Add the bucket to the end of the circular list of active buckets.
+ *
+ * As a special case we add the bucket to the beginning of the list
+ * instead of the end if it was not previously on the list and if
+ * its traffic is less then the hog level.
+ */
+ if (b->in_use == 0) {
+ b->in_use = 1;
+ if (cl->cl_head == NULL) {
+ cl->cl_head = b;
+ b->next = b;
+ b->prev = b;
+ } else {
+ b->next = cl->cl_head;
+ b->prev = cl->cl_head->prev;
+ b->prev->next = b;
+ b->next->prev = b;
+
+ if (b->bw_delta && cl->cl_hogs_m1) {
+ bw = b->bw_bytes * machclk_freq / b->bw_delta;
+ if (bw < cl->cl_hogs_m1)
+ cl->cl_head = b;
+ }
+ }
+ }
+
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+ if (qlen(&b->queue) >= qlimit(&b->queue)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & FARF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(&b->queue, m);
+
+ return (0);
+}
+
+static struct mbuf *
+fairq_getq(struct fairq_class *cl, uint64_t cur_time)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+
+ b = fairq_selectq(cl, 0);
+ if (b == NULL)
+ m = NULL;
+#ifdef ALTQ_RIO
+ else if (cl->cl_qtype == Q_RIO)
+ m = rio_getq((rio_t *)cl->cl_red, &b->queue);
+#endif
+#ifdef ALTQ_RED
+ else if (cl->cl_qtype == Q_RED)
+ m = red_getq(cl->cl_red, &b->queue);
+#endif
+ else
+ m = _getq(&b->queue);
+
+ /*
+ * Calculate the BW change
+ */
+ if (m != NULL) {
+ uint64_t delta;
+
+ /*
+ * Per-class bandwidth calculation
+ */
+ delta = (cur_time - cl->cl_last_time);
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ cl->cl_bw_delta += delta;
+ cl->cl_bw_bytes += m->m_pkthdr.len;
+ cl->cl_last_time = cur_time;
+ cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
+ cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
+
+ /*
+ * Per-bucket bandwidth calculation
+ */
+ delta = (cur_time - b->last_time);
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ b->bw_delta += delta;
+ b->bw_bytes += m->m_pkthdr.len;
+ b->last_time = cur_time;
+ b->bw_delta -= b->bw_delta >> 3;
+ b->bw_bytes -= b->bw_bytes >> 3;
+ }
+ return(m);
+}
+
+/*
+ * Figure out what the next packet would be if there were no limits. If
+ * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise
+ * it is set to 0. A non-NULL mbuf is returned either way.
+ */
+static struct mbuf *
+fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+ uint64_t delta;
+ uint64_t bw;
+
+ *hit_limit = 0;
+ b = fairq_selectq(cl, 1);
+ if (b == NULL)
+ return(NULL);
+ m = qhead(&b->queue);
+
+ /*
+ * Did this packet exceed the class bandwidth? Calculate the
+ * bandwidth component of the packet.
+ *
+ * - Calculate bytes per second
+ */
+ delta = cur_time - cl->cl_last_time;
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ cl->cl_bw_delta += delta;
+ cl->cl_last_time = cur_time;
+ if (cl->cl_bw_delta) {
+ bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
+
+ if (bw > cl->cl_bandwidth)
+ *hit_limit = 1;
+#ifdef ALTQ_DEBUG
+ printf("BW %6lld relative to %6u %d queue %p\n",
+ bw, cl->cl_bandwidth, *hit_limit, b);
+#endif
+ }
+ return(m);
+}
+
+/*
+ * Locate the next queue we want to pull a packet out of. This code
+ * is also responsible for removing empty buckets from the circular list.
+ */
+static
+fairq_bucket_t *
+fairq_selectq(struct fairq_class *cl, int ispoll)
+{
+ fairq_bucket_t *b;
+ uint64_t bw;
+
+ if (ispoll == 0 && cl->cl_polled) {
+ b = cl->cl_polled;
+ cl->cl_polled = NULL;
+ return(b);
+ }
+
+ while ((b = cl->cl_head) != NULL) {
+ /*
+ * Remove empty queues from consideration
+ */
+ if (qempty(&b->queue)) {
+ b->in_use = 0;
+ cl->cl_head = b->next;
+ if (cl->cl_head == b) {
+ cl->cl_head = NULL;
+ } else {
+ b->next->prev = b->prev;
+ b->prev->next = b->next;
+ }
+ continue;
+ }
+
+ /*
+ * Advance the round robin. Queues with bandwidths less
+ * then the hog bandwidth are allowed to burst.
+ */
+ if (cl->cl_hogs_m1 == 0) {
+ cl->cl_head = b->next;
+ } else if (b->bw_delta) {
+ bw = b->bw_bytes * machclk_freq / b->bw_delta;
+ if (bw >= cl->cl_hogs_m1) {
+ cl->cl_head = b->next;
+ }
+ /*
+ * XXX TODO -
+ */
+ }
+
+ /*
+ * Return bucket b.
+ */
+ break;
+ }
+ if (ispoll)
+ cl->cl_polled = b;
+ return(b);
+}
+
+static void
+fairq_purgeq(struct fairq_class *cl)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+
+ while ((b = fairq_selectq(cl, 0)) != NULL) {
+ while ((m = _getq(&b->queue)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(&b->queue) == 0);
+ }
+}
+
+static void
+get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
+{
+ fairq_bucket_t *b;
+
+ sp->class_handle = cl->cl_handle;
+ sp->qlimit = cl->cl_qlimit;
+ sp->xmit_cnt = cl->cl_xmitcnt;
+ sp->drop_cnt = cl->cl_dropcnt;
+ sp->qtype = cl->cl_qtype;
+ sp->qlength = 0;
+
+ if (cl->cl_head) {
+ b = cl->cl_head;
+ do {
+ sp->qlength += qlen(&b->queue);
+ b = b->next;
+ } while (b != cl->cl_head);
+ }
+
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct fairq_class *
+clh_to_clp(struct fairq_if *pif, uint32_t chandle)
+{
+ struct fairq_class *cl;
+ int idx;
+
+ if (chandle == 0)
+ return (NULL);
+
+ for (idx = pif->pif_maxpri; idx >= 0; idx--)
+ if ((cl = pif->pif_classes[idx]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+
+ return (NULL);
+}
+
+#endif /* ALTQ_FAIRQ */
diff --git a/sys/net/altq/altq_fairq.h b/sys/net/altq/altq_fairq.h
new file mode 100644
index 000000000000..d89002e2860d
--- /dev/null
+++ b/sys/net/altq/altq_fairq.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_FAIRQ_H_
+#define _ALTQ_ALTQ_FAIRQ_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+#include <net/altq/altq_rmclass.h>
+
+#define FAIRQ_MAX_BUCKETS 2048 /* maximum number of sorting buckets */
+#define FAIRQ_MAXPRI RM_MAXPRIO
+#define FAIRQ_BITMAP_WIDTH (sizeof(fairq_bitmap_t)*8)
+#define FAIRQ_BITMAP_MASK (FAIRQ_BITMAP_WIDTH - 1)
+
+/* fairq class flags */
+#define FARF_RED 0x0001 /* use RED */
+#define FARF_ECN 0x0002 /* use RED/ECN */
+#define FARF_RIO 0x0004 /* use RIO */
+#define FARF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define FARF_DEFAULTCLASS 0x1000 /* default class */
+
+#define FARF_HAS_PACKETS 0x2000 /* might have queued packets */
+
+#define FARF_USERFLAGS (FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \
+ FARF_DEFAULTCLASS)
+
+/* special class handles */
+#define FAIRQ_NULLCLASS_HANDLE 0
+
+typedef u_int fairq_bitmap_t;
+
+struct fairq_classstats {
+ uint32_t class_handle;
+
+ u_int qlength;
+ u_int qlimit;
+ struct pktcntr xmit_cnt; /* transmitted packet counter */
+ struct pktcntr drop_cnt; /* dropped packet counter */
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* rio has 3 red stats */
+};
+
+#ifdef _KERNEL
+
+typedef struct fairq_bucket {
+ struct fairq_bucket *next; /* circular list */
+ struct fairq_bucket *prev; /* circular list */
+ class_queue_t queue; /* the actual queue */
+ uint64_t bw_bytes; /* statistics used to calculate bw */
+ uint64_t bw_delta; /* statistics used to calculate bw */
+ uint64_t last_time;
+ int in_use;
+} fairq_bucket_t;
+
+struct fairq_class {
+ uint32_t cl_handle; /* class handle */
+ u_int cl_nbuckets; /* (power of 2) */
+ u_int cl_nbucket_mask; /* bucket mask */
+ fairq_bucket_t *cl_buckets;
+ fairq_bucket_t *cl_head; /* head of circular bucket list */
+ fairq_bucket_t *cl_polled;
+ struct red *cl_red; /* RED state */
+ u_int cl_hogs_m1;
+ u_int cl_lssc_m1;
+ u_int cl_bandwidth;
+ uint64_t cl_bw_bytes;
+ uint64_t cl_bw_delta;
+ uint64_t cl_last_time;
+ int cl_qtype; /* rollup */
+ int cl_qlimit;
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct fairq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* round robin index */
+
+ /* statistics */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * fairq interface state
+ */
+struct fairq_if {
+ struct fairq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct fairq_class *pif_poll_cache;/* cached poll */
+ struct fairq_class *pif_default; /* default class */
+ struct fairq_class *pif_classes[FAIRQ_MAXPRI]; /* classes */
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_FAIRQ_H_ */
diff --git a/sys/net/altq/altq_subr.c b/sys/net/altq/altq_subr.c
index 0c3da66781ac..2d10c8c12721 100644
--- a/sys/net/altq/altq_subr.c
+++ b/sys/net/altq/altq_subr.c
@@ -507,6 +507,11 @@ altq_pfattach(struct pf_altq *a)
error = hfsc_pfattach(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_pfattach(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -578,6 +583,11 @@ altq_add(struct pf_altq *a)
error = hfsc_add_altq(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_add_altq(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -614,6 +624,11 @@ altq_remove(struct pf_altq *a)
error = hfsc_remove_altq(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_remove_altq(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -647,6 +662,11 @@ altq_add_queue(struct pf_altq *a)
error = hfsc_add_queue(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_add_queue(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -680,6 +700,11 @@ altq_remove_queue(struct pf_altq *a)
error = hfsc_remove_queue(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_remove_queue(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -713,6 +738,11 @@ altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
error = hfsc_getqstats(a, ubuf, nbytes);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
default:
error = ENXIO;
}
diff --git a/sys/net/altq/altq_var.h b/sys/net/altq/altq_var.h
index df121444312d..87c292c6a02e 100644
--- a/sys/net/altq/altq_var.h
+++ b/sys/net/altq/altq_var.h
@@ -227,5 +227,12 @@ int hfsc_add_queue(struct pf_altq *);
int hfsc_remove_queue(struct pf_altq *);
int hfsc_getqstats(struct pf_altq *, void *, int *);
+int fairq_pfattach(struct pf_altq *);
+int fairq_add_altq(struct pf_altq *);
+int fairq_remove_altq(struct pf_altq *);
+int fairq_add_queue(struct pf_altq *);
+int fairq_remove_queue(struct pf_altq *);
+int fairq_getqstats(struct pf_altq *, void *, int *);
+
#endif /* _KERNEL */
#endif /* _ALTQ_ALTQ_VAR_H_ */
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 5652365a0013..81d010fb17fd 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -439,6 +439,20 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af)
return (h & pf_srchashmask);
}
+#ifdef ALTQ
+static int
+pf_state_hash(struct pf_state *s)
+{
+ u_int32_t hv = (intptr_t)s / sizeof(*s);
+
+ hv ^= crc32(&s->src, sizeof(s->src));
+ hv ^= crc32(&s->dst, sizeof(s->dst));
+ if (hv == 0)
+ hv = 1;
+ return (hv);
+}
+#endif
+
#ifdef INET6
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
@@ -5900,6 +5914,8 @@ done:
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
} else {
+ if (s != NULL)
+ pd.pf_mtag->qid_hash = pf_state_hash(s);
if (pqid || (pd.tos & IPTOS_LOWDELAY))
pd.pf_mtag->qid = r->pqid;
else
@@ -6332,6 +6348,8 @@ done:
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
} else {
+ if (s != NULL)
+ pd.pf_mtag->qid_hash = pf_state_hash(s);
if (pd.tos & IPTOS_LOWDELAY)
pd.pf_mtag->qid = r->pqid;
else
diff --git a/sys/netpfil/pf/pf_altq.h b/sys/netpfil/pf/pf_altq.h
index eda09651d643..db681fbb5d7c 100644
--- a/sys/netpfil/pf/pf_altq.h
+++ b/sys/netpfil/pf/pf_altq.h
@@ -65,6 +65,20 @@ struct hfsc_opts {
int flags;
};
+/*
+ * XXX this needs some work
+ */
+struct fairq_opts {
+ u_int nbuckets;
+ u_int hogs_m1;
+ int flags;
+
+ /* link sharing service curve */
+ u_int lssc_m1;
+ u_int lssc_d;
+ u_int lssc_m2;
+};
+
struct pf_altq {
char ifname[IFNAMSIZ];
@@ -91,6 +105,7 @@ struct pf_altq {
struct cbq_opts cbq_opts;
struct priq_opts priq_opts;
struct hfsc_opts hfsc_opts;
+ struct fairq_opts fairq_opts;
} pq_u;
uint32_t qid; /* return value */
diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h
index 3aacb2e18c4e..fd8554aebe98 100644
--- a/sys/netpfil/pf/pf_mtag.h
+++ b/sys/netpfil/pf/pf_mtag.h
@@ -44,6 +44,7 @@
struct pf_mtag {
void *hdr; /* saved hdr pos in mbuf, for ECN */
u_int32_t qid; /* queue id */
+ u_int32_t qid_hash; /* queue hashid used by WFQ like algos */
u_int16_t tag; /* tag id */
u_int8_t flags;
u_int8_t routed;