aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Motin <mav@FreeBSD.org> 2019-12-29 21:16:03 +0000
committer Alexander Motin <mav@FreeBSD.org> 2019-12-29 21:16:03 +0000
commit 86c06ff8864bc0e2233980c741b689714000850d (patch)
tree 7a17d62013a575a1a0a8eaf6d509c5e0d8b2907f
parent 2a73387f1c342808c4939305f019deea9245ff25 (diff)
Notes
-rw-r--r-- lib/geom/Makefile.classes 1
-rw-r--r-- lib/geom/sched/Makefile 9
-rw-r--r-- lib/geom/sched/Makefile.depend 19
-rw-r--r-- lib/geom/sched/geom_sched.c 128
-rw-r--r-- lib/geom/sched/gsched.8 162
-rw-r--r-- sys/geom/geom.h 13
-rw-r--r-- sys/geom/geom_io.c 77
-rw-r--r-- sys/geom/sched/README 142
-rw-r--r-- sys/geom/sched/g_sched.c 1729
-rw-r--r-- sys/geom/sched/g_sched.h 111
-rw-r--r-- sys/geom/sched/gs_delay.c 264
-rw-r--r-- sys/geom/sched/gs_rr.c 701
-rw-r--r-- sys/geom/sched/gs_scheduler.h 239
-rw-r--r-- sys/modules/geom/Makefile 1
-rw-r--r-- sys/modules/geom/geom_sched/Makefile 5
-rw-r--r-- sys/modules/geom/geom_sched/Makefile.inc 9
-rw-r--r-- sys/modules/geom/geom_sched/gs_sched/Makefile 6
-rw-r--r-- sys/modules/geom/geom_sched/gsched_delay/Makefile 7
-rw-r--r-- sys/modules/geom/geom_sched/gsched_rr/Makefile 7
-rw-r--r-- sys/sys/bio.h 7
-rw-r--r-- sys/sys/ktr_class.h 2
-rw-r--r-- sys/sys/param.h 2
22 files changed, 4 insertions, 3637 deletions
diff --git a/lib/geom/Makefile.classes b/lib/geom/Makefile.classes
index a8c4c81ba6e8..fcaa748825ee 100644
--- a/lib/geom/Makefile.classes
+++ b/lib/geom/Makefile.classes
@@ -20,7 +20,6 @@ GEOM_CLASSES+= nop
GEOM_CLASSES+= part
GEOM_CLASSES+= raid
GEOM_CLASSES+= raid3
-GEOM_CLASSES+= sched
GEOM_CLASSES+= shsec
GEOM_CLASSES+= stripe
GEOM_CLASSES+= virstor
diff --git a/lib/geom/sched/Makefile b/lib/geom/sched/Makefile
deleted file mode 100644
index ad3f5b131a18..000000000000
--- a/lib/geom/sched/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# GEOM_LIBRARY_PATH
-# $FreeBSD$
-
-PACKAGE=runtime
-.PATH: ${.CURDIR:H:H}/misc
-
-GEOM_CLASS= sched
-
-.include <bsd.lib.mk>
diff --git a/lib/geom/sched/Makefile.depend b/lib/geom/sched/Makefile.depend
deleted file mode 100644
index fb5f86e931fb..000000000000
--- a/lib/geom/sched/Makefile.depend
+++ /dev/null
@@ -1,19 +0,0 @@
-# $FreeBSD$
-# Autogenerated - do NOT edit!
-
-DIRDEPS = \
- gnu/lib/csu \
- include \
- include/xlocale \
- lib/${CSU_DIR} \
- lib/libc \
- lib/libcompiler_rt \
- lib/libgeom \
- sbin/geom/core \
-
-
-.include <dirdeps.mk>
-
-.if ${DEP_RELDIR} == ${_DEP_RELDIR}
-# local dependencies - needed for -jN in clean tree
-.endif
diff --git a/lib/geom/sched/geom_sched.c b/lib/geom/sched/geom_sched.c
deleted file mode 100644
index 7b7b281d7d55..000000000000
--- a/lib/geom/sched/geom_sched.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009 Fabio Checconi
- * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $Id$
- * $FreeBSD$
- *
- * This file implements the userspace library used by the 'geom'
- * command to load and manipulate disk schedulers.
- */
-
-#include <sys/cdefs.h>
-#include <sys/param.h>
-#include <sys/linker.h>
-#include <sys/module.h>
-
-#include <stdio.h>
-#include <stdint.h>
-#include <libgeom.h>
-
-#include "core/geom.h"
-#include "misc/subr.h"
-
-#define G_SCHED_VERSION 0
-
-uint32_t lib_version = G_LIB_VERSION;
-uint32_t version = G_SCHED_VERSION;
-
-/*
- * storage for parameters used by this geom class.
- * Right now only the scheduler name is used.
- */
-#define GSCHED_ALGO "rr" /* default scheduler */
-
-/*
- * Adapt to differences in geom library.
- * in V1 struct g_command misses gc_argname, eld, and G_BOOL is undefined
- */
-#if G_LIB_VERSION <= 1
-#define G_TYPE_BOOL G_TYPE_NUMBER
-#endif
-#if G_LIB_VERSION >= 3 && G_LIB_VERSION <= 4
-#define G_ARGNAME NULL,
-#else
-#define G_ARGNAME
-#endif
-
-static void
-gcmd_createinsert(struct gctl_req *req, unsigned flags __unused)
-{
- const char *reqalgo;
- char name[64];
-
- if (gctl_has_param(req, "algo"))
- reqalgo = gctl_get_ascii(req, "algo");
- else
- reqalgo = GSCHED_ALGO;
-
- snprintf(name, sizeof(name), "gsched_%s", reqalgo);
- /*
- * Do not complain about errors here, gctl_issue()
- * will fail anyway.
- */
- if (modfind(name) < 0)
- kldload(name);
- gctl_issue(req);
-}
-
-struct g_command class_commands[] = {
- { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert,
- {
- { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
- G_OPT_SENTINEL
- },
- G_ARGNAME "[-v] [-a algorithm_name] dev ..."
- },
- { "insert", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert,
- {
- { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
- G_OPT_SENTINEL
- },
- G_ARGNAME "[-v] [-a algorithm_name] dev ..."
- },
- { "configure", G_FLAG_VERBOSE, NULL,
- {
- { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
- G_OPT_SENTINEL
- },
- G_ARGNAME "[-v] [-a algorithm_name] prov ..."
- },
- { "destroy", G_FLAG_VERBOSE, NULL,
- {
- { 'f', "force", NULL, G_TYPE_BOOL },
- G_OPT_SENTINEL
- },
- G_ARGNAME "[-fv] prov ..."
- },
- { "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
- G_ARGNAME "[-v] prov ..."
- },
- G_CMD_SENTINEL
-};
diff --git a/lib/geom/sched/gsched.8 b/lib/geom/sched/gsched.8
deleted file mode 100644
index facb5c18440f..000000000000
--- a/lib/geom/sched/gsched.8
+++ /dev/null
@@ -1,162 +0,0 @@
-.\" Copyright (c) 2009-2010 Fabio Checconi
-.\" Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" $FreeBSD$
-.\"
-.Dd July 26, 2012
-.Dt GSCHED 8
-.Os
-.Sh NAME
-.Nm gsched
-.Nd "control utility for disk scheduler GEOM class"
-.Sh SYNOPSIS
-.Nm
-.Cm create
-.Op Fl v
-.Op Fl a Ar algorithm
-.Ar provider ...
-.Nm
-.Cm insert
-.Op Fl v
-.Op Fl a Ar algorithm
-.Ar provider ...
-.Nm
-.Cm configure
-.Op Fl v
-.Op Fl a Ar algorithm
-.Ar node ...
-.Nm
-.Cm destroy
-.Op Fl fv
-.Ar node ...
-.Nm
-.Cm reset
-.Op Fl v
-.Ar node ...
-.Nm
-.Cm { list | status | load | unload }
-.Sh DESCRIPTION
-The
-.Nm
-utility (also callable as
-.Nm geom sched ... )
-changes the scheduling policy of the requests going to a provider.
-.Pp
-The first argument to
-.Nm
-indicates an action to be performed:
-.Bl -tag -width ".Cm configure"
-.It Cm create
-Create a new provider and geom node using the specified scheduling algorithm.
-.Ar algorithm
-is the name of the scheduling algorithm used for the provider.
-Available algorithms include:
-.Ar rr ,
-which implements anticipatory scheduling with round robin service
-among clients;
-.Ar as ,
-which implements a simple form of anticipatory scheduling with
-no per-client queue.
-.Pp
-If the operation succeeds, the new provider should appear with name
-.Pa /dev/ Ns Ao Ar dev Ac Ns Pa .sched. .
-The kernel module
-.Pa geom_sched.ko
-will be loaded if it is not loaded already.
-.It Cm insert
-Operates as "create", but the insertion is "transparent",
-i.e. the existing provider is rerouted to the newly created geom,
-which in turn forwards requests to the existing geom.
-This operation allows one to start/stop a scheduling service
-on an already existing provider.
-.Pp
-A subsequent "destroy" will remove the newly created geom and
-hook the provider back to the original geom.
-.It Cm configure
-Configure existing scheduling provider. It supports the same options
-as the
-.Nm create
-command.
-.It Cm destroy
-Destroy the geom specified in the parameter.
-.It Cm reset
-Do nothing.
-.It Cm list | status | load | unload
-See
-.Xr geom 8 .
-.El
-.Pp
-Additional options:
-.Bl -tag -width ".Fl f"
-.It Fl f
-Force the removal of the specified provider.
-.It Fl v
-Be more verbose.
-.El
-.Sh SYSCTL VARIABLES
-The following
-.Xr sysctl 8
-variables can be used to control the behavior of the
-.Nm SCHED
-GEOM class.
-The default value is shown next to each variable.
-.Bl -tag -width indent
-.It Va kern.geom.sched.debug : No 0
-Debug level of the
-.Nm SCHED
-GEOM class.
-This can be set to a number between 0 and 2 inclusive.
-If set to 0 minimal debug information is printed, and if set to 2 the
-maximum amount of debug information is printed.
-.El
-.Sh EXIT STATUS
-Exit status is 0 on success, and 1 if the command fails.
-.Sh EXAMPLES
-The following example shows how to create a scheduling provider for disk
-.Pa /dev/ada0 ,
-and how to destroy it.
-.Bd -literal -offset indent
-# Load the geom_sched module:
-kldload geom_sched
-# Load some scheduler classes used by geom_sched:
-kldload gsched_rr
-# Configure device ada0 to use scheduler "rr":
-geom sched insert -a rr ada0
-# Now provider ada0 uses the "rr" algorithm;
-# the new geom is ada0.sched.
-# Remove the scheduler on the device:
-geom sched destroy -v ada0.sched.
-.Ed
-.Sh SEE ALSO
-.Xr geom 4 ,
-.Xr geom 8
-.Sh HISTORY
-The
-.Nm
-utility first appeared in
-.Fx 8.1 .
-.Sh AUTHORS
-.An Fabio Checconi Aq Mt fabio@FreeBSD.org
-.An Luigi Rizzo Aq Mt luigi@FreeBSD.org
diff --git a/sys/geom/geom.h b/sys/geom/geom.h
index 8e8f74235c78..3bc724c65ae2 100644
--- a/sys/geom/geom.h
+++ b/sys/geom/geom.h
@@ -231,17 +231,6 @@ struct g_provider {
u_int index;
};
-/*
- * Descriptor of a classifier. We can register a function and
- * an argument, which is called by g_io_request() on bio's
- * that are not previously classified.
- */
-struct g_classifier_hook {
- TAILQ_ENTRY(g_classifier_hook) link;
- int (*func)(void *arg, struct bio *bp);
- void *arg;
-};
-
/* BIO_GETATTR("GEOM::setstate") argument values. */
#define G_STATE_FAILED 0
#define G_STATE_REBUILD 1
@@ -344,8 +333,6 @@ int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp);
int g_io_flush(struct g_consumer *cp);
int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp);
-int g_register_classifier(struct g_classifier_hook *hook);
-void g_unregister_classifier(struct g_classifier_hook *hook);
void g_io_request(struct bio *bp, struct g_consumer *cp);
struct bio *g_new_bio(void);
struct bio *g_alloc_bio(void);
diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c
index 5b750f931691..f7969be2f443 100644
--- a/sys/geom/geom_io.c
+++ b/sys/geom/geom_io.c
@@ -87,15 +87,6 @@ static volatile u_int __read_mostly pace;
static uma_zone_t __read_mostly biozone;
-/*
- * The head of the list of classifiers used in g_io_request.
- * Use g_register_classifier() and g_unregister_classifier()
- * to add/remove entries to the list.
- * Classifiers are invoked in registration order.
- */
-static TAILQ_HEAD(, g_classifier_hook) g_classifier_tailq __read_mostly =
- TAILQ_HEAD_INITIALIZER(g_classifier_tailq);
-
#include <machine/atomic.h>
static void
@@ -224,9 +215,6 @@ g_clone_bio(struct bio *bp)
if (bp->bio_cmd == BIO_ZONE)
bcopy(&bp->bio_zone, &bp2->bio_zone,
sizeof(bp->bio_zone));
- /* Inherit classification info from the parent */
- bp2->bio_classifier1 = bp->bio_classifier1;
- bp2->bio_classifier2 = bp->bio_classifier2;
#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
bp2->bio_track_bp = bp->bio_track_bp;
#endif
@@ -498,65 +486,6 @@ g_io_check(struct bio *bp)
return (EJUSTRETURN);
}
-/*
- * bio classification support.
- *
- * g_register_classifier() and g_unregister_classifier()
- * are used to add/remove a classifier from the list.
- * The list is protected using the g_bio_run_down lock,
- * because the classifiers are called in this path.
- *
- * g_io_request() passes bio's that are not already classified
- * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers().
- * Classifiers can store their result in the two fields
- * bio_classifier1 and bio_classifier2.
- * A classifier that updates one of the fields should
- * return a non-zero value.
- * If no classifier updates the field, g_run_classifiers() sets
- * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls.
- */
-
-int
-g_register_classifier(struct g_classifier_hook *hook)
-{
-
- g_bioq_lock(&g_bio_run_down);
- TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link);
- g_bioq_unlock(&g_bio_run_down);
-
- return (0);
-}
-
-void
-g_unregister_classifier(struct g_classifier_hook *hook)
-{
- struct g_classifier_hook *entry;
-
- g_bioq_lock(&g_bio_run_down);
- TAILQ_FOREACH(entry, &g_classifier_tailq, link) {
- if (entry == hook) {
- TAILQ_REMOVE(&g_classifier_tailq, hook, link);
- break;
- }
- }
- g_bioq_unlock(&g_bio_run_down);
-}
-
-static void
-g_run_classifiers(struct bio *bp)
-{
- struct g_classifier_hook *hook;
- int classified = 0;
-
- biotrack(bp, __func__);
-
- TAILQ_FOREACH(hook, &g_classifier_tailq, link)
- classified |= hook->func(hook->arg, bp);
-
- if (!classified)
- bp->bio_classifier1 = BIO_NOTCLASSIFIED;
-}
-
void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
@@ -640,12 +569,6 @@ g_io_request(struct bio *bp, struct g_consumer *cp)
direct = 0;
#endif
- if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) {
- g_bioq_lock(&g_bio_run_down);
- g_run_classifiers(bp);
- g_bioq_unlock(&g_bio_run_down);
- }
-
/*
* The statistics collection is lockless, as such, but we
* can not update one instance of the statistics from more
diff --git a/sys/geom/sched/README b/sys/geom/sched/README
deleted file mode 100644
index b62d46889326..000000000000
--- a/sys/geom/sched/README
+++ /dev/null
@@ -1,142 +0,0 @@
-
- --- GEOM BASED DISK SCHEDULERS FOR FREEBSD ---
-
-This code contains a framework for GEOM-based disk schedulers and a
-couple of sample scheduling algorithms that use the framework and
-implement two forms of "anticipatory scheduling" (see below for more
-details).
-
-As a quick example of what this code can give you, try to run "dd",
-"tar", or some other program with highly SEQUENTIAL access patterns,
-together with "cvs", "cvsup", "svn" or other highly RANDOM access patterns
-(this is not a made-up example: it is pretty common for developers
-to have one or more apps doing random accesses, and others that do
-sequential accesses e.g., loading large binaries from disk, checking
-the integrity of tarballs, watching media streams and so on).
-
-These are the results we get on a local machine (AMD BE2400 dual
-core CPU, SATA 250GB disk):
-
- /mnt is a partition mounted on /dev/ad0s1f
-
- cvs: cvs -d /mnt/home/ncvs-local update -Pd /mnt/ports
- dd-read: dd bs=128k of=/dev/null if=/dev/ad0 (or ad0-sched-)
- dd-writew dd bs=128k if=/dev/zero of=/mnt/largefile
-
- NO SCHEDULER RR SCHEDULER
- dd cvs dd cvs
-
- dd-read only 72 MB/s ---- 72 MB/s ---
- dd-write only 55 MB/s --- 55 MB/s ---
- dd-read+cvs 6 MB/s ok 30 MB/s ok
- dd-write+cvs 55 MB/s slooow 14 MB/s ok
-
-As you can see, when a cvs is running concurrently with dd, the
-performance drops dramatically, and depending on read or write mode,
-one of the two is severely penalized. The use of the RR scheduler
-in this example makes the dd-reader go much faster when competing
-with cvs, and lets cvs progress when competing with a writer.
-
-To try it out:
-
-1. PLEASE MAKE SURE THAT THE DISK THAT YOU WILL BE USING FOR TESTS
- DOES NOT CONTAIN PRECIOUS DATA.
- This is experimental code, so we make no guarantees, though
- I am routinely using it on my desktop and laptop.
-
-2. EXTRACT AND BUILD THE PROGRAMS
- A 'make install' in the directory should work (with root privs),
- or you can even try the binary modules.
- If you want to build the modules yourself, look at the Makefile.
-
-3. LOAD THE MODULE, CREATE A GEOM NODE, RUN TESTS
-
- The scheduler's module must be loaded first:
-
- # kldload gsched_rr
-
- substitute with gsched_as to test AS. Then, supposing that you are
- using /dev/ad0 for testing, a scheduler can be attached to it with:
-
- # geom sched insert ad0
-
- The scheduler is inserted transparently in the geom chain, so
- mounted partitions and filesystems will keep working, but
- now requests will go through the scheduler.
-
- To change scheduler on-the-fly, you can reconfigure the geom:
-
- # geom sched configure -a as ad0.sched.
-
- assuming that gsched_as was loaded previously.
-
-5. SCHEDULER REMOVAL
-
- In principle it is possible to remove the scheduler module
- even on an active chain by doing
-
- # geom sched destroy ad0.sched.
-
- However, there is some race in the geom subsystem which makes
- the removal unsafe if there are active requests on a chain.
- So, in order to reduce the risk of data losses, make sure
- you don't remove a scheduler from a chain with ongoing transactions.
-
---- NOTES ON THE SCHEDULERS ---
-
-The important contribution of this code is the framework to experiment
-with different scheduling algorithms. 'Anticipatory scheduling'
-is a very powerful technique based on the following reasoning:
-
- The disk throughput is much better if it serves sequential requests.
- If we have a mix of sequential and random requests, and we see a
- non-sequential request, do not serve it immediately but instead wait
- a little bit (2..5ms) to see if there is another one coming that
- the disk can serve more efficiently.
-
-There are many details that should be added to make sure that the
-mechanism is effective with different workloads and systems, to
-gain a few extra percent in performance, to improve fairness,
-insulation among processes etc. A discussion of the vast literature
-on the subject is beyond the purpose of this short note.
-
---------------------------------------------------------------------------
-
-TRANSPARENT INSERT/DELETE
-
-geom_sched is an ordinary geom module, however it is convenient
-to plug it transparently into the geom graph, so that one can
-enable or disable scheduling on a mounted filesystem, and the
-names in /etc/fstab do not depend on the presence of the scheduler.
-
-To understand how this works in practice, remember that in GEOM
-we have "providers" and "geom" objects.
-Say that we want to hook a scheduler on provider "ad0",
-accessible through pointer 'pp'. Originally, pp is attached to
-geom "ad0" (same name, different object) accessible through pointer old_gp
-
- BEFORE ---> [ pp --> old_gp ...]
-
-A normal "geom sched create ad0" call would create a new geom node
-on top of provider ad0/pp, and export a newly created provider
-("ad0.sched." accessible through pointer newpp).
-
- AFTER create ---> [ newpp --> gp --> cp ] ---> [ pp --> old_gp ... ]
-
-On top of newpp, a whole tree will be created automatically, and we
-can e.g. mount partitions on /dev/ad0.sched.s1d, and those requests
-will go through the scheduler, whereas any partition mounted on
-the pre-existing device entries will not go through the scheduler.
-
-With the transparent insert mechanism, the original provider "ad0"/pp
-is hooked to the newly created geom, as follows:
-
- AFTER insert ---> [ pp --> gp --> cp ] ---> [ newpp --> old_gp ... ]
-
-so anything that was previously using provider pp will now have
-the requests routed through the scheduler node.
-
-A removal ("geom sched destroy ad0.sched.") will restore the original
-configuration.
-
-# $FreeBSD$
diff --git a/sys/geom/sched/g_sched.c b/sys/geom/sched/g_sched.c
deleted file mode 100644
index 537f581534b0..000000000000
--- a/sys/geom/sched/g_sched.c
+++ /dev/null
@@ -1,1729 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009-2010 Fabio Checconi
- * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $Id$
- * $FreeBSD$
- *
- * Main control module for geom-based disk schedulers ('sched').
- *
- * USER VIEW
- * A 'sched' node is typically inserted transparently between
- * an existing provider pp and its original geom gp
- *
- * [pp --> gp ..]
- *
- * using the command "geom sched insert <provider>" and
- * resulting in the following topology
- *
- * [pp --> sched_gp --> cp] [new_pp --> gp ... ]
- *
- * Deletion "geom sched destroy <provider>.sched." restores the
- * original chain. The normal "geom sched create <provide>"
- * is also supported.
- *
- * INTERNALS
- * Internally, the 'sched' uses the following data structures
- *
- * geom{} g_sched_softc{} g_gsched{}
- * +----------+ +---------------+ +-------------+
- * | softc *-|--->| sc_gsched *-|-->| gs_init |
- * | ... | | | | gs_fini |
- * | | | [ hash table] | | gs_start |
- * +----------+ | | | ... |
- * | | +-------------+
- * | |
- * | | g_*_softc{}
- * | | +-------------+
- * | sc_data *-|-->| |
- * +---------------+ | algorithm- |
- * | specific |
- * +-------------+
- *
- * A g_sched_softc{} is created with a "geom sched insert" call.
- * In turn this instantiates a specific scheduling algorithm,
- * which sets sc_gsched to point to the algorithm callbacks,
- * and calls gs_init() to create the g_*_softc{} .
- * The other callbacks (gs_start, gs_next, ...) are invoked
- * as needed
- *
- * g_sched_softc{} is defined in g_sched.h and mostly used here;
- * g_gsched{}, and the gs_callbacks, are documented in gs_scheduler.h;
- * g_*_softc{} is defined/implemented by each algorithm (gs_*.c)
- *
- * DATA MOVING
- * When a bio is received on the provider, it goes to the
- * g_sched_start() which calls gs_start() to initially queue it;
- * then we call g_sched_dispatch() that loops around gs_next()
- * to select zero or more bio's to be sent downstream.
- *
- * g_sched_dispatch() can also be called as a result of a timeout,
- * e.g. when doing anticipation or pacing requests.
- *
- * When a bio comes back, it goes to g_sched_done() which in turn
- * calls gs_done(). The latter does any necessary housekeeping in
- * the scheduling algorithm, and may decide to call g_sched_dispatch()
- * to send more bio's downstream.
- *
- * If an algorithm needs per-flow queues, these are created
- * calling gs_init_class() and destroyed with gs_fini_class(),
- * and they are also inserted in the hash table implemented in
- * the g_sched_softc{}
- *
- * If an algorithm is replaced, or a transparently-inserted node is
- * removed with "geom sched destroy", we need to remove all references
- * to the g_*_softc{} and g_sched_softc from the bio's still in
- * the scheduler. g_sched_forced_dispatch() helps doing this.
- * XXX need to explain better.
- */
-
-#include <sys/cdefs.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/module.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/bio.h>
-#include <sys/limits.h>
-#include <sys/hash.h>
-#include <sys/sbuf.h>
-#include <sys/sysctl.h>
-#include <sys/malloc.h>
-#include <sys/proc.h> /* we access curthread */
-#include <geom/geom.h>
-#include <geom/geom_dbg.h>
-#include "gs_scheduler.h"
-#include "g_sched.h" /* geom hooks */
-
-/*
- * Size of the per-geom hash table storing traffic classes.
- * We may decide to change it at a later time, it has no ABI
- * implications as it is only used for run-time allocations.
- */
-#define G_SCHED_HASH_SIZE 32
-
-static int g_sched_destroy(struct g_geom *gp, boolean_t force);
-static int g_sched_destroy_geom(struct gctl_req *req,
- struct g_class *mp, struct g_geom *gp);
-static void g_sched_config(struct gctl_req *req, struct g_class *mp,
- const char *verb);
-static struct g_geom *g_sched_taste(struct g_class *mp,
- struct g_provider *pp, int flags __unused);
-static void g_sched_dumpconf(struct sbuf *sb, const char *indent,
- struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
-static void g_sched_init(struct g_class *mp);
-static void g_sched_fini(struct g_class *mp);
-static int g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data,
- int fflag, struct thread *td);
-
-struct g_class g_sched_class = {
- .name = G_SCHED_CLASS_NAME,
- .version = G_VERSION,
- .ctlreq = g_sched_config,
- .taste = g_sched_taste,
- .destroy_geom = g_sched_destroy_geom,
- .init = g_sched_init,
- .ioctl = g_sched_ioctl,
- .fini = g_sched_fini
-};
-
-MALLOC_DEFINE(M_GEOM_SCHED, "GEOM_SCHED", "Geom schedulers data structures");
-
-/*
- * Global variables describing the state of the geom_sched module.
- * There is only one static instance of this structure.
- */
-LIST_HEAD(gs_list, g_gsched); /* type, link field */
-struct geom_sched_vars {
- struct mtx gs_mtx;
- struct gs_list gs_scheds; /* list of algorithms */
- u_int gs_debug;
- u_int gs_sched_count; /* how many algorithms ? */
- u_int gs_patched; /* g_io_request was patched */
-
- u_int gs_initialized;
- u_int gs_expire_secs; /* expiration of hash entries */
-
- struct bio_queue_head gs_pending;
- u_int gs_npending;
-
- /* The following are for stats, usually protected by gs_mtx. */
- u_long gs_requests; /* total requests */
- u_long gs_done; /* total done */
- u_int gs_in_flight; /* requests in flight */
- u_int gs_writes_in_flight;
- u_int gs_bytes_in_flight;
- u_int gs_write_bytes_in_flight;
-
- char gs_names[256]; /* names of schedulers */
-};
-
-static struct geom_sched_vars me = {
- .gs_expire_secs = 10,
-};
-
-SYSCTL_DECL(_kern_geom);
-SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0,
- "GEOM_SCHED stuff");
-
-SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_wb, CTLFLAG_RD,
- &me.gs_write_bytes_in_flight, 0, "Write bytes in flight");
-
-SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_b, CTLFLAG_RD,
- &me.gs_bytes_in_flight, 0, "Bytes in flight");
-
-SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_w, CTLFLAG_RD,
- &me.gs_writes_in_flight, 0, "Write Requests in flight");
-
-SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight, CTLFLAG_RD,
- &me.gs_in_flight, 0, "Requests in flight");
-
-SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, done, CTLFLAG_RD,
- &me.gs_done, 0, "Total done");
-
-SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, requests, CTLFLAG_RD,
- &me.gs_requests, 0, "Total requests");
-
-SYSCTL_STRING(_kern_geom_sched, OID_AUTO, algorithms, CTLFLAG_RD,
- &me.gs_names, 0, "Algorithm names");
-
-SYSCTL_UINT(_kern_geom_sched, OID_AUTO, alg_count, CTLFLAG_RD,
- &me.gs_sched_count, 0, "Number of algorithms");
-
-SYSCTL_UINT(_kern_geom_sched, OID_AUTO, debug, CTLFLAG_RW,
- &me.gs_debug, 0, "Debug level");
-
-SYSCTL_UINT(_kern_geom_sched, OID_AUTO, expire_secs, CTLFLAG_RW,
- &me.gs_expire_secs, 0, "Expire time in seconds");
-
-/*
- * g_sched calls the scheduler algorithms with this lock held.
- * The locking functions are exposed so the scheduler algorithms can also
- * protect themselves e.g. when running a callout handler.
- */
-void
-g_sched_lock(struct g_geom *gp)
-{
- struct g_sched_softc *sc = gp->softc;
-
- mtx_lock(&sc->sc_mtx);
-}
-
-void
-g_sched_unlock(struct g_geom *gp)
-{
- struct g_sched_softc *sc = gp->softc;
-
- mtx_unlock(&sc->sc_mtx);
-}
-
-/*
- * Support functions to handle references to the module,
- * which are coming from devices using this scheduler.
- */
-static inline void
-g_gsched_ref(struct g_gsched *gsp)
-{
-
- atomic_add_int(&gsp->gs_refs, 1);
-}
-
-static inline void
-g_gsched_unref(struct g_gsched *gsp)
-{
-
- atomic_add_int(&gsp->gs_refs, -1);
-}
-
-/*
- * Update the stats when this request is done.
- */
-static void
-g_sched_update_stats(struct bio *bio)
-{
-
- me.gs_done++;
- me.gs_in_flight--;
- me.gs_bytes_in_flight -= bio->bio_length;
- if (bio->bio_cmd == BIO_WRITE) {
- me.gs_writes_in_flight--;
- me.gs_write_bytes_in_flight -= bio->bio_length;
- }
-}
-
-/*
- * Dispatch any pending request.
- */
-static void
-g_sched_forced_dispatch(struct g_geom *gp)
-{
- struct g_sched_softc *sc = gp->softc;
- struct g_gsched *gsp = sc->sc_gsched;
- struct bio *bp;
-
- KASSERT(mtx_owned(&sc->sc_mtx),
- ("sc_mtx not owned during forced dispatch"));
-
- while ((bp = gsp->gs_next(sc->sc_data, 1)) != NULL)
- g_io_request(bp, LIST_FIRST(&gp->consumer));
-}
-
-/*
- * The main dispatch loop, called either here after the start
- * routine, or by scheduling algorithms when they receive a timeout
- * or a 'done' notification. Does not share code with the forced
- * dispatch path, since the gs_done() callback can call us.
- */
-void
-g_sched_dispatch(struct g_geom *gp)
-{
- struct g_sched_softc *sc = gp->softc;
- struct g_gsched *gsp = sc->sc_gsched;
- struct bio *bp;
-
- KASSERT(mtx_owned(&sc->sc_mtx), ("sc_mtx not owned during dispatch"));
-
- if ((sc->sc_flags & G_SCHED_FLUSHING))
- return;
-
- while ((bp = gsp->gs_next(sc->sc_data, 0)) != NULL)
- g_io_request(bp, LIST_FIRST(&gp->consumer));
-}
-
-/*
- * Recent (8.0 and above) versions of FreeBSD have support to
- * register classifiers of disk requests. The classifier is
- * invoked by g_io_request(), and stores the information into
- * bp->bio_classifier1.
- *
- * Support for older versions, which is left here only for
- * documentation purposes, relies on two hacks:
- * 1. classification info is written into the bio_caller1
- * field of the topmost node in the bio chain. This field
- * is rarely used, but this module is incompatible with
- * those that use bio_caller1 for other purposes,
- * such as ZFS and gjournal;
- * 2. g_io_request() is patched in-memory when the module is
- * loaded, so that the function calls a classifier as its
- * first thing. g_io_request() is restored when the module
- * is unloaded. This functionality is only supported for
- * x86 and amd64, other architectures need source code changes.
- */
-
-/*
- * Lookup the identity of the issuer of the original request.
- * In the current implementation we use the curthread of the
- * issuer, but different mechanisms may be implemented later
- * so we do not make assumptions on the return value which for
- * us is just an opaque identifier.
- */
-
-static inline u_long
-g_sched_classify(struct bio *bp)
-{
-
- /* we have classifier fields in the struct bio */
- return ((u_long)bp->bio_classifier1);
-}
-
-/* Return the hash chain for the given key. */
-static inline struct g_hash *
-g_sched_hash(struct g_sched_softc *sc, u_long key)
-{
-
- return (&sc->sc_hash[key & sc->sc_mask]);
-}
-
-/*
- * Helper function for the children classes, which takes
- * a geom and a bio and returns the private descriptor
- * associated to the request. This involves fetching
- * the classification field and [al]locating the
- * corresponding entry in the hash table.
- */
-void *
-g_sched_get_class(struct g_geom *gp, struct bio *bp)
-{
- struct g_sched_softc *sc;
- struct g_sched_class *gsc;
- struct g_gsched *gsp;
- struct g_hash *bucket;
- u_long key;
-
- sc = gp->softc;
- key = g_sched_classify(bp);
- bucket = g_sched_hash(sc, key);
- LIST_FOREACH(gsc, bucket, gsc_clist) {
- if (key == gsc->gsc_key) {
- gsc->gsc_refs++;
- return (gsc->gsc_priv);
- }
- }
-
- gsp = sc->sc_gsched;
- gsc = malloc(sizeof(*gsc) + gsp->gs_priv_size,
- M_GEOM_SCHED, M_NOWAIT | M_ZERO);
- if (!gsc)
- return (NULL);
-
- if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) {
- free(gsc, M_GEOM_SCHED);
- return (NULL);
- }
-
- gsc->gsc_refs = 2; /* 1 for the hash table, 1 for the caller. */
- gsc->gsc_key = key;
- LIST_INSERT_HEAD(bucket, gsc, gsc_clist);
-
- gsc->gsc_expire = ticks + me.gs_expire_secs * hz;
-
- return (gsc->gsc_priv);
-}
-
-/*
- * Release a reference to the per-client descriptor,
- */
-void
-g_sched_put_class(struct g_geom *gp, void *priv)
-{
- struct g_sched_class *gsc;
- struct g_sched_softc *sc;
-
- gsc = g_sched_priv2class(priv);
- gsc->gsc_expire = ticks + me.gs_expire_secs * hz;
-
- if (--gsc->gsc_refs > 0)
- return;
-
- sc = gp->softc;
- sc->sc_gsched->gs_fini_class(sc->sc_data, priv);
-
- LIST_REMOVE(gsc, gsc_clist);
- free(gsc, M_GEOM_SCHED);
-}
-
-static void
-g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask,
- struct g_gsched *gsp, void *data)
-{
- struct g_sched_class *cp, *cp2;
- int i;
-
- if (!hp)
- return;
-
- if (data && gsp->gs_hash_unref)
- gsp->gs_hash_unref(data);
-
- for (i = 0; i < G_SCHED_HASH_SIZE; i++) {
- LIST_FOREACH_SAFE(cp, &hp[i], gsc_clist, cp2)
- g_sched_put_class(gp, cp->gsc_priv);
- }
-
- hashdestroy(hp, M_GEOM_SCHED, mask);
-}
-
-static struct g_hash *
-g_sched_hash_init(struct g_gsched *gsp, u_long *mask, int flags)
-{
- struct g_hash *hash;
-
- if (gsp->gs_priv_size == 0)
- return (NULL);
-
- hash = hashinit_flags(G_SCHED_HASH_SIZE, M_GEOM_SCHED, mask, flags);
-
- return (hash);
-}
-
-static void
-g_sched_flush_classes(struct g_geom *gp)
-{
- struct g_sched_softc *sc;
- struct g_sched_class *cp, *cp2;
- int i;
-
- sc = gp->softc;
-
- if (!sc->sc_hash || ticks - sc->sc_flush_ticks <= 0)
- return;
-
- for (i = 0; i < G_SCHED_HASH_SIZE; i++) {
- LIST_FOREACH_SAFE(cp, &sc->sc_hash[i], gsc_clist, cp2) {
- if (cp->gsc_refs == 1 && ticks - cp->gsc_expire > 0)
- g_sched_put_class(gp, cp->gsc_priv);
- }
- }
-
- sc->sc_flush_ticks = ticks + me.gs_expire_secs * hz;
-}
-
-/*
- * Wait for the completion of any outstanding request. To ensure
- * that this does not take forever the caller has to make sure that
- * no new request enter the scehduler before calling us.
- *
- * Must be called with the gp mutex held and topology locked.
- */
-static int
-g_sched_wait_pending(struct g_geom *gp)
-{
- struct g_sched_softc *sc = gp->softc;
- int endticks = ticks + hz;
-
- g_topology_assert();
-
- while (sc->sc_pending && endticks - ticks >= 0)
- msleep(gp, &sc->sc_mtx, 0, "sched_wait_pending", hz / 4);
-
- return (sc->sc_pending ? ETIMEDOUT : 0);
-}
-
-static int
-g_sched_remove_locked(struct g_geom *gp, struct g_gsched *gsp)
-{
- struct g_sched_softc *sc = gp->softc;
- int error;
-
- /* Set the flushing flag: new bios will not enter the scheduler. */
- sc->sc_flags |= G_SCHED_FLUSHING;
-
- g_sched_forced_dispatch(gp);
- error = g_sched_wait_pending(gp);
- if (error)
- goto failed;
-
- /* No more requests pending or in flight from the old gsp. */
-
- g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data);
- sc->sc_hash = NULL;
-
- /*
- * Avoid deadlock here by releasing the gp mutex and reacquiring
- * it once done. It should be safe, since no reconfiguration or
- * destruction can take place due to the geom topology lock; no
- * new request can use the current sc_data since we flagged the
- * geom as being flushed.
- */
- g_sched_unlock(gp);
- gsp->gs_fini(sc->sc_data);
- g_sched_lock(gp);
-
- sc->sc_gsched = NULL;
- sc->sc_data = NULL;
- g_gsched_unref(gsp);
-
-failed:
- sc->sc_flags &= ~G_SCHED_FLUSHING;
-
- return (error);
-}
-
-static int
-g_sched_remove(struct g_geom *gp, struct g_gsched *gsp)
-{
- int error;
-
- g_sched_lock(gp);
- error = g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */
- g_sched_unlock(gp);
-
- return (error);
-}
-
-/*
- * Support function for create/taste -- locate the desired
- * algorithm and grab a reference to it.
- */
-static struct g_gsched *
-g_gsched_find(const char *name)
-{
- struct g_gsched *gsp = NULL;
-
- mtx_lock(&me.gs_mtx);
- LIST_FOREACH(gsp, &me.gs_scheds, glist) {
- if (strcmp(name, gsp->gs_name) == 0) {
- g_gsched_ref(gsp);
- break;
- }
- }
- mtx_unlock(&me.gs_mtx);
-
- return (gsp);
-}
-
-/*
- * Rebuild the list of scheduler names.
- * To be called with me.gs_mtx lock held.
- */
-static void
-g_gsched_build_names(struct g_gsched *gsp)
-{
- int pos, l;
- struct g_gsched *cur;
-
- pos = 0;
- LIST_FOREACH(cur, &me.gs_scheds, glist) {
- l = strlen(cur->gs_name);
- if (l + pos + 1 + 1 < sizeof(me.gs_names)) {
- if (pos != 0)
- me.gs_names[pos++] = ' ';
- strcpy(me.gs_names + pos, cur->gs_name);
- pos += l;
- }
- }
- me.gs_names[pos] = '\0';
-}
-
-/*
- * Register or unregister individual scheduling algorithms.
- */
-static int
-g_gsched_register(struct g_gsched *gsp)
-{
- struct g_gsched *cur;
- int error = 0;
-
- mtx_lock(&me.gs_mtx);
- LIST_FOREACH(cur, &me.gs_scheds, glist) {
- if (strcmp(gsp->gs_name, cur->gs_name) == 0)
- break;
- }
- if (cur != NULL) {
- G_SCHED_DEBUG(0, "A scheduler named %s already"
- "exists.", gsp->gs_name);
- error = EEXIST;
- } else {
- LIST_INSERT_HEAD(&me.gs_scheds, gsp, glist);
- gsp->gs_refs = 1;
- me.gs_sched_count++;
- g_gsched_build_names(gsp);
- }
- mtx_unlock(&me.gs_mtx);
-
- return (error);
-}
-
-struct g_gsched_unregparm {
- struct g_gsched *gup_gsp;
- int gup_error;
-};
-
-static void
-g_gsched_unregister(void *arg, int flag)
-{
- struct g_gsched_unregparm *parm = arg;
- struct g_gsched *gsp = parm->gup_gsp, *cur, *tmp;
- struct g_sched_softc *sc;
- struct g_geom *gp, *gp_tmp;
- int error;
-
- parm->gup_error = 0;
-
- g_topology_assert();
-
- if (flag == EV_CANCEL)
- return;
-
- mtx_lock(&me.gs_mtx);
-
- LIST_FOREACH_SAFE(gp, &g_sched_class.geom, geom, gp_tmp) {
- if (gp->class != &g_sched_class)
- continue; /* Should not happen. */
-
- sc = gp->softc;
- if (sc->sc_gsched == gsp) {
- error = g_sched_remove(gp, gsp);
- if (error)
- goto failed;
- }
- }
-
- LIST_FOREACH_SAFE(cur, &me.gs_scheds, glist, tmp) {
- if (cur != gsp)
- continue;
-
- if (gsp->gs_refs != 1) {
- G_SCHED_DEBUG(0, "%s still in use.",
- gsp->gs_name);
- parm->gup_error = EBUSY;
- } else {
- LIST_REMOVE(gsp, glist);
- me.gs_sched_count--;
- g_gsched_build_names(gsp);
- }
- break;
- }
-
- if (cur == NULL) {
- G_SCHED_DEBUG(0, "%s not registered.", gsp->gs_name);
- parm->gup_error = ENOENT;
- }
-
-failed:
- mtx_unlock(&me.gs_mtx);
-}
-
-static inline void
-g_gsched_global_init(void)
-{
-
- if (!me.gs_initialized) {
- G_SCHED_DEBUG(0, "Initializing global data.");
- mtx_init(&me.gs_mtx, "gsched", NULL, MTX_DEF);
- LIST_INIT(&me.gs_scheds);
- bioq_init(&me.gs_pending);
- me.gs_initialized = 1;
- }
-}
-
-/*
- * Module event called when a scheduling algorithm module is loaded or
- * unloaded.
- */
-int
-g_gsched_modevent(module_t mod, int cmd, void *arg)
-{
- struct g_gsched *gsp = arg;
- struct g_gsched_unregparm parm;
- int error;
-
- G_SCHED_DEBUG(0, "Modevent %d.", cmd);
-
- /*
- * If the module is loaded at boot, the geom thread that calls
- * g_sched_init() might actually run after g_gsched_modevent(),
- * so make sure that the module is properly initialized.
- */
- g_gsched_global_init();
-
- error = EOPNOTSUPP;
- switch (cmd) {
- case MOD_LOAD:
- error = g_gsched_register(gsp);
- G_SCHED_DEBUG(0, "Loaded module %s error %d.",
- gsp->gs_name, error);
- if (error == 0)
- g_retaste(&g_sched_class);
- break;
-
- case MOD_UNLOAD:
- parm.gup_gsp = gsp;
- parm.gup_error = 0;
-
- error = g_waitfor_event(g_gsched_unregister,
- &parm, M_WAITOK, NULL);
- if (error == 0)
- error = parm.gup_error;
- G_SCHED_DEBUG(0, "Unloaded module %s error %d.",
- gsp->gs_name, error);
- break;
- }
-
- return (error);
-}
-
-#ifdef KTR
-#define TRC_BIO_EVENT(e, bp) g_sched_trace_bio_ ## e (bp)
-
-static inline char
-g_sched_type(struct bio *bp)
-{
-
- if (bp->bio_cmd == BIO_READ)
- return ('R');
- else if (bp->bio_cmd == BIO_WRITE)
- return ('W');
- return ('U');
-}
-
-static inline void
-g_sched_trace_bio_START(struct bio *bp)
-{
-
- CTR5(KTR_GSCHED, "S %lu %c %lu/%lu %lu", g_sched_classify(bp),
- g_sched_type(bp), bp->bio_offset / ULONG_MAX,
- bp->bio_offset, bp->bio_length);
-}
-
-static inline void
-g_sched_trace_bio_DONE(struct bio *bp)
-{
-
- CTR5(KTR_GSCHED, "D %lu %c %lu/%lu %lu", g_sched_classify(bp),
- g_sched_type(bp), bp->bio_offset / ULONG_MAX,
- bp->bio_offset, bp->bio_length);
-}
-#else /* !KTR */
-#define TRC_BIO_EVENT(e, bp)
-#endif /* !KTR */
-
-/*
- * g_sched_done() and g_sched_start() dispatch the geom requests to
- * the scheduling algorithm in use.
- */
-static void
-g_sched_done(struct bio *bio)
-{
- struct g_geom *gp = bio->bio_caller2;
- struct g_sched_softc *sc = gp->softc;
-
- TRC_BIO_EVENT(DONE, bio);
-
- KASSERT(bio->bio_caller1, ("null bio_caller1 in g_sched_done"));
-
- g_sched_lock(gp);
-
- g_sched_update_stats(bio);
- sc->sc_gsched->gs_done(sc->sc_data, bio);
- if (!--sc->sc_pending)
- wakeup(gp);
-
- g_sched_flush_classes(gp);
- g_sched_unlock(gp);
-
- g_std_done(bio);
-}
-
-static void
-g_sched_start(struct bio *bp)
-{
- struct g_geom *gp = bp->bio_to->geom;
- struct g_sched_softc *sc = gp->softc;
- struct bio *cbp;
-
- TRC_BIO_EVENT(START, bp);
- G_SCHED_LOGREQ(bp, "Request received.");
-
- cbp = g_clone_bio(bp);
- if (cbp == NULL) {
- g_io_deliver(bp, ENOMEM);
- return;
- }
- cbp->bio_done = g_sched_done;
- cbp->bio_to = LIST_FIRST(&gp->provider);
- KASSERT(cbp->bio_to != NULL, ("NULL provider"));
-
- /* We only schedule reads and writes. */
- if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE)
- goto bypass;
-
- G_SCHED_LOGREQ(cbp, "Sending request.");
-
- g_sched_lock(gp);
- /*
- * Call the algorithm's gs_start to queue the request in the
- * scheduler. If gs_start fails then pass the request down,
- * otherwise call g_sched_dispatch() which tries to push
- * one or more requests down.
- */
- if (!sc->sc_gsched || (sc->sc_flags & G_SCHED_FLUSHING) ||
- sc->sc_gsched->gs_start(sc->sc_data, cbp)) {
- g_sched_unlock(gp);
- goto bypass;
- }
- /*
- * We use bio_caller1 to mark requests that are scheduled
- * so make sure it is not NULL.
- */
- if (cbp->bio_caller1 == NULL)
- cbp->bio_caller1 = &me; /* anything not NULL */
-
- cbp->bio_caller2 = gp;
- sc->sc_pending++;
-
- /* Update general stats. */
- me.gs_in_flight++;
- me.gs_requests++;
- me.gs_bytes_in_flight += bp->bio_length;
- if (bp->bio_cmd == BIO_WRITE) {
- me.gs_writes_in_flight++;
- me.gs_write_bytes_in_flight += bp->bio_length;
- }
- g_sched_dispatch(gp);
- g_sched_unlock(gp);
- return;
-
-bypass:
- cbp->bio_done = g_std_done;
- cbp->bio_caller1 = NULL; /* not scheduled */
- g_io_request(cbp, LIST_FIRST(&gp->consumer));
-}
-
-/*
- * The next few functions are the geom glue.
- */
-static void
-g_sched_orphan(struct g_consumer *cp)
-{
-
- g_topology_assert();
- g_sched_destroy(cp->geom, 1);
-}
-
-static int
-g_sched_access(struct g_provider *pp, int dr, int dw, int de)
-{
- struct g_geom *gp;
- struct g_consumer *cp;
- int error;
-
- gp = pp->geom;
- cp = LIST_FIRST(&gp->consumer);
- error = g_access(cp, dr, dw, de);
-
- return (error);
-}
-
-static void
-g_sched_temporary_start(struct bio *bio)
-{
-
- mtx_lock(&me.gs_mtx);
- me.gs_npending++;
- bioq_disksort(&me.gs_pending, bio);
- mtx_unlock(&me.gs_mtx);
-}
-
-static void
-g_sched_flush_pending(g_start_t *start)
-{
- struct bio *bp;
-
- while ((bp = bioq_takefirst(&me.gs_pending)))
- start(bp);
-}
-
-static int
-g_insert_proxy(struct g_geom *gp, struct g_provider *newpp,
- struct g_geom *dstgp, struct g_provider *pp, struct g_consumer *cp)
-{
- struct g_sched_softc *sc = gp->softc;
- g_start_t *saved_start, *flush = g_sched_start;
- int error = 0, endticks = ticks + hz;
-
- g_cancel_event(newpp); /* prevent taste() */
- /* copy private fields */
- newpp->private = pp->private;
- newpp->index = pp->index;
-
- /* Queue all the early requests coming for us. */
- me.gs_npending = 0;
- saved_start = pp->geom->start;
- dstgp->start = g_sched_temporary_start;
-
- while (pp->nstart - pp->nend != me.gs_npending &&
- endticks - ticks >= 0)
- tsleep(pp, PRIBIO, "-", hz/10);
-
- if (pp->nstart - pp->nend != me.gs_npending) {
- flush = saved_start;
- error = ETIMEDOUT;
- goto fail;
- }
-
- /* link pp to this geom */
- LIST_REMOVE(pp, provider);
- pp->geom = gp;
- LIST_INSERT_HEAD(&gp->provider, pp, provider);
-
- /*
- * replicate the counts from the parent in the
- * new provider and consumer nodes
- */
- cp->acr = newpp->acr = pp->acr;
- cp->acw = newpp->acw = pp->acw;
- cp->ace = newpp->ace = pp->ace;
- sc->sc_flags |= G_SCHED_PROXYING;
-
-fail:
- dstgp->start = saved_start;
-
- g_sched_flush_pending(flush);
-
- return (error);
-}
-
-/*
- * Create a geom node for the device passed as *pp.
- * If successful, add a reference to this gsp.
- */
-static int
-g_sched_create(struct gctl_req *req, struct g_class *mp,
- struct g_provider *pp, struct g_gsched *gsp, int proxy)
-{
- struct g_sched_softc *sc = NULL;
- struct g_geom *gp, *dstgp;
- struct g_provider *newpp = NULL;
- struct g_consumer *cp = NULL;
- char name[64];
- int error;
-
- g_topology_assert();
-
- snprintf(name, sizeof(name), "%s%s", pp->name, G_SCHED_SUFFIX);
- LIST_FOREACH(gp, &mp->geom, geom) {
- if (strcmp(gp->name, name) == 0) {
- gctl_error(req, "Geom %s already exists.",
- name);
- return (EEXIST);
- }
- }
-
- gp = g_new_geomf(mp, "%s", name);
- dstgp = proxy ? pp->geom : gp; /* where do we link the provider */
-
- sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
- sc->sc_gsched = gsp;
- sc->sc_data = gsp->gs_init(gp);
- if (sc->sc_data == NULL) {
- error = ENOMEM;
- goto fail;
- }
-
- sc->sc_hash = g_sched_hash_init(gsp, &sc->sc_mask, HASH_WAITOK);
-
- /*
- * Do not initialize the flush mechanism, will be initialized
- * on the first insertion on the hash table.
- */
-
- mtx_init(&sc->sc_mtx, "g_sched_mtx", NULL, MTX_DEF);
-
- gp->softc = sc;
- gp->start = g_sched_start;
- gp->orphan = g_sched_orphan;
- gp->access = g_sched_access;
- gp->dumpconf = g_sched_dumpconf;
-
- newpp = g_new_providerf(dstgp, "%s", gp->name);
- newpp->mediasize = pp->mediasize;
- newpp->sectorsize = pp->sectorsize;
-
- cp = g_new_consumer(gp);
- error = g_attach(cp, proxy ? newpp : pp);
- if (error != 0) {
- gctl_error(req, "Cannot attach to provider %s.",
- pp->name);
- goto fail;
- }
-
- g_error_provider(newpp, 0);
- if (proxy) {
- error = g_insert_proxy(gp, newpp, dstgp, pp, cp);
- if (error)
- goto fail;
- }
- G_SCHED_DEBUG(0, "Device %s created.", gp->name);
-
- g_gsched_ref(gsp);
-
- return (0);
-
-fail:
- if (cp != NULL) {
- if (cp->provider != NULL)
- g_detach(cp);
- g_destroy_consumer(cp);
- }
- if (newpp != NULL)
- g_destroy_provider(newpp);
- if (sc->sc_hash)
- g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask,
- gsp, sc->sc_data);
- if (sc->sc_data)
- gsp->gs_fini(sc->sc_data);
- g_free(gp->softc);
- g_destroy_geom(gp);
-
- return (error);
-}
-
-/*
- * Support for dynamic switching of scheduling algorithms.
- * First initialize the data structures for the new algorithm,
- * then call g_sched_remove_locked() to flush all references
- * to the old one, finally link the new algorithm.
- */
-static int
-g_sched_change_algo(struct gctl_req *req, struct g_class *mp,
- struct g_provider *pp, struct g_gsched *gsp)
-{
- struct g_sched_softc *sc;
- struct g_geom *gp;
- struct g_hash *newh;
- void *data;
- u_long mask;
- int error = 0;
-
- gp = pp->geom;
- sc = gp->softc;
-
- data = gsp->gs_init(gp);
- if (data == NULL)
- return (ENOMEM);
-
- newh = g_sched_hash_init(gsp, &mask, HASH_WAITOK);
- if (gsp->gs_priv_size && !newh) {
- error = ENOMEM;
- goto fail;
- }
-
- g_sched_lock(gp);
- if (sc->sc_gsched) { /* can be NULL in some cases */
- error = g_sched_remove_locked(gp, sc->sc_gsched);
- if (error)
- goto fail;
- }
-
- g_gsched_ref(gsp);
- sc->sc_gsched = gsp;
- sc->sc_data = data;
- sc->sc_hash = newh;
- sc->sc_mask = mask;
-
- g_sched_unlock(gp);
-
- return (0);
-
-fail:
- if (newh)
- g_sched_hash_fini(gp, newh, mask, gsp, data);
-
- if (data)
- gsp->gs_fini(data);
-
- g_sched_unlock(gp);
-
- return (error);
-}
-
-/*
- * Stop the request flow directed to the proxy, redirecting the new
- * requests to the me.gs_pending queue.
- */
-static struct g_provider *
-g_detach_proxy(struct g_geom *gp)
-{
- struct g_consumer *cp;
- struct g_provider *pp, *newpp;
-
- do {
- pp = LIST_FIRST(&gp->provider);
- if (pp == NULL)
- break;
- cp = LIST_FIRST(&gp->consumer);
- if (cp == NULL)
- break;
- newpp = cp->provider;
- if (newpp == NULL)
- break;
-
- me.gs_npending = 0;
- pp->geom->start = g_sched_temporary_start;
-
- return (pp);
- } while (0);
- printf("%s error detaching proxy %s\n", __FUNCTION__, gp->name);
-
- return (NULL);
-}
-
-static void
-g_sched_blackhole(struct bio *bp)
-{
-
- g_io_deliver(bp, ENXIO);
-}
-
-static inline void
-g_reparent_provider(struct g_provider *pp, struct g_geom *gp,
- struct g_provider *newpp)
-{
-
- LIST_REMOVE(pp, provider);
- if (newpp) {
- pp->private = newpp->private;
- pp->index = newpp->index;
- }
- pp->geom = gp;
- LIST_INSERT_HEAD(&gp->provider, pp, provider);
-}
-
-static inline void
-g_unproxy_provider(struct g_provider *oldpp, struct g_provider *newpp)
-{
- struct g_geom *gp = oldpp->geom;
-
- g_reparent_provider(oldpp, newpp->geom, newpp);
-
- /*
- * Hackish: let the system destroy the old provider for us, just
- * in case someone attached a consumer to it, in which case a
- * direct call to g_destroy_provider() would not work.
- */
- g_reparent_provider(newpp, gp, NULL);
-}
-
-/*
- * Complete the proxy destruction, linking the old provider to its
- * original geom, and destroying the proxy provider. Also take care
- * of issuing the pending requests collected in me.gs_pending (if any).
- */
-static int
-g_destroy_proxy(struct g_geom *gp, struct g_provider *oldpp)
-{
- struct g_consumer *cp;
- struct g_provider *newpp;
-
- do {
- cp = LIST_FIRST(&gp->consumer);
- if (cp == NULL)
- break;
- newpp = cp->provider;
- if (newpp == NULL)
- break;
-
- /* Relink the provider to its original geom. */
- g_unproxy_provider(oldpp, newpp);
-
- /* Detach consumer from provider, and destroy provider. */
- cp->acr = newpp->acr = 0;
- cp->acw = newpp->acw = 0;
- cp->ace = newpp->ace = 0;
- g_detach(cp);
-
- /* Send the pending bios through the right start function. */
- g_sched_flush_pending(oldpp->geom->start);
-
- return (0);
- } while (0);
- printf("%s error destroying proxy %s\n", __FUNCTION__, gp->name);
-
- /* We cannot send the pending bios anywhere... */
- g_sched_flush_pending(g_sched_blackhole);
-
- return (EINVAL);
-}
-
-static int
-g_sched_destroy(struct g_geom *gp, boolean_t force)
-{
- struct g_provider *pp, *oldpp = NULL;
- struct g_sched_softc *sc;
- struct g_gsched *gsp;
- int error;
-
- g_topology_assert();
- sc = gp->softc;
- if (sc == NULL)
- return (ENXIO);
- if (!(sc->sc_flags & G_SCHED_PROXYING)) {
- pp = LIST_FIRST(&gp->provider);
- if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
- const char *msg = force ?
- "but we force removal" : "cannot remove";
-
- G_SCHED_DEBUG(!force,
- "Device %s is still open (r%dw%de%d), %s.",
- pp->name, pp->acr, pp->acw, pp->ace, msg);
- if (!force)
- return (EBUSY);
- } else {
- G_SCHED_DEBUG(0, "Device %s removed.", gp->name);
- }
- } else
- oldpp = g_detach_proxy(gp);
-
- gsp = sc->sc_gsched;
- if (gsp) {
- /*
- * XXX bad hack here: force a dispatch to release
- * any reference to the hash table still held by
- * the scheduler.
- */
- g_sched_lock(gp);
- /*
- * We are dying here, no new requests should enter
- * the scheduler. This is granted by the topolgy,
- * either in case we were proxying (new bios are
- * being redirected) or not (see the access check
- * above).
- */
- g_sched_forced_dispatch(gp);
- error = g_sched_wait_pending(gp);
-
- if (error) {
- /*
- * Not all the requests came home: this might happen
- * under heavy load, or if we were waiting for any
- * bio which is served in the event path (see
- * geom_slice.c for an example of how this can
- * happen). Try to restore a working configuration
- * if we can fail.
- */
- if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) {
- g_sched_flush_pending(force ?
- g_sched_blackhole : g_sched_start);
- }
-
- /*
- * In the forced destroy case there is not so much
- * we can do, we have pending bios that will call
- * g_sched_done() somehow, and we don't want them
- * to crash the system using freed memory. We tell
- * the user that something went wrong, and leak some
- * memory here.
- * Note: the callers using force = 1 ignore the
- * return value.
- */
- if (force) {
- G_SCHED_DEBUG(0, "Pending requests while "
- " destroying geom, some memory leaked.");
- }
-
- return (error);
- }
-
- g_sched_unlock(gp);
- g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask,
- gsp, sc->sc_data);
- sc->sc_hash = NULL;
- gsp->gs_fini(sc->sc_data);
- g_gsched_unref(gsp);
- sc->sc_gsched = NULL;
- } else
- error = 0;
-
- if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) {
- error = g_destroy_proxy(gp, oldpp);
-
- if (error) {
- if (force) {
- G_SCHED_DEBUG(0, "Unrecoverable error while "
- "destroying a proxy geom, leaking some "
- " memory.");
- }
-
- return (error);
- }
- }
-
- mtx_destroy(&sc->sc_mtx);
-
- g_free(gp->softc);
- gp->softc = NULL;
- g_wither_geom(gp, ENXIO);
-
- return (error);
-}
-
-static int
-g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp,
- struct g_geom *gp)
-{
-
- return (g_sched_destroy(gp, 0));
-}
-
-/*
- * Functions related to the classification of requests.
- *
- * On recent FreeBSD versions (8.0 and above), we store a reference
- * to the issuer of a request in bp->bio_classifier1 as soon
- * as the bio is posted to the geom queue (and not later, because
- * requests are managed by the g_down thread afterwards).
- */
-
-/*
- * Classifier support for recent FreeBSD versions: we use
- * a very simple classifier, only use curthread to tag a request.
- * The classifier is registered at module load, and unregistered
- * at module unload.
- */
-static int
-g_sched_tag(void *arg, struct bio *bp)
-{
-
- bp->bio_classifier1 = curthread;
- return (1);
-}
-
-static struct g_classifier_hook g_sched_classifier = {
- .func = g_sched_tag,
-};
-
-static inline void
-g_classifier_ini(void)
-{
-
- g_register_classifier(&g_sched_classifier);
-}
-
-static inline void
-g_classifier_fini(void)
-{
-
- g_unregister_classifier(&g_sched_classifier);
-}
-
-static void
-g_sched_init(struct g_class *mp)
-{
-
- g_gsched_global_init();
-
- G_SCHED_DEBUG(0, "Loading: mp = %p, g_sched_class = %p.",
- mp, &g_sched_class);
-
- /* Patch g_io_request to store classification info in the bio. */
- g_classifier_ini();
-}
-
-static void
-g_sched_fini(struct g_class *mp)
-{
-
- g_classifier_fini();
-
- G_SCHED_DEBUG(0, "Unloading...");
-
- KASSERT(LIST_EMPTY(&me.gs_scheds), ("still registered schedulers"));
- mtx_destroy(&me.gs_mtx);
-}
-
-static int
-g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag,
- struct thread *td)
-{
- struct g_consumer *cp;
- struct g_geom *gp;
-
- cp = LIST_FIRST(&pp->geom->consumer);
- if (cp == NULL)
- return (ENOIOCTL);
- gp = cp->provider->geom;
- if (gp->ioctl == NULL)
- return (ENOIOCTL);
- return (gp->ioctl(cp->provider, cmd, data, fflag, td));
-}
-
-/*
- * Read the i-th argument for a request, skipping the /dev/
- * prefix if present.
- */
-static const char *
-g_sched_argi(struct gctl_req *req, int i)
-{
- static const char *dev_prefix = "/dev/";
- const char *name;
- char param[16];
- int l = strlen(dev_prefix);
-
- snprintf(param, sizeof(param), "arg%d", i);
- name = gctl_get_asciiparam(req, param);
- if (name == NULL)
- gctl_error(req, "No 'arg%d' argument", i);
- else if (strncmp(name, dev_prefix, l) == 0)
- name += l;
- return (name);
-}
-
-/*
- * Fetch nargs and do appropriate checks.
- */
-static int
-g_sched_get_nargs(struct gctl_req *req)
-{
- int *nargs;
-
- nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
- if (nargs == NULL) {
- gctl_error(req, "No 'nargs' argument");
- return (0);
- }
- if (*nargs <= 0)
- gctl_error(req, "Missing device(s).");
- return (*nargs);
-}
-
-/*
- * Check whether we should add the class on certain volumes when
- * this geom is created. Right now this is under control of a kenv
- * variable containing the names of all devices that we care about.
- * Probably we should only support transparent insertion as the
- * preferred mode of operation.
- */
-static struct g_geom *
-g_sched_taste(struct g_class *mp, struct g_provider *pp,
- int flags __unused)
-{
- struct g_gsched *gsp = NULL; /* the . algorithm we want */
- const char *s; /* generic string pointer */
- const char *taste_names; /* devices we like */
- int l;
-
- g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__,
- mp->name, pp->name);
- g_topology_assert();
-
- G_SCHED_DEBUG(2, "Tasting %s.", pp->name);
-
- do {
- /* do not taste on ourselves */
- if (pp->geom->class == mp)
- break;
-
- taste_names = kern_getenv("geom.sched.taste");
- if (taste_names == NULL)
- break;
-
- l = strlen(pp->name);
- for (s = taste_names; *s &&
- (s = strstr(s, pp->name)); s++) {
- /* further checks for an exact match */
- if ( (s == taste_names || s[-1] == ' ') &&
- (s[l] == '\0' || s[l] == ' ') )
- break;
- }
- if (s == NULL)
- break;
- G_SCHED_DEBUG(0, "Attach device %s match [%s]\n",
- pp->name, s);
-
- /* look up the provider name in the list */
- s = kern_getenv("geom.sched.algo");
- if (s == NULL)
- s = "rr";
-
- gsp = g_gsched_find(s); /* also get a reference */
- if (gsp == NULL) {
- G_SCHED_DEBUG(0, "Bad '%s' algorithm.", s);
- break;
- }
-
- /* XXX create with 1 as last argument ? */
- g_sched_create(NULL, mp, pp, gsp, 0);
- g_gsched_unref(gsp);
- } while (0);
- return NULL;
-}
-
-static void
-g_sched_ctl_create(struct gctl_req *req, struct g_class *mp, int proxy)
-{
- struct g_provider *pp;
- struct g_gsched *gsp;
- const char *name;
- int i, nargs;
-
- g_topology_assert();
-
- name = gctl_get_asciiparam(req, "algo");
- if (name == NULL) {
- gctl_error(req, "No '%s' argument", "algo");
- return;
- }
-
- gsp = g_gsched_find(name); /* also get a reference */
- if (gsp == NULL) {
- gctl_error(req, "Bad algorithm '%s'", name);
- return;
- }
-
- nargs = g_sched_get_nargs(req);
-
- /*
- * Run on the arguments, and break on any error.
- * We look for a device name, but skip the /dev/ prefix if any.
- */
- for (i = 0; i < nargs; i++) {
- name = g_sched_argi(req, i);
- if (name == NULL)
- break;
- pp = g_provider_by_name(name);
- if (pp == NULL) {
- G_SCHED_DEBUG(1, "Provider %s is invalid.", name);
- gctl_error(req, "Provider %s is invalid.", name);
- break;
- }
- if (g_sched_create(req, mp, pp, gsp, proxy) != 0)
- break;
- }
-
- g_gsched_unref(gsp);
-}
-
-static void
-g_sched_ctl_configure(struct gctl_req *req, struct g_class *mp)
-{
- struct g_provider *pp;
- struct g_gsched *gsp;
- const char *name;
- int i, nargs;
-
- g_topology_assert();
-
- name = gctl_get_asciiparam(req, "algo");
- if (name == NULL) {
- gctl_error(req, "No '%s' argument", "algo");
- return;
- }
-
- gsp = g_gsched_find(name); /* also get a reference */
- if (gsp == NULL) {
- gctl_error(req, "Bad algorithm '%s'", name);
- return;
- }
-
- nargs = g_sched_get_nargs(req);
-
- /*
- * Run on the arguments, and break on any error.
- * We look for a device name, but skip the /dev/ prefix if any.
- */
- for (i = 0; i < nargs; i++) {
- name = g_sched_argi(req, i);
- if (name == NULL)
- break;
- pp = g_provider_by_name(name);
- if (pp == NULL || pp->geom->class != mp) {
- G_SCHED_DEBUG(1, "Provider %s is invalid.", name);
- gctl_error(req, "Provider %s is invalid.", name);
- break;
- }
- if (g_sched_change_algo(req, mp, pp, gsp) != 0)
- break;
- }
-
- g_gsched_unref(gsp);
-}
-
-static struct g_geom *
-g_sched_find_geom(struct g_class *mp, const char *name)
-{
- struct g_geom *gp;
-
- LIST_FOREACH(gp, &mp->geom, geom) {
- if (strcmp(gp->name, name) == 0)
- return (gp);
- }
- return (NULL);
-}
-
-static void
-g_sched_ctl_destroy(struct gctl_req *req, struct g_class *mp)
-{
- int nargs, *force, error, i;
- struct g_geom *gp;
- const char *name;
-
- g_topology_assert();
-
- nargs = g_sched_get_nargs(req);
-
- force = gctl_get_paraml(req, "force", sizeof(*force));
- if (force == NULL) {
- gctl_error(req, "No 'force' argument");
- return;
- }
-
- for (i = 0; i < nargs; i++) {
- name = g_sched_argi(req, i);
- if (name == NULL)
- break;
-
- gp = g_sched_find_geom(mp, name);
- if (gp == NULL) {
- G_SCHED_DEBUG(1, "Device %s is invalid.", name);
- gctl_error(req, "Device %s is invalid.", name);
- break;
- }
-
- error = g_sched_destroy(gp, *force);
- if (error != 0) {
- gctl_error(req, "Cannot destroy device %s (error=%d).",
- gp->name, error);
- break;
- }
- }
-}
-
-static void
-g_sched_config(struct gctl_req *req, struct g_class *mp, const char *verb)
-{
- uint32_t *version;
-
- g_topology_assert();
-
- version = gctl_get_paraml(req, "version", sizeof(*version));
- if (version == NULL) {
- gctl_error(req, "No '%s' argument.", "version");
- return;
- }
-
- if (*version != G_SCHED_VERSION) {
- gctl_error(req, "Userland and kernel parts are "
- "out of sync.");
- return;
- }
-
- if (strcmp(verb, "create") == 0) {
- g_sched_ctl_create(req, mp, 0);
- return;
- } else if (strcmp(verb, "insert") == 0) {
- g_sched_ctl_create(req, mp, 1);
- return;
- } else if (strcmp(verb, "configure") == 0) {
- g_sched_ctl_configure(req, mp);
- return;
- } else if (strcmp(verb, "destroy") == 0) {
- g_sched_ctl_destroy(req, mp);
- return;
- }
-
- gctl_error(req, "Unknown verb.");
-}
-
-static void
-g_sched_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
- struct g_consumer *cp, struct g_provider *pp)
-{
- struct g_sched_softc *sc = gp->softc;
- struct g_gsched *gsp = sc->sc_gsched;
- if (indent == NULL) { /* plaintext */
- sbuf_printf(sb, " algo %s", gsp ? gsp->gs_name : "--");
- }
- if (gsp != NULL && gsp->gs_dumpconf)
- gsp->gs_dumpconf(sb, indent, gp, cp, pp);
-}
-
-DECLARE_GEOM_CLASS(g_sched_class, g_sched);
-MODULE_VERSION(geom_sched, 0);
diff --git a/sys/geom/sched/g_sched.h b/sys/geom/sched/g_sched.h
deleted file mode 100644
index e2db2471c9ab..000000000000
--- a/sys/geom/sched/g_sched.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009-2010 Fabio Checconi
- * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _G_SCHED_H_
-#define _G_SCHED_H_
-
-/*
- * $Id$
- * $FreeBSD$
- *
- * Header for the geom_sched class (userland library and kernel part).
- * See g_sched.c for documentation.
- * The userland code only needs the three G_SCHED_* values below.
- */
-
-#define G_SCHED_CLASS_NAME "SCHED"
-#define G_SCHED_VERSION 0
-#define G_SCHED_SUFFIX ".sched."
-
-#ifdef _KERNEL
-#define G_SCHED_DEBUG(lvl, ...) \
- _GEOM_DEBUG("GEOM_SCHED", me.gs_debug, (lvl), NULL, __VA_ARGS__)
-#define G_SCHED_LOGREQ(bp, ...) \
- _GEOM_DEBUG("GEOM_SCHED", me.gs_debug, 2, (bp), __VA_ARGS__)
-
-LIST_HEAD(g_hash, g_sched_class);
-
-/*
- * Descriptor of a scheduler.
- * In addition to the obvious fields, sc_flushing and sc_pending
- * support dynamic switching of scheduling algorithm.
- * Normally, sc_flushing is 0, and requests that are scheduled are
- * also added to the sc_pending queue, and removed when we receive
- * the 'done' event.
- *
- * When we are transparently inserted on an existing provider,
- * sc_proxying is set. The detach procedure is slightly different.
- *
- * When switching schedulers, sc_flushing is set so requests bypass us,
- * and at the same time we update the pointer in the pending bios
- * to ignore us when they return up.
- * XXX it would be more efficient to implement sc_pending with
- * a generation number: the softc generation is increased when
- * we change scheduling algorithm, we store the current generation
- * number in the pending bios, and when they come back we ignore
- * the done() call if the generation number do not match.
- */
-struct g_sched_softc {
- /*
- * Generic fields used by any scheduling algorithm:
- * a mutex, the class descriptor, flags, list of pending
- * requests (used when flushing the module) and support
- * for hash tables where we store per-flow queues.
- */
- struct mtx sc_mtx;
- struct g_gsched *sc_gsched; /* Scheduler descriptor. */
- int sc_pending; /* Pending requests. */
- int sc_flags; /* Various flags. */
-
- /*
- * Hash tables to store per-flow queues are generally useful
- * so we handle them in the common code.
- * sc_hash and sc_mask are parameters of the hash table,
- * the last two fields are used to periodically remove
- * expired items from the hash table.
- */
- struct g_hash *sc_hash;
- u_long sc_mask;
- int sc_flush_ticks; /* Next tick for a flush. */
- int sc_flush_bucket; /* Next bucket to flush. */
-
- /*
- * Pointer to the algorithm's private data, which is the value
- * returned by sc_gsched->gs_init() . A NULL here means failure.
- * XXX intptr_t might be more appropriate.
- */
- void *sc_data;
-};
-
-#define G_SCHED_PROXYING 1
-#define G_SCHED_FLUSHING 2
-
-#endif /* _KERNEL */
-
-#endif /* _G_SCHED_H_ */
diff --git a/sys/geom/sched/gs_delay.c b/sys/geom/sched/gs_delay.c
deleted file mode 100644
index 151738d2fedb..000000000000
--- a/sys/geom/sched/gs_delay.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*-
- * Copyright (c) 2015 Netflix, Inc.
- *
- * Derived from gs_rr.c:
- * Copyright (c) 2009-2010 Fabio Checconi
- * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $Id$
- * $FreeBSD$
- *
- * A simple scheduler that just delays certain transactions by a certain
- * amount. We collect all the transactions that are 'done' and put them on
- * a queue. The queue is run through every so often and the transactions that
- * have taken longer than the threshold delay are completed.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bio.h>
-#include <sys/callout.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/proc.h>
-#include <sys/queue.h>
-#include <sys/sbuf.h>
-#include <sys/sysctl.h>
-#include "gs_scheduler.h"
-
-/* Useful constants */
-#define BTFRAC_1US 18446744073709ULL /* 2^64 / 1000000 */
-
-/* list of scheduler instances */
-LIST_HEAD(g_scheds, g_delay_softc);
-
-/*
- * Per device descriptor, holding the Round Robin list of queues
- * accessing the disk, a reference to the geom, and the timer.
- */
-struct g_delay_softc {
- struct g_geom *sc_geom;
-
- struct bio_queue_head sc_bioq; /* queue of pending requests */
- struct callout sc_wait; /* timer for completing with delays */
-
- /* Statistics */
- int sc_in_flight; /* requests in the driver */
-};
-
-/*
- * parameters, config and stats
- */
-struct g_delay_params {
- uint64_t io;
- int bypass; /* bypass scheduling */
- int units; /* how many instances */
- int latency; /* How big a latncy are hoping for */
-};
-
-static struct g_delay_params me = {
- .bypass = 0,
- .units = 0,
- .latency = 0,
- .io = 0,
-};
-struct g_delay_params *gs_delay_me = &me;
-
-SYSCTL_DECL(_kern_geom_sched);
-static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, delay, CTLFLAG_RW, 0,
- "GEOM_SCHED DELAY stuff");
-SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, bypass, CTLFLAG_RD,
- &me.bypass, 0, "Scheduler bypass");
-SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, units, CTLFLAG_RD,
- &me.units, 0, "Scheduler instances");
-SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, latency, CTLFLAG_RW,
- &me.latency, 0, "Minimum latency for requests, in microseconds (1/hz resolution)");
-SYSCTL_QUAD(_kern_geom_sched_delay, OID_AUTO, io, CTLFLAG_RW,
- &me.io, 0, "I/Os delayed\n");
-
-static int
-g_delay_init_class(void *data, void *priv)
-{
- return (0);
-}
-
-static void
-g_delay_fini_class(void *data, void *priv)
-{
-}
-
-/*
- * Called on a request arrival, timeout or completion.
- * Try to serve a request among those queued.
- */
-static struct bio *
-g_delay_next(void *data, int force)
-{
- struct g_delay_softc *sc = data;
- struct bio *bp;
- struct bintime bt;
-
- bp = bioq_first(&sc->sc_bioq);
- if (bp == NULL)
- return (NULL);
-
- /*
- * If the time isn't yet ripe for this bp to be let loose,
- * then the time isn't ripe for any of its friends either
- * since we insert in-order. Terminate if the bio hasn't
- * aged appropriately. Note that there's pathology here
- * such that we may be up to one tick early in releasing
- * this I/O. We could implement this up to a tick late too
- * but choose not to.
- */
- getbinuptime(&bt); /* BIO's bio_t0 is uptime */
- if (bintime_cmp(&bp->bio_t0, &bt, >))
- return (NULL);
- me.io++;
-
- /*
- * The bp has mellowed enough, let it through and update stats.
- * If there's others, we'll catch them next time we get called.
- */
- sc->sc_in_flight++;
-
- bp = bioq_takefirst(&sc->sc_bioq);
- return (bp);
-}
-
-/*
- * Called when a real request for disk I/O arrives.
- * Locate the queue associated with the client.
- * If the queue is the one we are anticipating for, reset its timeout;
- * if the queue is not in the round robin list, insert it in the list.
- * On any error, do not queue the request and return -1, the caller
- * will take care of this request.
- */
-static int
-g_delay_start(void *data, struct bio *bp)
-{
- struct g_delay_softc *sc = data;
-
- if (me.bypass)
- return (-1); /* bypass the scheduler */
-
- bp->bio_caller1 = sc;
- getbinuptime(&bp->bio_t0); /* BIO's bio_t0 is uptime */
- bintime_addx(&bp->bio_t0, BTFRAC_1US * me.latency);
-
- /*
- * Keep the I/Os ordered. Lower layers will reorder as we release them down.
- * We rely on this in g_delay_next() so that we delay all things equally. Even
- * if we move to multiple queues to push stuff down the stack, we'll want to
- * insert in order and let the lower layers do whatever reordering they want.
- */
- bioq_insert_tail(&sc->sc_bioq, bp);
-
- return (0);
-}
-
-static void
-g_delay_timeout(void *data)
-{
- struct g_delay_softc *sc = data;
-
- g_sched_lock(sc->sc_geom);
- g_sched_dispatch(sc->sc_geom);
- g_sched_unlock(sc->sc_geom);
- callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc);
-}
-
-/*
- * Module glue: allocate descriptor, initialize its fields.
- */
-static void *
-g_delay_init(struct g_geom *geom)
-{
- struct g_delay_softc *sc;
-
- sc = malloc(sizeof *sc, M_GEOM_SCHED, M_WAITOK | M_ZERO);
- sc->sc_geom = geom;
- bioq_init(&sc->sc_bioq);
- callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
- callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc);
- me.units++;
-
- return (sc);
-}
-
-/*
- * Module glue -- drain the callout structure, destroy the
- * hash table and its element, and free the descriptor.
- */
-static void
-g_delay_fini(void *data)
-{
- struct g_delay_softc *sc = data;
-
- /* We're force drained before getting here */
-
- /* Kick out timers */
- callout_drain(&sc->sc_wait);
- me.units--;
- free(sc, M_GEOM_SCHED);
-}
-
-/*
- * Called when the request under service terminates.
- * Start the anticipation timer if needed.
- */
-static void
-g_delay_done(void *data, struct bio *bp)
-{
- struct g_delay_softc *sc = data;
-
- sc->sc_in_flight--;
-
- g_sched_dispatch(sc->sc_geom);
-}
-
-static void
-g_delay_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
- struct g_consumer *cp, struct g_provider *pp)
-{
-}
-
-static struct g_gsched g_delay = {
- .gs_name = "delay",
- .gs_priv_size = 0,
- .gs_init = g_delay_init,
- .gs_fini = g_delay_fini,
- .gs_start = g_delay_start,
- .gs_done = g_delay_done,
- .gs_next = g_delay_next,
- .gs_dumpconf = g_delay_dumpconf,
- .gs_init_class = g_delay_init_class,
- .gs_fini_class = g_delay_fini_class,
-};
-
-DECLARE_GSCHED_MODULE(delay, &g_delay);
diff --git a/sys/geom/sched/gs_rr.c b/sys/geom/sched/gs_rr.c
deleted file mode 100644
index 75b75045c656..000000000000
--- a/sys/geom/sched/gs_rr.c
+++ /dev/null
@@ -1,701 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009-2010 Fabio Checconi
- * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $Id$
- * $FreeBSD$
- *
- * A round-robin (RR) anticipatory scheduler, with per-client queues.
- *
- * The goal of this implementation is to improve throughput compared
- * to the pure elevator algorithm, and insure some fairness among
- * clients.
- *
- * Requests coming from the same client are put in the same queue.
- * We use anticipation to help reducing seeks, and each queue
- * is never served continuously for more than a given amount of
- * time or data. Queues are then served in a round-robin fashion.
- *
- * Each queue can be in any of the following states:
- * READY immediately serve the first pending request;
- * BUSY one request is under service, wait for completion;
- * IDLING do not serve incoming requests immediately, unless
- * they are "eligible" as defined later.
- *
- * Scheduling is made looking at the status of all queues,
- * and the first one in round-robin order is privileged.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bio.h>
-#include <sys/callout.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/proc.h>
-#include <sys/queue.h>
-#include <sys/sbuf.h>
-#include <sys/sysctl.h>
-#include "gs_scheduler.h"
-
-/* possible states of the scheduler */
-enum g_rr_state {
- G_QUEUE_READY = 0, /* Ready to dispatch. */
- G_QUEUE_BUSY, /* Waiting for a completion. */
- G_QUEUE_IDLING /* Waiting for a new request. */
-};
-
-/* possible queue flags */
-enum g_rr_flags {
- /* G_FLAG_COMPLETED means that the field q_slice_end is valid. */
- G_FLAG_COMPLETED = 1, /* Completed a req. in the current budget. */
-};
-
-struct g_rr_softc;
-
-/*
- * Queue descriptor, containing reference count, scheduling
- * state, a queue of pending requests, configuration parameters.
- * Queues with pending request(s) and not under service are also
- * stored in a Round Robin (RR) list.
- */
-struct g_rr_queue {
- struct g_rr_softc *q_sc; /* link to the parent */
-
- enum g_rr_state q_status;
- unsigned int q_service; /* service received so far */
- int q_slice_end; /* actual slice end time, in ticks */
- enum g_rr_flags q_flags; /* queue flags */
- struct bio_queue_head q_bioq;
-
- /* Scheduling parameters */
- unsigned int q_budget; /* slice size in bytes */
- unsigned int q_slice_duration; /* slice size in ticks */
- unsigned int q_wait_ticks; /* wait time for anticipation */
-
- /* Stats to drive the various heuristics. */
- struct g_savg q_thinktime; /* Thinktime average. */
- struct g_savg q_seekdist; /* Seek distance average. */
-
- int q_bionum; /* Number of requests. */
-
- off_t q_lastoff; /* Last submitted req. offset. */
- int q_lastsub; /* Last submitted req. time. */
-
- /* Expiration deadline for an empty queue. */
- int q_expire;
-
- TAILQ_ENTRY(g_rr_queue) q_tailq; /* RR list link field */
-};
-
-/* List types. */
-TAILQ_HEAD(g_rr_tailq, g_rr_queue);
-
-/* list of scheduler instances */
-LIST_HEAD(g_scheds, g_rr_softc);
-
-/* Default quantum for RR between queues. */
-#define G_RR_DEFAULT_BUDGET 0x00800000
-
-/*
- * Per device descriptor, holding the Round Robin list of queues
- * accessing the disk, a reference to the geom, and the timer.
- */
-struct g_rr_softc {
- struct g_geom *sc_geom;
-
- /*
- * sc_active is the queue we are anticipating for.
- * It is set only in gs_rr_next(), and possibly cleared
- * only in gs_rr_next() or on a timeout.
- * The active queue is never in the Round Robin list
- * even if it has requests queued.
- */
- struct g_rr_queue *sc_active;
- struct callout sc_wait; /* timer for sc_active */
-
- struct g_rr_tailq sc_rr_tailq; /* the round-robin list */
- int sc_nqueues; /* number of queues */
-
- /* Statistics */
- int sc_in_flight; /* requests in the driver */
-
- LIST_ENTRY(g_rr_softc) sc_next;
-};
-
-/* Descriptor for bounded values, min and max are constant. */
-struct x_bound {
- const int x_min;
- int x_cur;
- const int x_max;
-};
-
-/*
- * parameters, config and stats
- */
-struct g_rr_params {
- int queues; /* total number of queues */
- int w_anticipate; /* anticipate writes */
- int bypass; /* bypass scheduling writes */
-
- int units; /* how many instances */
- /* sc_head is used for debugging */
- struct g_scheds sc_head; /* first scheduler instance */
-
- struct x_bound queue_depth; /* max parallel requests */
- struct x_bound wait_ms; /* wait time, milliseconds */
- struct x_bound quantum_ms; /* quantum size, milliseconds */
- struct x_bound quantum_kb; /* quantum size, Kb (1024 bytes) */
-
- /* statistics */
- int wait_hit; /* success in anticipation */
- int wait_miss; /* failure in anticipation */
-};
-
-/*
- * Default parameters for the scheduler. The quantum sizes target
- * a 80MB/s disk; if the hw is faster or slower the minimum of the
- * two will have effect: the clients will still be isolated but
- * the fairness may be limited. A complete solution would involve
- * the on-line measurement of the actual disk throughput to derive
- * these parameters. Or we may just choose to ignore service domain
- * fairness and accept what can be achieved with time-only budgets.
- */
-static struct g_rr_params me = {
- .sc_head = LIST_HEAD_INITIALIZER(&me.sc_head),
- .w_anticipate = 1,
- .queue_depth = { 1, 1, 50 },
- .wait_ms = { 1, 10, 30 },
- .quantum_ms = { 1, 100, 500 },
- .quantum_kb = { 16, 8192, 65536 },
-};
-
-struct g_rr_params *gs_rr_me = &me;
-
-SYSCTL_DECL(_kern_geom_sched);
-static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, rr, CTLFLAG_RW, 0,
- "GEOM_SCHED ROUND ROBIN stuff");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, units, CTLFLAG_RD,
- &me.units, 0, "Scheduler instances");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queues, CTLFLAG_RD,
- &me.queues, 0, "Total rr queues");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_ms, CTLFLAG_RW,
- &me.wait_ms.x_cur, 0, "Wait time milliseconds");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_ms, CTLFLAG_RW,
- &me.quantum_ms.x_cur, 0, "Quantum size milliseconds");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, bypass, CTLFLAG_RW,
- &me.bypass, 0, "Bypass scheduler");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, w_anticipate, CTLFLAG_RW,
- &me.w_anticipate, 0, "Do anticipation on writes");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_kb, CTLFLAG_RW,
- &me.quantum_kb.x_cur, 0, "Quantum size Kbytes");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queue_depth, CTLFLAG_RW,
- &me.queue_depth.x_cur, 0, "Maximum simultaneous requests");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_hit, CTLFLAG_RW,
- &me.wait_hit, 0, "Hits in anticipation");
-SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_miss, CTLFLAG_RW,
- &me.wait_miss, 0, "Misses in anticipation");
-
-#ifdef DEBUG_QUEUES
-/* print the status of a queue */
-static void
-gs_rr_dump_q(struct g_rr_queue *qp, int index)
-{
- int l = 0;
- struct bio *bp;
-
- TAILQ_FOREACH(bp, &(qp->q_bioq.queue), bio_queue) {
- l++;
- }
- printf("--- rr queue %d %p status %d len %d ---\n",
- index, qp, qp->q_status, l);
-}
-
-/*
- * Dump the scheduler status when writing to this sysctl variable.
- * XXX right now we only dump the status of the last instance created.
- * not a severe issue because this is only for debugging
- */
-static int
-gs_rr_sysctl_status(SYSCTL_HANDLER_ARGS)
-{
- int error, val = 0;
- struct g_rr_softc *sc;
-
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error || !req->newptr )
- return (error);
-
- printf("called %s\n", __FUNCTION__);
-
- LIST_FOREACH(sc, &me.sc_head, sc_next) {
- int i, tot = 0;
- printf("--- sc %p active %p nqueues %d "
- "callout %d in_flight %d ---\n",
- sc, sc->sc_active, sc->sc_nqueues,
- callout_active(&sc->sc_wait),
- sc->sc_in_flight);
- for (i = 0; i < G_RR_HASH_SIZE; i++) {
- struct g_rr_queue *qp;
- LIST_FOREACH(qp, &sc->sc_hash[i], q_hash) {
- gs_rr_dump_q(qp, tot);
- tot++;
- }
- }
- }
- return (0);
-}
-
-SYSCTL_PROC(_kern_geom_sched_rr, OID_AUTO, status,
- CTLTYPE_UINT | CTLFLAG_RW,
- 0, sizeof(int), gs_rr_sysctl_status, "I", "status");
-
-#endif /* DEBUG_QUEUES */
-
-/*
- * Get a bounded value, optionally convert to a min of t_min ticks.
- */
-static int
-get_bounded(struct x_bound *v, int t_min)
-{
- int x;
-
- x = v->x_cur;
- if (x < v->x_min)
- x = v->x_min;
- else if (x > v->x_max)
- x = v->x_max;
- if (t_min) {
- x = x * hz / 1000; /* convert to ticks */
- if (x < t_min)
- x = t_min;
- }
- return x;
-}
-
-/*
- * Get a reference to the queue for bp, using the generic
- * classification mechanism.
- */
-static struct g_rr_queue *
-g_rr_queue_get(struct g_rr_softc *sc, struct bio *bp)
-{
-
- return (g_sched_get_class(sc->sc_geom, bp));
-}
-
-static int
-g_rr_init_class(void *data, void *priv)
-{
- struct g_rr_softc *sc = data;
- struct g_rr_queue *qp = priv;
-
- bioq_init(&qp->q_bioq);
-
- /*
- * Set the initial parameters for the client:
- * slice size in bytes and ticks, and wait ticks.
- * Right now these are constant, but we could have
- * autoconfiguration code to adjust the values based on
- * the actual workload.
- */
- qp->q_budget = 1024 * get_bounded(&me.quantum_kb, 0);
- qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
- qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
-
- qp->q_sc = sc; /* link to the parent */
- qp->q_sc->sc_nqueues++;
- me.queues++;
-
- return (0);
-}
-
-/*
- * Release a reference to the queue.
- */
-static void
-g_rr_queue_put(struct g_rr_queue *qp)
-{
-
- g_sched_put_class(qp->q_sc->sc_geom, qp);
-}
-
-static void
-g_rr_fini_class(void *data, void *priv)
-{
- struct g_rr_queue *qp = priv;
-
- KASSERT(bioq_first(&qp->q_bioq) == NULL,
- ("released nonempty queue"));
- qp->q_sc->sc_nqueues--;
- me.queues--;
-}
-
-static inline int
-g_rr_queue_expired(struct g_rr_queue *qp)
-{
-
- if (qp->q_service >= qp->q_budget)
- return (1);
-
- if ((qp->q_flags & G_FLAG_COMPLETED) &&
- ticks - qp->q_slice_end >= 0)
- return (1);
-
- return (0);
-}
-
-static inline int
-g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp)
-{
- int wait = get_bounded(&me.wait_ms, 2);
-
- if (!me.w_anticipate && (bp->bio_cmd == BIO_WRITE))
- return (0);
-
- if (g_savg_valid(&qp->q_thinktime) &&
- g_savg_read(&qp->q_thinktime) > wait)
- return (0);
-
- if (g_savg_valid(&qp->q_seekdist) &&
- g_savg_read(&qp->q_seekdist) > 8192)
- return (0);
-
- return (1);
-}
-
-/*
- * Called on a request arrival, timeout or completion.
- * Try to serve a request among those queued.
- */
-static struct bio *
-g_rr_next(void *data, int force)
-{
- struct g_rr_softc *sc = data;
- struct g_rr_queue *qp;
- struct bio *bp, *next;
- int expired;
-
- qp = sc->sc_active;
- if (me.bypass == 0 && !force) {
- if (sc->sc_in_flight >= get_bounded(&me.queue_depth, 0))
- return (NULL);
-
- /* Try with the queue under service first. */
- if (qp != NULL && qp->q_status != G_QUEUE_READY) {
- /*
- * Queue is anticipating, ignore request.
- * We should check that we are not past
- * the timeout, but in that case the timeout
- * will fire immediately afterwards so we
- * don't bother.
- */
- return (NULL);
- }
- } else if (qp != NULL && qp->q_status != G_QUEUE_READY) {
- g_rr_queue_put(qp);
- sc->sc_active = qp = NULL;
- }
-
- /*
- * No queue under service, look for the first in RR order.
- * If we find it, select if as sc_active, clear service
- * and record the end time of the slice.
- */
- if (qp == NULL) {
- qp = TAILQ_FIRST(&sc->sc_rr_tailq);
- if (qp == NULL)
- return (NULL); /* no queues at all, return */
- /* otherwise select the new queue for service. */
- TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
- sc->sc_active = qp;
- qp->q_service = 0;
- qp->q_flags &= ~G_FLAG_COMPLETED;
- }
-
- bp = bioq_takefirst(&qp->q_bioq); /* surely not NULL */
- qp->q_service += bp->bio_length; /* charge the service */
-
- /*
- * The request at the head of the active queue is always
- * dispatched, and gs_rr_next() will be called again
- * immediately.
- * We need to prepare for what to do next:
- *
- * 1. have we reached the end of the (time or service) slice ?
- * If so, clear sc_active and possibly requeue the previous
- * active queue if it has more requests pending;
- * 2. do we have more requests in sc_active ?
- * If yes, do not anticipate, as gs_rr_next() will run again;
- * if no, decide whether or not to anticipate depending
- * on read or writes (e.g., anticipate only on reads).
- */
- expired = g_rr_queue_expired(qp); /* are we expired ? */
- next = bioq_first(&qp->q_bioq); /* do we have one more ? */
- if (expired) {
- sc->sc_active = NULL;
- /* Either requeue or release reference. */
- if (next != NULL)
- TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
- else
- g_rr_queue_put(qp);
- } else if (next != NULL) {
- qp->q_status = G_QUEUE_READY;
- } else {
- if (!force && g_rr_should_anticipate(qp, bp)) {
- /* anticipate */
- qp->q_status = G_QUEUE_BUSY;
- } else {
- /* do not anticipate, release reference */
- g_rr_queue_put(qp);
- sc->sc_active = NULL;
- }
- }
- /* If sc_active != NULL, its q_status is always correct. */
-
- sc->sc_in_flight++;
-
- return (bp);
-}
-
-static inline void
-g_rr_update_thinktime(struct g_rr_queue *qp)
-{
- int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2);
-
- if (qp->q_sc->sc_active != qp)
- return;
-
- qp->q_lastsub = ticks;
- delta = (delta > 2 * wait) ? 2 * wait : delta;
- if (qp->q_bionum > 7)
- g_savg_add_sample(&qp->q_thinktime, delta);
-}
-
-static inline void
-g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp)
-{
- off_t dist;
-
- if (qp->q_lastoff > bp->bio_offset)
- dist = qp->q_lastoff - bp->bio_offset;
- else
- dist = bp->bio_offset - qp->q_lastoff;
-
- if (dist > (8192 * 8))
- dist = 8192 * 8;
-
- qp->q_lastoff = bp->bio_offset + bp->bio_length;
-
- if (qp->q_bionum > 7)
- g_savg_add_sample(&qp->q_seekdist, dist);
-}
-
-/*
- * Called when a real request for disk I/O arrives.
- * Locate the queue associated with the client.
- * If the queue is the one we are anticipating for, reset its timeout;
- * if the queue is not in the round robin list, insert it in the list.
- * On any error, do not queue the request and return -1, the caller
- * will take care of this request.
- */
-static int
-g_rr_start(void *data, struct bio *bp)
-{
- struct g_rr_softc *sc = data;
- struct g_rr_queue *qp;
-
- if (me.bypass)
- return (-1); /* bypass the scheduler */
-
- /* Get the queue for the request. */
- qp = g_rr_queue_get(sc, bp);
- if (qp == NULL)
- return (-1); /* allocation failed, tell upstream */
-
- if (bioq_first(&qp->q_bioq) == NULL) {
- /*
- * We are inserting into an empty queue.
- * Reset its state if it is sc_active,
- * otherwise insert it in the RR list.
- */
- if (qp == sc->sc_active) {
- qp->q_status = G_QUEUE_READY;
- callout_stop(&sc->sc_wait);
- } else {
- g_sched_priv_ref(qp);
- TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
- }
- }
-
- qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3);
-
- g_rr_update_thinktime(qp);
- g_rr_update_seekdist(qp, bp);
-
- /* Inherit the reference returned by g_rr_queue_get(). */
- bp->bio_caller1 = qp;
- bioq_disksort(&qp->q_bioq, bp);
-
- return (0);
-}
-
-/*
- * Callout executed when a queue times out anticipating a new request.
- */
-static void
-g_rr_wait_timeout(void *data)
-{
- struct g_rr_softc *sc = data;
- struct g_geom *geom = sc->sc_geom;
-
- g_sched_lock(geom);
- /*
- * We can race with other events, so check if
- * sc_active is still valid.
- */
- if (sc->sc_active != NULL) {
- /* Release the reference to the queue. */
- g_rr_queue_put(sc->sc_active);
- sc->sc_active = NULL;
- me.wait_hit--;
- me.wait_miss++; /* record the miss */
- }
- g_sched_dispatch(geom);
- g_sched_unlock(geom);
-}
-
-/*
- * Module glue: allocate descriptor, initialize its fields.
- */
-static void *
-g_rr_init(struct g_geom *geom)
-{
- struct g_rr_softc *sc;
-
- /* XXX check whether we can sleep */
- sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO);
- sc->sc_geom = geom;
- TAILQ_INIT(&sc->sc_rr_tailq);
- callout_init(&sc->sc_wait, 1);
- LIST_INSERT_HEAD(&me.sc_head, sc, sc_next);
- me.units++;
-
- return (sc);
-}
-
-/*
- * Module glue -- drain the callout structure, destroy the
- * hash table and its element, and free the descriptor.
- */
-static void
-g_rr_fini(void *data)
-{
- struct g_rr_softc *sc = data;
-
- callout_drain(&sc->sc_wait);
- KASSERT(sc->sc_active == NULL, ("still a queue under service"));
- KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues"));
-
- LIST_REMOVE(sc, sc_next);
- me.units--;
- free(sc, M_GEOM_SCHED);
-}
-
-/*
- * Called when the request under service terminates.
- * Start the anticipation timer if needed.
- */
-static void
-g_rr_done(void *data, struct bio *bp)
-{
- struct g_rr_softc *sc = data;
- struct g_rr_queue *qp;
-
- sc->sc_in_flight--;
-
- qp = bp->bio_caller1;
-
- /*
- * When the first request for this queue completes, update the
- * duration and end of the slice. We do not do it when the
- * slice starts to avoid charging to the queue the time for
- * the first seek.
- */
- if (!(qp->q_flags & G_FLAG_COMPLETED)) {
- qp->q_flags |= G_FLAG_COMPLETED;
- /*
- * recompute the slice duration, in case we want
- * to make it adaptive. This is not used right now.
- * XXX should we do the same for q_quantum and q_wait_ticks ?
- */
- qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
- qp->q_slice_end = ticks + qp->q_slice_duration;
- }
-
- if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) {
- /* The queue is trying anticipation, start the timer. */
- qp->q_status = G_QUEUE_IDLING;
- /* may make this adaptive */
- qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
- me.wait_hit++;
- callout_reset(&sc->sc_wait, qp->q_wait_ticks,
- g_rr_wait_timeout, sc);
- } else
- g_sched_dispatch(sc->sc_geom);
-
- /* Release a reference to the queue. */
- g_rr_queue_put(qp);
-}
-
-static void
-g_rr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
- struct g_consumer *cp, struct g_provider *pp)
-{
- if (indent == NULL) { /* plaintext */
- sbuf_printf(sb, " units %d queues %d",
- me.units, me.queues);
- }
-}
-
-static struct g_gsched g_rr = {
- .gs_name = "rr",
- .gs_priv_size = sizeof(struct g_rr_queue),
- .gs_init = g_rr_init,
- .gs_fini = g_rr_fini,
- .gs_start = g_rr_start,
- .gs_done = g_rr_done,
- .gs_next = g_rr_next,
- .gs_dumpconf = g_rr_dumpconf,
- .gs_init_class = g_rr_init_class,
- .gs_fini_class = g_rr_fini_class,
-};
-
-DECLARE_GSCHED_MODULE(rr, &g_rr);
diff --git a/sys/geom/sched/gs_scheduler.h b/sys/geom/sched/gs_scheduler.h
deleted file mode 100644
index d5d37b933b30..000000000000
--- a/sys/geom/sched/gs_scheduler.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009-2010 Fabio Checconi
- * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $Id$
- * $FreeBSD$
- *
- * Prototypes for GEOM-based disk scheduling algorithms.
- * See g_sched.c for generic documentation.
- *
- * This file is used by the kernel modules implementing the various
- * scheduling algorithms. They should provide all the methods
- * defined in struct g_gsched, and also invoke the macro
- * DECLARE_GSCHED_MODULE
- * which registers the scheduling algorithm with the geom_sched module.
- *
- * The various scheduling algorithms do not need to know anything
- * about geom, they only need to handle the 'bio' requests they
- * receive, pass them down when needed, and use the locking interface
- * defined below.
- */
-
-#ifndef _G_GSCHED_H_
-#define _G_GSCHED_H_
-
-#ifdef _KERNEL
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/module.h>
-#include <sys/queue.h>
-#include <geom/geom.h>
-#include "g_sched.h"
-
-/*
- * This is the interface exported to scheduling modules.
- *
- * gs_init() is called when our scheduling algorithm
- * starts being used by a geom 'sched'
- *
- * gs_fini() is called when the algorithm is released.
- *
- * gs_start() is called when a new request comes in. It should
- * enqueue the request and return 0 if success, or return non-zero
- * in case of failure (meaning the request is passed down).
- * The scheduler can use bio->bio_caller1 to store a non-null
- * pointer meaning the request is under its control.
- *
- * gs_next() is called in a loop by g_sched_dispatch(), right after
- * gs_start(), or on timeouts or 'done' events. It should return
- * immediately, either a pointer to the bio to be served or NULL
- * if no bio should be served now. If force is specified, a
- * work-conserving behavior is expected.
- *
- * gs_done() is called when a request under service completes.
- * In turn the scheduler may decide to call the dispatch loop
- * to serve other pending requests (or make sure there is a pending
- * timeout to avoid stalls).
- *
- * gs_init_class() is called when a new client (as determined by
- * the classifier) starts being used.
- *
- * gs_hash_unref() is called right before the class hashtable is
- * destroyed; after this call, the scheduler is supposed to hold no
- * more references to the elements in the table.
- */
-
-/* Forward declarations for prototypes. */
-struct g_geom;
-struct g_sched_class;
-
-typedef void *gs_init_t (struct g_geom *geom);
-typedef void gs_fini_t (void *data);
-typedef int gs_start_t (void *data, struct bio *bio);
-typedef void gs_done_t (void *data, struct bio *bio);
-typedef struct bio *gs_next_t (void *data, int force);
-typedef int gs_init_class_t (void *data, void *priv);
-typedef void gs_fini_class_t (void *data, void *priv);
-typedef void gs_hash_unref_t (void *data);
-
-struct g_gsched {
- const char *gs_name;
- int gs_refs;
- int gs_priv_size;
-
- gs_init_t *gs_init;
- gs_fini_t *gs_fini;
- gs_start_t *gs_start;
- gs_done_t *gs_done;
- gs_next_t *gs_next;
- g_dumpconf_t *gs_dumpconf;
-
- gs_init_class_t *gs_init_class;
- gs_fini_class_t *gs_fini_class;
- gs_hash_unref_t *gs_hash_unref;
-
- LIST_ENTRY(g_gsched) glist;
-};
-
-#define KTR_GSCHED KTR_SPARE4
-
-MALLOC_DECLARE(M_GEOM_SCHED);
-
-/*
- * Basic classification mechanism. Each request is associated to
- * a g_sched_class, and each scheduler has the opportunity to set
- * its own private data for the given (class, geom) pair. The
- * private data have a base type of g_sched_private, and are
- * extended at the end with the actual private fields of each
- * scheduler.
- */
-struct g_sched_class {
- int gsc_refs;
- int gsc_expire;
- u_long gsc_key;
- LIST_ENTRY(g_sched_class) gsc_clist;
-
- void *gsc_priv[0];
-};
-
-/*
- * Manipulate the classifier's data. g_sched_get_class() gets a reference
- * to the class corresponding to bp in gp, allocating and initializing
- * it if necessary. g_sched_put_class() releases the reference.
- * The returned value points to the private data for the class.
- */
-void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
-void g_sched_put_class(struct g_geom *gp, void *priv);
-
-static inline struct g_sched_class *
-g_sched_priv2class(void *priv)
-{
-
- return ((struct g_sched_class *)((u_long)priv -
- offsetof(struct g_sched_class, gsc_priv)));
-}
-
-static inline void
-g_sched_priv_ref(void *priv)
-{
- struct g_sched_class *gsc;
-
- gsc = g_sched_priv2class(priv);
- gsc->gsc_refs++;
-}
-
-/*
- * Locking interface. When each operation registered with the
- * scheduler is invoked, a per-instance lock is taken to protect
- * the data associated with it. If the scheduler needs something
- * else to access the same data (e.g., a callout) it must use
- * these functions.
- */
-void g_sched_lock(struct g_geom *gp);
-void g_sched_unlock(struct g_geom *gp);
-
-/*
- * Restart request dispatching. Must be called with the per-instance
- * mutex held.
- */
-void g_sched_dispatch(struct g_geom *geom);
-
-/*
- * Simple gathering of statistical data, used by schedulers to collect
- * info on process history. Just keep an exponential average of the
- * samples, with some extra bits of precision.
- */
-struct g_savg {
- uint64_t gs_avg;
- unsigned int gs_smpl;
-};
-
-static inline void
-g_savg_add_sample(struct g_savg *ss, uint64_t sample)
-{
-
- /* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
- ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
- ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
-}
-
-static inline int
-g_savg_valid(struct g_savg *ss)
-{
-
- /* We want at least 8 samples to deem an average as valid. */
- return (ss->gs_smpl > 7);
-}
-
-static inline uint64_t
-g_savg_read(struct g_savg *ss)
-{
-
- return (ss->gs_avg / ss->gs_smpl);
-}
-
-/*
- * Declaration of a scheduler module.
- */
-int g_gsched_modevent(module_t mod, int cmd, void *arg);
-
-#define DECLARE_GSCHED_MODULE(name, gsched) \
- static moduledata_t name##_mod = { \
- #name, \
- g_gsched_modevent, \
- gsched, \
- }; \
- DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
- MODULE_DEPEND(name, geom_sched, 0, 0, 0);
-
-#endif /* _KERNEL */
-
-#endif /* _G_GSCHED_H_ */
diff --git a/sys/modules/geom/Makefile b/sys/modules/geom/Makefile
index cbc7207b6f0e..51f3d2438eb1 100644
--- a/sys/modules/geom/Makefile
+++ b/sys/modules/geom/Makefile
@@ -19,7 +19,6 @@ SUBDIR= geom_bde \
geom_part \
geom_raid \
geom_raid3 \
- geom_sched \
geom_shsec \
geom_stripe \
geom_uzip \
diff --git a/sys/modules/geom/geom_sched/Makefile b/sys/modules/geom/geom_sched/Makefile
deleted file mode 100644
index 72ee42a0d5fa..000000000000
--- a/sys/modules/geom/geom_sched/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# $FreeBSD$
-
-SUBDIR= gs_sched gsched_rr gsched_delay
-
-.include <bsd.subdir.mk>
diff --git a/sys/modules/geom/geom_sched/Makefile.inc b/sys/modules/geom/geom_sched/Makefile.inc
deleted file mode 100644
index a81a965bebb9..000000000000
--- a/sys/modules/geom/geom_sched/Makefile.inc
+++ /dev/null
@@ -1,9 +0,0 @@
-# $FreeBSD$
-# included by geom_sched children
-
-.PATH: ${SRCTOP}/sys/geom/sched
-
-# 6.x needs this path
-#CFLAGS += -I${SRCTOP}/sys/geom/sched
-
-# .include <bsd.kmod.mk>
diff --git a/sys/modules/geom/geom_sched/gs_sched/Makefile b/sys/modules/geom/geom_sched/gs_sched/Makefile
deleted file mode 100644
index 13bb91bc99f5..000000000000
--- a/sys/modules/geom/geom_sched/gs_sched/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# $FreeBSD$
-KMOD= geom_sched
-SRCS= g_sched.c
-
-# ../Makefile.inc automatically included
-.include <bsd.kmod.mk>
diff --git a/sys/modules/geom/geom_sched/gsched_delay/Makefile b/sys/modules/geom/geom_sched/gsched_delay/Makefile
deleted file mode 100644
index 4d4381d2229f..000000000000
--- a/sys/modules/geom/geom_sched/gsched_delay/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# $FreeBSD$
-
-KMOD= gsched_delay
-SRCS= gs_delay.c
-
-# ../Makefile.inc automatically included
-.include <bsd.kmod.mk>
diff --git a/sys/modules/geom/geom_sched/gsched_rr/Makefile b/sys/modules/geom/geom_sched/gsched_rr/Makefile
deleted file mode 100644
index 44cc56605a3b..000000000000
--- a/sys/modules/geom/geom_sched/gsched_rr/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# $FreeBSD$
-
-KMOD= gsched_rr
-SRCS= gs_rr.c
-
-# ../Makefile.inc automatically included
-.include <bsd.kmod.mk>
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 488f3039778d..5fdf0ecbb917 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -79,9 +79,6 @@ struct disk;
struct bio;
struct vm_map;
-/* Empty classifier tag, to prevent further classification. */
-#define BIO_NOTCLASSIFIED (void *)(~0UL)
-
typedef void bio_task_t(void *);
/*
@@ -122,8 +119,8 @@ struct bio {
bio_task_t *bio_task; /* Task_queue handler */
void *bio_task_arg; /* Argument to above */
- void *bio_classifier1; /* Classifier tag. */
- void *bio_classifier2; /* Classifier tag. */
+ void *bio_spare1;
+ void *bio_spare2;
#ifdef DIAGNOSTIC
void *_bio_caller1;
diff --git a/sys/sys/ktr_class.h b/sys/sys/ktr_class.h
index 32d905d23a2e..f2c76c73505b 100644
--- a/sys/sys/ktr_class.h
+++ b/sys/sys/ktr_class.h
@@ -58,7 +58,7 @@
#define KTR_SYSC 0x00002000 /* System call */
#define KTR_INIT 0x00004000 /* System initialization */
#define KTR_SPARE3 0x00008000 /* cxgb, drm2, ioat, ntb */
-#define KTR_SPARE4 0x00010000 /* geom_sched */
+#define KTR_SPARE4 0x00010000
#define KTR_EVH 0x00020000 /* Eventhandler */
#define KTR_VFS 0x00040000 /* VFS events */
#define KTR_VOP 0x00080000 /* Auto-generated vop events */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 964c1869eb0e..03b8ccd25840 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -60,7 +60,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1300071 /* Master, propagated to newvers */
+#define __FreeBSD_version 1300072 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,