aboutsummaryrefslogtreecommitdiff
path: root/lib/geom
diff options
context:
space:
mode:
authorBrooks Davis <brooks@FreeBSD.org>2018-06-25 19:55:15 +0000
committerBrooks Davis <brooks@FreeBSD.org>2018-06-25 19:55:15 +0000
commite4b0a90e771b94e4b043729a6f0f5564f1d01aca (patch)
tree2cb4088f70d2963a7584dc9069c3c90a27081d45 /lib/geom
parent9c42fa94a63fc22aee2bd106aa12bfa7388f3911 (diff)
downloadsrc-e4b0a90e771b94e4b043729a6f0f5564f1d01aca.tar.gz
src-e4b0a90e771b94e4b043729a6f0f5564f1d01aca.zip
Normalize the g(eom,cache,part,...) build.
Rather than combining hardlink creation for the geom(8) binary with the shared library build, move libraries to src/lib/geom so they are built and installed normally. Create a common Makefile.classes which is included by both lib/geom/Makefile and sbin/geom/Makefile so the symlink and libraries stay in sync. The relocation of libraries allows libraries to be built for 32-bit compat. This also reduces the number of non-standard builds in the system. This commit is not sufficient to run a 32-bit /sbin/geom on a 64-bit system out of the box as it will look in the wrong place for libraries unless GEOM_LIBRARY_PATH is set appropriately in the environment. Reviewed by: bdrewery Sponsored by: DARPA, AFRL Differential Revision: https://reviews.freebsd.org/D15360
Notes
Notes: svn path=/head/; revision=335645
Diffstat (limited to 'lib/geom')
-rw-r--r--lib/geom/Makefile6
-rw-r--r--lib/geom/Makefile.inc16
-rw-r--r--lib/geom/cache/Makefile8
-rw-r--r--lib/geom/cache/Makefile.depend19
-rw-r--r--lib/geom/cache/gcache.8192
-rw-r--r--lib/geom/cache/geom_cache.c242
-rw-r--r--lib/geom/concat/Makefile8
-rw-r--r--lib/geom/concat/Makefile.depend19
-rw-r--r--lib/geom/concat/gconcat.8197
-rw-r--r--lib/geom/concat/geom_concat.c250
-rw-r--r--lib/geom/eli/Makefile20
-rw-r--r--lib/geom/eli/Makefile.depend21
-rw-r--r--lib/geom/eli/geli.81123
-rw-r--r--lib/geom/eli/geom_eli.c1769
-rw-r--r--lib/geom/journal/Makefile13
-rw-r--r--lib/geom/journal/Makefile.depend21
-rw-r--r--lib/geom/journal/geom_journal.c351
-rw-r--r--lib/geom/journal/geom_journal.h35
-rw-r--r--lib/geom/journal/geom_journal_ufs.c80
-rw-r--r--lib/geom/journal/gjournal.8346
-rw-r--r--lib/geom/label/Makefile8
-rw-r--r--lib/geom/label/Makefile.depend19
-rw-r--r--lib/geom/label/geom_label.c260
-rw-r--r--lib/geom/label/glabel.8280
-rw-r--r--lib/geom/mirror/Makefile10
-rw-r--r--lib/geom/mirror/Makefile.depend20
-rw-r--r--lib/geom/mirror/geom_mirror.c500
-rw-r--r--lib/geom/mirror/gmirror.8436
-rw-r--r--lib/geom/mountver/Makefile8
-rw-r--r--lib/geom/mountver/Makefile.depend19
-rw-r--r--lib/geom/mountver/geom_mountver.c58
-rw-r--r--lib/geom/mountver/gmountver.8133
-rw-r--r--lib/geom/multipath/Makefile10
-rw-r--r--lib/geom/multipath/Makefile.depend19
-rw-r--r--lib/geom/multipath/geom_multipath.c325
-rw-r--r--lib/geom/multipath/gmultipath.8377
-rw-r--r--lib/geom/nop/Makefile8
-rw-r--r--lib/geom/nop/Makefile.depend19
-rw-r--r--lib/geom/nop/geom_nop.c81
-rw-r--r--lib/geom/nop/gnop.8189
-rw-r--r--lib/geom/part/Makefile10
-rw-r--r--lib/geom/part/Makefile.depend20
-rw-r--r--lib/geom/part/geom_part.c1344
-rw-r--r--lib/geom/part/gpart.81441
-rw-r--r--lib/geom/raid/Makefile10
-rw-r--r--lib/geom/raid/Makefile.depend20
-rw-r--r--lib/geom/raid/geom_raid.c94
-rw-r--r--lib/geom/raid/graid.8321
-rw-r--r--lib/geom/raid3/Makefile10
-rw-r--r--lib/geom/raid3/Makefile.depend20
-rw-r--r--lib/geom/raid3/geom_raid3.c338
-rw-r--r--lib/geom/raid3/graid3.8257
-rw-r--r--lib/geom/sched/Makefile9
-rw-r--r--lib/geom/sched/Makefile.depend19
-rw-r--r--lib/geom/sched/geom_sched.c128
-rw-r--r--lib/geom/sched/gsched.8162
-rw-r--r--lib/geom/shsec/Makefile8
-rw-r--r--lib/geom/shsec/Makefile.depend19
-rw-r--r--lib/geom/shsec/geom_shsec.c262
-rw-r--r--lib/geom/shsec/gshsec.8130
-rw-r--r--lib/geom/stripe/Makefile8
-rw-r--r--lib/geom/stripe/Makefile.depend19
-rw-r--r--lib/geom/stripe/geom_stripe.c288
-rw-r--r--lib/geom/stripe/gstripe.8243
-rw-r--r--lib/geom/virstor/Makefile11
-rw-r--r--lib/geom/virstor/Makefile.depend19
-rw-r--r--lib/geom/virstor/geom_virstor.c530
-rw-r--r--lib/geom/virstor/gvirstor.8299
68 files changed, 13554 insertions, 0 deletions
diff --git a/lib/geom/Makefile b/lib/geom/Makefile
new file mode 100644
index 000000000000..2a53278be92d
--- /dev/null
+++ b/lib/geom/Makefile
@@ -0,0 +1,6 @@
+# $FreeBSD$
+
+SUBDIR=${GEOM_CLASSES}
+
+.include "Makefile.inc"
+.include <bsd.subdir.mk>
diff --git a/lib/geom/Makefile.inc b/lib/geom/Makefile.inc
new file mode 100644
index 000000000000..74597f8bcacd
--- /dev/null
+++ b/lib/geom/Makefile.inc
@@ -0,0 +1,16 @@
+# $FreeBSD$
+
+.include <src.opts.mk>
+
+SHLIBDIR=${GEOM_CLASS_DIR}
+SHLIB_NAME?=geom_${GEOM_CLASS}.so
+MAN= g${GEOM_CLASS}.8
+SRCS+= geom_${GEOM_CLASS}.c subr.c
+CFLAGS+=-I${SRCTOP}/sbin/geom
+
+.PATH: ${SRCTOP}/sbin/geom/misc
+
+NO_WMISSING_VARIABLE_DECLARATIONS=
+
+.include "Makefile.classes"
+.include "../Makefile.inc"
diff --git a/lib/geom/cache/Makefile b/lib/geom/cache/Makefile
new file mode 100644
index 000000000000..e1ba031c9ffe
--- /dev/null
+++ b/lib/geom/cache/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= cache
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/cache/Makefile.depend b/lib/geom/cache/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/cache/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/cache/gcache.8 b/lib/geom/cache/gcache.8
new file mode 100644
index 000000000000..b0f1c7abf1f7
--- /dev/null
+++ b/lib/geom/cache/gcache.8
@@ -0,0 +1,192 @@
+.\"-
+.\" Copyright (c) 2010 Edward Tomasz Napierala
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd January 3, 2010
+.Dt GCACHE 8
+.Os
+.Sh NAME
+.Nm gcache
+.Nd "control utility for CACHE GEOM class"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Op Fl b Ar blocksize
+.Op Fl s Ar size
+.Ar name
+.Ar prov
+.Nm
+.Cm configure
+.Op Fl v
+.Op Fl b Ar blocksize
+.Op Fl s Ar size
+.Ar name
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar name
+.Nm
+.Cm label
+.Op Fl v
+.Op Fl b Ar blocksize
+.Op Fl s Ar size
+.Ar name
+.Ar prov
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Op Fl s Ar name
+.Nm
+.Cm load
+.Op Fl v
+.Nm
+.Cm unload
+.Op Fl v
+.Sh DESCRIPTION
+The
+.Nm
+utility is used to control GEOM cache, which can
+speed up read performance by sending fixed size
+read requests to its consumer. It has been developed to address
+the problem of a horrible read performance of a 64k blocksize FS
+residing on a RAID3 array with 8 data components, where a single
+disk component would only get 8k read requests, thus effectively
+killing disk performance under high load.
+.Pp
+Caching can be configured using two different methods:
+.Dq manual
+or
+.Dq automatic .
+When using the
+.Dq manual
+method, no metadata are stored on the devices, so the cached
+device has to be configured by hand every time it is needed.
+The
+.Dq automatic
+method uses on-disk metadata to detect devices.
+Once devices are labeled, they will be automatically detected and
+configured.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm create
+Cache the given devices with specified
+.Ar name .
+This is the
+.Dq manual
+method.
+The kernel module
+.Pa geom_cache.ko
+will be loaded if it is not loaded already.
+.It Cm label
+Cache the given devices with the specified
+.Ar name .
+This is the
+.Dq automatic
+method, where metadata are stored in every device's last sector.
+The kernel module
+.Pa geom_cache.ko
+will be loaded if it is not loaded already.
+.It Cm stop
+Turn off existing cache device by its
+.Ar name .
+This command does not touch on-disk metadata!
+.It Cm destroy
+Same as
+.Cm stop .
+.It Cm clear
+Clear metadata on the given devices.
+.It Cm dump
+Dump metadata stored on the given devices.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width indent
+.It Fl f
+Force the removal of the specified cache device.
+.It Fl v
+Be more verbose.
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm CACHE
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.cache.used_hi : No 20
+.It Va kern.geom.cache.used_lo : No 5
+.It Va kern.geom.cache.idletime : No 5
+.It Va kern.geom.cache.timeout : No 10
+.It Va kern.geom.cache.enable : No 1
+.It Va kern.geom.cache.debug : No 0
+Debug level of the
+.Nm CACHE
+GEOM class.
+This can be set to a number between 0 and 3 inclusive.
+If set to 0 minimal debug information is printed, and if set to 3 the
+maximum amount of debug information is printed.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 7.0 .
+.Sh AUTHORS
+.An Ruslan Ermilov Aq Mt ru@FreeBSD.org
diff --git a/lib/geom/cache/geom_cache.c b/lib/geom/cache/geom_cache.c
new file mode 100644
index 000000000000..4e76da2ce7a8
--- /dev/null
+++ b/lib/geom/cache/geom_cache.c
@@ -0,0 +1,242 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2006 Ruslan Ermilov <ru@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <libgeom.h>
+#include <geom/cache/g_cache.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_CACHE_VERSION;
+
+#define GCACHE_BLOCKSIZE "65536"
+#define GCACHE_SIZE "100"
+
+static void cache_main(struct gctl_req *req, unsigned flags);
+static void cache_clear(struct gctl_req *req);
+static void cache_dump(struct gctl_req *req);
+static void cache_label(struct gctl_req *req);
+
+struct g_command class_commands[] = {
+ { "clear", G_FLAG_VERBOSE, cache_main, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ { "configure", G_FLAG_VERBOSE, NULL,
+ {
+ { 'b', "blocksize", "0", G_TYPE_NUMBER },
+ { 's', "size", "0", G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-v] [-b blocksize] [-s size] name"
+ },
+ { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL,
+ {
+ { 'b', "blocksize", GCACHE_BLOCKSIZE, G_TYPE_NUMBER },
+ { 's', "size", GCACHE_SIZE, G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-v] [-b blocksize] [-s size] name prov"
+ },
+ { "destroy", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ { "dump", 0, cache_main, G_NULL_OPTS,
+ "prov ..."
+ },
+ { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, cache_main,
+ {
+ { 'b', "blocksize", GCACHE_BLOCKSIZE, G_TYPE_NUMBER },
+ { 's', "size", GCACHE_SIZE, G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-v] [-b blocksize] [-s size] name prov"
+ },
+ { "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+ "[-v] name ..."
+ },
+ { "stop", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ G_CMD_SENTINEL
+};
+
+static int verbose = 0;
+
+static void
+cache_main(struct gctl_req *req, unsigned flags)
+{
+ const char *name;
+
+ if ((flags & G_FLAG_VERBOSE) != 0)
+ verbose = 1;
+
+ name = gctl_get_ascii(req, "verb");
+ if (name == NULL) {
+ gctl_error(req, "No '%s' argument.", "verb");
+ return;
+ }
+ if (strcmp(name, "label") == 0)
+ cache_label(req);
+ else if (strcmp(name, "clear") == 0)
+ cache_clear(req);
+ else if (strcmp(name, "dump") == 0)
+ cache_dump(req);
+ else
+ gctl_error(req, "Unknown command: %s.", name);
+}
+
+static void
+cache_label(struct gctl_req *req)
+{
+ struct g_cache_metadata md;
+ u_char sector[512];
+ const char *name;
+ int error, nargs;
+ intmax_t val;
+
+ bzero(sector, sizeof(sector));
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs != 2) {
+ gctl_error(req, "Invalid number of arguments.");
+ return;
+ }
+
+ strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic));
+ md.md_version = G_CACHE_VERSION;
+ name = gctl_get_ascii(req, "arg0");
+ strlcpy(md.md_name, name, sizeof(md.md_name));
+ val = gctl_get_intmax(req, "blocksize");
+ md.md_bsize = val;
+ val = gctl_get_intmax(req, "size");
+ md.md_size = val;
+
+ name = gctl_get_ascii(req, "arg1");
+ md.md_provsize = g_get_mediasize(name);
+ if (md.md_provsize == 0) {
+ fprintf(stderr, "Can't get mediasize of %s: %s.\n",
+ name, strerror(errno));
+ gctl_error(req, "Not fully done.");
+ return;
+ }
+ cache_metadata_encode(&md, sector);
+ error = g_metadata_store(name, sector, sizeof(sector));
+ if (error != 0) {
+ fprintf(stderr, "Can't store metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ return;
+ }
+ if (verbose)
+ printf("Metadata value stored on %s.\n", name);
+}
+
+static void
+cache_clear(struct gctl_req *req)
+{
+ const char *name;
+ int error, i, nargs;
+
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_clear(name, G_CACHE_MAGIC);
+ if (error != 0) {
+ fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ if (verbose)
+ printf("Metadata cleared on %s.\n", name);
+ }
+}
+
+static void
+cache_metadata_dump(const struct g_cache_metadata *md)
+{
+
+ printf(" Magic string: %s\n", md->md_magic);
+ printf(" Metadata version: %u\n", (u_int)md->md_version);
+ printf(" Device name: %s\n", md->md_name);
+ printf(" Block size: %u\n", (u_int)md->md_bsize);
+ printf(" Cache size: %u\n", (u_int)md->md_size);
+ printf(" Provider size: %ju\n", (uintmax_t)md->md_provsize);
+}
+
+static void
+cache_dump(struct gctl_req *req)
+{
+ struct g_cache_metadata md, tmpmd;
+ const char *name;
+ int error, i, nargs;
+
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd),
+ G_CACHE_MAGIC);
+ if (error != 0) {
+ fprintf(stderr, "Can't read metadata from %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ cache_metadata_decode((u_char *)&tmpmd, &md);
+ printf("Metadata on %s:\n", name);
+ cache_metadata_dump(&md);
+ printf("\n");
+ }
+}
diff --git a/lib/geom/concat/Makefile b/lib/geom/concat/Makefile
new file mode 100644
index 000000000000..3c370520a624
--- /dev/null
+++ b/lib/geom/concat/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= concat
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/concat/Makefile.depend b/lib/geom/concat/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/concat/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/concat/gconcat.8 b/lib/geom/concat/gconcat.8
new file mode 100644
index 000000000000..d874b087b649
--- /dev/null
+++ b/lib/geom/concat/gconcat.8
@@ -0,0 +1,197 @@
+.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd May 21, 2004
+.Dt GCONCAT 8
+.Os
+.Sh NAME
+.Nm gconcat
+.Nd "disk concatenation control utility"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Ar name
+.Ar prov ...
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm label
+.Op Fl hv
+.Ar name
+.Ar prov ...
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for device concatenation configuration.
+The concatenation can be configured using two different methods:
+.Dq manual
+or
+.Dq automatic .
+When using the
+.Dq manual
+method, no metadata are stored on the devices, so the concatenated
+device has to be configured by hand every time it is needed.
+The
+.Dq automatic
+method uses on-disk metadata to detect devices.
+Once devices are labeled, they will be automatically detected and
+configured.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm create
+Concatenate the given devices with specified
+.Ar name .
+This is the
+.Dq manual
+method.
+The kernel module
+.Pa geom_concat.ko
+will be loaded if it is not loaded already.
+.It Cm label
+Concatenate the given devices with the specified
+.Ar name .
+This is the
+.Dq automatic
+method, where metadata are stored in every device's last sector.
+The kernel module
+.Pa geom_concat.ko
+will be loaded if it is not loaded already.
+.It Cm stop
+Turn off existing concatenate device by its
+.Ar name .
+This command does not touch on-disk metadata!
+.It Cm destroy
+Same as
+.Cm stop .
+.It Cm clear
+Clear metadata on the given devices.
+.It Cm dump
+Dump metadata stored on the given devices.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width indent
+.It Fl f
+Force the removal of the specified concatenated device.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl v
+Be more verbose.
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm CONCAT
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.concat.debug : No 0
+Debug level of the
+.Nm CONCAT
+GEOM class.
+This can be set to a number between 0 and 3 inclusive.
+If set to 0 minimal debug information is printed, and if set to 3 the
+maximum amount of debug information is printed.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to configure four disks for automatic
+concatenation, create a file system on it, and mount it:
+.Bd -literal -offset indent
+gconcat label -v data /dev/da0 /dev/da1 /dev/da2 /dev/da3
+newfs /dev/concat/data
+mount /dev/concat/data /mnt
+[...]
+umount /mnt
+gconcat stop data
+gconcat unload
+.Ed
+.Pp
+Configure concatenated provider on one disk only.
+Create file system.
+Add two more disks and extend existing file system.
+.Bd -literal -offset indent
+gconcat label data /dev/da0
+newfs /dev/concat/data
+gconcat label data /dev/da0 /dev/da1 /dev/da2
+growfs /dev/concat/data
+.Ed
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr loader.conf 5 ,
+.Xr geom 8 ,
+.Xr growfs 8 ,
+.Xr gvinum 8 ,
+.Xr mount 8 ,
+.Xr newfs 8 ,
+.Xr sysctl 8 ,
+.Xr umount 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 5.3 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
diff --git a/lib/geom/concat/geom_concat.c b/lib/geom/concat/geom_concat.c
new file mode 100644
index 000000000000..801bea61cdfd
--- /dev/null
+++ b/lib/geom/concat/geom_concat.c
@@ -0,0 +1,250 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/concat/g_concat.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_CONCAT_VERSION;
+
+static void concat_main(struct gctl_req *req, unsigned flags);
+static void concat_clear(struct gctl_req *req);
+static void concat_dump(struct gctl_req *req);
+static void concat_label(struct gctl_req *req);
+
+struct g_command class_commands[] = {
+ { "clear", G_FLAG_VERBOSE, concat_main, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, G_NULL_OPTS,
+ "[-v] name prov ..."
+ },
+ { "destroy", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ { "dump", 0, concat_main, G_NULL_OPTS,
+ "prov ..."
+ },
+ { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, concat_main,
+ {
+ { 'h', "hardcode", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-hv] name prov ..."
+ },
+ { "stop", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ G_CMD_SENTINEL
+};
+
+static int verbose = 0;
+
+static void
+concat_main(struct gctl_req *req, unsigned flags)
+{
+ const char *name;
+
+ if ((flags & G_FLAG_VERBOSE) != 0)
+ verbose = 1;
+
+ name = gctl_get_ascii(req, "verb");
+ if (name == NULL) {
+ gctl_error(req, "No '%s' argument.", "verb");
+ return;
+ }
+ if (strcmp(name, "label") == 0)
+ concat_label(req);
+ else if (strcmp(name, "clear") == 0)
+ concat_clear(req);
+ else if (strcmp(name, "dump") == 0)
+ concat_dump(req);
+ else
+ gctl_error(req, "Unknown command: %s.", name);
+}
+
+static void
+concat_label(struct gctl_req *req)
+{
+ struct g_concat_metadata md;
+ u_char sector[512];
+ const char *name;
+ int error, i, hardcode, nargs;
+
+ bzero(sector, sizeof(sector));
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 2) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+ hardcode = gctl_get_int(req, "hardcode");
+
+ /*
+ * Clear last sector first to spoil all components if device exists.
+ */
+ for (i = 1; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_clear(name, NULL);
+ if (error != 0) {
+ gctl_error(req, "Can't store metadata on %s: %s.", name,
+ strerror(error));
+ return;
+ }
+ }
+
+ strlcpy(md.md_magic, G_CONCAT_MAGIC, sizeof(md.md_magic));
+ md.md_version = G_CONCAT_VERSION;
+ name = gctl_get_ascii(req, "arg0");
+ strlcpy(md.md_name, name, sizeof(md.md_name));
+ md.md_id = arc4random();
+ md.md_all = nargs - 1;
+
+ /*
+ * Ok, store metadata.
+ */
+ for (i = 1; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ md.md_no = i - 1;
+ if (!hardcode)
+ bzero(md.md_provider, sizeof(md.md_provider));
+ else {
+ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ name += sizeof(_PATH_DEV) - 1;
+ strlcpy(md.md_provider, name, sizeof(md.md_provider));
+ }
+ md.md_provsize = g_get_mediasize(name);
+ if (md.md_provsize == 0) {
+ fprintf(stderr, "Can't get mediasize of %s: %s.\n",
+ name, strerror(errno));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ concat_metadata_encode(&md, sector);
+ error = g_metadata_store(name, sector, sizeof(sector));
+ if (error != 0) {
+ fprintf(stderr, "Can't store metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ if (verbose)
+ printf("Metadata value stored on %s.\n", name);
+ }
+}
+
+static void
+concat_clear(struct gctl_req *req)
+{
+ const char *name;
+ int error, i, nargs;
+
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_clear(name, G_CONCAT_MAGIC);
+ if (error != 0) {
+ fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ if (verbose)
+ printf("Metadata cleared on %s.\n", name);
+ }
+}
+
+static void
+concat_metadata_dump(const struct g_concat_metadata *md)
+{
+
+ printf(" Magic string: %s\n", md->md_magic);
+ printf(" Metadata version: %u\n", (u_int)md->md_version);
+ printf(" Device name: %s\n", md->md_name);
+ printf(" Device ID: %u\n", (u_int)md->md_id);
+ printf(" Disk number: %u\n", (u_int)md->md_no);
+ printf("Total number of disks: %u\n", (u_int)md->md_all);
+ printf(" Hardcoded provider: %s\n", md->md_provider);
+}
+
+static void
+concat_dump(struct gctl_req *req)
+{
+ struct g_concat_metadata md, tmpmd;
+ const char *name;
+ int error, i, nargs;
+
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd),
+ G_CONCAT_MAGIC);
+ if (error != 0) {
+ fprintf(stderr, "Can't read metadata from %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ concat_metadata_decode((u_char *)&tmpmd, &md);
+ printf("Metadata on %s:\n", name);
+ concat_metadata_dump(&md);
+ printf("\n");
+ }
+}
diff --git a/lib/geom/eli/Makefile b/lib/geom/eli/Makefile
new file mode 100644
index 000000000000..b9bd29268893
--- /dev/null
+++ b/lib/geom/eli/Makefile
@@ -0,0 +1,20 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc ${SRCTOP}/sys/geom/eli ${SRCTOP}/sys/crypto/sha2
+
+GEOM_CLASS= eli
+SRCS= g_eli_crypto.c
+SRCS+= g_eli_hmac.c
+SRCS+= g_eli_key.c
+SRCS+= pkcs5v2.c
+SRCS+= sha256c.c
+SRCS+= sha512c.c
+
+LIBADD= md crypto
+
+WARNS?= 3
+
+CFLAGS+=-I${SRCTOP}/sys
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/eli/Makefile.depend b/lib/geom/eli/Makefile.depend
new file mode 100644
index 000000000000..3378fcd11dbc
--- /dev/null
+++ b/lib/geom/eli/Makefile.depend
@@ -0,0 +1,21 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ lib/libmd \
+ sbin/geom/core \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/eli/geli.8 b/lib/geom/eli/geli.8
new file mode 100644
index 000000000000..e4a48cf625f7
--- /dev/null
+++ b/lib/geom/eli/geli.8
@@ -0,0 +1,1123 @@
+.\" Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd May 9, 2018
+.Dt GELI 8
+.Os
+.Sh NAME
+.Nm geli
+.Nd "control utility for the cryptographic GEOM class"
+.Sh SYNOPSIS
+To compile GEOM_ELI into your kernel, add the following lines to your kernel
+configuration file:
+.Bd -ragged -offset indent
+.Cd "device crypto"
+.Cd "options GEOM_ELI"
+.Ed
+.Pp
+Alternatively, to load the GEOM_ELI module at boot time, add the following line
+to your
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+geom_eli_load="YES"
+.Ed
+.Pp
+Usage of the
+.Nm
+utility:
+.Pp
+.Nm
+.Cm init
+.Op Fl bdgPTv
+.Op Fl a Ar aalgo
+.Op Fl B Ar backupfile
+.Op Fl e Ar ealgo
+.Op Fl i Ar iterations
+.Op Fl J Ar newpassfile
+.Op Fl K Ar newkeyfile
+.Op Fl l Ar keylen
+.Op Fl s Ar sectorsize
+.Op Fl V Ar version
+.Ar prov
+.Nm
+.Cm label - an alias for
+.Cm init
+.Nm
+.Cm attach
+.Op Fl Cdprv
+.Op Fl n Ar keyno
+.Op Fl j Ar passfile
+.Op Fl k Ar keyfile
+.Ar prov
+.Nm
+.Cm detach
+.Op Fl fl
+.Ar prov ...
+.Nm
+.Cm stop - an alias for
+.Cm detach
+.Nm
+.Cm onetime
+.Op Fl dT
+.Op Fl a Ar aalgo
+.Op Fl e Ar ealgo
+.Op Fl l Ar keylen
+.Op Fl s Ar sectorsize
+.Ar prov
+.Nm
+.Cm configure
+.Op Fl bBdDgGtT
+.Ar prov ...
+.Nm
+.Cm setkey
+.Op Fl pPv
+.Op Fl i Ar iterations
+.Op Fl j Ar passfile
+.Op Fl J Ar newpassfile
+.Op Fl k Ar keyfile
+.Op Fl K Ar newkeyfile
+.Op Fl n Ar keyno
+.Ar prov
+.Nm
+.Cm delkey
+.Op Fl afv
+.Op Fl n Ar keyno
+.Ar prov
+.Nm
+.Cm kill
+.Op Fl av
+.Op Ar prov ...
+.Nm
+.Cm backup
+.Op Fl v
+.Ar prov
+.Ar file
+.Nm
+.Cm restore
+.Op Fl fv
+.Ar file
+.Ar prov
+.Nm
+.Cm suspend
+.Op Fl v
+.Fl a | Ar prov ...
+.Nm
+.Cm resume
+.Op Fl pv
+.Op Fl j Ar passfile
+.Op Fl k Ar keyfile
+.Ar prov
+.Nm
+.Cm resize
+.Op Fl v
+.Fl s Ar oldsize
+.Ar prov
+.Nm
+.Cm version
+.Op Ar prov ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used to configure encryption on GEOM providers.
+.Pp
+The following is a list of the most important features:
+.Pp
+.Bl -bullet -offset indent -compact
+.It
+Utilizes the
+.Xr crypto 9
+framework, so when there is crypto hardware available,
+.Nm
+will make use of it automatically.
+.It
+Supports many cryptographic algorithms (currently
+.Nm AES-XTS ,
+.Nm AES-CBC ,
+.Nm Blowfish-CBC ,
+.Nm Camellia-CBC
+and
+.Nm 3DES-CBC ) .
+.It
+Can optionally perform data authentication (integrity verification) utilizing
+one of the following algorithms:
+.Nm HMAC/MD5 ,
+.Nm HMAC/SHA1 ,
+.Nm HMAC/RIPEMD160 ,
+.Nm HMAC/SHA256 ,
+.Nm HMAC/SHA384
+or
+.Nm HMAC/SHA512 .
+.It
+Can create a User Key from up to two, piecewise components: a passphrase
+entered via prompt or read from one or more passfiles; a keyfile read from
+one or more files.
+.It
+Allows encryption of the root partition.
+The user will be asked for the
+passphrase before the root file system is mounted.
+.It
+Strengthens the passphrase component of the User Key with:
+.Rs
+.%A B. Kaliski
+.%T "PKCS #5: Password-Based Cryptography Specification, Version 2.0."
+.%R RFC
+.%N 2898
+.Re
+.It
+Allows the use of two independent User Keys (e.g., a
+.Qq "user key"
+and a
+.Qq "company key" ) .
+.It
+It is fast -
+.Nm
+performs simple sector-to-sector encryption.
+.It
+Allows the encrypted Master Key to be backed up and restored,
+so that if a user has to quickly destroy key material,
+it is possible to get the data back by restoring keys from
+backup.
+.It
+Providers can be configured to automatically detach on last close
+(so users do not have to remember to detach providers after unmounting
+the file systems).
+.It
+Allows attaching a provider with a random, one-time Master Key -
+useful for swap partitions and temporary file systems.
+.It
+Allows verification of data integrity (data authentication).
+.It
+Allows suspending and resuming encrypted devices.
+.El
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm configure"
+.It Cm init
+Initialize the provider which needs to be encrypted.
+Here you can set up the cryptographic algorithm to use, Data Key length,
+etc.
+The last sector of the provider is used to store metadata.
+The
+.Cm init
+subcommand also automatically writes metadata backups to
+.Pa /var/backups/<prov>.eli
+file.
+The metadata can be recovered with the
+.Cm restore
+subcommand described below.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl J Ar newpassfile"
+.It Fl a Ar aalgo
+Enable data integrity verification (authentication) using the given algorithm.
+This will reduce the size of storage available and also reduce speed.
+For example, when using 4096-byte sectors and the
+.Nm HMAC/SHA256
+algorithm, 89% of the original provider storage will be available for use.
+Currently supported algorithms are:
+.Nm HMAC/MD5 ,
+.Nm HMAC/SHA1 ,
+.Nm HMAC/RIPEMD160 ,
+.Nm HMAC/SHA256 ,
+.Nm HMAC/SHA384
+and
+.Nm HMAC/SHA512 .
+If the option is not given, there will be no authentication, only encryption.
+The recommended algorithm is
+.Nm HMAC/SHA256 .
+.It Fl b
+Try to decrypt this partition during boot, before the root partition is mounted.
+This makes it possible to use an encrypted root partition.
+One will still need bootable unencrypted storage with a
+.Pa /boot/
+directory, which can be a CD-ROM disc or USB pen-drive, that can be removed
+after boot.
+.It Fl B Ar backupfile
+File name to use for metadata backup instead of the default
+.Pa /var/backups/<prov>.eli .
+To inhibit backups, you can use
+.Pa none
+as the
+.Ar backupfile .
+.It Fl d
+When entering the passphrase to boot from this encrypted root filesystem, echo
+.Ql *
+characters.
+This makes the length of the passphrase visible.
+.It Fl e Ar ealgo
+Encryption algorithm to use.
+Currently supported algorithms are:
+.Nm AES-XTS ,
+.Nm AES-CBC ,
+.Nm Blowfish-CBC ,
+.Nm Camellia-CBC ,
+.Nm 3DES-CBC ,
+and
+.Nm NULL .
+The default and recommended algorithm is
+.Nm AES-XTS .
+.Nm NULL
+is unencrypted.
+.It Fl g
+Enable booting from this encrypted root filesystem.
+The boot loader prompts for the passphrase and loads
+.Xr loader 8
+from the encrypted partition.
+.It Fl i Ar iterations
+Number of iterations to use with PKCS#5v2 when processing User Key
+passphrase component.
+If this option is not specified,
+.Nm
+will find the number of iterations which is equal to 2 seconds of crypto work.
+If 0 is given, PKCS#5v2 will not be used.
+PKCS#5v2 processing is performed once, after all parts of the passphrase
+component have been read.
+.It Fl J Ar newpassfile
+Specifies a file which contains the passphrase component of the User Key
+(or part of it).
+If
+.Ar newpassfile
+is given as -, standard input will be used.
+Only the first line (excluding new-line character) is taken from the given file.
+This argument can be specified multiple times, which has the effect of
+reassembling a single passphrase split across multiple files.
+Cannot be combined with the
+.Fl P
+option.
+.It Fl K Ar newkeyfile
+Specifies a file which contains the keyfile component of the User Key
+(or part of it).
+If
+.Ar newkeyfile
+is given as -, standard input will be used.
+This argument can be specified multiple times, which has the effect of
+reassembling a single keyfile split across multiple keyfile parts.
+.It Fl l Ar keylen
+Data Key length to use with the given cryptographic algorithm.
+If the length is not specified, the selected algorithm uses its
+.Em default
+key length.
+.Bl -ohang -offset indent
+.It Nm AES-XTS
+.Em 128 ,
+256
+.It Nm AES-CBC , Nm Camellia-CBC
+.Em 128 ,
+192,
+256
+.It Nm Blowfish-CBC
+.Em 128
++ n * 32, for n=[0..10]
+.It Nm 3DES-CBC
+.Em 192
+.El
+.It Fl P
+Do not use a passphrase as a component of the User Key.
+Cannot be combined with the
+.Fl J
+option.
+.It Fl s Ar sectorsize
+Change decrypted provider's sector size.
+Increasing the sector size allows increased performance,
+because encryption/decryption which requires an initialization vector
+is done per sector; fewer sectors means less computational work.
+.It Fl T
+Don't pass through
+.Dv BIO_DELETE
+calls (i.e., TRIM/UNMAP).
+This can prevent an attacker from knowing how much space you're actually
+using and which sectors contain live data, but will also prevent the
+backing store (SSD, etc) from reclaiming space you're not using, which
+may degrade its performance and lifespan.
+The underlying provider may or may not actually obliterate the deleted
+sectors when TRIM is enabled, so it should not be considered to add any
+security.
+.It Fl V Ar version
+Metadata version to use.
+This option is helpful when creating a provider that may be used by older
+.Nm FreeBSD/GELI
+versions.
+Consult the
+.Sx HISTORY
+section to find which metadata version is supported by which FreeBSD version.
+Note that using an older version of metadata may limit the number of
+features available.
+.El
+.It Cm attach
+Attach the given provider.
+The encrypted Master Key will be loaded from the metadata and decrypted
+using the given passphrase/keyfile and a new GEOM provider will be created
+using the given provider's name with an
+.Qq .eli
+suffix.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl j Ar passfile"
+.It Fl C
+Do a dry-run decryption.
+This is useful to verify passphrase and keyfile without decrypting the device.
+.It Fl d
+If specified, a decrypted provider will be detached automatically on last close.
+This can help a forgetful user, who then does not have to remember to detach the
+provider after unmounting the file system.
+It only works when the provider was opened for writing, so it will not work if
+the file system on the provider is mounted read-only.
+Probably a better choice is the
+.Fl l
+option for the
+.Cm detach
+subcommand.
+.It Fl n Ar keyno
+Specifies the index number of the Master Key copy to use (could be 0 or 1).
+If the index number is not provided all keys will be tested.
+.It Fl j Ar passfile
+Specifies a file which contains the passphrase component of the User Key
+(or part of it).
+For more information see the description of the
+.Fl J
+option for the
+.Cm init
+subcommand.
+.It Fl k Ar keyfile
+Specifies a file which contains the keyfile component of the User Key
+(or part of it).
+For more information see the description of the
+.Fl K
+option for the
+.Cm init
+subcommand.
+.It Fl p
+Do not use a passphrase as a component of the User Key.
+Cannot be combined with the
+.Fl j
+option.
+.It Fl r
+Attach read-only provider.
+It will not be opened for writing.
+.El
+.It Cm detach
+Detach the given providers, which means remove the devfs entry
+and clear the Master Key and Data Keys from memory.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Force detach - detach even if the provider is open.
+.It Fl l
+Mark provider to detach on last close.
+If this option is specified, the provider will not be detached
+while it is open, but will be automatically detached when it is closed for the
+last time even if it was only opened for reading.
+.El
+.It Cm onetime
+Attach the given providers with a random, one-time (ephemeral) Master Key.
+The command can be used to encrypt swap partitions or temporary file systems.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl a Ar sectorsize"
+.It Fl a Ar aalgo
+Enable data integrity verification (authentication).
+For more information, see the description of the
+.Cm init
+subcommand.
+.It Fl e Ar ealgo
+Encryption algorithm to use.
+For more information, see the description of the
+.Cm init
+subcommand.
+.It Fl d
+Detach on last close.
+Note: this option is not usable for temporary file systems as the provider will
+be detached after creating the file system on it.
+It can (and should) still be used for swap partitions.
+For more information, see the description of the
+.Cm attach
+subcommand.
+.It Fl l Ar keylen
+Data Key length to use with the given cryptographic algorithm.
+For more information, see the description of the
+.Cm init
+subcommand.
+.It Fl s Ar sectorsize
+Change decrypted provider's sector size.
+For more information, see the description of the
+.Cm init
+subcommand.
+.It Fl T
+Disable TRIM/UNMAP passthru.
+For more information, see the description of the
+.Cm init
+subcommand.
+.El
+.It Cm configure
+Change configuration of the given providers.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl b"
+.It Fl b
+Set the BOOT flag on the given providers.
+For more information, see the description of the
+.Cm init
+subcommand.
+.It Fl B
+Remove the BOOT flag from the given providers.
+.It Fl d
+When entering the passphrase to boot from this encrypted root filesystem, echo
+.Ql *
+characters.
+This makes the length of the passphrase visible.
+.It Fl D
+Disable echoing of any characters when a passphrase is entered to boot from this
+encrypted root filesystem.
+This hides the passphrase length.
+.It Fl g
+Enable booting from this encrypted root filesystem.
+The boot loader prompts for the passphrase and loads
+.Xr loader 8
+from the encrypted partition.
+.It Fl G
+Deactivate booting from this encrypted root partition.
+.It Fl t
+Enable TRIM/UNMAP passthru.
+For more information, see the description of the
+.Cm init
+subcommand.
+.It Fl T
+Disable TRIM/UNMAP passthru.
+.El
+.It Cm setkey
+Install a copy of the Master Key into the selected slot, encrypted with
+a new User Key.
+If the selected slot is populated, replace the existing copy.
+A provider has one Master Key, which can be stored in one or both slots,
+each encrypted with an independent User Key.
+With the
+.Cm init
+subcommand, only key number 0 is initialized.
+The User Key can be changed at any time: for an attached provider,
+for a detached provider, or on the backup file.
+When a provider is attached, the user does not have to provide
+an existing passphrase/keyfile.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl J Ar newpassfile"
+.It Fl i Ar iterations
+Number of iterations to use with PKCS#5v2.
+If 0 is given, PKCS#5v2 will not be used.
+To be able to use this option with the
+.Cm setkey
+subcommand, only one key has to be defined and this key must be changed.
+.It Fl j Ar passfile
+Specifies a file which contains the passphrase component of a current User Key
+(or part of it).
+.It Fl J Ar newpassfile
+Specifies a file which contains the passphrase component of the new User Key
+(or part of it).
+.It Fl k Ar keyfile
+Specifies a file which contains the keyfile component of a current User Key
+(or part of it).
+.It Fl K Ar newkeyfile
+Specifies a file which contains the keyfile component of the new User Key
+(or part of it).
+.It Fl n Ar keyno
+Specifies the index number of the Master Key copy to change (could be 0 or 1).
+If the provider is attached and no key number is given, the key
+used for attaching the provider will be changed.
+If the provider is detached (or we are operating on a backup file)
+and no key number is given, the first Master Key copy to be successfully
+decrypted with the provided User Key passphrase/keyfile will be changed.
+.It Fl p
+Do not use a passphrase as a component of the current User Key.
+Cannot be combined with the
+.Fl j
+option.
+.It Fl P
+Do not use a passphrase as a component of the new User Key.
+Cannot be combined with the
+.Fl J
+option.
+.El
+.It Cm delkey
+Destroy (overwrite with random data) the selected Master Key copy.
+If one is destroying keys for an attached provider, the provider
+will not be detached even if all copies of the Master Key are destroyed.
+It can even be rescued with the
+.Cm setkey
+subcommand because the Master Key is still in memory.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl a Ar keyno"
+.It Fl a
+Destroy all copies of the Master Key (does not need
+.Fl f
+option).
+.It Fl f
+Force key destruction.
+This option is needed to destroy the last copy of the Master Key.
+.It Fl n Ar keyno
+Specifies the index number of the Master Key copy.
+If the provider is attached and no key number is given, the key
+used for attaching the provider will be destroyed.
+If the provider is detached (or we are operating on a backup file), the key number
+has to be given.
+.El
+.It Cm kill
+This command should be used only in emergency situations.
+It will destroy all copies of the Master Key on a given provider and will
+detach it forcibly (if it is attached).
+This is absolutely a one-way command - if you do not have a metadata
+backup, your data is gone for good.
+In case the provider was attached with the
+.Fl r
+flag, the keys will not be destroyed, only the provider will be detached.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl a"
+.It Fl a
+If specified, all currently attached providers will be killed.
+.El
+.It Cm backup
+Backup metadata from the given provider to the given file.
+.It Cm restore
+Restore metadata from the given file to the given provider.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Metadata contains the size of the provider to ensure that the correct
+partition or slice is attached.
+If an attempt is made to restore metadata to a provider that has a different
+size,
+.Nm
+will refuse to restore the data unless the
+.Fl f
+switch is used.
+If the partition or slice has been grown, the
+.Cm resize
+subcommand should be used rather than attempting to relocate the metadata
+through
+.Cm backup
+and
+.Cm restore .
+.El
+.It Cm suspend
+Suspend a device by waiting for all inflight requests to finish, clearing all
+sensitive information (like the Master Key and Data Keys) from kernel memory,
+and blocking all further I/O requests until the
+.Cm resume
+subcommand is executed.
+This functionality is useful for laptops: when one wants to suspend a
+laptop, one does not want to leave an encrypted device attached.
+Instead of closing all files and directories opened from a file system located
+on an encrypted device, unmounting the file system, and detaching the device,
+the
+.Cm suspend
+subcommand can be used.
+Any access to the encrypted device will be blocked until the Master Key is
+reloaded through the
+.Cm resume
+subcommand.
+Thus there is no need to close nor unmount anything.
+The
+.Cm suspend
+subcommand does not work with devices created with the
+.Cm onetime
+subcommand.
+Please note that sensitive data might still be present in memory after
+suspending an encrypted device due to the file system cache, etc.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl a"
+.It Fl a
+Suspend all
+.Nm
+devices.
+.El
+.It Cm resume
+Resume a previously suspended device.
+The caller must ensure that executing this subcommand does not access the
+suspended device, as doing so would lead to a deadlock.
+For example, suspending a device which contains the file system where the
+.Nm
+utility is stored is a bad idea.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl j Ar passfile"
+.It Fl j Ar passfile
+Specifies a file which contains the passphrase component of the User Key
+(or part of it).
+For more information see the description of the
+.Fl J
+option for the
+.Cm init
+subcommand.
+.It Fl k Ar keyfile
+Specifies a file which contains the keyfile component of the User Key
+(or part of it).
+For more information see the description of the
+.Fl K
+option for the
+.Cm init
+subcommand.
+.It Fl p
+Do not use a passphrase as a component of the User Key.
+Cannot be combined with the
+.Fl j
+option.
+.El
+.It Cm resize
+Inform
+.Nm
+that the provider has been resized.
+The old metadata block is relocated to the correct position at the end of the
+provider and the provider size is updated.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl s Ar oldsize"
+.It Fl s Ar oldsize
+The size of the provider before it was resized.
+.El
+.It Cm version
+If no arguments are given, the
+.Cm version
+subcommand will print the version of the
+.Nm
+userland utility as well as the version of the
+.Nm ELI
+GEOM class.
+.Pp
+If GEOM providers are specified, the
+.Cm version
+subcommand will print the metadata version used by each of them.
+.It Cm clear
+Clear metadata from the given providers.
+.Em WARNING :
+This will erase with zeros the encrypted Master Key copies stored in the
+metadata.
+.It Cm dump
+Dump metadata stored on the given providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl v"
+.It Fl v
+Be more verbose.
+.El
+.Sh KEY SUMMARY
+.Ss Master Key
+Upon
+.Cm init ,
+the
+.Nm
+utility generates a random Master Key for the provider.
+The Master Key never changes during the lifetime of the provider.
+Each copy of the provider metadata, active or backed up to a file, can store
+up to two, independently-encrypted copies of the Master Key.
+.Ss User Key
+Each stored copy of the Master Key is encrypted with a User Key, which
+is generated by the
+.Nm
+utility from a passphrase and/or a keyfile.
+The
+.Nm
+utility first reads all parts of the keyfile in the order specified on the
+command line, then reads all parts of the stored passphrase in the order
+specified on the command line.
+If no passphrase parts are specified, the system prompts the user to enter
+the passphrase.
+The passphrase is optionally strengthened by PKCS#5v2.
+The User Key is a digest computed over the concatenated keyfile and passphrase.
+.Ss Data Key
+During operation, one or more Data Keys are deterministically derived by
+the kernel from the Master Key and cached in memory.
+The number of Data Keys used by a given provider, and the way they are
+derived, depend on the GELI version and whether the provider is configured to
+use data authentication.
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm ELI
+GEOM class.
+The default value is shown next to each variable.
+Some variables can also be set in
+.Pa /boot/loader.conf .
+.Bl -tag -width indent
+.It Va kern.geom.eli.version
+Version number of the
+.Nm ELI
+GEOM class.
+.It Va kern.geom.eli.debug : No 0
+Debug level of the
+.Nm ELI
+GEOM class.
+This can be set to a number between 0 and 3 inclusive.
+If set to 0, minimal debug information is printed.
+If set to 3, the
+maximum amount of debug information is printed.
+.It Va kern.geom.eli.tries : No 3
+Number of times a user is asked for the passphrase.
+This is only used for providers which are attached on boot
+(before the root file system is mounted).
+If set to 0, attaching providers on boot will be disabled.
+This variable should be set in
+.Pa /boot/loader.conf .
+.It Va kern.geom.eli.overwrites : No 5
+Specifies how many times the Master Key will be overwritten
+with random values when it is destroyed.
+After this operation it is filled with zeros.
+.It Va kern.geom.eli.visible_passphrase : No 0
+If set to 1, the passphrase entered on boot (before the root
+file system is mounted) will be visible.
+This alternative should be used with caution as the entered
+passphrase can be logged and exposed via
+.Xr dmesg 8 .
+This variable should be set in
+.Pa /boot/loader.conf .
+.It Va kern.geom.eli.threads : No 0
+Specifies how many kernel threads should be used for doing software
+cryptography.
+Its purpose is to increase performance on SMP systems.
+If set to 0, a CPU-pinned thread will be started for every active CPU.
+.It Va kern.geom.eli.batch : No 0
+When set to 1, can speed up crypto operations by using batching.
+Batching reduces the number of interrupts by responding to a group of
+crypto requests with one interrupt.
+The crypto card and the driver have to support this feature.
+.It Va kern.geom.eli.key_cache_limit : No 8192
+Specifies how many Data Keys to cache.
+The default limit
+(8192 keys) will allow caching of all keys for a 4TB provider with 512 byte
+sectors and will take around 1MB of memory.
+.It Va kern.geom.eli.key_cache_hits
+Reports how many times we were looking up a Data Key and it was already in
+cache.
+This sysctl is not updated for providers that need fewer Data Keys than
+the limit specified in
+.Va kern.geom.eli.key_cache_limit .
+.It Va kern.geom.eli.key_cache_misses
+Reports how many times we were looking up a Data Key and it was not in cache.
+This sysctl is not updated for providers that need fewer Data Keys than the limit
+specified in
+.Va kern.geom.eli.key_cache_limit .
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+Initialize a provider which is going to be encrypted with a
+passphrase and random data from a file on the user's pen drive.
+Use 4kB sector size.
+Attach the provider, create a file system, and mount it.
+Do the work.
+Unmount the provider and detach it:
+.Bd -literal -offset indent
+# dd if=/dev/random of=/mnt/pendrive/da2.key bs=64 count=1
+# geli init -s 4096 -K /mnt/pendrive/da2.key /dev/da2
+Enter new passphrase:
+Reenter new passphrase:
+# geli attach -k /mnt/pendrive/da2.key /dev/da2
+Enter passphrase:
+# dd if=/dev/random of=/dev/da2.eli bs=1m
+# newfs /dev/da2.eli
+# mount /dev/da2.eli /mnt/secret
+\&...
+# umount /mnt/secret
+# geli detach da2.eli
+.Ed
+.Pp
+Create an encrypted provider, but use two User Keys:
+one for your employee and one for you as the company's security officer
+(so it is not a tragedy if the employee
+.Qq accidentally
+forgets his passphrase):
+.Bd -literal -offset indent
+# geli init /dev/da2
+Enter new passphrase: (enter security officer's passphrase)
+Reenter new passphrase:
+# geli setkey -n 1 /dev/da2
+Enter passphrase: (enter security officer's passphrase)
+Enter new passphrase: (let your employee enter his passphrase ...)
+Reenter new passphrase: (... twice)
+.Ed
+.Pp
+You are the security officer in your company.
+Create an encrypted provider for use by the user, but remember that users
+forget their passphrases, so backup the Master Key with your own random key:
+.Bd -literal -offset indent
+# dd if=/dev/random of=/mnt/pendrive/keys/`hostname` bs=64 count=1
+# geli init -P -K /mnt/pendrive/keys/`hostname` /dev/ada0s1e
+# geli backup /dev/ada0s1e /mnt/pendrive/backups/`hostname`
+(use key number 0, so the encrypted Master Key will be re-encrypted by this)
+# geli setkey -n 0 -k /mnt/pendrive/keys/`hostname` /dev/ada0s1e
+(allow the user to enter his passphrase)
+Enter new passphrase:
+Reenter new passphrase:
+.Ed
+.Pp
+Encrypted swap partition setup:
+.Bd -literal -offset indent
+# dd if=/dev/random of=/dev/ada0s1b bs=1m
+# geli onetime -d -e 3des ada0s1b
+# swapon /dev/ada0s1b.eli
+.Ed
+.Pp
+The example below shows how to configure two providers which will be attached
+on boot (before the root file system is mounted).
+One of them is using a passphrase and three keyfile parts and the other is
+using only a keyfile in one part:
+.Bd -literal -offset indent
+# dd if=/dev/random of=/dev/da0 bs=1m
+# dd if=/dev/random of=/boot/keys/da0.key0 bs=32k count=1
+# dd if=/dev/random of=/boot/keys/da0.key1 bs=32k count=1
+# dd if=/dev/random of=/boot/keys/da0.key2 bs=32k count=1
+# geli init -b -K /boot/keys/da0.key0 -K /boot/keys/da0.key1 -K /boot/keys/da0.key2 da0
+Enter new passphrase:
+Reenter new passphrase:
+# dd if=/dev/random of=/dev/da1s3a bs=1m
+# dd if=/dev/random of=/boot/keys/da1s3a.key bs=128k count=1
+# geli init -b -P -K /boot/keys/da1s3a.key da1s3a
+.Ed
+.Pp
+Now that the providers are initialized, we have to add these lines to
+.Pa /boot/loader.conf :
+.Bd -literal -offset indent
+geli_da0_keyfile0_load="YES"
+geli_da0_keyfile0_type="da0:geli_keyfile0"
+geli_da0_keyfile0_name="/boot/keys/da0.key0"
+geli_da0_keyfile1_load="YES"
+geli_da0_keyfile1_type="da0:geli_keyfile1"
+geli_da0_keyfile1_name="/boot/keys/da0.key1"
+geli_da0_keyfile2_load="YES"
+geli_da0_keyfile2_type="da0:geli_keyfile2"
+geli_da0_keyfile2_name="/boot/keys/da0.key2"
+
+geli_da1s3a_keyfile0_load="YES"
+geli_da1s3a_keyfile0_type="da1s3a:geli_keyfile0"
+geli_da1s3a_keyfile0_name="/boot/keys/da1s3a.key"
+.Ed
+.Pp
+If there is only one keyfile, the index might be omitted:
+.Bd -literal -offset indent
+geli_da1s3a_keyfile_load="YES"
+geli_da1s3a_keyfile_type="da1s3a:geli_keyfile"
+geli_da1s3a_keyfile_name="/boot/keys/da1s3a.key"
+.Ed
+.Pp
+Configure not only encryption, but also data integrity verification using
+.Nm HMAC/SHA256 .
+.Bd -literal -offset indent
+# geli init -a hmac/sha256 -s 4096 /dev/da0
+Enter new passphrase:
+Reenter new passphrase:
+# geli attach /dev/da0
+Enter passphrase:
+# dd if=/dev/random of=/dev/da0.eli bs=1m
+# newfs /dev/da0.eli
+# mount /dev/da0.eli /mnt/secret
+.Ed
+.Pp
+.Nm
+writes the metadata backup by default to the
+.Pa /var/backups/<prov>.eli
+file.
+If the metadata is lost in any way (e.g., by accidental overwrite), it can be restored.
+Consider the following situation:
+.Bd -literal -offset indent
+# geli init /dev/da0
+Enter new passphrase:
+Reenter new passphrase:
+
+Metadata backup can be found in /var/backups/da0.eli and
+can be restored with the following command:
+
+ # geli restore /var/backups/da0.eli /dev/da0
+
+# geli clear /dev/da0
+# geli attach /dev/da0
+geli: Cannot read metadata from /dev/da0: Invalid argument.
+# geli restore /var/backups/da0.eli /dev/da0
+# geli attach /dev/da0
+Enter passphrase:
+.Ed
+.Pp
+If an encrypted file system is extended, it is necessary to relocate and
+update the metadata:
+.Bd -literal -offset indent
+# gpart create -s GPT ada0
+# gpart add -s 1g -t freebsd-ufs -i 1 ada0
+# geli init -K keyfile -P ada0p1
+# gpart resize -s 2g -i 1 ada0
+# geli resize -s 1g ada0p1
+# geli attach -k keyfile -p ada0p1
+.Ed
+.Pp
+Initialize a provider with the passphrase split into two files.
+The provider can be attached using those two files or by entering
+.Dq foobar
+as the passphrase at the
+.Nm
+prompt:
+.Bd -literal -offset indent
+# echo foo > da0.pass0
+# echo bar > da0.pass1
+# geli init -J da0.pass0 -J da0.pass1 da0
+# geli attach -j da0.pass0 -j da0.pass1 da0
+# geli detach da0
+# geli attach da0
+Enter passphrase: foobar
+.Ed
+.Pp
+Suspend all
+.Nm
+devices on a laptop, suspend the laptop, then resume devices one by one after
+resuming the laptop:
+.Bd -literal -offset indent
+# geli suspend -a
+# zzz
+<resume your laptop>
+# geli resume -p -k keyfile gpt/secret
+# geli resume gpt/private
+Enter passphrase:
+.Ed
+.Sh ENCRYPTION MODES
+.Nm
+supports two encryption modes:
+.Nm XTS ,
+which was standardized as
+.Nm IEEE P1619
+and
+.Nm CBC
+with unpredictable IV.
+The
+.Nm CBC
+mode used by
+.Nm
+is very similar to the mode
+.Nm ESSIV .
+.Sh DATA AUTHENTICATION
+.Nm
+can verify data integrity when an authentication algorithm is specified.
+When data corruption/modification is detected,
+.Nm
+will not return any data, but instead will return an error
+.Pq Er EINVAL .
+The offset and size of the corrupted data will be printed on the console.
+It is important to know against which attacks
+.Nm
+provides protection for your data.
+If data is modified in-place or copied from one place on the disk
+to another even without modification,
+.Nm
+should be able to detect such a change.
+If an attacker can remember the encrypted data, he can overwrite any future
+changes with the data he owns without it being noticed.
+In other words
+.Nm
+will not protect your data against replay attacks.
+.Pp
+It is recommended to write to the whole provider before first use,
+in order to make sure that all sectors and their corresponding
+checksums are properly initialized into a consistent state.
+One can safely ignore data authentication errors that occur immediately
+after the first time a provider is attached and before it is
+initialized in this way.
+.Sh SEE ALSO
+.Xr crypto 4 ,
+.Xr gbde 4 ,
+.Xr geom 4 ,
+.Xr loader.conf 5 ,
+.Xr gbde 8 ,
+.Xr geom 8 ,
+.Xr crypto 9
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 6.0 .
+Support for the
+.Nm Camellia
+block cipher was implemented by Yoshisato Yanagisawa in
+.Fx 7.0 .
+.Pp
+Highest
+.Nm GELI
+metadata version supported by the given FreeBSD version:
+.Bl -column -offset indent ".Sy FreeBSD" ".Sy version"
+.It Sy FreeBSD Ta Sy GELI
+.It Sy version Ta Sy version
+.Pp
+.It Li 6.0 Ta 0
+.It Li 6.1 Ta 0
+.It Li 6.2 Ta 3
+.It Li 6.3 Ta 3
+.It Li 6.4 Ta 3
+.Pp
+.It Li 7.0 Ta 3
+.It Li 7.1 Ta 3
+.It Li 7.2 Ta 3
+.It Li 7.3 Ta 3
+.It Li 7.4 Ta 3
+.Pp
+.It Li 8.0 Ta 3
+.It Li 8.1 Ta 3
+.It Li 8.2 Ta 5
+.Pp
+.It Li 9.0 Ta 6
+.Pp
+.It Li 10.0 Ta 7
+.El
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
diff --git a/lib/geom/eli/geom_eli.c b/lib/geom/eli/geom_eli.c
new file mode 100644
index 000000000000..f6ed6a88fb4f
--- /dev/null
+++ b/lib/geom/eli/geom_eli.c
@@ -0,0 +1,1769 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/sysctl.h>
+#include <sys/resource.h>
+#include <opencrypto/cryptodev.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgeom.h>
+#include <paths.h>
+#include <readpassphrase.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <geom/eli/g_eli.h>
+#include <geom/eli/pkcs5v2.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+
+/*
+ * Version symbols defined as non-static globals by this class library.
+ * NOTE(review): presumably read by the geom(8) front end when it loads the
+ * library -- confirm against the core code.
+ */
+uint32_t lib_version = G_LIB_VERSION;
+/* G_ELI_VERSION is reported to users as the highest supported metadata version. */
+uint32_t version = G_ELI_VERSION;
+
+/* Directory prefix used by eli_init() for automatically named metadata backups. */
+#define GELI_BACKUP_DIR "/var/backups/"
+/*
+ * Default encryption algorithm name: the default for 'onetime' -e and, in
+ * eli_init(), for metadata versions >= G_ELI_VERSION_05 when -e is not given.
+ */
+#define GELI_ENC_ALGO "aes"
+
+/* Entry point referenced from class_commands[]; dispatches on the "verb". */
+static void eli_main(struct gctl_req *req, unsigned flags);
+/* Per-verb handlers invoked by eli_main(). */
+static void eli_init(struct gctl_req *req);
+static void eli_attach(struct gctl_req *req);
+static void eli_configure(struct gctl_req *req);
+static void eli_setkey(struct gctl_req *req);
+static void eli_delkey(struct gctl_req *req);
+static void eli_resume(struct gctl_req *req);
+static void eli_kill(struct gctl_req *req);
+static void eli_backup(struct gctl_req *req);
+static void eli_restore(struct gctl_req *req);
+static void eli_resize(struct gctl_req *req);
+static void eli_version(struct gctl_req *req);
+static void eli_clear(struct gctl_req *req);
+static void eli_dump(struct gctl_req *req);
+
+/* Writes a metadata backup of 'prov' to 'file'; eli_init() treats 0 as success. */
+static int eli_backup_create(struct gctl_req *req, const char *prov,
+ const char *file);
+
+/*
+ * Available commands:
+ *
+ * init [-bdgPTv] [-a aalgo] [-B backupfile] [-e ealgo] [-i iterations] [-l keylen] [-J newpassfile] [-K newkeyfile] [-s sectorsize] [-V version] prov
+ * label - alias for 'init'
+ * attach [-Cdprv] [-n keyno] [-j passfile] [-k keyfile] prov
+ * detach [-fl] prov ...
+ * stop - alias for 'detach'
+ * onetime [-dT] [-a aalgo] [-e ealgo] [-l keylen] [-s sectorsize] prov
+ * configure [-bBdDgGtT] prov ...
+ * setkey [-pPv] [-n keyno] [-i iterations] [-j passfile] [-J newpassfile] [-k keyfile] [-K newkeyfile] prov
+ * delkey [-afv] [-n keyno] prov
+ * suspend [-v] -a | prov ...
+ * resume [-pv] [-j passfile] [-k keyfile] prov
+ * kill [-av] [prov ...]
+ * backup [-v] prov file
+ * restore [-fv] file prov
+ * resize [-v] -s oldsize prov
+ * version [prov ...]
+ * clear [-v] prov ...
+ * dump [-v] prov ...
+ */
+/*
+ * Command table for the ELI class.  Each entry gives the verb, the
+ * G_FLAG_* flags, the handler (eli_main for verbs processed in this file,
+ * NULL otherwise), the accepted options and the usage string.
+ * NOTE(review): verbs with a NULL handler are presumably passed straight to
+ * the kernel by the geom(8) core -- confirm.
+ *
+ * "label" is an alias for "init" and is served by the same eli_init()
+ * handler, so it must accept exactly the same options.  eli_init() reads
+ * "notrim" unconditionally, hence -T is listed for "label" as well.
+ */
+struct g_command class_commands[] = {
+	{ "init", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'a', "aalgo", "", G_TYPE_STRING },
+		{ 'b', "boot", NULL, G_TYPE_BOOL },
+		{ 'B', "backupfile", "", G_TYPE_STRING },
+		{ 'd', "displaypass", NULL, G_TYPE_BOOL },
+		{ 'e', "ealgo", "", G_TYPE_STRING },
+		{ 'g', "geliboot", NULL, G_TYPE_BOOL },
+		{ 'i', "iterations", "-1", G_TYPE_NUMBER },
+		{ 'J', "newpassfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'K', "newkeyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'l', "keylen", "0", G_TYPE_NUMBER },
+		{ 'P', "nonewpassphrase", NULL, G_TYPE_BOOL },
+		{ 's', "sectorsize", "0", G_TYPE_NUMBER },
+		{ 'T', "notrim", NULL, G_TYPE_BOOL },
+		{ 'V', "mdversion", "-1", G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-bdgPTv] [-a aalgo] [-B backupfile] [-e ealgo] [-i iterations] [-l keylen] [-J newpassfile] [-K newkeyfile] [-s sectorsize] [-V version] prov"
+	},
+	{ "label", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'a', "aalgo", "", G_TYPE_STRING },
+		{ 'b', "boot", NULL, G_TYPE_BOOL },
+		{ 'B', "backupfile", "", G_TYPE_STRING },
+		{ 'd', "displaypass", NULL, G_TYPE_BOOL },
+		{ 'e', "ealgo", "", G_TYPE_STRING },
+		{ 'g', "geliboot", NULL, G_TYPE_BOOL },
+		{ 'i', "iterations", "-1", G_TYPE_NUMBER },
+		{ 'J', "newpassfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'K', "newkeyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'l', "keylen", "0", G_TYPE_NUMBER },
+		{ 'P', "nonewpassphrase", NULL, G_TYPE_BOOL },
+		{ 's', "sectorsize", "0", G_TYPE_NUMBER },
+		/* Keep in sync with "init": eli_init() always reads "notrim". */
+		{ 'T', "notrim", NULL, G_TYPE_BOOL },
+		{ 'V', "mdversion", "-1", G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "- an alias for 'init'"
+	},
+	{ "attach", G_FLAG_VERBOSE | G_FLAG_LOADKLD, eli_main,
+	    {
+		{ 'C', "dryrun", NULL, G_TYPE_BOOL },
+		{ 'd', "detach", NULL, G_TYPE_BOOL },
+		{ 'j', "passfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'k', "keyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'n', "keyno", "-1", G_TYPE_NUMBER },
+		{ 'p', "nopassphrase", NULL, G_TYPE_BOOL },
+		{ 'r', "readonly", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-Cdprv] [-n keyno] [-j passfile] [-k keyfile] prov"
+	},
+	{ "detach", 0, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		{ 'l', "last", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-fl] prov ..."
+	},
+	{ "stop", 0, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		{ 'l', "last", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "- an alias for 'detach'"
+	},
+	{ "onetime", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL,
+	    {
+		{ 'a', "aalgo", "", G_TYPE_STRING },
+		{ 'd', "detach", NULL, G_TYPE_BOOL },
+		{ 'e', "ealgo", GELI_ENC_ALGO, G_TYPE_STRING },
+		{ 'l', "keylen", "0", G_TYPE_NUMBER },
+		{ 's', "sectorsize", "0", G_TYPE_NUMBER },
+		{ 'T', "notrim", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-dT] [-a aalgo] [-e ealgo] [-l keylen] [-s sectorsize] prov"
+	},
+	{ "configure", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'b', "boot", NULL, G_TYPE_BOOL },
+		{ 'B', "noboot", NULL, G_TYPE_BOOL },
+		{ 'd', "displaypass", NULL, G_TYPE_BOOL },
+		{ 'D', "nodisplaypass", NULL, G_TYPE_BOOL },
+		{ 'g', "geliboot", NULL, G_TYPE_BOOL },
+		{ 'G', "nogeliboot", NULL, G_TYPE_BOOL },
+		{ 't', "trim", NULL, G_TYPE_BOOL },
+		{ 'T', "notrim", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-bBdDgGtT] prov ..."
+	},
+	{ "setkey", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'i', "iterations", "-1", G_TYPE_NUMBER },
+		{ 'j', "passfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'J', "newpassfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'k', "keyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'K', "newkeyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'n', "keyno", "-1", G_TYPE_NUMBER },
+		{ 'p', "nopassphrase", NULL, G_TYPE_BOOL },
+		{ 'P', "nonewpassphrase", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-pPv] [-n keyno] [-i iterations] [-j passfile] [-J newpassfile] [-k keyfile] [-K newkeyfile] prov"
+	},
+	{ "delkey", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'a', "all", NULL, G_TYPE_BOOL },
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		{ 'n', "keyno", "-1", G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-afv] [-n keyno] prov"
+	},
+	{ "suspend", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'a', "all", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-v] -a | prov ..."
+	},
+	{ "resume", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'j', "passfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'k', "keyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI },
+		{ 'p', "nopassphrase", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-pv] [-j passfile] [-k keyfile] prov"
+	},
+	{ "kill", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'a', "all", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-av] [prov ...]"
+	},
+	{ "backup", G_FLAG_VERBOSE, eli_main, G_NULL_OPTS,
+	    "[-v] prov file"
+	},
+	{ "restore", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-fv] file prov"
+	},
+	{ "resize", G_FLAG_VERBOSE, eli_main,
+	    {
+		{ 's', "oldsize", NULL, G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-v] -s oldsize prov"
+	},
+	{ "version", G_FLAG_LOADKLD, eli_main, G_NULL_OPTS,
+	    "[prov ...]"
+	},
+	{ "clear", G_FLAG_VERBOSE, eli_main, G_NULL_OPTS,
+	    "[-v] prov ..."
+	},
+	{ "dump", G_FLAG_VERBOSE, eli_main, G_NULL_OPTS,
+	    "[-v] prov ..."
+	},
+	G_CMD_SENTINEL
+};
+
+/* Set to 1 by eli_main() when the request carries G_FLAG_VERBOSE. */
+static int verbose = 0;
+
+/* Size of the scratch buffers used for file reads and passphrases. */
+#define BUFSIZE 1024
+
+/*
+ * Apply process-wide protections before any command is run: set the core
+ * file size limit to zero and request locking of future memory mappings.
+ *
+ * Returns 0 on success.  On failure an error is recorded in 'req' and -1
+ * is returned.
+ */
+static int
+eli_protect(struct gctl_req *req)
+{
+	struct rlimit nocore = { .rlim_cur = 0, .rlim_max = 0 };
+
+	/* Core dumps off. */
+	if (setrlimit(RLIMIT_CORE, &nocore) == -1) {
+		gctl_error(req, "Cannot disable core dumps: %s.",
+		    strerror(errno));
+		return (-1);
+	}
+
+	/* Swapping off. */
+	if (mlockall(MCL_FUTURE) == -1) {
+		gctl_error(req, "Cannot lock memory: %s.", strerror(errno));
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Common entry point for every verb in class_commands[] that is handled
+ * in userland.  Applies eli_protect(), latches the verbose flag and then
+ * hands the request to the handler matching the "verb" parameter.
+ */
+static void
+eli_main(struct gctl_req *req, unsigned int flags)
+{
+	/* Verb name -> handler; "label" shares the "init" handler. */
+	static const struct {
+		const char *verb;
+		void (*handler)(struct gctl_req *);
+	} dispatch[] = {
+		{ "init", eli_init },
+		{ "label", eli_init },
+		{ "attach", eli_attach },
+		{ "configure", eli_configure },
+		{ "setkey", eli_setkey },
+		{ "delkey", eli_delkey },
+		{ "resume", eli_resume },
+		{ "kill", eli_kill },
+		{ "backup", eli_backup },
+		{ "restore", eli_restore },
+		{ "resize", eli_resize },
+		{ "version", eli_version },
+		{ "dump", eli_dump },
+		{ "clear", eli_clear },
+	};
+	const char *verb;
+	size_t n;
+
+	if (eli_protect(req) == -1)
+		return;
+
+	if ((flags & G_FLAG_VERBOSE) != 0)
+		verbose = 1;
+
+	verb = gctl_get_ascii(req, "verb");
+	if (verb == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+
+	for (n = 0; n < sizeof(dispatch) / sizeof(dispatch[0]); n++) {
+		if (strcmp(verb, dispatch[n].verb) == 0) {
+			dispatch[n].handler(req);
+			return;
+		}
+	}
+	gctl_error(req, "Unknown command: %s.", verb);
+}
+
+/*
+ * Report whether 'prov' currently has a GELI device on top of it.
+ *
+ * This is a shortcut rather than a proper query: the provider name with
+ * G_ELI_SUFFIX appended is probed for its sector size, and a positive
+ * answer is taken to mean that the device exists.
+ */
+static bool
+eli_is_attached(const char *prov)
+{
+	char elidev[MAXPATHLEN];
+
+	snprintf(elidev, sizeof(elidev), "%s%s", prov, G_ELI_SUFFIX);
+	if (g_get_sectorsize(elidev) > 0)
+		return (true);
+	return (false);
+}
+
+/*
+ * Consume every "[new]keyfileN" or "[new]passfileN" parameter of the
+ * request, for N = 0, 1, ... until the first missing index.
+ *
+ * type        - "keyfile" or "passfile".
+ * new         - selects the "new"-prefixed parameter names.
+ * ctxp        - keyfile mode only: HMAC context fed with the file contents.
+ * passbuf     - passfile mode only: NUL-terminated buffer (empty on entry)
+ *               to which the first line of each file is appended.
+ * passbufsize - size of passbuf in bytes.
+ *
+ * A file name of "-" means standard input, which is never closed here.
+ *
+ * Returns the number of files processed, or -1 after recording an error
+ * in 'req'.  The local read buffer is wiped on every exit path and any
+ * file opened here is closed before returning, including on the
+ * "passphrase too long" error path.
+ */
+static int
+eli_genkey_files(struct gctl_req *req, bool new, const char *type,
+    struct hmac_ctx *ctxp, char *passbuf, size_t passbufsize)
+{
+	char *p, buf[BUFSIZE], argname[16];
+	const char *file;
+	int error, fd, i;
+	ssize_t done;
+
+	assert((strcmp(type, "keyfile") == 0 && ctxp != NULL &&
+	    passbuf == NULL && passbufsize == 0) ||
+	    (strcmp(type, "passfile") == 0 && ctxp == NULL &&
+	    passbuf != NULL && passbufsize > 0));
+	assert(strcmp(type, "keyfile") == 0 || passbuf[0] == '\0');
+
+	for (i = 0; ; i++) {
+		snprintf(argname, sizeof(argname), "%s%s%d",
+		    new ? "new" : "", type, i);
+
+		/* No more {key,pass}files? */
+		if (!gctl_has_param(req, argname))
+			return (i);
+
+		file = gctl_get_ascii(req, "%s", argname);
+		assert(file != NULL);
+
+		if (strcmp(file, "-") == 0)
+			fd = STDIN_FILENO;
+		else {
+			fd = open(file, O_RDONLY);
+			if (fd == -1) {
+				gctl_error(req, "Cannot open %s %s: %s.",
+				    type, file, strerror(errno));
+				return (-1);
+			}
+		}
+		if (strcmp(type, "keyfile") == 0) {
+			/* The whole file becomes part of the key. */
+			while ((done = read(fd, buf, sizeof(buf))) > 0)
+				g_eli_crypto_hmac_update(ctxp, buf, done);
+		} else /* if (strcmp(type, "passfile") == 0) */ {
+			assert(strcmp(type, "passfile") == 0);
+
+			/* Only the first line of the file is used. */
+			while ((done = read(fd, buf, sizeof(buf) - 1)) > 0) {
+				buf[done] = '\0';
+				p = strchr(buf, '\n');
+				if (p != NULL) {
+					*p = '\0';
+					done = p - buf;
+				}
+				if (strlcat(passbuf, buf, passbufsize) >=
+				    passbufsize) {
+					gctl_error(req,
+					    "Passphrase in %s too long.", file);
+					bzero(buf, sizeof(buf));
+					/* Do not leak the descriptor. */
+					if (strcmp(file, "-") != 0)
+						close(fd);
+					return (-1);
+				}
+				if (p != NULL)
+					break;
+			}
+		}
+		/* Save errno before close() can overwrite it. */
+		error = errno;
+		if (strcmp(file, "-") != 0)
+			close(fd);
+		bzero(buf, sizeof(buf));
+		if (done == -1) {
+			gctl_error(req, "Cannot read %s %s: %s.",
+			    type, file, strerror(error));
+			return (-1);
+		}
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Read a passphrase from the terminal into 'passbuf' (at most
+ * 'passbufsize' bytes, echo off, TTY required).
+ *
+ * When 'new' is true the passphrase is requested twice and the prompt is
+ * repeated until both entries match.
+ *
+ * Returns 0 on success, or -1 after wiping 'passbuf' and recording an
+ * error in 'req'.  The confirmation copy is wiped on every path that
+ * leaves its scope, not only on success.
+ */
+static int
+eli_genkey_passphrase_prompt(struct gctl_req *req, bool new, char *passbuf,
+    size_t passbufsize)
+{
+	char *p;
+
+	for (;;) {
+		p = readpassphrase(
+		    new ? "Enter new passphrase: " : "Enter passphrase: ",
+		    passbuf, passbufsize, RPP_ECHO_OFF | RPP_REQUIRE_TTY);
+		if (p == NULL) {
+			bzero(passbuf, passbufsize);
+			gctl_error(req, "Cannot read passphrase: %s.",
+			    strerror(errno));
+			return (-1);
+		}
+
+		if (new) {
+			char tmpbuf[BUFSIZE];
+
+			p = readpassphrase("Reenter new passphrase: ",
+			    tmpbuf, sizeof(tmpbuf),
+			    RPP_ECHO_OFF | RPP_REQUIRE_TTY);
+			if (p == NULL) {
+				/* Report first: bzero() must not disturb errno use. */
+				gctl_error(req,
+				    "Cannot read passphrase: %s.",
+				    strerror(errno));
+				bzero(passbuf, passbufsize);
+				bzero(tmpbuf, sizeof(tmpbuf));
+				return (-1);
+			}
+
+			if (strcmp(passbuf, tmpbuf) != 0) {
+				bzero(passbuf, passbufsize);
+				/* Wipe the rejected confirmation copy too. */
+				bzero(tmpbuf, sizeof(tmpbuf));
+				fprintf(stderr, "They didn't match.\n");
+				continue;
+			}
+			bzero(tmpbuf, sizeof(tmpbuf));
+		}
+		return (0);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Mix the passphrase component into the HMAC context 'ctxp'.
+ *
+ * The passphrase is taken from the "[new]passfileN" parameters if any are
+ * present, otherwise it is read from the terminal.  With the
+ * "no[new]passphrase" option set nothing is added (and combining it with
+ * a passfile is an error).
+ *
+ * md->md_iterations selects the derivation:
+ *   -1  pick a value automatically (only valid when 'new' is true),
+ *    0  feed salt and passphrase directly into the HMAC,
+ *   >0  derive a key with PKCS#5v2 and feed that into the HMAC.
+ *
+ * Returns 0 on success, or -1 after recording an error in 'req'.  The
+ * local passphrase buffer is wiped before returning, including when
+ * reading the passfiles fails part-way.
+ */
+static int
+eli_genkey_passphrase(struct gctl_req *req, struct g_eli_metadata *md, bool new,
+    struct hmac_ctx *ctxp)
+{
+	char passbuf[BUFSIZE];
+	bool nopassphrase;
+	int nfiles;
+
+	nopassphrase =
+	    gctl_get_int(req, new ? "nonewpassphrase" : "nopassphrase");
+	if (nopassphrase) {
+		if (gctl_has_param(req, new ? "newpassfile0" : "passfile0")) {
+			gctl_error(req,
+			    "Options -%c and -%c are mutually exclusive.",
+			    new ? 'J' : 'j', new ? 'P' : 'p');
+			return (-1);
+		}
+		return (0);
+	}
+
+	if (!new && md->md_iterations == -1) {
+		gctl_error(req, "Missing -p flag.");
+		return (-1);
+	}
+	passbuf[0] = '\0';
+	nfiles = eli_genkey_files(req, new, "passfile", NULL, passbuf,
+	    sizeof(passbuf));
+	if (nfiles == -1) {
+		/* Earlier passfiles may already have been appended. */
+		bzero(passbuf, sizeof(passbuf));
+		return (-1);
+	} else if (nfiles == 0) {
+		if (eli_genkey_passphrase_prompt(req, new, passbuf,
+		    sizeof(passbuf)) == -1) {
+			return (-1);
+		}
+	}
+	/*
+	 * Field md_iterations equal to -1 means "choose some sane
+	 * value for me".
+	 */
+	if (md->md_iterations == -1) {
+		assert(new);
+		if (verbose)
+			printf("Calculating number of iterations...\n");
+		md->md_iterations = pkcs5v2_calculate(2000000);
+		assert(md->md_iterations > 0);
+		if (verbose) {
+			printf("Done, using %d iterations.\n",
+			    md->md_iterations);
+		}
+	}
+	/*
+	 * If md_iterations is equal to 0, user doesn't want PKCS#5v2.
+	 */
+	if (md->md_iterations == 0) {
+		g_eli_crypto_hmac_update(ctxp, md->md_salt,
+		    sizeof(md->md_salt));
+		g_eli_crypto_hmac_update(ctxp, passbuf, strlen(passbuf));
+	} else /* if (md->md_iterations > 0) */ {
+		unsigned char dkey[G_ELI_USERKEYLEN];
+
+		pkcs5v2_genkey(dkey, sizeof(dkey), md->md_salt,
+		    sizeof(md->md_salt), passbuf, md->md_iterations);
+		g_eli_crypto_hmac_update(ctxp, dkey, sizeof(dkey));
+		bzero(dkey, sizeof(dkey));
+	}
+	bzero(passbuf, sizeof(passbuf));
+
+	return (0);
+}
+
+/*
+ * Build a user key into 'key' from the key files and/or passphrase named
+ * by the request.  'new' selects the "new*" family of options.
+ *
+ * Returns 'key' on success, or NULL after an error has been recorded in
+ * 'req' (including the case where neither a key file nor a passphrase
+ * was supplied).
+ */
+static unsigned char *
+eli_genkey(struct gctl_req *req, struct g_eli_metadata *md, unsigned char *key,
+    bool new)
+{
+	struct hmac_ctx hctx;
+	bool skippass;
+	int nkeyfiles;
+
+	skippass = gctl_get_int(req, new ? "nonewpassphrase" : "nopassphrase");
+
+	g_eli_crypto_hmac_init(&hctx, NULL, 0);
+
+	/* First component: contents of all key files. */
+	nkeyfiles = eli_genkey_files(req, new, "keyfile", &hctx, NULL, 0);
+	if (nkeyfiles == -1)
+		return (NULL);
+	if (nkeyfiles == 0 && skippass) {
+		gctl_error(req, "No key components given.");
+		return (NULL);
+	}
+
+	/* Second component: the passphrase, unless disabled. */
+	if (eli_genkey_passphrase(req, md, new, &hctx) == -1)
+		return (NULL);
+
+	g_eli_crypto_hmac_final(&hctx, key, 0);
+	return (key);
+}
+
+/*
+ * Load and decode GELI metadata for 'prov' into 'md'.
+ *
+ * When g_get_sectorsize() reports 0 the name is treated as a regular file
+ * and the metadata is read from its beginning; otherwise it is fetched
+ * with g_metadata_read().
+ *
+ * Returns 0 on success, or -1 after recording an error in 'req'.
+ */
+static int
+eli_metadata_read(struct gctl_req *req, const char *prov,
+    struct g_eli_metadata *md)
+{
+	unsigned char sector[sizeof(struct g_eli_metadata)];
+	int error;
+
+	if (g_get_sectorsize(prov) != 0) {
+		/* A GEOM provider. */
+		error = g_metadata_read(prov, sector, sizeof(sector),
+		    G_ELI_MAGIC);
+		if (error != 0) {
+			gctl_error(req, "Cannot read metadata from %s: %s.",
+			    prov, strerror(error));
+			return (-1);
+		}
+	} else {
+		int fd;
+
+		/* Most likely a plain file. */
+		fd = open(prov, O_RDONLY);
+		if (fd == -1) {
+			gctl_error(req, "Cannot open %s: %s.", prov,
+			    strerror(errno));
+			return (-1);
+		}
+		if (read(fd, sector, sizeof(sector)) != sizeof(sector)) {
+			gctl_error(req, "Cannot read metadata from %s: %s.",
+			    prov, strerror(errno));
+			close(fd);
+			return (-1);
+		}
+		close(fd);
+	}
+
+	error = eli_metadata_decode(sector, md);
+	if (error == 0)
+		return (0);
+
+	if (error == EOPNOTSUPP) {
+		gctl_error(req,
+		    "Provider's %s metadata version %u is too new.\n"
+		    "geli: The highest supported version is %u.",
+		    prov, (unsigned int)md->md_version, G_ELI_VERSION);
+	} else if (error == EINVAL) {
+		gctl_error(req, "Inconsistent provider's %s metadata.", prov);
+	} else {
+		gctl_error(req,
+		    "Unexpected error while decoding provider's %s metadata: %s.",
+		    prov, strerror(error));
+	}
+	return (-1);
+}
+
+/*
+ * Encode 'md' and write it to 'prov'.
+ *
+ * When g_get_sectorsize() reports 0 the name is treated as a regular
+ * file, which is truncated and rewritten; otherwise the metadata is
+ * written with g_metadata_store().
+ *
+ * Returns 0 on success, or -1 after recording an error in 'req'.  The
+ * encoded copy of the metadata is wiped on every exit path.
+ */
+static int
+eli_metadata_store(struct gctl_req *req, const char *prov,
+    struct g_eli_metadata *md)
+{
+	unsigned char sector[sizeof(struct g_eli_metadata)];
+	int error;
+
+	eli_metadata_encode(md, sector);
+	if (g_get_sectorsize(prov) == 0) {
+		int fd;
+
+		/* This is a file probably. */
+		fd = open(prov, O_WRONLY | O_TRUNC);
+		if (fd == -1) {
+			gctl_error(req, "Cannot open %s: %s.", prov,
+			    strerror(errno));
+			bzero(sector, sizeof(sector));
+			return (-1);
+		}
+		if (write(fd, sector, sizeof(sector)) != sizeof(sector)) {
+			gctl_error(req, "Cannot write metadata to %s: %s.",
+			    prov, strerror(errno));
+			bzero(sector, sizeof(sector));
+			close(fd);
+			return (-1);
+		}
+		close(fd);
+	} else {
+		/* This is a GEOM provider. */
+		error = g_metadata_store(prov, sector, sizeof(sector));
+		if (error != 0) {
+			/*
+			 * Report the code returned by g_metadata_store(),
+			 * as the other callers in this file do, rather
+			 * than whatever errno happens to hold.
+			 */
+			gctl_error(req, "Cannot write metadata to %s: %s.",
+			    prov, strerror(error));
+			bzero(sector, sizeof(sector));
+			return (-1);
+		}
+	}
+	bzero(sector, sizeof(sector));
+	return (0);
+}
+
+/*
+ * Handler for "init" and its alias "label".
+ *
+ * Builds fresh metadata for the single provider named by "arg0" from the
+ * request options, fills the salt and Master Key area with random bytes,
+ * encrypts the Master Key with the user key, stores the metadata on the
+ * provider and finally writes a metadata backup unless the backup file
+ * name is "none".
+ *
+ * Errors are reported through gctl_error() and end the function early.
+ * Key material held in local buffers is wiped before each return that
+ * follows its creation.
+ */
+static void
+eli_init(struct gctl_req *req)
+{
+ struct g_eli_metadata md;
+ unsigned char sector[sizeof(struct g_eli_metadata)] __aligned(4);
+ unsigned char key[G_ELI_USERKEYLEN];
+ char backfile[MAXPATHLEN];
+ const char *str, *prov;
+ unsigned int secsize, version;
+ off_t mediasize;
+ intmax_t val;
+ int error, nargs;
+
+ /* Exactly one provider must be given. */
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs != 1) {
+ gctl_error(req, "Invalid number of arguments.");
+ return;
+ }
+ prov = gctl_get_ascii(req, "arg0");
+ mediasize = g_get_mediasize(prov);
+ secsize = g_get_sectorsize(prov);
+ if (mediasize == 0 || secsize == 0) {
+ gctl_error(req, "Cannot get informations about %s: %s.", prov,
+ strerror(errno));
+ return;
+ }
+
+ bzero(&md, sizeof(md));
+ strlcpy(md.md_magic, G_ELI_MAGIC, sizeof(md.md_magic));
+ /* Metadata version: -1 (the option default) selects G_ELI_VERSION. */
+ val = gctl_get_intmax(req, "mdversion");
+ if (val == -1) {
+ version = G_ELI_VERSION;
+ } else if (val < 0 || val > G_ELI_VERSION) {
+ gctl_error(req,
+ "Invalid version specified should be between %u and %u.",
+ G_ELI_VERSION_00, G_ELI_VERSION);
+ return;
+ } else {
+ version = val;
+ }
+ md.md_version = version;
+ /* Translate the boolean options into metadata flags. */
+ md.md_flags = 0;
+ if (gctl_get_int(req, "boot"))
+ md.md_flags |= G_ELI_FLAG_BOOT;
+ if (gctl_get_int(req, "geliboot"))
+ md.md_flags |= G_ELI_FLAG_GELIBOOT;
+ if (gctl_get_int(req, "displaypass"))
+ md.md_flags |= G_ELI_FLAG_GELIDISPLAYPASS;
+ if (gctl_get_int(req, "notrim"))
+ md.md_flags |= G_ELI_FLAG_NODELETE;
+ /* Start with an out-of-range ealgo so "not chosen yet" is detectable. */
+ md.md_ealgo = CRYPTO_ALGORITHM_MIN - 1;
+ str = gctl_get_ascii(req, "aalgo");
+ if (*str != '\0') {
+ if (version < G_ELI_VERSION_01) {
+ gctl_error(req,
+ "Data authentication is supported starting from version %u.",
+ G_ELI_VERSION_01);
+ return;
+ }
+ md.md_aalgo = g_eli_str2aalgo(str);
+ if (md.md_aalgo >= CRYPTO_ALGORITHM_MIN &&
+ md.md_aalgo <= CRYPTO_ALGORITHM_MAX) {
+ md.md_flags |= G_ELI_FLAG_AUTH;
+ } else {
+ /*
+ * For backward compatibility, check if the -a option
+ * was used to provide encryption algorithm.
+ */
+ md.md_ealgo = g_eli_str2ealgo(str);
+ if (md.md_ealgo < CRYPTO_ALGORITHM_MIN ||
+ md.md_ealgo > CRYPTO_ALGORITHM_MAX) {
+ gctl_error(req,
+ "Invalid authentication algorithm.");
+ return;
+ } else {
+ fprintf(stderr, "warning: The -e option, not "
+ "the -a option is now used to specify "
+ "encryption algorithm to use.\n");
+ }
+ }
+ }
+ /* Encryption algorithm not already supplied through -a: use -e or the default. */
+ if (md.md_ealgo < CRYPTO_ALGORITHM_MIN ||
+ md.md_ealgo > CRYPTO_ALGORITHM_MAX) {
+ str = gctl_get_ascii(req, "ealgo");
+ if (*str == '\0') {
+ if (version < G_ELI_VERSION_05)
+ str = "aes-cbc";
+ else
+ str = GELI_ENC_ALGO;
+ }
+ md.md_ealgo = g_eli_str2ealgo(str);
+ if (md.md_ealgo < CRYPTO_ALGORITHM_MIN ||
+ md.md_ealgo > CRYPTO_ALGORITHM_MAX) {
+ gctl_error(req, "Invalid encryption algorithm.");
+ return;
+ }
+ if (md.md_ealgo == CRYPTO_CAMELLIA_CBC &&
+ version < G_ELI_VERSION_04) {
+ gctl_error(req,
+ "Camellia-CBC algorithm is supported starting from version %u.",
+ G_ELI_VERSION_04);
+ return;
+ }
+ if (md.md_ealgo == CRYPTO_AES_XTS &&
+ version < G_ELI_VERSION_05) {
+ gctl_error(req,
+ "AES-XTS algorithm is supported starting from version %u.",
+ G_ELI_VERSION_05);
+ return;
+ }
+ }
+ /* g_eli_keylen() returning 0 is treated as an invalid key length. */
+ val = gctl_get_intmax(req, "keylen");
+ md.md_keylen = val;
+ md.md_keylen = g_eli_keylen(md.md_ealgo, md.md_keylen);
+ if (md.md_keylen == 0) {
+ gctl_error(req, "Invalid key length.");
+ return;
+ }
+ md.md_provsize = mediasize;
+
+ val = gctl_get_intmax(req, "iterations");
+ if (val != -1) {
+ int nonewpassphrase;
+
+ /*
+ * Don't allow to set iterations when there will be no
+ * passphrase.
+ */
+ nonewpassphrase = gctl_get_int(req, "nonewpassphrase");
+ if (nonewpassphrase) {
+ gctl_error(req,
+ "Options -i and -P are mutually exclusive.");
+ return;
+ }
+ }
+ md.md_iterations = val;
+
+ /* Sector size: 0 (the option default) means the provider's own sector size. */
+ val = gctl_get_intmax(req, "sectorsize");
+ if (val == 0)
+ md.md_sectorsize = secsize;
+ else {
+ if (val < 0 || (val % secsize) != 0 || !powerof2(val)) {
+ gctl_error(req, "Invalid sector size.");
+ return;
+ }
+ if (val > sysconf(_SC_PAGE_SIZE)) {
+ fprintf(stderr,
+ "warning: Using sectorsize bigger than the page size!\n");
+ }
+ md.md_sectorsize = val;
+ }
+
+ /* Only key slot 0 is in use; salt and Master Key area get random bytes. */
+ md.md_keys = 0x01;
+ arc4random_buf(md.md_salt, sizeof(md.md_salt));
+ arc4random_buf(md.md_mkeys, sizeof(md.md_mkeys));
+
+ /* Generate user key. */
+ if (eli_genkey(req, &md, key, true) == NULL) {
+ bzero(key, sizeof(key));
+ bzero(&md, sizeof(md));
+ return;
+ }
+
+ /* Encrypt the first and the only Master Key. */
+ error = g_eli_mkey_encrypt(md.md_ealgo, key, md.md_keylen, md.md_mkeys);
+ bzero(key, sizeof(key));
+ if (error != 0) {
+ bzero(&md, sizeof(md));
+ gctl_error(req, "Cannot encrypt Master Key: %s.",
+ strerror(error));
+ return;
+ }
+
+ /* Encode, wipe the decoded copy, store, then wipe the encoded copy. */
+ eli_metadata_encode(&md, sector);
+ bzero(&md, sizeof(md));
+ error = g_metadata_store(prov, sector, sizeof(sector));
+ bzero(sector, sizeof(sector));
+ if (error != 0) {
+ gctl_error(req, "Cannot store metadata on %s: %s.", prov,
+ strerror(error));
+ return;
+ }
+ if (verbose)
+ printf("Metadata value stored on %s.\n", prov);
+ /* Backup metadata to a file. */
+ str = gctl_get_ascii(req, "backupfile");
+ if (str[0] != '\0') {
+ /* Backup file given by the user, just copy it. */
+ strlcpy(backfile, str, sizeof(backfile));
+ } else {
+ /* Generate file name automatically. */
+ const char *p = prov;
+ unsigned int i;
+
+ /* Strip a leading _PATH_DEV so the name is based on the bare device name. */
+ if (strncmp(p, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ p += sizeof(_PATH_DEV) - 1;
+ snprintf(backfile, sizeof(backfile), "%s%s.eli",
+ GELI_BACKUP_DIR, p);
+ /* Replace all / with _. */
+ for (i = strlen(GELI_BACKUP_DIR); backfile[i] != '\0'; i++) {
+ if (backfile[i] == '/')
+ backfile[i] = '_';
+ }
+ }
+ /* The literal name "none" disables the backup. */
+ if (strcmp(backfile, "none") != 0 &&
+ eli_backup_create(req, prov, backfile) == 0) {
+ printf("\nMetadata backup can be found in %s and\n", backfile);
+ printf("can be restored with the following command:\n");
+ printf("\n\t# geli restore %s %s\n\n", backfile, prov);
+ }
+}
+
+/*
+ * Handler for "attach": read the provider's metadata, derive the user key
+ * from the supplied key components, add it to the request as the "key"
+ * parameter and issue the request.  The key buffer is wiped afterwards.
+ */
+static void
+eli_attach(struct gctl_req *req)
+{
+	struct g_eli_metadata md;
+	unsigned char key[G_ELI_USERKEYLEN];
+	const char *prov;
+
+	if (gctl_get_int(req, "nargs") != 1) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+	prov = gctl_get_ascii(req, "arg0");
+
+	if (eli_metadata_read(req, prov, &md) == -1)
+		return;
+
+	/* The size recorded in the metadata must match the provider. */
+	if (md.md_provsize != (uint64_t)g_get_mediasize(prov)) {
+		gctl_error(req, "Provider size mismatch.");
+		return;
+	}
+
+	if (eli_genkey(req, &md, key, false) != NULL) {
+		gctl_ro_param(req, "key", sizeof(key), key);
+		if (gctl_issue(req) == NULL && verbose)
+			printf("Attached to %s.\n", prov);
+	}
+	bzero(key, sizeof(key));
+}
+
+/*
+ * Apply "configure" settings to a provider that is not attached by
+ * editing its on-disk metadata.
+ *
+ * Each of boot, geliboot, displaypass and trim is a tri-state: a negative
+ * value leaves the flag alone, 0 turns the feature off and a positive
+ * value turns it on.  Note that "trim" is stored inverted, as
+ * G_ELI_FLAG_NODELETE.  The metadata is written back only if something
+ * changed, and the local copy is wiped before returning.
+ */
+static void
+eli_configure_detached(struct gctl_req *req, const char *prov, int boot,
+    int geliboot, int displaypass, int trim)
+{
+	struct g_eli_metadata md;
+	bool dirty, isset;
+
+	if (eli_metadata_read(req, prov, &md) == -1)
+		return;
+	dirty = false;
+
+	if (boot >= 0) {
+		isset = (md.md_flags & G_ELI_FLAG_BOOT) != 0;
+		if (boot == 1 && isset) {
+			if (verbose)
+				printf("BOOT flag already configured for %s.\n", prov);
+		} else if (boot == 0 && !isset) {
+			if (verbose)
+				printf("BOOT flag not configured for %s.\n", prov);
+		} else {
+			if (boot != 0)
+				md.md_flags |= G_ELI_FLAG_BOOT;
+			else
+				md.md_flags &= ~G_ELI_FLAG_BOOT;
+			dirty = true;
+		}
+	}
+
+	if (geliboot >= 0) {
+		isset = (md.md_flags & G_ELI_FLAG_GELIBOOT) != 0;
+		if (geliboot == 1 && isset) {
+			if (verbose)
+				printf("GELIBOOT flag already configured for %s.\n", prov);
+		} else if (geliboot == 0 && !isset) {
+			if (verbose)
+				printf("GELIBOOT flag not configured for %s.\n", prov);
+		} else {
+			if (geliboot != 0)
+				md.md_flags |= G_ELI_FLAG_GELIBOOT;
+			else
+				md.md_flags &= ~G_ELI_FLAG_GELIBOOT;
+			dirty = true;
+		}
+	}
+
+	if (displaypass >= 0) {
+		isset = (md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0;
+		if (displaypass == 1 && isset) {
+			if (verbose)
+				printf("GELIDISPLAYPASS flag already configured for %s.\n", prov);
+		} else if (displaypass == 0 && !isset) {
+			if (verbose)
+				printf("GELIDISPLAYPASS flag not configured for %s.\n", prov);
+		} else {
+			if (displaypass != 0)
+				md.md_flags |= G_ELI_FLAG_GELIDISPLAYPASS;
+			else
+				md.md_flags &= ~G_ELI_FLAG_GELIDISPLAYPASS;
+			dirty = true;
+		}
+	}
+
+	/* Inverted sense: trim off means NODELETE on. */
+	if (trim >= 0) {
+		isset = (md.md_flags & G_ELI_FLAG_NODELETE) != 0;
+		if (trim == 0 && isset) {
+			if (verbose)
+				printf("TRIM disable flag already configured for %s.\n", prov);
+		} else if (trim == 1 && !isset) {
+			if (verbose)
+				printf("TRIM disable flag not configured for %s.\n", prov);
+		} else {
+			if (trim != 0)
+				md.md_flags &= ~G_ELI_FLAG_NODELETE;
+			else
+				md.md_flags |= G_ELI_FLAG_NODELETE;
+			dirty = true;
+		}
+	}
+
+	if (dirty)
+		eli_metadata_store(req, prov, &md);
+	bzero(&md, sizeof(md));
+}
+
+/*
+ * Handler for "configure".
+ *
+ * Validates the option pairs (-b/-B, -g/-G, -d/-D, -t/-T are mutually
+ * exclusive and at least one option is required), issues the request for
+ * the attached providers and then updates the metadata of every listed
+ * provider that is not attached.
+ */
+static void
+eli_configure(struct gctl_req *req)
+{
+	bool on_boot, off_boot, on_geliboot, off_geliboot;
+	bool on_displaypass, off_displaypass, on_trim, off_trim;
+	int doboot, dogeliboot, dodisplaypass, dotrim;
+	const char *prov;
+	int nargs, n;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs == 0) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	on_boot = gctl_get_int(req, "boot");
+	off_boot = gctl_get_int(req, "noboot");
+	on_geliboot = gctl_get_int(req, "geliboot");
+	off_geliboot = gctl_get_int(req, "nogeliboot");
+	on_displaypass = gctl_get_int(req, "displaypass");
+	off_displaypass = gctl_get_int(req, "nodisplaypass");
+	on_trim = gctl_get_int(req, "trim");
+	off_trim = gctl_get_int(req, "notrim");
+
+	/* Each pair collapses to a tri-state: 1 on, 0 off, -1 untouched. */
+	if (on_boot && off_boot) {
+		gctl_error(req, "Options -b and -B are mutually exclusive.");
+		return;
+	}
+	doboot = on_boot ? 1 : (off_boot ? 0 : -1);
+
+	if (on_geliboot && off_geliboot) {
+		gctl_error(req, "Options -g and -G are mutually exclusive.");
+		return;
+	}
+	dogeliboot = on_geliboot ? 1 : (off_geliboot ? 0 : -1);
+
+	if (on_displaypass && off_displaypass) {
+		gctl_error(req, "Options -d and -D are mutually exclusive.");
+		return;
+	}
+	dodisplaypass = on_displaypass ? 1 : (off_displaypass ? 0 : -1);
+
+	if (on_trim && off_trim) {
+		gctl_error(req, "Options -t and -T are mutually exclusive.");
+		return;
+	}
+	dotrim = on_trim ? 1 : (off_trim ? 0 : -1);
+
+	if (doboot == -1 && dogeliboot == -1 && dodisplaypass == -1 &&
+	    dotrim == -1) {
+		gctl_error(req, "No option given.");
+		return;
+	}
+
+	/* Attached providers are handled by issuing the request. */
+	gctl_issue(req);
+
+	/* Detached providers get their metadata edited directly. */
+	for (n = 0; n < nargs; n++) {
+		prov = gctl_get_ascii(req, "arg%d", n);
+		if (eli_is_attached(prov))
+			continue;
+		eli_configure_detached(req, prov, doboot, dogeliboot,
+		    dodisplaypass, dotrim);
+	}
+}
+
+/*
+ * "setkey" for a provider that is attached: derive the new user key,
+ * pass it in the request as the "key" parameter and issue the request.
+ *
+ * If -i was not given (value -1) but generating the key changed
+ * md->md_iterations, the request's "iterations" parameter is replaced
+ * with the new value.  The parameter is declared with a length of
+ * sizeof(intmax_t), so the value is first copied into an intmax_t local;
+ * pointing the request directly at md->md_iterations (which this file
+ * prints with %d) would make it read past that field.  The local remains
+ * in scope until gctl_issue() has run.
+ */
+static void
+eli_setkey_attached(struct gctl_req *req, struct g_eli_metadata *md)
+{
+	unsigned char key[G_ELI_USERKEYLEN];
+	intmax_t val, old = 0, iterations;
+	int error;
+
+	val = gctl_get_intmax(req, "iterations");
+	/* Check if iterations number should be changed. */
+	if (val != -1)
+		md->md_iterations = val;
+	else
+		old = md->md_iterations;
+
+	/* Generate key for Master Key encryption. */
+	if (eli_genkey(req, md, key, true) == NULL) {
+		bzero(key, sizeof(key));
+		return;
+	}
+	/*
+	 * If number of iterations has changed, but wasn't given as a
+	 * command-line argument, update the request.
+	 */
+	if (val == -1 && md->md_iterations != old) {
+		iterations = md->md_iterations;
+		error = gctl_change_param(req, "iterations",
+		    sizeof(iterations), &iterations);
+		assert(error == 0);
+	}
+
+	gctl_ro_param(req, "key", sizeof(key), key);
+	gctl_issue(req);
+	bzero(key, sizeof(key));
+}
+
+/*
+ * "setkey" for a provider that is not attached.
+ *
+ * Decrypts the Master Key with the current user key, chooses the key slot
+ * (-n, or the slot that decrypted successfully), optionally changes the
+ * iteration count (-i), re-encrypts the Master Key with the new user key
+ * and writes the metadata back to 'prov'.
+ *
+ * 'md' is wiped on the error paths after decryption and after the store.
+ * Once the Master Key has been decrypted, every return path, including
+ * the option-validation errors, also wipes the decrypted copy in 'mkey'.
+ */
+static void
+eli_setkey_detached(struct gctl_req *req, const char *prov,
+    struct g_eli_metadata *md)
+{
+	unsigned char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
+	unsigned char *mkeydst;
+	unsigned int nkey;
+	intmax_t val;
+	int error;
+
+	if (md->md_keys == 0) {
+		gctl_error(req, "No valid keys on %s.", prov);
+		return;
+	}
+
+	/* Generate key for Master Key decryption. */
+	if (eli_genkey(req, md, key, false) == NULL) {
+		bzero(key, sizeof(key));
+		return;
+	}
+
+	/* Decrypt Master Key. */
+	error = g_eli_mkey_decrypt_any(md, key, mkey, &nkey);
+	bzero(key, sizeof(key));
+	if (error != 0) {
+		bzero(md, sizeof(*md));
+		if (error == -1)
+			gctl_error(req, "Wrong key for %s.", prov);
+		else /* if (error > 0) */ {
+			gctl_error(req, "Cannot decrypt Master Key: %s.",
+			    strerror(error));
+		}
+		return;
+	}
+	if (verbose)
+		printf("Decrypted Master Key %u.\n", nkey);
+
+	/* Without -n, keep the slot number found during decryption. */
+	val = gctl_get_intmax(req, "keyno");
+	if (val != -1)
+		nkey = val;
+	if (nkey >= G_ELI_MAXMKEYS) {
+		gctl_error(req, "Invalid '%s' argument.", "keyno");
+		goto wipe;
+	}
+
+	val = gctl_get_intmax(req, "iterations");
+	/* Check if iterations number should and can be changed. */
+	if (val != -1 && md->md_iterations == -1) {
+		md->md_iterations = val;
+	} else if (val != -1 && val != md->md_iterations) {
+		if (bitcount32(md->md_keys) != 1) {
+			gctl_error(req, "To be able to use '-i' option, only "
+			    "one key can be defined.");
+			goto wipe;
+		}
+		if (md->md_keys != (1 << nkey)) {
+			gctl_error(req, "Only already defined key can be "
+			    "changed when '-i' option is used.");
+			goto wipe;
+		}
+		md->md_iterations = val;
+	}
+
+	mkeydst = md->md_mkeys + nkey * G_ELI_MKEYLEN;
+	md->md_keys |= (1 << nkey);
+
+	bcopy(mkey, mkeydst, sizeof(mkey));
+	bzero(mkey, sizeof(mkey));
+
+	/* Generate key for Master Key encryption. */
+	if (eli_genkey(req, md, key, true) == NULL) {
+		bzero(key, sizeof(key));
+		bzero(md, sizeof(*md));
+		return;
+	}
+
+	/* Encrypt the Master-Key with the new key. */
+	error = g_eli_mkey_encrypt(md->md_ealgo, key, md->md_keylen, mkeydst);
+	bzero(key, sizeof(key));
+	if (error != 0) {
+		bzero(md, sizeof(*md));
+		gctl_error(req, "Cannot encrypt Master Key: %s.",
+		    strerror(error));
+		return;
+	}
+
+	/* Store metadata with fresh key. */
+	eli_metadata_store(req, prov, md);
+	bzero(md, sizeof(*md));
+	return;
+
+wipe:
+	/* Validation failed while the decrypted Master Key was still held. */
+	bzero(mkey, sizeof(mkey));
+	bzero(md, sizeof(*md));
+}
+
+/*
+ * "setkey" verb: expects exactly one provider argument, reads its metadata
+ * and hands off to the attached or detached implementation.  When no error
+ * was recorded on the request, a reminder about metadata backups is printed.
+ */
+static void
+eli_setkey(struct gctl_req *req)
+{
+	struct g_eli_metadata md;
+	const char *prov;
+
+	if (gctl_get_int(req, "nargs") != 1) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+	prov = gctl_get_ascii(req, "arg0");
+
+	if (eli_metadata_read(req, prov, &md) == -1)
+		return;
+
+	if (!eli_is_attached(prov))
+		eli_setkey_detached(req, prov, &md);
+	else
+		eli_setkey_attached(req, &md);
+
+	/* Stay quiet if either implementation reported a failure. */
+	if (req->error != NULL && req->error[0] != '\0')
+		return;
+
+	printf("Note, that the master key encrypted with old keys "
+	    "and/or passphrase may still exists in a metadata backup "
+	    "file.\n");
+}
+
+/*
+ * "delkey" on an attached provider: nothing is done in userland, the
+ * request is simply issued with gctl_issue().  'prov' is unused.
+ */
+static void
+eli_delkey_attached(struct gctl_req *req, const char *prov __unused)
+{
+
+ gctl_issue(req);
+}
+
+/*
+ * "delkey" on a detached provider.
+ *
+ * With the "all" request parameter set, the whole Master Key area is
+ * overwritten with random data.  Otherwise the slot named by "keyno" is
+ * cleared in md_keys and overwritten with random data; "force" permits
+ * deleting a slot that is not set and deleting the last remaining key.
+ * The updated metadata is written back to 'prov'.
+ *
+ * Failures are reported through gctl_error().  The local metadata copy is
+ * wiped on every path once it has been read.
+ */
+static void
+eli_delkey_detached(struct gctl_req *req, const char *prov)
+{
+	struct g_eli_metadata md;
+	unsigned char *mkeydst;
+	unsigned int nkey;
+	intmax_t val;
+	bool all, force;
+
+	if (eli_metadata_read(req, prov, &md) == -1)
+		return;
+
+	all = gctl_get_int(req, "all");
+	if (all)
+		arc4random_buf(md.md_mkeys, sizeof(md.md_mkeys));
+	else {
+		force = gctl_get_int(req, "force");
+		val = gctl_get_intmax(req, "keyno");
+		if (val == -1) {
+			gctl_error(req, "Key number has to be specified.");
+			goto out;
+		}
+		/*
+		 * Validate the full-width value before narrowing it, so
+		 * that e.g. 2^32 is rejected instead of destroying slot 0.
+		 */
+		if (val < 0 || val >= G_ELI_MAXMKEYS) {
+			gctl_error(req, "Invalid '%s' argument.", "keyno");
+			goto out;
+		}
+		nkey = (unsigned int)val;
+		if (!(md.md_keys & (1 << nkey)) && !force) {
+			gctl_error(req, "Master Key %u is not set.", nkey);
+			goto out;
+		}
+		md.md_keys &= ~(1 << nkey);
+		if (md.md_keys == 0 && !force) {
+			gctl_error(req, "This is the last Master Key. Use '-f' "
+			    "option if you really want to remove it.");
+			goto out;
+		}
+		mkeydst = md.md_mkeys + nkey * G_ELI_MKEYLEN;
+		arc4random_buf(mkeydst, G_ELI_MKEYLEN);
+	}
+
+	eli_metadata_store(req, prov, &md);
+out:
+	bzero(&md, sizeof(md));
+}
+
+/*
+ * "delkey" verb: expects exactly one provider argument and dispatches to
+ * the attached or detached implementation.
+ */
+static void
+eli_delkey(struct gctl_req *req)
+{
+	const char *prov;
+
+	if (gctl_get_int(req, "nargs") != 1) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+	prov = gctl_get_ascii(req, "arg0");
+
+	if (!eli_is_attached(prov)) {
+		eli_delkey_detached(req, prov);
+		return;
+	}
+	eli_delkey_attached(req, prov);
+}
+
+/*
+ * "resume" verb: expects exactly one provider argument.  The provider's
+ * metadata is read and its recorded size compared with the current media
+ * size; then a User Key is generated, attached to the request as the "key"
+ * parameter and the request is issued.  The key buffer is wiped afterwards.
+ */
+static void
+eli_resume(struct gctl_req *req)
+{
+	struct g_eli_metadata md;
+	unsigned char key[G_ELI_USERKEYLEN];
+	const char *prov;
+	off_t mediasize;
+
+	if (gctl_get_int(req, "nargs") != 1) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+	prov = gctl_get_ascii(req, "arg0");
+
+	if (eli_metadata_read(req, prov, &md) == -1)
+		return;
+
+	mediasize = g_get_mediasize(prov);
+	if ((uint64_t)mediasize != md.md_provsize) {
+		gctl_error(req, "Provider size mismatch.");
+		return;
+	}
+
+	if (eli_genkey(req, &md, key, false) != NULL) {
+		gctl_ro_param(req, "key", sizeof(key), key);
+		if (gctl_issue(req) == NULL && verbose)
+			printf("Resumed %s.\n", prov);
+	}
+	/* Wipe the key whether or not it was generated or used. */
+	bzero(key, sizeof(key));
+}
+
+/*
+ * Overwrite one sector at 'offset' on the open provider 'fd' with random
+ * data, flushing after each pass.
+ *
+ * The number of passes comes from the kern.geom.eli.overwrites sysctl;
+ * G_ELI_OVERWRITES is used when the sysctl cannot be read or is zero.
+ * All passes are attempted even if one fails; the first failure is the
+ * one reported.
+ *
+ * Returns 0 on success, -1 on failure (with the reason set via
+ * gctl_error(); 'prov' is only used in those messages).
+ */
+static int
+eli_trash_metadata(struct gctl_req *req, const char *prov, int fd, off_t offset)
+{
+	unsigned int overwrites;
+	unsigned char *sector;
+	size_t len;
+	ssize_t done, size;
+	int error;
+
+	/* sysctlbyname() takes the buffer length as a size_t pointer. */
+	len = sizeof(overwrites);
+	if (sysctlbyname("kern.geom.eli.overwrites", &overwrites, &len,
+	    NULL, 0) == -1 || overwrites == 0) {
+		overwrites = G_ELI_OVERWRITES;
+	}
+
+	size = g_sectorsize(fd);
+	if (size <= 0) {
+		gctl_error(req, "Cannot obtain provider sector size %s: %s.",
+		    prov, strerror(errno));
+		return (-1);
+	}
+	sector = malloc(size);
+	if (sector == NULL) {
+		gctl_error(req, "Cannot allocate %zd bytes of memory.", size);
+		return (-1);
+	}
+
+	error = 0;
+	do {
+		arc4random_buf(sector, size);
+		done = pwrite(fd, sector, size, offset);
+		if (done != size && error == 0) {
+			/*
+			 * errno is only meaningful when pwrite() returned
+			 * -1; a short write must still count as a failure.
+			 */
+			error = (done == -1) ? errno : EIO;
+		}
+		(void)g_flush(fd);
+	} while (--overwrites > 0);
+	free(sector);
+	if (error != 0) {
+		gctl_error(req, "Cannot trash metadata on provider %s: %s.",
+		    prov, strerror(error));
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * "kill" on a detached provider: open it for writing and trash the sector
+ * located one sector size before the end of the media.
+ */
+static void
+eli_kill_detached(struct gctl_req *req, const char *prov)
+{
+	off_t offset;
+	int fd;
+
+	/*
+	 * NOTE: The provider is deliberately not checked for geli metadata
+	 * first; 'kill' is a critical command and should not spend time on
+	 * that.  The check that was considered is kept below for reference.
+	 */
+#if 0
+	error = g_metadata_read(prov, (unsigned char *)&md, sizeof(md),
+	    G_ELI_MAGIC);
+	if (error != 0) {
+		gctl_error(req, "Cannot read metadata from %s: %s.", prov,
+		    strerror(error));
+		return;
+	}
+#endif
+
+	fd = g_open(prov, 1);
+	if (fd == -1) {
+		gctl_error(req, "Cannot open provider %s: %s.", prov,
+		    strerror(errno));
+		return;
+	}
+
+	offset = g_mediasize(fd) - g_sectorsize(fd);
+	if (offset > 0) {
+		(void)eli_trash_metadata(req, prov, fd, offset);
+	} else {
+		gctl_error(req,
+		    "Cannot obtain media size or sector size for provider %s: %s.",
+		    prov, strerror(errno));
+	}
+	(void)g_close(fd);
+}
+
+/*
+ * "kill" verb.  Detached providers named on the command line are handled
+ * here in userland; the request is then issued for the attached ones.
+ *
+ * How the '-a' option combines with a list of providers:
+ *	geli kill -a		- all attached providers
+ *	geli kill -a da0 da1	- all attached providers, plus detached
+ *				  da0 and da1
+ *	geli kill da0 da1	- da0 and da1, attached or detached
+ */
+static void
+eli_kill(struct gctl_req *req)
+{
+	const char *prov;
+	int all, idx, nargs;
+
+	nargs = gctl_get_int(req, "nargs");
+	all = gctl_get_int(req, "all");
+	if (nargs == 0 && !all) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	/* First pass: providers that are not attached. */
+	for (idx = 0; idx < nargs; idx++) {
+		prov = gctl_get_ascii(req, "arg%d", idx);
+		if (eli_is_attached(prov))
+			continue;
+		eli_kill_detached(req, prov);
+	}
+
+	/* Second pass: attached providers, via the issued request. */
+	gctl_issue(req);
+}
+
+/*
+ * Copy the metadata sector of 'prov' into 'file'.
+ *
+ * The file is created (or truncated) with mode 0600; if writing fails it is
+ * removed again.  The in-memory copy of the sector is wiped before it is
+ * freed.  Returns 0 on success and -1 on failure, with the reason recorded
+ * through gctl_error().
+ */
+static int
+eli_backup_create(struct gctl_req *req, const char *prov, const char *file)
+{
+	unsigned char *buf = NULL;
+	ssize_t len;
+	int error, fd, status = -1;
+
+	len = g_get_sectorsize(prov);
+	if (len == 0) {
+		gctl_error(req, "Cannot get informations about %s: %s.", prov,
+		    strerror(errno));
+		goto done;
+	}
+	buf = malloc(len);
+	if (buf == NULL) {
+		gctl_error(req, "Cannot allocate memory.");
+		goto done;
+	}
+
+	/* Fetch the metadata sector from the provider. */
+	error = g_metadata_read(prov, buf, len, G_ELI_MAGIC);
+	if (error != 0) {
+		gctl_error(req, "Unable to read metadata from %s: %s.", prov,
+		    strerror(error));
+		goto done;
+	}
+
+	fd = open(file, O_WRONLY | O_TRUNC | O_CREAT, 0600);
+	if (fd == -1) {
+		gctl_error(req, "Unable to open %s: %s.", file,
+		    strerror(errno));
+		goto done;
+	}
+
+	/* Save it to the backup file. */
+	if (write(fd, buf, len) == len) {
+		(void)fsync(fd);
+		(void)close(fd);
+		status = 0;
+	} else {
+		gctl_error(req, "Unable to write to %s: %s.", file,
+		    strerror(errno));
+		(void)close(fd);
+		(void)unlink(file);
+	}
+done:
+	if (buf != NULL) {
+		bzero(buf, len);
+		free(buf);
+	}
+	return (status);
+}
+
+/*
+ * "backup" verb: takes a provider and a destination file, in that order,
+ * and saves the provider's metadata into the file.
+ */
+static void
+eli_backup(struct gctl_req *req)
+{
+	const char *dst, *src;
+
+	if (gctl_get_int(req, "nargs") != 2) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+	src = gctl_get_ascii(req, "arg0");
+	dst = gctl_get_ascii(req, "arg1");
+
+	/* Failures are already recorded on the request by the helper. */
+	(void)eli_backup_create(req, src, dst);
+}
+
+/*
+ * "restore" verb: takes a backup file and a provider, in that order, and
+ * writes the metadata from the file onto the provider.
+ *
+ * If the provider size recorded in the backup differs from the provider's
+ * current media size, the restore is refused unless "force" is set, in
+ * which case the recorded size is replaced by the current one.
+ */
+static void
+eli_restore(struct gctl_req *req)
+{
+	struct g_eli_metadata md;
+	const char *file, *prov;
+	off_t mediasize;
+
+	if (gctl_get_int(req, "nargs") != 2) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+	file = gctl_get_ascii(req, "arg0");
+	prov = gctl_get_ascii(req, "arg1");
+
+	/* Load the metadata stored in the backup file. */
+	if (eli_metadata_read(req, file, &md) == -1)
+		return;
+
+	/* Find out how large the target provider is now. */
+	mediasize = g_get_mediasize(prov);
+	if (mediasize == 0) {
+		gctl_error(req, "Cannot get informations about %s: %s.", prov,
+		    strerror(errno));
+		return;
+	}
+
+	/* Has the provider been resized since the backup was taken? */
+	if ((uint64_t)mediasize != md.md_provsize) {
+		if (!gctl_get_int(req, "force")) {
+			gctl_error(req, "Provider size mismatch: "
+			    "wrong backup file?");
+			return;
+		}
+		md.md_provsize = mediasize;
+	}
+
+	/* Put the metadata onto the provider. */
+	(void)eli_metadata_store(req, prov, &md);
+}
+
+/*
+ * "resize" verb: tell geli that the underlying provider has changed size.
+ *
+ * The metadata sector is looked up at the end of the old extent (the
+ * "oldsize" request parameter), validated, updated with the current media
+ * size and stored again on the provider.  Only after the new copy has been
+ * stored successfully is the old sector trashed, so a failed store never
+ * leaves the provider without any metadata.
+ *
+ * Failures are reported through gctl_error().
+ */
+static void
+eli_resize(struct gctl_req *req)
+{
+	struct g_eli_metadata md;
+	const char *prov;
+	unsigned char *sector;
+	ssize_t secsize;
+	off_t mediasize, oldsize;
+	int error, nargs, provfd;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs != 1) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+	prov = gctl_get_ascii(req, "arg0");
+
+	provfd = -1;
+	sector = NULL;
+	secsize = 0;
+
+	provfd = g_open(prov, 1);
+	if (provfd == -1) {
+		gctl_error(req, "Cannot open %s: %s.", prov, strerror(errno));
+		goto out;
+	}
+
+	mediasize = g_mediasize(provfd);
+	secsize = g_sectorsize(provfd);
+	if (mediasize == -1 || secsize == -1) {
+		gctl_error(req, "Cannot get information about %s: %s.", prov,
+		    strerror(errno));
+		goto out;
+	}
+
+	sector = malloc(secsize);
+	if (sector == NULL) {
+		gctl_error(req, "Cannot allocate memory.");
+		goto out;
+	}
+
+	/*
+	 * The old metadata lives at oldsize - secsize, so an oldsize smaller
+	 * than one sector would yield a negative offset.
+	 */
+	oldsize = gctl_get_intmax(req, "oldsize");
+	if (oldsize < secsize || oldsize > mediasize) {
+		gctl_error(req, "Invalid oldsize: Out of range.");
+		goto out;
+	}
+	if (oldsize == mediasize) {
+		gctl_error(req, "Size hasn't changed.");
+		goto out;
+	}
+
+	/* Read metadata from the 'oldsize' offset. */
+	if (pread(provfd, sector, secsize, oldsize - secsize) != secsize) {
+		gctl_error(req, "Cannot read old metadata: %s.",
+		    strerror(errno));
+		goto out;
+	}
+
+	/* Check if this sector contains geli metadata. */
+	error = eli_metadata_decode(sector, &md);
+	switch (error) {
+	case 0:
+		break;
+	case EOPNOTSUPP:
+		gctl_error(req,
+		    "Provider's %s metadata version %u is too new.\n"
+		    "geli: The highest supported version is %u.",
+		    prov, (unsigned int)md.md_version, G_ELI_VERSION);
+		goto out;
+	case EINVAL:
+		gctl_error(req, "Inconsistent provider's %s metadata.", prov);
+		goto out;
+	default:
+		gctl_error(req,
+		    "Unexpected error while decoding provider's %s metadata: %s.",
+		    prov, strerror(error));
+		goto out;
+	}
+
+	/*
+	 * If the old metadata doesn't have a correct provider size, refuse
+	 * to resize.
+	 */
+	if (md.md_provsize != (uint64_t)oldsize) {
+		gctl_error(req, "Provider size mismatch at oldsize.");
+		goto out;
+	}
+
+	/*
+	 * Update the old metadata with the current provider size and write
+	 * it back to the correct place on the provider.
+	 *
+	 * NOTE(review): assumes eli_metadata_store() returns 0 on success
+	 * and non-zero on failure -- confirm against its definition.
+	 */
+	md.md_provsize = mediasize;
+	if (eli_metadata_store(req, prov, &md) != 0) {
+		/* Keep the old copy: it is the only valid metadata left. */
+		goto out;
+	}
+	/* Now trash the old metadata. */
+	(void)eli_trash_metadata(req, prov, provfd, oldsize - secsize);
+out:
+	if (provfd != -1)
+		(void)g_close(provfd);
+	if (sector != NULL) {
+		bzero(sector, secsize);
+		free(sector);
+	}
+}
+
+/*
+ * "version" verb.
+ *
+ * Without arguments, print the GELI version of the kernel (from the
+ * kern.geom.eli.version sysctl, when it can be read) and of userland
+ * (G_ELI_VERSION).  With arguments, print the metadata version found on
+ * each named provider; providers whose metadata cannot be read are
+ * reported on stderr and flagged on the request as "Not fully done.".
+ */
+static void
+eli_version(struct gctl_req *req)
+{
+	struct g_eli_metadata md;
+	const char *name;
+	unsigned int version;
+	int error, i, nargs;
+
+	nargs = gctl_get_int(req, "nargs");
+
+	if (nargs == 0) {
+		unsigned int kernver;
+		size_t size;
+
+		/* sysctlbyname() takes the buffer length as a size_t pointer. */
+		size = sizeof(kernver);
+		if (sysctlbyname("kern.geom.eli.version", &kernver, &size,
+		    NULL, 0) == -1) {
+			warn("Unable to obtain GELI kernel version");
+		} else {
+			printf("kernel: %u\n", kernver);
+		}
+		printf("userland: %u\n", G_ELI_VERSION);
+		return;
+	}
+
+	for (i = 0; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_read(name, (unsigned char *)&md,
+		    sizeof(md), G_ELI_MAGIC);
+		if (error != 0) {
+			/*
+			 * The failure reason is in 'error', not errno, so
+			 * use warnx(): warn() would append an unrelated
+			 * strerror(errno) to the message.
+			 */
+			warnx("%s: Unable to read metadata: %s.", name,
+			    strerror(error));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		/* 'md' holds the raw on-disk bytes here, hence le32dec(). */
+		version = le32dec(&md.md_version);
+		printf("%s: %u\n", name, version);
+	}
+}
+
+/*
+ * "clear" verb: clear geli metadata on every provider named on the command
+ * line.  A failure on one provider is printed to stderr, recorded on the
+ * request as "Not fully done." and does not stop the remaining ones.
+ */
+static void
+eli_clear(struct gctl_req *req)
+{
+	const char *prov;
+	int argc, error, n;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (n = 0; n < argc; n++) {
+		prov = gctl_get_ascii(req, "arg%d", n);
+		error = g_metadata_clear(prov, G_ELI_MAGIC);
+		if (error == 0) {
+			if (verbose)
+				printf("Metadata cleared on %s.\n", prov);
+		} else {
+			fprintf(stderr, "Cannot clear metadata on %s: %s.\n",
+			    prov, strerror(error));
+			gctl_error(req, "Not fully done.");
+		}
+	}
+}
+
+/*
+ * "dump" verb: print the geli metadata of every provider named on the
+ * command line.  Providers whose metadata cannot be read are skipped and
+ * the request is flagged as "Not fully done.".
+ */
+static void
+eli_dump(struct gctl_req *req)
+{
+	struct g_eli_metadata md;
+	const char *prov;
+	int argc, n;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (n = 0; n < argc; n++) {
+		prov = gctl_get_ascii(req, "arg%d", n);
+		/* No request is passed to the reader here. */
+		if (eli_metadata_read(NULL, prov, &md) != -1) {
+			printf("Metadata on %s:\n", prov);
+			eli_metadata_dump(&md);
+			printf("\n");
+		} else {
+			gctl_error(req, "Not fully done.");
+		}
+	}
+}
diff --git a/lib/geom/journal/Makefile b/lib/geom/journal/Makefile
new file mode 100644
index 000000000000..3be15a13a81e
--- /dev/null
+++ b/lib/geom/journal/Makefile
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+# NOTE(review): ${.CURDIR:H:H} drops the last two path components, which
+# for lib/geom/journal gives lib/misc.  This looks like a leftover from the
+# directory layout before the move -- confirm that the shared "misc"
+# sources are located through another .PATH entry.
+.PATH: ${.CURDIR:H:H}/misc
+
+# Name of the GEOM class built by this directory.
+GEOM_CLASS= journal
+# UFS probing helpers used by the "label" verb.
+SRCS+= geom_journal_ufs.c
+
+# Link against libufs and libmd.
+LIBADD= ufs md
+
+# Kernel headers are needed for includes such as <geom/journal/g_journal.h>.
+CFLAGS+=-I${SRCTOP}/sys
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/journal/Makefile.depend b/lib/geom/journal/Makefile.depend
new file mode 100644
index 000000000000..7c411851c451
--- /dev/null
+++ b/lib/geom/journal/Makefile.depend
@@ -0,0 +1,21 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ lib/libmd \
+ lib/libufs \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/journal/geom_journal.c b/lib/geom/journal/geom_journal.c
new file mode 100644
index 000000000000..2a174c6e5b1c
--- /dev/null
+++ b/lib/geom/journal/geom_journal.c
@@ -0,0 +1,351 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/journal/g_journal.h>
+#include <core/geom.h>
+#include <misc/subr.h>
+
+#include "geom_journal.h"
+
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_JOURNAL_VERSION;
+
+static void journal_main(struct gctl_req *req, unsigned flags);
+static void journal_clear(struct gctl_req *req);
+static void journal_dump(struct gctl_req *req);
+static void journal_label(struct gctl_req *req);
+
+/*
+ * Command table of the journal class.  Each entry lists the verb, its
+ * flags, the userland handler, the accepted options and the usage string;
+ * the table is terminated by G_CMD_SENTINEL.
+ *
+ * "clear", "dump" and "label" are handled by journal_main().  "stop" and
+ * "sync" have a NULL handler -- presumably they are passed on to the kernel
+ * by the geom core; confirm against the core's dispatch code.
+ */
+struct g_command class_commands[] = {
+ { "clear", G_FLAG_VERBOSE, journal_main, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ { "dump", 0, journal_main, G_NULL_OPTS,
+ "prov ..."
+ },
+ { "label", G_FLAG_VERBOSE, journal_main,
+ {
+ { 'c', "checksum", NULL, G_TYPE_BOOL },
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ { 'h', "hardcode", NULL, G_TYPE_BOOL },
+ { 's', "jsize", "-1", G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-cfhv] [-s jsize] dataprov [jprov]"
+ },
+ { "stop", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ { "sync", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+ "[-v]"
+ },
+ G_CMD_SENTINEL
+};
+
+static int verbose = 0;
+
+/*
+ * Entry point for the verbs implemented in userland.  Latches the verbose
+ * flag, then runs the handler matching the "verb" request parameter; a
+ * missing or unknown verb is reported through gctl_error().
+ */
+static void
+journal_main(struct gctl_req *req, unsigned flags)
+{
+	const char *verb;
+
+	if ((flags & G_FLAG_VERBOSE) != 0)
+		verbose = 1;
+
+	verb = gctl_get_ascii(req, "verb");
+	if (verb == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+
+	if (strcmp(verb, "label") == 0) {
+		journal_label(req);
+		return;
+	}
+	if (strcmp(verb, "clear") == 0) {
+		journal_clear(req);
+		return;
+	}
+	if (strcmp(verb, "dump") == 0) {
+		journal_dump(req);
+		return;
+	}
+	gctl_error(req, "Unknown command: %s.", verb);
+}
+
+/*
+ * Return 1 if a known file system is detected on 'prov', 0 otherwise.
+ * Only UFS is probed; the disabled block marks where a probe for another
+ * file system would be added.
+ */
+static int
+g_journal_fs_exists(const char *prov)
+{
+
+ if (g_journal_ufs_exists(prov))
+ return (1);
+#if 0
+ if (g_journal_otherfs_exists(prov))
+ return (1);
+#endif
+ return (0);
+}
+
+/*
+ * Return 1 if a known file system on 'prov' occupies the provider's last
+ * sector, 0 otherwise.  Only UFS is probed; the disabled block marks where
+ * a probe for another file system would be added.
+ */
+static int
+g_journal_fs_using_last_sector(const char *prov)
+{
+
+ if (g_journal_ufs_using_last_sector(prov))
+ return (1);
+#if 0
+ if (g_journal_otherfs_using_last_sector(prov))
+ return (1);
+#endif
+ return (0);
+}
+
+/*
+ * "label" verb: write gjournal metadata to one or two providers.
+ *
+ * One argument ("all-in-one"): data and journal share the provider.  The
+ * journal takes the 'jsize' bytes that end one sector before the end of
+ * the media.  Unless "force" is set, the operation is refused when a file
+ * system is detected on the provider.
+ *
+ * Two arguments: arg0 is the data provider and arg1 the journal provider.
+ * The journal spans from offset 0 to one sector before the end of arg1.
+ * Unless "force" is set, the operation is refused when the file system on
+ * arg0 uses its last sector.  "jsize" may not be given in this mode.
+ *
+ * Existing metadata is cleared on all providers before the new metadata is
+ * stored.  Errors are reported through gctl_error(); a failed store on one
+ * provider is printed to stderr and does not stop the other one.
+ */
+static void
+journal_label(struct gctl_req *req)
+{
+ struct g_journal_metadata md;
+ const char *data, *journal, *str;
+ u_char sector[512];
+ intmax_t jsize, msize, ssize;
+ int error, force, i, nargs, checksum, hardcode;
+
+ bzero(sector, sizeof(sector));
+ nargs = gctl_get_int(req, "nargs");
+ str = NULL; /* gcc */
+
+ /* Fields that are the same for every provider being labelled. */
+ strlcpy(md.md_magic, G_JOURNAL_MAGIC, sizeof(md.md_magic));
+ md.md_version = G_JOURNAL_VERSION;
+ md.md_id = arc4random();
+ md.md_joffset = 0;
+ md.md_jid = 0;
+ md.md_flags = GJ_FLAG_CLEAN;
+ checksum = gctl_get_int(req, "checksum");
+ if (checksum)
+ md.md_flags |= GJ_FLAG_CHECKSUM;
+ force = gctl_get_int(req, "force");
+ hardcode = gctl_get_int(req, "hardcode");
+
+ if (nargs != 1 && nargs != 2) {
+ gctl_error(req, "Invalid number of arguments.");
+ return;
+ }
+
+ /* Verify the given providers. */
+ for (i = 0; i < nargs; i++) {
+ str = gctl_get_ascii(req, "arg%d", i);
+ if (g_get_mediasize(str) == 0) {
+ gctl_error(req, "Invalid provider %s.", str);
+ return;
+ }
+ }
+
+ data = gctl_get_ascii(req, "arg0");
+ jsize = gctl_get_intmax(req, "jsize");
+ journal = NULL;
+ switch (nargs) {
+ case 1:
+ /* All-in-one: journal and data on the same provider. */
+ if (!force && g_journal_fs_exists(data)) {
+ gctl_error(req, "File system exists on %s and this "
+ "operation would destroy it.\nUse -f if you "
+ "really want to do it.", data);
+ return;
+ }
+ journal = data;
+ msize = g_get_mediasize(data);
+ ssize = g_get_sectorsize(data);
+ if (jsize == -1) {
+ /*
+ * No journal size specified. 1GB should be safe
+ * default.
+ */
+ jsize = 1073741824ULL;
+ } else {
+ /* Explicit sizes: at least 100MB and sector-aligned. */
+ if (jsize < 104857600) {
+ gctl_error(req, "Journal too small.");
+ return;
+ }
+ if ((jsize % ssize) != 0) {
+ gctl_error(req, "Invalid journal size.");
+ return;
+ }
+ }
+ /*
+ * NOTE(review): the value printed as "default" below is the jsize
+ * in effect, which is the user-supplied one when -s was given.
+ */
+ if (jsize + ssize >= msize) {
+ gctl_error(req, "Provider too small for journalling. "
+ "You can try smaller jsize (default is %jd).",
+ jsize);
+ return;
+ }
+ /* The journal ends one sector before the end of the media. */
+ md.md_jstart = msize - ssize - jsize;
+ md.md_jend = msize - ssize;
+ break;
+ case 2:
+ /* Separate journal provider. */
+ if (!force && g_journal_fs_using_last_sector(data)) {
+ gctl_error(req, "File system on %s is using the last "
+ "sector and this operation is going to overwrite "
+ "it. Use -f if you really want to do it.", data);
+ return;
+ }
+ journal = gctl_get_ascii(req, "arg1");
+ if (jsize != -1) {
+ gctl_error(req, "jsize argument is valid only for "
+ "all-in-one configuration.");
+ return;
+ }
+ msize = g_get_mediasize(journal);
+ ssize = g_get_sectorsize(journal);
+ md.md_jstart = 0;
+ md.md_jend = msize - ssize;
+ break;
+ }
+
+ if (g_get_sectorsize(data) != g_get_sectorsize(journal)) {
+ gctl_error(req, "Not equal sector sizes.");
+ return;
+ }
+
+ /*
+ * Clear last sector first, to spoil all components if device exists.
+ */
+ for (i = 0; i < nargs; i++) {
+ str = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_clear(str, NULL);
+ if (error != 0) {
+ gctl_error(req, "Cannot clear metadata on %s: %s.", str,
+ strerror(error));
+ return;
+ }
+ }
+
+ /*
+ * Ok, store metadata.
+ */
+ for (i = 0; i < nargs; i++) {
+ /* Index 0 is the data provider, index 1 the journal provider. */
+ switch (i) {
+ case 0:
+ str = data;
+ md.md_type = GJ_TYPE_DATA;
+ if (nargs == 1)
+ md.md_type |= GJ_TYPE_JOURNAL;
+ break;
+ case 1:
+ str = journal;
+ md.md_type = GJ_TYPE_JOURNAL;
+ break;
+ }
+ md.md_provsize = g_get_mediasize(str);
+ assert(md.md_provsize != 0);
+ if (!hardcode)
+ bzero(md.md_provider, sizeof(md.md_provider));
+ else {
+ /*
+ * Record the name without the _PATH_DEV prefix.  'str' itself
+ * is advanced, so the shortened name is also what is used for
+ * the store and the messages below.
+ */
+ if (strncmp(str, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ str += sizeof(_PATH_DEV) - 1;
+ strlcpy(md.md_provider, str, sizeof(md.md_provider));
+ }
+ journal_metadata_encode(&md, sector);
+ error = g_metadata_store(str, sector, sizeof(sector));
+ if (error != 0) {
+ fprintf(stderr, "Cannot store metadata on %s: %s.\n",
+ str, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ if (verbose)
+ printf("Metadata value stored on %s.\n", str);
+ }
+}
+
+/*
+ * "clear" verb: clear gjournal metadata on every provider named on the
+ * command line.  A failure on one provider is printed to stderr, recorded
+ * on the request as "Not fully done." and does not stop the remaining ones.
+ */
+static void
+journal_clear(struct gctl_req *req)
+{
+	const char *prov;
+	int argc, error, n;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (n = 0; n < argc; n++) {
+		prov = gctl_get_ascii(req, "arg%d", n);
+		error = g_metadata_clear(prov, G_JOURNAL_MAGIC);
+		if (error == 0) {
+			if (verbose)
+				printf("Metadata cleared on %s.\n", prov);
+		} else {
+			fprintf(stderr, "Cannot clear metadata on %s: %s.\n",
+			    prov, strerror(error));
+			gctl_error(req, "Not fully done.");
+		}
+	}
+}
+
+/*
+ * "dump" verb: read, decode and print the gjournal metadata of every
+ * provider named on the command line.  Providers that cannot be read or
+ * decoded are reported on stderr, flagged on the request as
+ * "Not fully done." and skipped.
+ */
+static void
+journal_dump(struct gctl_req *req)
+{
+	struct g_journal_metadata decoded, raw;
+	const char *prov;
+	int argc, error, n;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (n = 0; n < argc; n++) {
+		prov = gctl_get_ascii(req, "arg%d", n);
+		error = g_metadata_read(prov, (u_char *)&raw, sizeof(raw),
+		    G_JOURNAL_MAGIC);
+		if (error != 0) {
+			fprintf(stderr, "Cannot read metadata from %s: %s.\n",
+			    prov, strerror(error));
+			gctl_error(req, "Not fully done.");
+		} else if (journal_metadata_decode((u_char *)&raw,
+		    &decoded) != 0) {
+			fprintf(stderr, "MD5 hash mismatch for %s, skipping.\n",
+			    prov);
+			gctl_error(req, "Not fully done.");
+		} else {
+			printf("Metadata on %s:\n", prov);
+			journal_metadata_dump(&decoded);
+			printf("\n");
+		}
+	}
+}
diff --git a/lib/geom/journal/geom_journal.h b/lib/geom/journal/geom_journal.h
new file mode 100644
index 000000000000..6725fe04001a
--- /dev/null
+++ b/lib/geom/journal/geom_journal.h
@@ -0,0 +1,35 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GEOM_JOURNAL_H_
+#define _GEOM_JOURNAL_H_
+/* Non-zero if a UFS superblock can be read from 'prov'. */
+int g_journal_ufs_exists(const char *prov);
+/* Non-zero if the UFS file system on 'prov' reaches the end of the provider. */
+int g_journal_ufs_using_last_sector(const char *prov);
+#endif /* !_GEOM_JOURNAL_H_ */
diff --git a/lib/geom/journal/geom_journal_ufs.c b/lib/geom/journal/geom_journal_ufs.c
new file mode 100644
index 000000000000..324be1748261
--- /dev/null
+++ b/lib/geom/journal/geom_journal_ufs.c
@@ -0,0 +1,80 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/disklabel.h>
+#include <sys/mount.h>
+
+#include <ufs/ufs/dinode.h>
+#include <ufs/ffs/fs.h>
+
+#include <libufs.h>
+#include <libgeom.h>
+#include <core/geom.h>
+#include <misc/subr.h>
+
+#include "geom_journal.h"
+
+/*
+ * Try to read a UFS superblock from 'prov' using libufs.
+ *
+ * Returns NULL when ufs_disk_fillout() fails, otherwise a pointer to the
+ * superblock member of the function-local static 'disk'.  Because that
+ * storage is static, the result is overwritten by the next call and the
+ * function is not reentrant.
+ *
+ * NOTE(review): the pointer is handed out after ufs_disk_close(); this
+ * presumably relies on d_fs living inside 'disk' itself rather than in
+ * memory released on close -- confirm against libufs.
+ */
+static struct fs *
+read_superblock(const char *prov)
+{
+ static struct uufsd disk;
+ struct fs *fs;
+
+ if (ufs_disk_fillout(&disk, prov) == -1)
+ return (NULL);
+ fs = &disk.d_fs;
+ ufs_disk_close(&disk);
+ return (fs);
+}
+
+/* Return 1 if a UFS superblock could be read from 'prov', 0 otherwise. */
+int
+g_journal_ufs_exists(const char *prov)
+{
+
+ return (read_superblock(prov) != NULL);
+}
+
+/*
+ * Return 1 if the UFS file system on 'prov' is at least as large as the
+ * provider itself (both measured in DEV_BSIZE blocks), i.e. it extends to
+ * the provider's last sector.  Returns 0 otherwise, including when no
+ * superblock can be read.
+ */
+int
+g_journal_ufs_using_last_sector(const char *prov)
+{
+	struct fs *sb;
+	off_t fs_blocks, prov_blocks;
+
+	sb = read_superblock(prov);
+	if (sb == NULL)
+		return (0);
+
+	/* Size of the provider, in DEV_BSIZE blocks. */
+	prov_blocks = g_get_mediasize(prov) / DEV_BSIZE;
+	/* Size of the file system, converted with fsbtodb(). */
+	fs_blocks = fsbtodb(sb, sb->fs_size);
+
+	return (fs_blocks >= prov_blocks);
+}
diff --git a/lib/geom/journal/gjournal.8 b/lib/geom/journal/gjournal.8
new file mode 100644
index 000000000000..6eb8cde821ae
--- /dev/null
+++ b/lib/geom/journal/gjournal.8
@@ -0,0 +1,346 @@
+.\" Copyright (c) 2006-2009 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd February 17, 2009
+.Dt GJOURNAL 8
+.Os
+.Sh NAME
+.Nm gjournal
+.Nd "control utility for journaled devices"
+.Sh SYNOPSIS
+.Nm
+.Cm label
+.Op Fl cfhv
+.Op Fl s Ar jsize
+.Ar dataprov
+.Op Ar jprov
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm sync
+.Op Fl v
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for journal configuration on the given GEOM provider.
+The journal and data may be stored on the same provider or on two separate
+providers.
+This is block level journaling, not file system level journaling, which means
+everything gets logged, e.g.\& for file systems, it journals both data and
+metadata.
+The
+.Nm
+GEOM class can talk to file systems, which allows the use of
+.Nm
+for file system journaling and to keep file systems in a consistent state.
+At this time, only the UFS file system is supported.
+.Pp
+To configure journaling on the UFS file system using
+.Nm ,
+one should first create a
+.Nm
+provider using the
+.Nm
+utility, then run
+.Xr newfs 8
+or
+.Xr tunefs 8
+on it with the
+.Fl J
+flag which instructs UFS to cooperate with the
+.Nm
+provider below.
+There are important differences in how journaled UFS works.
+The most important one is that
+.Xr sync 2
+and
+.Xr fsync 2
+system calls do not work as expected anymore.
+To ensure that data is stored on the data provider, the
+.Nm Cm sync
+command should be used after calling
+.Xr sync 2 .
+For the best performance possible, soft-updates should be disabled when
+.Nm
+is used.
+It is also safe and recommended to use the
+.Cm async
+.Xr mount 8
+option.
+.Pp
+When
+.Nm
+is configured on top of
+.Xr gmirror 8
+or
+.Xr graid3 8
+providers, it also keeps them in a consistent state, thus
+automatic synchronization on power failure or system crash may be disabled
+on those providers.
+.Pp
+The
+.Nm
+utility uses on-disk metadata, stored in the provider's last sector,
+to store all needed information.
+This could be a problem when an existing file system is converted to use
+.Nm .
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm status"
+.It Cm label
+Configures
+.Nm
+on the given provider(s).
+If only one provider is given, both data and journal are stored on the same
+provider.
+If two providers are given, the first one will be used as data provider and the
+second will be used as the journal provider.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl s Ar jsize"
+.It Fl c
+Checksum journal records.
+.It Fl f
+May be used to convert an existing file system to use
+.Nm ,
+but only if the journal will be configured on a separate provider and if the
+last sector in the data provider is not used by the existing file system.
+If
+.Nm
+detects that the last sector is used, it will refuse to overwrite it
+and return an error.
+This check may be overridden by using the
+.Fl f
+flag, which will force
+.Nm
+to overwrite the last sector.
+.It Fl h
+Hardcode provider names in metadata.
+.It Fl s Ar jsize
+Specifies size of the journal if only one provider is used for both data and
+journal.
+The default is one gigabyte.
+Size should be chosen based on provider's load, and not on its size;
+recommended minimum is twice the size of the physical memory installed.
+It is not recommended to use
+.Nm
+for small file systems (e.g., only a few gigabytes in size).
+.El
+.It Cm clear
+Clear metadata on the given providers.
+.It Cm stop
+Stop the given provider.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Stop the given provider even if it is opened.
+.El
+.It Cm sync
+Trigger journal switch and enforce sending data to the data provider.
+.It Cm dump
+Dump metadata stored on the given providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl v"
+.It Fl v
+Be more verbose.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+Create a
+.Nm
+based UFS file system and mount it:
+.Bd -literal -offset indent
+gjournal load
+gjournal label da0
+newfs -J /dev/da0.journal
+mount -o async /dev/da0.journal /mnt
+.Ed
+.Pp
+Configure journaling on an existing file system, but only if
+.Nm
+allows this (i.e., if the last sector is not already used by the file system):
+.Bd -literal -offset indent
+umount /dev/da0s1d
+gjournal label da0s1d da0s1e && \e
+ tunefs -J enable -n disable da0s1d.journal && \e
+ mount -o async /dev/da0s1d.journal /mnt || \e
+ mount /dev/da0s1d /mnt
+.Ed
+.Sh SYSCTLS
+Gjournal adds the sysctl level kern.geom.journal.
+The string and integer information available is detailed below.
+The changeable column shows whether a process with appropriate privilege may
+change the value.
+.Bl -column "accept_immediatelyXXXXXX" integerXXX -offset indent
+.It Sy "sysctl name" Ta Sy Type Ta Sy Changeable
+.It debug Ta integer Ta yes
+.It switch_time Ta integer Ta yes
+.It force_switch Ta integer Ta yes
+.It parallel_flushes Ta integer Ta yes
+.It accept_immediately Ta integer Ta yes
+.It parallel_copies Ta integer Ta yes
+.It record_entries Ta integer Ta yes
+.It optimize Ta integer Ta yes
+.El
+.Bl -tag -width 6n
+.It Li debug
+Setting a non-zero value enables debugging at various levels.
+Debug level 1 will record actions at a journal level, relating to journal
+switches, metadata updates, etc.
+Debug level 2 will record actions at a higher level, relating to the numbers of
+entries in journals, access requests, etc.
+Debug level 3 will record verbose detail, including insertion of I/Os to the
+journal.
+.It Li switch_time
+The maximum number of seconds a journal is allowed to remain open before
+switching to a new journal.
+.It Li force_switch
+Force a journal switch when the journal uses more than N% of the free journal
+space.
+.It Li parallel_flushes
+The number of flush I/O requests to be sent in parallel when flushing the
+journal to the data provider.
+.It Li accept_immediately
+The maximum number of I/O requests accepted at the same time.
+.It Li parallel_copies
+The number of copy I/O requests to send in parallel.
+.It Li record_entries
+The maximum number of record entries to allow in a single journal.
+.It Li optimize
+Controls whether entries in a journal will be optimized by combining overlapping
+I/Os into a single I/O and reordering the entries in a journal.
+This can be disabled by setting the sysctl to 0.
+.El
+.Ss cache
+The string and integer information available for the cache level
+is detailed below.
+The changeable column shows whether a process with appropriate
+privilege may change the value.
+.Bl -column "alloc_failuresXXXXXX" integerXXX -offset indent
+.It Sy "sysctl name" Ta Sy Type Ta Sy Changeable
+.It used Ta integer Ta no
+.It limit Ta integer Ta yes
+.It divisor Ta integer Ta no
+.It switch Ta integer Ta yes
+.It misses Ta integer Ta yes
+.It alloc_failures Ta integer Ta yes
+.El
+.Bl -tag -width 6n
+.It Li used
+The number of bytes currently allocated to the cache.
+.It Li limit
+The maximum number of bytes to be allocated to the cache.
+.It Li divisor
+Sets the cache size to be used as a proportion of kmem_size.
+A value of 2 (the default) will cause the cache size to be set to 1/2 of the
+kmem_size.
+.It Li switch
+Force a journal switch when this percentage of cache has been used.
+.It Li misses
+The number of cache misses, when data has been read, but was not found in the
+cache.
+.It Li alloc_failures
+The number of times memory failed to be allocated to the cache because the cache
+limit was hit.
+.El
+.Ss stats
+The string and integer information available for the statistics level
+is detailed below.
+The changeable column shows whether a process with appropriate
+privilege may change the value.
+.Bl -column "skipped_bytesXXXXXX" integerXXX -offset indent
+.It Sy "sysctl name" Ta Sy Type Ta Sy Changeable
+.It skipped_bytes Ta integer Ta yes
+.It combined_ios Ta integer Ta yes
+.It switches Ta integer Ta yes
+.It wait_for_copy Ta integer Ta yes
+.It journal_full Ta integer Ta yes
+.It low_mem Ta integer Ta yes
+.El
+.Bl -tag -width 6n
+.It Li skipped_bytes
+The number of bytes skipped.
+.It Li combined_ios
+The number of I/Os which were combined by journal optimization.
+.It Li switches
+The number of journal switches.
+.It Li wait_for_copy
+The number of times the journal switch process had to wait for the previous
+journal copy to complete.
+.It Li journal_full
+The number of times the journal was almost full, forcing a journal switch.
+.It Li low_mem
+The number of times the low_mem hook was called.
+.El
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8 ,
+.Xr mount 8 ,
+.Xr newfs 8 ,
+.Xr tunefs 8 ,
+.Xr umount 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 7.0 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
diff --git a/lib/geom/label/Makefile b/lib/geom/label/Makefile
new file mode 100644
index 000000000000..767924b78b61
--- /dev/null
+++ b/lib/geom/label/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+# Package this library is assigned to.
+PACKAGE=runtime
+# NOTE(review): each ':H' strips the last path component, so with this file
+# at lib/geom/label the path below resolves to lib/misc.  Confirm that this
+# is intended, or that the search path for the misc sources is supplied
+# elsewhere (e.g. by a shared Makefile.inc).
+.PATH: ${.CURDIR:H:H}/misc
+
+# Name of the GEOM class built in this directory.
+# NOTE(review): presumably consumed by a shared include -- confirm.
+GEOM_CLASS= label
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/label/Makefile.depend b/lib/geom/label/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/label/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/label/geom_label.c b/lib/geom/label/geom_label.c
new file mode 100644
index 000000000000..f51e87ecb57d
--- /dev/null
+++ b/lib/geom/label/geom_label.c
@@ -0,0 +1,260 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/label/g_label.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+/*
+ * When STATIC_GEOM_CLASSES is defined, the public symbols of this file are
+ * given a "glabel_" prefix; otherwise they keep their plain names.
+ * NOTE(review): presumably the prefix avoids symbol clashes when several
+ * class modules are linked into a single binary -- confirm.
+ */
+#ifdef STATIC_GEOM_CLASSES
+#define PUBSYM(x) glabel_##x
+#else
+#define PUBSYM(x) x
+#endif
+
+/* Version identifiers published by this class module. */
+uint32_t PUBSYM(lib_version) = G_LIB_VERSION;
+uint32_t PUBSYM(version) = G_LABEL_VERSION;
+
+static void label_main(struct gctl_req *req, unsigned flags);
+static void label_clear(struct gctl_req *req);
+static void label_dump(struct gctl_req *req);
+static void label_label(struct gctl_req *req);
+static void label_refresh(struct gctl_req *req);
+
+/*
+ * Command table for the label class.  Each entry lists the verb, its flags,
+ * an optional handler, the option list and a usage string; the table is
+ * terminated by G_CMD_SENTINEL.  Entries that name label_main are dispatched
+ * on the verb inside label_main().
+ * NOTE(review): entries with a NULL handler ("create", "destroy", "stop")
+ * are presumably handled outside this file -- confirm against the core code.
+ */
+struct g_command PUBSYM(class_commands)[] = {
+ { "clear", G_FLAG_VERBOSE, label_main, G_NULL_OPTS,
+ "[-v] dev ..."
+ },
+ { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, G_NULL_OPTS,
+ "[-v] name dev"
+ },
+ { "destroy", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ { "dump", 0, label_main, G_NULL_OPTS,
+ "dev ..."
+ },
+ { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, label_main, G_NULL_OPTS,
+ "[-v] name dev"
+ },
+ { "refresh", 0, label_main, G_NULL_OPTS,
+ "dev ..."
+ },
+ { "stop", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ G_CMD_SENTINEL
+};
+
+/* Set by label_main() when G_FLAG_VERBOSE is passed; read by the handlers. */
+static int verbose = 0;
+
+/*
+ * Common entry point for the verbs implemented in this file.
+ *
+ * Records the verbose flag, fetches the "verb" argument from the request
+ * and runs the matching handler.  A missing or unrecognized verb is
+ * reported through gctl_error().
+ */
+static void
+label_main(struct gctl_req *req, unsigned flags)
+{
+	static const struct {
+		const char *verb;
+		void (*handler)(struct gctl_req *);
+	} handlers[] = {
+		{ "label", label_label },
+		{ "clear", label_clear },
+		{ "dump", label_dump },
+		{ "refresh", label_refresh },
+	};
+	const char *verb;
+	size_t i;
+
+	if (flags & G_FLAG_VERBOSE)
+		verbose = 1;
+
+	verb = gctl_get_ascii(req, "verb");
+	if (verb == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+
+	for (i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++) {
+		if (strcmp(verb, handlers[i].verb) == 0) {
+			handlers[i].handler(req);
+			return;
+		}
+	}
+	gctl_error(req, "Unknown command: %s.", verb);
+}
+
+/*
+ * "label" verb: write label metadata to a provider.
+ *
+ * Expects exactly two arguments: arg0 is the label name and arg1 is the
+ * provider.  Any existing metadata is cleared first, then a fresh
+ * g_label_metadata record is encoded into a 512-byte buffer and stored via
+ * g_metadata_store().  Failures are reported through gctl_error(); the
+ * success message is printed only in verbose mode and only when the store
+ * actually succeeded.
+ */
+static void
+label_label(struct gctl_req *req)
+{
+	struct g_label_metadata md;
+	const char *name, *label;
+	u_char sector[512];
+	int error, nargs;
+
+	bzero(sector, sizeof(sector));
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs != 2) {
+		gctl_error(req, "Invalid number of arguments.");
+		return;
+	}
+
+	/*
+	 * Clear last sector first to spoil all components if device exists.
+	 */
+	name = gctl_get_ascii(req, "arg1");
+	error = g_metadata_clear(name, NULL);
+	if (error != 0) {
+		gctl_error(req, "Can't store metadata on %s: %s.", name,
+		    strerror(error));
+		return;
+	}
+
+	strlcpy(md.md_magic, G_LABEL_MAGIC, sizeof(md.md_magic));
+	md.md_version = G_LABEL_VERSION;
+	label = gctl_get_ascii(req, "arg0");
+	/* Zero the whole field so no stack garbage follows the label text. */
+	bzero(md.md_label, sizeof(md.md_label));
+	strlcpy(md.md_label, label, sizeof(md.md_label));
+	md.md_provsize = g_get_mediasize(name);
+	if (md.md_provsize == 0) {
+		gctl_error(req, "Can't get mediasize of %s: %s.", name,
+		    strerror(errno));
+		return;
+	}
+
+	/*
+	 * Ok, store metadata.
+	 */
+	label_metadata_encode(&md, sector);
+	error = g_metadata_store(name, sector, sizeof(sector));
+	if (error != 0) {
+		fprintf(stderr, "Can't store metadata on %s: %s.\n", name,
+		    strerror(error));
+		gctl_error(req, "Not done.");
+		/* Do not fall through to the success message below. */
+		return;
+	}
+	if (verbose)
+		printf("Metadata value stored on %s.\n", name);
+}
+
+/*
+ * "clear" verb: clear label metadata on each provider named in the request.
+ *
+ * Requires at least one argument.  A failure on one provider is printed to
+ * stderr and recorded with gctl_error(), after which the remaining
+ * providers are still processed.
+ */
+static void
+label_clear(struct gctl_req *req)
+{
+	const char *prov;
+	int argc, idx, rc;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (idx = 0; idx < argc; idx++) {
+		prov = gctl_get_ascii(req, "arg%d", idx);
+		rc = g_metadata_clear(prov, G_LABEL_MAGIC);
+		if (rc == 0) {
+			if (verbose)
+				printf("Metadata cleared on %s.\n", prov);
+		} else {
+			fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+			    prov, strerror(rc));
+			gctl_error(req, "Not fully done.");
+		}
+	}
+}
+
+/*
+ * Print the magic string, metadata version and label of a decoded
+ * metadata record to stdout, one field per line.
+ * NOTE(review): the padding inside these format strings may have been
+ * collapsed in this rendering of the patch -- confirm against the
+ * repository before applying.
+ */
+static void
+label_metadata_dump(const struct g_label_metadata *md)
+{
+	printf(" Magic string: %s\n"
+	    "Metadata version: %u\n"
+	    " Label: %s\n",
+	    md->md_magic, (u_int)md->md_version, md->md_label);
+}
+
+/*
+ * "dump" verb: read, decode and print the label metadata of each provider
+ * named in the request.
+ *
+ * Requires at least one argument.  A read failure is printed to stderr and
+ * recorded with gctl_error(), after which the remaining providers are
+ * still processed.
+ */
+static void
+label_dump(struct gctl_req *req)
+{
+	struct g_label_metadata decoded, raw;
+	const char *prov;
+	int argc, idx, rc;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (idx = 0; idx < argc; idx++) {
+		prov = gctl_get_ascii(req, "arg%d", idx);
+		rc = g_metadata_read(prov, (u_char *)&raw, sizeof(raw),
+		    G_LABEL_MAGIC);
+		if (rc == 0) {
+			/* The on-disk bytes are decoded before printing. */
+			label_metadata_decode((u_char *)&raw, &decoded);
+			printf("Metadata on %s:\n", prov);
+			label_metadata_dump(&decoded);
+			printf("\n");
+		} else {
+			fprintf(stderr, "Can't read metadata from %s: %s.\n",
+			    prov, strerror(rc));
+			gctl_error(req, "Not fully done.");
+		}
+	}
+}
+
+/*
+ * "refresh" verb: open each named provider with g_open(name, 1) and close
+ * it again.
+ * NOTE(review): presumably the open/close cycle is what makes the kernel
+ * re-read the metadata -- confirm against the GEOM documentation.
+ *
+ * Requires at least one argument.  A provider that cannot be opened is
+ * reported on stderr and recorded with gctl_error(), matching label_clear()
+ * and label_dump(); the remaining providers are still processed.
+ */
+static void
+label_refresh(struct gctl_req *req)
+{
+	const char *name;
+	int i, nargs, fd;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (i = 0; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		fd = g_open(name, 1);
+		if (fd == -1) {
+			/* Report on stderr and flag the request as failed. */
+			fprintf(stderr,
+			    "Can't refresh metadata from %s: %s.\n",
+			    name, strerror(errno));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		printf("Metadata from %s refreshed.\n", name);
+		(void)g_close(fd);
+	}
+}
diff --git a/lib/geom/label/glabel.8 b/lib/geom/label/glabel.8
new file mode 100644
index 000000000000..c426a06ea355
--- /dev/null
+++ b/lib/geom/label/glabel.8
@@ -0,0 +1,280 @@
+.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 12, 2017
+.Dt GLABEL 8
+.Os
+.Sh NAME
+.Nm glabel
+.Nd "disk labelization control utility"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Ar name
+.Ar dev
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm label
+.Op Fl v
+.Ar name
+.Ar dev
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar dev ...
+.Nm
+.Cm dump
+.Ar dev ...
+.Nm
+.Cm refresh
+.Ar dev ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for GEOM provider labelization.
+A label can be set up on a GEOM provider in two ways:
+.Dq manual
+or
+.Dq automatic .
+When using the
+.Dq manual
+method, no metadata are stored on the devices, so a label has to be configured
+by hand every time it is needed.
+The
+.Dq automatic
+method uses on-disk metadata to store the label and detect it automatically in
+the future.
+.Pp
+This GEOM class also provides volume label detection for file systems.
+Those labels cannot be set with
+.Nm ,
+but must be set with the appropriate file system utility, e.g.\& for UFS
+the file system label is set with
+.Xr tunefs 8 .
+Currently supported file systems are:
+.Pp
+.Bl -bullet -offset indent -compact
+.It
+UFS1 volume names (directory
+.Pa /dev/ufs/ ) .
+.It
+UFS2 volume names (directory
+.Pa /dev/ufs/ ) .
+.It
+UFS1 file system IDs (directory
+.Pa /dev/ufsid/ ) .
+.It
+UFS2 file system IDs (directory
+.Pa /dev/ufsid/ ) .
+.It
+MSDOSFS (FAT12, FAT16, FAT32) (directory
+.Pa /dev/msdosfs/ ) .
+.It
+CD ISO9660 (directory
+.Pa /dev/iso9660/ ) .
+.It
+EXT2FS (directory
+.Pa /dev/ext2fs/ ) .
+.It
+REISERFS (directory
+.Pa /dev/reiserfs/ ) .
+.It
+NTFS (directory
+.Pa /dev/ntfs/ ) .
+.El
+.Pp
+Support for partition metadata is implemented for:
+.Pp
+.Bl -bullet -offset indent -compact
+.It
+GPT labels (directory
+.Pa /dev/gpt/ ) .
+.It
+GPT UUIDs (directory
+.Pa /dev/gptid/ ) .
+.El
+.Pp
+Generic disk ID strings are exported as labels in the format
+.Pa /dev/diskid/GEOM_CLASS-ident ,
+for example
+.Pa /dev/diskid/DISK-6QG3Z026 .
+.Pp
+Generic labels created and managed solely by
+.Xr glabel 8
+are created in the
+.Pa /dev/label/
+directory.
+.Pp
+Note that for all label types, nested GEOM classes will cause additional
+device nodes to be created, with context-specific data appended to their
+names.
+For example, for every node like
+.Pa /dev/label/bigdisk
+there will be additional entries for any partitions which the device
+contains, like
+.Pa /dev/label/bigdiskp1
+and
+.Pa /dev/label/bigdiskp1a .
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm create
+Create temporary label
+.Ar name
+for the given provider.
+This is the
+.Dq manual
+method.
+The kernel module
+.Pa geom_label.ko
+will be loaded if it is not loaded already.
+.It Cm label
+Set up a label
+.Ar name
+for the given provider.
+This is the
+.Dq automatic
+method, where metadata is stored in a provider's last sector.
+The kernel module
+.Pa geom_label.ko
+will be loaded if it is not loaded already.
+.It Cm stop
+Turn off the given label by its
+.Ar name .
+This command does not touch on-disk metadata!
+.It Cm destroy
+Same as
+.Cm stop .
+.It Cm clear
+Clear metadata on the given devices.
+.It Cm dump
+Dump metadata stored on the given devices.
+.It Cm refresh
+Refresh / rediscover metadata from the given devices.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width indent
+.It Fl f
+Force the removal of the specified labels.
+.It Fl v
+Be more verbose.
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm LABEL
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.label.debug : No 0
+Debug level of the
+.Nm LABEL
+GEOM class.
+This can be set to a number between 0 and 2 inclusive.
+If set to 0 minimal debug information is printed, and if set to 2 the
+maximum amount of debug information is printed.
+.El
+.Bl -tag -width indent
+.It Va kern.geom.label.*.enable : No 1
+Most
+.Nm LABEL
+providers implement a
+.Xr sysctl 8
+flag and a tunable variable named in the above format.
+This flag controls whether the label provider will be active, tasting devices
+and creating label nodes in the
+.Xr devfs 5
+tree.
+It is sometimes desirable to disable certain label types if
+they conflict with other classes in complex GEOM topologies.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to set up a label for disk
+.Dq Li da2 ,
+create a file system on it, and mount it:
+.Bd -literal -offset indent
+glabel label -v usr /dev/da2
+newfs /dev/label/usr
+mount /dev/label/usr /usr
+[...]
+umount /usr
+glabel stop usr
+glabel unload
+.Ed
+.Pp
+The next example shows how to set up a label for a UFS file system:
+.Bd -literal -offset indent
+tunefs -L data /dev/da4s1a
+mount /dev/ufs/data /mnt/data
+.Ed
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr loader.conf 5 ,
+.Xr geom 8 ,
+.Xr mount 8 ,
+.Xr newfs 8 ,
+.Xr sysctl 8 ,
+.Xr tunefs 8 ,
+.Xr umount 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 5.3 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
diff --git a/lib/geom/mirror/Makefile b/lib/geom/mirror/Makefile
new file mode 100644
index 000000000000..553e44787d8b
--- /dev/null
+++ b/lib/geom/mirror/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+# Package this library is assigned to.
+PACKAGE=runtime
+# NOTE(review): each ':H' strips the last path component, so with this file
+# at lib/geom/mirror the path below resolves to lib/misc.  Confirm that this
+# is intended, or that the search path for the misc sources is supplied
+# elsewhere (e.g. by a shared Makefile.inc).
+.PATH: ${.CURDIR:H:H}/misc
+
+# Name of the GEOM class built in this directory.
+# NOTE(review): presumably consumed by a shared include -- confirm.
+GEOM_CLASS= mirror
+
+# Extra library dependency for this class.
+LIBADD= md
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/mirror/Makefile.depend b/lib/geom/mirror/Makefile.depend
new file mode 100644
index 000000000000..7902e1927044
--- /dev/null
+++ b/lib/geom/mirror/Makefile.depend
@@ -0,0 +1,20 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ lib/libmd \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/mirror/geom_mirror.c b/lib/geom/mirror/geom_mirror.c
new file mode 100644
index 000000000000..a1b399338814
--- /dev/null
+++ b/lib/geom/mirror/geom_mirror.c
@@ -0,0 +1,500 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2009 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <err.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/mirror/g_mirror.h>
+#include <core/geom.h>
+#include <misc/subr.h>
+
+uint32_t lib_version = G_LIB_VERSION;	/* exported; presumably checked by geom(8) when loading this class library -- confirm */
+uint32_t version = G_MIRROR_VERSION;	/* exported MIRROR class version; also written to md_version by mirror_label() */
+
+#define GMIRROR_BALANCE "load"	/* default for -b in "create" and "label" */
+#define GMIRROR_SLICE "4096"	/* default for -s in "create" and "label" */
+#define GMIRROR_PRIORITY "0"	/* default for -p in "insert" */
+
+static void mirror_main(struct gctl_req *req, unsigned flags);
+static void mirror_activate(struct gctl_req *req);
+static void mirror_clear(struct gctl_req *req);
+static void mirror_dump(struct gctl_req *req);
+static void mirror_label(struct gctl_req *req);
+static void mirror_resize(struct gctl_req *req, unsigned flags);
+
+/*
+ * Command table of the MIRROR class.
+ *
+ * Each entry lists, in order: the verb, its flags, an optional handler
+ * implemented in this file, the options the verb accepts and the usage text.
+ * Entries with a NULL handler have no local implementation; presumably the
+ * generic geom(8) code passes them on to the kernel class -- confirm in
+ * sbin/geom/core.
+ *
+ * Every verb flagged G_FLAG_VERBOSE advertises "-v" in its usage text, to
+ * stay in sync with gmirror(8).
+ */
+struct g_command class_commands[] = {
+	{ "activate", G_FLAG_VERBOSE, mirror_main, G_NULL_OPTS,
+	    "[-v] name prov ..."
+	},
+	{ "clear", G_FLAG_VERBOSE, mirror_main, G_NULL_OPTS,
+	    "[-v] prov ..."
+	},
+	{ "configure", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'a', "autosync", NULL, G_TYPE_BOOL },
+		{ 'b', "balance", "", G_TYPE_STRING },
+		{ 'd', "dynamic", NULL, G_TYPE_BOOL },
+		{ 'f', "failsync", NULL, G_TYPE_BOOL },
+		{ 'F', "nofailsync", NULL, G_TYPE_BOOL },
+		{ 'h', "hardcode", NULL, G_TYPE_BOOL },
+		{ 'n', "noautosync", NULL, G_TYPE_BOOL },
+		{ 'p', "priority", "-1", G_TYPE_NUMBER },
+		{ 's', "slice", "-1", G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-adfFhnv] [-b balance] [-s slice] name\n"
+	    "[-v] -p priority name prov"
+	},
+	{ "create", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'b', "balance", GMIRROR_BALANCE, G_TYPE_STRING },
+		{ 'F', "nofailsync", NULL, G_TYPE_BOOL },
+		{ 'n', "noautosync", NULL, G_TYPE_BOOL },
+		{ 's', "slice", GMIRROR_SLICE, G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-Fnv] [-b balance] [-s slice] name prov ..."
+	},
+	{ "deactivate", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+	    "[-v] name prov ..."
+	},
+	{ "destroy", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-fv] name ..."
+	},
+	{ "dump", 0, mirror_main, G_NULL_OPTS,
+	    "prov ..."
+	},
+	/* The usage text used to omit "[-v]" although the verb accepts it. */
+	{ "forget", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+	    "[-v] name ..."
+	},
+	{ "label", G_FLAG_VERBOSE, mirror_main,
+	    {
+		{ 'b', "balance", GMIRROR_BALANCE, G_TYPE_STRING },
+		{ 'F', "nofailsync", NULL, G_TYPE_BOOL },
+		{ 'h', "hardcode", NULL, G_TYPE_BOOL },
+		{ 'n', "noautosync", NULL, G_TYPE_BOOL },
+		{ 's', "slice", GMIRROR_SLICE, G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-Fhnv] [-b balance] [-s slice] name prov ..."
+	},
+	{ "insert", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'h', "hardcode", NULL, G_TYPE_BOOL },
+		{ 'i', "inactive", NULL, G_TYPE_BOOL },
+		{ 'p', "priority", GMIRROR_PRIORITY, G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-hiv] [-p priority] name prov ..."
+	},
+	{ "rebuild", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+	    "[-v] name prov ..."
+	},
+	{ "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+	    "[-v] name prov ..."
+	},
+	{ "resize", G_FLAG_VERBOSE, mirror_resize,
+	    {
+		{ 's', "size", "*", G_TYPE_STRING },
+		G_OPT_SENTINEL
+	    },
+	    "[-s size] [-v] name"
+	},
+	{ "stop", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-fv] name ..."
+	},
+	G_CMD_SENTINEL
+};
+
+static int verbose = 0;	/* set to 1 by mirror_main() when G_FLAG_VERBOSE is passed */
+
+/*
+ * Dispatcher for the verbs that are implemented in this file
+ * ("label", "clear", "dump" and "activate").
+ *
+ * req   - control request; its "verb" parameter selects the handler.
+ * flags - command flags; G_FLAG_VERBOSE turns on the file-wide verbose mode.
+ *
+ * A missing or unknown verb is reported through gctl_error().
+ */
+static void
+mirror_main(struct gctl_req *req, unsigned flags)
+{
+	const char *verb;
+
+	if (flags & G_FLAG_VERBOSE)
+		verbose = 1;
+
+	verb = gctl_get_ascii(req, "verb");
+	if (verb == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+
+	if (!strcmp(verb, "label")) {
+		mirror_label(req);
+		return;
+	}
+	if (!strcmp(verb, "clear")) {
+		mirror_clear(req);
+		return;
+	}
+	if (!strcmp(verb, "dump")) {
+		mirror_dump(req);
+		return;
+	}
+	if (!strcmp(verb, "activate")) {
+		mirror_activate(req);
+		return;
+	}
+
+	gctl_error(req, "Unknown command: %s.", verb);
+}
+
+/*
+ * Implement "gmirror label": build the mirror metadata and write it to the
+ * last sector of every component.
+ *
+ * Request parameters read: "nargs", "arg0" (mirror name), "arg1".."argN"
+ * (components), "slice", "balance", "noautosync", "nofailsync", "hardcode".
+ *
+ * The work is done in three passes over the components:
+ *   1. compute the common sector size (LCM) and the smallest media size;
+ *   2. clear the metadata sector of every component;
+ *   3. store the new metadata, using the component position as priority.
+ * Failures in passes 1 and 2 abort the command; a failure in pass 3 is
+ * reported, flagged with "Not fully done." and the remaining components
+ * are still processed.
+ */
+static void
+mirror_label(struct gctl_req *req)
+{
+	struct g_mirror_metadata md;
+	u_char sector[512];
+	const char *str;
+	unsigned sectorsize;
+	off_t mediasize;
+	intmax_t val;
+	int error, i, nargs, bal, hardcode;
+
+	bzero(sector, sizeof(sector));
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 2) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	/* Fields shared by all components. */
+	strlcpy(md.md_magic, G_MIRROR_MAGIC, sizeof(md.md_magic));
+	md.md_version = G_MIRROR_VERSION;
+	str = gctl_get_ascii(req, "arg0");
+	strlcpy(md.md_name, str, sizeof(md.md_name));
+	md.md_mid = arc4random();
+	md.md_all = nargs - 1;
+	md.md_mflags = 0;
+	md.md_dflags = 0;
+	md.md_genid = 0;
+	md.md_syncid = 1;
+	md.md_sync_offset = 0;
+	val = gctl_get_intmax(req, "slice");
+	md.md_slice = val;
+	str = gctl_get_ascii(req, "balance");
+	bal = balance_id(str);
+	if (bal == -1) {
+		gctl_error(req, "Invalid balance algorithm.");
+		return;
+	}
+	md.md_balance = bal;
+	if (gctl_get_int(req, "noautosync"))
+		md.md_mflags |= G_MIRROR_DEVICE_FLAG_NOAUTOSYNC;
+	if (gctl_get_int(req, "nofailsync"))
+		md.md_mflags |= G_MIRROR_DEVICE_FLAG_NOFAILSYNC;
+	hardcode = gctl_get_int(req, "hardcode");
+
+	/*
+	 * Calculate sectorsize by finding least common multiple from
+	 * sectorsizes of every disk and find the smallest mediasize.
+	 */
+	mediasize = 0;
+	sectorsize = 0;
+	for (i = 1; i < nargs; i++) {
+		unsigned ssize;
+		off_t msize;
+
+		str = gctl_get_ascii(req, "arg%d", i);
+		msize = g_get_mediasize(str);
+		ssize = g_get_sectorsize(str);
+		if (msize == 0 || ssize == 0) {
+			gctl_error(req, "Can't get informations about %s: %s.",
+			    str, strerror(errno));
+			return;
+		}
+		/* One sector per component is not available to the mirror. */
+		msize -= ssize;
+		if (mediasize == 0 || (mediasize > 0 && msize < mediasize))
+			mediasize = msize;
+		if (sectorsize == 0)
+			sectorsize = ssize;
+		else
+			sectorsize = g_lcm(sectorsize, ssize);
+	}
+	md.md_mediasize = mediasize;
+	md.md_sectorsize = sectorsize;
+	/* Round the mirror size down to a multiple of its sector size. */
+	md.md_mediasize -= (md.md_mediasize % md.md_sectorsize);
+
+	/*
+	 * Clear last sector first, to spoil all components if device exists.
+	 */
+	for (i = 1; i < nargs; i++) {
+		str = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_clear(str, NULL);
+		if (error != 0) {
+			/* This is the clear pass; say so in the message. */
+			gctl_error(req, "Can't clear metadata on %s: %s.", str,
+			    strerror(error));
+			return;
+		}
+	}
+
+	/*
+	 * Ok, store metadata (use disk number as priority).
+	 */
+	for (i = 1; i < nargs; i++) {
+		str = gctl_get_ascii(req, "arg%d", i);
+		md.md_did = arc4random();
+		md.md_priority = i - 1;
+		md.md_provsize = g_get_mediasize(str);
+		if (md.md_provsize == 0) {
+			/*
+			 * The provider was readable in the first pass but is
+			 * not any more; report it like any other per-component
+			 * failure instead of aborting the whole process.
+			 */
+			fprintf(stderr, "Can't get size of %s: %s.\n",
+			    str, strerror(errno));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		if (!hardcode)
+			bzero(md.md_provider, sizeof(md.md_provider));
+		else {
+			/* Record the provider name without the /dev/ prefix. */
+			if (strncmp(str, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+				str += sizeof(_PATH_DEV) - 1;
+			strlcpy(md.md_provider, str, sizeof(md.md_provider));
+		}
+		mirror_metadata_encode(&md, sector);
+		error = g_metadata_store(str, sector, sizeof(sector));
+		if (error != 0) {
+			fprintf(stderr, "Can't store metadata on %s: %s.\n",
+			    str, strerror(error));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		if (verbose)
+			printf("Metadata value stored on %s.\n", str);
+	}
+}
+
+/*
+ * Implement "gmirror clear": wipe MIRROR metadata from every provider named
+ * on the command line ("arg0".."argN").
+ *
+ * A provider that cannot be cleared is reported on stderr and the request
+ * is flagged with "Not fully done."; the remaining providers are still
+ * processed.
+ */
+static void
+mirror_clear(struct gctl_req *req)
+{
+	const char *prov;
+	int argc, idx, rc;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (idx = 0; idx < argc; idx++) {
+		prov = gctl_get_ascii(req, "arg%d", idx);
+		rc = g_metadata_clear(prov, G_MIRROR_MAGIC);
+		if (rc == 0) {
+			if (verbose)
+				printf("Metadata cleared on %s.\n", prov);
+			continue;
+		}
+		fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+		    prov, strerror(rc));
+		gctl_error(req, "Not fully done.");
+	}
+}
+
+/*
+ * Implement "gmirror dump": read, decode and print the MIRROR metadata of
+ * every provider named on the command line ("arg0".."argN").
+ *
+ * Providers whose metadata cannot be read or fails to decode are reported
+ * on stderr, the request is flagged with "Not fully done." and processing
+ * moves on to the next provider.
+ */
+static void
+mirror_dump(struct gctl_req *req)
+{
+	struct g_mirror_metadata decoded, raw;
+	const char *prov;
+	int argc, idx, rc;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (idx = 0; idx < argc; idx++) {
+		prov = gctl_get_ascii(req, "arg%d", idx);
+		rc = g_metadata_read(prov, (u_char *)&raw, sizeof(raw),
+		    G_MIRROR_MAGIC);
+		if (rc != 0) {
+			fprintf(stderr, "Can't read metadata from %s: %s.\n",
+			    prov, strerror(rc));
+			gctl_error(req, "Not fully done.");
+		} else if (mirror_metadata_decode((u_char *)&raw,
+		    &decoded) != 0) {
+			fprintf(stderr, "MD5 hash mismatch for %s, skipping.\n",
+			    prov);
+			gctl_error(req, "Not fully done.");
+		} else {
+			printf("Metadata on %s:\n", prov);
+			mirror_metadata_dump(&decoded);
+			printf("\n");
+		}
+	}
+}
+
+/*
+ * Implement "gmirror activate": clear the INACTIVE flag in the on-disk
+ * metadata of the given components.
+ *
+ * "arg0" is the mirror name, "arg1".."argN" are the components.  A component
+ * is skipped (with a message on stderr and "Not fully done." set on the
+ * request) when its metadata cannot be read, fails to decode, belongs to a
+ * different mirror, or cannot be written back.
+ */
+static void
+mirror_activate(struct gctl_req *req)
+{
+	struct g_mirror_metadata md, tmpmd;
+	const char *name, *path;
+	int error, i, nargs;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 2) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+	name = gctl_get_ascii(req, "arg0");
+
+	for (i = 1; i < nargs; i++) {
+		path = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_read(path, (u_char *)&tmpmd, sizeof(tmpmd),
+		    G_MIRROR_MAGIC);
+		if (error != 0) {
+			fprintf(stderr, "Cannot read metadata from %s: %s.\n",
+			    path, strerror(error));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		if (mirror_metadata_decode((u_char *)&tmpmd, &md) != 0) {
+			fprintf(stderr,
+			    "MD5 hash mismatch for provider %s, skipping.\n",
+			    path);
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		/* Only touch components that belong to the named mirror. */
+		if (strcmp(md.md_name, name) != 0) {
+			fprintf(stderr,
+			    "Provider %s is not the mirror %s component.\n",
+			    path, name);
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		md.md_dflags &= ~G_MIRROR_DISK_FLAG_INACTIVE;
+		/* tmpmd is reused as the buffer for the encoded metadata. */
+		mirror_metadata_encode(&md, (u_char *)&tmpmd);
+		error = g_metadata_store(path, (u_char *)&tmpmd, sizeof(tmpmd));
+		if (error != 0) {
+			/* The metadata is written to the provider. */
+			fprintf(stderr, "Cannot write metadata to %s: %s.\n",
+			    path, strerror(error));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		if (verbose)
+			printf("Provider %s activated.\n", path);
+	}
+}
+
+/*
+ * Look up the class called "name" in the GEOM mesh.
+ * Returns the matching class, or NULL when the mesh has no such class.
+ */
+static struct gclass *
+find_class(struct gmesh *mesh, const char *name)
+{
+	struct gclass *cur;
+
+	/* LIST_FOREACH leaves cur == NULL when the list is exhausted. */
+	LIST_FOREACH(cur, &mesh->lg_class, lg_class) {
+		if (!strcmp(cur->lg_name, name))
+			break;
+	}
+	return (cur);
+}
+
+/*
+ * Look up the geom called "name" among the geoms of class "classp".
+ * Returns the matching geom, or NULL when the class has no such geom.
+ */
+static struct ggeom *
+find_geom(struct gclass *classp, const char *name)
+{
+	struct ggeom *cur;
+
+	/* LIST_FOREACH leaves cur == NULL when the list is exhausted. */
+	LIST_FOREACH(cur, &classp->lg_geom, lg_geom) {
+		if (!strcmp(cur->lg_name, name))
+			break;
+	}
+	return (cur);
+}
+
+/*
+ * Implement "gmirror resize": translate the user supplied size into bytes
+ * and issue the request.
+ *
+ * Request parameters read: "nargs", "class", "arg0" (mirror name), "size".
+ * With the default size "*" the new size is the largest one every component
+ * can hold; otherwise the value is parsed by g_parse_lba() and multiplied
+ * by the provider's sector size.  The "size" parameter is then replaced
+ * with the byte count and the request is issued from here.
+ *
+ * Unlike the other handlers in this file, failures terminate the process
+ * through errc()/errx()/abort() rather than being reported via gctl_error().
+ */
+static void
+mirror_resize(struct gctl_req *req, unsigned flags __unused)
+{
+	struct gmesh mesh;
+	struct gclass *classp;
+	struct ggeom *gp;
+	struct gprovider *pp;
+	struct gconsumer *cp;
+	off_t size;
+	int error, nargs;
+	const char *name;
+	char ssize[30];
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+	error = geom_gettree(&mesh);
+	if (error)
+		errc(EXIT_FAILURE, error, "Cannot get GEOM tree");
+	name = gctl_get_ascii(req, "class");
+	if (name == NULL)
+		abort();
+	classp = find_class(&mesh, name);
+	if (classp == NULL)
+		errx(EXIT_FAILURE, "Class %s not found.", name);
+	name = gctl_get_ascii(req, "arg0");
+	if (name == NULL)
+		abort();
+	gp = find_geom(classp, name);
+	if (gp == NULL)
+		errx(EXIT_FAILURE, "No such geom: %s.", name);
+	pp = LIST_FIRST(&gp->lg_provider);
+	if (pp == NULL)
+		errx(EXIT_FAILURE, "Provider of geom %s not found.", name);
+	size = pp->lg_mediasize;
+	name = gctl_get_ascii(req, "size");
+	if (name == NULL)
+		errx(EXIT_FAILURE, "The size is not specified.");
+	if (*name == '*') {
+		/* Usable bytes of a component: its size minus one sector. */
+#define CSZ(c) ((c)->lg_provider->lg_mediasize - \
+    (c)->lg_provider->lg_sectorsize)
+		/*
+		 * Find the maximum possible size: first raise "size" to the
+		 * largest component, then lower it to the smallest one, so
+		 * the result is the size every component can hold.
+		 */
+		LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) {
+			if (CSZ(cp) > size)
+				size = CSZ(cp);
+		}
+		LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) {
+			if (CSZ(cp) < size)
+				size = CSZ(cp);
+		}
+#undef CSZ
+		/* errx() appends the newline itself. */
+		if (size == pp->lg_mediasize)
+			errx(EXIT_FAILURE,
+			    "Cannot expand provider %s",
+			    pp->lg_name);
+	} else {
+		error = g_parse_lba(name, pp->lg_sectorsize, &size);
+		if (error)
+			errc(EXIT_FAILURE, error, "Invalid size param");
+		size *= pp->lg_sectorsize;
+	}
+	snprintf(ssize, sizeof(ssize), "%ju", (uintmax_t)size);
+	gctl_change_param(req, "size", -1, ssize);
+	geom_deletetree(&mesh);
+	gctl_issue(req);
+}
diff --git a/lib/geom/mirror/gmirror.8 b/lib/geom/mirror/gmirror.8
new file mode 100644
index 000000000000..128138bbd2e4
--- /dev/null
+++ b/lib/geom/mirror/gmirror.8
@@ -0,0 +1,436 @@
+.\" Copyright (c) 2004-2009 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 30, 2017
+.Dt GMIRROR 8
+.Os
+.Sh NAME
+.Nm gmirror
+.Nd "control utility for mirrored devices"
+.Sh SYNOPSIS
+.Nm
+.Cm label
+.Op Fl Fhnv
+.Op Fl b Ar balance
+.Op Fl s Ar slice
+.Ar name
+.Ar prov ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm create
+.Op Fl Fnv
+.Op Fl b Ar balance
+.Op Fl s Ar slice
+.Ar name
+.Ar prov ...
+.Nm
+.Cm configure
+.Op Fl adfFhnv
+.Op Fl b Ar balance
+.Op Fl s Ar slice
+.Ar name
+.Nm
+.Cm configure
+.Op Fl v
+.Fl p Ar priority
+.Ar name
+.Ar prov
+.Nm
+.Cm rebuild
+.Op Fl v
+.Ar name
+.Ar prov ...
+.Nm
+.Cm resize
+.Op Fl v
+.Op Fl s Ar size
+.Ar name
+.Nm
+.Cm insert
+.Op Fl hiv
+.Op Fl p Ar priority
+.Ar name
+.Ar prov ...
+.Nm
+.Cm remove
+.Op Fl v
+.Ar name
+.Ar prov ...
+.Nm
+.Cm activate
+.Op Fl v
+.Ar name
+.Ar prov ...
+.Nm
+.Cm deactivate
+.Op Fl v
+.Ar name
+.Ar prov ...
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm forget
+.Op Fl v
+.Ar name ...
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for mirror (RAID1) configurations.
+After a mirror's creation, all components are detected and configured
+automatically.
+All operations like failure detection, stale component detection, rebuild
+of stale components, etc.\& are also done automatically.
+The
+.Nm
+utility uses on-disk metadata (stored in the provider's last sector) to store all needed
+information.
+Since the last sector is used for this purpose, it is possible to place a root
+file system on a mirror.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm deactivate"
+.It Cm label
+Create a mirror.
+The order of components is important, because a component's priority is based on its position
+(starting from 0 to 255).
+The component with the biggest priority is used by the
+.Cm prefer
+balance algorithm
+and is also used as a master component when resynchronization is needed,
+e.g.\& after a power failure when the device was open for writing.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl b Ar balance"
+.It Fl b Ar balance
+Specifies balance algorithm to use, one of:
+.Bl -tag -width ".Cm round-robin"
+.It Cm load
+Read from the component with the lowest load.
+This is the default balance algorithm.
+.It Cm prefer
+Read from the component with the biggest priority.
+.It Cm round-robin
+Use round-robin algorithm when choosing component to read.
+.It Cm split
+Split read requests that are bigger than or equal to the slice size into N
+pieces, where N is the number of active components.
+.El
+.It Fl F
+Do not synchronize after a power failure or system crash.
+Assumes device is in consistent state.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl n
+Turn off autosynchronization of stale components.
+.It Fl s Ar slice
+When using the
+.Cm split
+balance algorithm and an I/O READ request is bigger than or equal to this value,
+the I/O request will be split into N pieces, where N is the number of active
+components.
+Defaults to 4096 bytes.
+.El
+.It Cm clear
+Clear metadata on the given providers.
+.It Cm create
+Similar to
+.Cm label ,
+but creates mirror without storing on-disk metadata in last sector.
+This special "manual" operation mode assumes some external control to manage
+mirror detection after reboot, device hot-plug and other external events.
+.It Cm configure
+Configure the given device.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl p Ar priority"
+.It Fl a
+Turn on autosynchronization of stale components.
+.It Fl b Ar balance
+Specifies balance algorithm to use.
+.It Fl d
+Do not hardcode providers' names in metadata.
+.It Fl f
+Synchronize device after a power failure or system crash.
+.It Fl F
+Do not synchronize after a power failure or system crash.
+Assumes device is in consistent state.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl n
+Turn off autosynchronization of stale components.
+.It Fl p Ar priority
+Specifies priority for the given component
+.Ar prov .
+.It Fl s Ar slice
+Specifies slice size for
+.Cm split
+balance algorithm.
+.El
+.It Cm rebuild
+Rebuild the given mirror components forcibly.
+If autosynchronization was not turned off for the given device, this command
+should be unnecessary.
+.It Cm resize
+Change the size of the given mirror.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl s Ar size"
+.It Fl s Ar size
+The new size of the mirror, expressed in logical block numbers.
+If this option is omitted, the size is automatically calculated as the
+maximum available size.
+.El
+.It Cm insert
+Add the given component(s) to the existing mirror.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl p Ar priority"
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl i
+Mark component(s) as inactive immediately after insertion.
+.It Fl p Ar priority
+Specifies priority of the given component(s).
+.El
+.It Cm remove
+Remove the given component(s) from the mirror and clear metadata on them.
+.It Cm activate
+Activate the given component(s), which were marked as inactive before.
+.It Cm deactivate
+Mark the given component(s) as inactive, so they will not be automatically
+connected to the mirror.
+.It Cm destroy
+Stop the given mirror and clear metadata on all its components.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Stop the given mirror even if it is opened.
+.El
+.It Cm forget
+Forget about components which are not connected.
+This command is useful when a disk has failed and cannot be reconnected, preventing the
+.Cm remove
+command from being used to remove it.
+.It Cm stop
+Stop the given mirror.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Stop the given mirror even if it is opened.
+.El
+.It Cm dump
+Dump metadata stored on the given providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl v"
+.It Fl v
+Be more verbose.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+Use 3 disks to set up a mirror.
+Choose split balance algorithm, split only
+requests which are bigger than or equal to 2kB.
+Create file system,
+mount it, then unmount it and stop device:
+.Bd -literal -offset indent
+gmirror label -v -b split -s 2048 data da0 da1 da2
+newfs /dev/mirror/data
+mount /dev/mirror/data /mnt
+\&...
+umount /mnt
+gmirror stop data
+gmirror unload
+.Ed
+.Pp
+Create a mirror on disk with valid data (note that the last sector of the disk
+will be overwritten).
+Add another disk to this mirror,
+so it will be synchronized with existing disk:
+.Bd -literal -offset indent
+gmirror label -v -b round-robin data da0
+gmirror insert data da1
+.Ed
+.Pp
+Create a mirror, but do not use automatic synchronization feature.
+Add another disk and rebuild it:
+.Bd -literal -offset indent
+gmirror label -v -n -b load data da0 da1
+gmirror insert data da2
+gmirror rebuild data da2
+.Ed
+.Pp
+One disk failed.
+Replace it with a brand new one:
+.Bd -literal -offset indent
+gmirror forget data
+gmirror insert data da1
+.Ed
+.Pp
+Create a mirror, deactivate one component, do the backup and connect it again.
+It will not be resynchronized, if there is no need to do so (there were no writes in
+the meantime):
+.Bd -literal -offset indent
+gmirror label data da0 da1
+gmirror deactivate data da1
+dd if=/dev/da1 of=/backup/data.img bs=1m
+gmirror activate data da1
+.Ed
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to configure behavior for all mirrors.
+.Bl -tag -width indent
+.It Va kern.geom.mirror.debug
+Control the verbosity of kernel logging related to mirrors.
+A value larger than 0 will enable debug logging.
+.It Va kern.geom.mirror.timeout
+The amount of time, in seconds, to wait for all copies of a mirror to
+appear before starting the mirror.
+Disks that appear after the mirror has been started are not automatically
+added to the mirror.
+.It Va kern.geom.mirror.idletime
+The amount of time, in seconds, which must elapse after the last write to
+a mirror before that mirror is marked clean.
+Clean mirrors do not need to be synchronized after a power failure or
+system crash.
+A small value may result in frequent overwrites of the disks' metadata
+sectors, and thus may reduce the longevity of the disks.
+.It Va kern.geom.mirror.disconnect_on_failure
+Determine whether a disk is automatically removed from its mirror when an
+I/O request to that disk fails.
+.It Va kern.geom.mirror.sync_requests
+The number of parallel I/O requests used while synchronizing a mirror.
+This parameter may only be configured as a
+.Xr loader.conf 5
+tunable.
+.It Va kern.geom.mirror.sync_update_period
+The period, in seconds, at which a synchronizing mirror's metadata is
+updated.
+Periodic updates are used to record a synchronization's progress so that
+an interrupted synchronization may be resumed starting at the recorded
+offset, rather than at the beginning.
+A smaller value results in more accurate progress tracking, but also
+increases the number of non-sequential writes to the disk being synchronized.
+If the sysctl value is 0, no updates are performed until the synchronization
+is complete.
+.El
+.Sh NOTES
+Doing kernel dumps to
+.Nm
+providers is possible, but some conditions have to be met.
+First of all, a kernel dump will go only to one component and
+.Nm
+always chooses the component with the highest priority.
+Reading a dump from the mirror on boot will only work if the
+.Cm prefer
+balance algorithm is used (that way
+.Nm
+will read only from the component with the highest priority).
+If you use a different balance algorithm, you should add:
+.Bd -literal -offset indent
+gmirror configure -b prefer data
+.Ed
+.Pp
+to the
+.Pa /etc/rc.early
+script and:
+.Bd -literal -offset indent
+gmirror configure -b round-robin data
+.Ed
+.Pp
+to the
+.Pa /etc/rc.local
+script.
+The decision which component to choose for dumping is made when
+.Xr dumpon 8
+is called.
+If a component with a higher priority becomes available on the next boot,
+the prefer algorithm will choose to read from it and
+.Xr savecore 8
+will find nothing.
+If the component with the highest priority is being synchronized on the next
+boot, the prefer balance algorithm will read from the next one and thus will
+find nothing there.
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr dumpon 8 ,
+.Xr geom 8 ,
+.Xr gvinum 8 ,
+.Xr mount 8 ,
+.Xr newfs 8 ,
+.Xr savecore 8 ,
+.Xr sysctl 8 ,
+.Xr umount 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 5.3 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
+.Sh BUGS
+There should be a way to change a component's priority inside a running mirror.
+.Pp
+There should be a section with an implementation description.
diff --git a/lib/geom/mountver/Makefile b/lib/geom/mountver/Makefile
new file mode 100644
index 000000000000..36c1e01e112a
--- /dev/null
+++ b/lib/geom/mountver/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+# This Makefile now lives in lib/geom/mountver, so ${.CURDIR:H:H} is "lib"
+# and the former "${.CURDIR:H:H}/misc" pointed at lib/misc.  Name the shared
+# helper directory explicitly.
+# NOTE(review): assumes the helpers stayed in sbin/geom/misc -- confirm.
+.PATH: ${SRCTOP}/sbin/geom/misc
+
+GEOM_CLASS= mountver
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/mountver/Makefile.depend b/lib/geom/mountver/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/mountver/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/mountver/geom_mountver.c b/lib/geom/mountver/geom_mountver.c
new file mode 100644
index 000000000000..0d065d71612e
--- /dev/null
+++ b/lib/geom/mountver/geom_mountver.c
@@ -0,0 +1,58 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2010 Edward Tomasz Napierala <trasz@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdint.h>
+#include <libgeom.h>
+#include <geom/mountver/g_mountver.h>
+
+#include "core/geom.h"
+
+
+uint32_t lib_version = G_LIB_VERSION;	/* exported; presumably checked by geom(8) when loading this class library -- confirm */
+uint32_t version = G_MOUNTVER_VERSION;	/* exported MOUNTVER class version */
+
+/*
+ * Command table of the MOUNTVER class.
+ *
+ * Neither verb has a handler in this file (the function slot is NULL).
+ * "create" additionally carries G_FLAG_LOADKLD; gmountver(8) documents that
+ * geom_mountver.ko is loaded on demand for it.
+ */
+struct g_command class_commands[] = {
+	/* "create" takes no options of its own. */
+	{ "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, G_NULL_OPTS,
+	    "[-v] prov ..."
+	},
+	{ "destroy", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-fv] name"
+	},
+	G_CMD_SENTINEL
+};
diff --git a/lib/geom/mountver/gmountver.8 b/lib/geom/mountver/gmountver.8
new file mode 100644
index 000000000000..4c27a652b0b0
--- /dev/null
+++ b/lib/geom/mountver/gmountver.8
@@ -0,0 +1,133 @@
+.\"-
+.\" Copyright (c) 2010 Edward Tomasz Napierala
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd May 18, 2015
+.Dt GMOUNTVER 8
+.Os
+.Sh NAME
+.Nm gmountver
+.Nd "control utility for disk mount verification GEOM class"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar name
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Op Fl s Ar name
+.Nm
+.Cm load
+.Op Fl v
+.Nm
+.Cm unload
+.Op Fl v
+.Sh DESCRIPTION
+The
+.Nm
+utility is used to control the mount verification GEOM class.
+When configured, it passes all the I/O requests to the underlying provider.
+When the underlying provider disappears - for example because the disk device
+got disconnected - it queues all the I/O requests and waits for the provider
+to reappear.
+When that happens, it attaches to it and sends the queued requests.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm create
+Enable mount verification for the given provider.
+If the operation succeeds, a new GEOM provider will be created using the
+given provider's name with a
+.Ql .mountver
+suffix.
+The kernel module
+.Pa geom_mountver.ko
+will be loaded if it is not loaded already.
+.It Cm destroy
+Destroy
+.Ar name .
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width indent
+.It Fl f
+Force the removal of the specified mountver device.
+.It Fl v
+Be more verbose.
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm MOUNTVER
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.mountver.debug : No 0
+Debug level of the
+.Nm MOUNTVER
+GEOM class.
+This can be set to a number between 0 and 3 inclusive.
+If set to 0 minimal debug information is printed, and if set to 3 the
+maximum amount of debug information is printed.
+.It Va kern.geom.mountver.check_ident : No 1
+This can be set to 0 or 1.
+If set to 0,
+.Nm
+will reattach to the device even if the device reports a different disk ID.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 9.0 .
+.Sh AUTHORS
+.An Edward Tomasz Napierala Aq Mt trasz@FreeBSD.org
diff --git a/lib/geom/multipath/Makefile b/lib/geom/multipath/Makefile
new file mode 100644
index 000000000000..5a753e42d76d
--- /dev/null
+++ b/lib/geom/multipath/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= multipath
+
+CFLAGS+= -I${SRCTOP}/sys
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/multipath/Makefile.depend b/lib/geom/multipath/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/multipath/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/multipath/geom_multipath.c b/lib/geom/multipath/geom_multipath.c
new file mode 100644
index 000000000000..12f194ff762d
--- /dev/null
+++ b/lib/geom/multipath/geom_multipath.c
@@ -0,0 +1,325 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2006 Mathew Jacob <mjacob@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <unistd.h>
+#include <uuid.h>
+#include <geom/multipath/g_multipath.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+/*
+ * Version identifiers exported by this class library.
+ * NOTE(review): presumably read by the geom(8) core when it loads the
+ * library, to match it against the core and kernel class versions --
+ * confirm against the core's library loader.
+ */
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_MULTIPATH_VERSION;
+
+/*
+ * Handlers for the verbs implemented in userland.  mp_main() is the
+ * function referenced from class_commands[]; it dispatches on the verb
+ * name to the other three.
+ */
+static void mp_main(struct gctl_req *, unsigned int);
+static void mp_label(struct gctl_req *);
+static void mp_clear(struct gctl_req *);
+static void mp_prefer(struct gctl_req *);
+
+/*
+ * Command table for the MULTIPATH class.  Each entry gives the verb
+ * name, its flags, an optional userland handler, the options the verb
+ * accepts and the usage string shown to the user.
+ *
+ * "label", "prefer" and "clear" are routed through mp_main(); every
+ * other verb has a NULL handler.
+ * NOTE(review): a NULL handler presumably means the request is handed
+ * to the kernel class unchanged -- confirm in the geom(8) core.
+ *
+ * The usage string of "prefer" is "name prov": mp_prefer() rejects
+ * anything other than exactly two arguments, and gmultipath(8)
+ * documents the same synopsis.
+ */
+struct g_command class_commands[] = {
+	{
+		"create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL,
+		{
+			{ 'A', "active_active", NULL, G_TYPE_BOOL },
+			{ 'R', "active_read", NULL, G_TYPE_BOOL },
+			G_OPT_SENTINEL
+		},
+		"[-vAR] name prov ..."
+	},
+	{
+		"label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main,
+		{
+			{ 'A', "active_active", NULL, G_TYPE_BOOL },
+			{ 'R', "active_read", NULL, G_TYPE_BOOL },
+			G_OPT_SENTINEL
+		},
+		"[-vAR] name prov ..."
+	},
+	{
+		"configure", G_FLAG_VERBOSE, NULL,
+		{
+			{ 'A', "active_active", NULL, G_TYPE_BOOL },
+			{ 'P', "active_passive", NULL, G_TYPE_BOOL },
+			{ 'R', "active_read", NULL, G_TYPE_BOOL },
+			G_OPT_SENTINEL
+		},
+		"[-vAPR] name"
+	},
+	{
+		"add", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name prov"
+	},
+	{
+		"remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name prov"
+	},
+	{
+		/* Exactly two arguments are accepted; see mp_prefer(). */
+		"prefer", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS,
+		"[-v] name prov"
+	},
+	{
+		"fail", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name prov"
+	},
+	{
+		"restore", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name prov"
+	},
+	{
+		"rotate", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name"
+	},
+	{
+		"getactive", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name"
+	},
+	{
+		"destroy", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name"
+	},
+	{
+		"stop", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		"[-v] name"
+	},
+	{
+		"clear", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS,
+		"[-v] prov ..."
+	},
+	G_CMD_SENTINEL
+};
+
+/*
+ * Common entry point for the verbs handled in userland.  Reads the
+ * "verb" parameter from the request and hands the request to the
+ * matching handler; a missing or unrecognised verb is reported through
+ * gctl_error().  The flags argument is not used.
+ */
+static void
+mp_main(struct gctl_req *req, unsigned int flags __unused)
+{
+	const char *verb;
+
+	verb = gctl_get_ascii(req, "verb");
+	if (verb == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+
+	if (strcmp(verb, "label") == 0)
+		mp_label(req);
+	else if (strcmp(verb, "clear") == 0)
+		mp_clear(req);
+	else if (strcmp(verb, "prefer") == 0)
+		mp_prefer(req);
+	else
+		gctl_error(req, "Unknown command: %s.", verb);
+}
+
+/*
+ * Handle the "label" verb: write multipath metadata to the first
+ * provider and then read the last sector of the remaining providers.
+ *
+ * Request arguments: arg0 is the multipath device name, arg1 is the
+ * provider that receives the metadata, arg2..argN-1 are the additional
+ * providers.  All providers must report the same media size and sector
+ * size.  Failures before or while storing the metadata are reported
+ * through gctl_error(); problems with the additional providers are
+ * only printed to stderr and do not stop the loop.
+ */
+static void
+mp_label(struct gctl_req *req)
+{
+	struct g_multipath_metadata md;
+	off_t disksize = 0, msize;
+	uint8_t *sector, *rsector;
+	char *ptr;
+	uuid_t uuid;
+	ssize_t secsize = 0, ssize;
+	uint32_t status;
+	const char *name, *name2, *mpname;
+	int error, i, nargs, fd;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 2) {
+		gctl_error(req, "wrong number of arguments.");
+		return;
+	}
+
+	/*
+	 * First, check each provider to make sure it's the same size.
+	 * This also gets us our size and sectorsize for the metadata.
+	 */
+	for (i = 1; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		msize = g_get_mediasize(name);
+		ssize = g_get_sectorsize(name);
+		if (msize == 0 || ssize == 0) {
+			gctl_error(req, "cannot get information about %s: %s.",
+			    name, strerror(errno));
+			return;
+		}
+		if (i == 1) {
+			/* The first provider sets the reference sizes. */
+			secsize = ssize;
+			disksize = msize;
+		} else {
+			/* %ju takes an unsigned argument, hence uintmax_t. */
+			if (secsize != ssize) {
+				gctl_error(req, "%s sector size %ju different.",
+				    name, (uintmax_t)ssize);
+				return;
+			}
+			if (disksize != msize) {
+				gctl_error(req, "%s media size %ju different.",
+				    name, (uintmax_t)msize);
+				return;
+			}
+		}
+	}
+
+	/*
+	 * Generate metadata.
+	 *
+	 * Zero the structure first: strlcpy() stops at the terminating
+	 * NUL, so the tail of each fixed-size string field would
+	 * otherwise hold whatever was on the stack.
+	 * NOTE(review): multipath_metadata_encode() is declared in
+	 * g_multipath.h; presumably it copies the fields in full into
+	 * the sector that is written to disk -- confirm.
+	 */
+	memset(&md, 0, sizeof(md));
+	strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic));
+	md.md_version = G_MULTIPATH_VERSION;
+	mpname = gctl_get_ascii(req, "arg0");
+	strlcpy(md.md_name, mpname, sizeof(md.md_name));
+	md.md_size = disksize;
+	md.md_sectorsize = secsize;
+	uuid_create(&uuid, &status);
+	if (status != uuid_s_ok) {
+		gctl_error(req, "cannot create a UUID.");
+		return;
+	}
+	uuid_to_string(&uuid, &ptr, &status);
+	if (status != uuid_s_ok) {
+		gctl_error(req, "cannot stringify a UUID.");
+		return;
+	}
+	strlcpy(md.md_uuid, ptr, sizeof(md.md_uuid));
+	/* -R overrides -A: active_read stores 2 in md_active_active. */
+	md.md_active_active = gctl_get_int(req, "active_active");
+	if (gctl_get_int(req, "active_read"))
+		md.md_active_active = 2;
+	free(ptr);
+
+	/*
+	 * Allocate a sector to write as metadata, plus a second buffer
+	 * used below to read the last sector of the other providers.
+	 */
+	sector = calloc(1, secsize);
+	if (sector == NULL) {
+		gctl_error(req, "unable to allocate metadata buffer");
+		return;
+	}
+	rsector = malloc(secsize);
+	if (rsector == NULL) {
+		gctl_error(req, "unable to allocate metadata buffer");
+		goto done;
+	}
+
+	/*
+	 * encode the metadata
+	 */
+	multipath_metadata_encode(&md, sector);
+
+	/*
+	 * Store metadata on the initial provider.
+	 */
+	name = gctl_get_ascii(req, "arg1");
+	error = g_metadata_store(name, sector, secsize);
+	if (error != 0) {
+		gctl_error(req, "cannot store metadata on %s: %s.", name, strerror(error));
+		goto done;
+	}
+
+	/*
+	 * Now touch the rest of the providers to hint retaste.
+	 * Each one is opened, its last sector is read and compared with
+	 * the metadata just written; a mismatch only produces a warning.
+	 */
+	for (i = 2; i < nargs; i++) {
+		name2 = gctl_get_ascii(req, "arg%d", i);
+		fd = g_open(name2, 1);
+		if (fd < 0) {
+			fprintf(stderr, "Unable to open %s: %s.\n",
+			    name2, strerror(errno));
+			continue;
+		}
+		if (pread(fd, rsector, secsize, disksize - secsize) !=
+		    (ssize_t)secsize) {
+			fprintf(stderr, "Unable to read metadata from %s: %s.\n",
+			    name2, strerror(errno));
+			g_close(fd);
+			continue;
+		}
+		g_close(fd);
+		if (memcmp(sector, rsector, secsize)) {
+			fprintf(stderr, "No metadata found on %s."
+			    " It is not a path of %s.\n",
+			    name2, name);
+		}
+	}
+done:
+	/* rsector is NULL here when its allocation failed; free(NULL) is a no-op. */
+	free(rsector);
+	free(sector);
+}
+
+
+/*
+ * Handle the "clear" verb: call g_metadata_clear() with the multipath
+ * magic on every provider named in the request.  A failure on one
+ * provider is printed to stderr and recorded with gctl_error(), and
+ * the remaining providers are still processed.
+ */
+static void
+mp_clear(struct gctl_req *req)
+{
+	const char *prov;
+	int argc, idx, rc;
+
+	argc = gctl_get_int(req, "nargs");
+	if (argc < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (idx = 0; idx < argc; idx++) {
+		prov = gctl_get_ascii(req, "arg%d", idx);
+		rc = g_metadata_clear(prov, G_MULTIPATH_MAGIC);
+		if (rc == 0)
+			continue;
+		fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+		    prov, strerror(rc));
+		gctl_error(req, "Not fully done.");
+	}
+}
+
+/*
+ * Handle the "prefer" verb.  Requires exactly two arguments (the
+ * multipath device name and a provider), issues the request with
+ * gctl_issue() and, if that returns an error string, prints it to
+ * stderr together with both names.
+ */
+static void
+mp_prefer(struct gctl_req *req)
+{
+	const char *geom, *prov, *msg;
+
+	if (gctl_get_int(req, "nargs") != 2) {
+		gctl_error(req, "Usage: prefer GEOM PROVIDER");
+		return;
+	}
+
+	geom = gctl_get_ascii(req, "arg0");
+	prov = gctl_get_ascii(req, "arg1");
+
+	msg = gctl_issue(req);
+	if (msg == NULL)
+		return;
+	fprintf(stderr, "Can't set %s preferred provider to %s: %s.\n",
+	    geom, prov, msg);
+}
diff --git a/lib/geom/multipath/gmultipath.8 b/lib/geom/multipath/gmultipath.8
new file mode 100644
index 000000000000..f6e286a43c72
--- /dev/null
+++ b/lib/geom/multipath/gmultipath.8
@@ -0,0 +1,377 @@
+.\" Copyright (c) 2007 Matthew Jacob
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 8, 2016
+.Dt GMULTIPATH 8
+.Os
+.Sh NAME
+.Nm gmultipath
+.Nd "disk multipath control utility"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl ARv
+.Ar name
+.Ar prov ...
+.Nm
+.Cm label
+.Op Fl ARv
+.Ar name
+.Ar prov ...
+.Nm
+.Cm configure
+.Op Fl APRv
+.Ar name
+.Nm
+.Cm add
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm remove
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm fail
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm restore
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm rotate
+.Op Fl v
+.Ar name
+.Nm
+.Cm prefer
+.Op Fl v
+.Ar name
+.Ar prov
+.Nm
+.Cm getactive
+.Op Fl v
+.Ar name
+.Nm
+.Cm destroy
+.Op Fl v
+.Ar name
+.Nm
+.Cm stop
+.Op Fl v
+.Ar name
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for device multipath configuration.
+.Pp
+The multipath device can be configured using two different methods:
+.Dq manual
+or
+.Dq automatic .
+When using the
+.Dq manual
+method, no metadata are stored on the devices, so the multipath
+device has to be configured by hand every time it is needed.
+Additional device paths also will not be detected automatically.
+The
+.Dq automatic
+method uses on-disk metadata to detect the device and all of its paths.
+Metadata use the last sector of the underlying disk device and
+include device name and UUID.
+The UUID guarantees uniqueness in a shared storage environment
+but is in general too cumbersome to use.
+The name is what is exported via the device interface.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm create
+Create multipath device with
+.Dq manual
+method without writing any on-disk metadata.
+It is up to the administrator to properly identify device paths.
+The kernel will only check that all given providers have the same media and
+sector sizes.
+.Pp
+.Fl A
+option enables Active/Active mode,
+.Fl R
+option enables Active/Read mode, otherwise Active/Passive mode is used
+by default.
+.It Cm label
+Create multipath device with
+.Dq automatic
+method.
+Label the first given provider with on-disk metadata using the specified
+.Ar name .
+The rest of the given providers will be retasted to detect these metadata.
+It reliably protects against specifying unrelated providers.
+Providers with no matching metadata detected will not be added to the device.
+.Pp
+.Fl A
+option enables Active/Active mode,
+.Fl R
+option enables Active/Read mode, otherwise Active/Passive mode is used
+by default.
+.It Cm configure
+Configure the given multipath device.
+.Pp
+.Fl A
+option enables Active/Active mode,
+.Fl P
+option enables Active/Passive mode,
+.Fl R
+option enables Active/Read mode.
+.It Cm add
+Add the given provider as a path to the given multipath device.
+Should normally be used only for devices created with
+.Dq manual
+method, unless you know what you are doing (you are sure that it is another
+device path, but tasting its metadata in regular
+.Dq automatic
+way is not possible).
+.It Cm remove
+Remove the given provider as a path from the given multipath device.
+If the last path is removed, the multipath device will be destroyed.
+.It Cm fail
+Mark specified provider as a path of the specified multipath device as failed.
+If there are other paths present, new requests will be forwarded there.
+.It Cm restore
+Mark specified provider as a path of the specified multipath device as
+operational, allowing it to handle requests.
+.It Cm rotate
+Change the active provider/path to the next available provider in Active/Passive mode.
+.It Cm prefer
+Change the active provider/path to the specified provider in Active/Passive mode.
+.It Cm getactive
+Get the currently active provider(s)/path(s).
+.It Cm destroy
+Destroy the given multipath device clearing metadata.
+.It Cm stop
+Stop the given multipath device without clearing metadata.
+.It Cm clear
+Clear metadata on the given provider.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm MULTIPATH
+GEOM class.
+.Bl -tag -width indent
+.It Va kern.geom.multipath.debug : No 0
+Debug level of the
+.Nm MULTIPATH
+GEOM class.
+This can be set to 0 (default) or 1 to disable or enable various
+forms of chattiness.
+.It Va kern.geom.multipath.exclusive : No 1
+Open underlying providers exclusively, preventing access to individual paths.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh MULTIPATH ARCHITECTURE
+This is a multiple path architecture with no device knowledge or
+presumptions other than size matching built in.
+Therefore the user must exercise some care
+in selecting providers that do indeed represent multiple paths to the
+same underlying disk device.
+The reason for this is that there are several
+criteria across multiple underlying transport types that can
+.Ar indicate
+identity, but in all respects such identity can rarely be considered
+.Ar definitive .
+.Pp
+For example, if you use the World Wide Port Name of a Fibre Channel
+disk object you might believe that two disks that have the same WWPN
+on different paths (or even disjoint fabrics) might be considered
+the same disk.
+Nearly always this would be a safe assumption, until
+you realize that a WWPN, like an Ethernet MAC address, is a soft
+programmable entity, and that a misconfigured Director Class switch
+could lead you to believe incorrectly that you have found multiple
+paths to the same device.
+This is an extreme and theoretical case, but
+it is possible enough to indicate that the policy for deciding which
+of multiple pathnames refer to the same device should be left to the
+system operator who will use tools and knowledge of their own storage
+subsystem to make the correct configuration selection.
+.Pp
+There are Active/Passive, Active/Read and Active/Active operation modes
+supported.
+In Active/Passive mode only one path has I/O moving on it
+at any point in time.
+This I/O continues until an I/O is returned with
+a generic I/O error or a "Nonexistent Device" error.
+When this occurs, that path is marked FAIL, the next path
+in a list is selected as active and the failed I/O reissued.
+In Active/Active mode all paths not marked FAIL may handle I/O at the same time.
+Requests are distributed between paths to equalize load.
+For capable devices it allows the utilisation of the bandwidth available on all paths.
+In Active/Read mode all paths not marked FAIL may handle reads at the same time,
+but unlike in Active/Active mode only one path handles write requests at any
+point in time; closely following the original write request order if the layer
+above needs it for data consistency (not waiting for requisite write completion
+before sending dependent write).
+.Pp
+When new devices are added to the system the
+.Nm MULTIPATH
+GEOM class is given an opportunity to taste these new devices.
+If a new
+device has a
+.Nm MULTIPATH
+on-disk metadata label, the device is either used to create a new
+.Nm MULTIPATH
+GEOM, or added to the list of paths for an existing
+.Nm MULTIPATH
+GEOM.
+.Pp
+It is this mechanism that works reasonably with
+.Xr isp 4
+and
+.Xr mpt 4
+based Fibre Channel disk devices.
+For these devices, when a device disappears
+(due to e.g., a cable pull or power failure to a switch), the device is
+proactively marked as gone and I/O to it failed.
+This causes the
+.Nm MULTIPATH
+failure event just described.
+.Pp
+When Fibre Channel events inform either
+.Xr isp 4
+or
+.Xr mpt 4
+host bus adapters that new devices may have arrived (e.g., the arrival
+of an RSCN event from the Fabric Domain Controller), they can cause
+a rescan to occur and cause the attachment and configuration of any
+(now) new devices to occur, causing the taste event described above.
+.Pp
+This means that this multipath architecture is not a one-shot path
+failover, but can be considered to be steady state as long as failed
+paths are repaired (automatically or otherwise).
+.Pp
+Automatic rescanning is not a requirement.
+Nor is Fibre Channel.
+The
+same failover mechanisms work equally well for traditional "Parallel"
+SCSI but may require manual intervention with
+.Xr camcontrol 8
+to cause the reattachment of repaired device links.
+.Sh EXAMPLES
+The following example shows how to use
+.Xr camcontrol 8
+to find possible multiple path devices and to create a
+.Nm MULTIPATH
+GEOM class for them.
+.Bd -literal -offset indent
+mysys# camcontrol devlist
+<ECNCTX @WESTVILLE > at scbus0 target 0 lun 0 (da0,pass0)
+<ECNCTX @WESTVILLE > at scbus0 target 0 lun 1 (da1,pass1)
+<ECNCTX @WESTVILLE > at scbus1 target 0 lun 0 (da2,pass2)
+<ECNCTX @WESTVILLE > at scbus1 target 0 lun 1 (da3,pass3)
+mysys# camcontrol inquiry da0 -S
+ECNTX0LUN000000SER10ac0d01
+mysys# camcontrol inquiry da2 -S
+ECNTX0LUN000000SER10ac0d01
+.Ed
+.Pp
+Now that you have used the Serial Number to compare two disk paths
+it is not entirely unreasonable to conclude that these are multiple
+paths to the same device.
+However, only the user who is familiar
+with their storage is qualified to make this judgement.
+.Pp
+You can then use the
+.Nm
+command to label and create a
+.Nm MULTIPATH
+GEOM provider named
+.Ar FRED .
+.Bd -literal -offset indent
+gmultipath label -v FRED /dev/da0 /dev/da2
+disklabel -Brw /dev/multipath/FRED auto
+newfs /dev/multipath/FREDa
+mount /dev/multipath/FREDa /mnt....
+.Ed
+.Pp
+The resultant console output looks something like:
+.Bd -literal -offset indent
+GEOM_MULTIPATH: da0 added to FRED
+GEOM_MULTIPATH: da0 is now active path in FRED
+GEOM_MULTIPATH: da2 added to FRED
+.Ed
+.Pp
+To load the
+.Nm
+module at boot time, add this entry to
+.Pa /boot/loader.conf :
+.Bd -literal -offset indent
+geom_multipath_load="YES"
+.Ed
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr isp 4 ,
+.Xr mpt 4 ,
+.Xr loader.conf 5 ,
+.Xr camcontrol 8 ,
+.Xr geom 8 ,
+.Xr mount 8 ,
+.Xr newfs 8 ,
+.Xr sysctl 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Fx 7.0 .
+.Sh AUTHORS
+.An Matthew Jacob Aq Mt mjacob@FreeBSD.org
+.An Alexander Motin Aq Mt mav@FreeBSD.org
diff --git a/lib/geom/nop/Makefile b/lib/geom/nop/Makefile
new file mode 100644
index 000000000000..9d8b69117466
--- /dev/null
+++ b/lib/geom/nop/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= nop
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/nop/Makefile.depend b/lib/geom/nop/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/nop/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/nop/geom_nop.c b/lib/geom/nop/geom_nop.c
new file mode 100644
index 000000000000..65dc07cf1a3c
--- /dev/null
+++ b/lib/geom/nop/geom_nop.c
@@ -0,0 +1,81 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdint.h>
+#include <libgeom.h>
+#include <geom/nop/g_nop.h>
+
+#include "core/geom.h"
+
+
+/*
+ * Version identifiers exported by this class library.
+ * NOTE(review): presumably read by the geom(8) core when it loads the
+ * library, to match it against the core and kernel class versions --
+ * confirm against the core's library loader.
+ */
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_NOP_VERSION;
+
+/*
+ * Command table for the NOP class.  Each entry gives the verb name, its
+ * flags, a handler (NULL for every verb here), the options the verb
+ * accepts and the usage string shown to the user.
+ * NOTE(review): a NULL handler presumably means the request is handed
+ * to the kernel class unchanged -- confirm in the geom(8) core.
+ *
+ * Each option is { short letter, long name, string or NULL, type }.
+ * NOTE(review): the third field looks like the default value used when
+ * the option is not given -- confirm against core/geom.h.
+ */
+struct g_command class_commands[] = {
+ /* create: gnop(8) states geom_nop.ko is loaded if not already loaded. */
+ { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL,
+ {
+ { 'e', "error", "-1", G_TYPE_NUMBER },
+ { 'o', "offset", "0", G_TYPE_NUMBER },
+ { 'p', "stripesize", "0", G_TYPE_NUMBER },
+ { 'P', "stripeoffset", "0", G_TYPE_NUMBER },
+ { 'r', "rfailprob", "-1", G_TYPE_NUMBER },
+ { 's', "size", "0", G_TYPE_NUMBER },
+ { 'S', "secsize", "0", G_TYPE_NUMBER },
+ { 'w', "wfailprob", "-1", G_TYPE_NUMBER },
+ { 'z', "physpath", G_NOP_PHYSPATH_PASSTHROUGH, G_TYPE_STRING },
+ G_OPT_SENTINEL
+ },
+ "[-v] [-e error] [-o offset] [-p stripesize] [-P stripeoffset] "
+ "[-r rfailprob] [-s size] [-S secsize] [-w wfailprob] "
+ "[-z physpath] dev ..."
+ },
+ /* configure: accepts only the error/failure-probability options. */
+ { "configure", G_FLAG_VERBOSE, NULL,
+ {
+ { 'e', "error", "-1", G_TYPE_NUMBER },
+ { 'r', "rfailprob", "-1", G_TYPE_NUMBER },
+ { 'w', "wfailprob", "-1", G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-v] [-e error] [-r rfailprob] [-w wfailprob] prov ..."
+ },
+ /* destroy: -f is documented in gnop(8) as forcing the removal. */
+ { "destroy", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] prov ..."
+ },
+ /* reset: takes no options of its own beyond -v. */
+ { "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ G_CMD_SENTINEL
+};
diff --git a/lib/geom/nop/gnop.8 b/lib/geom/nop/gnop.8
new file mode 100644
index 000000000000..f9b3dc2c440e
--- /dev/null
+++ b/lib/geom/nop/gnop.8
@@ -0,0 +1,189 @@
+.\" Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd January 17, 2018
+.Dt GNOP 8
+.Os
+.Sh NAME
+.Nm gnop
+.Nd "control utility for NOP GEOM class"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Op Fl e Ar error
+.Op Fl o Ar offset
+.Op Fl p Ar stripesize
+.Op Fl P Ar stripeoffset
+.Op Fl r Ar rfailprob
+.Op Fl s Ar size
+.Op Fl S Ar secsize
+.Op Fl w Ar wfailprob
+.Op Fl z Ar physpath
+.Ar dev ...
+.Nm
+.Cm configure
+.Op Fl v
+.Op Fl e Ar error
+.Op Fl r Ar rfailprob
+.Op Fl w Ar wfailprob
+.Ar prov ...
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar prov ...
+.Nm
+.Cm reset
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for setting up transparent providers on existing ones.
+Its main purpose is testing other GEOM classes, as it allows forced provider
+removal and I/O error simulation with a given probability.
+It also gathers statistics on the number of read, write, delete,
+getattr, flush, and other requests, and the number of bytes read and written.
+.Nm
+can also be used as a good starting point for implementing new GEOM
+classes.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm configure"
+.It Cm create
+Set up a transparent provider on the given devices.
+If the operation succeeds, the new provider should appear with name
+.Pa /dev/ Ns Ao Ar dev Ac Ns Pa .nop .
+The kernel module
+.Pa geom_nop.ko
+will be loaded if it is not loaded already.
+.It Cm configure
+Configure an existing transparent provider.
+At the moment it is only used for changing failure probability.
+.It Cm destroy
+Turn off the given transparent providers.
+.It Cm reset
+Reset statistics for the given transparent providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width ".Fl r Ar rfailprob"
+.It Fl e Ar error
+Specifies the error number to return on failure.
+.It Fl f
+Force the removal of the specified provider.
+.It Fl o Ar offset
+Where to begin on the original provider.
+.It Fl p Ar stripesize
+Value of the stripesize property of the transparent provider.
+.It Fl P Ar stripeoffset
+Value of the stripeoffset property of the transparent provider.
+.It Fl r Ar rfailprob
+Specifies read failure probability in percent.
+.It Fl s Ar size
+Size of the transparent provider.
+.It Fl S Ar secsize
+Sector size of the transparent provider.
+.It Fl w Ar wfailprob
+Specifies write failure probability in percent.
+.It Fl v
+Be more verbose.
+.It Fl z Ar physpath
+Physical path of the transparent provider.
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm NOP
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.nop.debug : No 0
+Debug level of the
+.Nm NOP
+GEOM class.
+This can be set to a number between 0 and 2 inclusive.
+If set to 0, minimal debug information is printed.
+If set to 1, basic debug information is logged along with the I/O requests
+that were returned as errors.
+If set to 2, the maximum amount of debug information is printed including
+all I/O requests.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to create a transparent provider for disk
+.Pa /dev/da0
+with 50% write failure probability, and how to destroy it.
+.Bd -literal -offset indent
+gnop create -v -w 50 da0
+gnop destroy -v da0.nop
+.Ed
+.Pp
+The traffic statistics for the given transparent providers can be obtained
+with the
+.Cm list
+command.
+The example below shows the number of bytes written with
+.Xr newfs 8 :
+.Bd -literal -offset indent
+gnop create da0
+newfs /dev/da0.nop
+gnop list
+.Ed
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 5.3 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
diff --git a/lib/geom/part/Makefile b/lib/geom/part/Makefile
new file mode 100644
index 000000000000..e9631caf4d84
--- /dev/null
+++ b/lib/geom/part/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= part
+
+LIBADD= util
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/part/Makefile.depend b/lib/geom/part/Makefile.depend
new file mode 100644
index 000000000000..29b9a504acf9
--- /dev/null
+++ b/lib/geom/part/Makefile.depend
@@ -0,0 +1,20 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ lib/libutil \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/part/geom_part.c b/lib/geom/part/geom_part.c
new file mode 100644
index 000000000000..29c066c4714f
--- /dev/null
+++ b/lib/geom/part/geom_part.c
@@ -0,0 +1,1344 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2007, 2008 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/stat.h>
+#include <sys/vtoc.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgeom.h>
+#include <libutil.h>
+#include <paths.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+#ifdef STATIC_GEOM_CLASSES
+#define PUBSYM(x) gpart_##x
+#else
+#define PUBSYM(x) x
+#endif
+
+uint32_t PUBSYM(lib_version) = G_LIB_VERSION;
+uint32_t PUBSYM(version) = 0;
+
+static char sstart[32];
+static char ssize[32];
+volatile sig_atomic_t undo_restore;
+
+#define GPART_AUTOFILL "*"
+#define GPART_FLAGS "C"
+
+#define GPART_PARAM_BOOTCODE "bootcode"
+#define GPART_PARAM_INDEX "index"
+#define GPART_PARAM_PARTCODE "partcode"
+
+static struct gclass *find_class(struct gmesh *, const char *);
+static struct ggeom * find_geom(struct gclass *, const char *);
+static int geom_is_withered(struct ggeom *);
+static const char *find_geomcfg(struct ggeom *, const char *);
+static const char *find_provcfg(struct gprovider *, const char *);
+static struct gprovider *find_provider(struct ggeom *, off_t);
+static const char *fmtsize(int64_t);
+static int gpart_autofill(struct gctl_req *);
+static int gpart_autofill_resize(struct gctl_req *);
+static void gpart_bootcode(struct gctl_req *, unsigned int);
+static void *gpart_bootfile_read(const char *, ssize_t *);
+static _Noreturn void gpart_issue(struct gctl_req *, unsigned int);
+static void gpart_show(struct gctl_req *, unsigned int);
+static void gpart_show_geom(struct ggeom *, const char *, int);
+static int gpart_show_hasopt(struct gctl_req *, const char *, const char *);
+static void gpart_write_partcode(struct ggeom *, int, void *, ssize_t);
+static void gpart_write_partcode_vtoc8(struct ggeom *, int, void *);
+static void gpart_print_error(const char *);
+static void gpart_backup(struct gctl_req *, unsigned int);
+static void gpart_restore(struct gctl_req *, unsigned int);
+
+/*
+ * Command table for this class, exported under the name produced by
+ * PUBSYM(class_commands). Each entry lists a verb, a flags word (0 for
+ * every verb here), the handler function, the accepted options and the
+ * usage string. Each option is { letter, parameter name, initial value
+ * (NULL, G_VAL_OPTIONAL or a literal such as GPART_AUTOFILL/GPART_FLAGS),
+ * G_TYPE_* } and every option list ends with G_OPT_SENTINEL; the table
+ * itself ends with G_CMD_SENTINEL.
+ */
+struct g_command PUBSYM(class_commands)[] = {
+ { "add", 0, gpart_issue, {
+ { 'a', "alignment", GPART_AUTOFILL, G_TYPE_STRING },
+ { 'b', "start", GPART_AUTOFILL, G_TYPE_STRING },
+ { 's', "size", GPART_AUTOFILL, G_TYPE_STRING },
+ { 't', "type", NULL, G_TYPE_STRING },
+ { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ { 'l', "label", G_VAL_OPTIONAL, G_TYPE_STRING },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "-t type [-a alignment] [-b start] [-s size] [-i index] "
+ "[-l label] [-f flags] geom"
+ },
+ { "backup", 0, gpart_backup, G_NULL_OPTS,
+ "geom"
+ },
+ { "bootcode", 0, gpart_bootcode, {
+ { 'b', GPART_PARAM_BOOTCODE, G_VAL_OPTIONAL, G_TYPE_STRING },
+ { 'p', GPART_PARAM_PARTCODE, G_VAL_OPTIONAL, G_TYPE_STRING },
+ { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "[-b bootcode] [-p partcode -i index] [-f flags] geom"
+ },
+ { "commit", 0, gpart_issue, G_NULL_OPTS,
+ "geom"
+ },
+ { "create", 0, gpart_issue, {
+ { 's', "scheme", NULL, G_TYPE_STRING },
+ { 'n', "entries", G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "-s scheme [-n entries] [-f flags] provider"
+ },
+ { "delete", 0, gpart_issue, {
+ { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "-i index [-f flags] geom"
+ },
+ { "destroy", 0, gpart_issue, {
+ { 'F', "force", NULL, G_TYPE_BOOL },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "[-F] [-f flags] geom"
+ },
+ { "modify", 0, gpart_issue, {
+ { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER },
+ { 'l', "label", G_VAL_OPTIONAL, G_TYPE_STRING },
+ { 't', "type", G_VAL_OPTIONAL, G_TYPE_STRING },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "-i index [-l label] [-t type] [-f flags] geom"
+ },
+ { "set", 0, gpart_issue, {
+ { 'a', "attrib", NULL, G_TYPE_STRING },
+ { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "-a attrib [-i index] [-f flags] geom"
+ },
+ { "show", 0, gpart_show, {
+ { 'l', "show_label", NULL, G_TYPE_BOOL },
+ { 'r', "show_rawtype", NULL, G_TYPE_BOOL },
+ { 'p', "show_providers", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL },
+ "[-l | -r] [-p] [geom ...]"
+ },
+ { "undo", 0, gpart_issue, G_NULL_OPTS,
+ "geom"
+ },
+ { "unset", 0, gpart_issue, {
+ { 'a', "attrib", NULL, G_TYPE_STRING },
+ { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "-a attrib [-i index] [-f flags] geom"
+ },
+ { "resize", 0, gpart_issue, {
+ { 'a', "alignment", GPART_AUTOFILL, G_TYPE_STRING },
+ { 's', "size", GPART_AUTOFILL, G_TYPE_STRING },
+ { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "-i index [-a alignment] [-s size] [-f flags] geom"
+ },
+ { "restore", 0, gpart_restore, {
+ { 'F', "force", NULL, G_TYPE_BOOL },
+ { 'l', "restore_labels", NULL, G_TYPE_BOOL },
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "[-lF] [-f flags] provider [...]"
+ },
+ { "recover", 0, gpart_issue, {
+ { 'f', "flags", GPART_FLAGS, G_TYPE_STRING },
+ G_OPT_SENTINEL },
+ "[-f flags] geom"
+ },
+ G_CMD_SENTINEL
+};
+
+/*
+ * Look up the class called `name' in the GEOM mesh.
+ * Returns the matching class, or NULL when the mesh holds no such class.
+ */
+static struct gclass *
+find_class(struct gmesh *mesh, const char *name)
+{
+	struct gclass *cp;
+
+	LIST_FOREACH(cp, &mesh->lg_class, lg_class) {
+		if (strcmp(cp->lg_name, name) != 0)
+			continue;
+		return (cp);
+	}
+	return (NULL);
+}
+
+/*
+ * Find the geom called `name' within `classp'; a leading _PATH_DEV prefix
+ * on `name' is ignored. A geom that is not withered wins immediately;
+ * otherwise the last withered geom with that name is returned, or NULL
+ * when nothing matches.
+ */
+static struct ggeom *
+find_geom(struct gclass *classp, const char *name)
+{
+	const size_t devlen = sizeof(_PATH_DEV) - 1;
+	struct ggeom *cur, *withered;
+
+	if (strncmp(name, _PATH_DEV, devlen) == 0)
+		name += devlen;
+
+	withered = NULL;
+	LIST_FOREACH(cur, &classp->lg_geom, lg_geom) {
+		if (strcmp(cur->lg_name, name) == 0) {
+			if (geom_is_withered(cur))
+				withered = cur;
+			else
+				return (cur);
+		}
+	}
+	return (withered);
+}
+
+/*
+ * Report whether the geom carries a "wither" entry in its configuration
+ * list: 1 if it does, 0 otherwise.
+ */
+static int
+geom_is_withered(struct ggeom *gp)
+{
+	struct gconfig *cfg;
+	int withered;
+
+	withered = 0;
+	LIST_FOREACH(cfg, &gp->lg_config, lg_config) {
+		if (strcmp(cfg->lg_name, "wither") == 0) {
+			withered = 1;
+			break;
+		}
+	}
+	return (withered);
+}
+
+/*
+ * Return the value of the first configuration entry named `cfg' attached
+ * to the geom, or NULL when the geom has no such entry.
+ */
+static const char *
+find_geomcfg(struct ggeom *gp, const char *cfg)
+{
+	struct gconfig *entry;
+
+	for (entry = LIST_FIRST(&gp->lg_config); entry != NULL;
+	    entry = LIST_NEXT(entry, lg_config)) {
+		if (strcmp(entry->lg_name, cfg) == 0)
+			return (entry->lg_val);
+	}
+	return (NULL);
+}
+
+/*
+ * Return the value of the first configuration entry named `cfg' attached
+ * to the provider, or NULL when the provider has no such entry.
+ */
+static const char *
+find_provcfg(struct gprovider *pp, const char *cfg)
+{
+	struct gconfig *entry;
+
+	for (entry = LIST_FIRST(&pp->lg_config); entry != NULL;
+	    entry = LIST_NEXT(entry, lg_config)) {
+		if (strcmp(entry->lg_name, cfg) == 0)
+			return (entry->lg_val);
+	}
+	return (NULL);
+}
+
+/*
+ * Return the provider of `gp' with the smallest "start" value that is not
+ * below `minsector', or NULL when there is none.
+ *
+ * Providers that carry no "start" configuration entry are skipped instead
+ * of handing a NULL string to strtoimax().
+ */
+static struct gprovider *
+find_provider(struct ggeom *gp, off_t minsector)
+{
+	struct gprovider *pp, *bestpp;
+	const char *s;
+	off_t sector, bestsector;
+
+	bestpp = NULL;
+	bestsector = 0;
+	LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+		s = find_provcfg(pp, "start");
+		if (s == NULL)
+			continue;
+		sector = (off_t)strtoimax(s, NULL, 0);
+		if (sector < minsector)
+			continue;
+		/* Keep the candidate closest to minsector; first one wins ties. */
+		if (bestpp != NULL && sector >= bestsector)
+			continue;
+
+		bestpp = pp;
+		bestsector = sector;
+	}
+	return (bestpp);
+}
+
+/*
+ * Format `rawsz' with humanize_number() into a static 5-byte buffer and
+ * return that buffer; each call overwrites the previous result.
+ */
+static const char *
+fmtsize(int64_t rawsz)
+{
+	static char human[5];
+	const int flags = HN_B | HN_NOSPACE | HN_DECIMAL;
+
+	humanize_number(human, sizeof(human), rawsz, "", HN_AUTOSCALE, flags);
+	return (human);
+}
+
+/*
+ * Build a string of the form " [attr1,attr2] " from all "attrib"
+ * configuration entries of the provider; the result is empty when there
+ * are none. The string lives in a static buffer that is overwritten by
+ * the next call.
+ *
+ * snprintf() returns the length the output would have had, so a truncated
+ * write must not be added to the offset blindly: doing so pushes the
+ * offset past the buffer and makes the remaining-size computation wrap.
+ * On truncation the list is cut short and closed with "] ".
+ */
+static const char *
+fmtattrib(struct gprovider *pp)
+{
+	static char buf[128];
+	/* Two bytes stay in reserve so the closing "] " always fits. */
+	const size_t limit = sizeof(buf) - 2;
+	struct gconfig *gc;
+	size_t idx;
+	int n;
+
+	buf[0] = '\0';
+	idx = 0;
+	LIST_FOREACH(gc, &pp->lg_config, lg_config) {
+		if (strcmp(gc->lg_name, "attrib") != 0)
+			continue;
+		n = snprintf(buf + idx, limit - idx, "%s%s",
+		    (idx == 0) ? " [" : ",", gc->lg_val);
+		if (n < 0)
+			break;
+		if ((size_t)n >= limit - idx) {
+			/* Truncated: only limit - idx - 1 characters were stored. */
+			idx = limit - 1;
+			break;
+		}
+		idx += (size_t)n;
+	}
+	if (idx > 0)
+		snprintf(buf + idx, sizeof(buf) - idx, "] ");
+	return (buf);
+}
+
+#define ALIGNDOWN(d, a) ((d) - (d) % (a))
+#define ALIGNUP(d, a) ((d) % (a) ? (d) - (d) % (a) + (a): (d))
+
+/*
+ * Compute the "size" parameter of a "resize" request and store it back
+ * into the request; the "alignment" parameter is consumed and removed.
+ *
+ * When no size was given ("*"), the partition grows up to the next
+ * partition, or to the end of the table when no partition follows. When a
+ * size was given together with an alignment, the end of the partition is
+ * aligned down.
+ *
+ * Returns 0 on success, the geom_gettree() error, or ENOSPC when there is
+ * no room to grow. Invalid arguments terminate the program through
+ * errx()/errc(). The GEOM tree is released on every return after it was
+ * obtained, including the ENOSPC returns.
+ */
+static int
+gpart_autofill_resize(struct gctl_req *req)
+{
+	struct gmesh mesh;
+	struct gclass *cp;
+	struct ggeom *gp;
+	struct gprovider *pp;
+	off_t last, size, start, new_size;
+	off_t lba, new_lba, alignment, offset;
+	const char *s;
+	int error, idx, has_alignment;
+
+	idx = (int)gctl_get_intmax(req, GPART_PARAM_INDEX);
+	if (idx < 1)
+		errx(EXIT_FAILURE, "invalid partition index");
+
+	error = geom_gettree(&mesh);
+	if (error)
+		return (error);
+	s = gctl_get_ascii(req, "class");
+	if (s == NULL)
+		abort();
+	cp = find_class(&mesh, s);
+	if (cp == NULL)
+		errx(EXIT_FAILURE, "Class %s not found.", s);
+	s = gctl_get_ascii(req, "arg0");
+	if (s == NULL)
+		abort();
+	gp = find_geom(cp, s);
+	if (gp == NULL)
+		errx(EXIT_FAILURE, "No such geom: %s.", s);
+	/* Without a consumer there is no underlying provider to query. */
+	if (LIST_EMPTY(&gp->lg_consumer))
+		errx(EXIT_FAILURE, "Provider for geom %s not found.", s);
+	pp = LIST_FIRST(&gp->lg_consumer)->lg_provider;
+	if (pp == NULL)
+		errx(EXIT_FAILURE, "Provider for geom %s not found.", s);
+
+	s = gctl_get_ascii(req, "alignment");
+	has_alignment = (*s == '*') ? 0 : 1;
+	alignment = 1;
+	if (has_alignment) {
+		error = g_parse_lba(s, pp->lg_sectorsize, &alignment);
+		if (error)
+			errc(EXIT_FAILURE, error, "Invalid alignment param");
+		if (alignment == 0)
+			errx(EXIT_FAILURE, "Invalid alignment param");
+	} else {
+		/* No explicit alignment: fall back to the stripe size. */
+		lba = pp->lg_stripesize / pp->lg_sectorsize;
+		if (lba > 0)
+			alignment = lba;
+	}
+	error = gctl_delete_param(req, "alignment");
+	if (error)
+		errc(EXIT_FAILURE, error, "internal error");
+
+	s = gctl_get_ascii(req, "size");
+	if (*s == '*')
+		new_size = 0;
+	else {
+		error = g_parse_lba(s, pp->lg_sectorsize, &new_size);
+		if (error)
+			errc(EXIT_FAILURE, error, "Invalid size param");
+		/* no autofill necessary. */
+		if (has_alignment == 0)
+			goto done;
+	}
+
+	offset = (pp->lg_stripeoffset / pp->lg_sectorsize) % alignment;
+	s = find_geomcfg(gp, "last");
+	if (s == NULL)
+		errx(EXIT_FAILURE, "Final block not found for geom %s",
+		    gp->lg_name);
+	last = (off_t)strtoimax(s, NULL, 0);
+	/* Locate the partition that carries the requested index. */
+	LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+		s = find_provcfg(pp, "index");
+		if (s == NULL)
+			continue;
+		if (atoi(s) == idx)
+			break;
+	}
+	if (pp == NULL)
+		errx(EXIT_FAILURE, "invalid partition index");
+
+	s = find_provcfg(pp, "start");
+	start = (off_t)strtoimax(s, NULL, 0);
+	s = find_provcfg(pp, "end");
+	lba = (off_t)strtoimax(s, NULL, 0);
+	size = lba - start + 1;
+
+	/* From here on pp is the next partition after this one, if any. */
+	pp = find_provider(gp, lba + 1);
+	if (new_size > 0 && (new_size <= size || pp == NULL)) {
+		/*
+		 * The start offset may not be aligned, so align the end
+		 * offset and derive the size from it.
+		 */
+		new_size = ALIGNDOWN(start + offset + new_size,
+		    alignment) - start - offset;
+		goto done;
+	}
+	if (pp == NULL) {
+		new_size = ALIGNDOWN(last + offset + 1, alignment) -
+		    start - offset;
+		if (new_size < size) {
+			geom_deletetree(&mesh);
+			return (ENOSPC);
+		}
+	} else {
+		s = find_provcfg(pp, "start");
+		new_lba = (off_t)strtoimax(s, NULL, 0);
+		/*
+		 * Is there any free space between current and
+		 * next providers?
+		 */
+		new_lba = ALIGNDOWN(new_lba + offset, alignment) - offset;
+		if (new_lba > lba)
+			new_size = new_lba - start;
+		else {
+			geom_deletetree(&mesh);
+			return (ENOSPC);
+		}
+	}
+done:
+	/* ssize is a file-scope buffer; the request is given a pointer to it. */
+	snprintf(ssize, sizeof(ssize), "%jd", (intmax_t)new_size);
+	gctl_change_param(req, "size", -1, ssize);
+	geom_deletetree(&mesh);
+	return (0);
+}
+
+/*
+ * Fill in the "start" and "size" parameters of an "add" request and
+ * remove its "alignment" parameter. A "resize" request is handed to
+ * gpart_autofill_resize(); any other verb is left untouched.
+ *
+ * Returns 0 on success (or when nothing had to be done), the error from
+ * geom_gettree(), or ENOSPC when no suitable free range was found.
+ * Invalid arguments terminate the program through errx()/errc().
+ */
+static int
+gpart_autofill(struct gctl_req *req)
+{
+ struct gmesh mesh;
+ struct gclass *cp;
+ struct ggeom *gp;
+ struct gprovider *pp;
+ off_t first, last, a_first;
+ off_t size, start, a_lba;
+ off_t lba, len, alignment, offset;
+ uintmax_t grade;
+ const char *s;
+ int error, has_size, has_start, has_alignment;
+
+ s = gctl_get_ascii(req, "verb");
+ if (strcmp(s, "resize") == 0)
+ return gpart_autofill_resize(req);
+ if (strcmp(s, "add") != 0)
+ return (0);
+
+ error = geom_gettree(&mesh);
+ if (error)
+ return (error);
+ s = gctl_get_ascii(req, "class");
+ if (s == NULL)
+ abort();
+ cp = find_class(&mesh, s);
+ if (cp == NULL)
+ errx(EXIT_FAILURE, "Class %s not found.", s);
+ s = gctl_get_ascii(req, "arg0");
+ if (s == NULL)
+ abort();
+ gp = find_geom(cp, s);
+ if (gp == NULL) {
+ if (g_device_path(s) == NULL) {
+ errx(EXIT_FAILURE, "No such geom %s.", s);
+ } else {
+ /*
+ * We don't free memory allocated by g_device_path() as
+ * we are about to exit.
+ */
+ errx(EXIT_FAILURE,
+ "No partitioning scheme found on geom %s. Create one first using 'gpart create'.",
+ s);
+ }
+ }
+ /* pp is the provider consumed by the geom's first consumer. */
+ pp = LIST_FIRST(&gp->lg_consumer)->lg_provider;
+ if (pp == NULL)
+ errx(EXIT_FAILURE, "Provider for geom %s not found.", s);
+
+ /* A leading '*' (GPART_AUTOFILL) means the value was not given. */
+ s = gctl_get_ascii(req, "alignment");
+ has_alignment = (*s == '*') ? 0 : 1;
+ alignment = 1;
+ if (has_alignment) {
+ error = g_parse_lba(s, pp->lg_sectorsize, &alignment);
+ if (error)
+ errc(EXIT_FAILURE, error, "Invalid alignment param");
+ if (alignment == 0)
+ errx(EXIT_FAILURE, "Invalid alignment param");
+ }
+ error = gctl_delete_param(req, "alignment");
+ if (error)
+ errc(EXIT_FAILURE, error, "internal error");
+
+ s = gctl_get_ascii(req, "size");
+ has_size = (*s == '*') ? 0 : 1;
+ size = 0;
+ if (has_size) {
+ error = g_parse_lba(s, pp->lg_sectorsize, &size);
+ if (error)
+ errc(EXIT_FAILURE, error, "Invalid size param");
+ }
+
+ s = gctl_get_ascii(req, "start");
+ has_start = (*s == '*') ? 0 : 1;
+ start = 0ULL;
+ if (has_start) {
+ error = g_parse_lba(s, pp->lg_sectorsize, &start);
+ if (error)
+ errc(EXIT_FAILURE, error, "Invalid start param");
+ }
+
+ /* No autofill necessary. */
+ if (has_size && has_start && !has_alignment)
+ goto done;
+
+ /* Without an explicit alignment, use the stripe size in sectors. */
+ len = pp->lg_stripesize / pp->lg_sectorsize;
+ if (len > 0 && !has_alignment)
+ alignment = len;
+
+ /* Adjust parameters to stripeoffset */
+ offset = (pp->lg_stripeoffset / pp->lg_sectorsize) % alignment;
+ start = ALIGNUP(start + offset, alignment);
+ if (size > alignment)
+ size = ALIGNDOWN(size, alignment);
+
+ s = find_geomcfg(gp, "first");
+ if (s == NULL)
+ errx(EXIT_FAILURE, "Starting block not found for geom %s",
+ gp->lg_name);
+ first = (off_t)strtoimax(s, NULL, 0);
+ s = find_geomcfg(gp, "last");
+ if (s == NULL)
+ errx(EXIT_FAILURE, "Final block not found for geom %s",
+ gp->lg_name);
+ last = (off_t)strtoimax(s, NULL, 0);
+ /* grade == ~0ULL means no candidate range has been chosen yet. */
+ grade = ~0ULL;
+ a_first = ALIGNUP(first + offset, alignment);
+ last = ALIGNDOWN(last + offset + 1, alignment) - 1;
+ if (a_first < start)
+ a_first = start;
+ /* Visit the partitions in ascending start order and check each gap. */
+ while ((pp = find_provider(gp, first)) != NULL) {
+ s = find_provcfg(pp, "start");
+ lba = (off_t)strtoimax(s, NULL, 0);
+ a_lba = ALIGNDOWN(lba + offset, alignment);
+ if (first < a_lba && a_first < a_lba) {
+ /* Free space [first, lba> */
+ len = a_lba - a_first;
+ if (has_size) {
+ /* Prefer the gap that leaves the least space over. */
+ if (len >= size &&
+ (uintmax_t)(len - size) < grade) {
+ start = a_first;
+ grade = len - size;
+ }
+ } else if (has_start) {
+ /* The requested start lies in this gap: size runs to its end. */
+ if (start >= a_first && start < a_lba) {
+ size = a_lba - start;
+ grade = start - a_first;
+ }
+ } else {
+ /* Neither given: take the first gap, then any larger one. */
+ if (grade == ~0ULL || len > size) {
+ start = a_first;
+ size = len;
+ grade = 0;
+ }
+ }
+ }
+
+ /* Continue the scan right after this partition. */
+ s = find_provcfg(pp, "end");
+ first = (off_t)strtoimax(s, NULL, 0) + 1;
+ if (first + offset > a_first)
+ a_first = ALIGNUP(first + offset, alignment);
+ }
+ if (a_first <= last) {
+ /* Free space [first-last] */
+ len = ALIGNDOWN(last - a_first + 1, alignment);
+ if (has_size) {
+ if (len >= size &&
+ (uintmax_t)(len - size) < grade) {
+ start = a_first;
+ grade = len - size;
+ }
+ } else if (has_start) {
+ if (start >= a_first && start <= last) {
+ size = ALIGNDOWN(last - start + 1, alignment);
+ grade = start - a_first;
+ }
+ } else {
+ if (grade == ~0ULL || len > size) {
+ start = a_first;
+ size = len;
+ grade = 0;
+ }
+ }
+ }
+ if (grade == ~0ULL) {
+ geom_deletetree(&mesh);
+ return (ENOSPC);
+ }
+ start -= offset; /* Return back to real offset */
+done:
+ /* ssize and sstart are file-scope buffers handed to the request. */
+ snprintf(ssize, sizeof(ssize), "%jd", (intmax_t)size);
+ gctl_change_param(req, "size", -1, ssize);
+ snprintf(sstart, sizeof(sstart), "%jd", (intmax_t)start);
+ gctl_change_param(req, "start", -1, sstart);
+ geom_deletetree(&mesh);
+ return (0);
+}
+
+/*
+ * Print the partition table of one geom: a "=>" header line, then one
+ * line per partition in ascending start order, with "- free -" lines for
+ * the gaps between partitions and after the last one.
+ *
+ * `element' names the provider configuration entry printed in the type
+ * column. With `show_providers' set the provider name is printed in place
+ * of the partition index. Withered geoms are skipped silently; a missing
+ * scheme/first/last/state entry terminates the program via errx().
+ */
+static void
+gpart_show_geom(struct ggeom *gp, const char *element, int show_providers)
+{
+ struct gprovider *pp;
+ const char *s, *scheme;
+ off_t first, last, sector, end;
+ off_t length, secsz;
+ int idx, wblocks, wname, wmax;
+
+ if (geom_is_withered(gp))
+ return;
+ scheme = find_geomcfg(gp, "scheme");
+ if (scheme == NULL)
+ errx(EXIT_FAILURE, "Scheme not found for geom %s", gp->lg_name);
+ s = find_geomcfg(gp, "first");
+ if (s == NULL)
+ errx(EXIT_FAILURE, "Starting block not found for geom %s",
+ gp->lg_name);
+ first = (off_t)strtoimax(s, NULL, 0);
+ s = find_geomcfg(gp, "last");
+ if (s == NULL)
+ errx(EXIT_FAILURE, "Final block not found for geom %s",
+ gp->lg_name);
+ last = (off_t)strtoimax(s, NULL, 0);
+ /* Width of the numeric columns: digits of the "last" value. */
+ wblocks = strlen(s);
+ s = find_geomcfg(gp, "state");
+ if (s == NULL)
+ errx(EXIT_FAILURE, "State not found for geom %s", gp->lg_name);
+ /* s stays non-NULL only when the state starts with 'C' ([CORRUPT]). */
+ if (s != NULL && *s != 'C')
+ s = NULL;
+ /* Name column: widest of the geom name and, with -p, provider names. */
+ wmax = strlen(gp->lg_name);
+ if (show_providers) {
+ LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+ wname = strlen(pp->lg_name);
+ if (wname > wmax)
+ wmax = wname;
+ }
+ }
+ wname = wmax;
+ /* Sector size comes from the provider of the geom's first consumer. */
+ pp = LIST_FIRST(&gp->lg_consumer)->lg_provider;
+ secsz = pp->lg_sectorsize;
+ printf("=>%*jd %*jd %*s %s (%s)%s\n",
+ wblocks, (intmax_t)first, wblocks, (intmax_t)(last - first + 1),
+ wname, gp->lg_name,
+ scheme, fmtsize(pp->lg_mediasize),
+ s ? " [CORRUPT]": "");
+
+ /* Walk the partitions by ascending start; `first' tracks the next unprinted block. */
+ while ((pp = find_provider(gp, first)) != NULL) {
+ s = find_provcfg(pp, "start");
+ sector = (off_t)strtoimax(s, NULL, 0);
+
+ s = find_provcfg(pp, "end");
+ end = (off_t)strtoimax(s, NULL, 0);
+ length = end - sector + 1;
+
+ s = find_provcfg(pp, "index");
+ idx = atoi(s);
+ /* Gap before this partition. */
+ if (first < sector) {
+ printf(" %*jd %*jd %*s - free - (%s)\n",
+ wblocks, (intmax_t)first, wblocks,
+ (intmax_t)(sector - first), wname, "",
+ fmtsize((sector - first) * secsz));
+ }
+ /* NOTE(review): find_provcfg(pp, element) may be NULL here (it returns NULL for a missing entry) and is passed to %s unchecked. */
+ if (show_providers) {
+ printf(" %*jd %*jd %*s %s %s (%s)\n",
+ wblocks, (intmax_t)sector, wblocks,
+ (intmax_t)length, wname, pp->lg_name,
+ find_provcfg(pp, element), fmtattrib(pp),
+ fmtsize(pp->lg_mediasize));
+ } else
+ printf(" %*jd %*jd %*d %s %s (%s)\n",
+ wblocks, (intmax_t)sector, wblocks,
+ (intmax_t)length, wname, idx,
+ find_provcfg(pp, element), fmtattrib(pp),
+ fmtsize(pp->lg_mediasize));
+ first = end + 1;
+ }
+ /* Trailing gap after the last partition. */
+ if (first <= last) {
+ length = last - first + 1;
+ printf(" %*jd %*jd %*s - free - (%s)\n",
+ wblocks, (intmax_t)first, wblocks, (intmax_t)length,
+ wname, "",
+ fmtsize(length * secsz));
+ }
+ printf("\n");
+}
+
+/*
+ * Return 1 when the boolean option `opt' is set in the request and 0
+ * otherwise. If the option is set while `elt' is already non-NULL (a
+ * previous column selection was made), exit with the "mutually exclusive"
+ * error instead.
+ */
+static int
+gpart_show_hasopt(struct gctl_req *req, const char *opt, const char *elt)
+{
+	if (gctl_get_int(req, "%s", opt)) {
+		if (elt != NULL)
+			errx(EXIT_FAILURE, "-l and -r are mutually exclusive");
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Handler for the "show" verb: print the partition table of every geom
+ * named on the command line, or of every geom of the class when no
+ * argument was given. The column shown is "label" (show_label),
+ * "rawtype" (show_rawtype) or "type" by default.
+ */
+static void
+gpart_show(struct gctl_req *req, unsigned int fl __unused)
+{
+	struct gmesh mesh;
+	struct gclass *cls;
+	struct ggeom *geom;
+	const char *column, *arg;
+	int rc, argi, argc, with_providers;
+
+	column = NULL;
+	if (gpart_show_hasopt(req, "show_label", column))
+		column = "label";
+	if (gpart_show_hasopt(req, "show_rawtype", column))
+		column = "rawtype";
+	if (column == NULL)
+		column = "type";
+
+	arg = gctl_get_ascii(req, "class");
+	if (arg == NULL)
+		abort();
+	rc = geom_gettree(&mesh);
+	if (rc != 0)
+		errc(EXIT_FAILURE, rc, "Cannot get GEOM tree");
+	cls = find_class(&mesh, arg);
+	if (cls == NULL) {
+		geom_deletetree(&mesh);
+		errx(EXIT_FAILURE, "Class %s not found.", arg);
+	}
+	with_providers = gctl_get_int(req, "show_providers");
+	argc = gctl_get_int(req, "nargs");
+	if (argc <= 0) {
+		/* No geom named: show every geom of the class. */
+		LIST_FOREACH(geom, &cls->lg_geom, lg_geom)
+			gpart_show_geom(geom, column, with_providers);
+	} else {
+		for (argi = 0; argi < argc; argi++) {
+			arg = gctl_get_ascii(req, "arg%d", argi);
+			geom = find_geom(cls, arg);
+			if (geom == NULL)
+				errx(EXIT_FAILURE, "No such geom: %s.", arg);
+			gpart_show_geom(geom, column, with_providers);
+		}
+	}
+	geom_deletetree(&mesh);
+}
+
+/*
+ * Handler for the "backup" verb: print the partition table of the single
+ * geom argument. The first line is "<scheme> <entries>"; each following
+ * line is "<index> <type> <start> <size> [label] [attributes]".
+ *
+ * The original assigned the first consumer's provider to `pp' and never
+ * used it before the loops overwrote it; that dead (and potentially
+ * NULL-dereferencing) store is gone. Providers without a "type" entry are
+ * skipped when computing the type column width instead of being passed
+ * to strlen().
+ */
+static void
+gpart_backup(struct gctl_req *req, unsigned int fl __unused)
+{
+	struct gmesh mesh;
+	struct gclass *classp;
+	struct gprovider *pp;
+	struct ggeom *gp;
+	const char *s, *scheme;
+	off_t sector, end;
+	off_t length;
+	int error, i, windex, wblocks, wtype;
+
+	if (gctl_get_int(req, "nargs") != 1)
+		errx(EXIT_FAILURE, "Invalid number of arguments.");
+	error = geom_gettree(&mesh);
+	if (error != 0)
+		errc(EXIT_FAILURE, error, "Cannot get GEOM tree");
+	s = gctl_get_ascii(req, "class");
+	if (s == NULL)
+		abort();
+	classp = find_class(&mesh, s);
+	if (classp == NULL) {
+		geom_deletetree(&mesh);
+		errx(EXIT_FAILURE, "Class %s not found.", s);
+	}
+	s = gctl_get_ascii(req, "arg0");
+	if (s == NULL)
+		abort();
+	gp = find_geom(classp, s);
+	if (gp == NULL)
+		errx(EXIT_FAILURE, "No such geom: %s.", s);
+	scheme = find_geomcfg(gp, "scheme");
+	if (scheme == NULL)
+		abort();
+	s = find_geomcfg(gp, "last");
+	if (s == NULL)
+		abort();
+	/* Numeric columns are as wide as the "last" value. */
+	wblocks = strlen(s);
+	/* The type column is as wide as the longest type name. */
+	wtype = 0;
+	LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+		s = find_provcfg(pp, "type");
+		if (s == NULL)
+			continue;
+		i = strlen(s);
+		if (i > wtype)
+			wtype = i;
+	}
+	s = find_geomcfg(gp, "entries");
+	if (s == NULL)
+		abort();
+	windex = strlen(s);
+	printf("%s %s\n", scheme, s);
+	LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+		s = find_provcfg(pp, "start");
+		sector = (off_t)strtoimax(s, NULL, 0);
+
+		s = find_provcfg(pp, "end");
+		end = (off_t)strtoimax(s, NULL, 0);
+		length = end - sector + 1;
+
+		s = find_provcfg(pp, "label");
+		printf("%-*s %*s %*jd %*jd %s %s\n",
+		    windex, find_provcfg(pp, "index"),
+		    wtype, find_provcfg(pp, "type"),
+		    wblocks, (intmax_t)sector,
+		    wblocks, (intmax_t)length,
+		    (s != NULL) ? s: "", fmtattrib(pp));
+	}
+	geom_deletetree(&mesh);
+}
+
+/*
+ * Decide whether a backup line carries no data: returns 1 for a line that
+ * is empty, holds only white space, or whose first non-blank character is
+ * '#'; returns 0 otherwise.
+ *
+ * The character is converted to unsigned char before calling isspace(),
+ * because passing a negative plain char is undefined behaviour.
+ */
+static int
+skip_line(const char *p)
+{
+
+	while (*p != '\0') {
+		if (*p == '#')
+			return (1);
+		if (isspace((unsigned char)*p) == 0)
+			return (0);
+		p++;
+	}
+	return (1);
+}
+
+/*
+ * Signal handler that gpart_restore() installs for SIGINT. It only
+ * raises the undo_restore flag, which the restore loop checks.
+ */
+static void
+gpart_sighndl(int sig __unused)
+{
+ undo_restore = 1;
+}
+
+/*
+ * Handler for the "restore" verb: read a backup (as printed by the
+ * "backup" verb) from stdin and recreate it on every provider argument.
+ *
+ * With "force" set, an existing table on each argument is destroyed
+ * first. The first data line creates the table, each further line adds a
+ * partition and optionally sets its attributes; labels are restored only
+ * when "restore_labels" is set. All requests are issued with the flags
+ * "restore" and committed at the end only if the caller's flags contain
+ * 'C'. On a request error or SIGINT, "undo" is issued on every argument.
+ *
+ * This function does not return: it exits with EXIT_SUCCESS or
+ * EXIT_FAILURE.
+ *
+ * The trailing newline is removed with strcspn(), so an empty buffer no
+ * longer causes a write before the start of `line' and a final line that
+ * lacks a newline no longer loses its last character.
+ */
+static void
+gpart_restore(struct gctl_req *req, unsigned int fl __unused)
+{
+	struct gmesh mesh;
+	struct gclass *classp;
+	struct gctl_req *r;
+	struct ggeom *gp;
+	struct sigaction si_sa;
+	const char *s, *flags, *errstr, *label;
+	char **ap, *argv[6], line[BUFSIZ], *pline;
+	int error, forced, i, l, nargs, created, rl;
+	intmax_t n;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 1)
+		errx(EXIT_FAILURE, "Invalid number of arguments.");
+
+	forced = gctl_get_int(req, "force");
+	flags = gctl_get_ascii(req, "flags");
+	rl = gctl_get_int(req, "restore_labels");
+	s = gctl_get_ascii(req, "class");
+	if (s == NULL)
+		abort();
+	error = geom_gettree(&mesh);
+	if (error != 0)
+		errc(EXIT_FAILURE, error, "Cannot get GEOM tree");
+	classp = find_class(&mesh, s);
+	if (classp == NULL) {
+		geom_deletetree(&mesh);
+		errx(EXIT_FAILURE, "Class %s not found.", s);
+	}
+
+	/* SIGINT only raises undo_restore; the backout happens below. */
+	sigemptyset(&si_sa.sa_mask);
+	si_sa.sa_flags = 0;
+	si_sa.sa_handler = gpart_sighndl;
+	if (sigaction(SIGINT, &si_sa, 0) == -1)
+		err(EXIT_FAILURE, "sigaction SIGINT");
+
+	if (forced) {
+		/* destroy existent partition table before restore */
+		for (i = 0; i < nargs; i++) {
+			s = gctl_get_ascii(req, "arg%d", i);
+			gp = find_geom(classp, s);
+			if (gp != NULL) {
+				r = gctl_get_handle();
+				gctl_ro_param(r, "class", -1,
+				    classp->lg_name);
+				gctl_ro_param(r, "verb", -1, "destroy");
+				gctl_ro_param(r, "flags", -1, "restore");
+				gctl_ro_param(r, "force", sizeof(forced),
+				    &forced);
+				gctl_ro_param(r, "arg0", -1, s);
+				errstr = gctl_issue(r);
+				if (errstr != NULL && errstr[0] != '\0') {
+					gpart_print_error(errstr);
+					gctl_free(r);
+					goto backout;
+				}
+				gctl_free(r);
+			}
+		}
+	}
+	created = 0;
+	while (undo_restore == 0 &&
+	    fgets(line, sizeof(line) - 1, stdin) != NULL) {
+		/* Format of backup entries:
+		 * <scheme name> <number of entries>
+		 * <index> <type> <start> <size> [label] ['['attrib[,attrib]']']
+		 */
+		line[strcspn(line, "\n")] = '\0';
+		pline = line;
+		if (skip_line(pline))
+			continue;
+		/* Split into at most six non-empty blank-separated fields. */
+		for (ap = argv;
+		    (*ap = strsep(&pline, " \t")) != NULL;)
+			if (**ap != '\0' && ++ap >= &argv[6])
+				break;
+		l = ap - &argv[0];
+		label = pline = NULL;
+		if (l == 1 || l == 2) { /* create table */
+			if (created)
+				errx(EXIT_FAILURE, "Incorrect backup format.");
+			if (l == 2)
+				n = strtoimax(argv[1], NULL, 0);
+			for (i = 0; i < nargs; i++) {
+				s = gctl_get_ascii(req, "arg%d", i);
+				r = gctl_get_handle();
+				gctl_ro_param(r, "class", -1,
+				    classp->lg_name);
+				gctl_ro_param(r, "verb", -1, "create");
+				gctl_ro_param(r, "scheme", -1, argv[0]);
+				if (l == 2)
+					gctl_ro_param(r, "entries",
+					    sizeof(n), &n);
+				gctl_ro_param(r, "flags", -1, "restore");
+				gctl_ro_param(r, "arg0", -1, s);
+				errstr = gctl_issue(r);
+				if (errstr != NULL && errstr[0] != '\0') {
+					gpart_print_error(errstr);
+					gctl_free(r);
+					goto backout;
+				}
+				gctl_free(r);
+			}
+			created = 1;
+			continue;
+		} else if (l < 4 || created == 0)
+			errx(EXIT_FAILURE, "Incorrect backup format.");
+		else if (l == 5) {
+			/* The fifth field is either the label or the attributes. */
+			if (strchr(argv[4], '[') == NULL)
+				label = argv[4];
+			else
+				pline = argv[4];
+		} else if (l == 6) {
+			label = argv[4];
+			pline = argv[5];
+		}
+		/* Add partitions to each table */
+		for (i = 0; i < nargs; i++) {
+			s = gctl_get_ascii(req, "arg%d", i);
+			r = gctl_get_handle();
+			n = strtoimax(argv[0], NULL, 0);
+			gctl_ro_param(r, "class", -1, classp->lg_name);
+			gctl_ro_param(r, "verb", -1, "add");
+			gctl_ro_param(r, "flags", -1, "restore");
+			gctl_ro_param(r, GPART_PARAM_INDEX, sizeof(n), &n);
+			gctl_ro_param(r, "type", -1, argv[1]);
+			gctl_ro_param(r, "start", -1, argv[2]);
+			gctl_ro_param(r, "size", -1, argv[3]);
+			if (rl != 0 && label != NULL)
+				gctl_ro_param(r, "label", -1, argv[4]);
+			gctl_ro_param(r, "alignment", -1, GPART_AUTOFILL);
+			gctl_ro_param(r, "arg0", -1, s);
+			error = gpart_autofill(r);
+			if (error != 0)
+				errc(EXIT_FAILURE, error, "autofill");
+			errstr = gctl_issue(r);
+			if (errstr != NULL && errstr[0] != '\0') {
+				gpart_print_error(errstr);
+				gctl_free(r);
+				goto backout;
+			}
+			gctl_free(r);
+		}
+		if (pline == NULL || *pline != '[')
+			continue;
+		/* set attributes */
+		pline++;
+		/* argv is reused for the attribute names from here on. */
+		for (ap = argv;
+		    (*ap = strsep(&pline, ",]")) != NULL;)
+			if (**ap != '\0' && ++ap >= &argv[6])
+				break;
+		for (i = 0; i < nargs; i++) {
+			l = ap - &argv[0];
+			s = gctl_get_ascii(req, "arg%d", i);
+			while (l > 0) {
+				r = gctl_get_handle();
+				gctl_ro_param(r, "class", -1, classp->lg_name);
+				gctl_ro_param(r, "verb", -1, "set");
+				gctl_ro_param(r, "flags", -1, "restore");
+				gctl_ro_param(r, GPART_PARAM_INDEX,
+				    sizeof(n), &n);
+				gctl_ro_param(r, "attrib", -1, argv[--l]);
+				gctl_ro_param(r, "arg0", -1, s);
+				errstr = gctl_issue(r);
+				if (errstr != NULL && errstr[0] != '\0') {
+					gpart_print_error(errstr);
+					gctl_free(r);
+					goto backout;
+				}
+				gctl_free(r);
+			}
+		}
+	}
+	if (undo_restore)
+		goto backout;
+	/* commit changes if needed */
+	if (strchr(flags, 'C') != NULL) {
+		for (i = 0; i < nargs; i++) {
+			s = gctl_get_ascii(req, "arg%d", i);
+			r = gctl_get_handle();
+			gctl_ro_param(r, "class", -1, classp->lg_name);
+			gctl_ro_param(r, "verb", -1, "commit");
+			gctl_ro_param(r, "arg0", -1, s);
+			errstr = gctl_issue(r);
+			if (errstr != NULL && errstr[0] != '\0') {
+				gpart_print_error(errstr);
+				gctl_free(r);
+				goto backout;
+			}
+			gctl_free(r);
+		}
+	}
+	gctl_free(req);
+	geom_deletetree(&mesh);
+	exit(EXIT_SUCCESS);
+
+backout:
+	/* Best effort: errors from the undo requests themselves are ignored. */
+	for (i = 0; i < nargs; i++) {
+		s = gctl_get_ascii(req, "arg%d", i);
+		r = gctl_get_handle();
+		gctl_ro_param(r, "class", -1, classp->lg_name);
+		gctl_ro_param(r, "verb", -1, "undo");
+		gctl_ro_param(r, "arg0", -1, s);
+		gctl_issue(r);
+		gctl_free(r);
+	}
+	gctl_free(req);
+	geom_deletetree(&mesh);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Read the whole regular file `bootfile' into a freshly malloc()ed
+ * buffer and return it; the caller owns the buffer.
+ *
+ * On entry, a positive *size is the maximum file size accepted (0 or a
+ * negative value means no limit). On return *size holds the file size.
+ * Any failure (stat/open/read error, not a regular file, empty file,
+ * file over the limit) terminates the program.
+ *
+ * The limit is an ssize_t and is therefore printed with %zd. A short
+ * read is reported on its own, since errno is not meaningful then.
+ */
+static void *
+gpart_bootfile_read(const char *bootfile, ssize_t *size)
+{
+	struct stat sb;
+	void *code;
+	ssize_t nread;
+	int fd;
+
+	if (stat(bootfile, &sb) == -1)
+		err(EXIT_FAILURE, "%s", bootfile);
+	if (!S_ISREG(sb.st_mode))
+		errx(EXIT_FAILURE, "%s: not a regular file", bootfile);
+	if (sb.st_size == 0)
+		errx(EXIT_FAILURE, "%s: empty file", bootfile);
+	if (*size > 0 && sb.st_size > *size)
+		errx(EXIT_FAILURE, "%s: file too big (%zd limit)", bootfile,
+		    *size);
+
+	*size = sb.st_size;
+
+	fd = open(bootfile, O_RDONLY);
+	if (fd == -1)
+		err(EXIT_FAILURE, "%s", bootfile);
+	code = malloc(*size);
+	if (code == NULL)
+		err(EXIT_FAILURE, NULL);
+	nread = read(fd, code, *size);
+	if (nread == -1)
+		err(EXIT_FAILURE, "%s", bootfile);
+	if (nread != *size)
+		errx(EXIT_FAILURE, "%s: short read", bootfile);
+	close(fd);
+
+	return (code);
+}
+
+/*
+ * Write `size' bytes of partition boot code to the partition of `gp'
+ * whose "index" equals `idx'. The data is copied into a zero-filled
+ * buffer rounded up to a whole number of sectors and written in a single
+ * write() call. An unknown index, a partition that is too small, or any
+ * I/O failure terminates the program.
+ */
+static void
+gpart_write_partcode(struct ggeom *gp, int idx, void *code, ssize_t size)
+{
+	char devpath[128];
+	struct gprovider *pp;
+	const char *cfgidx;
+	char *sectors;
+	off_t bsize;
+	int fd;
+
+	LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+		cfgidx = find_provcfg(pp, "index");
+		if (cfgidx != NULL && atoi(cfgidx) == idx)
+			break;
+	}
+	if (pp == NULL)
+		errx(EXIT_FAILURE, "invalid partition index");
+
+	snprintf(devpath, sizeof(devpath), "/dev/%s", pp->lg_name);
+	if (pp->lg_mediasize < size)
+		errx(EXIT_FAILURE, "%s: not enough space", devpath);
+	fd = open(devpath, O_WRONLY);
+	if (fd == -1)
+		err(EXIT_FAILURE, "%s", devpath);
+	/*
+	 * When writing to a disk device, the write must be
+	 * sector aligned and not write to any partial sectors,
+	 * so round up the buffer size to the next sector and zero it.
+	 */
+	bsize = (size + pp->lg_sectorsize - 1) /
+	    pp->lg_sectorsize * pp->lg_sectorsize;
+	sectors = calloc(1, bsize);
+	if (sectors == NULL)
+		err(EXIT_FAILURE, "%s", devpath);
+	bcopy(code, sectors, size);
+	if (write(fd, sectors, bsize) != bsize)
+		err(EXIT_FAILURE, "%s", devpath);
+	free(sectors);
+	close(fd);
+	printf("partcode written to %s\n", pp->lg_name);
+}
+
+/*
+ * Install boot code on partitions of a VTOC8 geom: on the partition whose
+ * "index" equals `idx', or on every partition when `idx' is 0. Partitions
+ * smaller than VTOC_BOOTSIZE are passed over. Exits with an error when no
+ * partition received the code, on an unexpected sector size, or on any
+ * I/O failure.
+ */
+static void
+gpart_write_partcode_vtoc8(struct ggeom *gp, int idx, void *code)
+{
+	const size_t label_len = sizeof(struct vtoc8);
+	const size_t code_len = VTOC_BOOTSIZE - label_len;
+	char devpath[128];
+	struct gprovider *pp;
+	const char *cfgidx;
+	int written, fd;
+
+	written = 0;
+	LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+		cfgidx = find_provcfg(pp, "index");
+		if (cfgidx == NULL || (idx != 0 && atoi(cfgidx) != idx))
+			continue;
+		snprintf(devpath, sizeof(devpath), "/dev/%s", pp->lg_name);
+		if (pp->lg_sectorsize != label_len)
+			errx(EXIT_FAILURE, "%s: unexpected sector "
+			    "size (%d)\n", devpath, pp->lg_sectorsize);
+		if (pp->lg_mediasize < VTOC_BOOTSIZE)
+			continue;
+		fd = open(devpath, O_WRONLY);
+		if (fd == -1)
+			err(EXIT_FAILURE, "%s", devpath);
+		/*
+		 * Skip the first sizeof(struct vtoc8) bytes of both the
+		 * device and the boot code, in order to avoid overwriting
+		 * the label.
+		 */
+		if (lseek(fd, label_len, SEEK_SET) != label_len)
+			err(EXIT_FAILURE, "%s", devpath);
+		if (write(fd, (caddr_t)code + label_len, code_len) != code_len)
+			err(EXIT_FAILURE, "%s", devpath);
+		written++;
+		close(fd);
+		/* A specific index was requested and has just been handled. */
+		if (idx != 0)
+			break;
+	}
+	if (written == 0)
+		errx(EXIT_FAILURE, "%s: no partitions", gp->lg_name);
+	printf("partcode written to %s\n",
+	    idx != 0 ? pp->lg_name: gp->lg_name);
+}
+
+/*
+ * Handle the "bootcode" verb.
+ *
+ * If the request carries GPART_PARAM_BOOTCODE, the named file is read and
+ * the parameter is replaced by the file contents; the request is then
+ * handed to gpart_issue() at the end.  If it carries GPART_PARAM_PARTCODE,
+ * the named file is read, the parameter is removed from the request and the
+ * code is written directly to a partition of the geom named by "arg0"
+ * (for the VTOC8 scheme, via gpart_write_partcode_vtoc8()).
+ * GPART_PARAM_INDEX is only accepted together with partcode and is also
+ * removed from the request.  At least one of the two must be supplied.
+ */
+static void
+gpart_bootcode(struct gctl_req *req, unsigned int fl)
+{
+	struct gmesh mesh;
+	struct gclass *classp;
+	struct ggeom *gp;
+	const char *str;
+	void *bootcode = NULL, *partcode = NULL;
+	size_t bootsize, partsize;
+	int error, idx = 0, is_vtoc8;
+
+	if (gctl_has_param(req, GPART_PARAM_BOOTCODE)) {
+		str = gctl_get_ascii(req, GPART_PARAM_BOOTCODE);
+		bootsize = 800 * 1024;		/* Arbitrary limit. */
+		bootcode = gpart_bootfile_read(str, &bootsize);
+		error = gctl_change_param(req, GPART_PARAM_BOOTCODE, bootsize,
+		    bootcode);
+		if (error != 0)
+			errc(EXIT_FAILURE, error, "internal error");
+	}
+
+	/* Locate the geom named on the command line. */
+	str = gctl_get_ascii(req, "class");
+	if (str == NULL)
+		abort();
+	error = geom_gettree(&mesh);
+	if (error != 0)
+		errc(EXIT_FAILURE, error, "Cannot get GEOM tree");
+	classp = find_class(&mesh, str);
+	if (classp == NULL) {
+		geom_deletetree(&mesh);
+		errx(EXIT_FAILURE, "Class %s not found.", str);
+	}
+	if (gctl_get_int(req, "nargs") != 1)
+		errx(EXIT_FAILURE, "Invalid number of arguments.");
+	str = gctl_get_ascii(req, "arg0");
+	if (str == NULL)
+		abort();
+	gp = find_geom(classp, str);
+	if (gp == NULL)
+		errx(EXIT_FAILURE, "No such geom: %s.", str);
+
+	/* VTOC8 has a fixed partcode size and its own write routine. */
+	str = find_geomcfg(gp, "scheme");
+	if (str == NULL)
+		errx(EXIT_FAILURE, "Scheme not found for geom %s", gp->lg_name);
+	is_vtoc8 = (strcmp(str, "VTOC8") == 0);
+
+	if (gctl_has_param(req, GPART_PARAM_PARTCODE)) {
+		str = gctl_get_ascii(req, GPART_PARAM_PARTCODE);
+		if (is_vtoc8)
+			partsize = VTOC_BOOTSIZE;
+		else
+			partsize = 1024 * 1024;	/* Arbitrary limit. */
+		partcode = gpart_bootfile_read(str, &partsize);
+		error = gctl_delete_param(req, GPART_PARAM_PARTCODE);
+		if (error != 0)
+			errc(EXIT_FAILURE, error, "internal error");
+	}
+
+	if (gctl_has_param(req, GPART_PARAM_INDEX)) {
+		if (partcode == NULL)
+			errx(EXIT_FAILURE, "-i is only valid with -p");
+		idx = (int)gctl_get_intmax(req, GPART_PARAM_INDEX);
+		if (idx < 1)
+			errx(EXIT_FAILURE, "invalid partition index");
+		error = gctl_delete_param(req, GPART_PARAM_INDEX);
+		if (error != 0)
+			errc(EXIT_FAILURE, error, "internal error");
+	}
+
+	if (partcode == NULL) {
+		if (bootcode == NULL)
+			errx(EXIT_FAILURE, "no -b nor -p");
+	} else if (is_vtoc8) {
+		/* idx == 0 is allowed here: it selects all partitions. */
+		if (partsize != VTOC_BOOTSIZE)
+			errx(EXIT_FAILURE, "invalid bootcode");
+		gpart_write_partcode_vtoc8(gp, idx, partcode);
+	} else {
+		if (idx == 0)
+			errx(EXIT_FAILURE, "missing -i option");
+		gpart_write_partcode(gp, idx, partcode, partsize);
+	}
+
+	/* gpart_issue() is declared _Noreturn in this file. */
+	if (bootcode != NULL)
+		gpart_issue(req, fl);
+
+	geom_deletetree(&mesh);
+	free(partcode);
+}
+
+/*
+ * Print a warning for errstr.
+ *
+ * If errstr starts with a number (parsed by strtol() with base 0), that
+ * number is passed to warnc() as the error code, followed by the rest of
+ * the string with leading spaces skipped (or no message at all if nothing
+ * is left).  Otherwise the whole string is printed with warnx().
+ */
+static void
+gpart_print_error(const char *errstr)
+{
+	char *rest;
+	int code;
+
+	code = strtol(errstr, &rest, 0);
+	if (rest == errstr) {
+		/* No conversion was performed, so rest aliases errstr. */
+		warnx("%s", rest);
+		return;
+	}
+
+	while (*rest == ' ')
+		rest++;
+	if (*rest == '\0')
+		warnc(code, NULL);
+	else
+		warnc(code, "%s", rest);
+}
+
+/*
+ * Issue the request and terminate the program.
+ *
+ * Requires exactly one argument ("nargs" == 1), removes "nargs" from the
+ * request, runs gpart_autofill(), attaches a 4096-byte "output" buffer and
+ * calls gctl_issue().  On success any text placed in the output buffer is
+ * printed and the exit status is EXIT_SUCCESS; on an autofill or issue
+ * error a warning is printed and the exit status is EXIT_FAILURE.  The
+ * request is freed before exiting.  The fl argument is unused.
+ */
+static _Noreturn void
+gpart_issue(struct gctl_req *req, unsigned int fl __unused)
+{
+	char output[4096];
+	const char *errstr;
+	int error, status;
+
+	if (gctl_get_int(req, "nargs") != 1)
+		errx(EXIT_FAILURE, "Invalid number of arguments.");
+	(void)gctl_delete_param(req, "nargs");
+
+	status = EXIT_FAILURE;
+
+	/* autofill parameters (if applicable). */
+	error = gpart_autofill(req);
+	if (error != 0) {
+		warnc(error, "autofill");
+	} else {
+		bzero(output, sizeof(output));
+		gctl_rw_param(req, "output", sizeof(output), output);
+		errstr = gctl_issue(req);
+		if (errstr != NULL && errstr[0] != '\0') {
+			gpart_print_error(errstr);
+		} else {
+			if (output[0] != '\0')
+				printf("%s", output);
+			status = EXIT_SUCCESS;
+		}
+	}
+
+	gctl_free(req);
+	exit(status);
+}
diff --git a/lib/geom/part/gpart.8 b/lib/geom/part/gpart.8
new file mode 100644
index 000000000000..829181578e8f
--- /dev/null
+++ b/lib/geom/part/gpart.8
@@ -0,0 +1,1441 @@
+.\" Copyright (c) 2007, 2008 Marcel Moolenaar
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 17, 2018
+.Dt GPART 8
+.Os
+.Sh NAME
+.Nm gpart
+.Nd "control utility for the disk partitioning GEOM class"
+.Sh SYNOPSIS
+.\" ==== ADD ====
+.Nm
+.Cm add
+.Fl t Ar type
+.Op Fl a Ar alignment
+.Op Fl b Ar start
+.Op Fl s Ar size
+.Op Fl i Ar index
+.Op Fl l Ar label
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== BACKUP ====
+.Nm
+.Cm backup
+.Ar geom
+.\" ==== BOOTCODE ====
+.Nm
+.Cm bootcode
+.Op Fl b Ar bootcode
+.Op Fl p Ar partcode Fl i Ar index
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== COMMIT ====
+.Nm
+.Cm commit
+.Ar geom
+.\" ==== CREATE ====
+.Nm
+.Cm create
+.Fl s Ar scheme
+.Op Fl n Ar entries
+.Op Fl f Ar flags
+.Ar provider
+.\" ==== DELETE ====
+.Nm
+.Cm delete
+.Fl i Ar index
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== DESTROY ====
+.Nm
+.Cm destroy
+.Op Fl F
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== MODIFY ====
+.Nm
+.Cm modify
+.Fl i Ar index
+.Op Fl l Ar label
+.Op Fl t Ar type
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== RECOVER ====
+.Nm
+.Cm recover
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== RESIZE ====
+.Nm
+.Cm resize
+.Fl i Ar index
+.Op Fl a Ar alignment
+.Op Fl s Ar size
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== RESTORE ====
+.Nm
+.Cm restore
+.Op Fl lF
+.Op Fl f Ar flags
+.Ar provider
+.Op Ar ...
+.\" ==== SET ====
+.Nm
+.Cm set
+.Fl a Ar attrib
+.Fl i Ar index
+.Op Fl f Ar flags
+.Ar geom
+.\" ==== SHOW ====
+.Nm
+.Cm show
+.Op Fl l | r
+.Op Fl p
+.Op Ar geom ...
+.\" ==== UNDO ====
+.Nm
+.Cm undo
+.Ar geom
+.\" ==== UNSET ====
+.Nm
+.Cm unset
+.Fl a Ar attrib
+.Fl i Ar index
+.Op Fl f Ar flags
+.Ar geom
+.\"
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used to partition GEOM providers, normally disks.
+The first argument is the action to be taken:
+.Bl -tag -width ".Cm bootcode"
+.\" ==== ADD ====
+.It Cm add
+Add a new partition to the partitioning scheme given by
+.Ar geom .
+The partition type must be specified with
+.Fl t Ar type .
+The partition's location, size, and other attributes will be calculated
+automatically if the corresponding options are not specified.
+.Pp
+The
+.Cm add
+command accepts these options:
+.Bl -tag -width 12n
+.It Fl a Ar alignment
+If specified, then the
+.Nm
+utility tries to align the
+.Ar start
+offset and the partition
+.Ar size
+to be a multiple of the
+.Ar alignment
+value.
+.It Fl b Ar start
+The logical block address where the partition will begin.
+An SI unit suffix is allowed.
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl i Ar index
+The index in the partition table at which the new partition is to be
+placed.
+The index determines the name of the device special file used
+to represent the partition.
+.It Fl l Ar label
+The label attached to the partition.
+This option is only valid when used on partitioning schemes that support
+partition labels.
+.It Fl s Ar size
+Create a partition of size
+.Ar size .
+An SI unit suffix is allowed.
+.It Fl t Ar type
+Create a partition of type
+.Ar type .
+Partition types are discussed below in the section entitled
+.Sx "PARTITION TYPES" .
+.El
+.\" ==== BACKUP ====
+.It Cm backup
+Dump a partition table to standard output in a special format used by the
+.Cm restore
+action.
+.\" ==== BOOTCODE ====
+.It Cm bootcode
+Embed bootstrap code into the partitioning scheme's metadata on the
+.Ar geom
+(using
+.Fl b Ar bootcode )
+or write bootstrap code into a partition (using
+.Fl p Ar partcode
+and
+.Fl i Ar index ) .
+.Pp
+The
+.Cm bootcode
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl b Ar bootcode
+Embed bootstrap code from the file
+.Ar bootcode
+into the partitioning scheme's metadata for
+.Ar geom .
+Not all partitioning schemes have embedded bootstrap code, so the
+.Fl b Ar bootcode
+option is scheme-specific in nature (see the section entitled
+.Sx BOOTSTRAPPING
+below).
+The
+.Ar bootcode
+file must match the partitioning scheme's requirements for file content
+and size.
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl i Ar index
+Specify the target partition for
+.Fl p Ar partcode .
+.It Fl p Ar partcode
+Write the bootstrap code from the file
+.Ar partcode
+into the
+.Ar geom
+partition specified by
+.Fl i Ar index .
+The size of the file must not exceed the size of the partition.
+.El
+.\" ==== COMMIT ====
+.It Cm commit
+Commit any pending changes for geom
+.Ar geom .
+All actions are committed by default and will not result in
+pending changes.
+Actions can be modified with the
+.Fl f Ar flags
+option so that they are not committed, but become pending.
+Pending changes are reflected by the geom and the
+.Nm
+utility, but they are not actually written to disk.
+The
+.Cm commit
+action will write all pending changes to disk.
+.\" ==== CREATE ====
+.It Cm create
+Create a new partitioning scheme on a provider given by
+.Ar provider .
+The scheme to use must be specified with the
+.Fl s Ar scheme
+option.
+.Pp
+The
+.Cm create
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl n Ar entries
+The number of entries in the partition table.
+Every partitioning scheme has a minimum and maximum number of entries.
+This option allows tables to be created with a number of entries
+that is within the limits.
+Some schemes have a maximum equal to the minimum and some schemes have
+a maximum large enough to be considered unlimited.
+By default, partition tables are created with the minimum number of
+entries.
+.It Fl s Ar scheme
+Specify the partitioning scheme to use.
+The kernel must have support for a particular scheme before
+that scheme can be used to partition a disk.
+.El
+.\" ==== DELETE ====
+.It Cm delete
+Delete a partition from geom
+.Ar geom
+and further identified by the
+.Fl i Ar index
+option.
+The partition cannot be actively used by the kernel.
+.Pp
+The
+.Cm delete
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl i Ar index
+Specifies the index of the partition to be deleted.
+.El
+.\" ==== DESTROY ====
+.It Cm destroy
+Destroy the partitioning scheme as implemented by geom
+.Ar geom .
+.Pp
+The
+.Cm destroy
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl F
+Force destruction of the partition table even if it is not empty.
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.El
+.\" ==== MODIFY ====
+.It Cm modify
+Modify a partition from geom
+.Ar geom
+and further identified by the
+.Fl i Ar index
+option.
+Only the type and/or label of the partition can be modified.
+Not all partitioning schemes support labels and it is invalid to
+try to change a partition label in such cases.
+.Pp
+The
+.Cm modify
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl i Ar index
+Specifies the index of the partition to be modified.
+.It Fl l Ar label
+Change the partition label to
+.Ar label .
+.It Fl t Ar type
+Change the partition type to
+.Ar type .
+.El
+.\" ==== RECOVER ====
+.It Cm recover
+Recover corrupt partitioning scheme metadata on the geom
+.Ar geom .
+See the section entitled
+.Sx RECOVERING
+below for additional information.
+.Pp
+The
+.Cm recover
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.El
+.\" ==== RESIZE ====
+.It Cm resize
+Resize a partition from geom
+.Ar geom
+and further identified by the
+.Fl i Ar index
+option.
+If the new size is not specified it is automatically calculated
+to be the maximum available from
+.Ar geom .
+.Pp
+The
+.Cm resize
+command accepts these options:
+.Bl -tag -width 12n
+.It Fl a Ar alignment
+If specified, then the
+.Nm
+utility tries to align partition
+.Ar size
+to be a multiple of the
+.Ar alignment
+value.
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl i Ar index
+Specifies the index of the partition to be resized.
+.It Fl s Ar size
+Specifies the new size of the partition, in logical blocks.
+An SI unit suffix is allowed.
+.El
+.\" ==== RESTORE ====
+.It Cm restore
+Restore the partition table from a backup previously created by the
+.Cm backup
+action and read from standard input.
+Only the partition table is restored.
+This action does not affect the content of partitions.
+After restoring the partition table and writing bootcode if needed,
+user data must be restored from backup.
+.Pp
+The
+.Cm restore
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl F
+Destroy partition table on the given
+.Ar provider
+before doing restore.
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl l
+Restore partition labels for partitioning schemes that support them.
+.El
+.\" ==== SET ====
+.It Cm set
+Set the named attribute on the partition entry.
+See the section entitled
+.Sx ATTRIBUTES
+below for a list of available attributes.
+.Pp
+The
+.Cm set
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl a Ar attrib
+Specifies the attribute to set.
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl i Ar index
+Specifies the index of the partition on which the attribute will be set.
+.El
+.\" ==== SHOW ====
+.It Cm show
+Show current partition information for the specified geoms, or all
+geoms if none are specified.
+The default output includes the logical starting block of each
+partition, the partition size in blocks, the partition index number,
+the partition type, and a human readable partition size.
+Block sizes and locations are based on the device's Sectorsize
+as shown by
+.Cm gpart list .
+.Pp
+The
+.Cm show
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl l
+For partitioning schemes that support partition labels, print them
+instead of partition type.
+.It Fl p
+Show provider names instead of partition indexes.
+.It Fl r
+Show raw partition type instead of symbolic name.
+.El
+.\" ==== UNDO ====
+.It Cm undo
+Revert any pending changes for geom
+.Ar geom .
+This action is the opposite of the
+.Cm commit
+action and can be used to undo any changes that have not been committed.
+.\" ==== UNSET ====
+.It Cm unset
+Clear the named attribute on the partition entry.
+See the section entitled
+.Sx ATTRIBUTES
+below for a list of available attributes.
+.Pp
+The
+.Cm unset
+command accepts these options:
+.Bl -tag -width 10n
+.It Fl a Ar attrib
+Specifies the attribute to clear.
+.It Fl f Ar flags
+Additional operational flags.
+See the section entitled
+.Sx "OPERATIONAL FLAGS"
+below for a discussion
+about its use.
+.It Fl i Ar index
+Specifies the index of the partition on which the attribute will be cleared.
+.El
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Sh PARTITIONING SCHEMES
+Several partitioning schemes are supported by the
+.Nm
+utility:
+.Bl -tag -width ".Cm VTOC8"
+.It Cm APM
+Apple Partition Map, used by PowerPC(R) Macintosh(R) computers.
+Requires the
+.Cd GEOM_PART_APM
+kernel option.
+.It Cm BSD
+Traditional BSD disklabel, usually used to subdivide MBR partitions.
+.Po
+This scheme can also be used as the sole partitioning method, without
+an MBR.
+Partition editing tools from other operating systems often do not
+understand the bare disklabel partition layout, so this is sometimes
+called
+.Dq dangerously dedicated .
+.Pc
+Requires the
+.Cm GEOM_PART_BSD
+kernel option.
+.It Cm BSD64
+64-bit implementation of BSD disklabel used in DragonFlyBSD to subdivide MBR
+or GPT partitions.
+Requires the
+.Cm GEOM_PART_BSD64
+kernel option.
+.It Cm LDM
+The Logical Disk Manager is an implementation of a volume manager for
+Microsoft Windows NT.
+Requires the
+.Cd GEOM_PART_LDM
+kernel option.
+.It Cm GPT
+GUID Partition Table is used on Intel-based Macintosh computers and
+is gradually replacing MBR on most PCs and other systems.
+Requires the
+.Cm GEOM_PART_GPT
+kernel option.
+.It Cm MBR
+Master Boot Record is used on PCs and removable media.
+Requires the
+.Cm GEOM_PART_MBR
+kernel option.
+The
+.Cm GEOM_PART_EBR
+option adds support for the Extended Boot Record (EBR),
+which is used to define a logical partition.
+The
+.Cm GEOM_PART_EBR_COMPAT
+option enables backward compatibility for partition names
+in the EBR scheme.
+It also prevents any type of actions on such partitions.
+.It Cm VTOC8
+Sun's SMI Volume Table Of Contents, used by
+.Tn SPARC64
+and
+.Tn UltraSPARC
+computers.
+Requires the
+.Cm GEOM_PART_VTOC8
+kernel option.
+.El
+.Sh PARTITION TYPES
+Partition types are identified on disk by particular strings or magic
+values.
+The
+.Nm
+utility uses symbolic names for common partition types so the user
+does not need to know these values or other details of the partitioning
+scheme in question.
+The
+.Nm
+utility also allows the user to specify scheme-specific partition types
+for partition types that do not have symbolic names.
+Symbolic names currently understood and used by
+.Fx
+are:
+.Bl -tag -width ".Cm dragonfly-disklabel64"
+.It Cm apple-boot
+The system partition dedicated to storing boot loaders on some Apple
+systems.
+The scheme-specific types are
+.Qq Li "!171"
+for MBR,
+.Qq Li "!Apple_Bootstrap"
+for APM, and
+.Qq Li "!426f6f74-0000-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm bios-boot
+The system partition dedicated to the second stage of the boot loader program.
+Usually it is used by the GRUB 2 loader for GPT partitioning schemes.
+The scheme-specific type is
+.Qq Li "!21686148-6449-6E6F-744E-656564454649" .
+.It Cm efi
+The system partition for computers that use the Extensible Firmware
+Interface (EFI).
+The scheme-specific types are
+.Qq Li "!239"
+for MBR, and
+.Qq Li "!c12a7328-f81f-11d2-ba4b-00a0c93ec93b"
+for GPT.
+.It Cm freebsd
+A
+.Fx
+partition subdivided into filesystems with a
+.Bx
+disklabel.
+This is a legacy partition type and should not be used for the APM
+or GPT schemes.
+The scheme-specific types are
+.Qq Li "!165"
+for MBR,
+.Qq Li "!FreeBSD"
+for APM, and
+.Qq Li "!516e7cb4-6ecf-11d6-8ff8-00022d09712b"
+for GPT.
+.It Cm freebsd-boot
+A
+.Fx
+partition dedicated to bootstrap code.
+The scheme-specific type is
+.Qq Li "!83bd6b9d-7f41-11dc-be0b-001560b84f0f"
+for GPT.
+.It Cm freebsd-swap
+A
+.Fx
+partition dedicated to swap space.
+The scheme-specific types are
+.Qq Li "!FreeBSD-swap"
+for APM,
+.Qq Li "!516e7cb5-6ecf-11d6-8ff8-00022d09712b"
+for GPT, and tag 0x0901 for VTOC8.
+.It Cm freebsd-ufs
+A
+.Fx
+partition that contains a UFS or UFS2 filesystem.
+The scheme-specific types are
+.Qq Li "!FreeBSD-UFS"
+for APM,
+.Qq Li "!516e7cb6-6ecf-11d6-8ff8-00022d09712b"
+for GPT, and tag 0x0902 for VTOC8.
+.It Cm freebsd-vinum
+A
+.Fx
+partition that contains a Vinum volume.
+The scheme-specific types are
+.Qq Li "!FreeBSD-Vinum"
+for APM,
+.Qq Li "!516e7cb8-6ecf-11d6-8ff8-00022d09712b"
+for GPT, and tag 0x0903 for VTOC8.
+.It Cm freebsd-zfs
+A
+.Fx
+partition that contains a ZFS volume.
+The scheme-specific types are
+.Qq Li "!FreeBSD-ZFS"
+for APM,
+.Qq Li "!516e7cba-6ecf-11d6-8ff8-00022d09712b"
+for GPT, and tag 0x0904 for VTOC8.
+.El
+.Pp
+Other symbolic names that can be used with the
+.Cm gpart
+utility are:
+.Bl -tag -width ".Cm dragonfly-disklabel64"
+.It Cm apple-apfs
+An Apple macOS partition used for the Apple file system, APFS.
+.It Cm apple-core-storage
+An Apple Mac OS X partition used by logical volume manager known as
+Core Storage.
+The scheme-specific type is
+.Qq Li "!53746f72-6167-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm apple-hfs
+An Apple Mac OS X partition that contains a HFS or HFS+ filesystem.
+The scheme-specific types are
+.Qq Li "!175"
+for MBR,
+.Qq Li "!Apple_HFS"
+for APM and
+.Qq Li "!48465300-0000-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm apple-label
+An Apple Mac OS X partition dedicated to partition metadata that describes
+the disk device.
+The scheme-specific type is
+.Qq Li "!4c616265-6c00-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm apple-raid
+An Apple Mac OS X partition used in a software RAID configuration.
+The scheme-specific type is
+.Qq Li "!52414944-0000-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm apple-raid-offline
+An Apple Mac OS X partition used in a software RAID configuration.
+The scheme-specific type is
+.Qq Li "!52414944-5f4f-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm apple-tv-recovery
+An Apple Mac OS X partition used by Apple TV.
+The scheme-specific type is
+.Qq Li "!5265636f-7665-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm apple-ufs
+An Apple Mac OS X partition that contains a UFS filesystem.
+The scheme-specific types are
+.Qq Li "!168"
+for MBR,
+.Qq Li "!Apple_UNIX_SVR2"
+for APM and
+.Qq Li "!55465300-0000-11aa-aa11-00306543ecac"
+for GPT.
+.It Cm dragonfly-label32
+A DragonFlyBSD partition subdivided into filesystems with a
+.Bx
+disklabel.
+The scheme-specific type is
+.Qq Li "!9d087404-1ca5-11dc-8817-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-label64
+A DragonFlyBSD partition subdivided into filesystems with a
+disklabel64.
+The scheme-specific type is
+.Qq Li "!3d48ce54-1d16-11dc-8696-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-legacy
+A legacy partition type used in DragonFlyBSD.
+The scheme-specific type is
+.Qq Li "!bd215ab2-1d16-11dc-8696-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-ccd
+A DragonFlyBSD partition used with Concatenated Disk driver.
+The scheme-specific type is
+.Qq Li "!dbd5211b-1ca5-11dc-8817-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-hammer
+A DragonFlyBSD partition that contains a Hammer filesystem.
+The scheme-specific type is
+.Qq Li "!61dc63ac-6e38-11dc-8513-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-hammer2
+A DragonFlyBSD partition that contains a Hammer2 filesystem.
+The scheme-specific type is
+.Qq Li "!5cbb9ad1-862d-11dc-a94d-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-swap
+A DragonFlyBSD partition dedicated to swap space.
+The scheme-specific type is
+.Qq Li "!9d58fdbd-1ca5-11dc-8817-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-ufs
+A DragonFlyBSD partition that contains a UFS1 filesystem.
+The scheme-specific type is
+.Qq Li "!9d94ce7c-1ca5-11dc-8817-01301bb8a9f5"
+for GPT.
+.It Cm dragonfly-vinum
+A DragonFlyBSD partition used with Logical Volume Manager.
+The scheme-specific type is
+.Qq Li "!9dd4478f-1ca5-11dc-8817-01301bb8a9f5"
+for GPT.
+.It Cm ebr
+A partition subdivided into filesystems with an EBR.
+The scheme-specific type is
+.Qq Li "!5"
+for MBR.
+.It Cm fat16
+A partition that contains a FAT16 filesystem.
+The scheme-specific type is
+.Qq Li "!6"
+for MBR.
+.It Cm fat32
+A partition that contains a FAT32 filesystem.
+The scheme-specific type is
+.Qq Li "!11"
+for MBR.
+.It Cm fat32lba
+A partition that contains a FAT32 (LBA) filesystem.
+The scheme-specific type is
+.Qq Li "!12"
+for MBR.
+.It Cm linux-data
+A Linux partition that contains some filesystem with data.
+The scheme-specific types are
+.Qq Li "!131"
+for MBR and
+.Qq Li "!0fc63daf-8483-4772-8e79-3d69d8477de4"
+for GPT.
+.It Cm linux-lvm
+A Linux partition dedicated to Logical Volume Manager.
+The scheme-specific types are
+.Qq Li "!142"
+for MBR and
+.Qq Li "!e6d6d379-f507-44c2-a23c-238f2a3df928"
+for GPT.
+.It Cm linux-raid
+A Linux partition used in a software RAID configuration.
+The scheme-specific types are
+.Qq Li "!253"
+for MBR and
+.Qq Li "!a19d880f-05fc-4d3b-a006-743f0f84911e"
+for GPT.
+.It Cm linux-swap
+A Linux partition dedicated to swap space.
+The scheme-specific types are
+.Qq Li "!130"
+for MBR and
+.Qq Li "!0657fd6d-a4ab-43c4-84e5-0933c84b4f4f"
+for GPT.
+.It Cm mbr
+A partition that is sub-partitioned by a Master Boot Record (MBR).
+This type is known as
+.Qq Li "!024dee41-33e7-11d3-9d69-0008c781f39f"
+by GPT.
+.It Cm ms-basic-data
+A basic data partition (BDP) for Microsoft operating systems.
+In the GPT this type is the equivalent to partition types
+.Cm fat16 , fat32
+and
+.Cm ntfs
+in MBR.
+The scheme-specific type is
+.Qq Li "!ebd0a0a2-b9e5-4433-87c0-68b6b72699c7"
+for GPT.
+.It Cm ms-ldm-data
+A partition that contains Logical Disk Manager (LDM) volumes.
+The scheme-specific types are
+.Qq Li "!66"
+for MBR and
+.Qq Li "!af9b60a0-1431-4f62-bc68-3311714a69ad"
+for GPT.
+.It Cm ms-ldm-metadata
+A partition that contains Logical Disk Manager (LDM) database.
+The scheme-specific type is
+.Qq Li "!5808c8aa-7e8f-42e0-85d2-e1e90434cfb3"
+for GPT.
+.It Cm netbsd-ccd
+A NetBSD partition used with Concatenated Disk driver.
+The scheme-specific type is
+.Qq Li "!2db519c4-b10f-11dc-b99b-0019d1879648"
+for GPT.
+.It Cm netbsd-cgd
+An encrypted NetBSD partition.
+The scheme-specific type is
+.Qq Li "!2db519ec-b10f-11dc-b99b-0019d1879648"
+for GPT.
+.It Cm netbsd-ffs
+A NetBSD partition that contains a UFS filesystem.
+The scheme-specific type is
+.Qq Li "!49f48d5a-b10e-11dc-b99b-0019d1879648"
+for GPT.
+.It Cm netbsd-lfs
+A NetBSD partition that contains an LFS filesystem.
+The scheme-specific type is
+.Qq Li "!49f48d82-b10e-11dc-b99b-0019d1879648"
+for GPT.
+.It Cm netbsd-raid
+A NetBSD partition used in a software RAID configuration.
+The scheme-specific type is
+.Qq Li "!49f48daa-b10e-11dc-b99b-0019d1879648"
+for GPT.
+.It Cm netbsd-swap
+A NetBSD partition dedicated to swap space.
+The scheme-specific type is
+.Qq Li "!49f48d32-b10e-11dc-b99b-0019d1879648"
+for GPT.
+.It Cm ntfs
+A partition that contains an NTFS or exFAT filesystem.
+The scheme-specific type is
+.Qq Li "!7"
+for MBR.
+.It Cm prep-boot
+The system partition dedicated to storing boot loaders on some PowerPC systems,
+notably those made by IBM.
+The scheme-specific types are
+.Qq Li "!65"
+for MBR and
+.Qq Li "!9e1a2d38-c612-4316-aa26-8b49521e5a8b"
+for GPT.
+.It Cm vmware-vmfs
+A partition that contains a VMware File System (VMFS).
+The scheme-specific types are
+.Qq Li "!251"
+for MBR and
+.Qq Li "!aa31e02a-400f-11db-9590-000c2911d1b8"
+for GPT.
+.It Cm vmware-vmkdiag
+A partition that contains a VMware diagnostic filesystem.
+The scheme-specific types are
+.Qq Li "!252"
+for MBR and
+.Qq Li "!9d275380-40ad-11db-bf97-000c2911d1b8"
+for GPT.
+.It Cm vmware-reserved
+A VMware reserved partition.
+The scheme-specific type is
+.Qq Li "!9198effc-31c0-11db-8f78-000c2911d1b8"
+for GPT.
+.It Cm vmware-vsanhdr
+A partition claimed by VMware VSAN.
+The scheme-specific type is
+.Qq Li "!381cfccc-7288-11e0-92ee-000c2911d0b2"
+for GPT.
+.El
+.Sh ATTRIBUTES
+The scheme-specific attributes for EBR:
+.Bl -tag -width ".Cm active"
+.It Cm active
+.El
+.Pp
+The scheme-specific attributes for GPT:
+.Bl -tag -width ".Cm bootfailed"
+.It Cm bootme
+When set, the
+.Nm gptboot
+stage 1 boot loader will try to boot the system from this partition.
+Multiple partitions can be marked with the
+.Cm bootme
+attribute.
+See
+.Xr gptboot 8
+for more details.
+.It Cm bootonce
+Setting this attribute automatically sets the
+.Cm bootme
+attribute.
+When set, the
+.Nm gptboot
+stage 1 boot loader will try to boot the system from this partition only once.
+Multiple partitions can be marked with the
+.Cm bootonce
+and
+.Cm bootme
+attribute pairs.
+See
+.Xr gptboot 8
+for more details.
+.It Cm bootfailed
+This attribute should not be manually managed.
+It is managed by the
+.Nm gptboot
+stage 1 boot loader and the
+.Pa /etc/rc.d/gptboot
+start-up script.
+See
+.Xr gptboot 8
+for more details.
+.It Cm lenovofix
+Setting this attribute overwrites the Protective MBR with a new one where
+the 0xee partition is the second, rather than the first record.
+This resolves a BIOS compatibility issue with some Lenovo models including the
+X220, T420, and T520, allowing them to boot from GPT partitioned disks
+without using EFI.
+.El
+.Pp
+The scheme-specific attributes for MBR:
+.Bl -tag -width ".Cm active"
+.It Cm active
+.El
+.Sh BOOTSTRAPPING
+.Fx
+supports several partitioning schemes and each scheme uses different
+bootstrap code.
+The bootstrap code is located in a specific disk area for each partitioning
+scheme, and may vary in size for different schemes.
+.Pp
+Bootstrap code can be separated into two types.
+The first type is embedded in the partitioning scheme's metadata, while the
+second type is located on a specific partition.
+Embedding bootstrap code should only be done with the
+.Cm gpart bootcode
+command with the
+.Fl b Ar bootcode
+option.
+The GEOM PART class knows how to safely embed bootstrap code into
+specific partitioning scheme metadata without causing any damage.
+.Pp
+The Master Boot Record (MBR) uses a 512-byte bootstrap code image, embedded
+into the partition table's metadata area.
+There are two variants of this bootstrap code:
+.Pa /boot/mbr
+and
+.Pa /boot/boot0 .
+.Pa /boot/mbr
+searches for a partition with the
+.Cm active
+attribute (see the
+.Sx ATTRIBUTES
+section) in the partition table.
+Then it runs the next bootstrap stage.
+The
+.Pa /boot/boot0
+image contains a boot manager with some additional interactive functions
+for multi-booting from a user-selected partition.
+.Pp
+A BSD disklabel is usually created inside an MBR partition (slice)
+with type
+.Cm freebsd
+(see the
+.Sx "PARTITION TYPES"
+section).
+It uses the 8 KB bootstrap code image
+.Pa /boot/boot ,
+embedded into the partition table's metadata area.
+.Pp
+Both types of bootstrap code are used to boot from the GUID Partition Table.
+First, a protective MBR is embedded into the first disk sector from the
+.Pa /boot/pmbr
+image.
+It searches through the GPT for a
+.Cm freebsd-boot
+partition (see the
+.Sx "PARTITION TYPES"
+section) and runs the next bootstrap stage from it.
+The
+.Cm freebsd-boot
+partition should be smaller than 545 KB.
+It can be located either before or after other
+.Fx
+partitions on the disk.
+There are two variants of bootstrap code to write to this partition:
+.Pa /boot/gptboot
+and
+.Pa /boot/gptzfsboot .
+.Pp
+.Pa /boot/gptboot
+is used to boot from UFS partitions.
+.Cm gptboot
+searches through
+.Cm freebsd-ufs
+partitions in the GPT and selects one to boot based on the
+.Cm bootonce
+and
+.Cm bootme
+attributes.
+If neither attribute is found,
+.Pa /boot/gptboot
+boots from the first
+.Cm freebsd-ufs
+partition.
+.Pa /boot/loader
+.Pq the third bootstrap stage
+is loaded from the first partition that matches these conditions.
+See
+.Xr gptboot 8
+for more information.
+.Pp
+.Pa /boot/gptzfsboot
+is used to boot from ZFS.
+It searches through the GPT for
+.Cm freebsd-zfs
+partitions, trying to detect ZFS pools.
+After all pools are detected,
+.Pa /boot/zfsloader
+is started from the first one found.
+.Pp
+The VTOC8 scheme does not support embedding bootstrap code.
+Instead, the 8 KBytes bootstrap code image
+.Pa /boot/boot1
+should be written with the
+.Cm gpart bootcode
+command with the
+.Fl p Ar bootcode
+option to all sufficiently large VTOC8 partitions.
+To do this the
+.Fl i Ar index
+option could be omitted.
+.Pp
+The APM scheme also does not support embedding bootstrap code.
+Instead, the 800 KBytes bootstrap code image
+.Pa /boot/boot1.hfs
+should be written with the
+.Cm gpart bootcode
+command to a partition of type
+.Cm apple-boot ,
+which should also be 800 KB in size.
+.Sh OPERATIONAL FLAGS
+Actions other than the
+.Cm commit
+and
+.Cm undo
+actions take an optional
+.Fl f Ar flags
+option.
+This option is used to specify action-specific operational flags.
+By default, the
+.Nm
+utility defines the
+.Ql C
+flag so that the action is immediately
+committed.
+The user can specify
+.Dq Fl f Cm x
+to have the action result in a pending change that can later, with
+other pending changes, be committed as a single compound change with
+the
+.Cm commit
+action or reverted with the
+.Cm undo
+action.
+.Sh RECOVERING
+The GEOM PART class supports recovery of partition tables only for GPT.
+The GPT primary metadata is stored at the beginning of the device.
+For redundancy, a secondary
+.Pq backup
+copy of the metadata is stored at the end of the device.
+As a result of having two copies, some corruption of metadata is not
+fatal to the working of GPT.
+When the kernel detects corrupt metadata, it marks this table as corrupt
+and reports the problem.
+.Cm destroy
+and
+.Cm recover
+are the only operations allowed on corrupt tables.
+.Pp
+If one GPT header appears to be corrupt but the other copy remains intact,
+the kernel will log the following:
+.Bd -literal -offset indent
+GEOM: provider: the primary GPT table is corrupt or invalid.
+GEOM: provider: using the secondary instead -- recovery strongly advised.
+.Ed
+.Pp
+or
+.Bd -literal -offset indent
+GEOM: provider: the secondary GPT table is corrupt or invalid.
+GEOM: provider: using the primary only -- recovery suggested.
+.Ed
+.Pp
+Also
+.Nm
+commands such as
+.Cm show , status
+and
+.Cm list
+will report corrupt tables.
+.Pp
+If the size of the device has changed (e.g.,\& volume expansion) the
+secondary GPT header will no longer be located in the last sector.
+This is not a metadata corruption, but it is dangerous because any
+corruption of the primary GPT will lead to loss of the partition table.
+This problem is reported by the kernel with the message:
+.Bd -literal -offset indent
+GEOM: provider: the secondary GPT header is not in the last LBA.
+.Ed
+.Pp
+This situation can be recovered with the
+.Cm recover
+command.
+This command reconstructs the corrupt metadata using known valid
+metadata and relocates the secondary GPT to the end of the device.
+.Pp
+.Em NOTE :
+The GEOM PART class can detect the same partition table visible through
+different GEOM providers, and some of them will be marked as corrupt.
+Be careful when choosing a provider for recovery.
+If you choose incorrectly you can destroy the metadata of another GEOM class,
+e.g.,\& GEOM MIRROR or GEOM LABEL.
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm PART
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.part.auto_resize : No 1
+This variable controls the automatic resize behavior of the
+.Nm PART
+GEOM class.
+When this variable is enabled and a new provider size is detected, the scheme
+metadata is resized, but the changes are not saved to disk until
+.Cm gpart commit
+is run to confirm them.
+This behavior is also reported with the diagnostic message:
+.Sy "GEOM_PART: (provider) was automatically resized."
+.Sy "Use `gpart commit (provider)` to save changes or `gpart undo (provider)`"
+.Sy "to revert them."
+.It Va kern.geom.part.check_integrity : No 1
+This variable controls the behavior of metadata integrity checks.
+When integrity checks are enabled, the
+.Nm PART
+GEOM class verifies all generic partition parameters obtained from the
+disk metadata.
+If some inconsistency is detected, the partition table will be
+rejected with a diagnostic message:
+.Sy "GEOM_PART: Integrity check failed (provider, scheme)" .
+.It Va kern.geom.part.ldm.debug : No 0
+Debug level of the Logical Disk Manager (LDM) module.
+This can be set to a number between 0 and 2 inclusive.
+If set to 0 minimal debug information is printed,
+and if set to 2 the maximum amount of debug information is printed.
+.It Va kern.geom.part.ldm.show_mirrors : No 0
+This variable controls how the Logical Disk Manager (LDM) module handles
+mirrored volumes.
+By default mirrored volumes are shown as partitions with type
+.Cm ms-ldm-data
+(see the
+.Sx "PARTITION TYPES"
+section).
+If this variable is set to 1, each component of the mirrored volume will be
+presented as an independent partition.
+.Em NOTE :
+This may break a mirrored volume and lead to data damage.
+.It Va kern.geom.part.mbr.enforce_chs : No 0
+Specify how the Master Boot Record (MBR) module does alignment.
+If this variable is set to a non-zero value, the module will automatically
+recalculate the user-specified offset and size for alignment with the CHS
+geometry.
+Otherwise the values will be left unchanged.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The examples below assume that the disk's logical block size is 512
+bytes, regardless of its physical block size.
+.Ss GPT
+In this example, we will format
+.Pa ada0
+with the GPT scheme and create boot, swap and root partitions.
+First, we need to create the partition table:
+.Bd -literal -offset indent
+/sbin/gpart create -s GPT ada0
+.Ed
+.Pp
+Next, we install a protective MBR with the first-stage bootstrap code.
+The protective MBR lists a single, bootable partition spanning the
+entire disk, thus allowing non-GPT-aware BIOSes to boot from the disk
+and preventing tools which do not understand the GPT scheme from
+considering the disk to be unformatted.
+.Bd -literal -offset indent
+/sbin/gpart bootcode -b /boot/pmbr ada0
+.Ed
+.Pp
+We then create a dedicated
+.Cm freebsd-boot
+partition to hold the second-stage boot loader, which will load the
+.Fx
+kernel and modules from a UFS or ZFS filesystem.
+This partition must be larger than the bootstrap code
+.Po
+either
+.Pa /boot/gptboot
+for UFS or
+.Pa /boot/gptzfsboot
+for ZFS
+.Pc ,
+but smaller than 545 kB since the first-stage loader will load the
+entire partition into memory during boot, regardless of how much data
+it actually contains.
+We create a 472-block (236 kB) boot partition at offset 40, which is
+the size of the partition table (34 blocks or 17 kB) rounded up to the
+nearest 4 kB boundary.
+.Bd -literal -offset indent
+/sbin/gpart add -b 40 -s 472 -t freebsd-boot ada0
+/sbin/gpart bootcode -p /boot/gptboot -i 1 ada0
+.Ed
+.Pp
+We now create a 4 GB swap partition at the first available offset,
+which is 40 + 472 = 512 blocks (256 kB).
+.Bd -literal -offset indent
+/sbin/gpart add -s 4G -t freebsd-swap ada0
+.Ed
+.Pp
+Aligning the swap partition and all subsequent partitions on a 256 kB
+boundary ensures optimal performance on a wide range of media, from
+plain old disks with 512-byte blocks, through modern
+.Dq advanced format
+disks with 4096-byte physical blocks, to RAID volumes with stripe
+sizes of up to 256 kB.
+.Pp
+Finally, we create and format an 8 GB
+.Cm freebsd-ufs
+partition for the root filesystem, leaving the rest of the slice free
+for additional filesystems:
+.Bd -literal -offset indent
+/sbin/gpart add -s 8G -t freebsd-ufs ada0
+/sbin/newfs -Uj /dev/ada0p3
+.Ed
+.Ss MBR
+In this example, we will format
+.Pa ada0
+with the MBR scheme and create a single partition which we subdivide
+using a traditional
+.Bx
+disklabel.
+.Pp
+First, we create the partition table and a single 64 GB partition,
+then we mark that partition active (bootable) and install the
+first-stage boot loader:
+.Bd -literal -offset indent
+/sbin/gpart create -s MBR ada0
+/sbin/gpart add -t freebsd -s 64G ada0
+/sbin/gpart set -a active -i 1 ada0
+/sbin/gpart bootcode -b /boot/boot0 ada0
+.Ed
+.Pp
+Next, we create a disklabel in that partition
+.Po
+.Dq slice
+in disklabel terminology
+.Pc
+with room for up to 20 partitions:
+.Bd -literal -offset indent
+/sbin/gpart create -s BSD -n 20 ada0s1
+.Ed
+.Pp
+We then create an 8 GB root partition and a 4 GB swap partition:
+.Bd -literal -offset indent
+/sbin/gpart add -t freebsd-ufs -s 8G ada0s1
+/sbin/gpart add -t freebsd-swap -s 4G ada0s1
+.Ed
+.Pp
+Finally, we install the appropriate boot loader for the
+.Bx
+label:
+.Bd -literal -offset indent
+/sbin/gpart bootcode -b /boot/boot ada0s1
+.Ed
+.Ss VTOC8
+.Pp
+Create a VTOC8 scheme on
+.Pa da0 :
+.Bd -literal -offset indent
+/sbin/gpart create -s VTOC8 da0
+.Ed
+.Pp
+Create a 512MB-sized
+.Cm freebsd-ufs
+partition to contain a UFS filesystem from which the system can boot.
+.Bd -literal -offset indent
+/sbin/gpart add -s 512M -t freebsd-ufs da0
+.Ed
+.Pp
+Create a 15GB-sized
+.Cm freebsd-ufs
+partition to contain a UFS filesystem and aligned on 4KB boundaries:
+.Bd -literal -offset indent
+/sbin/gpart add -s 15G -t freebsd-ufs -a 4k da0
+.Ed
+.Pp
+After creating all required partitions, embed bootstrap code into them:
+.Bd -literal -offset indent
+/sbin/gpart bootcode -p /boot/boot1 da0
+.Ed
+.Ss Deleting Partitions and Destroying the Partitioning Scheme
+If a
+.Em "Device busy"
+error is shown when trying to destroy a partition table, remember that
+all of the partitions must be deleted first with the
+.Cm delete
+action.
+In this example,
+.Pa da0
+has three partitions:
+.Bd -literal -offset indent
+/sbin/gpart delete -i 3 da0
+/sbin/gpart delete -i 2 da0
+/sbin/gpart delete -i 1 da0
+/sbin/gpart destroy da0
+.Ed
+.Pp
+Rather than deleting each partition and then destroying the partitioning
+scheme, the
+.Fl F
+option can be given with
+.Cm destroy
+to delete all of the partitions before destroying the partitioning scheme.
+This is equivalent to the previous example:
+.Bd -literal -offset indent
+/sbin/gpart destroy -F da0
+.Ed
+.Ss Backup and Restore
+.Pp
+Create a backup of the partition table from
+.Pa da0 :
+.Bd -literal -offset indent
+/sbin/gpart backup da0 > da0.backup
+.Ed
+.Pp
+Restore the partition table from the backup to
+.Pa da0 :
+.Bd -literal -offset indent
+/sbin/gpart restore -l da0 < /mnt/da0.backup
+.Ed
+.Pp
+Clone the partition table from
+.Pa ada0
+to
+.Pa ada1
+and
+.Pa ada2 :
+.Bd -literal -offset indent
+/sbin/gpart backup ada0 | /sbin/gpart restore -F ada1 ada2
+.Ed
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr boot0cfg 8 ,
+.Xr geom 8 ,
+.Xr gptboot 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 7.0 .
+.Sh AUTHORS
+.An Marcel Moolenaar Aq Mt marcel@FreeBSD.org
diff --git a/lib/geom/raid/Makefile b/lib/geom/raid/Makefile
new file mode 100644
index 000000000000..75d2ac7e25b7
--- /dev/null
+++ b/lib/geom/raid/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= raid
+
+LIBADD= md
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/raid/Makefile.depend b/lib/geom/raid/Makefile.depend
new file mode 100644
index 000000000000..7902e1927044
--- /dev/null
+++ b/lib/geom/raid/Makefile.depend
@@ -0,0 +1,20 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ lib/libmd \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/raid/geom_raid.c b/lib/geom/raid/geom_raid.c
new file mode 100644
index 000000000000..efb356f6a7e3
--- /dev/null
+++ b/lib/geom/raid/geom_raid.c
@@ -0,0 +1,94 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/raid/g_raid.h>
+#include <core/geom.h>
+#include <misc/subr.h>
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_RAID_VERSION;
+
+struct g_command class_commands[] = { /* one entry per graid(8) subcommand: name, flags, NULL, options, usage */
+ { "label", G_FLAG_VERBOSE, NULL, /* NOTE(review): NULL third field is presumably an unused handler pointer -- confirm in core/geom.h */
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL }, /* -f: boolean switch */
+ { 'o', "fmtopt", G_VAL_OPTIONAL, G_TYPE_STRING }, /* -o fmtopt: optional string */
+ { 'S', "size", G_VAL_OPTIONAL, G_TYPE_NUMBER }, /* -S size: optional number */
+ { 's', "strip", G_VAL_OPTIONAL, G_TYPE_NUMBER }, /* -s stripsize: optional number */
+ G_OPT_SENTINEL /* terminates this option list */
+ },
+ "[-fv] [-o fmtopt] [-S size] [-s stripsize] format label level prov ..."
+ },
+ { "add", G_FLAG_VERBOSE, NULL, /* same options as "label" minus -o */
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ { 'S', "size", G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ { 's', "strip", G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-fv] [-S size] [-s stripsize] name label level"
+ },
+ { "delete", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name [label|num]"
+ },
+ { "insert", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, /* no class-specific options; usage shows only -v */
+ "[-v] name prov ..."
+ },
+ { "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, /* no class-specific options; usage shows only -v */
+ "[-v] name prov ..."
+ },
+ { "fail", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, /* no class-specific options; usage shows only -v */
+ "[-v] name prov ..."
+ },
+ { "stop", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name"
+ },
+ G_CMD_SENTINEL /* end of command table */
+};
+
diff --git a/lib/geom/raid/graid.8 b/lib/geom/raid/graid.8
new file mode 100644
index 000000000000..e11648659495
--- /dev/null
+++ b/lib/geom/raid/graid.8
@@ -0,0 +1,321 @@
+.\" Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd April 4, 2013
+.Dt GRAID 8
+.Os
+.Sh NAME
+.Nm graid
+.Nd "control utility for software RAID devices"
+.Sh SYNOPSIS
+.Nm
+.Cm label
+.Op Fl f
+.Op Fl o Ar fmtopt
+.Op Fl S Ar size
+.Op Fl s Ar strip
+.Ar format
+.Ar label
+.Ar level
+.Ar prov ...
+.Nm
+.Cm add
+.Op Fl f
+.Op Fl S Ar size
+.Op Fl s Ar strip
+.Ar name
+.Ar label
+.Ar level
+.Nm
+.Cm delete
+.Op Fl f
+.Ar name
+.Op Ar label | Ar num
+.Nm
+.Cm insert
+.Ar name
+.Ar prov ...
+.Nm
+.Cm remove
+.Ar name
+.Ar prov ...
+.Nm
+.Cm fail
+.Ar name
+.Ar prov ...
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used to manage software RAID configurations, supported by the
+GEOM RAID class.
+The GEOM RAID class uses on-disk metadata to provide access to software-RAID
+volumes defined by different RAID BIOSes.
+Depending on RAID BIOS type and its metadata format, different subsets of
+configurations and features are supported.
+To allow booting from a RAID volume, the metadata format should match the
+RAID BIOS type and its capabilities.
+To guarantee that these match, it is recommended to create volumes via the
+RAID BIOS interface, while experienced users are free to do it using this
+utility.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm label
+Create an array with a single volume.
+The
+.Ar format
+argument specifies the on-disk metadata format to use for this array,
+such as "Intel".
+The
+.Ar label
+argument specifies the label of the created volume.
+The
+.Ar level
+argument specifies the RAID level of the created volume, such as:
+"RAID0", "RAID1", etc.
+The subsequent list enumerates providers to use as array components.
+The special name "NONE" can be used to reserve space for absent disks.
+The order of components can be important, depending on specific RAID level
+and metadata format.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl s Ar strip"
+.It Fl f
+Enforce specified configuration creation if it is officially unsupported,
+but technically can be created.
+.It Fl o Ar fmtopt
+Specifies metadata format options.
+.It Fl S Ar size
+Use
+.Ar size
+bytes on each component for this volume.
+Should be used if several volumes per array are planned, or if smaller
+components are going to be inserted later.
+Defaults to size of the smallest component.
+.It Fl s Ar strip
+Specifies strip size in bytes.
+Defaults to 131072.
+.El
+.It Cm add
+Create another volume on the existing array.
+The
+.Ar name
+argument is the name of the existing array, as reported by the label command.
+The rest of the arguments are the same as for the label command.
+.It Cm delete
+Delete volume(s) from the existing array.
+When the last volume is deleted, the array is also deleted and its metadata
+erased.
+The
+.Ar name
+argument is the name of the existing array.
+Optional
+.Ar label
+or
+.Ar num
+arguments allow specifying the volume to delete.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Delete volume(s) even if it is still open.
+.El
+.It Cm insert
+Insert specified provider(s) into specified array instead of the first missing
+or failed components.
+If there are no such components, mark disk(s) as spare.
+.It Cm remove
+Remove the specified provider(s) from the specified array and erase metadata.
+If there are spare disks present, the removed disk(s) will be replaced by
+spares.
+.It Cm fail
+Mark the given disk(s) as failed, removing them from active use unless
+absolutely necessary due to exhausted redundancy.
+If there are spare disks present, the failed disk(s) will be replaced with one
+of them.
+.It Cm stop
+Stop the given array.
+The metadata will not be erased.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Stop the given array even if some of its volumes are opened.
+.El
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl v"
+.It Fl v
+Be more verbose.
+.El
+.Sh SUPPORTED METADATA FORMATS
+The GEOM RAID class follows a modular design, allowing different metadata
+formats to be used.
+Support is currently implemented for the following formats:
+.Bl -tag -width "Intel"
+.It DDF
+The format defined by the SNIA Common RAID Disk Data Format v2.0 specification.
+Used by some Adaptec RAID BIOSes and some hardware RAID controllers.
+Because of the format's high flexibility, different implementations support
+different sets of features and have different on-disk metadata layouts.
+To provide compatibility, the GEOM RAID class mimics the capabilities
+of the first detected DDF array.
+Respecting that, it may support different numbers of disks per volume,
+volumes per array, partitions per disk, etc.
+The following configurations are supported: RAID0 (2+ disks), RAID1 (2+ disks),
+RAID1E (3+ disks), RAID3 (3+ disks), RAID4 (3+ disks), RAID5 (3+ disks),
+RAID5E (4+ disks), RAID5EE (4+ disks), RAID5R (3+ disks), RAID6 (4+ disks),
+RAIDMDF (4+ disks), RAID10 (4+ disks), SINGLE (1 disk), CONCAT (2+ disks).
+.Pp
+The format supports two options, "BE" and "LE", which select the big-endian
+byte order defined by the specification (default) or the little-endian byte
+order used by some Adaptec controllers.
+.It Intel
+The format used by Intel RAID BIOS.
+Supports up to two volumes per array.
+Supports configurations: RAID0 (2+ disks), RAID1 (2 disks),
+RAID5 (3+ disks), RAID10 (4 disks).
+Configurations not supported by Intel RAID BIOS, but enforceable at your own
+risk: RAID1 (3+ disks), RAID1E (3+ disks), RAID10 (6+ disks).
+.It JMicron
+The format used by JMicron RAID BIOS.
+Supports one volume per array.
+Supports configurations: RAID0 (2+ disks), RAID1 (2 disks),
+RAID10 (4 disks), CONCAT (2+ disks).
+Configurations not supported by JMicron RAID BIOS, but enforceable at your own
+risk: RAID1 (3+ disks), RAID1E (3+ disks), RAID10 (6+ disks), RAID5 (3+ disks).
+.It NVIDIA
+The format used by NVIDIA MediaShield RAID BIOS.
+Supports one volume per array.
+Supports configurations: RAID0 (2+ disks), RAID1 (2 disks),
+RAID5 (3+ disks), RAID10 (4+ disks), SINGLE (1 disk), CONCAT (2+ disks).
+Configurations not supported by NVIDIA MediaShield RAID BIOS, but enforceable
+at your own risk: RAID1 (3+ disks).
+.It Promise
+The format used by Promise and AMD/ATI RAID BIOSes.
+Supports multiple volumes per array.
+Each disk can be split to be used by up to two arbitrary volumes.
+Supports configurations: RAID0 (2+ disks), RAID1 (2 disks),
+RAID5 (3+ disks), RAID10 (4 disks), SINGLE (1 disk), CONCAT (2+ disks).
+Configurations not supported by RAID BIOSes, but enforceable at your
+own risk: RAID1 (3+ disks), RAID10 (6+ disks).
+.It SiI
+The format used by SiliconImage RAID BIOS.
+Supports one volume per array.
+Supports configurations: RAID0 (2+ disks), RAID1 (2 disks),
+RAID5 (3+ disks), RAID10 (4 disks), SINGLE (1 disk), CONCAT (2+ disks).
+Configurations not supported by SiliconImage RAID BIOS, but enforceable at your
+own risk: RAID1 (3+ disks), RAID10 (6+ disks).
+.El
+.Sh SUPPORTED RAID LEVELS
+The GEOM RAID class follows a modular design, allowing different RAID levels
+to be used.
+Full support for the following RAID levels is currently implemented:
+RAID0, RAID1, RAID1E, RAID10, SINGLE, CONCAT.
+The following RAID levels are supported as read-only for volumes in optimal
+state (without using redundancy): RAID4, RAID5, RAID5E, RAID5EE, RAID5R,
+RAID6, RAIDMDF.
+.Sh RAID LEVEL MIGRATION
+The GEOM RAID class has no support for RAID level migration, allowed by some
+metadata formats.
+If you started migration using BIOS or in some other way, make sure to
+complete it there.
+Do not run GEOM RAID class on migrating volumes under pain of possible data
+corruption!
+.Sh 2TiB BARRIERS
+NVIDIA metadata format does not support volumes above 2TiB.
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variable can be used to control the behavior of the
+.Nm RAID
+GEOM class.
+.Bl -tag -width indent
+.It Va kern.geom.raid.aggressive_spare : No 0
+Use any disks without metadata connected to controllers of the vendor
+matching the volume metadata format as spares.
+Use it with great care to avoid losing data when connecting unrelated disks!
+.It Va kern.geom.raid.clean_time : No 5
+Mark volume as clean when idle for the specified number of seconds.
+.It Va kern.geom.raid.debug : No 0
+Debug level of the
+.Nm RAID
+GEOM class.
+.It Va kern.geom.raid.enable : No 1
+Enable on-disk metadata taste.
+.It Va kern.geom.raid.idle_threshold : No 1000000
+Time in microseconds to consider a volume idle for rebuild purposes.
+.It Va kern.geom.raid.name_format : No 0
+Provider name format: 0 -- raid/r{num}, 1 -- raid/{label}.
+.It Va kern.geom.raid.read_err_thresh : No 10
+Number of read errors equated to disk failure.
+Write errors are always considered as disk failures.
+.It Va kern.geom.raid.start_timeout : No 30
+Time to wait for missing array components on startup.
+.It Va kern.geom.raid. Ns Ar X Ns Va .enable : No 1
+Enable taste for specific metadata or transformation module.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and non-zero if the command fails.
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8 ,
+.Xr gvinum 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 9.0 .
+.Sh AUTHORS
+.An Alexander Motin Aq Mt mav@FreeBSD.org
+.An M. Warner Losh Aq Mt imp@FreeBSD.org
diff --git a/lib/geom/raid3/Makefile b/lib/geom/raid3/Makefile
new file mode 100644
index 000000000000..2fdf4e8f8167
--- /dev/null
+++ b/lib/geom/raid3/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= raid3
+
+LIBADD= md
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/raid3/Makefile.depend b/lib/geom/raid3/Makefile.depend
new file mode 100644
index 000000000000..7902e1927044
--- /dev/null
+++ b/lib/geom/raid3/Makefile.depend
@@ -0,0 +1,20 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ lib/libmd \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/raid3/geom_raid3.c b/lib/geom/raid3/geom_raid3.c
new file mode 100644
index 000000000000..17d3187d5cf4
--- /dev/null
+++ b/lib/geom/raid3/geom_raid3.c
@@ -0,0 +1,338 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/raid3/g_raid3.h>
+#include <core/geom.h>
+#include <misc/subr.h>
+
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_RAID3_VERSION;
+
+static void raid3_main(struct gctl_req *req, unsigned f);
+static void raid3_clear(struct gctl_req *req);
+static void raid3_dump(struct gctl_req *req);
+static void raid3_label(struct gctl_req *req);
+
+struct g_command class_commands[] = { /* graid3 verb table: name, flags, handler, options, usage string */
+ { "clear", G_FLAG_VERBOSE, raid3_main, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ { "configure", G_FLAG_VERBOSE, NULL, /* NULL handler: no userland function in this file; presumably passed on by the geom(8) core -- confirm */
+ {
+ { 'a', "autosync", NULL, G_TYPE_BOOL },
+ { 'd', "dynamic", NULL, G_TYPE_BOOL },
+ { 'f', "failsync", NULL, G_TYPE_BOOL },
+ { 'F', "nofailsync", NULL, G_TYPE_BOOL },
+ { 'h', "hardcode", NULL, G_TYPE_BOOL },
+ { 'n', "noautosync", NULL, G_TYPE_BOOL },
+ { 'r', "round_robin", NULL, G_TYPE_BOOL },
+ { 'R', "noround_robin", NULL, G_TYPE_BOOL },
+ { 'w', "verify", NULL, G_TYPE_BOOL },
+ { 'W', "noverify", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-adfFhnrRvwW] name"
+ },
+ { "dump", 0, raid3_main, G_NULL_OPTS, /* flags 0: the usage string below offers no -v */
+ "prov ..."
+ },
+ { "insert", G_FLAG_VERBOSE, NULL,
+ {
+ { 'h', "hardcode", NULL, G_TYPE_BOOL },
+ { 'n', "number", G_VAL_OPTIONAL, G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-hv] <-n number> name prov" /* NOTE(review): option is G_VAL_OPTIONAL and graid3.8 shows [-n number]; usage text marks it mandatory */
+ },
+ { "label", G_FLAG_VERBOSE, raid3_main,
+ {
+ { 'h', "hardcode", NULL, G_TYPE_BOOL },
+ { 'F', "nofailsync", NULL, G_TYPE_BOOL },
+ { 'n', "noautosync", NULL, G_TYPE_BOOL },
+ { 'r', "round_robin", NULL, G_TYPE_BOOL },
+ { 's', "sectorsize", "0", G_TYPE_NUMBER }, /* default "0": raid3_label() then derives the size from the components */
+ { 'w', "verify", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-hFnrvw] [-s blocksize] name prov prov prov ..."
+ },
+ { "rebuild", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+ "[-v] name prov"
+ },
+ { "remove", G_FLAG_VERBOSE, NULL,
+ {
+ { 'n', "number", NULL, G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-v] <-n number> name"
+ },
+ { "stop", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ G_CMD_SENTINEL
+};
+
+static int verbose = 0;
+
+static void
+raid3_main(struct gctl_req *req, unsigned flags)
+{
+ const char *name;
+ /* Handler for label/clear/dump: latch verbosity, then dispatch on the "verb" parameter. */
+ if ((flags & G_FLAG_VERBOSE) != 0)
+ verbose = 1; /* file-scope flag read by the sub-handlers; never reset to 0 here */
+
+ name = gctl_get_ascii(req, "verb");
+ if (name == NULL) {
+ gctl_error(req, "No '%s' argument.", "verb");
+ return;
+ }
+ if (strcmp(name, "label") == 0)
+ raid3_label(req);
+ else if (strcmp(name, "clear") == 0)
+ raid3_clear(req);
+ else if (strcmp(name, "dump") == 0)
+ raid3_dump(req);
+ else
+ gctl_error(req, "Unknown command: %s.", name); /* verb has no handler in this file */
+}
+
+static void
+raid3_label(struct gctl_req *req)
+{
+ struct g_raid3_metadata md;
+ u_char sector[512];
+ const char *str;
+ unsigned sectorsize, ssize;
+ off_t mediasize, msize;
+ int hardcode, round_robin, verify;
+ int error, i, nargs;
+ /* "label": build RAID3 metadata from the request and store it on every component (arg1..argN). */
+ bzero(sector, sizeof(sector));
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 4) { /* arg0 is the device name, so this demands at least 3 components */
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+ if (bitcount32(nargs - 2) != 1) { /* nargs - 2 = components minus one; graid3.8 requires 2^n + 1 components */
+ gctl_error(req, "Invalid number of components.");
+ return;
+ }
+
+ strlcpy(md.md_magic, G_RAID3_MAGIC, sizeof(md.md_magic));
+ md.md_version = G_RAID3_VERSION;
+ str = gctl_get_ascii(req, "arg0");
+ strlcpy(md.md_name, str, sizeof(md.md_name));
+ md.md_id = arc4random();
+ md.md_all = nargs - 1; /* every argument except the device name */
+ md.md_mflags = 0;
+ md.md_dflags = 0;
+ md.md_genid = 0;
+ md.md_syncid = 1;
+ md.md_sync_offset = 0;
+ if (gctl_get_int(req, "noautosync"))
+ md.md_mflags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
+ if (gctl_get_int(req, "nofailsync"))
+ md.md_mflags |= G_RAID3_DEVICE_FLAG_NOFAILSYNC;
+ round_robin = gctl_get_int(req, "round_robin");
+ if (round_robin)
+ md.md_mflags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN;
+ verify = gctl_get_int(req, "verify");
+ if (verify)
+ md.md_mflags |= G_RAID3_DEVICE_FLAG_VERIFY;
+ if (round_robin && verify) { /* -r and -w are mutually exclusive; nothing has been written to disk yet */
+ gctl_error(req, "Both '%c' and '%c' options given.", 'r', 'w');
+ return;
+ }
+ hardcode = gctl_get_int(req, "hardcode");
+
+ /*
+ * Calculate sectorsize by finding least common multiple from
+ * sectorsizes of every disk and find the smallest mediasize.
+ */
+ mediasize = 0;
+ sectorsize = gctl_get_intmax(req, "sectorsize"); /* 0 (the option default) means "take it from the first component" */
+ for (i = 1; i < nargs; i++) {
+ str = gctl_get_ascii(req, "arg%d", i);
+ msize = g_get_mediasize(str);
+ ssize = g_get_sectorsize(str);
+ if (msize == 0 || ssize == 0) {
+ gctl_error(req, "Can't get informations about %s: %s.",
+ str, strerror(errno));
+ return;
+ }
+ msize -= ssize; /* one sector is excluded; graid3.8 says metadata lives in the provider's last sector */
+ if (mediasize == 0 || (mediasize > 0 && msize < mediasize))
+ mediasize = msize;
+ if (sectorsize == 0)
+ sectorsize = ssize;
+ else
+ sectorsize = g_lcm(sectorsize, ssize);
+ }
+ md.md_mediasize = mediasize * (nargs - 2); /* smallest usable component size times (components - 1) */
+ md.md_sectorsize = sectorsize * (nargs - 2);
+ md.md_mediasize -= (md.md_mediasize % md.md_sectorsize); /* round down to whole array sectors */
+
+ if (md.md_sectorsize > MAXPHYS) {
+ gctl_error(req, "The blocksize is too big.");
+ return;
+ }
+
+ /*
+ * Clear last sector first, to spoil all components if device exists.
+ */
+ for (i = 1; i < nargs; i++) {
+ str = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_clear(str, NULL);
+ if (error != 0) {
+ gctl_error(req, "Can't store metadata on %s: %s.", str, /* NOTE(review): message says "store" but the failing call is a clear */
+ strerror(error));
+ return;
+ }
+ }
+
+ /*
+ * Ok, store metadata (use disk number as priority).
+ */
+ for (i = 1; i < nargs; i++) {
+ str = gctl_get_ascii(req, "arg%d", i);
+ msize = g_get_mediasize(str); /* NOTE(review): re-read without the zero check done in the first loop */
+ ssize = g_get_sectorsize(str);
+ if (mediasize < msize - ssize) {
+ fprintf(stderr,
+ "warning: %s: only %jd bytes from %jd bytes used.\n",
+ str, (intmax_t)mediasize, (intmax_t)(msize - ssize));
+ }
+
+ md.md_no = i - 1; /* component numbers are zero-based, in command-line order */
+ md.md_provsize = msize;
+ if (!hardcode)
+ bzero(md.md_provider, sizeof(md.md_provider));
+ else {
+ if (strncmp(str, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ str += sizeof(_PATH_DEV) - 1; /* the stripped name is also what g_metadata_store() and the messages below receive */
+ strlcpy(md.md_provider, str, sizeof(md.md_provider));
+ }
+ if (verify && md.md_no == md.md_all - 1) { /* last component given, i.e. the parity component per graid3.8 */
+ /*
+ * In "verify" mode, force synchronization of parity
+ * component on start.
+ */
+ md.md_syncid = 0;
+ }
+ raid3_metadata_encode(&md, sector);
+ error = g_metadata_store(str, sector, sizeof(sector));
+ if (error != 0) {
+ fprintf(stderr, "Can't store metadata on %s: %s.\n",
+ str, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue; /* keep going with the remaining components */
+ }
+ if (verbose)
+ printf("Metadata value stored on %s.\n", str);
+ }
+}
+
+static void
+raid3_clear(struct gctl_req *req)
+{
+ const char *name;
+ int error, i, nargs;
+ /* "clear": clear RAID3 metadata on every provider named in arg0..argN-1. */
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_clear(name, G_RAID3_MAGIC); /* magic is passed here, unlike the NULL used by raid3_label() */
+ if (error != 0) {
+ fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue; /* a failure on one provider does not stop the others */
+ }
+ if (verbose)
+ printf("Metadata cleared on %s.\n", name);
+ }
+}
+
+static void
+raid3_dump(struct gctl_req *req)
+{
+ struct g_raid3_metadata md, tmpmd;
+ const char *name;
+ int error, i, nargs;
+ /* "dump": read, decode and print the RAID3 metadata of every provider named in arg0..argN-1. */
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), /* tmpmd is used only as a raw byte buffer */
+ G_RAID3_MAGIC);
+ if (error != 0) {
+ fprintf(stderr, "Can't read metadata from %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ if (raid3_metadata_decode((u_char *)&tmpmd, &md) != 0) { /* non-zero is reported below as an MD5 hash mismatch */
+ fprintf(stderr, "MD5 hash mismatch for %s, skipping.\n",
+ name);
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ printf("Metadata on %s:\n", name);
+ raid3_metadata_dump(&md);
+ printf("\n");
+ }
+}
diff --git a/lib/geom/raid3/graid3.8 b/lib/geom/raid3/graid3.8
new file mode 100644
index 000000000000..426c94d17521
--- /dev/null
+++ b/lib/geom/raid3/graid3.8
@@ -0,0 +1,257 @@
+.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd January 15, 2012
+.Dt GRAID3 8
+.Os
+.Sh NAME
+.Nm graid3
+.Nd "control utility for RAID3 devices"
+.Sh SYNOPSIS
+.Nm
+.Cm label
+.Op Fl Fhnrvw
+.Op Fl s Ar blocksize
+.Ar name
+.Ar prov prov prov ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm configure
+.Op Fl adfFhnrRvwW
+.Ar name
+.Nm
+.Cm rebuild
+.Op Fl v
+.Ar name
+.Ar prov
+.Nm
+.Cm insert
+.Op Fl hv
+.Op Fl n Ar number
+.Ar name
+.Ar prov
+.Nm
+.Cm remove
+.Op Fl v
+.Fl n Ar number
+.Ar name
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for RAID3 array configuration.
+After a device is created, all components are detected and configured
+automatically.
+All operations such as failure detection, stale component detection, rebuild
+of stale components, etc.\& are also done automatically.
+The
+.Nm
+utility uses on-disk metadata (the provider's last sector) to store all needed
+information.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm configure"
+.It Cm label
+Create a RAID3 device.
+The last given component will contain parity data, whilst the others
+will all contain regular data.
+The number of components must be equal to 3, 5, 9, 17, etc.\& (2^n + 1).
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl h"
+.It Fl F
+Do not synchronize after a power failure or system crash.
+Assumes device is in consistent state.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl n
+Turn off autosynchronization of stale components.
+.It Fl r
+Use parity component for reading in round-robin fashion.
+Without this option the parity component is not used at all for reading operations
+when the device is in a complete state.
+With this option specified, random I/O read operations can be up to 40% faster,
+but sequential reads are slower.
+One cannot use this option if the
+.Fl w
+option is also specified.
+.It Fl s
+Manually specify the array block size. The block size will be set to the least
+common multiple of all components' sector sizes and the specified value.
+Note that the array sector size is calculated as the product of the block size and
+the number of regular data components. Big values may decrease performance and compatibility,
+as all I/O requests have to be a multiple of the sector size.
+.It Fl w
+Use verify reading feature.
+When reading from a device in a complete state, also read data from the parity component
+and verify the data by comparing XORed regular data with parity data.
+If verification fails, an
+.Er EIO
+error is returned and the value of the
+.Va kern.geom.raid3.stat.parity_mismatch
+sysctl is increased.
+One cannot use this option if the
+.Fl r
+option is also specified.
+.El
+.It Cm clear
+Clear metadata on the given providers.
+.It Cm configure
+Configure the given device.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl a"
+.It Fl a
+Turn on autosynchronization of stale components.
+.It Fl d
+Do not hardcode providers' names in metadata.
+.It Fl f
+Synchronize device after a power failure or system crash.
+.It Fl F
+Do not synchronize after a power failure or system crash.
+Assumes device is in consistent state.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl n
+Turn off autosynchronization of stale components.
+.It Fl r
+Turn on round-robin reading.
+.It Fl R
+Turn off round-robin reading.
+.It Fl w
+Turn on verify reading.
+.It Fl W
+Turn off verify reading.
+.El
+.It Cm rebuild
+Rebuild the given component forcibly.
+If autosynchronization was not turned off for the given device, this command
+should be unnecessary.
+.It Cm insert
+Add the given component to the existing array, if one of the components was
+removed previously with the
+.Cm remove
+command or if one component is missing and will not be connected again.
+If no number is given, the new component will be added in place of the first missing
+component.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl h"
+.It Fl h
+Hardcode providers' names in metadata.
+.El
+.It Cm remove
+Remove the given component from the given array and clear metadata on it.
+.It Cm stop
+Stop the given arrays.
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Stop the given array even if it is opened.
+.El
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options include:
+.Bl -tag -width ".Fl v"
+.It Fl v
+Be more verbose.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+Use 3 disks to set up a RAID3 array (with the round-robin reading feature).
+Create a file system, mount it, then unmount it and stop the device:
+.Bd -literal -offset indent
+graid3 label -v -r data da0 da1 da2
+newfs /dev/raid3/data
+mount /dev/raid3/data /mnt
+\&...
+umount /mnt
+graid3 stop data
+graid3 unload
+.Ed
+.Pp
+Create a RAID3 array, but do not use the automatic synchronization feature.
+Rebuild parity component:
+.Bd -literal -offset indent
+graid3 label -n data da0 da1 da2
+graid3 rebuild data da2
+.Ed
+.Pp
+Replace one data disk with a brand new one:
+.Bd -literal -offset indent
+graid3 remove -n 0 data
+graid3 insert -n 0 data da5
+.Ed
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8 ,
+.Xr gvinum 8 ,
+.Xr mount 8 ,
+.Xr newfs 8 ,
+.Xr umount 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 5.3 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
+.Sh BUGS
+There should be a section with an implementation description.
+.Pp
+Documentation for sysctls
+.Va kern.geom.raid3.*
+is missing.
diff --git a/lib/geom/sched/Makefile b/lib/geom/sched/Makefile
new file mode 100644
index 000000000000..ad3f5b131a18
--- /dev/null
+++ b/lib/geom/sched/Makefile
@@ -0,0 +1,9 @@
+# GEOM_LIBRARY_PATH
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= sched
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/sched/Makefile.depend b/lib/geom/sched/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/sched/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/sched/geom_sched.c b/lib/geom/sched/geom_sched.c
new file mode 100644
index 000000000000..7b7b281d7d55
--- /dev/null
+++ b/lib/geom/sched/geom_sched.c
@@ -0,0 +1,128 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2009 Fabio Checconi
+ * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id$
+ * $FreeBSD$
+ *
+ * This file implements the userspace library used by the 'geom'
+ * command to load and manipulate disk schedulers.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/linker.h>
+#include <sys/module.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <libgeom.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+#define G_SCHED_VERSION 0
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_SCHED_VERSION;
+
+/*
+ * storage for parameters used by this geom class.
+ * Right now only the scheduler name is used.
+ */
+#define GSCHED_ALGO "rr" /* default scheduler */
+
+/*
+ * Adapt to differences in geom library.
+ * in V1 struct g_command misses gc_argname, eld, and G_BOOL is undefined
+ */
+#if G_LIB_VERSION <= 1
+#define G_TYPE_BOOL G_TYPE_NUMBER
+#endif
+#if G_LIB_VERSION >= 3 && G_LIB_VERSION <= 4
+#define G_ARGNAME NULL,
+#else
+#define G_ARGNAME
+#endif
+
+static void
+gcmd_createinsert(struct gctl_req *req, unsigned flags __unused)
+{
+ const char *reqalgo;
+ char name[64];
+ /* Shared handler for "create" and "insert": try to load gsched_<algo>, then issue the request. */
+ if (gctl_has_param(req, "algo"))
+ reqalgo = gctl_get_ascii(req, "algo");
+ else
+ reqalgo = GSCHED_ALGO;
+
+ snprintf(name, sizeof(name), "gsched_%s", reqalgo); /* e.g. "gsched_rr" for the default algorithm */
+ /*
+ * Do not complain about errors here, gctl_issue()
+ * will fail anyway.
+ */
+ if (modfind(name) < 0) /* lookup failed, so attempt to load the module */
+ kldload(name);
+ gctl_issue(req); /* return value is not examined here */
+}
+
+struct g_command class_commands[] = { /* gsched verb table; G_ARGNAME expands to "NULL," or to nothing depending on G_LIB_VERSION */
+ { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert,
+ {
+ { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
+ G_OPT_SENTINEL
+ },
+ G_ARGNAME "[-v] [-a algorithm_name] dev ..." /* gsched.8 calls this operand "provider" */
+ },
+ { "insert", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert,
+ {
+ { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
+ G_OPT_SENTINEL
+ },
+ G_ARGNAME "[-v] [-a algorithm_name] dev ..."
+ },
+ { "configure", G_FLAG_VERBOSE, NULL, /* NULL handler: no userland function in this file */
+ {
+ { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
+ G_OPT_SENTINEL
+ },
+ G_ARGNAME "[-v] [-a algorithm_name] prov ..." /* gsched.8 calls this operand "node" */
+ },
+ { "destroy", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ G_ARGNAME "[-fv] prov ..."
+ },
+ { "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+ G_ARGNAME "[-v] prov ..."
+ },
+ G_CMD_SENTINEL
+};
diff --git a/lib/geom/sched/gsched.8 b/lib/geom/sched/gsched.8
new file mode 100644
index 000000000000..facb5c18440f
--- /dev/null
+++ b/lib/geom/sched/gsched.8
@@ -0,0 +1,162 @@
+.\" Copyright (c) 2009-2010 Fabio Checconi
+.\" Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd July 26, 2012
+.Dt GSCHED 8
+.Os
+.Sh NAME
+.Nm gsched
+.Nd "control utility for disk scheduler GEOM class"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Op Fl a Ar algorithm
+.Ar provider ...
+.Nm
+.Cm insert
+.Op Fl v
+.Op Fl a Ar algorithm
+.Ar provider ...
+.Nm
+.Cm configure
+.Op Fl v
+.Op Fl a Ar algorithm
+.Ar node ...
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar node ...
+.Nm
+.Cm reset
+.Op Fl v
+.Ar node ...
+.Nm
+.Cm { list | status | load | unload }
+.Sh DESCRIPTION
+The
+.Nm
+utility (also callable as
+.Nm geom sched ... )
+changes the scheduling policy of the requests going to a provider.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm configure"
+.It Cm create
+Create a new provider and geom node using the specified scheduling algorithm.
+.Ar algorithm
+is the name of the scheduling algorithm used for the provider.
+Available algorithms include:
+.Ar rr ,
+which implements anticipatory scheduling with round robin service
+among clients;
+.Ar as ,
+which implements a simple form of anticipatory scheduling with
+no per-client queue.
+.Pp
+If the operation succeeds, the new provider should appear with name
+.Pa /dev/ Ns Ao Ar dev Ac Ns Pa .sched. .
+The kernel module
+.Pa geom_sched.ko
+will be loaded if it is not loaded already.
+.It Cm insert
+Operates as "create", but the insertion is "transparent",
+i.e. the existing provider is rerouted to the newly created geom,
+which in turn forwards requests to the existing geom.
+This operation allows one to start/stop a scheduling service
+on an already existing provider.
+.Pp
+A subsequent "destroy" will remove the newly created geom and
+hook the provider back to the original geom.
+.It Cm configure
+Configure an existing scheduling provider. It supports the same options
+as the
+.Cm create
+command.
+.It Cm destroy
+Destroy the geom specified in the parameter.
+.It Cm reset
+Do nothing.
+.It Cm list | status | load | unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Force the removal of the specified provider.
+.It Fl v
+Be more verbose.
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm SCHED
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.sched.debug : No 0
+Debug level of the
+.Nm SCHED
+GEOM class.
+This can be set to a number between 0 and 2 inclusive.
+If set to 0 minimal debug information is printed, and if set to 2 the
+maximum amount of debug information is printed.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to create a scheduling provider for disk
+.Pa /dev/ada0 ,
+and how to destroy it.
+.Bd -literal -offset indent
+# Load the geom_sched module:
+kldload geom_sched
+# Load some scheduler classes used by geom_sched:
+kldload gsched_rr
+# Configure device ada0 to use scheduler "rr":
+geom sched insert -a rr ada0
+# Now provider ada0 uses the "rr" algorithm;
+# the new geom is ada0.sched.
+# Remove the scheduler on the device:
+geom sched destroy -v ada0.sched.
+.Ed
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Fx 8.1 .
+.Sh AUTHORS
+.An Fabio Checconi Aq Mt fabio@FreeBSD.org
+.An Luigi Rizzo Aq Mt luigi@FreeBSD.org
diff --git a/lib/geom/shsec/Makefile b/lib/geom/shsec/Makefile
new file mode 100644
index 000000000000..f86df6c9af42
--- /dev/null
+++ b/lib/geom/shsec/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= shsec
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/shsec/Makefile.depend b/lib/geom/shsec/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/shsec/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/shsec/geom_shsec.c b/lib/geom/shsec/geom_shsec.c
new file mode 100644
index 000000000000..308a53b7f9d3
--- /dev/null
+++ b/lib/geom/shsec/geom_shsec.c
@@ -0,0 +1,262 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/shsec/g_shsec.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_SHSEC_VERSION;
+
+static void shsec_main(struct gctl_req *req, unsigned flags);
+static void shsec_clear(struct gctl_req *req);
+static void shsec_dump(struct gctl_req *req);
+static void shsec_label(struct gctl_req *req);
+
+struct g_command class_commands[] = { /* gshsec verb table: name, flags, handler, options, usage string */
+ { "clear", G_FLAG_VERBOSE, shsec_main, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ { "dump", 0, shsec_main, G_NULL_OPTS, /* flags 0: the usage string below offers no -v */
+ "prov ..."
+ },
+ { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, shsec_main,
+ {
+ { 'h', "hardcode", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-hv] name prov prov ..."
+ },
+ { "stop", G_FLAG_VERBOSE, NULL, /* NULL handler: no userland function in this file */
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ G_CMD_SENTINEL
+};
+
+static int verbose = 0;
+
+static void
+shsec_main(struct gctl_req *req, unsigned flags)
+{
+ const char *name;
+ /* Handler for label/clear/dump: latch verbosity, then dispatch on the "verb" parameter. */
+ if ((flags & G_FLAG_VERBOSE) != 0)
+ verbose = 1; /* file-scope flag read by the sub-handlers; never reset to 0 here */
+
+ name = gctl_get_ascii(req, "verb");
+ if (name == NULL) {
+ gctl_error(req, "No '%s' argument.", "verb");
+ return;
+ }
+ if (strcmp(name, "label") == 0)
+ shsec_label(req);
+ else if (strcmp(name, "clear") == 0)
+ shsec_clear(req);
+ else if (strcmp(name, "dump") == 0)
+ shsec_dump(req);
+ else
+ gctl_error(req, "Unknown command: %s.", name); /* verb has no handler in this file */
+}
+
+static void
+shsec_label(struct gctl_req *req)
+{
+ struct g_shsec_metadata md;
+ off_t compsize, msize;
+ u_char sector[512];
+ unsigned ssize, secsize;
+ const char *name;
+ int error, i, nargs, hardcode;
+ /* "label": clear old metadata on arg1..argN, then store fresh shared-secret metadata on each. */
+ bzero(sector, sizeof(sector));
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs <= 2) { /* arg0 is the device name, so this demands at least 2 providers */
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+ hardcode = gctl_get_int(req, "hardcode");
+
+ /*
+ * Clear last sector first to spoil all components if device exists.
+ */
+ compsize = 0;
+ secsize = 0;
+ for (i = 1; i < nargs; i++) { /* this pass also finds the smallest usable component size */
+ name = gctl_get_ascii(req, "arg%d", i);
+ msize = g_get_mediasize(name);
+ ssize = g_get_sectorsize(name);
+ if (msize == 0 || ssize == 0) {
+ gctl_error(req, "Can't get informations about %s: %s.",
+ name, strerror(errno));
+ return;
+ }
+ msize -= ssize; /* one sector is excluded from the usable size */
+ if (compsize == 0 || (compsize > 0 && msize < compsize))
+ compsize = msize;
+ if (secsize == 0)
+ secsize = ssize;
+ else
+ secsize = g_lcm(secsize, ssize); /* NOTE(review): secsize is computed but not read again in this function */
+
+ error = g_metadata_clear(name, NULL);
+ if (error != 0) {
+ gctl_error(req, "Can't store metadata on %s: %s.", name, /* NOTE(review): message says "store" but the failing call is a clear */
+ strerror(error));
+ return;
+ }
+ }
+
+ strlcpy(md.md_magic, G_SHSEC_MAGIC, sizeof(md.md_magic));
+ md.md_version = G_SHSEC_VERSION;
+ name = gctl_get_ascii(req, "arg0");
+ strlcpy(md.md_name, name, sizeof(md.md_name));
+ md.md_id = arc4random();
+ md.md_all = nargs - 1; /* every argument except the device name */
+
+ /*
+ * Ok, store metadata.
+ */
+ for (i = 1; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ msize = g_get_mediasize(name); /* NOTE(review): re-read without the zero check done in the first loop */
+ ssize = g_get_sectorsize(name);
+ if (compsize < msize - ssize) {
+ fprintf(stderr,
+ "warning: %s: only %jd bytes from %jd bytes used.\n",
+ name, (intmax_t)compsize, (intmax_t)(msize - ssize));
+ }
+
+ md.md_no = i - 1; /* component numbers are zero-based, in command-line order */
+ md.md_provsize = msize;
+ if (!hardcode)
+ bzero(md.md_provider, sizeof(md.md_provider));
+ else {
+ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ name += sizeof(_PATH_DEV) - 1; /* the stripped name is also what g_metadata_store() and the messages below receive */
+ strlcpy(md.md_provider, name, sizeof(md.md_provider));
+ }
+ shsec_metadata_encode(&md, sector);
+ error = g_metadata_store(name, sector, sizeof(sector));
+ if (error != 0) {
+ fprintf(stderr, "Can't store metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue; /* keep going with the remaining providers */
+ }
+ if (verbose)
+ printf("Metadata value stored on %s.\n", name);
+ }
+}
+
+static void
+shsec_clear(struct gctl_req *req)
+{
+ const char *name;
+ int error, i, nargs;
+ /* "clear": clear shared-secret metadata on every provider named in arg0..argN-1. */
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_clear(name, G_SHSEC_MAGIC); /* magic is passed here, unlike the NULL used by shsec_label() */
+ if (error != 0) {
+ fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue; /* a failure on one provider does not stop the others */
+ }
+ if (verbose)
+ printf("Metadata cleared on %s.\n", name);
+ }
+}
+
+static void
+shsec_metadata_dump(const struct g_shsec_metadata *md)
+{
+ /* Print the decoded metadata fields to stdout, one labelled line per field. */
+ printf(" Magic string: %s\n", md->md_magic);
+ printf(" Metadata version: %u\n", (u_int)md->md_version);
+ printf(" Device name: %s\n", md->md_name);
+ printf(" Device ID: %u\n", (u_int)md->md_id);
+ printf(" Disk number: %u\n", (u_int)md->md_no);
+ printf("Total number of disks: %u\n", (u_int)md->md_all);
+ printf(" Hardcoded provider: %s\n", md->md_provider); /* md_provsize is not printed */
+}
+
+static void
+shsec_dump(struct gctl_req *req)
+{
+ struct g_shsec_metadata md, tmpmd;
+ const char *name;
+ int error, i, nargs;
+ /* "dump": read, decode and print the metadata of every provider named in arg0..argN-1. */
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 1) {
+ gctl_error(req, "Too few arguments.");
+ return;
+ }
+
+ for (i = 0; i < nargs; i++) {
+ name = gctl_get_ascii(req, "arg%d", i);
+ error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), /* tmpmd is used only as a raw byte buffer */
+ G_SHSEC_MAGIC);
+ if (error != 0) {
+ fprintf(stderr, "Can't read metadata from %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req, "Not fully done.");
+ continue;
+ }
+ shsec_metadata_decode((u_char *)&tmpmd, &md); /* no result is checked here, unlike the raid3 decode in raid3_dump() */
+ printf("Metadata on %s:\n", name);
+ shsec_metadata_dump(&md);
+ printf("\n");
+ }
+}
diff --git a/lib/geom/shsec/gshsec.8 b/lib/geom/shsec/gshsec.8
new file mode 100644
index 000000000000..dcfd2b397909
--- /dev/null
+++ b/lib/geom/shsec/gshsec.8
@@ -0,0 +1,130 @@
+.\" Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 1, 2013
+.Dt GSHSEC 8
+.Os
+.Sh NAME
+.Nm gshsec
+.Nd "control utility for shared secret devices"
+.Sh SYNOPSIS
+.Nm
+.Cm label
+.Op Fl hv
+.Ar name
+.Ar prov prov ...
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for setting up a device which contains a shared secret.
+The secret is shared between the given providers.
+To collect the secret, all providers are needed.
+If one of the components is missing, there is no way to get any useful data from
+the rest of them.
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm label
+Set up a shared secret device from the given components with the specified
+.Ar name .
+Metadata are stored in the last sector of every component.
+.It Cm stop
+Turn off an existing shared secret device by its
+.Ar name .
+This command does not touch on-disk metadata!
+.It Cm clear
+Clear metadata on the given providers.
+.It Cm dump
+Dump metadata stored on the given providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Force the removal of the specified shared secret device.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl v
+Be more verbose.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to create a shared secret device.
+The secret will be split between a slice on a local disk and a USB Pen drive.
+.Bd -literal -offset indent
+gshsec label -v secret /dev/ada0s1 /dev/da0
+newfs /dev/shsec/secret
+.Ed
+.Pp
+From now on, when the USB Pen drive is inserted, it will be automatically
+detected and connected, making the secret available via the
+.Pa /dev/shsec/secret
+device.
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr gbde 8 ,
+.Xr geom 8 ,
+.Xr newfs 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 5.4 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
diff --git a/lib/geom/stripe/Makefile b/lib/geom/stripe/Makefile
new file mode 100644
index 000000000000..bbea1901749e
--- /dev/null
+++ b/lib/geom/stripe/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS= stripe
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/stripe/Makefile.depend b/lib/geom/stripe/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/stripe/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/stripe/geom_stripe.c b/lib/geom/stripe/geom_stripe.c
new file mode 100644
index 000000000000..175ddada2e33
--- /dev/null
+++ b/lib/geom/stripe/geom_stripe.c
@@ -0,0 +1,288 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/stripe/g_stripe.h>
+
+#include "core/geom.h"
+#include "misc/subr.h"
+
+
+ /*
+ * Version symbols exported by this class library.
+ * NOTE(review): presumably looked up by geom(8) when it loads the library -
+ * confirm against the loader in core/geom.c.
+ */
+ uint32_t lib_version = G_LIB_VERSION;
+ uint32_t version = G_STRIPE_VERSION;
+
+ /* Default value of the -s (stripesize) option, in bytes, as a string. */
+ #define GSTRIPE_STRIPESIZE "65536"
+
+ /* Userland handlers; stripe_main() dispatches to the other three. */
+ static void stripe_main(struct gctl_req *req, unsigned flags);
+ static void stripe_clear(struct gctl_req *req);
+ static void stripe_dump(struct gctl_req *req);
+ static void stripe_label(struct gctl_req *req);
+
+ /*
+ * Command table for the STRIPE class.  Each entry holds the verb, its flags,
+ * the handler function, the option list and the usage string.
+ * "clear", "dump" and "label" are handled in this file by stripe_main().
+ * "create", "destroy" and "stop" have a NULL handler.
+ * NOTE(review): a NULL handler presumably means geom(8) passes the request
+ * on to the kernel class - confirm against core/geom.c.
+ */
+ struct g_command class_commands[] = {
+ { "clear", G_FLAG_VERBOSE, stripe_main, G_NULL_OPTS,
+ "[-v] prov ..."
+ },
+ { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL,
+ {
+ { 's', "stripesize", GSTRIPE_STRIPESIZE, G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-v] [-s stripesize] name prov prov ..."
+ },
+ { "destroy", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ { "dump", 0, stripe_main, G_NULL_OPTS,
+ "prov ..."
+ },
+ { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, stripe_main,
+ {
+ { 'h', "hardcode", NULL, G_TYPE_BOOL },
+ { 's', "stripesize", GSTRIPE_STRIPESIZE, G_TYPE_NUMBER },
+ G_OPT_SENTINEL
+ },
+ "[-hv] [-s stripesize] name prov prov ..."
+ },
+ { "stop", G_FLAG_VERBOSE, NULL,
+ {
+ { 'f', "force", NULL, G_TYPE_BOOL },
+ G_OPT_SENTINEL
+ },
+ "[-fv] name ..."
+ },
+ G_CMD_SENTINEL
+ };
+
+ /* Set to 1 by stripe_main() when the request carries G_FLAG_VERBOSE. */
+ static int verbose = 0;
+
+ /*
+  * Entry point for the verbs handled in userland.  Records the verbose flag,
+  * reads the "verb" parameter from the request and dispatches to the
+  * matching handler; an unrecognised verb is reported through gctl_error().
+  */
+ static void
+ stripe_main(struct gctl_req *req, unsigned flags)
+ {
+ 	const char *verb;
+
+ 	if ((flags & G_FLAG_VERBOSE) != 0)
+ 		verbose = 1;
+
+ 	verb = gctl_get_ascii(req, "verb");
+ 	if (verb == NULL) {
+ 		gctl_error(req, "No '%s' argument.", "verb");
+ 		return;
+ 	}
+
+ 	if (strcmp(verb, "label") == 0) {
+ 		stripe_label(req);
+ 		return;
+ 	}
+ 	if (strcmp(verb, "clear") == 0) {
+ 		stripe_clear(req);
+ 		return;
+ 	}
+ 	if (strcmp(verb, "dump") == 0) {
+ 		stripe_dump(req);
+ 		return;
+ 	}
+ 	gctl_error(req, "Unknown command: %s.", verb);
+ }
+
+ /*
+  * Handle the "label" verb.
+  *
+  * arg0 is the device name, arg1..argN are the component providers.  The
+  * function:
+  *   1. rejects a non-positive stripe size before touching any provider;
+  *   2. walks the providers once to find the smallest usable component size
+  *      and the least common multiple of the sector sizes, clearing any
+  *      existing metadata as it goes;
+  *   3. checks that the stripe size is a multiple of that sector size;
+  *   4. encodes and stores per-provider metadata with g_metadata_store().
+  *
+  * Errors while collecting information or clearing abort the operation.
+  * Errors while storing are reported per provider and the loop continues.
+  */
+ static void
+ stripe_label(struct gctl_req *req)
+ {
+ 	struct g_stripe_metadata md;
+ 	intmax_t stripesize;
+ 	off_t compsize, msize;
+ 	u_char sector[512];
+ 	unsigned ssize, secsize;
+ 	const char *name;
+ 	int error, i, nargs, hardcode;
+
+ 	bzero(sector, sizeof(sector));
+ 	nargs = gctl_get_int(req, "nargs");
+ 	if (nargs < 3) {
+ 		gctl_error(req, "Too few arguments.");
+ 		return;
+ 	}
+ 	hardcode = gctl_get_int(req, "hardcode");
+
+ 	/*
+ 	 * Zero and negative values satisfy the "multiple of the sector size"
+ 	 * test further down, so reject them here, before any existing
+ 	 * metadata has been destroyed.
+ 	 */
+ 	stripesize = gctl_get_intmax(req, "stripesize");
+ 	if (stripesize <= 0) {
+ 		gctl_error(req, "Invalid stripe size.");
+ 		return;
+ 	}
+
+ 	/*
+ 	 * Clear last sector first to spoil all components if device exists.
+ 	 */
+ 	compsize = 0;
+ 	secsize = 0;
+ 	for (i = 1; i < nargs; i++) {
+ 		name = gctl_get_ascii(req, "arg%d", i);
+ 		msize = g_get_mediasize(name);
+ 		ssize = g_get_sectorsize(name);
+ 		if (msize == 0 || ssize == 0) {
+ 			gctl_error(req, "Can't get informations about %s: %s.",
+ 			    name, strerror(errno));
+ 			return;
+ 		}
+ 		/* The last sector is reserved for the metadata. */
+ 		msize -= ssize;
+ 		if (compsize == 0 || (compsize > 0 && msize < compsize))
+ 			compsize = msize;
+ 		if (secsize == 0)
+ 			secsize = ssize;
+ 		else
+ 			secsize = g_lcm(secsize, ssize);
+
+ 		error = g_metadata_clear(name, NULL);
+ 		if (error != 0) {
+ 			/* This is the clear step, so say so in the message. */
+ 			gctl_error(req, "Can't clear metadata on %s: %s.", name,
+ 			    strerror(error));
+ 			return;
+ 		}
+ 	}
+
+ 	strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
+ 	md.md_version = G_STRIPE_VERSION;
+ 	name = gctl_get_ascii(req, "arg0");
+ 	strlcpy(md.md_name, name, sizeof(md.md_name));
+ 	md.md_id = arc4random();
+ 	md.md_all = nargs - 1;
+ 	if ((stripesize % secsize) != 0) {
+ 		gctl_error(req, "Stripesize should be multiple of %u.",
+ 		    secsize);
+ 		return;
+ 	}
+ 	md.md_stripesize = stripesize;
+
+ 	/*
+ 	 * Ok, store metadata.
+ 	 */
+ 	for (i = 1; i < nargs; i++) {
+ 		name = gctl_get_ascii(req, "arg%d", i);
+ 		msize = g_get_mediasize(name);
+ 		ssize = g_get_sectorsize(name);
+ 		if (compsize < msize - ssize) {
+ 			fprintf(stderr,
+ 			    "warning: %s: only %jd bytes from %jd bytes used.\n",
+ 			    name, (intmax_t)compsize, (intmax_t)(msize - ssize));
+ 		}
+
+ 		md.md_no = i - 1;
+ 		md.md_provsize = msize;
+ 		if (!hardcode)
+ 			bzero(md.md_provider, sizeof(md.md_provider));
+ 		else {
+ 			/* Store the provider name without the "/dev/" prefix. */
+ 			if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ 				name += sizeof(_PATH_DEV) - 1;
+ 			strlcpy(md.md_provider, name, sizeof(md.md_provider));
+ 		}
+ 		stripe_metadata_encode(&md, sector);
+ 		error = g_metadata_store(name, sector, sizeof(sector));
+ 		if (error != 0) {
+ 			fprintf(stderr, "Can't store metadata on %s: %s.\n",
+ 			    name, strerror(error));
+ 			gctl_error(req, "Not fully done.");
+ 			continue;
+ 		}
+ 		if (verbose)
+ 			printf("Metadata value stored on %s.\n", name);
+ 	}
+ }
+
+ /*
+  * Handle the "clear" verb: call g_metadata_clear() with G_STRIPE_MAGIC on
+  * every provider named in the request.  A failure on one provider is
+  * printed to stderr and recorded in the request, and processing moves on
+  * to the next provider.
+  */
+ static void
+ stripe_clear(struct gctl_req *req)
+ {
+ 	const char *prov;
+ 	int count, idx, rc;
+
+ 	count = gctl_get_int(req, "nargs");
+ 	if (count < 1) {
+ 		gctl_error(req, "Too few arguments.");
+ 		return;
+ 	}
+
+ 	idx = 0;
+ 	while (idx < count) {
+ 		prov = gctl_get_ascii(req, "arg%d", idx);
+ 		idx++;
+
+ 		rc = g_metadata_clear(prov, G_STRIPE_MAGIC);
+ 		if (rc == 0) {
+ 			if (verbose)
+ 				printf("Metadata cleared on %s.\n", prov);
+ 		} else {
+ 			fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+ 			    prov, strerror(rc));
+ 			gctl_error(req, "Not fully done.");
+ 		}
+ 	}
+ }
+
+ /*
+ * Print every field of an already decoded stripe metadata block to stdout,
+ * one labelled line per field.
+ *
+ * The integer fields are cast to u_int before printing.
+ * NOTE(review): md_magic, md_name and md_provider are printed with %s, so
+ * they are assumed to be NUL-terminated - confirm against the struct
+ * definition in g_stripe.h.
+ */
+ static void
+ stripe_metadata_dump(const struct g_stripe_metadata *md)
+ {
+
+ printf(" Magic string: %s\n", md->md_magic);
+ printf(" Metadata version: %u\n", (u_int)md->md_version);
+ printf(" Device name: %s\n", md->md_name);
+ printf(" Device ID: %u\n", (u_int)md->md_id);
+ printf(" Disk number: %u\n", (u_int)md->md_no);
+ printf("Total number of disks: %u\n", (u_int)md->md_all);
+ printf(" Stripe size: %u\n", (u_int)md->md_stripesize);
+ printf(" Hardcoded provider: %s\n", md->md_provider);
+ }
+
+ /*
+  * Handle the "dump" verb: for each provider named in the request, read its
+  * metadata with g_metadata_read(), decode it and print it through
+  * stripe_metadata_dump().  A provider whose metadata cannot be read is
+  * reported on stderr, flagged in the request and skipped.
+  */
+ static void
+ stripe_dump(struct gctl_req *req)
+ {
+ 	struct g_stripe_metadata decoded, raw;
+ 	const char *prov;
+ 	int count, idx, rc;
+
+ 	count = gctl_get_int(req, "nargs");
+ 	if (count < 1) {
+ 		gctl_error(req, "Too few arguments.");
+ 		return;
+ 	}
+
+ 	for (idx = 0; idx < count; idx++) {
+ 		prov = gctl_get_ascii(req, "arg%d", idx);
+
+ 		/* The struct doubles as the raw read buffer before decoding. */
+ 		rc = g_metadata_read(prov, (u_char *)&raw, sizeof(raw),
+ 		    G_STRIPE_MAGIC);
+ 		if (rc == 0) {
+ 			stripe_metadata_decode((u_char *)&raw, &decoded);
+ 			printf("Metadata on %s:\n", prov);
+ 			stripe_metadata_dump(&decoded);
+ 			printf("\n");
+ 		} else {
+ 			fprintf(stderr, "Can't read metadata from %s: %s.\n",
+ 			    prov, strerror(rc));
+ 			gctl_error(req, "Not fully done.");
+ 		}
+ 	}
+ }
diff --git a/lib/geom/stripe/gstripe.8 b/lib/geom/stripe/gstripe.8
new file mode 100644
index 000000000000..f1f34fe10609
--- /dev/null
+++ b/lib/geom/stripe/gstripe.8
@@ -0,0 +1,243 @@
+.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd May 21, 2004
+.Dt GSTRIPE 8
+.Os
+.Sh NAME
+.Nm gstripe
+.Nd "control utility for striped devices"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Op Fl s Ar stripesize
+.Ar name
+.Ar prov prov ...
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm label
+.Op Fl hv
+.Op Fl s Ar stripesize
+.Ar name
+.Ar prov prov ...
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for setting up a stripe on two or more disks.
+The striped device can be configured using two different methods:
+.Dq manual
+or
+.Dq automatic .
+When using the
+.Dq manual
+method, no metadata are stored on the devices, so the striped
+device has to be configured by hand every time it is needed.
+The
+.Dq automatic
+method uses on-disk metadata to detect devices.
+Once devices are labeled, they will be automatically detected and
+configured.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm create
+Set up a striped device from the given devices with specified
+.Ar name .
+This is the
+.Dq manual
+method and the stripe will not exist after a reboot (see
+.Sx DESCRIPTION
+above).
+The kernel module
+.Pa geom_stripe.ko
+will be loaded if it is not loaded already.
+.It Cm label
+Set up a striped device from the given devices with the specified
+.Ar name .
+This is the
+.Dq automatic
+method, where metadata are stored in every device's last sector.
+The kernel module
+.Pa geom_stripe.ko
+will be loaded if it is not loaded already.
+.It Cm stop
+Turn off an existing striped device by its
+.Ar name .
+This command does not touch on-disk metadata!
+.It Cm destroy
+Same as
+.Cm stop .
+.It Cm clear
+Clear metadata on the given devices.
+.It Cm dump
+Dump metadata stored on the given devices.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width ".Fl s Ar stripesize"
+.It Fl f
+Force the removal of the specified striped device.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl s Ar stripesize
+Specifies size of stripe block in bytes.
+The
+.Ar stripesize
+must be a multiple of the largest sector size of all the providers.
+.It Fl v
+Be more verbose.
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm STRIPE
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.stripe.debug : No 0
+Debug level of the
+.Nm STRIPE
+GEOM class.
+This can be set to a number between 0 and 3 inclusive.
+If set to 0 minimal debug information is printed, and if set to 3 the
+maximum amount of debug information is printed.
+.It Va kern.geom.stripe.fast : No 0
+If set to a non-zero value enable
+.Dq "fast mode"
+instead of the normal
+.Dq "economic mode" .
+Compared to
+.Dq "economic mode" ,
+.Dq "fast mode"
+uses more memory, but it is much faster for smaller stripe sizes.
+If enough memory cannot be allocated,
+.Nm STRIPE
+will fall back to
+.Dq "economic mode" .
+.It Va kern.geom.stripe.maxmem : No 13107200
+Maximum amount of memory that can be consumed by
+.Dq "fast mode"
+(in bytes).
+This
+.Xr sysctl 8
+variable is read-only and can only be set as a tunable in
+.Xr loader.conf 5 .
+.It Va kern.geom.stripe.fast_failed
+A count of how many times
+.Dq "fast mode"
+has failed due to an insufficient amount of memory.
+If this value is large, you should consider increasing the
+.Va kern.geom.stripe.maxmem
+value.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to set up a striped device from four disks with a
+128KB stripe size for automatic configuration,
+create a file system on it,
+and mount it:
+.Bd -literal -offset indent
+gstripe label -v -s 131072 data /dev/da0 /dev/da1 /dev/da2 /dev/da3
+newfs /dev/stripe/data
+mount /dev/stripe/data /mnt
+[...]
+umount /mnt
+gstripe stop data
+gstripe unload
+.Ed
+.Sh COMPATIBILITY
+The
+.Nm
+interleave is in number of bytes,
+unlike
+.Xr ccdconfig 8
+ which uses the number of sectors.
+A
+.Xr ccdconfig 8
+.Ar ileave
+of
+.Ql 128
+is 64 KB (128 512B sectors).
+The same stripe interleave would be specified as
+.Ql 65536
+for
+.Nm .
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr loader.conf 5 ,
+.Xr ccdconfig 8 ,
+.Xr geom 8 ,
+.Xr gvinum 8 ,
+.Xr mount 8 ,
+.Xr newfs 8 ,
+.Xr sysctl 8 ,
+.Xr umount 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 5.3 .
+.Sh AUTHORS
+.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org
diff --git a/lib/geom/virstor/Makefile b/lib/geom/virstor/Makefile
new file mode 100644
index 000000000000..dfbe0aeddc47
--- /dev/null
+++ b/lib/geom/virstor/Makefile
@@ -0,0 +1,11 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc ${SRCTOP}/sys/geom/virstor
+
+GEOM_CLASS= virstor
+
+SRCS+= binstream.c
+SRCS+= g_virstor_md.c
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/virstor/Makefile.depend b/lib/geom/virstor/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/virstor/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libgeom \
+ sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/virstor/geom_virstor.c b/lib/geom/virstor/geom_virstor.c
new file mode 100644
index 000000000000..25ebdebbe7eb
--- /dev/null
+++ b/lib/geom/virstor/geom_virstor.c
@@ -0,0 +1,530 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2005 Ivan Voras <ivoras@freebsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgeom.h>
+#include <err.h>
+#include <assert.h>
+
+#include <core/geom.h>
+#include <misc/subr.h>
+
+#include <geom/virstor/g_virstor_md.h>
+#include <geom/virstor/g_virstor.h>
+
+ /*
+ * Version symbols exported by this class library.
+ * NOTE(review): presumably looked up by geom(8) when it loads the library -
+ * confirm against the loader in core/geom.c.
+ */
+ uint32_t lib_version = G_LIB_VERSION;
+ uint32_t version = G_VIRSTOR_VERSION;
+
+ /* Defaults for the -m (chunk_size) and -s (vir_size) options of "label". */
+ #define GVIRSTOR_CHUNK_SIZE "4M"
+ #define GVIRSTOR_VIR_SIZE "2T"
+
+ #if G_LIB_VERSION == 1
+ /* Support RELENG_6 */
+ #define G_TYPE_BOOL G_TYPE_NONE
+ #endif
+
+ /*
+ * virstor_main gets called by the geom(8) utility.  It is the handler
+ * registered in class_commands[] for the "clear", "dump" and "label" verbs.
+ */
+ static void virstor_main(struct gctl_req *req, unsigned flags);
+
+ /*
+  * Command table for the VIRSTOR class.  Each entry holds the verb, its
+  * flags, the handler function, the option list and the usage string.
+  * "clear", "dump" and "label" are handled in this file by virstor_main();
+  * the remaining verbs have a NULL handler.
+  * NOTE(review): a NULL handler presumably means geom(8) passes the request
+  * on to the kernel class - confirm against core/geom.c.
+  */
+ struct g_command class_commands[] = {
+ 	{ "clear", G_FLAG_VERBOSE, virstor_main, G_NULL_OPTS,
+ 	    "[-v] prov ..."
+ 	},
+ 	{ "dump", 0, virstor_main, G_NULL_OPTS,
+ 	    "prov ..."
+ 	},
+ 	{ "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, virstor_main,
+ 	    {
+ 		{ 'h', "hardcode", NULL, G_TYPE_BOOL},
+ 		{ 'm', "chunk_size", GVIRSTOR_CHUNK_SIZE, G_TYPE_NUMBER},
+ 		{ 's', "vir_size", GVIRSTOR_VIR_SIZE, G_TYPE_NUMBER},
+ 		G_OPT_SENTINEL
+ 	    },
+ 	    "[-h] [-v] [-m chunk_size] [-s vir_size] name provider0 [provider1 ...]"
+ 	},
+ 	{ "destroy", G_FLAG_VERBOSE, NULL,
+ 	    {
+ 		{ 'f', "force", NULL, G_TYPE_BOOL},
+ 		G_OPT_SENTINEL
+ 	    },
+ 	    "[-fv] name ..."
+ 	},
+ 	{ "stop", G_FLAG_VERBOSE, NULL,
+ 	    {
+ 		{ 'f', "force", NULL, G_TYPE_BOOL},
+ 		G_OPT_SENTINEL
+ 	    },
+ 	    "[-fv] name ... (alias for \"destroy\")"
+ 	},
+ 	{ "add", G_FLAG_VERBOSE, NULL,
+ 	    {
+ 		{ 'h', "hardcode", NULL, G_TYPE_BOOL},
+ 		G_OPT_SENTINEL
+ 	    },
+ 	    "[-vh] name prov [prov ...]"
+ 	},
+ 	/*
+ 	 * gvirstor(8) documents "remove" as taking the device name followed
+ 	 * by the providers to remove, so the usage string lists both.
+ 	 */
+ 	{ "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+ 	    "[-v] name prov ..."
+ 	},
+ 	G_CMD_SENTINEL
+ };
+
+ /* Set to 1 by virstor_main() when the request carries G_FLAG_VERBOSE. */
+ static int verbose = 0;
+
+ /* Helper functions' declarations */
+ static void virstor_clear(struct gctl_req *req);
+ static void virstor_dump(struct gctl_req *req);
+ static void virstor_label(struct gctl_req *req);
+
+ /*
+  * Dispatcher: records the verbose flag, reads the "verb" parameter from the
+  * request and hands the request to the matching helper.  No real work is
+  * done here.
+  */
+ static void
+ virstor_main(struct gctl_req *req, unsigned flags)
+ {
+ 	const char *verb;
+
+ 	if ((flags & G_FLAG_VERBOSE) != 0)
+ 		verbose = 1;
+
+ 	verb = gctl_get_ascii(req, "verb");
+ 	if (verb == NULL) {
+ 		gctl_error(req, "No '%s' argument.", "verb");
+ 		return;
+ 	}
+
+ 	if (strcmp(verb, "label") == 0) {
+ 		virstor_label(req);
+ 		return;
+ 	}
+ 	if (strcmp(verb, "clear") == 0) {
+ 		virstor_clear(req);
+ 		return;
+ 	}
+ 	if (strcmp(verb, "dump") == 0) {
+ 		virstor_dump(req);
+ 		return;
+ 	}
+ 	gctl_error(req, "%s: Unknown command: %s.", __func__, verb);
+
+ 	/* No CTASSERT in userland
+ 	CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS);
+ 	*/
+ }
+
+/*
+ * Labels a new geom Meaning: parses and checks the parameters, calculates &
+ * writes metadata to the relevant providers so when the next round of
+ * "tasting" comes (which will be just after the provider(s) are closed) geom
+ * can be instantiated with the tasted metadata.
+ */
+static void
+virstor_label(struct gctl_req *req)
+{
+ struct g_virstor_metadata md;
+ off_t msize;
+ unsigned char *sect;
+ unsigned int i;
+ size_t ssize, secsize;
+ const char *name;
+ char param[32];
+ int hardcode, nargs, error;
+ struct virstor_map_entry *map;
+ size_t total_chunks; /* We'll run out of memory if
+ this needs to be bigger. */
+ unsigned int map_chunks; /* Chunks needed by the map (map size). */
+ size_t map_size; /* In bytes. */
+ ssize_t written;
+ int fd;
+
+ nargs = gctl_get_int(req, "nargs");
+ if (nargs < 2) {
+ gctl_error(req, "Too few arguments (%d): expecting: name "
+ "provider0 [provider1 ...]", nargs);
+ return;
+ }
+
+ hardcode = gctl_get_int(req, "hardcode");
+
+ /*
+ * Initialize constant parts of metadata: magic signature, version,
+ * name.
+ */
+ bzero(&md, sizeof(md));
+ strlcpy(md.md_magic, G_VIRSTOR_MAGIC, sizeof(md.md_magic));
+ md.md_version = G_VIRSTOR_VERSION;
+ name = gctl_get_ascii(req, "arg0");
+ if (name == NULL) {
+ gctl_error(req, "No 'arg%u' argument.", 0);
+ return;
+ }
+ strlcpy(md.md_name, name, sizeof(md.md_name));
+
+ md.md_virsize = (off_t)gctl_get_intmax(req, "vir_size");
+ md.md_chunk_size = gctl_get_intmax(req, "chunk_size");
+ md.md_count = nargs - 1;
+
+ if (md.md_virsize == 0 || md.md_chunk_size == 0) {
+ gctl_error(req, "Virtual size and chunk size must be non-zero");
+ return;
+ }
+
+ if (md.md_chunk_size % MAXPHYS != 0) {
+ /* XXX: This is not strictly needed, but it's convenient to
+ * impose some limitations on it, so why not MAXPHYS. */
+ size_t new_size = rounddown(md.md_chunk_size, MAXPHYS);
+ if (new_size < md.md_chunk_size)
+ new_size += MAXPHYS;
+ fprintf(stderr, "Resizing chunk size to be a multiple of "
+ "MAXPHYS (%d kB).\n", MAXPHYS / 1024);
+ fprintf(stderr, "New chunk size: %zu kB\n", new_size / 1024);
+ md.md_chunk_size = new_size;
+ }
+
+ if (md.md_virsize % md.md_chunk_size != 0) {
+ off_t chunk_count = md.md_virsize / md.md_chunk_size;
+ md.md_virsize = chunk_count * md.md_chunk_size;
+ fprintf(stderr, "Resizing virtual size to be a multiple of "
+ "chunk size.\n");
+ fprintf(stderr, "New virtual size: %zu MB\n",
+ (size_t)(md.md_virsize/(1024 * 1024)));
+ }
+
+ msize = secsize = 0;
+ for (i = 1; i < (unsigned)nargs; i++) {
+ snprintf(param, sizeof(param), "arg%u", i);
+ name = gctl_get_ascii(req, "%s", param);
+ ssize = g_get_sectorsize(name);
+ if (ssize == 0)
+ fprintf(stderr, "%s for %s\n", strerror(errno), name);
+ msize += g_get_mediasize(name);
+ if (secsize == 0)
+ secsize = ssize;
+ else if (secsize != ssize) {
+ gctl_error(req, "Devices need to have same sector size "
+ "(%u on %s needs to be %u).",
+ (u_int)ssize, name, (u_int)secsize);
+ return;
+ }
+ }
+
+ if (secsize == 0) {
+ gctl_error(req, "Device not specified");
+ return;
+ }
+
+ if (md.md_chunk_size % secsize != 0) {
+ fprintf(stderr, "Error: chunk size is not a multiple of sector "
+ "size.");
+ gctl_error(req, "Chunk size (in bytes) must be multiple of %u.",
+ (unsigned int)secsize);
+ return;
+ }
+
+ total_chunks = md.md_virsize / md.md_chunk_size;
+ map_size = total_chunks * sizeof(*map);
+ assert(md.md_virsize % md.md_chunk_size == 0);
+
+ ssize = map_size % secsize;
+ if (ssize != 0) {
+ size_t add_chunks = (secsize - ssize) / sizeof(*map);
+ total_chunks += add_chunks;
+ md.md_virsize = (off_t)total_chunks * (off_t)md.md_chunk_size;
+ map_size = total_chunks * sizeof(*map);
+ fprintf(stderr, "Resizing virtual size to fit virstor "
+ "structures.\n");
+ fprintf(stderr, "New virtual size: %ju MB (%zu new chunks)\n",
+ (uintmax_t)(md.md_virsize / (1024 * 1024)), add_chunks);
+ }
+
+ if (verbose)
+ printf("Total virtual chunks: %zu (%zu MB each), %ju MB total "
+ "virtual size.\n",
+ total_chunks, (size_t)(md.md_chunk_size / (1024 * 1024)),
+ md.md_virsize/(1024 * 1024));
+
+ if ((off_t)md.md_virsize < msize)
+ fprintf(stderr, "WARNING: Virtual storage size < Physical "
+ "available storage (%ju < %ju)\n", md.md_virsize, msize);
+
+ /* Clear last sector first to spoil all components if device exists. */
+ if (verbose)
+ printf("Clearing metadata on");
+
+ for (i = 1; i < (unsigned)nargs; i++) {
+ snprintf(param, sizeof(param), "arg%u", i);
+ name = gctl_get_ascii(req, "%s", param);
+
+ if (verbose)
+ printf(" %s", name);
+
+ msize = g_get_mediasize(name);
+ ssize = g_get_sectorsize(name);
+ if (msize == 0 || ssize == 0) {
+ gctl_error(req, "Can't retrieve information about "
+ "%s: %s.", name, strerror(errno));
+ return;
+ }
+ if (msize < (off_t) MAX(md.md_chunk_size*4, map_size))
+ gctl_error(req, "Device %s is too small", name);
+ error = g_metadata_clear(name, NULL);
+ if (error != 0) {
+ gctl_error(req, "Can't clear metadata on %s: %s.", name,
+ strerror(error));
+ return;
+ }
+ }
+
+
+ /* Write allocation table to the first provider - this needs to be done
+ * before metadata is written because when kernel tastes it it's too
+ * late */
+ name = gctl_get_ascii(req, "arg1"); /* device with metadata */
+ if (verbose)
+ printf(".\nWriting allocation table to %s...", name);
+
+ /* How many chunks does the map occupy? */
+ map_chunks = map_size/md.md_chunk_size;
+ if (map_size % md.md_chunk_size != 0)
+ map_chunks++;
+ if (verbose) {
+ printf(" (%zu MB, %d chunks) ", map_size/(1024*1024), map_chunks);
+ fflush(stdout);
+ }
+
+ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ fd = open(name, O_RDWR);
+ else {
+ sprintf(param, "%s%s", _PATH_DEV, name);
+ fd = open(param, O_RDWR);
+ }
+ if (fd < 0)
+ gctl_error(req, "Cannot open provider %s to write map", name);
+
+ /* Do it with calloc because there might be a need to set up chunk flags
+ * in the future */
+ map = calloc(total_chunks, sizeof(*map));
+ if (map == NULL) {
+ gctl_error(req,
+ "Out of memory (need %zu bytes for allocation map)",
+ map_size);
+ }
+
+ written = pwrite(fd, map, map_size, 0);
+ free(map);
+ if ((size_t)written != map_size) {
+ if (verbose) {
+ fprintf(stderr, "\nTried to write %zu, written %zd (%s)\n",
+ map_size, written, strerror(errno));
+ }
+ gctl_error(req, "Error writing out allocation map!");
+ return;
+ }
+ close (fd);
+
+ if (verbose)
+ printf("\nStoring metadata on ");
+
+ /*
+ * ID is randomly generated, unique for a geom. This is used to
+ * recognize all providers belonging to one geom.
+ */
+ md.md_id = arc4random();
+
+ /* Ok, store metadata. */
+ for (i = 1; i < (unsigned)nargs; i++) {
+ snprintf(param, sizeof(param), "arg%u", i);
+ name = gctl_get_ascii(req, "%s", param);
+
+ msize = g_get_mediasize(name);
+ ssize = g_get_sectorsize(name);
+
+ if (verbose)
+ printf("%s ", name);
+
+ /* this provider's position/type in geom */
+ md.no = i - 1;
+ /* this provider's size */
+ md.provsize = msize;
+ /* chunk allocation info */
+ md.chunk_count = md.provsize / md.md_chunk_size;
+ if (verbose)
+ printf("(%u chunks) ", md.chunk_count);
+ /* Check to make sure last sector is unused */
+ if ((off_t)(md.chunk_count * md.md_chunk_size) > (off_t)(msize-ssize))
+ md.chunk_count--;
+ md.chunk_next = 0;
+ if (i != 1) {
+ md.chunk_reserved = 0;
+ md.flags = 0;
+ } else {
+ md.chunk_reserved = map_chunks * 2;
+ md.flags = VIRSTOR_PROVIDER_ALLOCATED |
+ VIRSTOR_PROVIDER_CURRENT;
+ md.chunk_next = md.chunk_reserved;
+ if (verbose)
+ printf("(%u reserved) ", md.chunk_reserved);
+ }
+
+ if (!hardcode)
+ bzero(md.provider, sizeof(md.provider));
+ else {
+ /* convert "/dev/something" to "something" */
+ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) {
+ strlcpy(md.provider, name + sizeof(_PATH_DEV) - 1,
+ sizeof(md.provider));
+ } else
+ strlcpy(md.provider, name, sizeof(md.provider));
+ }
+ sect = malloc(ssize);
+ if (sect == NULL)
+ err(1, "Cannot allocate sector of %zu bytes", ssize);
+ bzero(sect, ssize);
+ virstor_metadata_encode(&md, sect);
+ error = g_metadata_store(name, sect, ssize);
+ free(sect);
+ if (error != 0) {
+ if (verbose)
+ printf("\n");
+ fprintf(stderr, "Can't store metadata on %s: %s.\n",
+ name, strerror(error));
+ gctl_error(req,
+ "Not fully done (error storing metadata).");
+ return;
+ }
+ }
+#if 0
+ if (verbose)
+ printf("\n");
+#endif
+}
+
+ /*
+  * Handle the "clear" verb: clears metadata on the given provider(s) by
+  * calling g_metadata_clear() with G_VIRSTOR_MAGIC.
+  *
+  * After a successful clear the provider is opened read/write; a failure to
+  * open it is reported as an error.  Nothing is written through that
+  * descriptor here, so it is closed again straight away.
+  * Failures on one provider are reported and the loop moves on to the next.
+  */
+ static void
+ virstor_clear(struct gctl_req *req)
+ {
+ 	const char *name;
+ 	char param[32];
+ 	char path[MAXPATHLEN];	/* _PATH_DEV + provider name */
+ 	unsigned i;
+ 	int nargs, error;
+ 	int fd;
+
+ 	nargs = gctl_get_int(req, "nargs");
+ 	if (nargs < 1) {
+ 		gctl_error(req, "Too few arguments.");
+ 		return;
+ 	}
+ 	for (i = 0; i < (unsigned)nargs; i++) {
+ 		snprintf(param, sizeof(param), "arg%u", i);
+ 		name = gctl_get_ascii(req, "%s", param);
+
+ 		error = g_metadata_clear(name, G_VIRSTOR_MAGIC);
+ 		if (error != 0) {
+ 			fprintf(stderr, "Can't clear metadata on %s: %s "
+ 			    "(do I own it?)\n", name, strerror(error));
+ 			gctl_error(req,
+ 			    "Not fully done (can't clear metadata).");
+ 			continue;
+ 		}
+ 		/* Build the device path in a buffer large enough for any path. */
+ 		if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ 			fd = open(name, O_RDWR);
+ 		else {
+ 			snprintf(path, sizeof(path), "%s%s", _PATH_DEV, name);
+ 			fd = open(path, O_RDWR);
+ 		}
+ 		if (fd < 0) {
+ 			gctl_error(req, "Cannot clear header sector for %s",
+ 			    name);
+ 			continue;
+ 		}
+ 		if (verbose)
+ 			printf("Metadata cleared on %s.\n", name);
+ 		/* Do not leak one descriptor per provider. */
+ 		(void)close(fd);
+ 	}
+ }
+
+ /*
+ * Print the fields of an already decoded virstor metadata block to stdout,
+ * one labelled line per field.
+ *
+ * An empty provider string is shown as "(not hardcoded)".  The virtual size
+ * is printed in MB after a cast to unsigned int, the chunk size in kB, and
+ * "Chunks free" is computed as chunk_count - chunk_next.
+ * NOTE(review): the uncast %u arguments assume the corresponding fields are
+ * no wider than unsigned int - confirm against g_virstor_md.h.
+ */
+ static void
+ virstor_metadata_dump(const struct g_virstor_metadata *md)
+ {
+ printf(" Magic string: %s\n", md->md_magic);
+ printf(" Metadata version: %u\n", (u_int) md->md_version);
+ printf(" Device name: %s\n", md->md_name);
+ printf(" Device ID: %u\n", (u_int) md->md_id);
+ printf(" Provider index: %u\n", (u_int) md->no);
+ printf(" Active providers: %u\n", (u_int) md->md_count);
+ printf(" Hardcoded provider: %s\n",
+ md->provider[0] != '\0' ? md->provider : "(not hardcoded)");
+ printf(" Virtual size: %u MB\n",
+ (unsigned int)(md->md_virsize/(1024 * 1024)));
+ printf(" Chunk size: %u kB\n", md->md_chunk_size / 1024);
+ printf(" Chunks on provider: %u\n", md->chunk_count);
+ printf(" Chunks free: %u\n", md->chunk_count - md->chunk_next);
+ printf(" Reserved chunks: %u\n", md->chunk_reserved);
+ }
+
+ /*
+  * Handle the "dump" verb (reached through virstor_main()): for each
+  * provider named in the request, read its metadata into a temporary buffer
+  * with g_metadata_read(), decode it and print it with
+  * virstor_metadata_dump().  A provider whose metadata cannot be read is
+  * reported on stderr, flagged in the request and skipped.
+  */
+ static void
+ virstor_dump(struct gctl_req *req)
+ {
+ 	struct g_virstor_metadata md;
+ 	u_char tmpmd[512];	/* temporary buffer */
+ 	const char *name;
+ 	char param[16];
+ 	int nargs, error, i;
+
+ 	/* The raw buffer must be able to hold the decoded structure. */
+ 	assert(sizeof(tmpmd) >= sizeof(md));
+
+ 	nargs = gctl_get_int(req, "nargs");
+ 	if (nargs < 1) {
+ 		gctl_error(req, "Too few arguments.");
+ 		return;
+ 	}
+ 	for (i = 0; i < nargs; i++) {
+ 		/* i is a signed int, so format it with %d. */
+ 		snprintf(param, sizeof(param), "arg%d", i);
+ 		name = gctl_get_ascii(req, "%s", param);
+
+ 		error = g_metadata_read(name, (u_char *) & tmpmd, sizeof(tmpmd),
+ 		    G_VIRSTOR_MAGIC);
+ 		if (error != 0) {
+ 			fprintf(stderr, "Can't read metadata from %s: %s.\n",
+ 			    name, strerror(error));
+ 			gctl_error(req,
+ 			    "Not fully done (error reading metadata).");
+ 			continue;
+ 		}
+ 		virstor_metadata_decode((u_char *) & tmpmd, &md);
+ 		printf("Metadata on %s:\n", name);
+ 		virstor_metadata_dump(&md);
+ 		printf("\n");
+ 	}
+ }
diff --git a/lib/geom/virstor/gvirstor.8 b/lib/geom/virstor/gvirstor.8
new file mode 100644
index 000000000000..3d93e5baa2ae
--- /dev/null
+++ b/lib/geom/virstor/gvirstor.8
@@ -0,0 +1,299 @@
+.\" Copyright (c) 2006-2011 Ivan Voras <ivoras@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 1, 2013
+.Dt GVIRSTOR 8
+.Os
+.Sh NAME
+.Nm gvirstor
+.Nd "control utility for virtual data storage devices"
+.Sh SYNOPSIS
+.Nm
+.Cm label
+.Op Fl hv
+.Op Fl s Ar virsize
+.Op Fl m Ar chunksize
+.Ar name
+.Ar prov ...
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm add
+.Op Fl vh
+.Ar name prov ...
+.Nm
+.Cm remove
+.Op Fl v
+.Ar name prov ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for setting up a virtual storage device of arbitrarily
+large size
+.Pq for example, several TB ,
+consisting of an arbitrary number of physical storage devices with a
+total size that is equal to or smaller than the virtual size.
+Data for the virtual devices will be allocated from physical devices on
+demand.
+The idea behind
+.Nm
+is similar to the concept of Virtual Memory in operating systems,
+effectively allowing users to overcommit on storage
+.Pq free file system space .
+The concept is also known as "thin provisioning" in virtualization
+environments, only here it is implemented on the level of physical storage
+devices.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm remove"
+.It Cm label
+Set up a virtual device from the given components with the specified
+.Ar name .
+Metadata is stored in the last sector of every component.
+Argument
+.Fl s Ar virsize
+is the size of the new virtual device, with the default being 2 TiB
+.Pq 2097152 MiB .
+Argument
+.Fl m Ar chunksize
+is the chunk size, with default being set to 4 MiB
+.Pq 4096 KiB .
+The default arguments are thus
+.Qq Fl s Ar 2097152 Fl m Ar 4096 .
+.It Cm stop
+Turn off an existing virtual device with the given
+.Ar name .
+This command does not touch on-disk metadata.
+As with other GEOM classes, stopped geoms cannot be started manually.
+.It Cm destroy
+Same as
+.Cm stop .
+.It Cm add
+Add new components to an existing virtual device with the given
+.Ar name .
+The specified virstor device must exist and be active
+.Pq i.e. module loaded, device present in Pa /dev .
+This action can be safely performed while the virstor device is in use
+.Pq Qo hot Qc operation .
+.It Cm remove
+Remove components from an existing virtual device with the given
+.Ar name .
+Only unallocated providers can be removed.
+.It Cm clear
+Clear metadata on the given providers.
+.It Cm dump
+Dump metadata stored on the given providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Force the removal of the specified virtual device.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl v
+Be more verbose.
+.El
+.Sh EXAMPLES
+The following example shows how to create a virtual device of default size
+.Pq 2 TiB ,
+of default chunk
+.Pq extent
+size
+.Pq 4 MiB ,
+with two physical devices for backing storage.
+.Bd -literal -offset indent
+.No gvirstor label -v Ar mydata Ar /dev/ada4 Ar /dev/ada6
+.No newfs Ar /dev/virstor/mydata
+.Ed
+.Pp
+From now on, the virtual device will be available via the
+.Pa /dev/virstor/mydata
+device entry.
+To add a new physical device / component to an active virstor device:
+.Bd -literal -offset indent
+.No gvirstor add Ar mydata Ar ada8
+.Ed
+.Pp
+This will add physical storage of
+.Ar ada8
+to
+.Pa /dev/virstor/mydata
+device.
+.Pp
+To see the device status information
+.Pq including how much physical storage is still available for the virtual device ,
+use:
+.Bd -literal -offset indent
+gvirstor list
+.Ed
+.Pp
+All standard
+.Xr geom 8
+subcommands
+.Pq e.g. Cm status , Cm help
+are also supported.
+.Sh SYSCTL VARIABLES
+.Nm
+has several
+.Xr sysctl 8
+tunable variables.
+.Bd -literal -offset indent
+.Va int kern.geom.virstor.debug
+.Ed
+.Pp
+This sysctl controls verbosity of the kernel module, in the range
+1 to 15.
+Messages that are marked with higher verbosity levels than this are
+suppressed.
+Default value is 5 and it is not recommended to set this tunable to less
+than 2, because level 1 messages are error events, and level 2 messages
+are system warnings.
+.Bd -literal -offset indent
+.Va int kern.geom.virstor.chunk_watermark
+.Ed
+.Pp
+This sysctl sets the warning watermark level for physical chunk
+usage on a single component.
+The warning is issued when a virstor component has fewer than this many
+free chunks
+.Pq default 100 .
+.Bd -literal -offset indent
+.Va int kern.geom.virstor.component_watermark
+.Ed
+.Pp
+This sysctl sets the warning watermark level for component usage.
+The warning is issued when there are fewer than this many unallocated
+components
+.Pq default is 1 .
+.Pp
+All these sysctls are also available as
+.Xr loader 8
+tunables.
+.Sh DIAGNOSTICS
+.Ex -std
+.Pp
+The
+.Nm
+kernel module issues log messages with prefixes in a standardized format,
+which is useful for log message filtering and dispatching.
+Each message line begins with
+.Bd -literal -offset indent
+.Li GEOM_VIRSTOR[%d]:
+.Ed
+.Pp
+The number
+.Pq %d
+is message verbosity / importance level, in the range 1 to 15.
+If a message filtering, dispatching or operator alert system is used, it
+is recommended that messages with levels 1 and 2 be taken seriously
+.Pq for example, to catch out-of-space conditions as set by watermark sysctls .
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr fstab 5 ,
+.Xr geom 8 ,
+.Xr glabel 8 ,
+.Xr newfs 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Fx 7.0 .
+.Sh AUTHORS
+.An Ivan Voras Aq Mt ivoras@FreeBSD.org
+.Pp
+Sponsored by Google Summer of Code 2006.
+.Sh BUGS
+Commands
+.Cm add
+and
+.Cm remove
+contain unavoidable critical sections which may make the virstor
+device unusable if a power failure
+.Pq or other disruptive event
+happens during their execution.
+It is recommended to run them when the system is quiescent.
+.Sh ASSUMPTIONS AND INTERACTION WITH FILE SYSTEMS
+There are several assumptions that
+.Nm
+has in its operation: that the size of the virtual storage device will not
+change once it is set, and that the sizes of individual physical storage
+components will always remain constant during their existence.
+For alternative ways to implement virtual or resizable file systems see
+.Xr zfs 8 ,
+.Xr gconcat 8
+and
+.Xr growfs 8 .
+.Pp
+Note that
+.Nm
+has nontrivial interaction with file systems which initialize a large
+number of on-disk structures during newfs.
+If such file systems attempt to spread their structures across the drive
+media
+.Pq like UFS/UFS2 does ,
+their efforts will be effectively foiled by sequential allocation of
+chunks in
+.Nm
+and all their structures will be physically allocated at the start
+of the first virstor component.
+This could have a significant impact on file system performance
+.Pq which can in some rare cases even be positive .