diff options
Diffstat (limited to 'lib/geom')
69 files changed, 14085 insertions, 0 deletions
diff --git a/lib/geom/Makefile b/lib/geom/Makefile new file mode 100644 index 000000000000..a6274d43e7e1 --- /dev/null +++ b/lib/geom/Makefile @@ -0,0 +1,4 @@ +SUBDIR=${GEOM_CLASSES} + +.include "Makefile.inc" +.include <bsd.subdir.mk> diff --git a/lib/geom/Makefile.classes b/lib/geom/Makefile.classes new file mode 100644 index 000000000000..1852e35be15f --- /dev/null +++ b/lib/geom/Makefile.classes @@ -0,0 +1,24 @@ +.if !defined(COMPAT_libcompat) +GEOM_CLASS_DIR?=/lib/geom +.else +GEOM_CLASS_DIR?=/usr/lib${COMPAT_libcompat}/geom +.endif + +GEOM_CLASSES= cache +GEOM_CLASSES+= concat +.if ${MK_OPENSSL} != "no" +GEOM_CLASSES+= eli +.endif +GEOM_CLASSES+= journal +GEOM_CLASSES+= label +GEOM_CLASSES+= mirror +GEOM_CLASSES+= mountver +GEOM_CLASSES+= multipath +GEOM_CLASSES+= nop +GEOM_CLASSES+= part +GEOM_CLASSES+= raid +GEOM_CLASSES+= raid3 +GEOM_CLASSES+= shsec +GEOM_CLASSES+= stripe +GEOM_CLASSES+= union +GEOM_CLASSES+= virstor diff --git a/lib/geom/Makefile.inc b/lib/geom/Makefile.inc new file mode 100644 index 000000000000..35163127538d --- /dev/null +++ b/lib/geom/Makefile.inc @@ -0,0 +1,14 @@ +.include <src.opts.mk> + +SHLIBDIR=${GEOM_CLASS_DIR} +SHLIB_NAME?=geom_${GEOM_CLASS}.so +MAN= g${GEOM_CLASS}.8 +SRCS+= geom_${GEOM_CLASS}.c subr.c +CFLAGS+=-I${SRCTOP}/sbin/geom + +.PATH: ${SRCTOP}/sbin/geom/misc + +NO_WMISSING_VARIABLE_DECLARATIONS= + +.include "Makefile.classes" +.include "../Makefile.inc" diff --git a/lib/geom/cache/Makefile b/lib/geom/cache/Makefile new file mode 100644 index 000000000000..e307351c1de5 --- /dev/null +++ b/lib/geom/cache/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= cache + +.include <bsd.lib.mk> diff --git a/lib/geom/cache/Makefile.depend b/lib/geom/cache/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/cache/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/cache/gcache.8 b/lib/geom/cache/gcache.8 new file mode 100644 index 000000000000..026c66b0966b --- /dev/null +++ b/lib/geom/cache/gcache.8 @@ -0,0 +1,190 @@ +.\"- +.\" Copyright (c) 2010 Edward Tomasz Napierala +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.Dd January 3, 2010 +.Dt GCACHE 8 +.Os +.Sh NAME +.Nm gcache +.Nd "control utility for CACHE GEOM class" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl v +.Op Fl b Ar blocksize +.Op Fl s Ar size +.Ar name +.Ar prov +.Nm +.Cm configure +.Op Fl v +.Op Fl b Ar blocksize +.Op Fl s Ar size +.Ar name +.Nm +.Cm destroy +.Op Fl fv +.Ar name ... +.Nm +.Cm label +.Op Fl v +.Op Fl b Ar blocksize +.Op Fl s Ar size +.Ar name +.Ar prov +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm dump +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Op Fl s Ar name +.Nm +.Cm load +.Op Fl v +.Nm +.Cm unload +.Op Fl v +.Sh DESCRIPTION +The +.Nm +utility is used to control GEOM cache, which can +speed up read performance by sending fixed-size +read requests to its consumer. It has been developed to address +the problem of poor read performance of a 64k blocksize FS +residing on a RAID3 array with 8 data components, where a single +disk component would only get 8k read requests, thus effectively +killing disk performance under high load. +.Pp +Caching can be configured using two different methods: +.Dq manual +or +.Dq automatic . +When using the +.Dq manual +method, no metadata are stored on the devices, so the cached +device has to be configured by hand every time it is needed. +The +.Dq automatic +method uses on-disk metadata to detect devices. +Once devices are labeled, they will be automatically detected and +configured. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm create +Cache the given device with the specified +.Ar name . +This is the +.Dq manual +method. +The kernel module +.Pa geom_cache.ko +will be loaded if it is not loaded already. +.It Cm label +Cache the given device with the specified +.Ar name . +This is the +.Dq automatic +method, where metadata are stored in the device's last sector. +The kernel module +.Pa geom_cache.ko +will be loaded if it is not loaded already. 
+.It Cm stop +Turn off existing cache device by its +.Ar name . +This command does not touch on-disk metadata! +.It Cm destroy +Same as +.Cm stop . +.It Cm clear +Clear metadata on the given devices. +.It Cm dump +Dump metadata stored on the given devices. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width indent +.It Fl f +Force the removal of the specified cache device. +.It Fl v +Be more verbose. +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm CACHE +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.cache.used_hi : No 20 +.It Va kern.geom.cache.used_lo : No 5 +.It Va kern.geom.cache.idletime : No 5 +.It Va kern.geom.cache.timeout : No 10 +.It Va kern.geom.cache.enable : No 1 +.It Va kern.geom.cache.debug : No 0 +Debug level of the +.Nm CACHE +GEOM class. +This can be set to a number between 0 and 3 inclusive. +If set to 0 minimal debug information is printed, and if set to 3 the +maximum amount of debug information is printed. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 7.0 . +.Sh AUTHORS +.An Ruslan Ermilov Aq Mt ru@FreeBSD.org diff --git a/lib/geom/cache/geom_cache.c b/lib/geom/cache/geom_cache.c new file mode 100644 index 000000000000..7be54004f196 --- /dev/null +++ b/lib/geom/cache/geom_cache.c @@ -0,0 +1,240 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2006 Ruslan Ermilov <ru@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <errno.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <libgeom.h> +#include <geom/cache/g_cache.h> + +#include "core/geom.h" +#include "misc/subr.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_CACHE_VERSION; + +#define GCACHE_BLOCKSIZE "65536" +#define GCACHE_SIZE "100" + +static void cache_main(struct gctl_req *req, unsigned flags); +static void cache_clear(struct gctl_req *req); +static void cache_dump(struct gctl_req *req); +static void cache_label(struct gctl_req *req); + +struct g_command class_commands[] = { + { "clear", G_FLAG_VERBOSE, cache_main, G_NULL_OPTS, + "[-v] prov ..." 
+ }, + { "configure", G_FLAG_VERBOSE, NULL, + { + { 'b', "blocksize", "0", G_TYPE_NUMBER }, + { 's', "size", "0", G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-v] [-b blocksize] [-s size] name" + }, + { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, + { + { 'b', "blocksize", GCACHE_BLOCKSIZE, G_TYPE_NUMBER }, + { 's', "size", GCACHE_SIZE, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-v] [-b blocksize] [-s size] name prov" + }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." + }, + { "dump", 0, cache_main, G_NULL_OPTS, + "prov ..." + }, + { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, cache_main, + { + { 'b', "blocksize", GCACHE_BLOCKSIZE, G_TYPE_NUMBER }, + { 's', "size", GCACHE_SIZE, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-v] [-b blocksize] [-s size] name prov" + }, + { "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name ..." + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." 
+ }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + /* Handler for the verbs processed in userland: turns on verbose output when G_FLAG_VERBOSE is set, reads the "verb" request parameter and dispatches to the label, clear or dump routine; any other verb is reported through gctl_error(). */ +static void +cache_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + cache_label(req); + else if (strcmp(name, "clear") == 0) + cache_clear(req); + else if (strcmp(name, "dump") == 0) + cache_dump(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + /* "label" verb: requires exactly two arguments (arg0 = cache name, arg1 = provider). Fills a g_cache_metadata with the magic, version, name, the "blocksize" and "size" options and the provider's media size, encodes it into a zeroed 512-byte buffer and passes that to g_metadata_store(). A media size of 0 or a store error is reported and aborts the command. */ +static void +cache_label(struct gctl_req *req) +{ + struct g_cache_metadata md; + u_char sector[512]; + const char *name; + int error, nargs; + intmax_t val; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + if (nargs != 2) { + gctl_error(req, "Invalid number of arguments."); + return; + } + + strlcpy(md.md_magic, G_CACHE_MAGIC, sizeof(md.md_magic)); + md.md_version = G_CACHE_VERSION; + name = gctl_get_ascii(req, "arg0"); + strlcpy(md.md_name, name, sizeof(md.md_name)); + val = gctl_get_intmax(req, "blocksize"); + md.md_bsize = val; + val = gctl_get_intmax(req, "size"); + md.md_size = val; + + name = gctl_get_ascii(req, "arg1"); + md.md_provsize = g_get_mediasize(name); + if (md.md_provsize == 0) { + fprintf(stderr, "Can't get mediasize of %s: %s.\n", + name, strerror(errno)); + gctl_error(req, "Not fully done."); + return; + } + cache_metadata_encode(&md, sector); + error = g_metadata_store(name, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Can't store metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + return; + } + if (verbose) + printf("Metadata value stored on %s.\n", name); +} + /* "clear" verb: calls g_metadata_clear() with G_CACHE_MAGIC on every provider argument (at least one is required). A failure on one provider is reported and the loop continues with the next one. */ +static void +cache_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name =
gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_CACHE_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + /* Print each field of an already decoded metadata structure to stdout, one field per line. */ +static void +cache_metadata_dump(const struct g_cache_metadata *md) +{ + + printf(" Magic string: %s\n", md->md_magic); + printf(" Metadata version: %u\n", (u_int)md->md_version); + printf(" Device name: %s\n", md->md_name); + printf(" Block size: %u\n", (u_int)md->md_bsize); + printf(" Cache size: %u\n", (u_int)md->md_size); + printf(" Provider size: %ju\n", (uintmax_t)md->md_provsize); +} + /* "dump" verb: for every provider argument (at least one is required) read the raw metadata with g_metadata_read(), decode it and print it. A read failure on one provider is reported and the loop continues with the next one. */ +static void +cache_dump(struct gctl_req *req) +{ + struct g_cache_metadata md, tmpmd; + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_CACHE_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + cache_metadata_decode((u_char *)&tmpmd, &md); + printf("Metadata on %s:\n", name); + cache_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/concat/Makefile b/lib/geom/concat/Makefile new file mode 100644 index 000000000000..c9f9d0e38c75 --- /dev/null +++ b/lib/geom/concat/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= concat + +.include <bsd.lib.mk> diff --git a/lib/geom/concat/Makefile.depend b/lib/geom/concat/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/concat/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/concat/gconcat.8 b/lib/geom/concat/gconcat.8 new file mode 100644 index 000000000000..165f809ffba8 --- /dev/null +++ b/lib/geom/concat/gconcat.8 @@ -0,0 +1,229 @@ +.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.Dd January 23, 2025 +.Dt GCONCAT 8 +.Os +.Sh NAME +.Nm gconcat +.Nd "disk concatenation control utility" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl v +.Ar name +.Ar prov ... +.Nm +.Cm destroy +.Op Fl fv +.Ar name ... +.Nm +.Cm label +.Op Fl hv +.Ar name +.Ar prov ... +.Nm +.Cm append +.Op Fl hv +.Ar name +.Ar prov +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm dump +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for device concatenation configuration. +The concatenation can be configured using two different methods: +.Dq manual +or +.Dq automatic . +When using the +.Dq manual +method, no metadata are stored on the devices, so the concatenated +device has to be configured by hand every time it is needed. +The +.Dq automatic +method uses on-disk metadata to detect devices. +Once devices are labeled, they will be automatically detected and +configured. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm create +Concatenate the given devices with the specified +.Ar name . +This is the +.Dq manual +method. +The kernel module +.Pa geom_concat.ko +will be loaded if it is not loaded already. +.It Cm label +Concatenate the given devices with the specified +.Ar name . +This is the +.Dq automatic +method, where metadata are stored in every device's last sector. +The kernel module +.Pa geom_concat.ko +will be loaded if it is not loaded already. +.Pp +Additional options include: +.Bl -tag -width ".Fl h" +.It Fl h +Hardcode providers' names in metadata. +.El +.It Cm append +Append a new device to the end of an existing concatenate device +with the specified +.Ar name . +.Pp +If the existing device is using the +.Dq manual +method, the new device is simply appended as-is. +.Pp +If the existing device is using the +.Dq automatic +method, the device is appended persistently. 
+New +.Cm gconcat +metadata is written to all existing components, as well as to the +newly added one. +.Pp +Additional options include: +.Bl -tag -width ".Fl h" +.It Fl h +Hardcode providers' names in metadata. +.El +.It Cm stop +Turn off existing concatenate device by its +.Ar name . +This command does not touch on-disk metadata! +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Stop the given device even if it is opened. +.El +.It Cm destroy +Same as +.Cm stop . +.It Cm clear +Clear metadata on the given devices. +.It Cm dump +Dump metadata stored on the given devices. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width indent +.It Fl v +Be more verbose. +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm CONCAT +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.concat.debug : No 0 +Debug level of the +.Nm CONCAT +GEOM class. +This can be set to a number between 0 and 3 inclusive. +If set to 0 minimal debug information is printed, and if set to 3 the +maximum amount of debug information is printed. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +The following example shows how to configure four disks for automatic +concatenation, create a file system on it, and mount it: +.Bd -literal -offset indent +gconcat label -v data /dev/da0 /dev/da1 /dev/da2 /dev/da3 +newfs /dev/concat/data +mount /dev/concat/data /mnt +[...] +umount /mnt +gconcat stop data +gconcat unload +.Ed +.Pp +Configure concatenated provider on one disk only. +Create file system. +Add two more disks and extend existing file system. 
+.Bd -literal -offset indent +gconcat label data /dev/da0 +newfs /dev/concat/data +gconcat label data /dev/da0 /dev/da1 /dev/da2 +growfs /dev/concat/data +.Ed +.Sh SEE ALSO +.Xr geom 4 , +.Xr loader.conf 5 , +.Xr geom 8 , +.Xr growfs 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr sysctl 8 , +.Xr umount 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.3 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org diff --git a/lib/geom/concat/geom_concat.c b/lib/geom/concat/geom_concat.c new file mode 100644 index 000000000000..424f60ce0179 --- /dev/null +++ b/lib/geom/concat/geom_concat.c @@ -0,0 +1,254 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/concat/g_concat.h> + +#include "core/geom.h" +#include "misc/subr.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_CONCAT_VERSION; + +static void concat_main(struct gctl_req *req, unsigned flags); +static void concat_clear(struct gctl_req *req); +static void concat_dump(struct gctl_req *req); +static void concat_label(struct gctl_req *req); + +struct g_command class_commands[] = { + { "append", G_FLAG_VERBOSE, NULL, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-hv] name prov" + }, + { "clear", G_FLAG_VERBOSE, concat_main, G_NULL_OPTS, + "[-v] prov ..." + }, + { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, G_NULL_OPTS, + "[-v] name prov ..." + }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." + }, + { "dump", 0, concat_main, G_NULL_OPTS, + "prov ..." + }, + { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, concat_main, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-hv] name prov ..." + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." 
+ }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +static void +concat_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + concat_label(req); + else if (strcmp(name, "clear") == 0) + concat_clear(req); + else if (strcmp(name, "dump") == 0) + concat_dump(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +static void +concat_label(struct gctl_req *req) +{ + struct g_concat_metadata md; + u_char sector[512]; + const char *name; + int error, i, hardcode, nargs; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + if (nargs < 2) { + gctl_error(req, "Too few arguments."); + return; + } + hardcode = gctl_get_int(req, "hardcode"); + + /* + * Clear last sector first to spoil all components if device exists. + */ + for (i = 1; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, NULL); + if (error != 0) { + gctl_error(req, "Can't store metadata on %s: %s.", name, + strerror(error)); + return; + } + } + + strlcpy(md.md_magic, G_CONCAT_MAGIC, sizeof(md.md_magic)); + md.md_version = G_CONCAT_VERSION; + name = gctl_get_ascii(req, "arg0"); + strlcpy(md.md_name, name, sizeof(md.md_name)); + md.md_id = arc4random(); + md.md_all = nargs - 1; + + /* + * Ok, store metadata. 
+ */ + for (i = 1; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + md.md_no = i - 1; + if (!hardcode) + bzero(md.md_provider, sizeof(md.md_provider)); + else { + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + name += sizeof(_PATH_DEV) - 1; + strlcpy(md.md_provider, name, sizeof(md.md_provider)); + } + md.md_provsize = g_get_mediasize(name); + if (md.md_provsize == 0) { + fprintf(stderr, "Can't get mediasize of %s: %s.\n", + name, strerror(errno)); + gctl_error(req, "Not fully done."); + continue; + } + concat_metadata_encode(&md, sector); + error = g_metadata_store(name, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Can't store metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata value stored on %s.\n", name); + } +} + +static void +concat_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_CONCAT_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +static void +concat_metadata_dump(const struct g_concat_metadata *md) +{ + + printf(" Magic string: %s\n", md->md_magic); + printf(" Metadata version: %u\n", (u_int)md->md_version); + printf(" Device name: %s\n", md->md_name); + printf(" Device ID: %u\n", (u_int)md->md_id); + printf(" Disk number: %u\n", (u_int)md->md_no); + printf("Total number of disks: %u\n", (u_int)md->md_all); + printf(" Hardcoded provider: %s\n", md->md_provider); +} + +static void +concat_dump(struct gctl_req *req) +{ + struct g_concat_metadata md, tmpmd; + const char *name; + int error, i, nargs; + 
+ nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_CONCAT_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + concat_metadata_decode((u_char *)&tmpmd, &md); + printf("Metadata on %s:\n", name); + concat_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/eli/Makefile b/lib/geom/eli/Makefile new file mode 100644 index 000000000000..a22eacb9d7e8 --- /dev/null +++ b/lib/geom/eli/Makefile @@ -0,0 +1,14 @@ +PACKAGE=geom +.PATH: ${SRCTOP}/sys/geom/eli + +GEOM_CLASS= eli +SRCS= g_eli_crypto.c +SRCS+= g_eli_hmac.c +SRCS+= g_eli_key.c +SRCS+= pkcs5v2.c + +LIBADD= md crypto + +CFLAGS+=-I${SRCTOP}/sys + +.include <bsd.lib.mk> diff --git a/lib/geom/eli/Makefile.depend b/lib/geom/eli/Makefile.depend new file mode 100644 index 000000000000..8e5ef128814d --- /dev/null +++ b/lib/geom/eli/Makefile.depend @@ -0,0 +1,18 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + lib/libmd \ + secure/lib/libcrypto \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/eli/geli.8 b/lib/geom/eli/geli.8 new file mode 100644 index 000000000000..876caf67ab40 --- /dev/null +++ b/lib/geom/eli/geli.8 @@ -0,0 +1,1289 @@ +.\" Copyright (c) 2005-2019 Pawel Jakub Dawidek <pawel@dawidek.net> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd April 24, 2024 +.Dt GELI 8 +.Os +.Sh NAME +.Nm geli +.Nd "control utility for the cryptographic GEOM class" +.Sh SYNOPSIS +To compile GEOM_ELI into your kernel, add the following lines to your kernel +configuration file: +.Bd -ragged -offset indent +.Cd "device crypto" +.Cd "options GEOM_ELI" +.Ed +.Pp +Alternatively, to load the GEOM_ELI module at boot time, add the following line +to your +.Xr loader.conf 5 : +.Bd -literal -offset indent +geom_eli_load="YES" +.Ed +.Pp +.No Usage of the Nm +utility: +.Pp +.Nm +.Cm init +.Op Fl bdgPRTv +.Op Fl a Ar aalgo +.Op Fl B Ar backupfile +.Op Fl e Ar ealgo +.Op Fl i Ar iterations +.Op Fl J Ar newpassfile +.Op Fl K Ar newkeyfile +.Op Fl l Ar keylen +.Op Fl s Ar sectorsize +.Op Fl V Ar version +.Ar prov ... 
+.Nm +.Cm label - an alias for +.Cm init +.Nm +.Cm attach +.Op Fl Cdprv +.Op Fl n Ar keyno +.Op Fl j Ar passfile +.Op Fl k Ar keyfile +.Ar prov ... +.Nm +.Cm detach +.Op Fl fl +.Ar prov ... +.Nm +.Cm stop - an alias for +.Cm detach +.Nm +.Cm onetime +.Op Fl dRT +.Op Fl a Ar aalgo +.Op Fl e Ar ealgo +.Op Fl l Ar keylen +.Op Fl s Ar sectorsize +.Ar prov +.Nm +.Cm configure +.Op Fl bBdDgGrRtT +.Ar prov ... +.Nm +.Cm setkey +.Op Fl pPv +.Op Fl i Ar iterations +.Op Fl j Ar passfile +.Op Fl J Ar newpassfile +.Op Fl k Ar keyfile +.Op Fl K Ar newkeyfile +.Op Fl n Ar keyno +.Ar prov +.Nm +.Cm delkey +.Op Fl afv +.Op Fl n Ar keyno +.Ar prov +.Nm +.Cm kill +.Op Fl av +.Op Ar prov ... +.Nm +.Cm backup +.Op Fl v +.Ar prov +.Ar file +.Nm +.Cm restore +.Op Fl fv +.Ar file +.Ar prov +.Nm +.Cm suspend +.Op Fl v +.Fl a | Ar prov ... +.Nm +.Cm resume +.Op Fl pv +.Op Fl j Ar passfile +.Op Fl k Ar keyfile +.Ar prov +.Nm +.Cm resize +.Op Fl v +.Fl s Ar oldsize +.Ar prov +.Nm +.Cm version +.Op Ar prov ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm dump +.Op Fl v +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used to configure encryption on GEOM providers. +.Pp +The following is a list of the most important features: +.Pp +.Bl -bullet -offset indent -compact +.It +Utilizes the +.Xr crypto 9 +framework, so when there is crypto hardware available, +.Nm +will make use of it automatically. +.It +Supports many cryptographic algorithms (currently +.Nm AES-XTS , +.Nm AES-CBC , +and +.Nm Camellia-CBC ) . +.It +Can optionally perform data authentication (integrity verification) utilizing +one of the following algorithms: +.Nm HMAC/SHA1 , +.Nm HMAC/RIPEMD160 , +.Nm HMAC/SHA256 , +.Nm HMAC/SHA384 +or +.Nm HMAC/SHA512 . +.It +Can create a User Key from up to two, piecewise components: a passphrase +entered via prompt or read from one or more passfiles; a keyfile read from +one or more files. 
+.It +Allows encryption of the root partition. +The user is asked for the passphrase before the root filesystem is mounted. +.It +Strengthens the passphrase component of the User Key with: +.Rs +.%A B. Kaliski +.%T "PKCS #5: Password-Based Cryptography Specification, Version 2.0." +.%R RFC +.%N 2898 +.Re +.It +Allows the use of two independent User Keys (e.g., a +.Qq "user key" +and a +.Qq "company key" ) . +.It +It is fast - +.Nm +performs simple sector-to-sector encryption. +.It +Allows the encrypted Master Key to be backed up and restored, +so that if a user has to quickly destroy key material, +it is possible to get the data back by restoring keys from +backup. +.It +Providers can be configured to automatically detach on last close, +so users do not have to remember to detach providers after unmounting +the filesystems. +.It +Allows attaching a provider with a random, one-time Master Key, +which is useful for swap partitions and temporary filesystems. +.It +Allows verification of data integrity (data authentication). +.It +Allows suspending and resuming encrypted devices. +.El +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm configure" +.It Cm init +Initialize providers which need to be encrypted. +If multiple providers are listed as arguments, they will all be initialized +with the same passphrase and/or User Key. +A unique salt will be randomly generated for each provider to ensure the +Master Key for each is unique. +Here you can set up the cryptographic algorithm to use, Data Key length, +etc. +The last sector of the providers is used to store metadata. +The +.Cm init +subcommand also automatically writes metadata backups to +.Pa /var/backups/<prov>.eli +file. +The metadata can be recovered with the +.Cm restore +subcommand described below. +.Pp +Additional options include: +.Bl -tag -width ".Fl J Ar newpassfile" +.It Fl a Ar aalgo +Enable data integrity verification (authentication) using the given algorithm. 
+This will reduce the size of storage available and also reduce speed. +For example, when using 4096 bytes sector and +.Nm HMAC/SHA256 +algorithm, 89% of the original provider storage will be available for use. +Currently supported algorithms are: +.Nm HMAC/SHA1 , +.Nm HMAC/RIPEMD160 , +.Nm HMAC/SHA256 , +.Nm HMAC/SHA384 +and +.Nm HMAC/SHA512 . +If the option is not given, there will be no authentication, only encryption. +The recommended algorithm is +.Nm HMAC/SHA256 . +.It Fl b +Try to decrypt this partition during boot, before the root partition is mounted. +This makes it possible to use an encrypted root partition. +One will still need bootable unencrypted storage with a +.Pa /boot/ +directory, which can be a CD-ROM disc or USB pen-drive, that can be removed +after boot. +.It Fl B Ar backupfile +File name to use for metadata backup instead of the default +.Pa /var/backups/<prov>.eli . +To inhibit backups, you can use +.Pa none +as the +.Ar backupfile . +If multiple providers were initialized in the one command, you can use +.Pa PROV +(all upper-case) in the file name, and it will be replaced with the provider +name. +If +.Pa PROV +is not found in the file name and multiple providers were initialized in the +one command, +.Pa -<prov> +will be appended to the end of the file name specified. +.It Fl d +When entering the passphrase to boot from this encrypted root filesystem, echo +.Ql * +characters. +This makes the length of the passphrase visible. +.It Fl e Ar ealgo +Encryption algorithm to use. +Currently supported algorithms are: +.Nm AES-XTS , +.Nm AES-CBC , +.Nm Camellia-CBC , +and +.Nm NULL . +The default and recommended algorithm is +.Nm AES-XTS . +.Nm NULL +is unencrypted. +.It Fl g +Enable booting from this encrypted root filesystem. +The boot loader prompts for the passphrase and loads +.Xr loader 8 +from the encrypted partition. +.It Fl i Ar iterations +Number of iterations to use with PKCS#5v2 when processing User Key +passphrase component. 
+If this option is not specified, +.Nm +will find the number of iterations which is equal to 2 seconds of crypto work. +If 0 is given, PKCS#5v2 will not be used. +PKCS#5v2 processing is performed once, after all parts of the passphrase +component have been read. +.It Fl J Ar newpassfile +Specifies a file which contains the passphrase component of the User Key +(or part of it). +If +.Ar newpassfile +is given as -, standard input will be used. +Only the first line (excluding new-line character) is taken from the given file. +This argument can be specified multiple times, which has the effect of +reassembling a single passphrase split across multiple files. +Cannot be combined with the +.Fl P +option. +.It Fl K Ar newkeyfile +Specifies a file which contains the keyfile component of the User Key +(or part of it). +If +.Ar newkeyfile +is given as -, standard input will be used. +This argument can be specified multiple times, which has the effect of +reassembling a single keyfile split across multiple keyfile parts. +.It Fl l Ar keylen +Data Key length to use with the given cryptographic algorithm. +If the length is not specified, the selected algorithm uses its +.Em default +key length. +.Bl -ohang -offset indent +.It Nm AES-XTS +.Em 128 , +256 +.It Nm AES-CBC , Nm Camellia-CBC +.Em 128 , +192, +256 +.El +.It Fl P +Do not use a passphrase as a component of the User Key. +Cannot be combined with the +.Fl J +option. +.It Fl s Ar sectorsize +Change decrypted provider's sector size. +Increasing the sector size allows increased performance, +because encryption/decryption which requires an initialization vector +is done per sector; fewer sectors means less computational work. +.It Fl R +Turn off automatic expansion. +By default, if the underlying provider grows, the encrypted provider will +grow automatically too. +The metadata will be moved to the new location. 
+If automatic expansion is turned off and the underlying provider changes +size, attaching the encrypted provider will no longer be possible as the metadata +will no longer be located in the last sector. +In this case +.Nm GELI +will only log the previous size of the underlying provider, so metadata can +be found more easily if the resize was done by mistake. +.It Fl T +Don't pass through +.Dv BIO_DELETE +calls (i.e., TRIM/UNMAP). +This can prevent an attacker from knowing how much space you're actually +using and which sectors contain live data, but will also prevent the +backing store (SSD, etc.) from reclaiming space you're not using, which +may degrade its performance and lifespan. +The underlying provider may or may not actually obliterate the deleted +sectors when TRIM is enabled, so it should not be considered to add any +security. +.It Fl V Ar version +Metadata version to use. +This option is helpful when creating a provider that may be used by older +.Nm FreeBSD/GELI +versions. +Consult the +.Sx HISTORY +section to find which metadata version is supported by which +.Fx +version. +Note that using an older version of metadata may limit the number of +features available. +.El +.It Cm attach +Attach the given providers. +The encrypted Master Keys are loaded from the metadata and decrypted +using the given passphrase/keyfile and new GEOM providers are created +using the specified provider names. +A +.Qq .eli +suffix is added to the user specified provider names. +Multiple providers can only be attached with a single +.Cm attach +command if they all have the same passphrase and keyfiles. +.Pp +Additional options include: +.Bl -tag -width ".Fl j Ar passfile" +.It Fl C +Do a dry-run decryption. +This is useful to verify passphrase and keyfile without decrypting the device. +.It Fl d +If specified, the decrypted providers are detached automatically on last close, +so the user does not have to remember to detach +providers after unmounting the filesystems.
+This only works when providers were opened for writing, and will not work if +the filesystems on the providers were mounted read-only. +Probably a better choice is the +.Fl l +option for the +.Cm detach +subcommand. +.It Fl n Ar keyno +Specifies the index number of the Master Key copy to use (could be 0 or 1). +If the index number is not provided all keys will be tested. +.It Fl j Ar passfile +Specifies a file which contains the passphrase component of the User Key +(or part of it). +For more information see the description of the +.Fl J +option for the +.Cm init +subcommand. +The same passfiles are used for all listed providers. +.It Fl k Ar keyfile +Specifies a file which contains the keyfile component of the User Key +(or part of it). +For more information see the description of the +.Fl K +option for the +.Cm init +subcommand. +The same keyfiles are used for all listed providers. +.It Fl p +Do not use a passphrase as a component of the User Keys. +Cannot be combined with the +.Fl j +option. +.It Fl r +Attach read-only providers. +They are not opened for writing. +.El +.It Cm detach +Detach the given providers, which means remove the devfs entry +and clear the Master Key and Data Keys from memory. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Force detach - detach even if the provider is open. +.It Fl l +Mark provider to detach on last close, after the last filesystem has been +unmounted. +If this option is specified, the provider will not be detached +while it is open, but will be automatically detached when it is closed for the +last time even if it was only opened for reading. +.El +.It Cm onetime +Attach the given providers with a random, one-time (ephemeral) Master Key. +The command can be used to encrypt swap partitions or temporary filesystems. +.Pp +Additional options include: +.Bl -tag -width ".Fl a Ar sectorsize" +.It Fl a Ar aalgo +Enable data integrity verification (authentication). 
+For more information, see the description of the +.Cm init +subcommand. +.It Fl e Ar ealgo +Encryption algorithm to use. +For more information, see the description of the +.Cm init +subcommand. +.It Fl d +Detach on last close, after the last filesystem has been unmounted. +Note: this option is not usable for temporary filesystems as the provider is +detached after the filesystem has been created. +It still can, and should, be used for swap partitions. +For more information, see the description of the +.Cm attach +subcommand. +.It Fl l Ar keylen +Data Key length to use with the given cryptographic algorithm. +For more information, see the description of the +.Cm init +subcommand. +.It Fl s Ar sectorsize +Change decrypted provider's sector size. +For more information, see the description of the +.Cm init +subcommand. +.It Fl R +Turn off automatic expansion. +For more information, see the description of the +.Cm init +subcommand. +.It Fl T +Disable TRIM/UNMAP passthru. +For more information, see the description of the +.Cm init +subcommand. +.El +.It Cm configure +Change configuration of the given providers. +.Pp +Additional options include: +.Bl -tag -width ".Fl b" +.It Fl b +Set the BOOT flag on the given providers. +For more information, see the description of the +.Cm init +subcommand. +.It Fl B +Remove the BOOT flag from the given providers. +.It Fl d +When entering the passphrase to boot from this encrypted root filesystem, echo +.Ql * +characters. +This makes the length of the passphrase visible. +.It Fl D +Disable echoing of any characters when a passphrase is entered to boot from this +encrypted root filesystem. +This hides the passphrase length. +.It Fl g +Enable booting from this encrypted root filesystem. +The boot loader prompts for the passphrase and loads +.Xr loader 8 +from the encrypted partition. +.It Fl G +Deactivate booting from this encrypted root partition. +.It Fl r +Turn on automatic expansion. 
+For more information, see the description of the +.Cm init +subcommand. +.It Fl R +Turn off automatic expansion. +.It Fl t +Enable TRIM/UNMAP passthru. +For more information, see the description of the +.Cm init +subcommand. +.It Fl T +Disable TRIM/UNMAP passthru. +.El +.It Cm setkey +Install a copy of the Master Key into the selected slot, encrypted with +a new User Key. +If the selected slot is populated, replace the existing copy. +A provider has one Master Key, which can be stored in one or both slots, +each encrypted with an independent User Key. +With the +.Cm init +subcommand, only key number 0 is initialized. +The User Key can be changed at any time: for an attached provider, +for a detached provider, or on the backup file. +When a provider is attached, the user does not have to provide +an existing passphrase/keyfile. +.Pp +Additional options include: +.Bl -tag -width ".Fl J Ar newpassfile" +.It Fl i Ar iterations +Number of iterations to use with PKCS#5v2. +If 0 is given, PKCS#5v2 will not be used. +To be able to use this option with the +.Cm setkey +subcommand, only one key has to be defined and this key must be changed. +.It Fl j Ar passfile +Specifies a file which contains the passphrase component of a current User Key +(or part of it). +.It Fl J Ar newpassfile +Specifies a file which contains the passphrase component of the new User Key +(or part of it). +.It Fl k Ar keyfile +Specifies a file which contains the keyfile component of a current User Key +(or part of it). +.It Fl K Ar newkeyfile +Specifies a file which contains the keyfile component of the new User Key +(or part of it). +.It Fl n Ar keyno +Specifies the index number of the Master Key copy to change (could be 0 or 1). +If the provider is attached and no key number is given, the key +used for attaching the provider will be changed. 
+If the provider is detached (or we are operating on a backup file) +and no key number is given, the first Master Key copy to be successfully +decrypted with the provided User Key passphrase/keyfile will be changed. +.It Fl p +Do not use a passphrase as a component of the current User Key. +Cannot be combined with the +.Fl j +option. +.It Fl P +Do not use a passphrase as a component of the new User Key. +Cannot be combined with the +.Fl J +option. +.El +.It Cm delkey +Destroy (overwrite with random data) the selected Master Key copy. +If one is destroying keys for an attached provider, the provider +will not be detached even if all copies of the Master Key are destroyed. +It can even be rescued with the +.Cm setkey +subcommand because the Master Key is still in memory. +.Pp +Additional options include: +.Bl -tag -width ".Fl a Ar keyno" +.It Fl a +Destroy all copies of the Master Key (does not need +.Fl f +option). +.It Fl f +Force key destruction. +This option is needed to destroy the last copy of the Master Key. +.It Fl n Ar keyno +Specifies the index number of the Master Key copy. +If the provider is attached and no key number is given, the key +used for attaching the provider will be destroyed. +If provider is detached (or we are operating on a backup file) the key number +has to be given. +.El +.It Cm kill +This command should be used only in emergency situations. +It will destroy all copies of the Master Key on a given provider and will +detach it forcibly (if it is attached). +This is absolutely a one-way command - if you do not have a metadata +backup, your data is gone for good. +In case the provider was attached with the +.Fl r +flag, the keys will not be destroyed, only the provider will be detached. +.Pp +Additional options include: +.Bl -tag -width ".Fl a" +.It Fl a +If specified, all currently attached providers will be killed. +.El +.It Cm backup +Backup metadata from the given provider to the given file. 
+.It Cm restore +Restore metadata from the given file to the given provider. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Metadata contains the size of the provider to ensure that the correct +partition or slice is attached. +If an attempt is made to restore metadata to a provider that has a different +size, +.Nm +will refuse to restore the data unless the +.Fl f +switch is used. +If the partition or slice has been grown, the +.Cm resize +subcommand should be used rather than attempting to relocate the metadata +through +.Cm backup +and +.Cm restore . +.El +.It Cm suspend +Suspend a device by waiting for all inflight requests to finish, clearing all +sensitive information such as the Master Key and Data Keys from kernel memory, +and blocking all further I/O requests until the +.Cm resume +subcommand is executed. +This functionality is useful for laptops. +Suspending a laptop should not leave an encrypted device attached. +The +.Cm suspend +subcommand can be used rather than closing all files and directories from +filesystems on the encrypted device, unmounting the filesystem, and +detaching the device. +Any access to the encrypted device will be blocked until the Master Key is +reloaded through the +.Cm resume +subcommand. +Thus, there is no need to close or unmount anything. +The +.Cm suspend +subcommand does not work with devices created with the +.Cm onetime +subcommand. +Please note that sensitive data might still be present in memory locations +such as the filesystem cache after suspending an encrypted device. +.Pp +Additional options include: +.Bl -tag -width ".Fl a" +.It Fl a +Suspend all +.Nm +devices. +.El +.It Cm resume +Resume a previously suspended device. +The caller must ensure that executing this subcommand does not access the +suspended device, as that would lead to a deadlock. +For example, suspending a device which contains the filesystem where the +.Nm +utility is stored is a bad idea.
+.Pp +Additional options include: +.Bl -tag -width ".Fl j Ar passfile" +.It Fl j Ar passfile +Specifies a file which contains the passphrase component of the User Key, +or part of it. +For more information see the description of the +.Fl J +option for the +.Cm init +subcommand. +.It Fl k Ar keyfile +Specifies a file which contains the keyfile component of the User Key, +or part of it. +For more information see the description of the +.Fl K +option for the +.Cm init +subcommand. +.It Fl p +Do not use a passphrase as a component of the User Key. +Cannot be combined with the +.Fl j +option. +.El +.It Cm resize +Inform +.Nm +that the provider has been resized. +The old metadata block is relocated to the correct position at the end of the +provider and the provider size is updated. +.Pp +Additional options include: +.Bl -tag -width ".Fl s Ar oldsize" +.It Fl s Ar oldsize +The size of the provider before it was resized. +.El +.It Cm version +If no arguments are given, the +.Cm version +subcommand will print the version of +.Nm +userland utility as well as the version of the +.Nm ELI +GEOM class. +.Pp +If GEOM providers are specified, the +.Cm version +subcommand will print metadata version used by each of them. +.It Cm clear +Clear metadata from the given providers. +.Em WARNING : +This will erase with zeros the encrypted Master Key copies stored in the +metadata. +.It Cm dump +Dump metadata stored on the given providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options include: +.Bl -tag -width ".Fl v" +.It Fl v +Be more verbose. +.El +.Sh KEY SUMMARY +.Ss Master Key +Upon +.Cm init , +the +.Nm +utility generates a random Master Key for the provider. +The Master Key never changes during the lifetime of the provider. +Each copy of the provider metadata, active or backed up to a file, can store +up to two, independently-encrypted copies of the Master Key. 
+.Ss User Key +Each stored copy of the Master Key is encrypted with a User Key, which +is generated by the +.Nm +utility from a passphrase and/or a keyfile. +The +.Nm +utility first reads all parts of the keyfile in the order specified on the +command line, then reads all parts of the stored passphrase in the order +specified on the command line. +If no passphrase parts are specified, the system prompts the user to enter +the passphrase. +The passphrase is optionally strengthened by PKCS#5v2. +The User Key is a digest computed over the concatenated keyfile and passphrase. +.Ss Data Key +During operation, one or more Data Keys are deterministically derived by +the kernel from the Master Key and cached in memory. +The number of Data Keys used by a given provider, and the way they are +derived, depend on the GELI version and whether the provider is configured to +use data authentication. +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm ELI +GEOM class. +The default value is shown next to each variable. +Some variables can also be set in +.Pa /boot/loader.conf . +.Bl -tag -width indent +.It Va kern.geom.eli.version +Version number of the +.Nm ELI +GEOM class. +.It Va kern.geom.eli.debug : No 0 +Debug level of the +.Nm ELI +GEOM class. +This can be set to a number between 0 and 3 inclusive. +If set to 0, minimal debug information is printed. +If set to 3, the +maximum amount of debug information is printed. +.It Va kern.geom.eli.tries : No 3 +Number of times a user is asked for the passphrase. +This is only used for providers which are attached on boot, +before the root filesystem is mounted. +If set to 0, attaching providers on boot will be disabled. +This variable should be set in +.Pa /boot/loader.conf . +.It Va kern.geom.eli.overwrites : No 5 +Specifies how many times the Master Key is overwritten +with random values when it is destroyed. +After this operation it is filled with zeros. 
+.It Va kern.geom.eli.use_uma_bytes +.Nm +must allocate a buffer for every write operation, used when performing +encryption. +This sysctl reports the maximum size in bytes for which geli will perform the +allocation using +.Xr uma 9 , +as opposed to +.Xr malloc 9 . +.It Va kern.geom.eli.visible_passphrase : No 0 +If set to 1, the passphrase entered on boot will be visible. +This alternative should be used with caution as the entered +passphrase can be logged and exposed via +.Xr dmesg 8 . +This variable should be set in +.Pa /boot/loader.conf . +.It Va kern.geom.eli.threads : No 0 +Specifies how many kernel threads should be used for doing software +cryptography. +Its purpose is to increase performance on SMP systems. +If set to 0, a CPU-pinned thread will be started for every active CPU. +Note that this variable must be set prior to attaching +.Nm +to a disk. +.It Va kern.geom.eli.batch : No 0 +When set to 1, can speed up crypto operations by using batching. +Batching reduces the number of interrupts by responding to a group of +crypto requests with one interrupt. +The crypto card and the driver have to support this feature. +.It Va kern.geom.eli.key_cache_limit : No 8192 +Specifies how many Data Keys to cache. +The default limit +(8192 keys) will allow caching of all keys for a 4TB provider with 512-byte +sectors and will take around 1MB of memory. +.It Va kern.geom.eli.key_cache_hits +Reports how many times we were looking up a Data Key and it was already in +cache. +This sysctl is not updated for providers that need fewer Data Keys than +the limit specified in +.Va kern.geom.eli.key_cache_limit . +.It Va kern.geom.eli.key_cache_misses +Reports how many times we were looking up a Data Key and it was not in cache. +This sysctl is not updated for providers that need fewer Data Keys than the limit +specified in +.Va kern.geom.eli.key_cache_limit .
+.It Va kern.geom.eli.unmapped_io +Enable support for unmapped I/O buffers, currently implemented only on 64-bit +platforms. +This is an optimization which reduces the overhead of I/O processing. +This variable is intended for debugging purposes and must be set in +.Pa /boot/loader.conf . +.El +.Sh PERFORMANCE CONSIDERATIONS +The default value of +.Va kern.geom.eli.threads +is usually good for a system with one SSD. +However, it may need to be lowered on systems with many disks, +so as to avoid creating too much thread-switching overhead. +On systems with more disks than CPUs, it's best to set this variable +to 1. +.Pp +.Nm +internally uses +.Xr malloc 9 +to allocate memory for operations larger than +.Va kern.geom.eli.use_uma_bytes , +but malloc is slow for allocations larger than +.Va vm.kmem_zmax . +So it's best to avoid writing more than +.Ms MAX(kern.geom.eli.use_uma_bytes, vm.kmem_zmax) +in a single write operation. +On systems that format +.Xr zfs 4 +on top of +.Nm , +the maximum write size can be controlled by +.Va vfs.zfs.vdev.aggregation_limit +and +.Va vfs.zfs.vdev.aggregation_limit_non_rotating +for HDDs and SSDs, respectively. +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +Initialize a provider which is going to be encrypted with a +passphrase and random data from a file on the user's pen drive. +Use 4kB sector size. +Attach the provider, create a filesystem, and mount it. +Do the work. +Unmount the provider and detach it: +.Bd -literal -offset indent +# dd if=/dev/random of=/mnt/pendrive/da2.key bs=64 count=1 +# geli init -s 4096 -K /mnt/pendrive/da2.key /dev/da2 +Enter new passphrase: +Reenter new passphrase: +# geli attach -k /mnt/pendrive/da2.key /dev/da2 +Enter passphrase: +# dd if=/dev/random of=/dev/da2.eli bs=1m +# newfs /dev/da2.eli +# mount /dev/da2.eli /mnt/secret +\&... 
+# umount /mnt/secret +# geli detach da2.eli +.Ed +.Pp +Create an encrypted provider, but use two User Keys: +one for your employee and one for you as the company's security officer +(so it is not a tragedy if the employee +.Qq accidentally +forgets his passphrase): +.Bd -literal -offset indent +# geli init /dev/da2 +Enter new passphrase: (enter security officer's passphrase) +Reenter new passphrase: +# geli setkey -n 1 /dev/da2 +Enter passphrase: (enter security officer's passphrase) +Enter new passphrase: (let your employee enter his passphrase ...) +Reenter new passphrase: (... twice) +.Ed +.Pp +You are the security officer in your company. +Create an encrypted provider for use by the user, but remember that users +forget their passphrases, so backup the Master Key with your own random key: +.Bd -literal -offset indent +# dd if=/dev/random of=/mnt/pendrive/keys/`hostname` bs=64 count=1 +# geli init -P -K /mnt/pendrive/keys/`hostname` /dev/ada0s1e +# geli backup /dev/ada0s1e /mnt/pendrive/backups/`hostname` +(use key number 0, so the encrypted Master Key will be re-encrypted by this) +# geli setkey -n 0 -k /mnt/pendrive/keys/`hostname` /dev/ada0s1e +(allow the user to enter his passphrase) +Enter new passphrase: +Reenter new passphrase: +.Ed +.Pp +Encrypted swap partition setup: +.Bd -literal -offset indent +# dd if=/dev/random of=/dev/ada0s1b bs=1m +# geli onetime -d ada0s1b +# swapon /dev/ada0s1b.eli +.Ed +.Pp +The example below shows how to configure two providers which will be attached +on boot, before the root filesystem is mounted. 
+One of them is using passphrase and three keyfile parts and the other is +using only a keyfile in one part: +.Bd -literal -offset indent +# dd if=/dev/random of=/dev/da0 bs=1m +# dd if=/dev/random of=/boot/keys/da0.key0 bs=32k count=1 +# dd if=/dev/random of=/boot/keys/da0.key1 bs=32k count=1 +# dd if=/dev/random of=/boot/keys/da0.key2 bs=32k count=1 +# geli init -b -K /boot/keys/da0.key0 -K /boot/keys/da0.key1 -K /boot/keys/da0.key2 da0 +Enter new passphrase: +Reenter new passphrase: +# dd if=/dev/random of=/dev/da1s3a bs=1m +# dd if=/dev/random of=/boot/keys/da1s3a.key bs=128k count=1 +# geli init -b -P -K /boot/keys/da1s3a.key da1s3a +.Ed +.Pp +The providers are initialized, now we have to add these lines to +.Pa /boot/loader.conf : +.Bd -literal -offset indent +geli_da0_keyfile0_load="YES" +geli_da0_keyfile0_type="da0:geli_keyfile0" +geli_da0_keyfile0_name="/boot/keys/da0.key0" +geli_da0_keyfile1_load="YES" +geli_da0_keyfile1_type="da0:geli_keyfile1" +geli_da0_keyfile1_name="/boot/keys/da0.key1" +geli_da0_keyfile2_load="YES" +geli_da0_keyfile2_type="da0:geli_keyfile2" +geli_da0_keyfile2_name="/boot/keys/da0.key2" + +geli_da1s3a_keyfile0_load="YES" +geli_da1s3a_keyfile0_type="da1s3a:geli_keyfile0" +geli_da1s3a_keyfile0_name="/boot/keys/da1s3a.key" +.Ed +.Pp +If there is only one keyfile, the index might be omitted: +.Bd -literal -offset indent +geli_da1s3a_keyfile_load="YES" +geli_da1s3a_keyfile_type="da1s3a:geli_keyfile" +geli_da1s3a_keyfile_name="/boot/keys/da1s3a.key" +.Ed +.Pp +By convention, these loader variables are called +.Sm off +.Va geli_ No < Ar device No > Va _load . +.Sm on +However, the actual name prefix before +.Va _load , _type , +or +.Va _name +does not matter. +At boot time, the +.Nm +module searches through all +.Sm off +.No < Va prefix No > Va _type No -like +.Sm on +variables that have a value of +.Sm off +.Dq < Ar device No > :geli_keyfile . 
+.Sm on +The paths to keyfiles are then extracted from +.Sm off +.No < Ar prefix No > Va _name +.Sm on +variables. +In the example above, +.Ar prefix +is +.Dq Li geli_da1s3a_keyfile . +.Pp +Not only configure encryption, but also data integrity verification using +.Nm HMAC/SHA256 . +.Bd -literal -offset indent +# geli init -a hmac/sha256 -s 4096 /dev/da0 +Enter new passphrase: +Reenter new passphrase: +# geli attach /dev/da0 +Enter passphrase: +# dd if=/dev/random of=/dev/da0.eli bs=1m +# newfs /dev/da0.eli +# mount /dev/da0.eli /mnt/secret +.Ed +.Pp +.Cm geli +writes the metadata backup by default to the +.Pa /var/backups/<prov>.eli +file. +If the metadata is lost in any way (e.g., by accidental overwrite), it can be restored. +Consider the following situation: +.Bd -literal -offset indent +# geli init /dev/da0 +Enter new passphrase: +Reenter new passphrase: + +Metadata backup can be found in /var/backups/da0.eli and +can be restored with the following command: + + # geli restore /var/backups/da0.eli /dev/da0 + +# geli clear /dev/da0 +# geli attach /dev/da0 +geli: Cannot read metadata from /dev/da0: Invalid argument. +# geli restore /var/backups/da0.eli /dev/da0 +# geli attach /dev/da0 +Enter passphrase: +.Ed +.Pp +If an encrypted filesystem is extended, it is necessary to relocate and +update the metadata: +.Bd -literal -offset indent +# gpart create -s GPT ada0 +# gpart add -s 1g -t freebsd-ufs -i 1 ada0 +# geli init -K keyfile -P ada0p1 +# gpart resize -s 2g -i 1 ada0 +# geli resize -s 1g ada0p1 +# geli attach -k keyfile -p ada0p1 +.Ed +.Pp +Initialize provider with the passphrase split into two files. 
+The provider can be attached using those two files or by entering +.Dq foobar +as the passphrase at the +.Nm +prompt: +.Bd -literal -offset indent +# echo foo > da0.pass0 +# echo bar > da0.pass1 +# geli init -J da0.pass0 -J da0.pass1 da0 +# geli attach -j da0.pass0 -j da0.pass1 da0 +# geli detach da0 +# geli attach da0 +Enter passphrase: foobar +.Ed +.Pp +Suspend all +.Nm +devices on a laptop, suspend the laptop, then resume devices one by one after +resuming the laptop: +.Bd -literal -offset indent +# geli suspend -a +# zzz +<resume your laptop> +# geli resume -p -k keyfile gpt/secret +# geli resume gpt/private +Enter passphrase: +.Ed +.Pp +To create a +.Nm +encrypted filesystem with a file as the storage device, follow this example. +First, a file named private0 is created in +.Pa /usr +and attached as a memory disk like +.Pa /dev/md0 +for example. +.Bd -literal -offset indent +# dd if=/dev/zero of=/usr/private0 bs=1m count=256 +# chmod 0600 /usr/private0 +# mdconfig -t vnode -f /usr/private0 +.Ed +.Pp +It is recommended to place the following line in +.Xr rc.conf 5 +to have the memory disk automatically created during boot. +.Bd -literal -offset indent +mdconfig_md0="-t vnode -f /usr/private0" +.Ed +.Pp +After +.Pa /dev/md0 +is created, a random key has to be generated and stored in a secure location, +like +.Pa /root +for example. +This key should be protected by a passphrase, which +is requested when geli init is called. +.Bd -literal -offset indent +# dd if=/dev/random of=/root/private0.key bs=64 count=1 +# geli init -K /root/private0.key -s 4096 /dev/md0 +Enter new passphrase: +Reenter new passphrase: +# geli attach -k /root/private0.key /dev/md0 +Enter passphrase: +# dd if=/dev/random of=/dev/md0.eli bs=1m +.Ed +.Pp +Once the initialization of the +.Pa /dev/md0.eli +device is ready, create a UFS filesystem and mount it, for example, in +.Pa /private .
+.Bd -literal -offset indent +# newfs /dev/md0.eli +# mount /dev/md0.eli /private +.Ed +.Pp +After a system reboot the +.Nm +device can be mounted again with the following commands. +The call of geli attach will ask for the passphrase. +It is recommended to do this procedure after the boot, because otherwise +the boot process would be waiting for the passphrase input. +.Bd -literal -offset indent +# geli attach -k /root/private0.key /dev/md0 +Enter passphrase: +# mount /dev/md0.eli /private +.Ed +.Sh ENCRYPTION MODES +.Nm +supports two encryption modes: +.Nm XTS , +which was standardized as +.Nm IEEE P1619 +and +.Nm CBC +with unpredictable IV. +The +.Nm CBC +mode used by +.Nm +is very similar to the mode +.Nm ESSIV . +.Sh DATA AUTHENTICATION +.Nm +can verify data integrity when an authentication algorithm is specified. +When data corruption/modification is detected, +.Nm +will not return any data, but instead will return an error +.Pq Er EINVAL . +The offset and size of the corrupted data will be printed on the console. +It is important to know against which attacks +.Nm +provides protection for your data. +If data is modified in-place or copied from one place on the disk +to another even without modification, +.Nm +should be able to detect such a change. +If an attacker can remember the encrypted data, he can overwrite any future +changes with the data he owns without it being noticed. +In other words +.Nm +will not protect your data against replay attacks. +.Pp +It is recommended to write to the whole provider before first use, +in order to make sure that all sectors and their corresponding +checksums are properly initialized into a consistent state. +One can safely ignore data authentication errors that occur immediately +after the first time a provider is attached and before it is +initialized in this way. +.Sh SEE ALSO +.Xr crypto 4 , +.Xr geom 4 , +.Xr loader.conf 5 , +.Xr geom 8 , +.Xr crypto 9 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 6.0 . 
+Support for the +.Nm Camellia +block cipher was implemented by Yoshisato Yanagisawa in +.Fx 7.0 . +.Pp +Highest +.Nm GELI +metadata version supported by the given +.Fx +version: +.Bl -column -offset indent ".Sy FreeBSD" ".Sy version" +.It Sy FreeBSD Ta Sy GELI +.It Sy version Ta Sy version +.Pp +.It Li 6.0 Ta 0 +.It Li 6.1 Ta 0 +.It Li 6.2 Ta 3 +.It Li 6.3 Ta 3 +.It Li 6.4 Ta 3 +.Pp +.It Li 7.0 Ta 3 +.It Li 7.1 Ta 3 +.It Li 7.2 Ta 3 +.It Li 7.3 Ta 3 +.It Li 7.4 Ta 3 +.Pp +.It Li 8.0 Ta 3 +.It Li 8.1 Ta 3 +.It Li 8.2 Ta 5 +.Pp +.It Li 9.0 Ta 6 +.Pp +.It Li 10.0 Ta 7 +.El +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org diff --git a/lib/geom/eli/geom_eli.c b/lib/geom/eli/geom_eli.c new file mode 100644 index 000000000000..4dd1c5dea35d --- /dev/null +++ b/lib/geom/eli/geom_eli.c @@ -0,0 +1,2014 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2019 Pawel Jakub Dawidek <pawel@dawidek.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/mman.h> +#include <sys/sysctl.h> +#include <sys/resource.h> +#include <opencrypto/cryptodev.h> + +#include <assert.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <libgeom.h> +#include <paths.h> +#include <readpassphrase.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> + +#include <geom/eli/g_eli.h> +#include <geom/eli/pkcs5v2.h> + +#include "core/geom.h" +#include "misc/subr.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_ELI_VERSION; + +#define GELI_BACKUP_DIR "/var/backups/" +#define GELI_ENC_ALGO "aes" +#define BUFSIZE 1024 + +/* + * Passphrase cached when attaching multiple providers, in order to be more + * user-friendly if they are using the same passphrase. 
+ */ +static char cached_passphrase[BUFSIZE] = ""; + +static void eli_main(struct gctl_req *req, unsigned flags); +static void eli_init(struct gctl_req *req); +static void eli_attach(struct gctl_req *req); +static void eli_configure(struct gctl_req *req); +static void eli_setkey(struct gctl_req *req); +static void eli_delkey(struct gctl_req *req); +static void eli_resume(struct gctl_req *req); +static void eli_kill(struct gctl_req *req); +static void eli_backup(struct gctl_req *req); +static void eli_restore(struct gctl_req *req); +static void eli_resize(struct gctl_req *req); +static void eli_version(struct gctl_req *req); +static void eli_clear(struct gctl_req *req); +static void eli_dump(struct gctl_req *req); + +static int eli_backup_create(struct gctl_req *req, const char *prov, + const char *file); + +/* + * Available commands: + * + * init [-bdgPRTv] [-a aalgo] [-B backupfile] [-e ealgo] [-i iterations] [-l keylen] [-J newpassfile] [-K newkeyfile] [-s sectorsize] [-V version] prov ... + * label - alias for 'init' + * attach [-Cdprv] [-n keyno] [-j passfile] [-k keyfile] prov ... + * detach [-fl] prov ... + * stop - alias for 'detach' + * onetime [-dRT] [-a aalgo] [-e ealgo] [-l keylen] [-s sectorsize] prov + * configure [-bBdDgGrRtT] prov ... + * setkey [-pPv] [-n keyno] [-i iterations] [-j passfile] [-J newpassfile] [-k keyfile] [-K newkeyfile] prov + * delkey [-afv] [-n keyno] prov + * suspend [-v] -a | prov ... + * resume [-pv] [-j passfile] [-k keyfile] prov + * kill [-av] [prov ...] + * backup [-v] prov file + * restore [-fv] file prov + * resize [-v] -s oldsize prov + * version [prov ...] + * clear [-v] prov ... + * dump [-v] prov ... 
+ */ +struct g_command class_commands[] = { + { "init", G_FLAG_VERBOSE, eli_main, + { + { 'a', "aalgo", "", G_TYPE_STRING }, + { 'b', "boot", NULL, G_TYPE_BOOL }, + { 'B', "backupfile", "", G_TYPE_STRING }, + { 'd', "displaypass", NULL, G_TYPE_BOOL }, + { 'e', "ealgo", "", G_TYPE_STRING }, + { 'g', "geliboot", NULL, G_TYPE_BOOL }, + { 'i', "iterations", "-1", G_TYPE_NUMBER }, + { 'J', "newpassfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'K', "newkeyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'l', "keylen", "0", G_TYPE_NUMBER }, + { 'P', "nonewpassphrase", NULL, G_TYPE_BOOL }, + { 'R', "noautoresize", NULL, G_TYPE_BOOL }, + { 's', "sectorsize", "0", G_TYPE_NUMBER }, + { 'T', "notrim", NULL, G_TYPE_BOOL }, + { 'V', "mdversion", "-1", G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-bdgPRTv] [-a aalgo] [-B backupfile] [-e ealgo] [-i iterations] [-l keylen] [-J newpassfile] [-K newkeyfile] [-s sectorsize] [-V version] prov ..." + }, + { "label", G_FLAG_VERBOSE, eli_main, + { + { 'a', "aalgo", "", G_TYPE_STRING }, + { 'b', "boot", NULL, G_TYPE_BOOL }, + { 'B', "backupfile", "", G_TYPE_STRING }, + { 'd', "displaypass", NULL, G_TYPE_BOOL }, + { 'e', "ealgo", "", G_TYPE_STRING }, + { 'g', "geliboot", NULL, G_TYPE_BOOL }, + { 'i', "iterations", "-1", G_TYPE_NUMBER }, + { 'J', "newpassfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'K', "newkeyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'l', "keylen", "0", G_TYPE_NUMBER }, + { 'P', "nonewpassphrase", NULL, G_TYPE_BOOL }, + { 'R', "noautoresize", NULL, G_TYPE_BOOL }, + { 's', "sectorsize", "0", G_TYPE_NUMBER }, + { 'T', "notrim", NULL, G_TYPE_BOOL }, + { 'V', "mdversion", "-1", G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "- an alias for 'init'" + }, + { "attach", G_FLAG_VERBOSE | G_FLAG_LOADKLD, eli_main, + { + { 'C', "dryrun", NULL, G_TYPE_BOOL }, + { 'd', "detach", NULL, G_TYPE_BOOL }, + { 'j', "passfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'k', "keyfile", 
G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'n', "keyno", "-1", G_TYPE_NUMBER }, + { 'p', "nopassphrase", NULL, G_TYPE_BOOL }, + { 'r', "readonly", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-Cdprv] [-n keyno] [-j passfile] [-k keyfile] prov ..." + }, + { "detach", 0, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'l', "last", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fl] prov ..." + }, + { "stop", 0, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'l', "last", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "- an alias for 'detach'" + }, + { "onetime", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, + { + { 'a', "aalgo", "", G_TYPE_STRING }, + { 'd', "detach", NULL, G_TYPE_BOOL }, + { 'e', "ealgo", GELI_ENC_ALGO, G_TYPE_STRING }, + { 'l', "keylen", "0", G_TYPE_NUMBER }, + { 'R', "noautoresize", NULL, G_TYPE_BOOL }, + { 's', "sectorsize", "0", G_TYPE_NUMBER }, + { 'T', "notrim", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-dRT] [-a aalgo] [-e ealgo] [-l keylen] [-s sectorsize] prov" + }, + { "configure", G_FLAG_VERBOSE, eli_main, + { + { 'b', "boot", NULL, G_TYPE_BOOL }, + { 'B', "noboot", NULL, G_TYPE_BOOL }, + { 'd', "displaypass", NULL, G_TYPE_BOOL }, + { 'D', "nodisplaypass", NULL, G_TYPE_BOOL }, + { 'g', "geliboot", NULL, G_TYPE_BOOL }, + { 'G', "nogeliboot", NULL, G_TYPE_BOOL }, + { 'r', "autoresize", NULL, G_TYPE_BOOL }, + { 'R', "noautoresize", NULL, G_TYPE_BOOL }, + { 't', "trim", NULL, G_TYPE_BOOL }, + { 'T', "notrim", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-bBdDgGrRtT] prov ..." 
+ }, + { "setkey", G_FLAG_VERBOSE, eli_main, + { + { 'i', "iterations", "-1", G_TYPE_NUMBER }, + { 'j', "passfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'J', "newpassfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'k', "keyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'K', "newkeyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'n', "keyno", "-1", G_TYPE_NUMBER }, + { 'p', "nopassphrase", NULL, G_TYPE_BOOL }, + { 'P', "nonewpassphrase", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-pPv] [-n keyno] [-i iterations] [-j passfile] [-J newpassfile] [-k keyfile] [-K newkeyfile] prov" + }, + { "delkey", G_FLAG_VERBOSE, eli_main, + { + { 'a', "all", NULL, G_TYPE_BOOL }, + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'n', "keyno", "-1", G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-afv] [-n keyno] prov" + }, + { "suspend", G_FLAG_VERBOSE, NULL, + { + { 'a', "all", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-v] -a | prov ..." + }, + { "resume", G_FLAG_VERBOSE, eli_main, + { + { 'j', "passfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'k', "keyfile", G_VAL_OPTIONAL, G_TYPE_STRING | G_TYPE_MULTI }, + { 'p', "nopassphrase", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-pv] [-j passfile] [-k keyfile] prov" + }, + { "kill", G_FLAG_VERBOSE, eli_main, + { + { 'a', "all", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-av] [prov ...]" + }, + { "backup", G_FLAG_VERBOSE, eli_main, G_NULL_OPTS, + "[-v] prov file" + }, + { "restore", G_FLAG_VERBOSE, eli_main, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] file prov" + }, + { "resize", G_FLAG_VERBOSE, eli_main, + { + { 's', "oldsize", NULL, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-v] -s oldsize prov" + }, + { "version", G_FLAG_LOADKLD, eli_main, G_NULL_OPTS, + "[prov ...]" + }, + { "clear", G_FLAG_VERBOSE, eli_main, G_NULL_OPTS, + "[-v] prov ..." + }, + { "dump", G_FLAG_VERBOSE, eli_main, G_NULL_OPTS, + "[-v] prov ..." 
+ }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +static int +eli_protect(struct gctl_req *req) +{ + struct rlimit rl; + + /* Disable core dumps. */ + rl.rlim_cur = 0; + rl.rlim_max = 0; + if (setrlimit(RLIMIT_CORE, &rl) == -1) { + gctl_error(req, "Cannot disable core dumps: %s.", + strerror(errno)); + return (-1); + } + /* Disable swapping. */ + if (mlockall(MCL_FUTURE) == -1) { + gctl_error(req, "Cannot lock memory: %s.", strerror(errno)); + return (-1); + } + return (0); +} + +static void +eli_main(struct gctl_req *req, unsigned int flags) +{ + const char *name; + + if (eli_protect(req) == -1) + return; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "init") == 0 || strcmp(name, "label") == 0) + eli_init(req); + else if (strcmp(name, "attach") == 0) + eli_attach(req); + else if (strcmp(name, "configure") == 0) + eli_configure(req); + else if (strcmp(name, "setkey") == 0) + eli_setkey(req); + else if (strcmp(name, "delkey") == 0) + eli_delkey(req); + else if (strcmp(name, "resume") == 0) + eli_resume(req); + else if (strcmp(name, "kill") == 0) + eli_kill(req); + else if (strcmp(name, "backup") == 0) + eli_backup(req); + else if (strcmp(name, "restore") == 0) + eli_restore(req); + else if (strcmp(name, "resize") == 0) + eli_resize(req); + else if (strcmp(name, "version") == 0) + eli_version(req); + else if (strcmp(name, "dump") == 0) + eli_dump(req); + else if (strcmp(name, "clear") == 0) + eli_clear(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +static bool +eli_is_attached(const char *prov) +{ + char name[MAXPATHLEN]; + + /* + * Not the best way to do it, but the easiest. + * We try to open provider and check if it is a GEOM provider + * by asking about its sectorsize. 
+	 */
+	snprintf(name, sizeof(name), "%s%s", prov, G_ELI_SUFFIX);
+	return (g_get_sectorsize(name) > 0);
+}
+
+/*
+ * Consume every "[new]keyfileN" or "[new]passfileN" parameter of the request
+ * for N = 0, 1, ..., stopping at the first index that is not present.
+ * A file name of "-" selects standard input.
+ *
+ * type "keyfile":  file contents are fed into the HMAC context ctxp.
+ * type "passfile": the first line of each file (newline stripped) is
+ *                  appended to passbuf, which must be empty on entry.
+ *
+ * Returns the number of files processed, or -1 after storing an error
+ * message in req.
+ */
+static int
+eli_genkey_files(struct gctl_req *req, bool new, const char *type,
+    struct hmac_ctx *ctxp, char *passbuf, size_t passbufsize)
+{
+	char *p, buf[BUFSIZE], argname[16];
+	const char *file;
+	int error, fd, i;
+	ssize_t done;
+
+	assert((strcmp(type, "keyfile") == 0 && ctxp != NULL &&
+	    passbuf == NULL && passbufsize == 0) ||
+	    (strcmp(type, "passfile") == 0 && ctxp == NULL &&
+	    passbuf != NULL && passbufsize > 0));
+	assert(strcmp(type, "keyfile") == 0 || passbuf[0] == '\0');
+
+	for (i = 0; ; i++) {
+		snprintf(argname, sizeof(argname), "%s%s%d",
+		    new ? "new" : "", type, i);
+
+		/* No more {key,pass}files? */
+		if (!gctl_has_param(req, argname))
+			return (i);
+
+		file = gctl_get_ascii(req, "%s", argname);
+		assert(file != NULL);
+
+		if (strcmp(file, "-") == 0)
+			fd = STDIN_FILENO;
+		else {
+			fd = open(file, O_RDONLY);
+			if (fd == -1) {
+				gctl_error(req, "Cannot open %s %s: %s.",
+				    type, file, strerror(errno));
+				return (-1);
+			}
+		}
+		if (strcmp(type, "keyfile") == 0) {
+			while ((done = read(fd, buf, sizeof(buf))) > 0)
+				g_eli_crypto_hmac_update(ctxp, buf, done);
+		} else /* if (strcmp(type, "passfile") == 0) */ {
+			assert(strcmp(type, "passfile") == 0);
+
+			while ((done = read(fd, buf, sizeof(buf) - 1)) > 0) {
+				buf[done] = '\0';
+				p = strchr(buf, '\n');
+				if (p != NULL) {
+					*p = '\0';
+					done = p - buf;
+				}
+				if (strlcat(passbuf, buf, passbufsize) >=
+				    passbufsize) {
+					gctl_error(req,
+					    "Passphrase in %s too long.", file);
+					explicit_bzero(buf, sizeof(buf));
+					/*
+					 * Release the descriptor we opened;
+					 * stdin is left alone, as on the
+					 * normal path below.
+					 */
+					if (strcmp(file, "-") != 0)
+						close(fd);
+					return (-1);
+				}
+				if (p != NULL)
+					break;
+			}
+		}
+		/* Save errno before close() can overwrite it. */
+		error = errno;
+		if (strcmp(file, "-") != 0)
+			close(fd);
+		explicit_bzero(buf, sizeof(buf));
+		if (done == -1) {
+			gctl_error(req, "Cannot read %s %s: %s.",
+			    type, file, strerror(error));
+			return (-1);
+		}
+	}
+	/* NOTREACHED */
+}
+
+static int
+eli_genkey_passphrase_prompt(struct gctl_req *req, bool new, char *passbuf,
+    size_t
passbufsize)
+{
+	char *p;
+
+	for (;;) {
+		p = readpassphrase(
+		    new ? "Enter new passphrase: " : "Enter passphrase: ",
+		    passbuf, passbufsize, RPP_ECHO_OFF | RPP_REQUIRE_TTY);
+		if (p == NULL) {
+			explicit_bzero(passbuf, passbufsize);
+			gctl_error(req, "Cannot read passphrase: %s.",
+			    strerror(errno));
+			return (-1);
+		}
+
+		if (new) {
+			char tmpbuf[BUFSIZE];
+			bool match;
+
+			/* A new passphrase must be typed twice. */
+			p = readpassphrase("Reenter new passphrase: ",
+			    tmpbuf, sizeof(tmpbuf),
+			    RPP_ECHO_OFF | RPP_REQUIRE_TTY);
+			if (p == NULL) {
+				/* tmpbuf may hold partial input. */
+				explicit_bzero(tmpbuf, sizeof(tmpbuf));
+				explicit_bzero(passbuf, passbufsize);
+				gctl_error(req,
+				    "Cannot read passphrase: %s.",
+				    strerror(errno));
+				return (-1);
+			}
+
+			/*
+			 * The confirmation copy is not needed past this
+			 * comparison, so scrub it on every path.
+			 */
+			match = (strcmp(passbuf, tmpbuf) == 0);
+			explicit_bzero(tmpbuf, sizeof(tmpbuf));
+			if (!match) {
+				explicit_bzero(passbuf, passbufsize);
+				fprintf(stderr, "They didn't match.\n");
+				continue;
+			}
+		}
+		return (0);
+	}
+	/* NOTREACHED */
+}
+
+static int
+eli_genkey_passphrase(struct gctl_req *req, struct g_eli_metadata *md, bool new,
+    struct hmac_ctx *ctxp)
+{
+	char passbuf[BUFSIZE];
+	bool nopassphrase;
+	int nfiles;
+
+	/*
+	 * Return error if the 'do not use passphrase' flag was given but a
+	 * passfile was provided.
+	 */
+	nopassphrase =
+	    gctl_get_int(req, new ? "nonewpassphrase" : "nopassphrase");
+	if (nopassphrase) {
+		if (gctl_has_param(req, new ? "newpassfile0" : "passfile0")) {
+			gctl_error(req,
+			    "Options -%c and -%c are mutually exclusive.",
+			    new ? 'J' : 'j', new ? 'P' : 'p');
+			return (-1);
+		}
+		return (0);
+	}
+
+	/*
+	 * Return error if using a provider which does not require a passphrase
+	 * but the 'do not use passphrase' flag was not given.
+	 */
+	if (!new && md->md_iterations == -1) {
+		gctl_error(req, "Missing -p flag.");
+		return (-1);
+	}
+	passbuf[0] = '\0';
+
+	/* Use cached passphrase if defined.
*/
+	if (strlen(cached_passphrase) > 0) {
+		strlcpy(passbuf, cached_passphrase, sizeof(passbuf));
+	} else {
+		nfiles = eli_genkey_files(req, new, "passfile", NULL, passbuf,
+		    sizeof(passbuf));
+		if (nfiles == -1) {
+			/*
+			 * Earlier passfiles may already have been appended
+			 * to passbuf; do not leave them on the stack.
+			 */
+			explicit_bzero(passbuf, sizeof(passbuf));
+			return (-1);
+		} else if (nfiles == 0) {
+			/* The prompt helper scrubs passbuf on failure. */
+			if (eli_genkey_passphrase_prompt(req, new, passbuf,
+			    sizeof(passbuf)) == -1) {
+				return (-1);
+			}
+		}
+		/* Cache the passphrase for other providers. */
+		strlcpy(cached_passphrase, passbuf, sizeof(cached_passphrase));
+	}
+	/*
+	 * Field md_iterations equal to -1 means "choose some sane
+	 * value for me".
+	 */
+	if (md->md_iterations == -1) {
+		assert(new);
+		if (verbose)
+			printf("Calculating number of iterations...\n");
+		md->md_iterations = pkcs5v2_calculate(2000000);
+		assert(md->md_iterations > 0);
+		if (verbose) {
+			printf("Done, using %d iterations.\n",
+			    md->md_iterations);
+		}
+	}
+	/*
+	 * If md_iterations is equal to 0, user doesn't want PKCS#5v2.
+	 */
+	if (md->md_iterations == 0) {
+		/* Mix salt and raw passphrase straight into the HMAC. */
+		g_eli_crypto_hmac_update(ctxp, md->md_salt,
+		    sizeof(md->md_salt));
+		g_eli_crypto_hmac_update(ctxp, passbuf, strlen(passbuf));
+	} else /* if (md->md_iterations > 0) */ {
+		unsigned char dkey[G_ELI_USERKEYLEN];
+
+		/* Stretch the passphrase first, then mix in the result. */
+		pkcs5v2_genkey(dkey, sizeof(dkey), md->md_salt,
+		    sizeof(md->md_salt), passbuf, md->md_iterations);
+		g_eli_crypto_hmac_update(ctxp, dkey, sizeof(dkey));
+		explicit_bzero(dkey, sizeof(dkey));
+	}
+	explicit_bzero(passbuf, sizeof(passbuf));
+
+	return (0);
+}
+
+static bool
+eli_init_key_hmac_ctx(struct gctl_req *req, struct hmac_ctx *ctx, bool new)
+{
+	int nfiles;
+	bool nopassphrase;
+
+	nopassphrase =
+	    gctl_get_int(req, new ?
"nonewpassphrase" : "nopassphrase"); + + g_eli_crypto_hmac_init(ctx, NULL, 0); + nfiles = eli_genkey_files(req, new, "keyfile", ctx, NULL, 0); + if (nfiles == -1) { + return (false); + } else if (nfiles == 0 && nopassphrase) { + gctl_error(req, "No key components given."); + return (false); + } + + return (true); +} + +static unsigned char * +eli_genkey(struct gctl_req *req, const struct hmac_ctx *ctxtemplate, + struct g_eli_metadata *md, unsigned char *key, bool new) +{ + struct hmac_ctx ctx; + + memcpy(&ctx, ctxtemplate, sizeof(ctx)); + + if (eli_genkey_passphrase(req, md, new, &ctx) == -1) + return (NULL); + + g_eli_crypto_hmac_final(&ctx, key, 0); + + return (key); +} + +static unsigned char * +eli_genkey_single(struct gctl_req *req, struct g_eli_metadata *md, + unsigned char *key, bool new) +{ + struct hmac_ctx ctx; + unsigned char *rkey; + + if (!eli_init_key_hmac_ctx(req, &ctx, new)) { + return (NULL); + } + rkey = eli_genkey(req, &ctx, md, key, new); + explicit_bzero(&ctx, sizeof(ctx)); + + return (rkey); +} + +static int +eli_metadata_read(struct gctl_req *req, const char *prov, + struct g_eli_metadata *md) +{ + unsigned char sector[sizeof(struct g_eli_metadata)]; + int error; + + if (g_get_sectorsize(prov) == 0) { + int fd; + + /* This is a file probably. */ + fd = open(prov, O_RDONLY); + if (fd == -1) { + gctl_error(req, "Cannot open %s: %s.", prov, + strerror(errno)); + return (-1); + } + if (read(fd, sector, sizeof(sector)) != sizeof(sector)) { + gctl_error(req, "Cannot read metadata from %s: %s.", + prov, strerror(errno)); + close(fd); + return (-1); + } + close(fd); + } else { + /* This is a GEOM provider. 
*/ + error = g_metadata_read(prov, sector, sizeof(sector), + G_ELI_MAGIC); + if (error != 0) { + gctl_error(req, "Cannot read metadata from %s: %s.", + prov, strerror(error)); + return (-1); + } + } + error = eli_metadata_decode(sector, md); + switch (error) { + case 0: + break; + case EOPNOTSUPP: + gctl_error(req, + "Provider's %s metadata version %u is too new.\n" + "geli: The highest supported version is %u.", + prov, (unsigned int)md->md_version, G_ELI_VERSION); + return (-1); + case EINVAL: + gctl_error(req, "Inconsistent provider's %s metadata.", prov); + return (-1); + default: + gctl_error(req, + "Unexpected error while decoding provider's %s metadata: %s.", + prov, strerror(error)); + return (-1); + } + return (0); +} + +static int +eli_metadata_store(struct gctl_req *req, const char *prov, + struct g_eli_metadata *md) +{ + unsigned char sector[sizeof(struct g_eli_metadata)]; + int error; + + eli_metadata_encode(md, sector); + if (g_get_sectorsize(prov) == 0) { + int fd; + + /* This is a file probably. */ + fd = open(prov, O_WRONLY | O_TRUNC); + if (fd == -1) { + gctl_error(req, "Cannot open %s: %s.", prov, + strerror(errno)); + explicit_bzero(sector, sizeof(sector)); + return (-1); + } + if (write(fd, sector, sizeof(sector)) != sizeof(sector)) { + gctl_error(req, "Cannot write metadata to %s: %s.", + prov, strerror(errno)); + explicit_bzero(sector, sizeof(sector)); + close(fd); + return (-1); + } + close(fd); + } else { + /* This is a GEOM provider. 
*/ + error = g_metadata_store(prov, sector, sizeof(sector)); + if (error != 0) { + gctl_error(req, "Cannot write metadata to %s: %s.", + prov, strerror(errno)); + explicit_bzero(sector, sizeof(sector)); + return (-1); + } + } + explicit_bzero(sector, sizeof(sector)); + return (0); +} + +static void +eli_init(struct gctl_req *req) +{ + struct g_eli_metadata md; + struct gctl_req *r; + unsigned char sector[sizeof(struct g_eli_metadata)] __aligned(4); + unsigned char key[G_ELI_USERKEYLEN]; + char backfile[MAXPATHLEN]; + const char *str, *prov; + unsigned int secsize, eli_version; + off_t mediasize; + intmax_t val; + int error, i, nargs, nparams, param; + const int one = 1; + struct hmac_ctx ctxtemplate; + + nargs = gctl_get_int(req, "nargs"); + if (nargs <= 0) { + gctl_error(req, "Too few arguments."); + return; + } + + /* Start generating metadata for provider(s) being initialized. */ + explicit_bzero(&md, sizeof(md)); + strlcpy(md.md_magic, G_ELI_MAGIC, sizeof(md.md_magic)); + val = gctl_get_intmax(req, "mdversion"); + if (val == -1) { + eli_version = G_ELI_VERSION; + } else if (val < 0 || val > G_ELI_VERSION) { + gctl_error(req, + "Invalid version specified should be between %u and %u.", + G_ELI_VERSION_00, G_ELI_VERSION); + return; + } else { + eli_version = val; + } + md.md_version = eli_version; + md.md_flags = G_ELI_FLAG_AUTORESIZE; + if (gctl_get_int(req, "boot")) + md.md_flags |= G_ELI_FLAG_BOOT; + if (gctl_get_int(req, "geliboot")) + md.md_flags |= G_ELI_FLAG_GELIBOOT; + if (gctl_get_int(req, "displaypass")) + md.md_flags |= G_ELI_FLAG_GELIDISPLAYPASS; + if (gctl_get_int(req, "notrim")) + md.md_flags |= G_ELI_FLAG_NODELETE; + if (gctl_get_int(req, "noautoresize")) + md.md_flags &= ~G_ELI_FLAG_AUTORESIZE; + md.md_ealgo = CRYPTO_ALGORITHM_MIN - 1; + str = gctl_get_ascii(req, "aalgo"); + if (*str != '\0') { + if (eli_version < G_ELI_VERSION_01) { + gctl_error(req, + "Data authentication is supported starting from version %u.", + G_ELI_VERSION_01); + return; + } 
+ md.md_aalgo = g_eli_str2aalgo(str); + if (md.md_aalgo >= CRYPTO_ALGORITHM_MIN && + md.md_aalgo <= CRYPTO_ALGORITHM_MAX) { + md.md_flags |= G_ELI_FLAG_AUTH; + } else { + /* + * For backward compatibility, check if the -a option + * was used to provide encryption algorithm. + */ + md.md_ealgo = g_eli_str2ealgo(str); + if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || + md.md_ealgo > CRYPTO_ALGORITHM_MAX) { + gctl_error(req, + "Invalid authentication algorithm."); + return; + } else { + fprintf(stderr, "warning: The -e option, not " + "the -a option is now used to specify " + "encryption algorithm to use.\n"); + } + } + } + if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || + md.md_ealgo > CRYPTO_ALGORITHM_MAX) { + str = gctl_get_ascii(req, "ealgo"); + if (*str == '\0') { + if (eli_version < G_ELI_VERSION_05) + str = "aes-cbc"; + else + str = GELI_ENC_ALGO; + } + md.md_ealgo = g_eli_str2ealgo(str); + if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || + md.md_ealgo > CRYPTO_ALGORITHM_MAX) { + gctl_error(req, "Invalid encryption algorithm."); + return; + } + if (md.md_ealgo == CRYPTO_CAMELLIA_CBC && + eli_version < G_ELI_VERSION_04) { + gctl_error(req, + "Camellia-CBC algorithm is supported starting from version %u.", + G_ELI_VERSION_04); + return; + } + if (md.md_ealgo == CRYPTO_AES_XTS && + eli_version < G_ELI_VERSION_05) { + gctl_error(req, + "AES-XTS algorithm is supported starting from version %u.", + G_ELI_VERSION_05); + return; + } + } + val = gctl_get_intmax(req, "keylen"); + md.md_keylen = val; + md.md_keylen = g_eli_keylen(md.md_ealgo, md.md_keylen); + if (md.md_keylen == 0) { + gctl_error(req, "Invalid key length."); + return; + } + + val = gctl_get_intmax(req, "iterations"); + if (val != -1) { + int nonewpassphrase; + + /* + * Don't allow to set iterations when there will be no + * passphrase. 
+ */ + nonewpassphrase = gctl_get_int(req, "nonewpassphrase"); + if (nonewpassphrase) { + gctl_error(req, + "Options -i and -P are mutually exclusive."); + return; + } + } + md.md_iterations = val; + + val = gctl_get_intmax(req, "sectorsize"); + if (val > sysconf(_SC_PAGE_SIZE)) { + fprintf(stderr, + "warning: Using sectorsize bigger than the page size!\n"); + } + + md.md_keys = 0x01; + + /* + * Determine number of parameters in the parent geom request before the + * nargs parameter and list of providers. + */ + nparams = req->narg - nargs - 1; + + /* Generate HMAC context template. */ + if (!eli_init_key_hmac_ctx(req, &ctxtemplate, true)) + return; + + /* Create new child request for each provider and issue to kernel */ + for (i = 0; i < nargs; i++) { + r = gctl_get_handle(); + + /* Copy each parameter from the parent request to the child */ + for (param = 0; param < nparams; param++) { + gctl_ro_param(r, req->arg[param].name, + req->arg[param].len, req->arg[param].value); + } + + /* Add a single provider to the parameter list of the child */ + gctl_ro_param(r, "nargs", sizeof(one), &one); + prov = gctl_get_ascii(req, "arg%d", i); + gctl_ro_param(r, "arg0", -1, prov); + + mediasize = g_get_mediasize(prov); + secsize = g_get_sectorsize(prov); + if (mediasize == 0 || secsize == 0) { + gctl_error(r, "Cannot get information about %s: %s.", + prov, strerror(errno)); + goto out; + } + + md.md_provsize = mediasize; + + val = gctl_get_intmax(r, "sectorsize"); + if (val == 0) { + md.md_sectorsize = secsize; + } else { + if (val < 0 || (val % secsize) != 0 || !powerof2(val)) { + gctl_error(r, "Invalid sector size."); + goto out; + } + md.md_sectorsize = val; + } + + /* Use different salt and Master Key for each provider. */ + arc4random_buf(md.md_salt, sizeof(md.md_salt)); + arc4random_buf(md.md_mkeys, sizeof(md.md_mkeys)); + + /* Generate user key. 
*/ + if (eli_genkey(r, &ctxtemplate, &md, key, true) == NULL) { + /* + * Error generating key - details added to geom request + * by eli_genkey(). + */ + goto out; + } + + /* Encrypt the first and the only Master Key. */ + error = g_eli_mkey_encrypt(md.md_ealgo, key, md.md_keylen, + md.md_mkeys); + if (error != 0) { + gctl_error(r, "Cannot encrypt Master Key: %s.", + strerror(error)); + goto out; + } + + /* Convert metadata to on-disk format. */ + eli_metadata_encode(&md, sector); + + /* Store metadata to disk. */ + error = g_metadata_store(prov, sector, sizeof(sector)); + if (error != 0) { + gctl_error(r, "Cannot store metadata on %s: %s.", prov, + strerror(error)); + goto out; + } + if (verbose) + printf("Metadata value stored on %s.\n", prov); + + /* Backup metadata to a file. */ + const char *p = prov; + unsigned int j; + + /* + * Check if provider string includes the devfs mountpoint + * (typically /dev/). + */ + if (strncmp(p, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) { + /* Skip forward to the device filename only. */ + p += sizeof(_PATH_DEV) - 1; + } + + str = gctl_get_ascii(r, "backupfile"); + if (str[0] != '\0') { + /* Backupfile given by the user, just copy it. */ + strlcpy(backfile, str, sizeof(backfile)); + + /* If multiple providers have been initialized in one + * command, and the backup filename has been specified + * as anything other than "none", make the backup + * filename unique for each provider. */ + if (nargs > 1 && strcmp(backfile, "none") != 0) { + /* + * Replace first occurrence of "PROV" with + * provider name. + */ + str = strnstr(backfile, "PROV", + sizeof(backfile)); + if (str != NULL) { + char suffix[MAXPATHLEN]; + j = str - backfile; + strlcpy(suffix, &backfile[j+4], + sizeof(suffix)); + backfile[j] = '\0'; + strlcat(backfile, p, sizeof(backfile)); + strlcat(backfile, suffix, + sizeof(backfile)); + } else { + /* + * "PROV" not found in backfile, append + * provider name. 
+ */ + strlcat(backfile, "-", + sizeof(backfile)); + strlcat(backfile, p, sizeof(backfile)); + } + } + } else { + /* Generate filename automatically. */ + snprintf(backfile, sizeof(backfile), "%s%s.eli", + GELI_BACKUP_DIR, p); + /* Replace all / with _. */ + for (j = strlen(GELI_BACKUP_DIR); backfile[j] != '\0'; + j++) { + if (backfile[j] == '/') + backfile[j] = '_'; + } + } + if (strcmp(backfile, "none") != 0 && + eli_backup_create(r, prov, backfile) == 0) { + printf("\nMetadata backup for provider %s can be found " + "in %s\n", prov, backfile); + printf("and can be restored with the following " + "command:\n"); + printf("\n\t# geli restore %s %s\n\n", backfile, prov); + } + +out: + /* + * Print error for this request, and set parent request error + * message. + */ + if (r->error != NULL && r->error[0] != '\0') { + warnx("%s", r->error); + gctl_error(req, "There was an error with at least one " + "provider."); + } + + gctl_free(r); + + /* + * Erase sensitive and provider specific data from memory. + */ + explicit_bzero(key, sizeof(key)); + explicit_bzero(sector, sizeof(sector)); + explicit_bzero(&md.md_provsize, sizeof(md.md_provsize)); + explicit_bzero(&md.md_sectorsize, sizeof(md.md_sectorsize)); + explicit_bzero(&md.md_salt, sizeof(md.md_salt)); + explicit_bzero(&md.md_mkeys, sizeof(md.md_mkeys)); + } + + /* Clear the cached metadata, including keys. */ + explicit_bzero(&md, sizeof(md)); + explicit_bzero(&ctxtemplate, sizeof(ctxtemplate)); +} + +static void +eli_attach(struct gctl_req *req) +{ + struct g_eli_metadata md; + struct gctl_req *r; + const char *prov; + off_t mediasize; + int i, nargs, nparams, param; + const int one = 1; + struct hmac_ctx ctxtemplate; + + nargs = gctl_get_int(req, "nargs"); + if (nargs <= 0) { + gctl_error(req, "Too few arguments."); + return; + } + + unsigned char key[G_ELI_USERKEYLEN]; + + /* + * Determine number of parameters in the parent geom request before the + * nargs parameter and list of providers. 
+ */ + nparams = req->narg - nargs - 1; + + /* Generate HMAC context template. */ + if (!eli_init_key_hmac_ctx(req, &ctxtemplate, false)) + return; + + /* Create new child request for each provider and issue to kernel */ + for (i = 0; i < nargs; i++) { + r = gctl_get_handle(); + + /* Copy each parameter from the parent request to the child */ + for (param = 0; param < nparams; param++) { + gctl_ro_param(r, req->arg[param].name, + req->arg[param].len, req->arg[param].value); + } + + /* Add a single provider to the parameter list of the child */ + gctl_ro_param(r, "nargs", sizeof(one), &one); + prov = gctl_get_ascii(req, "arg%d", i); + gctl_ro_param(r, "arg0", -1, prov); + + if (eli_metadata_read(r, prov, &md) == -1) { + /* + * Error reading metadata - details added to geom + * request by eli_metadata_read(). + */ + goto out; + } + + mediasize = g_get_mediasize(prov); + if (md.md_provsize != (uint64_t)mediasize) { + gctl_error(r, "Provider size mismatch."); + goto out; + } + + if (eli_genkey(r, &ctxtemplate, &md, key, false) == NULL) { + /* + * Error generating key - details added to geom request + * by eli_genkey(). + */ + goto out; + } + + gctl_ro_param(r, "key", sizeof(key), key); + + if (gctl_issue(r) == NULL) { + if (verbose) + printf("Attached to %s.\n", prov); + } + +out: + /* + * Print error for this request, and set parent request error + * message. + */ + if (r->error != NULL && r->error[0] != '\0') { + warnx("%s", r->error); + gctl_error(req, "There was an error with at least one " + "provider."); + } + + gctl_free(r); + + /* Clear sensitive data from memory. */ + explicit_bzero(key, sizeof(key)); + } + + /* Clear sensitive data from memory. 
*/ + explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); + explicit_bzero(&ctxtemplate, sizeof(ctxtemplate)); +} + +static void +eli_configure_detached(struct gctl_req *req, const char *prov, int boot, + int geliboot, int displaypass, int trim, int autoresize) +{ + struct g_eli_metadata md; + bool changed = 0; + + if (eli_metadata_read(req, prov, &md) == -1) + return; + + if (boot == 1 && (md.md_flags & G_ELI_FLAG_BOOT)) { + if (verbose) + printf("BOOT flag already configured for %s.\n", prov); + } else if (boot == 0 && !(md.md_flags & G_ELI_FLAG_BOOT)) { + if (verbose) + printf("BOOT flag not configured for %s.\n", prov); + } else if (boot >= 0) { + if (boot) + md.md_flags |= G_ELI_FLAG_BOOT; + else + md.md_flags &= ~G_ELI_FLAG_BOOT; + changed = 1; + } + + if (geliboot == 1 && (md.md_flags & G_ELI_FLAG_GELIBOOT)) { + if (verbose) + printf("GELIBOOT flag already configured for %s.\n", prov); + } else if (geliboot == 0 && !(md.md_flags & G_ELI_FLAG_GELIBOOT)) { + if (verbose) + printf("GELIBOOT flag not configured for %s.\n", prov); + } else if (geliboot >= 0) { + if (geliboot) + md.md_flags |= G_ELI_FLAG_GELIBOOT; + else + md.md_flags &= ~G_ELI_FLAG_GELIBOOT; + changed = 1; + } + + if (displaypass == 1 && (md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS)) { + if (verbose) + printf("GELIDISPLAYPASS flag already configured for %s.\n", prov); + } else if (displaypass == 0 && + !(md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS)) { + if (verbose) + printf("GELIDISPLAYPASS flag not configured for %s.\n", prov); + } else if (displaypass >= 0) { + if (displaypass) + md.md_flags |= G_ELI_FLAG_GELIDISPLAYPASS; + else + md.md_flags &= ~G_ELI_FLAG_GELIDISPLAYPASS; + changed = 1; + } + + if (trim == 0 && (md.md_flags & G_ELI_FLAG_NODELETE)) { + if (verbose) + printf("TRIM disable flag already configured for %s.\n", prov); + } else if (trim == 1 && !(md.md_flags & G_ELI_FLAG_NODELETE)) { + if (verbose) + printf("TRIM disable flag not configured for %s.\n", prov); + } else if (trim 
>= 0) { + if (trim) + md.md_flags &= ~G_ELI_FLAG_NODELETE; + else + md.md_flags |= G_ELI_FLAG_NODELETE; + changed = 1; + } + + if (autoresize == 1 && (md.md_flags & G_ELI_FLAG_AUTORESIZE)) { + if (verbose) + printf("AUTORESIZE flag already configured for %s.\n", prov); + } else if (autoresize == 0 && !(md.md_flags & G_ELI_FLAG_AUTORESIZE)) { + if (verbose) + printf("AUTORESIZE flag not configured for %s.\n", prov); + } else if (autoresize >= 0) { + if (autoresize) + md.md_flags |= G_ELI_FLAG_AUTORESIZE; + else + md.md_flags &= ~G_ELI_FLAG_AUTORESIZE; + changed = 1; + } + + if (changed) + eli_metadata_store(req, prov, &md); + explicit_bzero(&md, sizeof(md)); +} + +static void +eli_configure(struct gctl_req *req) +{ + const char *prov; + bool boot, noboot, geliboot, nogeliboot, displaypass, nodisplaypass; + bool autoresize, noautoresize, trim, notrim; + int doboot, dogeliboot, dodisplaypass, dotrim, doautoresize; + int i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs == 0) { + gctl_error(req, "Too few arguments."); + return; + } + + boot = gctl_get_int(req, "boot"); + noboot = gctl_get_int(req, "noboot"); + geliboot = gctl_get_int(req, "geliboot"); + nogeliboot = gctl_get_int(req, "nogeliboot"); + displaypass = gctl_get_int(req, "displaypass"); + nodisplaypass = gctl_get_int(req, "nodisplaypass"); + trim = gctl_get_int(req, "trim"); + notrim = gctl_get_int(req, "notrim"); + autoresize = gctl_get_int(req, "autoresize"); + noautoresize = gctl_get_int(req, "noautoresize"); + + doboot = -1; + if (boot && noboot) { + gctl_error(req, "Options -b and -B are mutually exclusive."); + return; + } + if (boot) + doboot = 1; + else if (noboot) + doboot = 0; + + dogeliboot = -1; + if (geliboot && nogeliboot) { + gctl_error(req, "Options -g and -G are mutually exclusive."); + return; + } + if (geliboot) + dogeliboot = 1; + else if (nogeliboot) + dogeliboot = 0; + + dodisplaypass = -1; + if (displaypass && nodisplaypass) { + gctl_error(req, "Options -d and -D are 
mutually exclusive."); + return; + } + if (displaypass) + dodisplaypass = 1; + else if (nodisplaypass) + dodisplaypass = 0; + + dotrim = -1; + if (trim && notrim) { + gctl_error(req, "Options -t and -T are mutually exclusive."); + return; + } + if (trim) + dotrim = 1; + else if (notrim) + dotrim = 0; + + doautoresize = -1; + if (autoresize && noautoresize) { + gctl_error(req, "Options -r and -R are mutually exclusive."); + return; + } + if (autoresize) + doautoresize = 1; + else if (noautoresize) + doautoresize = 0; + + if (doboot == -1 && dogeliboot == -1 && dodisplaypass == -1 && + dotrim == -1 && doautoresize == -1) { + gctl_error(req, "No option given."); + return; + } + + /* First attached providers. */ + gctl_issue(req); + /* Now the rest. */ + for (i = 0; i < nargs; i++) { + prov = gctl_get_ascii(req, "arg%d", i); + if (!eli_is_attached(prov)) { + eli_configure_detached(req, prov, doboot, dogeliboot, + dodisplaypass, dotrim, doautoresize); + } + } +} + +static void +eli_setkey_attached(struct gctl_req *req, struct g_eli_metadata *md) +{ + unsigned char key[G_ELI_USERKEYLEN]; + intmax_t val, old = 0; + int error; + + val = gctl_get_intmax(req, "iterations"); + /* Check if iterations number should be changed. */ + if (val != -1) + md->md_iterations = val; + else + old = md->md_iterations; + + /* Generate key for Master Key encryption. */ + if (eli_genkey_single(req, md, key, true) == NULL) { + explicit_bzero(key, sizeof(key)); + return; + } + /* + * If number of iterations has changed, but wasn't given as a + * command-line argument, update the request. 
+ */ + if (val == -1 && md->md_iterations != old) { + error = gctl_change_param(req, "iterations", sizeof(intmax_t), + &md->md_iterations); + assert(error == 0); + } + + gctl_ro_param(req, "key", sizeof(key), key); + gctl_issue(req); + explicit_bzero(key, sizeof(key)); +} + +static void +eli_setkey_detached(struct gctl_req *req, const char *prov, + struct g_eli_metadata *md) +{ + unsigned char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN]; + unsigned char *mkeydst; + unsigned int nkey; + intmax_t val; + int error; + + if (md->md_keys == 0) { + gctl_error(req, "No valid keys on %s.", prov); + return; + } + + /* Generate key for Master Key decryption. */ + if (eli_genkey_single(req, md, key, false) == NULL) { + explicit_bzero(key, sizeof(key)); + return; + } + + /* Decrypt Master Key. */ + error = g_eli_mkey_decrypt_any(md, key, mkey, &nkey); + explicit_bzero(key, sizeof(key)); + if (error != 0) { + explicit_bzero(md, sizeof(*md)); + if (error == -1) + gctl_error(req, "Wrong key for %s.", prov); + else /* if (error > 0) */ { + gctl_error(req, "Cannot decrypt Master Key: %s.", + strerror(error)); + } + return; + } + if (verbose) + printf("Decrypted Master Key %u.\n", nkey); + + val = gctl_get_intmax(req, "keyno"); + if (val != -1) + nkey = val; +#if 0 + else + ; /* Use the key number which was found during decryption. */ +#endif + if (nkey >= G_ELI_MAXMKEYS) { + gctl_error(req, "Invalid '%s' argument.", "keyno"); + return; + } + + val = gctl_get_intmax(req, "iterations"); + /* Check if iterations number should and can be changed. 
*/ + if (val != -1 && md->md_iterations == -1) { + md->md_iterations = val; + } else if (val != -1 && val != md->md_iterations) { + if (bitcount32(md->md_keys) != 1) { + gctl_error(req, "To be able to use '-i' option, only " + "one key can be defined."); + return; + } + if (md->md_keys != (1 << nkey)) { + gctl_error(req, "Only already defined key can be " + "changed when '-i' option is used."); + return; + } + md->md_iterations = val; + } + + mkeydst = md->md_mkeys + nkey * G_ELI_MKEYLEN; + md->md_keys |= (1 << nkey); + + bcopy(mkey, mkeydst, sizeof(mkey)); + explicit_bzero(mkey, sizeof(mkey)); + + /* + * The previous eli_genkey() set cached_passphrase, we do not want to + * use that for the new passphrase so always prompt for it + */ + explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); + + /* Generate key for Master Key encryption. */ + if (eli_genkey_single(req, md, key, true) == NULL) { + explicit_bzero(key, sizeof(key)); + explicit_bzero(md, sizeof(*md)); + return; + } + + /* Encrypt the Master-Key with the new key. */ + error = g_eli_mkey_encrypt(md->md_ealgo, key, md->md_keylen, mkeydst); + explicit_bzero(key, sizeof(key)); + if (error != 0) { + explicit_bzero(md, sizeof(*md)); + gctl_error(req, "Cannot encrypt Master Key: %s.", + strerror(error)); + return; + } + + /* Store metadata with fresh key. 
*/ + eli_metadata_store(req, prov, md); + explicit_bzero(md, sizeof(*md)); +} + +static void +eli_setkey(struct gctl_req *req) +{ + struct g_eli_metadata md; + const char *prov; + int nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 1) { + gctl_error(req, "Invalid number of arguments."); + return; + } + prov = gctl_get_ascii(req, "arg0"); + + if (eli_metadata_read(req, prov, &md) == -1) + return; + + if (eli_is_attached(prov)) + eli_setkey_attached(req, &md); + else + eli_setkey_detached(req, prov, &md); + + if (req->error == NULL || req->error[0] == '\0') { + printf("Note, that the master key encrypted with old keys " + "and/or passphrase may still exist in a metadata backup " + "file.\n"); + } +} + +static void +eli_delkey_attached(struct gctl_req *req, const char *prov __unused) +{ + + gctl_issue(req); +} + +static void +eli_delkey_detached(struct gctl_req *req, const char *prov) +{ + struct g_eli_metadata md; + unsigned char *mkeydst; + unsigned int nkey; + intmax_t val; + bool all, force; + + if (eli_metadata_read(req, prov, &md) == -1) + return; + + all = gctl_get_int(req, "all"); + if (all) + arc4random_buf(md.md_mkeys, sizeof(md.md_mkeys)); + else { + force = gctl_get_int(req, "force"); + val = gctl_get_intmax(req, "keyno"); + if (val == -1) { + gctl_error(req, "Key number has to be specified."); + return; + } + nkey = val; + if (nkey >= G_ELI_MAXMKEYS) { + gctl_error(req, "Invalid '%s' argument.", "keyno"); + return; + } + if (!(md.md_keys & (1 << nkey)) && !force) { + gctl_error(req, "Master Key %u is not set.", nkey); + return; + } + md.md_keys &= ~(1 << nkey); + if (md.md_keys == 0 && !force) { + gctl_error(req, "This is the last Master Key. 
Use '-f' " + "option if you really want to remove it."); + return; + } + mkeydst = md.md_mkeys + nkey * G_ELI_MKEYLEN; + arc4random_buf(mkeydst, G_ELI_MKEYLEN); + } + + eli_metadata_store(req, prov, &md); + explicit_bzero(&md, sizeof(md)); +} + +static void +eli_delkey(struct gctl_req *req) +{ + const char *prov; + int nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 1) { + gctl_error(req, "Invalid number of arguments."); + return; + } + prov = gctl_get_ascii(req, "arg0"); + + if (eli_is_attached(prov)) + eli_delkey_attached(req, prov); + else + eli_delkey_detached(req, prov); +} + +static void +eli_resume(struct gctl_req *req) +{ + struct g_eli_metadata md; + unsigned char key[G_ELI_USERKEYLEN]; + const char *prov; + off_t mediasize; + int nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 1) { + gctl_error(req, "Invalid number of arguments."); + return; + } + prov = gctl_get_ascii(req, "arg0"); + + if (eli_metadata_read(req, prov, &md) == -1) + return; + + mediasize = g_get_mediasize(prov); + if (md.md_provsize != (uint64_t)mediasize) { + gctl_error(req, "Provider size mismatch."); + return; + } + + if (eli_genkey_single(req, &md, key, false) == NULL) { + explicit_bzero(key, sizeof(key)); + return; + } + + gctl_ro_param(req, "key", sizeof(key), key); + if (gctl_issue(req) == NULL) { + if (verbose) + printf("Resumed %s.\n", prov); + } + explicit_bzero(key, sizeof(key)); +} + +/* + * Overwrite one sector at 'offset' on descriptor 'fd' with random data. + * The number of passes is read from the kern.geom.eli.overwrites sysctl; + * G_ELI_OVERWRITES is used when the sysctl fails or reports 0. + * + * Returns 0 when every pass was written completely. Otherwise the first + * failure is reported on 'req' and -1 is returned. + */ +static int +eli_trash_metadata(struct gctl_req *req, const char *prov, int fd, off_t offset) +{ + unsigned int overwrites; + unsigned char *sector; + ssize_t done, size; + size_t len; + int error; + + /* sysctlbyname() takes a size_t length, so keep it apart from 'size'. */ + len = sizeof(overwrites); + if (sysctlbyname("kern.geom.eli.overwrites", &overwrites, &len, + NULL, 0) == -1 || overwrites == 0) { + overwrites = G_ELI_OVERWRITES; + } + + size = g_sectorsize(fd); + if (size <= 0) { + gctl_error(req, "Cannot obtain provider sector size %s: %s.", + prov, strerror(errno)); + return (-1); + } + sector = malloc(size); + if (sector == NULL) { + gctl_error(req,
"Cannot allocate %zd bytes of memory.", size); + return (-1); + } + + error = 0; + do { + arc4random_buf(sector, size); + done = pwrite(fd, sector, size, offset); + if (done != size && error == 0) { + /* + * Remember the first failure only. A short write + * does not set errno, so report it as EIO. + */ + error = (done == -1) ? errno : EIO; + } + (void)g_flush(fd); + } while (--overwrites > 0); + free(sector); + if (error != 0) { + gctl_error(req, "Cannot trash metadata on provider %s: %s.", + prov, strerror(error)); + return (-1); + } + return (0); +} + +static void +eli_kill_detached(struct gctl_req *req, const char *prov) +{ + off_t offset; + int fd; + + /* + * NOTE: Maybe we should verify if this is geli provider first, + * but 'kill' command is quite critical so better don't waste + * the time. + */ +#if 0 + error = g_metadata_read(prov, (unsigned char *)&md, sizeof(md), + G_ELI_MAGIC); + if (error != 0) { + gctl_error(req, "Cannot read metadata from %s: %s.", prov, + strerror(error)); + return; + } +#endif + + fd = g_open(prov, 1); + if (fd == -1) { + gctl_error(req, "Cannot open provider %s: %s.", prov, + strerror(errno)); + return; + } + offset = g_mediasize(fd) - g_sectorsize(fd); + if (offset <= 0) { + gctl_error(req, + "Cannot obtain media size or sector size for provider %s: %s.", + prov, strerror(errno)); + (void)g_close(fd); + return; + } + (void)eli_trash_metadata(req, prov, fd, offset); + (void)g_close(fd); +} + +static void +eli_kill(struct gctl_req *req) +{ + const char *prov; + int i, nargs, all; + + nargs = gctl_get_int(req, "nargs"); + all = gctl_get_int(req, "all"); + if (!all && nargs == 0) { + gctl_error(req, "Too few arguments."); + return; + } + /* + * How '-a' option combine with a list of providers: + * Delete Master Keys from all attached providers: + * geli kill -a + * Delete Master Keys from all attached providers and from + * detached da0 and da1: + * geli kill -a da0 da1 + * Delete Master Keys from (attached or detached) da0 and da1: + * geli kill da0 da1 + */ + + /* First detached providers.
*/ + for (i = 0; i < nargs; i++) { + prov = gctl_get_ascii(req, "arg%d", i); + if (!eli_is_attached(prov)) + eli_kill_detached(req, prov); + } + /* Now attached providers. */ + gctl_issue(req); +} + +static int +eli_backup_create(struct gctl_req *req, const char *prov, const char *file) +{ + unsigned char *sector; + ssize_t secsize; + int error, filefd, ret; + + ret = -1; + filefd = -1; + sector = NULL; + secsize = 0; + + secsize = g_get_sectorsize(prov); + if (secsize == 0) { + gctl_error(req, "Cannot get informations about %s: %s.", prov, + strerror(errno)); + goto out; + } + sector = malloc(secsize); + if (sector == NULL) { + gctl_error(req, "Cannot allocate memory."); + goto out; + } + /* Read metadata from the provider. */ + error = g_metadata_read(prov, sector, secsize, G_ELI_MAGIC); + if (error != 0) { + gctl_error(req, "Unable to read metadata from %s: %s.", prov, + strerror(error)); + goto out; + } + + filefd = open(file, O_WRONLY | O_TRUNC | O_CREAT, 0600); + if (filefd == -1) { + gctl_error(req, "Unable to open %s: %s.", file, + strerror(errno)); + goto out; + } + /* Write metadata to the destination file. */ + if (write(filefd, sector, secsize) != secsize) { + gctl_error(req, "Unable to write to %s: %s.", file, + strerror(errno)); + (void)close(filefd); + (void)unlink(file); + goto out; + } + (void)fsync(filefd); + (void)close(filefd); + /* Success. 
*/ + ret = 0; +out: + if (sector != NULL) { + explicit_bzero(sector, secsize); + free(sector); + } + return (ret); +} + +static void +eli_backup(struct gctl_req *req) +{ + const char *file, *prov; + int nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 2) { + gctl_error(req, "Invalid number of arguments."); + return; + } + prov = gctl_get_ascii(req, "arg0"); + file = gctl_get_ascii(req, "arg1"); + + eli_backup_create(req, prov, file); +} + +static void +eli_restore(struct gctl_req *req) +{ + struct g_eli_metadata md; + const char *file, *prov; + off_t mediasize; + int nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 2) { + gctl_error(req, "Invalid number of arguments."); + return; + } + file = gctl_get_ascii(req, "arg0"); + prov = gctl_get_ascii(req, "arg1"); + + /* Read metadata from the backup file. */ + if (eli_metadata_read(req, file, &md) == -1) + return; + /* Obtain provider's mediasize. */ + mediasize = g_get_mediasize(prov); + if (mediasize == 0) { + gctl_error(req, "Cannot get informations about %s: %s.", prov, + strerror(errno)); + return; + } + /* Check if the provider size has changed since we did the backup. */ + if (md.md_provsize != (uint64_t)mediasize) { + if (gctl_get_int(req, "force")) { + md.md_provsize = mediasize; + } else { + gctl_error(req, "Provider size mismatch: " + "wrong backup file?"); + return; + } + } + /* Write metadata to the provider. 
*/ + (void)eli_metadata_store(req, prov, &md); +} + +/* + * Handle a provider whose size changed: read the metadata sector that ends + * at 'oldsize', check that it records 'oldsize' as the provider size, then + * store it with the current media size and overwrite the old copy. + * Errors are reported on 'req'. + */ +static void +eli_resize(struct gctl_req *req) +{ + struct g_eli_metadata md; + const char *prov; + unsigned char *sector; + ssize_t secsize; + off_t mediasize, oldsize; + int error, nargs, provfd; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 1) { + gctl_error(req, "Invalid number of arguments."); + return; + } + prov = gctl_get_ascii(req, "arg0"); + + provfd = -1; + sector = NULL; + secsize = 0; + + provfd = g_open(prov, 1); + if (provfd == -1) { + gctl_error(req, "Cannot open %s: %s.", prov, strerror(errno)); + goto out; + } + + mediasize = g_mediasize(provfd); + secsize = g_sectorsize(provfd); + if (mediasize == -1 || secsize == -1) { + gctl_error(req, "Cannot get information about %s: %s.", prov, + strerror(errno)); + goto out; + } + + sector = malloc(secsize); + if (sector == NULL) { + gctl_error(req, "Cannot allocate memory."); + goto out; + } + + oldsize = gctl_get_intmax(req, "oldsize"); + /* The old metadata sector ends at 'oldsize', so it must fit below it. */ + if (oldsize < secsize || oldsize > mediasize) { + gctl_error(req, "Invalid oldsize: Out of range."); + goto out; + } + + /* Read metadata from the 'oldsize' offset. */ + if (pread(provfd, sector, secsize, oldsize - secsize) != secsize) { + gctl_error(req, "Cannot read old metadata: %s.", + strerror(errno)); + goto out; + } + + /* Check if this sector contains geli metadata. */ + error = eli_metadata_decode(sector, &md); + switch (error) { + case 0: + break; + case EOPNOTSUPP: + gctl_error(req, + "Provider's %s metadata version %u is too new.\n" + "geli: The highest supported version is %u.", + prov, (unsigned int)md.md_version, G_ELI_VERSION); + goto out; + case EINVAL: + gctl_error(req, "Inconsistent provider's %s metadata.", prov); + goto out; + default: + gctl_error(req, + "Unexpected error while decoding provider's %s metadata: %s.", + prov, strerror(error)); + goto out; + } + + /* + * If the old metadata doesn't have a correct provider size, refuse + * to resize.
+ */ + if (md.md_provsize != (uint64_t)oldsize) { + gctl_error(req, "Provider size mismatch at oldsize."); + goto out; + } + + /* The metadata is valid and nothing has changed. Just exit. */ + if (oldsize == mediasize) + goto out; + + /* + * Update the old metadata with the current provider size and write + * it back to the correct place on the provider. + */ + md.md_provsize = mediasize; + /* Write metadata to the provider. */ + (void)eli_metadata_store(req, prov, &md); + /* + * If the store reported an error on the request, keep the old copy: + * trashing it now could leave the provider without usable metadata. + */ + if (req->error != NULL && req->error[0] != '\0') + goto out; + /* Now trash the old metadata. */ + (void)eli_trash_metadata(req, prov, provfd, oldsize - secsize); +out: + if (provfd != -1) + (void)g_close(provfd); + if (sector != NULL) { + explicit_bzero(sector, secsize); + free(sector); + } +} + +/* + * With no arguments, print the kernel (kern.geom.eli.version sysctl) and + * userland (G_ELI_VERSION) GELI versions. Otherwise print the metadata + * version read from each named provider. + */ +static void +eli_version(struct gctl_req *req) +{ + struct g_eli_metadata md; + const char *name; + unsigned int eli_version; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + + if (nargs == 0) { + unsigned int kernver; + size_t size; + + size = sizeof(kernver); + if (sysctlbyname("kern.geom.eli.version", &kernver, &size, + NULL, 0) == -1) { + warn("Unable to obtain GELI kernel version"); + } else { + printf("kernel: %u\n", kernver); + } + printf("userland: %u\n", G_ELI_VERSION); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (unsigned char *)&md, + sizeof(md), G_ELI_MAGIC); + if (error != 0) { + /* 'error' is a returned code, not errno: use warnx(). */ + warnx("%s: Unable to read metadata: %s.", name, + strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + eli_version = le32dec(&md.md_version); + printf("%s: %u\n", name, eli_version); + } +} + +static void +eli_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_ELI_MAGIC); + if (error != 0) { + fprintf(stderr, "Cannot
clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +static void +eli_dump(struct gctl_req *req) +{ + struct g_eli_metadata md; + const char *name; + int i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + if (eli_metadata_read(NULL, name, &md) == -1) { + gctl_error(req, "Not fully done."); + continue; + } + printf("Metadata on %s:\n", name); + eli_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/journal/Makefile b/lib/geom/journal/Makefile new file mode 100644 index 000000000000..b87e8283225b --- /dev/null +++ b/lib/geom/journal/Makefile @@ -0,0 +1,10 @@ +PACKAGE=geom + +GEOM_CLASS= journal +SRCS+= geom_journal_ufs.c + +LIBADD= ufs md + +CFLAGS+=-I${SRCTOP}/sys + +.include <bsd.lib.mk> diff --git a/lib/geom/journal/Makefile.depend b/lib/geom/journal/Makefile.depend new file mode 100644 index 000000000000..9bf396c48894 --- /dev/null +++ b/lib/geom/journal/Makefile.depend @@ -0,0 +1,18 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + lib/libmd \ + lib/libufs \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/journal/geom_journal.c b/lib/geom/journal/geom_journal.c new file mode 100644 index 000000000000..85eb6f88ae84 --- /dev/null +++ b/lib/geom/journal/geom_journal.c @@ -0,0 +1,348 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/journal/g_journal.h> +#include <core/geom.h> +#include <misc/subr.h> + +#include "geom_journal.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_JOURNAL_VERSION; + +static void journal_main(struct gctl_req *req, unsigned flags); +static void journal_clear(struct gctl_req *req); +static void journal_dump(struct gctl_req *req); +static void journal_label(struct gctl_req *req); + +struct g_command class_commands[] = { + { "clear", G_FLAG_VERBOSE, journal_main, G_NULL_OPTS, + "[-v] prov ..." + }, + { "dump", 0, journal_main, G_NULL_OPTS, + "prov ..." + }, + { "label", G_FLAG_VERBOSE, journal_main, + { + { 'c', "checksum", NULL, G_TYPE_BOOL }, + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 's', "jsize", "-1", G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-cfhv] [-s jsize] dataprov [jprov]" + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." 
+ }, + { "sync", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v]" + }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +static void +journal_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + journal_label(req); + else if (strcmp(name, "clear") == 0) + journal_clear(req); + else if (strcmp(name, "dump") == 0) + journal_dump(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +static int +g_journal_fs_exists(const char *prov) +{ + + if (g_journal_ufs_exists(prov)) + return (1); +#if 0 + if (g_journal_otherfs_exists(prov)) + return (1); +#endif + return (0); +} + +static int +g_journal_fs_using_last_sector(const char *prov) +{ + + if (g_journal_ufs_using_last_sector(prov)) + return (1); +#if 0 + if (g_journal_otherfs_using_last_sector(prov)) + return (1); +#endif + return (0); +} + +static void +journal_label(struct gctl_req *req) +{ + struct g_journal_metadata md; + const char *data, *journal, *str; + u_char sector[512]; + intmax_t jsize, msize, ssize; + int error, force, i, nargs, checksum, hardcode; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + str = NULL; /* gcc */ + + strlcpy(md.md_magic, G_JOURNAL_MAGIC, sizeof(md.md_magic)); + md.md_version = G_JOURNAL_VERSION; + md.md_id = arc4random(); + md.md_joffset = 0; + md.md_jid = 0; + md.md_flags = GJ_FLAG_CLEAN; + checksum = gctl_get_int(req, "checksum"); + if (checksum) + md.md_flags |= GJ_FLAG_CHECKSUM; + force = gctl_get_int(req, "force"); + hardcode = gctl_get_int(req, "hardcode"); + + if (nargs != 1 && nargs != 2) { + gctl_error(req, "Invalid number of arguments."); + return; + } + + /* Verify the given providers. 
*/ + for (i = 0; i < nargs; i++) { + str = gctl_get_ascii(req, "arg%d", i); + if (g_get_mediasize(str) == 0) { + gctl_error(req, "Invalid provider %s.", str); + return; + } + } + + data = gctl_get_ascii(req, "arg0"); + jsize = gctl_get_intmax(req, "jsize"); + journal = NULL; + switch (nargs) { + case 1: + if (!force && g_journal_fs_exists(data)) { + gctl_error(req, "File system exists on %s and this " + "operation would destroy it.\nUse -f if you " + "really want to do it.", data); + return; + } + journal = data; + msize = g_get_mediasize(data); + ssize = g_get_sectorsize(data); + if (jsize == -1) { + /* + * No journal size specified. 1GB should be safe + * default. + */ + jsize = 1073741824ULL; + } else { + if (jsize < 104857600) { + gctl_error(req, "Journal too small."); + return; + } + if ((jsize % ssize) != 0) { + gctl_error(req, "Invalid journal size."); + return; + } + } + if (jsize + ssize >= msize) { + gctl_error(req, "Provider too small for journalling. " + "You can try smaller jsize (default is %jd).", + jsize); + return; + } + md.md_jstart = msize - ssize - jsize; + md.md_jend = msize - ssize; + break; + case 2: + if (!force && g_journal_fs_using_last_sector(data)) { + gctl_error(req, "File system on %s is using the last " + "sector and this operation is going to overwrite " + "it. Use -f if you really want to do it.", data); + return; + } + journal = gctl_get_ascii(req, "arg1"); + if (jsize != -1) { + gctl_error(req, "jsize argument is valid only for " + "all-in-one configuration."); + return; + } + msize = g_get_mediasize(journal); + ssize = g_get_sectorsize(journal); + md.md_jstart = 0; + md.md_jend = msize - ssize; + break; + } + + if (g_get_sectorsize(data) != g_get_sectorsize(journal)) { + gctl_error(req, "Not equal sector sizes."); + return; + } + + /* + * Clear last sector first, to spoil all components if device exists. 
+ */ + for (i = 0; i < nargs; i++) { + str = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(str, NULL); + if (error != 0) { + gctl_error(req, "Cannot clear metadata on %s: %s.", str, + strerror(error)); + return; + } + } + + /* + * Ok, store metadata. + */ + for (i = 0; i < nargs; i++) { + switch (i) { + case 0: + str = data; + md.md_type = GJ_TYPE_DATA; + if (nargs == 1) + md.md_type |= GJ_TYPE_JOURNAL; + break; + case 1: + str = journal; + md.md_type = GJ_TYPE_JOURNAL; + break; + } + md.md_provsize = g_get_mediasize(str); + assert(md.md_provsize != 0); + if (!hardcode) + bzero(md.md_provider, sizeof(md.md_provider)); + else { + if (strncmp(str, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + str += sizeof(_PATH_DEV) - 1; + strlcpy(md.md_provider, str, sizeof(md.md_provider)); + } + journal_metadata_encode(&md, sector); + error = g_metadata_store(str, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Cannot store metadata on %s: %s.\n", + str, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata value stored on %s.\n", str); + } +} + +static void +journal_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_JOURNAL_MAGIC); + if (error != 0) { + fprintf(stderr, "Cannot clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +static void +journal_dump(struct gctl_req *req) +{ + struct g_journal_metadata md, tmpmd; + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, 
"arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_JOURNAL_MAGIC); + if (error != 0) { + fprintf(stderr, "Cannot read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (journal_metadata_decode((u_char *)&tmpmd, &md) != 0) { + fprintf(stderr, "MD5 hash mismatch for %s, skipping.\n", + name); + gctl_error(req, "Not fully done."); + continue; + } + printf("Metadata on %s:\n", name); + journal_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/journal/geom_journal.h b/lib/geom/journal/geom_journal.h new file mode 100644 index 000000000000..a3c7651cddb5 --- /dev/null +++ b/lib/geom/journal/geom_journal.h @@ -0,0 +1,33 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _GEOM_JOURNAL_H_ +#define _GEOM_JOURNAL_H_ +int g_journal_ufs_exists(const char *prov); +int g_journal_ufs_using_last_sector(const char *prov); +#endif /* !_GEOM_JOURNAL_H_ */ diff --git a/lib/geom/journal/geom_journal_ufs.c b/lib/geom/journal/geom_journal_ufs.c new file mode 100644 index 000000000000..fc81663ec96a --- /dev/null +++ b/lib/geom/journal/geom_journal_ufs.c @@ -0,0 +1,77 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/disklabel.h> +#include <sys/mount.h> + +#include <ufs/ufs/dinode.h> +#include <ufs/ffs/fs.h> + +#include <libufs.h> +#include <libgeom.h> +#include <core/geom.h> +#include <misc/subr.h> + +#include "geom_journal.h" + +static struct fs * +read_superblock(const char *prov) +{ + static struct uufsd disk; + struct fs *fs; + + if (ufs_disk_fillout(&disk, prov) == -1) + return (NULL); + fs = &disk.d_fs; + ufs_disk_close(&disk); + return (fs); +} + +int +g_journal_ufs_exists(const char *prov) +{ + + return (read_superblock(prov) != NULL); +} + +int +g_journal_ufs_using_last_sector(const char *prov) +{ + struct fs *fs; + off_t psize, fssize; + + fs = read_superblock(prov); + if (fs == NULL) + return (0); + /* Provider size in 512 bytes blocks. */ + psize = g_get_mediasize(prov) / DEV_BSIZE; + /* File system size in 512 bytes blocks. */ + fssize = fsbtodb(fs, fs->fs_size); + return (psize <= fssize); +} diff --git a/lib/geom/journal/gjournal.8 b/lib/geom/journal/gjournal.8 new file mode 100644 index 000000000000..f9959ffa0f3f --- /dev/null +++ b/lib/geom/journal/gjournal.8 @@ -0,0 +1,344 @@ +.\" Copyright (c) 2006-2009 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd February 17, 2009 +.Dt GJOURNAL 8 +.Os +.Sh NAME +.Nm gjournal +.Nd "control utility for journaled devices" +.Sh SYNOPSIS +.Nm +.Cm label +.Op Fl cfhv +.Op Fl s Ar jsize +.Ar dataprov +.Op Ar jprov +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm sync +.Op Fl v +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm dump +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for journal configuration on the given GEOM provider. +The journal and data may be stored on the same provider or on two separate +providers. +This is block level journaling, not file system level journaling, which means +everything gets logged, e.g.\& for file systems, it journals both data and +metadata. 
+The +.Nm +GEOM class can talk to file systems, which allows the use of +.Nm +for file system journaling and to keep file systems in a consistent state. +At this time, only UFS file system is supported. +.Pp +To configure journaling on the UFS file system using +.Nm , +one should first create a +.Nm +provider using the +.Nm +utility, then run +.Xr newfs 8 +or +.Xr tunefs 8 +on it with the +.Fl J +flag which instructs UFS to cooperate with the +.Nm +provider below. +There are important differences in how journaled UFS works. +The most important one is that +.Xr sync 2 +and +.Xr fsync 2 +system calls do not work as expected anymore. +To ensure that data is stored on the data provider, the +.Nm Cm sync +command should be used after calling +.Xr sync 2 . +For the best performance possible, soft-updates should be disabled when +.Nm +is used. +It is also safe and recommended to use the +.Cm async +.Xr mount 8 +option. +.Pp +When +.Nm +is configured on top of +.Xr gmirror 8 +or +.Xr graid3 8 +providers, it also keeps them in a consistent state, thus +automatic synchronization on power failure or system crash may be disabled +on those providers. +.Pp +The +.Nm +utility uses on-disk metadata, stored in the provider's last sector, +to store all needed information. +This could be a problem when an existing file system is converted to use +.Nm . +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm status" +.It Cm label +Configures +.Nm +on the given provider(s). +If only one provider is given, both data and journal are stored on the same +provider. +If two providers are given, the first one will be used as data provider and the +second will be used as the journal provider. +.Pp +Additional options include: +.Bl -tag -width ".Fl s Ar jsize" +.It Fl c +Checksum journal records. 
+.It Fl f +May be used to convert an existing file system to use +.Nm , +but only if the journal will be configured on a separate provider and if the +last sector in the data provider is not used by the existing file system. +If +.Nm +detects that the last sector is used, it will refuse to overwrite it +and return an error. +This behavior may be forced by using the +.Fl f +flag, which will force +.Nm +to overwrite the last sector. +.It Fl h +Hardcode provider names in metadata. +.It Fl s Ar jsize +Specifies size of the journal if only one provider is used for both data and +journal. +The default is one gigabyte. +Size should be chosen based on provider's load, and not on its size; +recommended minimum is twice the size of the physical memory installed. +It is not recommended to use +.Nm +for small file systems (e.g.: only few gigabytes big). +.El +.It Cm clear +Clear metadata on the given providers. +.It Cm stop +Stop the given provider. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Stop the given provider even if it is opened. +.El +.It Cm sync +Trigger journal switch and enforce sending data to the data provider. +.It Cm dump +Dump metadata stored on the given providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options include: +.Bl -tag -width ".Fl v" +.It Fl v +Be more verbose. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. 
+.Sh EXAMPLES +Create a +.Nm +based UFS file system and mount it: +.Bd -literal -offset indent +gjournal load +gjournal label da0 +newfs -J /dev/da0.journal +mount -o async /dev/da0.journal /mnt +.Ed +.Pp +Configure journaling on an existing file system, but only if +.Nm +allows this (i.e., if the last sector is not already used by the file system): +.Bd -literal -offset indent +umount /dev/da0s1d +gjournal label da0s1d da0s1e && \e + tunefs -J enable -n disable da0s1d.journal && \e + mount -o async /dev/da0s1d.journal /mnt || \e + mount /dev/da0s1d /mnt +.Ed +.Sh SYSCTLS +Gjournal adds the sysctl level kern.geom.journal. +The string and integer information available is detailed below. +The changeable column shows whether a process with appropriate privilege may +change the value. +.Bl -column "accept_immediatelyXXXXXX" integerXXX -offset indent +.It Sy "sysctl name Type Changeable" +.It "debug integer yes" +.It "switch_time integer yes" +.It "force_switch integer yes" +.It "parallel_flushes integer yes" +.It "accept_immediately integer yes" +.It "parallel_copies integer yes" +.It "record_entries integer yes" +.It "optimize integer yes" +.El +.Bl -tag -width 6n +.It Li debug +Setting a non-zero value enables debugging at various levels. +Debug level 1 will record actions at a journal level, relating to journal +switches, metadata updates, etc. +Debug level 2 will record actions at a higher level, relating to the numbers of +entries in journals, access requests, etc. +Debug level 3 will record verbose detail, including insertion of I/Os to the +journal. +.It Li switch_time +The maximum number of seconds a journal is allowed to remain open before +switching to a new journal. +.It Li force_switch +Force a journal switch when the journal uses more than N% of the free journal +space. +.It Li parallel_flushes +The number of flush I/O requests to be sent in parallel when flushing the +journal to the data provider. 
+.It Li accept_immediately +The maximum number of I/O requests accepted at the same time. +.It Li parallel_copies +The number of copy I/O requests to send in parallel. +.It Li record_entries +The maximum number of record entries to allow in a single journal. +.It Li optimize +Controls whether entries in a journal will be optimized by combining overlapping +I/Os into a single I/O and reordering the entries in a journal. +This can be disabled by setting the sysctl to 0. +.El +.Ss cache +The string and integer information available for the cache level +is detailed below. +The changeable column shows whether a process with appropriate +privilege may change the value. +.Bl -column "alloc_failuresXXXXXX" integerXXX -offset indent +.It Sy "sysctl name Type Changeable" +.It "used integer no" +.It "limit integer yes" +.It "divisor integer no" +.It "switch integer yes" +.It "misses integer yes" +.It "alloc_failures integer yes" +.El +.Bl -tag -width 6n +.It Li used +The number of bytes currently allocated to the cache. +.It Li limit +The maximum number of bytes to be allocated to the cache. +.It Li divisor +Sets the cache size to be used as a proportion of kmem_size. +A value of 2 (the default) will cause the cache size to be set to 1/2 of the +kmem_size. +.It Li switch +Force a journal switch when this percentage of cache has been used. +.It Li misses +The number of cache misses, when data has been read, but was not found in the +cache. +.It Li alloc_failures +The number of times memory failed to be allocated to the cache because the cache +limit was hit. +.El +.Ss stats +The string and integer information available for the statistics level +is detailed below. +The changeable column shows whether a process with appropriate +privilege may change the value. 
+.Bl -column "skipped_bytesXXXXXX" integerXXX -offset indent +.It Sy "sysctl name Type Changeable" +.It "skipped_bytes integer yes" +.It "combined_ios integer yes" +.It "switches integer yes" +.It "wait_for_copy integer yes" +.It "journal_full integer yes" +.It "low_mem integer yes" +.El +.Bl -tag -width 6n +.It Li skipped_bytes +The number of bytes skipped. +.It Li combined_ios +The number of I/Os which were combined by journal optimization. +.It Li switches +The number of journal switches. +.It Li wait_for_copy +The number of times the journal switch process had to wait for the previous +journal copy to complete. +.It Li journal_full +The number of times the journal was almost full, forcing a journal switch. +.It Li low_mem +The number of times the low_mem hook was called. +.El +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr tunefs 8 , +.Xr umount 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 7.0 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org diff --git a/lib/geom/label/Makefile b/lib/geom/label/Makefile new file mode 100644 index 000000000000..823d5816be64 --- /dev/null +++ b/lib/geom/label/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= label + +.include <bsd.lib.mk> diff --git a/lib/geom/label/Makefile.depend b/lib/geom/label/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/label/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/label/geom_label.c b/lib/geom/label/geom_label.c new file mode 100644 index 000000000000..1daf3df1c5c2 --- /dev/null +++ b/lib/geom/label/geom_label.c @@ -0,0 +1,257 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/label/g_label.h> + +#include "core/geom.h" +#include "misc/subr.h" + +#ifdef STATIC_GEOM_CLASSES +#define PUBSYM(x) glabel_##x +#else +#define PUBSYM(x) x +#endif + +uint32_t PUBSYM(lib_version) = G_LIB_VERSION; +uint32_t PUBSYM(version) = G_LABEL_VERSION; + +static void label_main(struct gctl_req *req, unsigned flags); +static void label_clear(struct gctl_req *req); +static void label_dump(struct gctl_req *req); +static void label_label(struct gctl_req *req); +static void label_refresh(struct gctl_req *req); + +struct g_command PUBSYM(class_commands)[] = { + { "clear", G_FLAG_VERBOSE, label_main, G_NULL_OPTS, + "[-v] dev ..." + }, + { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, G_NULL_OPTS, + "[-v] name dev" + }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." + }, + { "dump", 0, label_main, G_NULL_OPTS, + "dev ..." + }, + { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, label_main, G_NULL_OPTS, + "[-v] name dev" + }, + { "refresh", 0, label_main, G_NULL_OPTS, + "dev ..." + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." 
+ }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +static void +label_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + label_label(req); + else if (strcmp(name, "clear") == 0) + label_clear(req); + else if (strcmp(name, "dump") == 0) + label_dump(req); + else if (strcmp(name, "refresh") == 0) + label_refresh(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +static void +label_label(struct gctl_req *req) +{ + struct g_label_metadata md; + const char *name, *label; + u_char sector[512]; + int error, nargs; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + if (nargs != 2) { + gctl_error(req, "Invalid number of arguments."); + return; + } + + /* + * Clear last sector first to spoil all components if device exists. + */ + name = gctl_get_ascii(req, "arg1"); + error = g_metadata_clear(name, NULL); + if (error != 0) { + gctl_error(req, "Can't store metadata on %s: %s.", name, + strerror(error)); + return; + } + + strlcpy(md.md_magic, G_LABEL_MAGIC, sizeof(md.md_magic)); + md.md_version = G_LABEL_VERSION; + label = gctl_get_ascii(req, "arg0"); + bzero(md.md_label, sizeof(md.md_label)); + strlcpy(md.md_label, label, sizeof(md.md_label)); + md.md_provsize = g_get_mediasize(name); + if (md.md_provsize == 0) { + gctl_error(req, "Can't get mediasize of %s: %s.", name, + strerror(errno)); + return; + } + + /* + * Ok, store metadata. 
+ */ + label_metadata_encode(&md, sector); + error = g_metadata_store(name, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Can't store metadata on %s: %s.\n", name, + strerror(error)); + gctl_error(req, "Not done."); + } + if (verbose) + printf("Metadata value stored on %s.\n", name); +} + +static void +label_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_LABEL_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +static void +label_metadata_dump(const struct g_label_metadata *md) +{ + + printf(" Magic string: %s\n", md->md_magic); + printf("Metadata version: %u\n", (u_int)md->md_version); + printf(" Label: %s\n", md->md_label); +} + +static void +label_dump(struct gctl_req *req) +{ + struct g_label_metadata md, tmpmd; + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_LABEL_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + label_metadata_decode((u_char *)&tmpmd, &md); + printf("Metadata on %s:\n", name); + label_metadata_dump(&md); + printf("\n"); + } +} + +static void +label_refresh(struct gctl_req *req) +{ + const char *name; + int i, nargs, fd; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + 
for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + fd = g_open(name, 1); + if (fd == -1) { + printf("Can't refresh metadata from %s: %s.\n", + name, strerror(errno)); + } else { + printf("Metadata from %s refreshed.\n", name); + (void)g_close(fd); + } + } +} diff --git a/lib/geom/label/glabel.8 b/lib/geom/label/glabel.8 new file mode 100644 index 000000000000..da958b52befd --- /dev/null +++ b/lib/geom/label/glabel.8 @@ -0,0 +1,311 @@ +.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.Dd April 19, 2024 +.Dt GLABEL 8 +.Os +.Sh NAME +.Nm glabel +.Nd "disk labelization control utility" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl v +.Ar name +.Ar dev +.Nm +.Cm destroy +.Op Fl fv +.Ar name ... +.Nm +.Cm label +.Op Fl v +.Ar name +.Ar dev +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm clear +.Op Fl v +.Ar dev ... +.Nm +.Cm dump +.Ar dev ... +.Nm +.Cm refresh +.Ar dev ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for GEOM provider labelization. +A label can be set up on a GEOM provider in two ways: +.Dq manual +or +.Dq automatic . +When using the +.Dq manual +method, no metadata are stored on the devices, so a label has to be configured +by hand every time it is needed. +The +.Dq automatic +method uses on-disk metadata to store the label and detect it automatically in +the future. +.Pp +This GEOM class also provides volume label detection for file systems. +Those labels cannot be set with +.Nm , +but must be set with the appropriate file system utility, e.g.\& for UFS +the file system label is set with +.Xr tunefs 8 . +Currently supported file systems are: +.Pp +.Bl -bullet -offset indent -compact +.It +UFS1 volume names (directory +.Pa /dev/ufs/ ) . +.It +UFS2 volume names (directory +.Pa /dev/ufs/ ) . +.It +UFS1 file system IDs (directory +.Pa /dev/ufsid/ ) . +.It +UFS2 file system IDs (directory +.Pa /dev/ufsid/ ) . +.It +MSDOSFS (FAT12, FAT16, FAT32) (directory +.Pa /dev/msdosfs/ ) . +.It +CD ISO9660 (directory +.Pa /dev/iso9660/ ) . +.It +EXT2FS (directory +.Pa /dev/ext2fs/ ) . +.It +NTFS (directory +.Pa /dev/ntfs/ ) . +.It +Swap Linux (directory +.Pa /dev/swaplinux/ ) . +.El +.Pp +Support for partition metadata is implemented for: +.Pp +.Bl -bullet -offset indent -compact +.It +GPT labels (directory +.Pa /dev/gpt/ ) . +.It +GPT UUIDs (directory +.Pa /dev/gptid/ ) . +.El +.Pp +Generic disk ID strings are exported as labels in the format +.Pa /dev/diskid/GEOM_CLASS-ident +e.g. 
+.Pa /dev/diskid/DISK-6QG3Z026 . +.Pp +Generic labels created and managed solely by +.Nm +are created in the +.Pa /dev/label/ +directory. +Note that generic, automatic labels occupy some space on the device +and thus should not be added to a device already containing a file system. +In particular, +.Nm +reserves the last sector of the device to store the label information. +If the device already contains a file system, +.Nm +will overwrite the last sector, possibly damaging the file system, and the +file system may later overwrite the label sector. +Instead, create a label before initializing the file system, and initialize +that file system on the device created by +.Nm +under the +.Pa /dev/label/ +directory. +Then the file system will correctly account for the space occupied by the +generic label, +since the +.Nm +device will be one sector smaller than the device from which it was created. +.Pp +Note that for all label types, nested GEOM classes will cause additional +device nodes to be created, with context-specific data appended to their +names. +E.g. for every node like +.Pa /dev/label/bigdisk +there will be additional entries for any partitions which the device +contains, like +.Pa /dev/label/bigdiskp1 +and +.Pa /dev/label/bigdiskp1a . +.Pp +.Nm +requires write access to a device to create, modify, and destroy +"automatic" labels. +It will fail to persist changes if they are applied while a file +system on the device is currently mounted. +To ensure that modifications are stored across a reboot, the file system +needs to be unmounted first. +If the device holds the system's root file system, entering into +single user mode and mounting in read-only mode is required to persist +the change. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm create +Create temporary label +.Ar name +for the given provider. +This is the +.Dq manual +method. 
+The kernel module +.Pa geom_label.ko +will be loaded if it is not loaded already. +.It Cm label +Set up a label +.Ar name +for the given provider. +This is the +.Dq automatic +method, where metadata is stored in a provider's last sector. +The kernel module +.Pa geom_label.ko +will be loaded if it is not loaded already. +.It Cm stop +Turn off the given label by its +.Ar name . +This command does not touch on-disk metadata! +.It Cm destroy +Same as +.Cm stop . +.It Cm clear +Clear metadata on the given devices. +.It Cm dump +Dump metadata stored on the given devices. +.It Cm refresh +Refresh / rediscover metadata from the given devices. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width indent +.It Fl f +Force the removal of the specified labels. +.It Fl v +Be more verbose. +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm LABEL +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.label.debug : No 0 +Debug level of the +.Nm LABEL +GEOM class. +This can be set to a number between 0 and 2 inclusive. +If set to 0 minimal debug information is printed, and if set to 2 the +maximum amount of debug information is printed. +.El +.Bl -tag -width indent +.It Va kern.geom.label.*.enable : No 1 +Most +.Nm LABEL +providers implement a +.Xr sysctl 8 +flag and a tunable variable named in the above format. +This flag controls if the label provider will be active, tasting devices +and creating label nodes in the +.Xr devfs 4 +tree. +It is sometimes desirable to disable certain label types if they +conflict with other classes in complex GEOM topologies. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. 
+.Sh EXAMPLES +The following example shows how to set up a label for disk +.Dq Li da2 , +create a file system on it, and mount it: +.Bd -literal -offset indent +glabel label -v usr /dev/da2 +newfs /dev/label/usr +mount /dev/label/usr /usr +[...] +umount /usr +glabel stop usr +glabel unload +.Ed +.Pp +The next example shows how to set up a label for a UFS file system: +.Bd -literal -offset indent +tunefs -L data /dev/da4s1a +mount /dev/ufs/data /mnt/data +.Ed +.Sh SEE ALSO +.Xr geom 4 , +.Xr loader.conf 5 , +.Xr geom 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr sysctl 8 , +.Xr tunefs 8 , +.Xr umount 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.3 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org diff --git a/lib/geom/mirror/Makefile b/lib/geom/mirror/Makefile new file mode 100644 index 000000000000..cf0174ee0766 --- /dev/null +++ b/lib/geom/mirror/Makefile @@ -0,0 +1,7 @@ +PACKAGE=geom + +GEOM_CLASS= mirror + +LIBADD= md + +.include <bsd.lib.mk> diff --git a/lib/geom/mirror/Makefile.depend b/lib/geom/mirror/Makefile.depend new file mode 100644 index 000000000000..27e87393b549 --- /dev/null +++ b/lib/geom/mirror/Makefile.depend @@ -0,0 +1,17 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + lib/libmd \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/mirror/geom_mirror.c b/lib/geom/mirror/geom_mirror.c new file mode 100644 index 000000000000..8b47592803d9 --- /dev/null +++ b/lib/geom/mirror/geom_mirror.c @@ -0,0 +1,495 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2009 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <err.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/mirror/g_mirror.h> +#include <core/geom.h> +#include <misc/subr.h> + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_MIRROR_VERSION; + +/* Default option values, given as strings, used by the command table below. */ +#define GMIRROR_BALANCE "load" +#define GMIRROR_SLICE "4096" +#define GMIRROR_PRIORITY "0" + +static void mirror_main(struct gctl_req *req, unsigned flags); +static void mirror_activate(struct gctl_req *req); +static void mirror_clear(struct gctl_req *req); +static void mirror_dump(struct gctl_req *req); +static void mirror_label(struct gctl_req *req); +static void mirror_resize(struct gctl_req *req, unsigned flags); + +/* + * Command table of the MIRROR class. + * Entries with a NULL handler have no userland function in this file + * (presumably the geom(8) core passes them on unchanged; confirm there). + * The usage strings must stay in sync with gmirror(8). + */ +struct g_command class_commands[] = { + { "activate", G_FLAG_VERBOSE, mirror_main, G_NULL_OPTS, + "[-v] name prov ..." + }, + { "clear", G_FLAG_VERBOSE, mirror_main, G_NULL_OPTS, + "[-v] prov ..." + }, + { "configure", G_FLAG_VERBOSE, NULL, + { + { 'a', "autosync", NULL, G_TYPE_BOOL }, + { 'b', "balance", "", G_TYPE_STRING }, + { 'd', "dynamic", NULL, G_TYPE_BOOL }, + { 'f', "failsync", NULL, G_TYPE_BOOL }, + { 'F', "nofailsync", NULL, G_TYPE_BOOL }, + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 'n', "noautosync", NULL, G_TYPE_BOOL }, + { 'p', "priority", "-1", G_TYPE_NUMBER }, + { 's', "slice", "-1", G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-adfFhnv] [-b balance] [-s slice] name\n" + "[-v] -p priority name prov" + }, + { "create", G_FLAG_VERBOSE, NULL, + { + { 'b', "balance", GMIRROR_BALANCE, G_TYPE_STRING }, + { 'F', "nofailsync", NULL, G_TYPE_BOOL }, + { 'n', "noautosync", NULL, G_TYPE_BOOL }, + { 's', "slice", GMIRROR_SLICE, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-Fnv] [-b balance] [-s slice] name prov ..." + }, + { "deactivate", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov ..." 
+ }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." + }, + { "dump", 0, mirror_main, G_NULL_OPTS, + "prov ..." + }, + { "forget", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name ..." + }, + { "label", G_FLAG_VERBOSE, mirror_main, + { + { 'b', "balance", GMIRROR_BALANCE, G_TYPE_STRING }, + { 'F', "nofailsync", NULL, G_TYPE_BOOL }, + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 'n', "noautosync", NULL, G_TYPE_BOOL }, + { 's', "slice", GMIRROR_SLICE, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-Fhnv] [-b balance] [-s slice] name prov ..." + }, + { "insert", G_FLAG_VERBOSE, NULL, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 'i', "inactive", NULL, G_TYPE_BOOL }, + { 'p', "priority", GMIRROR_PRIORITY, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-hiv] [-p priority] name prov ..." + }, + { "rebuild", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov ..." + }, + { "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov ..." + }, + { "resize", G_FLAG_VERBOSE, mirror_resize, + { + { 's', "size", "*", G_TYPE_STRING }, + G_OPT_SENTINEL + }, + "[-s size] [-v] name" + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." 
+ }, + G_CMD_SENTINEL +}; + +/* Set by mirror_main() when G_FLAG_VERBOSE is passed; read by the handlers. */ +static int verbose = 0; + +/* + * Handler for the commands implemented in this file (label, clear, dump, + * activate): records the verbose flag and dispatches on the "verb" + * request parameter. + * Problems are reported through gctl_error(). + */ +static void +mirror_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + mirror_label(req); + else if (strcmp(name, "clear") == 0) + mirror_clear(req); + else if (strcmp(name, "dump") == 0) + mirror_dump(req); + else if (strcmp(name, "activate") == 0) + mirror_activate(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +/* + * Build the metadata of a new mirror named by "arg0" and store it on every + * provider given as "arg1".."argN". + * The mirror's sector size is the least common multiple of the providers' + * sector sizes; its media size is the smallest provider size less one + * sector, rounded down to a multiple of that sector size. + * A failure before the store loop aborts the command; a failure while + * storing is reported and the remaining providers are still processed. + */ +static void +mirror_label(struct gctl_req *req) +{ + struct g_mirror_metadata md; + u_char sector[512]; + const char *str; + unsigned sectorsize; + off_t mediasize; + intmax_t val; + int error, i, nargs, bal, hardcode; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + if (nargs < 2) { + gctl_error(req, "Too few arguments."); + return; + } + + strlcpy(md.md_magic, G_MIRROR_MAGIC, sizeof(md.md_magic)); + md.md_version = G_MIRROR_VERSION; + str = gctl_get_ascii(req, "arg0"); + strlcpy(md.md_name, str, sizeof(md.md_name)); + md.md_mid = arc4random(); + md.md_all = nargs - 1; + md.md_mflags = 0; + md.md_dflags = 0; + md.md_genid = 0; + md.md_syncid = 1; + md.md_sync_offset = 0; + val = gctl_get_intmax(req, "slice"); + md.md_slice = val; + str = gctl_get_ascii(req, "balance"); + bal = balance_id(str); + if (bal == -1) { + gctl_error(req, "Invalid balance algorithm."); + return; + } + md.md_balance = bal; + if (gctl_get_int(req, "noautosync")) + md.md_mflags |= G_MIRROR_DEVICE_FLAG_NOAUTOSYNC; + if (gctl_get_int(req, "nofailsync")) + md.md_mflags |= G_MIRROR_DEVICE_FLAG_NOFAILSYNC; + hardcode = gctl_get_int(req, "hardcode"); + + /* + * Calculate sectorsize by finding least common multiple from + * sectorsizes of every disk and find the smallest mediasize. 
+ */ + mediasize = 0; + sectorsize = 0; + for (i = 1; i < nargs; i++) { + unsigned ssize; + off_t msize; + + str = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(str); + ssize = g_get_sectorsize(str); + if (msize == 0 || ssize == 0) { + gctl_error(req, "Can't get information about %s: %s.", + str, strerror(errno)); + return; + } + msize -= ssize; + if (mediasize == 0 || (mediasize > 0 && msize < mediasize)) + mediasize = msize; + if (sectorsize == 0) + sectorsize = ssize; + else + sectorsize = g_lcm(sectorsize, ssize); + } + md.md_mediasize = mediasize; + md.md_sectorsize = sectorsize; + md.md_mediasize -= (md.md_mediasize % md.md_sectorsize); + + /* + * Clear last sector first, to spoil all components if device exists. + */ + for (i = 1; i < nargs; i++) { + str = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(str, NULL); + if (error != 0) { + gctl_error(req, "Can't clear metadata on %s: %s.", str, + strerror(error)); + return; + } + } + + /* + * Ok, store metadata (use disk number as priority). 
+ */ + for (i = 1; i < nargs; i++) { + str = gctl_get_ascii(req, "arg%d", i); + md.md_did = arc4random(); + md.md_priority = i - 1; + md.md_provsize = g_get_mediasize(str); + assert(md.md_provsize != 0); + if (!hardcode) + bzero(md.md_provider, sizeof(md.md_provider)); + else { + if (strncmp(str, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + str += sizeof(_PATH_DEV) - 1; + strlcpy(md.md_provider, str, sizeof(md.md_provider)); + } + mirror_metadata_encode(&md, sector); + error = g_metadata_store(str, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Can't store metadata on %s: %s.\n", + str, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata value stored on %s.\n", str); + } +} + +/* + * Clear the mirror metadata on every provider given as an argument. + * A failure on one provider is reported and the remaining ones are still + * processed. + */ +static void +mirror_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_MIRROR_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +/* + * Read, decode and print the mirror metadata of every provider given as an + * argument. + * Providers whose metadata cannot be read or decoded are reported and + * skipped. + */ +static void +mirror_dump(struct gctl_req *req) +{ + struct g_mirror_metadata md, tmpmd; + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_MIRROR_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (mirror_metadata_decode((u_char *)&tmpmd, &md) != 0) { + fprintf(stderr, "MD5 hash mismatch for %s, skipping.\n", + 
name); + gctl_error(req, "Not fully done."); + continue; + } + printf("Metadata on %s:\n", name); + mirror_metadata_dump(&md); + printf("\n"); + } +} + +/* + * Clear G_MIRROR_DISK_FLAG_INACTIVE in the on-disk metadata of every + * provider ("arg1".."argN") whose metadata names the mirror "arg0". + * Providers that fail any step are reported and skipped. + */ +static void +mirror_activate(struct gctl_req *req) +{ + struct g_mirror_metadata md, tmpmd; + const char *name, *path; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 2) { + gctl_error(req, "Too few arguments."); + return; + } + name = gctl_get_ascii(req, "arg0"); + + for (i = 1; i < nargs; i++) { + path = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(path, (u_char *)&tmpmd, sizeof(tmpmd), + G_MIRROR_MAGIC); + if (error != 0) { + fprintf(stderr, "Cannot read metadata from %s: %s.\n", + path, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (mirror_metadata_decode((u_char *)&tmpmd, &md) != 0) { + fprintf(stderr, + "MD5 hash mismatch for provider %s, skipping.\n", + path); + gctl_error(req, "Not fully done."); + continue; + } + if (strcmp(md.md_name, name) != 0) { + fprintf(stderr, + "Provider %s is not the mirror %s component.\n", + path, name); + gctl_error(req, "Not fully done."); + continue; + } + md.md_dflags &= ~G_MIRROR_DISK_FLAG_INACTIVE; + mirror_metadata_encode(&md, (u_char *)&tmpmd); + error = g_metadata_store(path, (u_char *)&tmpmd, sizeof(tmpmd)); + if (error != 0) { + fprintf(stderr, "Cannot write metadata to %s: %s.\n", + path, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Provider %s activated.\n", path); + } +} + +/* Return the class called name in mesh, or NULL if there is none. */ +static struct gclass * +find_class(struct gmesh *mesh, const char *name) +{ + struct gclass *classp; + + LIST_FOREACH(classp, &mesh->lg_class, lg_class) { + if (strcmp(classp->lg_name, name) == 0) + return (classp); + } + return (NULL); +} + +/* Return the geom called name in classp, or NULL if there is none. */ +static struct ggeom * +find_geom(struct gclass *classp, const char *name) +{ + struct ggeom *gp; + + LIST_FOREACH(gp, &classp->lg_geom, lg_geom) { + if (strcmp(gp->lg_name, name) == 0) + return (gp); + } + return (NULL); +} + 
+/* + * Compute the new size of the mirror "arg0", store it back in the "size" + * request parameter as a decimal string and issue the request. + * With size "*" (the default) the result is the smallest consumer's + * provider size less one sector; the command fails if that equals the + * current size. + * Otherwise the value is parsed by g_parse_lba() and multiplied by the + * provider's sector size. + * Every error terminates the process through errx()/errc(), or abort() + * when the "class" or "arg0" parameter is missing. + */ +static void +mirror_resize(struct gctl_req *req, unsigned flags __unused) +{ + struct gmesh mesh; + struct gclass *classp; + struct ggeom *gp; + struct gprovider *pp; + struct gconsumer *cp; + off_t size; + int error, nargs; + const char *name, *g; + char ssize[30]; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 1) + errx(EXIT_FAILURE, "Invalid number of arguments."); + name = gctl_get_ascii(req, "class"); + if (name == NULL) + abort(); + g = gctl_get_ascii(req, "arg0"); + if (g == NULL) + abort(); + error = geom_gettree_geom(&mesh, name, g, 1); + if (error) + errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); + classp = find_class(&mesh, name); + if (classp == NULL) + errx(EXIT_FAILURE, "Class %s not found.", name); + gp = find_geom(classp, g); + if (gp == NULL) + errx(EXIT_FAILURE, "No such geom: %s.", g); + pp = LIST_FIRST(&gp->lg_provider); + if (pp == NULL) + errx(EXIT_FAILURE, "Provider of geom %s not found.", g); + size = pp->lg_mediasize; + name = gctl_get_ascii(req, "size"); + if (name == NULL) + errx(EXIT_FAILURE, "The size is not specified."); + if (*name == '*') { +#define CSZ(c) ((c)->lg_provider->lg_mediasize - \ + (c)->lg_provider->lg_sectorsize) + /* Find the maximum possible size */ + LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) { + if (CSZ(cp) > size) + size = CSZ(cp); + } + LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) { + if (CSZ(cp) < size) + size = CSZ(cp); + } +#undef CSZ + if (size == pp->lg_mediasize) + /* errx() appends the newline itself. */ + errx(EXIT_FAILURE, + "Cannot expand provider %s", + pp->lg_name); + } else { + error = g_parse_lba(name, pp->lg_sectorsize, &size); + if (error) + errc(EXIT_FAILURE, error, "Invalid size param"); + size *= pp->lg_sectorsize; + } + snprintf(ssize, sizeof(ssize), "%ju", (uintmax_t)size); + gctl_change_param(req, "size", -1, ssize); + geom_deletetree(&mesh); + gctl_issue(req); +} diff --git a/lib/geom/mirror/gmirror.8 b/lib/geom/mirror/gmirror.8 new file mode 100644 index 000000000000..aeffb2d948b1 --- /dev/null +++ 
b/lib/geom/mirror/gmirror.8 @@ -0,0 +1,454 @@ +.\" Copyright (c) 2004-2009 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.Dd January 23, 2025 +.Dt GMIRROR 8 +.Os +.Sh NAME +.Nm gmirror +.Nd "control utility for mirrored devices" +.Sh SYNOPSIS +To compile GEOM_MIRROR into your kernel, add the following lines to your +kernel configuration file: +.Bd -ragged -offset indent +.Cd "options GEOM_MIRROR" +.Ed +.Pp +Alternatively, to load the GEOM_MIRROR module at boot time, add the following +line to your +.Xr loader.conf 5 : +.Bd -literal -offset indent +geom_mirror_load="YES" +.Ed +.Pp +.No Usage of the Nm +utility: +.Pp +.Nm +.Cm label +.Op Fl Fhnv +.Op Fl b Ar balance +.Op Fl s Ar slice +.Ar name +.Ar prov ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm create +.Op Fl Fnv +.Op Fl b Ar balance +.Op Fl s Ar slice +.Ar name +.Ar prov ... +.Nm +.Cm configure +.Op Fl adfFhnv +.Op Fl b Ar balance +.Op Fl s Ar slice +.Ar name +.Nm +.Cm configure +.Op Fl v +.Fl p Ar priority +.Ar name +.Ar prov +.Nm +.Cm rebuild +.Op Fl v +.Ar name +.Ar prov ... +.Nm +.Cm resize +.Op Fl v +.Op Fl s Ar size +.Ar name +.Nm +.Cm insert +.Op Fl hiv +.Op Fl p Ar priority +.Ar name +.Ar prov ... +.Nm +.Cm remove +.Op Fl v +.Ar name +.Ar prov ... +.Nm +.Cm activate +.Op Fl v +.Ar name +.Ar prov ... +.Nm +.Cm deactivate +.Op Fl v +.Ar name +.Ar prov ... +.Nm +.Cm destroy +.Op Fl fv +.Ar name ... +.Nm +.Cm forget +.Op Fl v +.Ar name ... +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm dump +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for mirror (RAID1) configurations. +After a mirror's creation, all components are detected and configured +automatically. +All operations like failure detection, stale component detection, rebuild +of stale components, etc.\& are also done automatically. +The +.Nm +utility uses on-disk metadata (stored in the provider's last sector) to store all needed +information. +Since the last sector is used for this purpose, it is possible to place a root +file system on a mirror. 
+.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm deactivate" +.It Cm label +Create a mirror. +The order of components is important, because a component's priority is based on its position +(starting from 0 to 255). +The component with the biggest priority is used by the +.Cm prefer +balance algorithm +and is also used as a master component when resynchronization is needed, +e.g.\& after a power failure when the device was open for writing. +.Pp +Additional options include: +.Bl -tag -width ".Fl b Ar balance" +.It Fl b Ar balance +Specifies the balance algorithm to use, one of: +.Bl -tag -width ".Cm round-robin" +.It Cm load +Read from the component with the lowest load. +This is the default balance algorithm. +.It Cm prefer +Read from the component with the biggest priority. +.It Cm round-robin +Use the round-robin algorithm when choosing a component to read from. +.It Cm split +Split read requests that are bigger than or equal to the slice size into N pieces, +where N is the number of active components. +.El +.It Fl F +Do not synchronize after a power failure or system crash. +Assumes the device is in a consistent state. +.It Fl h +Hardcode providers' names in metadata. +.It Fl n +Turn off autosynchronization of stale components. +.It Fl s Ar slice +When using the +.Cm split +balance algorithm and an I/O READ request is bigger than or equal to this value, +the I/O request will be split into N pieces, where N is the number of active +components. +Defaults to 4096 bytes. +.El +.It Cm clear +Clear metadata on the given providers. +.It Cm create +Similar to +.Cm label , +but creates a mirror without storing on-disk metadata in the last sector. +This special "manual" operation mode assumes some external control to manage +mirror detection after reboot, device hot-plug and other external events. +.It Cm configure +Configure the given device. 
+.Pp +Additional options include: +.Bl -tag -width ".Fl p Ar priority" +.It Fl a +Turn on autosynchronization of stale components. +.It Fl b Ar balance +Specifies the balance algorithm to use. +.It Fl d +Do not hardcode providers' names in metadata. +.It Fl f +Synchronize the device after a power failure or system crash. +.It Fl F +Do not synchronize after a power failure or system crash. +Assumes the device is in a consistent state. +.It Fl h +Hardcode providers' names in metadata. +.It Fl n +Turn off autosynchronization of stale components. +.It Fl p Ar priority +Specifies the priority for the given component +.Ar prov . +.It Fl s Ar slice +Specifies the slice size for the +.Cm split +balance algorithm. +.El +.It Cm rebuild +Rebuild the given mirror components forcibly. +If autosynchronization was not turned off for the given device, this command +should be unnecessary. +.It Cm resize +Change the size of the given mirror. +.Pp +Additional options include: +.Bl -tag -width ".Fl s Ar size" +.It Fl s Ar size +The new size of the mirror, expressed in logical blocks. +If this option is omitted, the size is automatically calculated as the +maximum available size. +.El +.It Cm insert +Add the given component(s) to the existing mirror. +.Pp +Additional options include: +.Bl -tag -width ".Fl p Ar priority" +.It Fl h +Hardcode providers' names in metadata. +.It Fl i +Mark component(s) as inactive immediately after insertion. +.It Fl p Ar priority +Specifies the priority of the given component(s). +.El +.It Cm remove +Remove the given component(s) from the mirror and clear the metadata on them. +.It Cm activate +Activate the given component(s), which were marked as inactive before. +.It Cm deactivate +Mark the given component(s) as inactive, so they will not be automatically +connected to the mirror. +.It Cm destroy +Stop the given mirror and clear metadata on all its components. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Stop the given mirror even if it is opened. 
+.El +.It Cm forget +Forget about components which are not connected. +This command is useful when a disk has failed and cannot be reconnected, preventing the +.Cm remove +command from being used to remove it. +.It Cm stop +Stop the given mirror. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Stop the given mirror even if it is opened. +.El +.It Cm dump +Dump metadata stored on the given providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options include: +.Bl -tag -width ".Fl v" +.It Fl v +Be more verbose. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +Use 3 disks to set up a mirror. +Choose the split balance algorithm and split only +requests which are bigger than or equal to 2kB. +Create a file system, +mount it, then unmount it and stop the device: +.Bd -literal -offset indent +gmirror label -v -b split -s 2048 data da0 da1 da2 +newfs /dev/mirror/data +mount /dev/mirror/data /mnt +\&... +umount /mnt +gmirror stop data +gmirror unload +.Ed +.Pp +Create a mirror on a disk with valid data (note that the last sector of the disk +will be overwritten). +Add another disk to this mirror, +so it will be synchronized with the existing disk: +.Bd -literal -offset indent +gmirror label -v -b round-robin data da0 +gmirror insert data da1 +.Ed +.Pp +Create a mirror, but do not use the automatic synchronization feature. +Add another disk and rebuild it: +.Bd -literal -offset indent +gmirror label -v -n -b load data da0 da1 +gmirror insert data da2 +gmirror rebuild data da2 +.Ed +.Pp +One disk failed. +Replace it with a brand new one: +.Bd -literal -offset indent +gmirror forget data +gmirror insert data da1 +.Ed +.Pp +Create a mirror, deactivate one component, do the backup and connect it again. 
+It will not be resynchronized, if there is no need to do so (there were no writes in +the meantime): +.Bd -literal -offset indent +gmirror label data da0 da1 +gmirror deactivate data da1 +dd if=/dev/da1 of=/backup/data.img bs=1m +gmirror activate data da1 +.Ed +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to configure behavior for all mirrors. +.Bl -tag -width indent +.It Va kern.geom.mirror.debug +Control the verbosity of kernel logging related to mirrors. +A value larger than 0 will enable debug logging. +.It Va kern.geom.mirror.timeout +The amount of time, in seconds, to wait for all copies of a mirror to +appear before starting the mirror. +Disks that appear after the mirror has been started are not automatically +added to the mirror. +.It Va kern.geom.mirror.idletime +The amount of time, in seconds, which must elapse after the last write to +a mirror before that mirror is marked clean. +Clean mirrors do not need to be synchronized after a power failure or +system crash. +A small value may result in frequent overwrites of the disks' metadata +sectors, and thus may reduce the longevity of the disks. +.It Va kern.geom.mirror.disconnect_on_failure +Determine whether a disk is automatically removed from its mirror when an +I/O request to that disk fails. +.It Va kern.geom.mirror.sync_requests +The number of parallel I/O requests used while synchronizing a mirror. +This parameter may only be configured as a +.Xr loader.conf 5 +tunable. +.It Va kern.geom.mirror.sync_update_period +The period, in seconds, at which a synchronizing mirror's metadata is +updated. +Periodic updates are used to record a synchronization's progress so that +an interrupted synchronization may be resumed starting at the recorded +offset, rather than at the beginning. +A smaller value results in more accurate progress tracking, but also +increases the number of non-sequential writes to the disk being synchronized. 
+If the sysctl value is 0, no updates are performed until the synchronization +is complete. +.El +.Sh NOTES +Doing kernel dumps to +.Nm +providers is possible, but some conditions have to be met. +First of all, a kernel dump will go only to one component and +.Nm +always chooses the component with the highest priority. +Reading a dump from the mirror on boot will only work if the +.Cm prefer +balance algorithm is used (that way +.Nm +will read only from the component with the highest priority). +If you use a different balance algorithm, you should create an +.Xr rc 8 +script that sets the balance algorithm to +.Cm prefer , +for example with the following command: +.Bd -literal -offset indent +gmirror configure -b prefer data +.Ed +.Pp +Make sure that +.Xr rcorder 8 +schedules the new script before +.Xr savecore 8 . +The desired balance algorithm can be restored later on +by placing the following command in +.Xr rc.local 8 : +.Bd -literal -offset indent +gmirror configure -b round-robin data +.Ed +.Pp +The decision about which component to use for dumping is made when +.Xr dumpon 8 +is called. +If on the next boot a component with a higher priority is available, +the prefer algorithm will choose to read from it and +.Xr savecore 8 +will find nothing. +If on the next boot the component with the highest priority is being synchronized, +the prefer balance algorithm will read from the next one, and the dump will not be found +there either. +.Sh SEE ALSO +.Xr geom 4 , +.Xr dumpon 8 , +.Xr geom 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr savecore 8 , +.Xr sysctl 8 , +.Xr umount 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.3 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org +.Sh BUGS +There should be a way to change a component's priority inside a running mirror. +.Pp +There should be a section with an implementation description. 
diff --git a/lib/geom/mountver/Makefile b/lib/geom/mountver/Makefile new file mode 100644 index 000000000000..6ea3705261c2 --- /dev/null +++ b/lib/geom/mountver/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= mountver + +.include <bsd.lib.mk> diff --git a/lib/geom/mountver/Makefile.depend b/lib/geom/mountver/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/mountver/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/mountver/geom_mountver.c b/lib/geom/mountver/geom_mountver.c new file mode 100644 index 000000000000..79e245377664 --- /dev/null +++ b/lib/geom/mountver/geom_mountver.c @@ -0,0 +1,55 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2010 Edward Tomasz Napierala <trasz@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <stdio.h> +#include <stdint.h> +#include <libgeom.h> +#include <geom/mountver/g_mountver.h> + +#include "core/geom.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_MOUNTVER_VERSION; +/* Command table of the MOUNTVER class: "create" (flagged G_FLAG_LOADKLD, no options) and "destroy" (boolean -f "force" option). Both entries have a NULL handler, so no userland function in this file runs for them. */ +struct g_command class_commands[] = { + { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, + { + G_OPT_SENTINEL + }, + "[-v] prov ..." + }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name" + }, + G_CMD_SENTINEL +}; diff --git a/lib/geom/mountver/gmountver.8 b/lib/geom/mountver/gmountver.8 new file mode 100644 index 000000000000..70cdf83a8896 --- /dev/null +++ b/lib/geom/mountver/gmountver.8 @@ -0,0 +1,130 @@ +.\"- +.\" Copyright (c) 2010 Edward Tomasz Napierala +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd May 18, 2015 +.Dt GMOUNTVER 8 +.Os +.Sh NAME +.Nm gmountver +.Nd "control utility for disk mount verification GEOM class" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl v +.Ar prov ... +.Nm +.Cm destroy +.Op Fl fv +.Ar name +.Nm +.Cm list +.Nm +.Cm status +.Op Fl s Ar name +.Nm +.Cm load +.Op Fl v +.Nm +.Cm unload +.Op Fl v +.Sh DESCRIPTION +The +.Nm +utility is used to control the mount verification GEOM class. +When configured, it passes all the I/O requests to the underlying provider. +When the underlying provider disappears - for example because the disk device +got disconnected - it queues all the I/O requests and waits for the provider +to reappear. +When that happens, it attaches to it and sends the queued requests. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm create +Enable mount verification for the given provider. +If the operation succeeds, a new GEOM provider will be created using the +given provider's name with a +.Ql .mountver +suffix. +The kernel module +.Pa geom_mountver.ko +will be loaded if it is not loaded already. +.It Cm destroy +Destroy +.Ar name . +.It Cm list +See +.Xr geom 8 . 
+.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width indent +.It Fl f +Force the removal of the specified mountver device. +.It Fl v +Be more verbose. +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm MOUNTVER +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.mountver.debug : No 0 +Debug level of the +.Nm MOUNTVER +GEOM class. +This can be set to a number between 0 and 3 inclusive. +If set to 0 minimal debug information is printed, and if set to 3 the +maximum amount of debug information is printed. +.It Va kern.geom.mountver.check_ident : No 1 +This can be set to 0 or 1. +If set to 0, +.Nm +will reattach to the device even if the device reports a different disk ID. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 9.0 . +.Sh AUTHORS +.An Edward Tomasz Napierala Aq Mt trasz@FreeBSD.org diff --git a/lib/geom/multipath/Makefile b/lib/geom/multipath/Makefile new file mode 100644 index 000000000000..1c7341145017 --- /dev/null +++ b/lib/geom/multipath/Makefile @@ -0,0 +1,7 @@ +PACKAGE=geom + +GEOM_CLASS= multipath + +CFLAGS+= -I${SRCTOP}/sys + +.include <bsd.lib.mk> diff --git a/lib/geom/multipath/Makefile.depend b/lib/geom/multipath/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/multipath/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/multipath/geom_multipath.c b/lib/geom/multipath/geom_multipath.c new file mode 100644 index 000000000000..e4cb123594c3 --- /dev/null +++ b/lib/geom/multipath/geom_multipath.c @@ -0,0 +1,323 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2006 Mathew Jacob <mjacob@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <unistd.h> +#include <uuid.h> +#include <geom/multipath/g_multipath.h> + +#include "core/geom.h" +#include "misc/subr.h" + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_MULTIPATH_VERSION; + +static void mp_main(struct gctl_req *, unsigned int); +static void mp_label(struct gctl_req *); +static void mp_clear(struct gctl_req *); +static void mp_prefer(struct gctl_req *); + +/* + * Command table of the multipath class: one entry per verb, giving its + * flags, handler, options and usage string. + * Entries that name mp_main as handler are dispatched by mp_main() below. + * The "prefer" usage string mirrors mp_prefer(), which requires exactly a + * geom name and one provider. + */ +struct g_command class_commands[] = { + { + "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, + { + { 'A', "active_active", NULL, G_TYPE_BOOL }, + { 'R', "active_read", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-vAR] name prov ..." + }, + { + "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main, + { + { 'A', "active_active", NULL, G_TYPE_BOOL }, + { 'R', "active_read", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-vAR] name prov ..." + }, + { "configure", G_FLAG_VERBOSE, NULL, + { + { 'A', "active_active", NULL, G_TYPE_BOOL }, + { 'P', "active_passive", NULL, G_TYPE_BOOL }, + { 'R', "active_read", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-vAPR] name" + }, + { + "add", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" + }, + { + "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" + }, + { + "prefer", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS, + "[-v] name prov" 
+ }, + { + "fail", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" + }, + { + "restore", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" + }, + { + "rotate", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name" + }, + { + "getactive", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name" + }, + { + "destroy", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name" + }, + { + "stop", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name" + }, + { + "clear", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS, + "[-v] prov ..." + }, + G_CMD_SENTINEL +}; + +/* + * Dispatch on the "verb" request argument to the label, clear or prefer + * handler; a missing or unknown verb is reported through gctl_error(). + */ +static void +mp_main(struct gctl_req *req, unsigned int flags __unused) +{ + const char *name; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) { + mp_label(req); + } else if (strcmp(name, "clear") == 0) { + mp_clear(req); + } else if (strcmp(name, "prefer") == 0) { + mp_prefer(req); + } else { + gctl_error(req, "Unknown command: %s.", name); + } +} + +/* + * Label providers arg1..argN as paths of the multipath device named by arg0. + * All providers must report the same media and sector size; metadata is + * stored on arg1 only, and the last sector of every other provider is then + * read back and compared, with a warning on stderr when it differs. + */ +static void +mp_label(struct gctl_req *req) +{ + struct g_multipath_metadata md; + off_t disksize = 0, msize; + uint8_t *sector, *rsector; + char *ptr; + uuid_t uuid; + ssize_t secsize = 0, ssize; + uint32_t status; + const char *name, *name2, *mpname; + int error, i, nargs, fd; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 2) { + gctl_error(req, "wrong number of arguments."); + return; + } + + /* + * First, check each provider to make sure it's the same size. + * This also gets us our size and sectorsize for the metadata. 
+ */ + for (i = 1; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(name); + ssize = g_get_sectorsize(name); + if (msize == 0 || ssize == 0) { + gctl_error(req, "cannot get information about %s: %s.", + name, strerror(errno)); + return; + } + if (i == 1) { + secsize = ssize; + disksize = msize; + } else { + /* ssize and msize are signed, hence %jd with intmax_t. */ + if (secsize != ssize) { + gctl_error(req, "%s sector size %jd different.", + name, (intmax_t)ssize); + return; + } + if (disksize != msize) { + gctl_error(req, "%s media size %jd different.", + name, (intmax_t)msize); + return; + } + } + + } + + /* + * Generate metadata. + */ + strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); + md.md_version = G_MULTIPATH_VERSION; + mpname = gctl_get_ascii(req, "arg0"); + strlcpy(md.md_name, mpname, sizeof(md.md_name)); + md.md_size = disksize; + md.md_sectorsize = secsize; + uuid_create(&uuid, &status); + if (status != uuid_s_ok) { + gctl_error(req, "cannot create a UUID."); + return; + } + uuid_to_string(&uuid, &ptr, &status); + if (status != uuid_s_ok) { + gctl_error(req, "cannot stringify a UUID."); + return; + } + strlcpy(md.md_uuid, ptr, sizeof (md.md_uuid)); + md.md_active_active = gctl_get_int(req, "active_active"); + if (gctl_get_int(req, "active_read")) + md.md_active_active = 2; + free(ptr); + + /* + * Allocate a sector to write as metadata. + */ + sector = calloc(1, secsize); + if (sector == NULL) { + gctl_error(req, "unable to allocate metadata buffer"); + return; + } + rsector = malloc(secsize); + if (rsector == NULL) { + gctl_error(req, "unable to allocate metadata buffer"); + goto done; + } + + /* + * encode the metadata + */ + multipath_metadata_encode(&md, sector); + + /* + * Store metadata on the initial provider. 
+ */ + name = gctl_get_ascii(req, "arg1"); + error = g_metadata_store(name, sector, secsize); + if (error != 0) { + gctl_error(req, "cannot store metadata on %s: %s.", name, strerror(error)); + goto done; + } + + /* + * Now touch the rest of the providers to hint retaste. + */ + for (i = 2; i < nargs; i++) { + name2 = gctl_get_ascii(req, "arg%d", i); + fd = g_open(name2, 1); + if (fd < 0) { + fprintf(stderr, "Unable to open %s: %s.\n", + name2, strerror(errno)); + continue; + } + if (pread(fd, rsector, secsize, disksize - secsize) != + (ssize_t)secsize) { + fprintf(stderr, "Unable to read metadata from %s: %s.\n", + name2, strerror(errno)); + g_close(fd); + continue; + } + g_close(fd); + if (memcmp(sector, rsector, secsize)) { + fprintf(stderr, "No metadata found on %s." + " It is not a path of %s.\n", + name2, name); + } + } +done: + free(rsector); + free(sector); +} + + +/* + * Clear multipath metadata from each provider argument. + * A failure is printed on stderr and flagged on the request, and the + * remaining providers are still processed. + */ +static void +mp_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_MULTIPATH_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + } +} + +/* + * Issue the "prefer" request for the geom (arg0) and provider (arg1), + * printing the returned error string on stderr if it fails. + * Exactly two arguments are required. 
+ */ +static void +mp_prefer(struct gctl_req *req) +{ + const char *name, *comp, *errstr; + int nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs != 2) { + gctl_error(req, "Usage: prefer GEOM PROVIDER"); + return; + } + name = gctl_get_ascii(req, "arg0"); + comp = gctl_get_ascii(req, "arg1"); + errstr = gctl_issue (req); + if (errstr != NULL) { + fprintf(stderr, "Can't set %s preferred provider to %s: %s.\n", + name, comp, errstr); + } +} diff --git a/lib/geom/multipath/gmultipath.8 b/lib/geom/multipath/gmultipath.8 new file mode 100644 index 000000000000..0f007196f7d4 --- /dev/null +++ 
b/lib/geom/multipath/gmultipath.8 @@ -0,0 +1,375 @@ +.\" Copyright (c) 2007 Matthew Jacob +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd March 17, 2022 +.Dt GMULTIPATH 8 +.Os +.Sh NAME +.Nm gmultipath +.Nd "disk multipath control utility" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl ARv +.Ar name +.Ar prov ... +.Nm +.Cm label +.Op Fl ARv +.Ar name +.Ar prov ... 
+.Nm +.Cm configure +.Op Fl APRv +.Ar name +.Nm +.Cm add +.Op Fl v +.Ar name prov +.Nm +.Cm remove +.Op Fl v +.Ar name prov +.Nm +.Cm fail +.Op Fl v +.Ar name prov +.Nm +.Cm restore +.Op Fl v +.Ar name prov +.Nm +.Cm rotate +.Op Fl v +.Ar name +.Nm +.Cm prefer +.Op Fl v +.Ar name +.Ar prov +.Nm +.Cm getactive +.Op Fl v +.Ar name +.Nm +.Cm destroy +.Op Fl v +.Ar name +.Nm +.Cm stop +.Op Fl v +.Ar name +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for device multipath configuration. +.Pp +The multipath device can be configured using two different methods: +.Dq manual +or +.Dq automatic . +When using the +.Dq manual +method, no metadata are stored on the devices, so the multipath +device has to be configured by hand every time it is needed. +Additional device paths also will not be detected automatically. +The +.Dq automatic +method uses on-disk metadata to detect the device and all its paths. +Metadata use the last sector of the underlying disk device and +include the device name and UUID. +The UUID guarantees uniqueness in a shared storage environment +but is in general too cumbersome to use. +The name is what is exported via the device interface. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm create +Create a multipath device with the +.Dq manual +method without writing any on-disk metadata. +It is up to the administrator to properly identify device paths. +The kernel will only check that all given providers have the same media and +sector sizes. +.Pp +.Fl A +option enables Active/Active mode, +.Fl R +option enables Active/Read mode, otherwise Active/Passive mode is used +by default. +.It Cm label +Create a multipath device with the +.Dq automatic +method. +Label the first given provider with on-disk metadata using the specified +.Ar name . +The rest of the given providers will be retasted to detect these metadata. 
+It reliably protects against specifying unrelated providers. +Providers with no matching metadata detected will not be added to the device. +.Pp +.Fl A +option enables Active/Active mode, +.Fl R +option enables Active/Read mode, otherwise Active/Passive mode is used +by default. +.It Cm configure +Configure the given multipath device. +.Pp +.Fl A +option enables Active/Active mode, +.Fl P +option enables Active/Passive mode, +.Fl R +option enables Active/Read mode. +.It Cm add +Add the given provider as a path to the given multipath device. +Should normally be used only for devices created with the +.Dq manual +method, unless you know what you are doing (you are sure that it is another +device path, but tasting its metadata in the regular +.Dq automatic +way is not possible). +.It Cm remove +Remove the given provider as a path from the given multipath device. +If the last path is removed, the multipath device will be destroyed. +.It Cm fail +Mark the specified provider as a failed path of the specified multipath device. +If there are other paths present, new requests will be forwarded there. +.It Cm restore +Mark the specified provider as an operational path of the specified multipath +device, allowing it to handle requests. +.It Cm rotate +Change the active provider/path to the next available provider in Active/Passive mode. +.It Cm prefer +Change the active provider/path to the specified provider in Active/Passive mode. +.It Cm getactive +Get the currently active provider(s)/path(s). +.It Cm destroy +Destroy the given multipath device, clearing metadata. +.It Cm stop +Stop the given multipath device without clearing metadata. +.It Cm clear +Clear metadata on the given providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm MULTIPATH +GEOM class. 
+.Bl -tag -width indent +.It Va kern.geom.multipath.debug : No 0 +Debug level of the +.Nm MULTIPATH +GEOM class. +This can be set to 0 (default) or 1 to disable or enable various +forms of chattiness. +.It Va kern.geom.multipath.exclusive : No 1 +Open underlying providers exclusively, preventing individual paths access. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh MULTIPATH ARCHITECTURE +This is a multiple path architecture with no device knowledge or +presumptions other than size matching built in. +Therefore the user must exercise some care +in selecting providers that do indeed represent multiple paths to the +same underlying disk device. +The reason for this is that there are several +criteria across multiple underlying transport types that can +.Ar indicate +identity, but in all respects such identity can rarely be considered +.Ar definitive . +.Pp +For example, if you use the World Wide Port Name of a Fibre Channel +disk object you might believe that two disks that have the same WWPN +on different paths (or even disjoint fabrics) might be considered +the same disk. +Nearly always this would be a safe assumption, until +you realize that a WWPN, like an Ethernet MAC address, is a soft +programmable entity, and that a misconfigured Director Class switch +could lead you to believe incorrectly that you have found multiple +paths to the same device. +This is an extreme and theoretical case, but +it is possible enough to indicate that the policy for deciding which +of multiple pathnames refer to the same device should be left to the +system operator who will use tools and knowledge of their own storage +subsystem to make the correct configuration selection. +.Pp +There are Active/Passive, Active/Read and Active/Active operation modes +supported. +In Active/Passive mode only one path has I/O moving on it +at any point in time. 
+This I/O continues until an I/O is returned with +a generic I/O error or a "Nonexistent Device" error. +When this occurs, that path is marked FAIL, the next path +in a list is selected as active and the failed I/O reissued. +In Active/Active mode all paths not marked FAIL may handle I/O at the same time. +Requests are distributed between paths to equalize load. +For capable devices it allows the utilisation of the bandwidth available on all paths. +In Active/Read mode all paths not marked FAIL may handle reads at the same time, +but unlike in Active/Active mode only one path handles write requests at any +point in time; closely following the original write request order if the layer +above needs it for data consistency (not waiting for requisite write completion +before sending dependent write). +.Pp +When new devices are added to the system the +.Nm MULTIPATH +GEOM class is given an opportunity to taste these new devices. +If a new +device has a +.Nm MULTIPATH +on-disk metadata label, the device is either used to create a new +.Nm MULTIPATH +GEOM, or added to the list of paths for an existing +.Nm MULTIPATH +GEOM. +.Pp +It is this mechanism that works reasonably with +.Xr isp 4 +and +.Xr mpt 4 +based Fibre Channel disk devices. +For these devices, when a device disappears +(due to e.g., a cable pull or power failure to a switch), the device is +proactively marked as gone and I/O to it failed. +This causes the +.Nm MULTIPATH +failure event just described. +.Pp +When Fibre Channel events inform either +.Xr isp 4 +or +.Xr mpt 4 +host bus adapters that new devices may have arrived (e.g., the arrival +of an RSCN event from the Fabric Domain Controller), they can cause +a rescan to occur and cause the attachment and configuration of any +(now) new devices to occur, causing the taste event described above. 
+.Pp +This means that this multipath architecture is not a one-shot path +failover, but can be considered to be steady state as long as failed +paths are repaired (automatically or otherwise). +.Pp +Automatic rescanning is not a requirement. +Nor is Fibre Channel. +The +same failover mechanisms work equally well for traditional "Parallel" +SCSI but may require manual intervention with +.Xr camcontrol 8 +to cause the reattachment of repaired device links. +.Sh EXAMPLES +The following example shows how to use +.Xr camcontrol 8 +to find possible multiple path devices and to create a +.Nm MULTIPATH +GEOM class for them. +.Bd -literal -offset indent +mysys# camcontrol devlist +<ECNCTX @WESTVILLE > at scbus0 target 0 lun 0 (da0,pass0) +<ECNCTX @WESTVILLE > at scbus0 target 0 lun 1 (da1,pass1) +<ECNCTX @WESTVILLE > at scbus1 target 0 lun 0 (da2,pass2) +<ECNCTX @WESTVILLE > at scbus1 target 0 lun 1 (da3,pass3) +mysys# camcontrol inquiry da0 -S +ECNTX0LUN000000SER10ac0d01 +mysys# camcontrol inquiry da2 -S +ECNTX0LUN000000SER10ac0d01 +.Ed +.Pp +Now that you have used the Serial Number to compare two disk paths +it is not entirely unreasonable to conclude that these are multiple +paths to the same device. +However, only the user who is familiar +with their storage is qualified to make this judgement. +.Pp +You can then use the +.Nm +command to label and create a +.Nm MULTIPATH +GEOM provider named +.Ar FRED . +.Bd -literal -offset indent +gmultipath label -v FRED /dev/da0 /dev/da2 +disklabel -Bw /dev/multipath/FRED auto +newfs /dev/multipath/FREDa +mount /dev/multipath/FREDa /mnt.... 
+.Ed +.Pp +The resultant console output looks something like: +.Bd -literal -offset indent +GEOM_MULTIPATH: da0 added to FRED +GEOM_MULTIPATH: da0 is now active path in FRED +GEOM_MULTIPATH: da2 added to FRED +.Ed +.Pp +To load the +.Nm +module at boot time, add this entry to +.Pa /boot/loader.conf : +.Bd -literal -offset indent +geom_multipath_load="YES" +.Ed +.Sh SEE ALSO +.Xr geom 4 , +.Xr isp 4 , +.Xr mpt 4 , +.Xr loader.conf 5 , +.Xr camcontrol 8 , +.Xr geom 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr sysctl 8 +.Sh HISTORY +The +.Nm +utility first appeared in +.Fx 7.0 . +.Sh AUTHORS +.An Matthew Jacob Aq Mt mjacob@FreeBSD.org +.An Alexander Motin Aq Mt mav@FreeBSD.org diff --git a/lib/geom/nop/Makefile b/lib/geom/nop/Makefile new file mode 100644 index 000000000000..294eeb4e7002 --- /dev/null +++ b/lib/geom/nop/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= nop + +.include <bsd.lib.mk> diff --git a/lib/geom/nop/Makefile.depend b/lib/geom/nop/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/nop/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/nop/geom_nop.c b/lib/geom/nop/geom_nop.c new file mode 100644 index 000000000000..ee0cf4808f42 --- /dev/null +++ b/lib/geom/nop/geom_nop.c @@ -0,0 +1,91 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <stdio.h> +#include <stdint.h> +#include <libgeom.h> +#include <geom/nop/g_nop.h> + +#include "core/geom.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_NOP_VERSION; + +/* + * Command table of the NOP class: the create, configure, destroy and reset + * verbs with their options and usage strings. + * Every entry has a NULL handler. + * The third field of each option appears to be its default value + * (NOTE(review): confirm against core/geom.h). 
+ */ +struct g_command class_commands[] = { + { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, + { + { 'c', "count_until_fail", "-1", G_TYPE_NUMBER }, + { 'd', "delaymsec", "-1", G_TYPE_NUMBER }, + { 'e', "error", "-1", G_TYPE_NUMBER }, + { 'o', "offset", "0", G_TYPE_NUMBER }, + { 'p', "stripesize", "0", G_TYPE_NUMBER }, + { 'P', "stripeoffset", "0", G_TYPE_NUMBER }, + { 'q', "rdelayprob", "-1", G_TYPE_NUMBER }, + { 'r', "rfailprob", "-1", G_TYPE_NUMBER }, + { 's', "size", "0", G_TYPE_NUMBER }, + { 'S', "secsize", "0", G_TYPE_NUMBER }, + { 'w', "wfailprob", "-1", G_TYPE_NUMBER }, + { 'x', "wdelayprob", "-1", G_TYPE_NUMBER }, + { 'z', "physpath", G_NOP_PHYSPATH_PASSTHROUGH, G_TYPE_STRING }, + { 'Z', "gnopname", G_VAL_OPTIONAL, G_TYPE_STRING }, + G_OPT_SENTINEL + }, 
+ "[-v] [-c count_until_fail] [-d delaymsec] [-e error] [-o offset] " + "[-p stripesize] [-P stripeoffset] [-q rdelayprob] [-r rfailprob] " + "[-s size] [-S secsize] [-w wfailprob] [-x wdelayprob] " + "[-z physpath] [-Z gnopname] dev ..." + }, + { "configure", G_FLAG_VERBOSE, NULL, + { + { 'c', "count_until_fail", "-1", G_TYPE_NUMBER }, + { 'd', "delaymsec", "-1", G_TYPE_NUMBER }, + { 'e', "error", "-1", G_TYPE_NUMBER }, + { 'q', "rdelayprob", "-1", G_TYPE_NUMBER }, + { 'r', "rfailprob", "-1", G_TYPE_NUMBER }, + { 'w', "wfailprob", "-1", G_TYPE_NUMBER }, + { 'x', "wdelayprob", "-1", G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-v] [-c count_until_fail] [-d delaymsec] [-e error] " + "[-q rdelayprob] [-r rfailprob] [-w wfailprob] [-x wdelayprob] " + "prov ..." + }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] prov ..." + }, + { "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] prov ..." + }, + G_CMD_SENTINEL +}; diff --git a/lib/geom/nop/gnop.8 b/lib/geom/nop/gnop.8 new file mode 100644 index 000000000000..24293d143058 --- /dev/null +++ b/lib/geom/nop/gnop.8 @@ -0,0 +1,211 @@ +.\" Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd December 29, 2019 +.Dt GNOP 8 +.Os +.Sh NAME +.Nm gnop +.Nd "control utility for NOP GEOM class" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl v +.Op Fl c Ar count_until_fail +.Op Fl d Ar delaymsec +.Op Fl e Ar error +.Op Fl o Ar offset +.Op Fl p Ar stripesize +.Op Fl P Ar stripeoffset +.Op Fl q Ar rdelayprob +.Op Fl r Ar rfailprob +.Op Fl s Ar size +.Op Fl S Ar secsize +.Op Fl w Ar wfailprob +.Op Fl x Ar wdelayprob +.Op Fl z Ar physpath +.Op Fl Z Ar gnopname +.Ar dev ... +.Nm +.Cm configure +.Op Fl v +.Op Fl c Ar count_until_fail +.Op Fl d Ar delaymsec +.Op Fl e Ar error +.Op Fl q Ar rdelayprob +.Op Fl r Ar rfailprob +.Op Fl w Ar wfailprob +.Op Fl x Ar wdelayprob +.Ar prov ... +.Nm +.Cm destroy +.Op Fl fv +.Ar prov ... +.Nm +.Cm reset +.Op Fl v +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for setting up transparent providers on existing ones. +Its main purpose is testing other GEOM classes, as it allows forced provider +removal and I/O error simulation with a given probability. 
+It also gathers statistics on the number of read, write, delete, +getattr, flush, and other requests, and the number of bytes read and written. +.Nm +can also be used as a good starting point for implementing new GEOM +classes. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm configure" +.It Cm create +Set up a transparent provider on the given devices. +If the operation succeeds, the new provider should appear with name +.Pa /dev/ Ns Ao Ar dev Ac Ns Pa .nop . +The kernel module +.Pa geom_nop.ko +will be loaded if it is not loaded already. +.It Cm configure +Configure existing transparent provider. +At the moment it is only used for changing failure probability. +.It Cm destroy +Turn off the given transparent providers. +.It Cm reset +Reset statistics for the given transparent providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width "-c count_until_fail" +.It Fl c Ar count_until_fail +Specifies the number of I/O requests to allow before setting the read, write and +delay failure probabilities. +.It Fl d Ar delaymsec +Specifies the delay of the requests in milliseconds. +Note that requests will be delayed before they are sent to the backing device. +.It Fl e Ar error +Specifies the error number to return on failure. +.It Fl f +Force the removal of the specified provider. +.It Fl o Ar offset +Where to begin on the original provider. +.It Fl p Ar stripesize +Value of the stripesize property of the transparent provider. +.It Fl P Ar stripeoffset +Value of the stripeoffset property of the transparent provider. +.It Fl q Ar rdelayprob +Specifies read delay probability in percent. +.It Fl r Ar rfailprob +Specifies read failure probability in percent. +.It Fl s Ar size +Size of the transparent provider. +.It Fl S Ar secsize +Sector size of the transparent provider. 
+.It Fl w Ar wfailprob +Specifies write failure probability in percent. +.It Fl v +Be more verbose. +.It Fl x Ar wdelayprob +Specifies write delay probability in percent. +.It Fl z Ar physpath +Physical path of the transparent provider. +.It Fl Z Ar gnopname +The name of the new provider. +The suffix +.Dq .nop +will be appended to the provider name. +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm NOP +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.nop.debug : No 0 +Debug level of the +.Nm NOP +GEOM class. +This can be set to a number between 0 and 2 inclusive. +If set to 0, minimal debug information is printed. +If set to 1, basic debug information is logged along with the I/O requests +that were returned as errors. +If set to 2, the maximum amount of debug information is printed including +all I/O requests. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +The following example shows how to create a transparent provider for disk +.Pa /dev/da0 +with 50% write failure probability, and how to destroy it. +.Bd -literal -offset indent +gnop create -v -w 50 da0 +gnop destroy -v da0.nop +.Ed +.Pp +The traffic statistics for the given transparent providers can be obtained +with the +.Cm list +command. +The example below shows the number of bytes written with +.Xr newfs 8 : +.Bd -literal -offset indent +gnop create da0 +newfs /dev/da0.nop +gnop list +.Ed +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.3 . 
+.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org diff --git a/lib/geom/part/Makefile b/lib/geom/part/Makefile new file mode 100644 index 000000000000..58390e299d6f --- /dev/null +++ b/lib/geom/part/Makefile @@ -0,0 +1,7 @@ +PACKAGE=geom + +GEOM_CLASS= part + +LIBADD= util + +.include <bsd.lib.mk> diff --git a/lib/geom/part/Makefile.depend b/lib/geom/part/Makefile.depend new file mode 100644 index 000000000000..08cfc713d335 --- /dev/null +++ b/lib/geom/part/Makefile.depend @@ -0,0 +1,17 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + lib/libutil \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/part/geom_part.c b/lib/geom/part/geom_part.c new file mode 100644 index 000000000000..cbbc81d3cc60 --- /dev/null +++ b/lib/geom/part/geom_part.c @@ -0,0 +1,1288 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2007, 2008 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/stat.h> + +#include <assert.h> +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <libgeom.h> +#include <libutil.h> +#include <paths.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <inttypes.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> + +#include "core/geom.h" +#include "misc/subr.h" + +#ifdef STATIC_GEOM_CLASSES +#define PUBSYM(x) gpart_##x +#else +#define PUBSYM(x) x +#endif + +uint32_t PUBSYM(lib_version) = G_LIB_VERSION; +uint32_t PUBSYM(version) = 0; + +static char sstart[32]; +static char ssize[32]; +volatile sig_atomic_t undo_restore; + +#define GPART_AUTOFILL "*" +#define GPART_FLAGS "C" + +#define GPART_PARAM_BOOTCODE "bootcode" +#define GPART_PARAM_INDEX "index" +#define GPART_PARAM_PARTCODE "partcode" +#define GPART_PARAM_SKIP_DSN "skip_dsn" + +static struct gclass *find_class(struct gmesh *, const char *); +static struct ggeom * find_geom(struct gclass *, const char *); +static int geom_is_withered(struct ggeom *); +static const char *find_geomcfg(struct ggeom *, const char *); +static const char *find_provcfg(struct gprovider *, const char *); +static struct gprovider *find_provider(struct ggeom *, off_t); +static const char *fmtsize(int64_t); +static int gpart_autofill(struct gctl_req *); +static int gpart_autofill_resize(struct gctl_req *); +static void 
gpart_bootcode(struct gctl_req *, unsigned int); +static void *gpart_bootfile_read(const char *, ssize_t *); +static _Noreturn void gpart_issue(struct gctl_req *, unsigned int); +static void gpart_show(struct gctl_req *, unsigned int); +static void gpart_show_geom(struct ggeom *, const char *, int); +static int gpart_show_hasopt(struct gctl_req *, const char *, const char *); +static void gpart_write_partcode(struct gctl_req *, int, void *, ssize_t); +static void gpart_print_error(const char *); +static void gpart_backup(struct gctl_req *, unsigned int); +static void gpart_restore(struct gctl_req *, unsigned int); + +struct g_command PUBSYM(class_commands)[] = { + { "add", 0, gpart_issue, { + { 'a', "alignment", GPART_AUTOFILL, G_TYPE_STRING }, + { 'b', "start", GPART_AUTOFILL, G_TYPE_STRING }, + { 's', "size", GPART_AUTOFILL, G_TYPE_STRING }, + { 't', "type", NULL, G_TYPE_STRING }, + { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, + { 'l', "label", G_VAL_OPTIONAL, G_TYPE_STRING }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "-t type [-a alignment] [-b start] [-s size] [-i index] " + "[-l label] [-f flags] geom" + }, + { "backup", 0, gpart_backup, G_NULL_OPTS, + "geom" + }, + { "bootcode", 0, gpart_bootcode, { + { 'b', GPART_PARAM_BOOTCODE, G_VAL_OPTIONAL, G_TYPE_STRING }, + { 'p', GPART_PARAM_PARTCODE, G_VAL_OPTIONAL, G_TYPE_STRING }, + { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + { 'N', GPART_PARAM_SKIP_DSN, NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL }, + "[-N] [-b bootcode] [-p partcode -i index] [-f flags] geom" + }, + { "commit", 0, gpart_issue, G_NULL_OPTS, + "geom" + }, + { "create", 0, gpart_issue, { + { 's', "scheme", NULL, G_TYPE_STRING }, + { 'n', "entries", G_VAL_OPTIONAL, G_TYPE_NUMBER }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "-s scheme [-n entries] [-f flags] provider" + }, + { "delete", 0, gpart_issue, { + { 'i', 
GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "-i index [-f flags] geom" + }, + { "destroy", 0, gpart_issue, { + { 'F', "force", NULL, G_TYPE_BOOL }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "[-F] [-f flags] geom" + }, + { "modify", 0, gpart_issue, { + { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER }, + { 'l', "label", G_VAL_OPTIONAL, G_TYPE_STRING }, + { 't', "type", G_VAL_OPTIONAL, G_TYPE_STRING }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "-i index [-l label] [-t type] [-f flags] geom" + }, + { "set", 0, gpart_issue, { + { 'a', "attrib", NULL, G_TYPE_STRING }, + { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "-a attrib [-i index] [-f flags] geom" + }, + { "show", 0, gpart_show, { + { 'l', "show_label", NULL, G_TYPE_BOOL }, + { 'r', "show_rawtype", NULL, G_TYPE_BOOL }, + { 'p', "show_providers", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL }, + "[-l | -r] [-p] [geom ...]" + }, + { "undo", 0, gpart_issue, G_NULL_OPTS, + "geom" + }, + { "unset", 0, gpart_issue, { + { 'a', "attrib", NULL, G_TYPE_STRING }, + { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "-a attrib [-i index] [-f flags] geom" + }, + { "resize", 0, gpart_issue, { + { 'a', "alignment", GPART_AUTOFILL, G_TYPE_STRING }, + { 's', "size", GPART_AUTOFILL, G_TYPE_STRING }, + { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "-i index [-a alignment] [-s size] [-f flags] geom" + }, + { "restore", 0, gpart_restore, { + { 'F', "force", NULL, G_TYPE_BOOL }, + { 'l', "restore_labels", NULL, G_TYPE_BOOL }, + { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "[-lF] [-f flags] provider [...]" + }, + { "recover", 0, gpart_issue, { + { 'f', "flags", 
GPART_FLAGS, G_TYPE_STRING }, + G_OPT_SENTINEL }, + "[-f flags] geom" + }, + G_CMD_SENTINEL +}; + +static struct gclass * +find_class(struct gmesh *mesh, const char *name) +{ + struct gclass *classp; + + LIST_FOREACH(classp, &mesh->lg_class, lg_class) { + if (strcmp(classp->lg_name, name) == 0) + return (classp); + } + return (NULL); +} + +static struct ggeom * +find_geom(struct gclass *classp, const char *name) +{ + struct ggeom *gp, *wgp; + + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + name += sizeof(_PATH_DEV) - 1; + wgp = NULL; + LIST_FOREACH(gp, &classp->lg_geom, lg_geom) { + if (strcmp(gp->lg_name, name) != 0) + continue; + if (!geom_is_withered(gp)) + return (gp); + else + wgp = gp; + } + return (wgp); +} + +static int +geom_is_withered(struct ggeom *gp) +{ + struct gconfig *gc; + + LIST_FOREACH(gc, &gp->lg_config, lg_config) { + if (!strcmp(gc->lg_name, "wither")) + return (1); + } + return (0); +} + +static const char * +find_geomcfg(struct ggeom *gp, const char *cfg) +{ + struct gconfig *gc; + + LIST_FOREACH(gc, &gp->lg_config, lg_config) { + if (!strcmp(gc->lg_name, cfg)) + return (gc->lg_val); + } + return (NULL); +} + +static const char * +find_provcfg(struct gprovider *pp, const char *cfg) +{ + struct gconfig *gc; + + LIST_FOREACH(gc, &pp->lg_config, lg_config) { + if (!strcmp(gc->lg_name, cfg)) + return (gc->lg_val); + } + return (NULL); +} + +static struct gprovider * +find_provider(struct ggeom *gp, off_t minsector) +{ + struct gprovider *pp, *bestpp; + const char *s; + off_t sector, bestsector; + + bestpp = NULL; + bestsector = 0; + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + s = find_provcfg(pp, "start"); + sector = (off_t)strtoimax(s, NULL, 0); + if (sector < minsector) + continue; + if (bestpp != NULL && sector >= bestsector) + continue; + + bestpp = pp; + bestsector = sector; + } + return (bestpp); +} + +static const char * +fmtsize(int64_t rawsz) +{ + static char buf[5]; + + humanize_number(buf, sizeof(buf), rawsz, "", 
HN_AUTOSCALE, + HN_B | HN_NOSPACE | HN_DECIMAL); + return (buf); +} + +static const char * +fmtattrib(struct gprovider *pp) +{ + static char buf[128]; + struct gconfig *gc; + u_int idx; + + buf[0] = '\0'; + idx = 0; + LIST_FOREACH(gc, &pp->lg_config, lg_config) { + if (strcmp(gc->lg_name, "attrib") != 0) + continue; + idx += snprintf(buf + idx, sizeof(buf) - idx, "%s%s", + (idx == 0) ? " [" : ",", gc->lg_val); + } + if (idx > 0) + snprintf(buf + idx, sizeof(buf) - idx, "] "); + return (buf); +} + +#define ALIGNDOWN(d, a) ((d) - (d) % (a)) +#define ALIGNUP(d, a) ((d) % (a) ? (d) - (d) % (a) + (a): (d)) + +static int +gpart_autofill_resize(struct gctl_req *req) +{ + struct gmesh mesh; + struct gclass *cp; + struct ggeom *gp; + struct gprovider *pp; + off_t last, size, start, new_size; + off_t lba, new_lba, alignment, offset; + const char *g, *s; + int error, idx, has_alignment; + + idx = (int)gctl_get_intmax(req, GPART_PARAM_INDEX); + if (idx < 1) + errx(EXIT_FAILURE, "invalid partition index"); + + s = gctl_get_ascii(req, "class"); + if (s == NULL) + abort(); + g = gctl_get_ascii(req, "arg0"); + if (g == NULL) + abort(); + error = geom_gettree_geom(&mesh, s, g, 1); + if (error) + return (error); + cp = find_class(&mesh, s); + if (cp == NULL) + errx(EXIT_FAILURE, "Class %s not found.", s); + gp = find_geom(cp, g); + if (gp == NULL) + errx(EXIT_FAILURE, "No such geom: %s.", g); + pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; + if (pp == NULL) + errx(EXIT_FAILURE, "Provider for geom %s not found.", g); + + s = gctl_get_ascii(req, "alignment"); + has_alignment = (*s == '*') ? 
0 : 1; + alignment = 1; + if (has_alignment) { + error = g_parse_lba(s, pp->lg_sectorsize, &alignment); + if (error) + errc(EXIT_FAILURE, error, "Invalid alignment param"); + if (alignment == 0) + errx(EXIT_FAILURE, "Invalid alignment param"); + } else { + lba = pp->lg_stripesize / pp->lg_sectorsize; + if (lba > 0) + alignment = lba; + } + error = gctl_delete_param(req, "alignment"); + if (error) + errc(EXIT_FAILURE, error, "internal error"); + + s = gctl_get_ascii(req, "size"); + if (*s == '*') + new_size = 0; + else { + error = g_parse_lba(s, pp->lg_sectorsize, &new_size); + if (error) + errc(EXIT_FAILURE, error, "Invalid size param"); + /* no autofill necessary. */ + if (has_alignment == 0) + goto done; + } + + offset = (pp->lg_stripeoffset / pp->lg_sectorsize) % alignment; + s = find_geomcfg(gp, "last"); + if (s == NULL) + errx(EXIT_FAILURE, "Final block not found for geom %s", + gp->lg_name); + last = (off_t)strtoimax(s, NULL, 0); + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + s = find_provcfg(pp, "index"); + if (s == NULL) + continue; + if (atoi(s) == idx) + break; + } + if (pp == NULL) + errx(EXIT_FAILURE, "invalid partition index"); + + s = find_provcfg(pp, "start"); + start = (off_t)strtoimax(s, NULL, 0); + s = find_provcfg(pp, "end"); + lba = (off_t)strtoimax(s, NULL, 0); + size = lba - start + 1; + + pp = find_provider(gp, lba + 1); + if (new_size > 0 && (new_size <= size || pp == NULL)) { + /* The start offset may be not aligned, so we align the end + * offset and then calculate the size. + */ + new_size = ALIGNDOWN(start + offset + new_size, + alignment) - start - offset; + goto done; + } + if (pp == NULL) { + new_size = ALIGNDOWN(last + offset + 1, alignment) - + start - offset; + if (new_size < size) + return (ENOSPC); + } else { + s = find_provcfg(pp, "start"); + new_lba = (off_t)strtoimax(s, NULL, 0); + /* + * Is there any free space between current and + * next providers? 
+ */ + new_lba = ALIGNDOWN(new_lba + offset, alignment) - offset; + if (new_lba > lba) + new_size = new_lba - start; + else { + geom_deletetree(&mesh); + return (ENOSPC); + } + } +done: + snprintf(ssize, sizeof(ssize), "%jd", (intmax_t)new_size); + gctl_change_param(req, "size", -1, ssize); + geom_deletetree(&mesh); + return (0); +} + +static int +gpart_autofill(struct gctl_req *req) +{ + struct gmesh mesh; + struct gclass *cp; + struct ggeom *gp; + struct gprovider *pp; + off_t first, last, a_first; + off_t size, start, a_lba; + off_t lba, len, alignment, offset; + uintmax_t grade; + const char *g, *s; + int error, has_size, has_start, has_alignment; + + s = gctl_get_ascii(req, "verb"); + if (strcmp(s, "resize") == 0) + return gpart_autofill_resize(req); + if (strcmp(s, "add") != 0) + return (0); + + s = gctl_get_ascii(req, "class"); + if (s == NULL) + abort(); + g = gctl_get_ascii(req, "arg0"); + if (g == NULL) + abort(); + error = geom_gettree_geom(&mesh, s, g, 1); + if (error) + return (error); + cp = find_class(&mesh, s); + if (cp == NULL) + errx(EXIT_FAILURE, "Class %s not found.", s); + gp = find_geom(cp, g); + if (gp == NULL) { + if (g_device_path(g) == NULL) { + errx(EXIT_FAILURE, "No such geom %s.", g); + } else { + /* + * We don't free memory allocated by g_device_path() as + * we are about to exit. + */ + errx(EXIT_FAILURE, + "No partitioning scheme found on geom %s. Create one first using 'gpart create'.", + g); + } + } + pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; + if (pp == NULL) + errx(EXIT_FAILURE, "Provider for geom %s not found.", g); + + s = gctl_get_ascii(req, "alignment"); + has_alignment = (*s == '*') ? 
0 : 1; + alignment = 1; + if (has_alignment) { + error = g_parse_lba(s, pp->lg_sectorsize, &alignment); + if (error) + errc(EXIT_FAILURE, error, "Invalid alignment param"); + if (alignment == 0) + errx(EXIT_FAILURE, "Invalid alignment param"); + } + error = gctl_delete_param(req, "alignment"); + if (error) + errc(EXIT_FAILURE, error, "internal error"); + + s = gctl_get_ascii(req, "size"); + has_size = (*s == '*') ? 0 : 1; + size = 0; + if (has_size) { + error = g_parse_lba(s, pp->lg_sectorsize, &size); + if (error) + errc(EXIT_FAILURE, error, "Invalid size param"); + } + + s = gctl_get_ascii(req, "start"); + has_start = (*s == '*') ? 0 : 1; + start = 0ULL; + if (has_start) { + error = g_parse_lba(s, pp->lg_sectorsize, &start); + if (error) + errc(EXIT_FAILURE, error, "Invalid start param"); + } + + /* No autofill necessary. */ + if (has_size && has_start && !has_alignment) + goto done; + + len = pp->lg_stripesize / pp->lg_sectorsize; + if (len > 0 && !has_alignment) + alignment = len; + + /* Adjust parameters to stripeoffset */ + offset = (pp->lg_stripeoffset / pp->lg_sectorsize) % alignment; + start = ALIGNUP(start + offset, alignment); + if (size > alignment) + size = ALIGNDOWN(size, alignment); + + s = find_geomcfg(gp, "first"); + if (s == NULL) + errx(EXIT_FAILURE, "Starting block not found for geom %s", + gp->lg_name); + first = (off_t)strtoimax(s, NULL, 0); + s = find_geomcfg(gp, "last"); + if (s == NULL) + errx(EXIT_FAILURE, "Final block not found for geom %s", + gp->lg_name); + last = (off_t)strtoimax(s, NULL, 0); + grade = ~0ULL; + a_first = ALIGNUP(first + offset, alignment); + last = ALIGNDOWN(last + offset + 1, alignment) - 1; + if (a_first < start) + a_first = start; + while ((pp = find_provider(gp, first)) != NULL) { + s = find_provcfg(pp, "start"); + lba = (off_t)strtoimax(s, NULL, 0); + a_lba = ALIGNDOWN(lba + offset, alignment); + if (first < a_lba && a_first < a_lba) { + /* Free space [first, lba> */ + len = a_lba - a_first; + if (has_size) { + if 
(len >= size && + (uintmax_t)(len - size) < grade) { + start = a_first; + grade = len - size; + } + } else if (has_start) { + if (start >= a_first && start < a_lba) { + size = a_lba - start; + grade = start - a_first; + } + } else { + if (grade == ~0ULL || len > size) { + start = a_first; + size = len; + grade = 0; + } + } + } + + s = find_provcfg(pp, "end"); + first = (off_t)strtoimax(s, NULL, 0) + 1; + if (first + offset > a_first) + a_first = ALIGNUP(first + offset, alignment); + } + if (a_first <= last) { + /* Free space [first-last] */ + len = ALIGNDOWN(last - a_first + 1, alignment); + if (has_size) { + if (len >= size && + (uintmax_t)(len - size) < grade) { + start = a_first; + grade = len - size; + } + } else if (has_start) { + if (start >= a_first && start <= last) { + size = ALIGNDOWN(last - start + 1, alignment); + grade = start - a_first; + } + } else { + if (grade == ~0ULL || len > size) { + start = a_first; + size = len; + grade = 0; + } + } + } + if (grade == ~0ULL) { + geom_deletetree(&mesh); + return (ENOSPC); + } + start -= offset; /* Return back to real offset */ +done: + snprintf(ssize, sizeof(ssize), "%jd", (intmax_t)size); + gctl_change_param(req, "size", -1, ssize); + snprintf(sstart, sizeof(sstart), "%jd", (intmax_t)start); + gctl_change_param(req, "start", -1, sstart); + geom_deletetree(&mesh); + return (0); +} + +static void +gpart_show_geom(struct ggeom *gp, const char *element, int show_providers) +{ + struct gprovider *pp; + const char *s, *scheme; + off_t first, last, sector, end; + off_t length, secsz; + int idx, wblocks, wname, wmax; + + if (geom_is_withered(gp)) + return; + scheme = find_geomcfg(gp, "scheme"); + if (scheme == NULL) + errx(EXIT_FAILURE, "Scheme not found for geom %s", gp->lg_name); + s = find_geomcfg(gp, "first"); + if (s == NULL) + errx(EXIT_FAILURE, "Starting block not found for geom %s", + gp->lg_name); + first = (off_t)strtoimax(s, NULL, 0); + s = find_geomcfg(gp, "last"); + if (s == NULL) + errx(EXIT_FAILURE, 
"Final block not found for geom %s", + gp->lg_name); + last = (off_t)strtoimax(s, NULL, 0); + wblocks = strlen(s); + s = find_geomcfg(gp, "state"); + if (s == NULL) + errx(EXIT_FAILURE, "State not found for geom %s", gp->lg_name); + if (s != NULL && *s != 'C') + s = NULL; + wmax = strlen(gp->lg_name); + if (show_providers) { + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + wname = strlen(pp->lg_name); + if (wname > wmax) + wmax = wname; + } + } + wname = wmax; + pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; + secsz = pp->lg_sectorsize; + printf("=>%*jd %*jd %*s %s (%s)%s\n", + wblocks, (intmax_t)first, wblocks, (intmax_t)(last - first + 1), + wname, gp->lg_name, + scheme, fmtsize(pp->lg_mediasize), + s ? " [CORRUPT]": ""); + + while ((pp = find_provider(gp, first)) != NULL) { + s = find_provcfg(pp, "start"); + sector = (off_t)strtoimax(s, NULL, 0); + + s = find_provcfg(pp, "end"); + end = (off_t)strtoimax(s, NULL, 0); + length = end - sector + 1; + + s = find_provcfg(pp, "index"); + idx = atoi(s); + if (first < sector) { + printf(" %*jd %*jd %*s - free - (%s)\n", + wblocks, (intmax_t)first, wblocks, + (intmax_t)(sector - first), wname, "", + fmtsize((sector - first) * secsz)); + } + if (show_providers) { + printf(" %*jd %*jd %*s %s %s (%s)\n", + wblocks, (intmax_t)sector, wblocks, + (intmax_t)length, wname, pp->lg_name, + find_provcfg(pp, element), fmtattrib(pp), + fmtsize(pp->lg_mediasize)); + } else + printf(" %*jd %*jd %*d %s %s (%s)\n", + wblocks, (intmax_t)sector, wblocks, + (intmax_t)length, wname, idx, + find_provcfg(pp, element), fmtattrib(pp), + fmtsize(pp->lg_mediasize)); + first = end + 1; + } + if (first <= last) { + length = last - first + 1; + printf(" %*jd %*jd %*s - free - (%s)\n", + wblocks, (intmax_t)first, wblocks, (intmax_t)length, + wname, "", + fmtsize(length * secsz)); + } + printf("\n"); +} + +static int +gpart_show_hasopt(struct gctl_req *req, const char *opt, const char *elt) +{ + + if (!gctl_get_int(req, "%s", opt)) + return (0); + 
+ if (elt != NULL) + errx(EXIT_FAILURE, "-l and -r are mutually exclusive"); + + return (1); +} + +static void +gpart_show(struct gctl_req *req, unsigned int fl __unused) +{ + struct gmesh mesh; + struct gclass *classp; + struct ggeom *gp; + const char *element, *name; + int error, i, nargs, show_providers; + + element = NULL; + if (gpart_show_hasopt(req, "show_label", element)) + element = "label"; + if (gpart_show_hasopt(req, "show_rawtype", element)) + element = "rawtype"; + if (element == NULL) + element = "type"; + + name = gctl_get_ascii(req, "class"); + if (name == NULL) + abort(); + nargs = gctl_get_int(req, "nargs"); + if (nargs == 1) { + error = geom_gettree_geom(&mesh, name, + gctl_get_ascii(req, "arg0"), 1); + } else + error = geom_gettree(&mesh); + if (error != 0) + errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); + classp = find_class(&mesh, name); + if (classp == NULL) { + geom_deletetree(&mesh); + errx(EXIT_FAILURE, "Class %s not found.", name); + } + show_providers = gctl_get_int(req, "show_providers"); + if (nargs > 0) { + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + gp = find_geom(classp, name); + if (gp != NULL) + gpart_show_geom(gp, element, show_providers); + else + errx(EXIT_FAILURE, "No such geom: %s.", name); + } + } else { + LIST_FOREACH(gp, &classp->lg_geom, lg_geom) { + gpart_show_geom(gp, element, show_providers); + } + } + geom_deletetree(&mesh); +} + +static void +gpart_backup(struct gctl_req *req, unsigned int fl __unused) +{ + struct gmesh mesh; + struct gclass *classp; + struct gprovider *pp; + struct ggeom *gp; + const char *g, *s, *scheme; + off_t sector, end; + off_t length; + int error, i, windex, wblocks, wtype; + + if (gctl_get_int(req, "nargs") != 1) + errx(EXIT_FAILURE, "Invalid number of arguments."); + s = gctl_get_ascii(req, "class"); + if (s == NULL) + abort(); + g = gctl_get_ascii(req, "arg0"); + if (g == NULL) + abort(); + error = geom_gettree_geom(&mesh, s, g, 0); + if (error != 0) + 
errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); + classp = find_class(&mesh, s); + if (classp == NULL) { + geom_deletetree(&mesh); + errx(EXIT_FAILURE, "Class %s not found.", s); + } + gp = find_geom(classp, g); + if (gp == NULL) + errx(EXIT_FAILURE, "No such geom: %s.", g); + scheme = find_geomcfg(gp, "scheme"); + if (scheme == NULL) + abort(); + s = find_geomcfg(gp, "last"); + if (s == NULL) + abort(); + wblocks = strlen(s); + wtype = 0; + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + s = find_provcfg(pp, "type"); + i = strlen(s); + if (i > wtype) + wtype = i; + } + s = find_geomcfg(gp, "entries"); + if (s == NULL) + abort(); + windex = strlen(s); + printf("%s %s\n", scheme, s); + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + s = find_provcfg(pp, "start"); + sector = (off_t)strtoimax(s, NULL, 0); + + s = find_provcfg(pp, "end"); + end = (off_t)strtoimax(s, NULL, 0); + length = end - sector + 1; + + s = find_provcfg(pp, "label"); + printf("%-*s %*s %*jd %*jd %s %s\n", + windex, find_provcfg(pp, "index"), + wtype, find_provcfg(pp, "type"), + wblocks, (intmax_t)sector, + wblocks, (intmax_t)length, + (s != NULL) ? 
s: "", fmtattrib(pp)); + } + geom_deletetree(&mesh); +} + +static int +skip_line(const char *p) +{ + + while (*p != '\0') { + if (*p == '#') + return (1); + if (isspace(*p) == 0) + return (0); + p++; + } + return (1); +} + +static void +gpart_sighndl(int sig __unused) +{ + undo_restore = 1; +} + +static void +gpart_restore(struct gctl_req *req, unsigned int fl __unused) +{ + struct gmesh mesh; + struct gclass *classp; + struct gctl_req *r; + struct ggeom *gp; + struct sigaction si_sa; + const char *s, *flags, *errstr, *label; + char **ap, *argv[6], line[BUFSIZ], *pline; + int error, forced, i, l, nargs, created, rl; + intmax_t n; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) + errx(EXIT_FAILURE, "Invalid number of arguments."); + + forced = gctl_get_int(req, "force"); + flags = gctl_get_ascii(req, "flags"); + rl = gctl_get_int(req, "restore_labels"); + s = gctl_get_ascii(req, "class"); + if (s == NULL) + abort(); + error = geom_gettree(&mesh); + if (error != 0) + errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); + classp = find_class(&mesh, s); + if (classp == NULL) { + geom_deletetree(&mesh); + errx(EXIT_FAILURE, "Class %s not found.", s); + } + + sigemptyset(&si_sa.sa_mask); + si_sa.sa_flags = 0; + si_sa.sa_handler = gpart_sighndl; + if (sigaction(SIGINT, &si_sa, 0) == -1) + err(EXIT_FAILURE, "sigaction SIGINT"); + + if (forced) { + /* destroy existent partition table before restore */ + for (i = 0; i < nargs; i++) { + s = gctl_get_ascii(req, "arg%d", i); + gp = find_geom(classp, s); + if (gp != NULL) { + r = gctl_get_handle(); + gctl_ro_param(r, "class", -1, + classp->lg_name); + gctl_ro_param(r, "verb", -1, "destroy"); + gctl_ro_param(r, "flags", -1, "restore"); + gctl_ro_param(r, "force", sizeof(forced), + &forced); + gctl_ro_param(r, "arg0", -1, s); + errstr = gctl_issue(r); + if (errstr != NULL && errstr[0] != '\0') { + gpart_print_error(errstr); + gctl_free(r); + goto backout; + } + gctl_free(r); + } + } + } + created = 0; + while (undo_restore 
== 0 && + fgets(line, sizeof(line) - 1, stdin) != NULL) { + /* Format of backup entries: + * <scheme name> <number of entries> + * <index> <type> <start> <size> [label] ['['attrib[,attrib]']'] + */ + pline = (char *)line; + pline[strlen(line) - 1] = 0; + if (skip_line(pline)) + continue; + for (ap = argv; + (*ap = strsep(&pline, " \t")) != NULL;) + if (**ap != '\0' && ++ap >= &argv[6]) + break; + l = ap - &argv[0]; + label = pline = NULL; + if (l == 1 || l == 2) { /* create table */ + if (created) + errx(EXIT_FAILURE, "Incorrect backup format."); + if (l == 2) + n = strtoimax(argv[1], NULL, 0); + for (i = 0; i < nargs; i++) { + s = gctl_get_ascii(req, "arg%d", i); + r = gctl_get_handle(); + gctl_ro_param(r, "class", -1, + classp->lg_name); + gctl_ro_param(r, "verb", -1, "create"); + gctl_ro_param(r, "scheme", -1, argv[0]); + if (l == 2) + gctl_ro_param(r, "entries", + sizeof(n), &n); + gctl_ro_param(r, "flags", -1, "restore"); + gctl_ro_param(r, "arg0", -1, s); + errstr = gctl_issue(r); + if (errstr != NULL && errstr[0] != '\0') { + gpart_print_error(errstr); + gctl_free(r); + goto backout; + } + gctl_free(r); + } + created = 1; + continue; + } else if (l < 4 || created == 0) + errx(EXIT_FAILURE, "Incorrect backup format."); + else if (l == 5) { + if (strchr(argv[4], '[') == NULL) + label = argv[4]; + else + pline = argv[4]; + } else if (l == 6) { + label = argv[4]; + pline = argv[5]; + } + /* Add partitions to each table */ + for (i = 0; i < nargs; i++) { + s = gctl_get_ascii(req, "arg%d", i); + r = gctl_get_handle(); + n = strtoimax(argv[0], NULL, 0); + gctl_ro_param(r, "class", -1, classp->lg_name); + gctl_ro_param(r, "verb", -1, "add"); + gctl_ro_param(r, "flags", -1, "restore"); + gctl_ro_param(r, GPART_PARAM_INDEX, sizeof(n), &n); + gctl_ro_param(r, "type", -1, argv[1]); + gctl_ro_param(r, "start", -1, argv[2]); + gctl_ro_param(r, "size", -1, argv[3]); + if (rl != 0 && label != NULL) + gctl_ro_param(r, "label", -1, argv[4]); + gctl_ro_param(r, "alignment", 
-1, GPART_AUTOFILL); + gctl_ro_param(r, "arg0", -1, s); + error = gpart_autofill(r); + if (error != 0) + errc(EXIT_FAILURE, error, "autofill"); + errstr = gctl_issue(r); + if (errstr != NULL && errstr[0] != '\0') { + gpart_print_error(errstr); + gctl_free(r); + goto backout; + } + gctl_free(r); + } + if (pline == NULL || *pline != '[') + continue; + /* set attributes */ + pline++; + for (ap = argv; + (*ap = strsep(&pline, ",]")) != NULL;) + if (**ap != '\0' && ++ap >= &argv[6]) + break; + for (i = 0; i < nargs; i++) { + l = ap - &argv[0]; + s = gctl_get_ascii(req, "arg%d", i); + while (l > 0) { + r = gctl_get_handle(); + gctl_ro_param(r, "class", -1, classp->lg_name); + gctl_ro_param(r, "verb", -1, "set"); + gctl_ro_param(r, "flags", -1, "restore"); + gctl_ro_param(r, GPART_PARAM_INDEX, + sizeof(n), &n); + gctl_ro_param(r, "attrib", -1, argv[--l]); + gctl_ro_param(r, "arg0", -1, s); + errstr = gctl_issue(r); + if (errstr != NULL && errstr[0] != '\0') { + gpart_print_error(errstr); + gctl_free(r); + goto backout; + } + gctl_free(r); + } + } + } + if (undo_restore) + goto backout; + /* commit changes if needed */ + if (strchr(flags, 'C') != NULL) { + for (i = 0; i < nargs; i++) { + s = gctl_get_ascii(req, "arg%d", i); + r = gctl_get_handle(); + gctl_ro_param(r, "class", -1, classp->lg_name); + gctl_ro_param(r, "verb", -1, "commit"); + gctl_ro_param(r, "arg0", -1, s); + errstr = gctl_issue(r); + if (errstr != NULL && errstr[0] != '\0') { + gpart_print_error(errstr); + gctl_free(r); + goto backout; + } + gctl_free(r); + } + } + gctl_free(req); + geom_deletetree(&mesh); + exit(EXIT_SUCCESS); + +backout: + for (i = 0; i < nargs; i++) { + s = gctl_get_ascii(req, "arg%d", i); + r = gctl_get_handle(); + gctl_ro_param(r, "class", -1, classp->lg_name); + gctl_ro_param(r, "verb", -1, "undo"); + gctl_ro_param(r, "arg0", -1, s); + gctl_issue(r); + gctl_free(r); + } + gctl_free(req); + geom_deletetree(&mesh); + exit(EXIT_FAILURE); +} + +static void * +gpart_bootfile_read(const 
char *bootfile, ssize_t *size) +{ + struct stat sb; + void *code; + int fd; + + if (stat(bootfile, &sb) == -1) + err(EXIT_FAILURE, "%s", bootfile); + if (!S_ISREG(sb.st_mode)) + errx(EXIT_FAILURE, "%s: not a regular file", bootfile); + if (sb.st_size == 0) + errx(EXIT_FAILURE, "%s: empty file", bootfile); + if (*size > 0 && sb.st_size > *size) + errx(EXIT_FAILURE, "%s: file too big (%zu limit)", bootfile, + *size); + + *size = sb.st_size; + + fd = open(bootfile, O_RDONLY); + if (fd == -1) + err(EXIT_FAILURE, "%s", bootfile); + code = malloc(*size); + if (code == NULL) + err(EXIT_FAILURE, NULL); + if (read(fd, code, *size) != *size) + err(EXIT_FAILURE, "%s", bootfile); + close(fd); + + return (code); +} + +static void +gpart_write_partcode(struct gctl_req *req, int idx, void *code, ssize_t size) +{ + char dsf[128]; + struct gmesh mesh; + struct gclass *classp; + struct ggeom *gp; + struct gprovider *pp; + const char *g, *s; + char *buf; + off_t bsize; + int error, fd; + + s = gctl_get_ascii(req, "class"); + if (s == NULL) + abort(); + g = gctl_get_ascii(req, "arg0"); + if (g == NULL) + abort(); + error = geom_gettree_geom(&mesh, s, g, 0); + if (error != 0) + errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); + classp = find_class(&mesh, s); + if (classp == NULL) { + geom_deletetree(&mesh); + errx(EXIT_FAILURE, "Class %s not found.", s); + } + gp = find_geom(classp, g); + if (gp == NULL) + errx(EXIT_FAILURE, "No such geom: %s.", g); + s = find_geomcfg(gp, "scheme"); + if (s == NULL) + errx(EXIT_FAILURE, "Scheme not found for geom %s", gp->lg_name); + + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + s = find_provcfg(pp, "index"); + if (s == NULL) + continue; + if (atoi(s) == idx) + break; + } + + if (pp != NULL) { + snprintf(dsf, sizeof(dsf), "/dev/%s", pp->lg_name); + if (pp->lg_mediasize < size) + errx(EXIT_FAILURE, "%s: not enough space", dsf); + fd = open(dsf, O_WRONLY); + if (fd == -1) + err(EXIT_FAILURE, "%s", dsf); + /* + * When writing to a disk device, 
the write must be + * sector aligned and not write to any partial sectors, + * so round up the buffer size to the next sector and zero it. + */ + bsize = (size + pp->lg_sectorsize - 1) / + pp->lg_sectorsize * pp->lg_sectorsize; + buf = calloc(1, bsize); + if (buf == NULL) + err(EXIT_FAILURE, "%s", dsf); + bcopy(code, buf, size); + if (write(fd, buf, bsize) != bsize) + err(EXIT_FAILURE, "%s", dsf); + free(buf); + close(fd); + printf("partcode written to %s\n", pp->lg_name); + } else + errx(EXIT_FAILURE, "invalid partition index"); + + geom_deletetree(&mesh); +} + +static void +gpart_bootcode(struct gctl_req *req, unsigned int fl) +{ + const char *s; + void *bootcode, *partcode; + size_t bootsize, partsize; + int error, idx; + + if (gctl_get_int(req, "nargs") != 1) + errx(EXIT_FAILURE, "Invalid number of arguments."); + + if (gctl_has_param(req, GPART_PARAM_BOOTCODE)) { + s = gctl_get_ascii(req, GPART_PARAM_BOOTCODE); + bootsize = 800 * 1024; /* Arbitrary limit. */ + bootcode = gpart_bootfile_read(s, &bootsize); + error = gctl_change_param(req, GPART_PARAM_BOOTCODE, bootsize, + bootcode); + if (error) + errc(EXIT_FAILURE, error, "internal error"); + } else + bootcode = NULL; + + if (!gctl_has_param(req, GPART_PARAM_PARTCODE)) { + if (bootcode == NULL) + errx(EXIT_FAILURE, "neither -b nor -p specified"); + if (gctl_has_param(req, GPART_PARAM_INDEX)) + errx(EXIT_FAILURE, "-i is only valid with -p"); + goto nopartcode; + } + + if (gctl_has_param(req, GPART_PARAM_INDEX)) { + idx = (int)gctl_get_intmax(req, GPART_PARAM_INDEX); + if (idx < 1) + errx(EXIT_FAILURE, "invalid partition index"); + error = gctl_delete_param(req, GPART_PARAM_INDEX); + if (error) + errc(EXIT_FAILURE, error, "internal error"); + } else + idx = 0; + + if (gctl_has_param(req, GPART_PARAM_PARTCODE)) { + s = gctl_get_ascii(req, GPART_PARAM_PARTCODE); + partsize = 1024 * 1024; /* Arbitrary limit. 
*/ + partcode = gpart_bootfile_read(s, &partsize); + error = gctl_delete_param(req, GPART_PARAM_PARTCODE); + if (error) + errc(EXIT_FAILURE, error, "internal error"); + if (idx == 0) + errx(EXIT_FAILURE, "missing -i option"); + gpart_write_partcode(req, idx, partcode, partsize); + free(partcode); + } + +nopartcode: + if (bootcode != NULL) + gpart_issue(req, fl); +} + +static void +gpart_print_error(const char *errstr) +{ + char *errmsg; + int error; + + error = strtol(errstr, &errmsg, 0); + if (errmsg != errstr) { + while (errmsg[0] == ' ') + errmsg++; + if (errmsg[0] != '\0') + warnc(error, "%s", errmsg); + else + warnc(error, NULL); + } else + warnx("%s", errmsg); +} + +static _Noreturn void +gpart_issue(struct gctl_req *req, unsigned int fl __unused) +{ + char buf[4096]; + const char *errstr; + int error, status; + + if (gctl_get_int(req, "nargs") != 1) + errx(EXIT_FAILURE, "Invalid number of arguments."); + (void)gctl_delete_param(req, "nargs"); + + /* autofill parameters (if applicable). */ + error = gpart_autofill(req); + if (error) { + warnc(error, "autofill"); + status = EXIT_FAILURE; + goto done; + } + + buf[0] = '\0'; + gctl_add_param(req, "output", sizeof(buf), buf, + GCTL_PARAM_WR | GCTL_PARAM_ASCII); + errstr = gctl_issue(req); + if (errstr == NULL || errstr[0] == '\0') { + if (buf[0] != '\0') + printf("%s", buf); + status = EXIT_SUCCESS; + goto done; + } + + gpart_print_error(errstr); + status = EXIT_FAILURE; + + done: + gctl_free(req); + exit(status); +} diff --git a/lib/geom/part/gpart.8 b/lib/geom/part/gpart.8 new file mode 100644 index 000000000000..f76c1d9d5d6c --- /dev/null +++ b/lib/geom/part/gpart.8 @@ -0,0 +1,1517 @@ +.\" Copyright (c) 2007, 2008 Marcel Moolenaar +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.Dd February 11, 2025 +.Dt GPART 8 +.Os +.Sh NAME +.Nm gpart +.Nd "control utility for the disk partitioning GEOM class" +.Sh SYNOPSIS +.\" ==== ADD ==== +.Nm +.Cm add +.Fl t Ar type +.Op Fl a Ar alignment +.Op Fl b Ar start +.Op Fl s Ar size +.Op Fl i Ar index +.Op Fl l Ar label +.Op Fl f Ar flags +.Ar geom +.\" ==== BACKUP ==== +.Nm +.Cm backup +.Ar geom +.\" ==== BOOTCODE ==== +.Nm +.Cm bootcode +.Op Fl N +.Op Fl b Ar bootcode +.Op Fl p Ar partcode Fl i Ar index +.Op Fl f Ar flags +.Ar geom +.\" ==== COMMIT ==== +.Nm +.Cm commit +.Ar geom +.\" ==== CREATE ==== +.Nm +.Cm create +.Fl s Ar scheme +.Op Fl n Ar entries +.Op Fl f Ar flags +.Ar provider +.\" ==== DELETE ==== +.Nm +.Cm delete +.Fl i Ar index +.Op Fl f Ar flags +.Ar geom +.\" ==== DESTROY ==== +.Nm +.Cm destroy +.Op Fl F +.Op Fl f Ar flags +.Ar geom +.\" ==== MODIFY ==== +.Nm +.Cm modify +.Fl i Ar index +.Op Fl l Ar label +.Op Fl t Ar type +.Op Fl f Ar flags +.Ar geom +.\" ==== RECOVER ==== +.Nm +.Cm recover +.Op Fl f Ar flags +.Ar geom +.\" ==== RESIZE ==== +.Nm +.Cm resize +.Fl i Ar index +.Op Fl a Ar alignment +.Op Fl s Ar size +.Op Fl f Ar flags +.Ar geom +.\" ==== RESTORE ==== +.Nm +.Cm restore +.Op Fl lF +.Op Fl f Ar flags +.Ar provider +.Op Ar ... +.\" ==== SET ==== +.Nm +.Cm set +.Fl a Ar attrib +.Fl i Ar index +.Op Fl f Ar flags +.Ar geom +.\" ==== SHOW ==== +.Nm +.Cm show +.Op Fl l | r +.Op Fl p +.Op Ar geom ... +.\" ==== UNDO ==== +.Nm +.Cm undo +.Ar geom +.\" ==== UNSET ==== +.Nm +.Cm unset +.Fl a Ar attrib +.Fl i Ar index +.Op Fl f Ar flags +.Ar geom +.\" +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used to partition GEOM providers, normally disks. +The first argument is the action to be taken: +.Bl -tag -width ".Cm bootcode" +.\" ==== ADD ==== +.It Cm add +Add a new partition to the partitioning scheme given by +.Ar geom . +The partition type must be specified with +.Fl t Ar type . 
+The partition's location, size, and other attributes will be calculated +automatically if the corresponding options are not specified. +.Pp +The +.Cm add +command accepts these options: +.Bl -tag -width 12n +.It Fl a Ar alignment +If specified, then the +.Nm +utility tries to align +.Ar start +offset and partition +.Ar size +to be a multiple of the +.Ar alignment +value. +.It Fl b Ar start +The logical block address where the partition will begin. +An SI unit suffix is allowed. +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl i Ar index +The index in the partition table at which the new partition is to be +placed. +The index determines the name of the device special file used +to represent the partition. +.It Fl l Ar label +The label attached to the partition. +This option is only valid when used on partitioning schemes that support +partition labels. +.It Fl s Ar size +Create a partition of size +.Ar size . +An SI unit suffix is allowed. +.It Fl t Ar type +Create a partition of type +.Ar type . +Partition types are discussed below in the section entitled +.Sx "PARTITION TYPES" . +.El +.\" ==== BACKUP ==== +.It Cm backup +Dump a partition table to standard output in a special format used by the +.Cm restore +action. +.\" ==== BOOTCODE ==== +.It Cm bootcode +Embed bootstrap code into the partitioning scheme's metadata on the +.Ar geom +(using +.Fl b Ar bootcode ) +or write bootstrap code into a partition (using +.Fl p Ar partcode +and +.Fl i Ar index ) . +.Pp +The +.Cm bootcode +command accepts these options: +.Bl -tag -width 10n +.It Fl N +Do not preserve the Volume Serial Number for MBR. +MBR bootcode contains Volume Serial Number by default, and +.Nm +tries to preserve it when installing new bootstrap code. +This option skips preservation to help with some versions of +.Xr boot0cfg 8 +that do not support Volume Serial Number. 
+.It Fl b Ar bootcode +Embed bootstrap code from the file +.Ar bootcode +into the partitioning scheme's metadata for +.Ar geom . +Not all partitioning schemes have embedded bootstrap code, so the +.Fl b Ar bootcode +option is scheme-specific in nature (see the section entitled +.Sx BOOTSTRAPPING +below). +The +.Ar bootcode +file must match the partitioning scheme's requirements for file content +and size. +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl i Ar index +Specify the target partition for +.Fl p Ar partcode . +.It Fl p Ar partcode +Write the bootstrap code from the file +.Ar partcode +into the +.Ar geom +partition specified by +.Fl i Ar index . +The size of the file must be smaller than the size of the partition. +.El +.\" ==== COMMIT ==== +.It Cm commit +Commit any pending changes for geom +.Ar geom . +All actions are committed by default and will not result in +pending changes. +Actions can be modified with the +.Fl f Ar flags +option so that they are not committed, but become pending. +Pending changes are reflected by the geom and the +.Nm +utility, but they are not actually written to disk. +The +.Cm commit +action will write all pending changes to disk. +.\" ==== CREATE ==== +.It Cm create +Create a new partitioning scheme on a provider given by +.Ar provider . +The scheme to use must be specified with the +.Fl s Ar scheme +option. +.Pp +The +.Cm create +command accepts these options: +.Bl -tag -width 10n +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl n Ar entries +The number of entries in the partition table. +Every partitioning scheme has a minimum and maximum number of entries. +This option allows tables to be created with a number of entries +that is within the limits. 
+Some schemes have a maximum equal to the minimum and some schemes have +a maximum large enough to be considered unlimited. +By default, partition tables are created with the minimum number of +entries. +.It Fl s Ar scheme +Specify the partitioning scheme to use. +The kernel must have support for a particular scheme before +that scheme can be used to partition a disk. +.El +.\" ==== DELETE ==== +.It Cm delete +Delete a partition from geom +.Ar geom +and further identified by the +.Fl i Ar index +option. +The partition cannot be actively used by the kernel. +.Pp +The +.Cm delete +command accepts these options: +.Bl -tag -width 10n +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl i Ar index +Specifies the index of the partition to be deleted. +.El +.\" ==== DESTROY ==== +.It Cm destroy +Destroy the partitioning scheme as implemented by geom +.Ar geom . +.Pp +The +.Cm destroy +command accepts these options: +.Bl -tag -width 10n +.It Fl F +Forced destroying of the partition table even if it is not empty. +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.El +.\" ==== MODIFY ==== +.It Cm modify +Modify a partition from geom +.Ar geom +and further identified by the +.Fl i Ar index +option. +Only the type and/or label of the partition can be modified. +Not all partitioning schemes support labels and it is invalid to +try to change a partition label in such cases. +.Pp +The +.Cm modify +command accepts these options: +.Bl -tag -width 10n +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl i Ar index +Specifies the index of the partition to be modified. +.It Fl l Ar label +Change the partition label to +.Ar label . +.It Fl t Ar type +Change the partition type to +.Ar type . 
+.El +.\" ==== RECOVER ==== +.It Cm recover +Recover a corrupt partition's scheme metadata on the geom +.Ar geom . +See the section entitled +.Sx RECOVERING +below for the additional information. +.Pp +The +.Cm recover +command accepts these options: +.Bl -tag -width 10n +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.El +.\" ==== RESIZE ==== +.It Cm resize +Resize a partition from geom +.Ar geom +and further identified by the +.Fl i Ar index +option. +If the new size is not specified it is automatically calculated +to be the maximum available from +.Ar geom . +.Pp +The +.Cm resize +command accepts these options: +.Bl -tag -width 12n +.It Fl a Ar alignment +If specified, then the +.Nm +utility tries to align partition +.Ar size +to be a multiple of the +.Ar alignment +value. +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl i Ar index +Specifies the index of the partition to be resized. +.It Fl s Ar size +Specifies the new size of the partition, in logical blocks. +An SI unit suffix is allowed. +.El +.\" ==== RESTORE ==== +.It Cm restore +Restore the partition table from a backup previously created by the +.Cm backup +action and read from standard input. +Only the partition table is restored. +This action does not affect the content of partitions. +After restoring the partition table and writing bootcode if needed, +user data must be restored from backup. +.Pp +The +.Cm restore +command accepts these options: +.Bl -tag -width 10n +.It Fl F +Destroy partition table on the given +.Ar provider +before doing restore. +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl l +Restore partition labels for partitioning schemes that support them. 
+.El +.\" ==== SET ==== +.It Cm set +Set the named attribute on the partition entry. +See the section entitled +.Sx ATTRIBUTES +below for a list of available attributes. +.Pp +The +.Cm set +command accepts these options: +.Bl -tag -width 10n +.It Fl a Ar attrib +Specifies the attribute to set. +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl i Ar index +Specifies the index of the partition on which the attribute will be set. +.El +.\" ==== SHOW ==== +.It Cm show +Show current partition information for the specified geoms, or all +geoms if none are specified. +The default output includes the logical starting block of each +partition, the partition size in blocks, the partition index number, +the partition type, and a human readable partition size. +Block sizes and locations are based on the device's Sectorsize +as shown by +.Cm gpart list . +.Pp +The +.Cm show +command accepts these options: +.Bl -tag -width 10n +.It Fl l +For partitioning schemes that support partition labels, print them +instead of partition type. +.It Fl p +Show provider names instead of partition indexes. +.It Fl r +Show raw partition type instead of symbolic name. +.El +.\" ==== UNDO ==== +.It Cm undo +Revert any pending changes for geom +.Ar geom . +This action is the opposite of the +.Cm commit +action and can be used to undo any changes that have not been committed. +.\" ==== UNSET ==== +.It Cm unset +Clear the named attribute on the partition entry. +See the section entitled +.Sx ATTRIBUTES +below for a list of available attributes. +.Pp +The +.Cm unset +command accepts these options: +.Bl -tag -width 10n +.It Fl a Ar attrib +Specifies the attribute to clear. +.It Fl f Ar flags +Additional operational flags. +See the section entitled +.Sx "OPERATIONAL FLAGS" +below for a discussion +about its use. +.It Fl i Ar index +Specifies the index of the partition on which the attribute will be cleared. 
+.El +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Sh PARTITIONING SCHEMES +Several partitioning schemes are supported by the +.Nm +utility: +.Bl -tag -width ".Cm BSD64" +.It Cm APM +Apple Partition Map, used by PowerPC(R) Macintosh(R) computers. +Requires the +.Cd GEOM_PART_APM +kernel option. +.It Cm BSD +Traditional BSD +.Xr disklabel 8 , +usually used to subdivide MBR partitions. +.Po +This scheme can also be used as the sole partitioning method, without +an MBR. +Partition editing tools from other operating systems often do not +understand the bare disklabel partition layout, so this is sometimes +called +.Dq dangerously dedicated . +.Pc +Requires the +.Cm GEOM_PART_BSD +kernel option. +.It Cm BSD64 +64-bit implementation of BSD disklabel used in +.Dx +to subdivide MBR +or GPT partitions. +Requires the +.Cm GEOM_PART_BSD64 +kernel option. +.It Cm LDM +The Logical Disk Manager is an implementation of a volume manager for +Microsoft Windows NT. +Requires the +.Cd GEOM_PART_LDM +kernel option. +.It Cm GPT +GUID Partition Table is used on Intel-based Macintosh computers and is +gradually replacing MBR on most PCs and other systems. +Requires the +.Cm GEOM_PART_GPT +kernel option. +.It Cm MBR +Master Boot Record is used on PCs and removable media. +Requires the +.Cm GEOM_PART_MBR +kernel option. +The +.Cm GEOM_PART_EBR +option adds support for the Extended Boot Record (EBR), +which is used to define a logical partition. +The +.Cm GEOM_PART_EBR_COMPAT +option enables backward compatibility for partition names +in the EBR scheme. +It also prevents any type of actions on such partitions. +.El +.Pp +See +.Xr glabel 8 +for additional information on labelization of devices and partitions. +.Sh PARTITION TYPES +Partition types are identified on disk by particular strings or magic +values. 
+The +.Nm +utility uses symbolic names for common partition types so the user +does not need to know these values or other details of the partitioning +scheme in question. +The +.Nm +utility also allows the user to specify scheme-specific partition types +for partition types that do not have symbolic names. +Symbolic names currently understood and used by +.Fx +are: +.Bl -tag -width ".Cm dragonfly-disklabel64" +.It Cm apple-boot +The system partition dedicated to storing boot loaders on some Apple +systems. +The scheme-specific types are +.Qq Li "!171" +for MBR, +.Qq Li "!Apple_Bootstrap" +for APM, and +.Qq Li "!426f6f74-0000-11aa-aa11-00306543ecac" +for GPT. +.It Cm bios-boot +The system partition dedicated to second stage of the boot loader program. +Usually it is used by the GRUB 2 loader for GPT partitioning schemes. +The scheme-specific type is +.Qq Li "!21686148-6449-6E6F-744E-656564454649" . +.It Cm efi +The system partition for computers that use the Extensible Firmware +Interface (EFI). +The scheme-specific types are +.Qq Li "!239" +for MBR, and +.Qq Li "!c12a7328-f81f-11d2-ba4b-00a0c93ec93b" +for GPT. +.It Cm freebsd +A +.Fx +partition subdivided into filesystems with a +.Bx +disklabel. +This is a legacy partition type and should not be used for the APM +or GPT schemes. +The scheme-specific types are +.Qq Li "!165" +for MBR, +.Qq Li "!FreeBSD" +for APM, and +.Qq Li "!516e7cb4-6ecf-11d6-8ff8-00022d09712b" +for GPT. +.It Cm freebsd-boot +A +.Fx +partition dedicated to bootstrap code. +The scheme-specific type is +.Qq Li "!83bd6b9d-7f41-11dc-be0b-001560b84f0f" +for GPT. +.It Cm freebsd-swap +A +.Fx +partition dedicated to swap space. +The scheme-specific types are +.Qq Li "!FreeBSD-swap" +for APM, and +.Qq Li "!516e7cb5-6ecf-11d6-8ff8-00022d09712b" +for GPT. +.It Cm freebsd-ufs +A +.Fx +partition that contains a UFS or UFS2 filesystem. 
+The scheme-specific types are +.Qq Li "!FreeBSD-UFS" +for APM, and +.Qq Li "!516e7cb6-6ecf-11d6-8ff8-00022d09712b" +for GPT. +.It Cm freebsd-zfs +A +.Fx +partition that contains a ZFS volume. +The scheme-specific types are +.Qq Li "!FreeBSD-ZFS" +for APM, and +.Qq Li "!516e7cba-6ecf-11d6-8ff8-00022d09712b" +for GPT. +.El +.Pp +Other symbolic names that can be used with the +.Nm +utility are: +.Bl -tag -width ".Cm dragonfly-disklabel64" +.It Cm apple-apfs +An Apple macOS partition used for the Apple file system, APFS. +.It Cm apple-core-storage +An Apple Mac OS X partition used by the logical volume manager known as +Core Storage. +The scheme-specific type is +.Qq Li "!53746f72-6167-11aa-aa11-00306543ecac" +for GPT. +.It Cm apple-hfs +An Apple Mac OS X partition that contains a HFS or HFS+ filesystem. +The scheme-specific types are +.Qq Li "!175" +for MBR, +.Qq Li "!Apple_HFS" +for APM and +.Qq Li "!48465300-0000-11aa-aa11-00306543ecac" +for GPT. +.It Cm apple-label +An Apple Mac OS X partition dedicated to partition metadata that describes +the disk device. +The scheme-specific type is +.Qq Li "!4c616265-6c00-11aa-aa11-00306543ecac" +for GPT. +.It Cm apple-raid +An Apple Mac OS X partition used in a software RAID configuration. +The scheme-specific type is +.Qq Li "!52414944-0000-11aa-aa11-00306543ecac" +for GPT. +.It Cm apple-raid-offline +An Apple Mac OS X partition used in a software RAID configuration. +The scheme-specific type is +.Qq Li "!52414944-5f4f-11aa-aa11-00306543ecac" +for GPT. +.It Cm apple-tv-recovery +An Apple Mac OS X partition used by Apple TV. +The scheme-specific type is +.Qq Li "!5265636f-7665-11aa-aa11-00306543ecac" +for GPT. +.It Cm apple-ufs +An Apple Mac OS X partition that contains a UFS filesystem. +The scheme-specific types are +.Qq Li "!168" +for MBR, +.Qq Li "!Apple_UNIX_SVR2" +for APM and +.Qq Li "!55465300-0000-11aa-aa11-00306543ecac" +for GPT. +.It Cm apple-zfs +An Apple Mac OS X partition that contains a ZFS volume. 
+The scheme-specific type is +.Qq Li "!6a898cc3-1dd2-11b2-99a6-080020736631" +for GPT. +The same GUID is also used for +.Sy illumos/Solaris /usr partition . +See the +.Sx CAVEATS +section below. +.It Cm dragonfly-label32 +A +.Dx +partition subdivided into filesystems with a +.Bx +disklabel. +The scheme-specific type is +.Qq Li "!9d087404-1ca5-11dc-8817-01301bb8a9f5" +for GPT. +.It Cm dragonfly-label64 +A +.Dx +partition subdivided into filesystems with a +disklabel64. +The scheme-specific type is +.Qq Li "!3d48ce54-1d16-11dc-8696-01301bb8a9f5" +for GPT. +.It Cm dragonfly-legacy +A legacy partition type used in +.Dx . +The scheme-specific type is +.Qq Li "!bd215ab2-1d16-11dc-8696-01301bb8a9f5" +for GPT. +.It Cm dragonfly-ccd +A +.Dx +partition used with Concatenated Disk driver. +The scheme-specific type is +.Qq Li "!dbd5211b-1ca5-11dc-8817-01301bb8a9f5" +for GPT. +.It Cm dragonfly-hammer +A +.Dx +partition that contains a Hammer filesystem. +The scheme-specific type is +.Qq Li "!61dc63ac-6e38-11dc-8513-01301bb8a9f5" +for GPT. +.It Cm dragonfly-hammer2 +A +.Dx +partition that contains a Hammer2 filesystem. +The scheme-specific type is +.Qq Li "!5cbb9ad1-862d-11dc-a94d-01301bb8a9f5" +for GPT. +.It Cm dragonfly-swap +A +.Dx +partition dedicated to swap space. +The scheme-specific type is +.Qq Li "!9d58fdbd-1ca5-11dc-8817-01301bb8a9f5" +for GPT. +.It Cm dragonfly-ufs +A +.Dx +partition that contains a UFS1 filesystem. +The scheme-specific type is +.Qq Li "!9d94ce7c-1ca5-11dc-8817-01301bb8a9f5" +for GPT. +.It Cm dragonfly-vinum +A +.Dx +partition used with Logical Volume Manager. +The scheme-specific type is +.Qq Li "!9dd4478f-1ca5-11dc-8817-01301bb8a9f5" +for GPT. +.It Cm ebr +A partition subdivided into filesystems with an EBR. +The scheme-specific type is +.Qq Li "!5" +for MBR. +.It Cm fat16 +A partition that contains a FAT16 filesystem. +The scheme-specific type is +.Qq Li "!6" +for MBR. +.It Cm fat32 +A partition that contains a FAT32 filesystem. 
+The scheme-specific type is +.Qq Li "!11" +for MBR. +.It Cm fat32lba +A partition that contains a FAT32 (LBA) filesystem. +The scheme-specific type is +.Qq Li "!12" +for MBR. +.It Cm hifive-fsbl +A raw partition containing a HiFive first stage bootloader. +The scheme-specific type is +.Qq Li "!5b193300-fc78-40cd-8002-e86c45580b47" +for GPT. +.It Cm hifive-bbl +A raw partition containing a HiFive second stage bootloader. +The scheme-specific type is +.Qq Li "!2e54b353-1271-4842-806f-e436d6af6985" +for GPT. +.It Cm linux-data +A Linux partition that contains some filesystem with data. +The scheme-specific types are +.Qq Li "!131" +for MBR and +.Qq Li "!0fc63daf-8483-4772-8e79-3d69d8477de4" +for GPT. +.It Cm linux-lvm +A Linux partition dedicated to Logical Volume Manager. +The scheme-specific types are +.Qq Li "!142" +for MBR and +.Qq Li "!e6d6d379-f507-44c2-a23c-238f2a3df928" +for GPT. +.It Cm linux-raid +A Linux partition used in a software RAID configuration. +The scheme-specific types are +.Qq Li "!253" +for MBR and +.Qq Li "!a19d880f-05fc-4d3b-a006-743f0f84911e" +for GPT. +.It Cm linux-swap +A Linux partition dedicated to swap space. +The scheme-specific types are +.Qq Li "!130" +for MBR and +.Qq Li "!0657fd6d-a4ab-43c4-84e5-0933c84b4f4f" +for GPT. +.It Cm mbr +A partition that is sub-partitioned by a Master Boot Record (MBR). +This type is known as +.Qq Li "!024dee41-33e7-11d3-9d69-0008c781f39f" +by GPT. +.It Cm ms-basic-data +A basic data partition (BDP) for Microsoft operating systems. +In the GPT this type is the equivalent to partition types +.Cm fat16 , fat32 +and +.Cm ntfs +in MBR. +This type is used for GPT exFAT partitions. +The scheme-specific type is +.Qq Li "!ebd0a0a2-b9e5-4433-87c0-68b6b72699c7" +for GPT. +.It Cm ms-ldm-data +A partition that contains Logical Disk Manager (LDM) volumes. +The scheme-specific types are +.Qq Li "!66" +for MBR, +.Qq Li "!af9b60a0-1431-4f62-bc68-3311714a69ad" +for GPT. 
+.It Cm ms-ldm-metadata +A partition that contains Logical Disk Manager (LDM) database. +The scheme-specific type is +.Qq Li "!5808c8aa-7e8f-42e0-85d2-e1e90434cfb3" +for GPT. +.It Cm netbsd-ccd +A +.Nx +partition used with Concatenated Disk driver. +The scheme-specific type is +.Qq Li "!2db519c4-b10f-11dc-b99b-0019d1879648" +for GPT. +.It Cm netbsd-cgd +An encrypted +.Nx +partition. +The scheme-specific type is +.Qq Li "!2db519ec-b10f-11dc-b99b-0019d1879648" +for GPT. +.It Cm netbsd-ffs +A +.Nx +partition that contains a UFS filesystem. +The scheme-specific type is +.Qq Li "!49f48d5a-b10e-11dc-b99b-0019d1879648" +for GPT. +.It Cm netbsd-lfs +A +.Nx +partition that contains an LFS filesystem. +The scheme-specific type is +.Qq Li "!49f48d82-b10e-11dc-b99b-0019d1879648" +for GPT. +.It Cm netbsd-raid +A +.Nx +partition used in a software RAID configuration. +The scheme-specific type is +.Qq Li "!49f48daa-b10e-11dc-b99b-0019d1879648" +for GPT. +.It Cm netbsd-swap +A +.Nx +partition dedicated to swap space. +The scheme-specific type is +.Qq Li "!49f48d32-b10e-11dc-b99b-0019d1879648" +for GPT. +.It Cm ntfs +A partition that contains a NTFS or exFAT filesystem. +The scheme-specific type is +.Qq Li "!7" +for MBR. +.It Cm prep-boot +The system partition dedicated to storing boot loaders on some PowerPC systems, +notably those made by IBM. +The scheme-specific types are +.Qq Li "!65" +for MBR and +.Qq Li "!9e1a2d38-c612-4316-aa26-8b49521e5a8b" +for GPT. +.It Cm solaris-boot +An illumos/Solaris partition dedicated to boot loader. +The scheme-specific type is +.Qq Li "!6a82cb45-1dd2-11b2-99a6-080020736631" +for GPT. +.It Cm solaris-root +An illumos/Solaris partition dedicated to root filesystem. +The scheme-specific type is +.Qq Li "!6a85cf4d-1dd2-11b2-99a6-080020736631" +for GPT. +.It Cm solaris-swap +An illumos/Solaris partition dedicated to swap. +The scheme-specific type is +.Qq Li "!6a87c46f-1dd2-11b2-99a6-080020736631" +for GPT. 
+.It Cm solaris-backup +An illumos/Solaris partition dedicated to backup. +The scheme-specific type is +.Qq Li "!6a8b642b-1dd2-11b2-99a6-080020736631" +for GPT. +.It Cm solaris-var +An illumos/Solaris partition dedicated to /var filesystem. +The scheme-specific type is +.Qq Li "!6a8ef2e9-1dd2-11b2-99a6-080020736631" +for GPT. +.It Cm solaris-home +An illumos/Solaris partition dedicated to /home filesystem. +The scheme-specific type is +.Qq Li "!6a90ba39-1dd2-11b2-99a6-080020736631" +for GPT. +.It Cm solaris-altsec +An illumos/Solaris partition dedicated to alternate sector. +The scheme-specific type is +.Qq Li "!6a9283a5-1dd2-11b2-99a6-080020736631" +for GPT. +.It Cm solaris-reserved +An illumos/Solaris partition dedicated to reserved space. +The scheme-specific type is +.Qq Li "!6a945a3b-1dd2-11b2-99a6-080020736631" +for GPT. +.It Cm u-boot-env +A raw partition dedicated to U-Boot for storing its environment. +The scheme-specific type is +.Qq Li "!3de21764-95bd-54bd-a5c3-4abe786f38a8" +for GPT. +.It Cm vmware-vmfs +A partition that contains a VMware File System (VMFS). +The scheme-specific types are +.Qq Li "!251" +for MBR and +.Qq Li "!aa31e02a-400f-11db-9590-000c2911d1b8" +for GPT. +.It Cm vmware-vmkdiag +A partition that contains a VMware diagnostic filesystem. +The scheme-specific types are +.Qq Li "!252" +for MBR and +.Qq Li "!9d275380-40ad-11db-bf97-000c2911d1b8" +for GPT. +.It Cm vmware-reserved +A VMware reserved partition. +The scheme-specific type is +.Qq Li "!9198effc-31c0-11db-8f78-000c2911d1b8" +for GPT. +.It Cm vmware-vsanhdr +A partition claimed by VMware VSAN. +The scheme-specific type is +.Qq Li "!381cfccc-7288-11e0-92ee-000c2911d0b2" +for GPT. +.El +.Sh ATTRIBUTES +The scheme-specific attributes for EBR: +.Bl -tag -width ".Cm active" +.It Cm active +.El +.Pp +The scheme-specific attributes for GPT: +.Bl -tag -width ".Cm bootfailed" +.It Cm bootme +When set, the +.Nm gptboot +stage 1 boot loader will try to boot the system from this partition. 
+Multiple partitions can be marked with the +.Cm bootme +attribute. +See +.Xr gptboot 8 +for more details. +.It Cm bootonce +Setting this attribute automatically sets the +.Cm bootme +attribute. +When set, the +.Nm gptboot +stage 1 boot loader will try to boot the system from this partition only once. +Multiple partitions can be marked with the +.Cm bootonce +and +.Cm bootme +attribute pairs. +See +.Xr gptboot 8 +for more details. +.It Cm bootfailed +This attribute should not be manually managed. +It is managed by the +.Nm gptboot +stage 1 boot loader and the +.Pa /etc/rc.d/gptboot +start-up script. +See +.Xr gptboot 8 +for more details. +.It Cm lenovofix +Setting this attribute overwrites the Protective MBR with a new one where +the 0xee partition is the second, rather than the first record. +This resolves a BIOS compatibility issue with some Lenovo models including the +X220, T420, and T520, allowing them to boot from GPT partitioned disks +without using EFI. +.El +.Pp +The scheme-specific attributes for MBR: +.Bl -tag -width ".Cm active" +.It Cm active +.El +.Sh BOOTSTRAPPING +.Fx +supports several partitioning schemes and each scheme uses different +bootstrap code. +The bootstrap code is located in a specific disk area for each partitioning +scheme, and may vary in size for different schemes. +.Pp +Bootstrap code can be separated into two types. +The first type is embedded in the partitioning scheme's metadata, while the +second type is located on a specific partition. +Embedding bootstrap code should only be done with the +.Cm gpart bootcode +command with the +.Fl b Ar bootcode +option. +The GEOM PART class knows how to safely embed bootstrap code into +specific partitioning scheme metadata without causing any damage. +.Pp +The Master Boot Record (MBR) uses a 512-byte bootstrap code image, embedded +into the partition table's metadata area. +There are two variants of this bootstrap code: +.Pa /boot/mbr +and +.Pa /boot/boot0 . 
+.Pa /boot/mbr +searches for a partition with the +.Cm active +attribute (see the +.Sx ATTRIBUTES +section) in the partition table. +Then it runs the next bootstrap stage. +The +.Pa /boot/boot0 +image contains a boot manager with some additional interactive functions +for multi-booting from a user-selected partition. +.Pp +A BSD disklabel is usually created inside an MBR partition (slice) +with type +.Cm freebsd +(see the +.Sx "PARTITION TYPES" +section). +It uses an 8 KB bootstrap code image +.Pa /boot/boot , +embedded into the partition table's metadata area. +.Pp +Both types of bootstrap code are used to boot from the GUID Partition Table. +First, a protective MBR is embedded into the first disk sector from the +.Pa /boot/pmbr +image. +It searches through the GPT for a +.Cm freebsd-boot +partition (see the +.Sx "PARTITION TYPES" +section) and runs the next bootstrap stage from it. +The +.Cm freebsd-boot +partition should be smaller than 545 KB. +It can be located either before or after other +.Fx +partitions on the disk. +There are two variants of bootstrap code to write to this partition: +.Pa /boot/gptboot +and +.Pa /boot/gptzfsboot . +.Pp +.Pa /boot/gptboot +is used to boot from UFS partitions. +.Cm gptboot +searches through +.Cm freebsd-ufs +partitions in the GPT and selects one to boot based on the +.Cm bootonce +and +.Cm bootme +attributes. +If neither attribute is found, +.Pa /boot/gptboot +boots from the first +.Cm freebsd-ufs +partition. +.Pa /boot/loader +.Pq the third bootstrap stage +is loaded from the first partition that matches these conditions. +See +.Xr gptboot 8 +for more information. +.Pp +.Pa /boot/gptzfsboot +is used to boot from ZFS. +It searches through the GPT for +.Cm freebsd-zfs +partitions, trying to detect ZFS pools. +After all pools are detected, +.Pa /boot/loader +is started from the first one found set as bootable. +.Pp +The APM scheme also does not support embedding bootstrap code.
+Instead, the 800 KBytes bootstrap code image +.Pa /boot/boot1.hfs +should be written with the +.Cm gpart bootcode +command to a partition of type +.Cm apple-boot , +which should also be 800 KB in size. +.Sh OPERATIONAL FLAGS +Actions other than the +.Cm commit +and +.Cm undo +actions take an optional +.Fl f Ar flags +option. +This option is used to specify action-specific operational flags. +By default, the +.Nm +utility defines the +.Ql C +flag so that the action is immediately +committed. +The user can specify +.Dq Fl f Cm x +to have the action result in a pending change that can later, with +other pending changes, be committed as a single compound change with +the +.Cm commit +action or reverted with the +.Cm undo +action. +.Sh RECOVERING +The GEOM PART class supports recovering of partition tables only for GPT. +The GPT primary metadata is stored at the beginning of the device. +For redundancy, a secondary +.Pq backup +copy of the metadata is stored at the end of the device. +As a result of having two copies, some corruption of metadata is not +fatal to the working of GPT. +When the kernel detects corrupt metadata, it marks this table as corrupt +and reports the problem. +.Cm destroy +and +.Cm recover +are the only operations allowed on corrupt tables. +.Pp +If one GPT header appears to be corrupt but the other copy remains intact, +the kernel will log the following: +.Bd -literal -offset indent +GEOM: provider: the primary GPT table is corrupt or invalid. +GEOM: provider: using the secondary instead -- recovery strongly advised. +.Ed +.Pp +or +.Bd -literal -offset indent +GEOM: provider: the secondary GPT table is corrupt or invalid. +GEOM: provider: using the primary only -- recovery suggested. +.Ed +.Pp +Also +.Nm +commands such as +.Cm show , status +and +.Cm list +will report about corrupt tables. +.Pp +If the size of the device has changed (e.g.,\& volume expansion) the +secondary GPT header will no longer be located in the last sector. 
+This is not a metadata corruption, but it is dangerous because any +corruption of the primary GPT will lead to loss of the partition table. +This problem is reported by the kernel with the message: +.Bd -literal -offset indent +GEOM: provider: the secondary GPT header is not in the last LBA. +.Ed +.Pp +This situation can be recovered with the +.Cm recover +command. +This command reconstructs the corrupt metadata using known valid +metadata and relocates the secondary GPT to the end of the device. +.Pp +.Em NOTE : +The GEOM PART class can detect the same partition table visible through +different GEOM providers, and some of them will be marked as corrupt. +Be careful when choosing a provider for recovery. +If you choose incorrectly you can destroy the metadata of another GEOM class, +e.g.,\& GEOM MIRROR or GEOM LABEL. +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm PART +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.part.allow_nesting : No 0 +By default, some schemes (currently BSD and BSD64) do not permit +further nested partitioning. +This variable overrides this restriction and allows arbitrary nesting (except +within partitions created at offset 0). +Some schemes have their own separate checks, for which see below. +.It Va kern.geom.part.auto_resize : No 1 +This variable controls automatic resize behavior of the +.Nm PART +GEOM class. +When this variable is enabled and a new provider size is detected, the scheme +metadata is resized, but the changes are not saved to disk until +.Cm gpart commit +is run to confirm changes. +This behavior is also reported with a diagnostic message: +.Sy "GEOM_PART: (provider) was automatically resized." +.Sy "Use `gpart commit (provider)` to save changes or `gpart undo (provider)`" +.Sy "to revert them."
+.It Va kern.geom.part.check_integrity : No 1 +This variable controls the behaviour of metadata integrity checks. +When integrity checks are enabled, the +.Nm PART +GEOM class verifies all generic partition parameters obtained from the +disk metadata. +If some inconsistency is detected, the partition table will be +rejected with a diagnostic message: +.Sy "GEOM_PART: Integrity check failed (provider, scheme)" . +.It Va kern.geom.part.gpt.allow_nesting : No 0 +By default the GPT scheme is allowed only at the outermost nesting level. +This variable allows this restriction to be removed. +.It Va kern.geom.part.ldm.debug : No 0 +Debug level of the Logical Disk Manager (LDM) module. +This can be set to a number between 0 and 2 inclusive. +If set to 0 minimal debug information is printed, +and if set to 2 the maximum amount of debug information is printed. +.It Va kern.geom.part.ldm.show_mirrors : No 0 +This variable controls how the Logical Disk Manager (LDM) module handles +mirrored volumes. +By default mirrored volumes are shown as partitions with type +.Cm ms-ldm-data +(see the +.Sx "PARTITION TYPES" +section). +If this variable is set to 1, each component of the mirrored volume will be +presented as an independent partition. +.Em NOTE : +This may break a mirrored volume and lead to data damage. +.It Va kern.geom.part.mbr.enforce_chs : No 0 +Specify how the Master Boot Record (MBR) module does alignment. +If this variable is set to a non-zero value, the module will automatically +recalculate the user-specified offset and size for alignment with the CHS +geometry. +Otherwise the values will be left unchanged. +.It Va kern.geom.part.separator : No "" +Specify an optional separator that will be inserted between the GEOM name +and partition name. +This variable is a +.Xr loader 8 +tunable. +Note that setting this variable may break software which assumes a particular +naming scheme. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES +The examples below assume that the disk's logical block size is 512 +bytes, regardless of its physical block size. +.Ss GPT +In this example, we will format +.Pa ada0 +with the GPT scheme and create boot, swap and root partitions. +First, we need to create the partition table: +.Bd -literal -offset indent +/sbin/gpart create -s GPT ada0 +.Ed +.Pp +Next, we install a protective MBR with the first-stage bootstrap code. +The protective MBR lists a single, bootable partition spanning the +entire disk, thus allowing non-GPT-aware BIOSes to boot from the disk +and preventing tools which do not understand the GPT scheme from +considering the disk to be unformatted. +.Bd -literal -offset indent +/sbin/gpart bootcode -b /boot/pmbr ada0 +.Ed +.Pp +We then create a dedicated +.Cm freebsd-boot +partition to hold the second-stage boot loader, which will load the +.Fx +kernel and modules from a UFS or ZFS filesystem. +This partition must be larger than the bootstrap code +.Po +either +.Pa /boot/gptboot +for UFS or +.Pa /boot/gptzfsboot +for ZFS +.Pc , +but smaller than 545 kB since the first-stage loader will load the +entire partition into memory during boot, regardless of how much data +it actually contains. +We create a 472-block (236 kB) boot partition at offset 40, which is +the size of the partition table (34 blocks or 17 kB) rounded up to the +nearest 4 kB boundary. +.Bd -literal -offset indent +/sbin/gpart add -b 40 -s 472 -t freebsd-boot ada0 +/sbin/gpart bootcode -p /boot/gptboot -i 1 ada0 +.Ed +.Pp +We now create a 4 GB swap partition at the first available offset, +which is 40 + 472 = 512 blocks (256 kB). 
+.Bd -literal -offset indent +/sbin/gpart add -s 4G -t freebsd-swap ada0 +.Ed +.Pp +Aligning the swap partition and all subsequent partitions on a 256 kB +boundary ensures optimal performance on a wide range of media, from +plain old disks with 512-byte blocks, through modern +.Dq advanced format +disks with 4096-byte physical blocks, to RAID volumes with stripe +sizes of up to 256 kB. +.Pp +Finally, we create and format an 8 GB +.Cm freebsd-ufs +partition for the root filesystem, leaving the rest of the device free +for additional filesystems: +.Bd -literal -offset indent +/sbin/gpart add -s 8G -t freebsd-ufs ada0 +/sbin/newfs -Uj /dev/ada0p3 +.Ed +.Ss MBR +In this example, we will format +.Pa ada0 +with the MBR scheme and create a single partition which we subdivide +using a traditional +.Bx +disklabel. +.Pp +First, we create the partition table as well as a single partition 64 GB in +size and an alignment of 4 kB, then we mark that partition active (bootable) +and install the first-stage boot loader: +.Bd -literal -offset indent +/sbin/gpart create -s MBR ada0 +/sbin/gpart add -t freebsd -s 64G -a 4k ada0 +/sbin/gpart set -a active -i 1 ada0 +/sbin/gpart bootcode -b /boot/boot0 ada0 +.Ed +.Pp +Next, we create a disklabel in that partition +.Po +.Dq slice +in disklabel terminology +.Pc +with room for up to 20 partitions: +.Bd -literal -offset indent +/sbin/gpart create -s BSD -n 20 ada0s1 +.Ed +.Pp +We then create an 8 GB root partition and a 4 GB swap partition: +.Bd -literal -offset indent +/sbin/gpart add -t freebsd-ufs -s 8G ada0s1 +/sbin/gpart add -t freebsd-swap -s 4G ada0s1 +.Ed +.Pp +Finally, we install the appropriate boot loader for the +.Bx +label: +.Bd -literal -offset indent +/sbin/gpart bootcode -b /boot/boot ada0s1 +.Ed +.Ss Deleting Partitions and Destroying the Partitioning Scheme +If a +.Em "Device busy" +error is shown when trying to destroy a partition table, remember that +all of the partitions must be deleted first with the +.Cm delete 
+action. +In this example, +.Pa da0 +has three partitions: +.Bd -literal -offset indent +/sbin/gpart delete -i 3 da0 +/sbin/gpart delete -i 2 da0 +/sbin/gpart delete -i 1 da0 +/sbin/gpart destroy da0 +.Ed +.Pp +Rather than deleting each partition and then destroying the partitioning +scheme, the +.Fl F +option can be given with +.Cm destroy +to delete all of the partitions before destroying the partitioning scheme. +This is equivalent to the previous example: +.Bd -literal -offset indent +/sbin/gpart destroy -F da0 +.Ed +.Ss Backup and Restore +Create a backup of the partition table from +.Pa da0 : +.Bd -literal -offset indent +/sbin/gpart backup da0 > da0.backup +.Ed +.Pp +Restore the partition table from the backup to +.Pa da0 : +.Bd -literal -offset indent +/sbin/gpart restore -l da0 < /mnt/da0.backup +.Ed +.Pp +Clone the partition table from +.Pa ada0 +to +.Pa ada1 +and +.Pa ada2 : +.Bd -literal -offset indent +/sbin/gpart backup ada0 | /sbin/gpart restore -F ada1 ada2 +.Ed +.Sh SEE ALSO +.Xr geom 4 , +.Xr boot0cfg 8 , +.Xr geom 8 , +.Xr glabel 8 , +.Xr gptboot 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 7.0 . +.Sh AUTHORS +.An Marcel Moolenaar Aq Mt marcel@FreeBSD.org +.Sh CAVEATS +Partition type +.Em apple-zfs +(6a898cc3-1dd2-11b2-99a6-080020736631) is also being used +on illumos/Solaris platforms for ZFS volumes. diff --git a/lib/geom/raid/Makefile b/lib/geom/raid/Makefile new file mode 100644 index 000000000000..b84121126ca1 --- /dev/null +++ b/lib/geom/raid/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= raid + +.include <bsd.lib.mk> diff --git a/lib/geom/raid/Makefile.depend b/lib/geom/raid/Makefile.depend new file mode 100644 index 000000000000..27e87393b549 --- /dev/null +++ b/lib/geom/raid/Makefile.depend @@ -0,0 +1,17 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + lib/libmd \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/raid/geom_raid.c b/lib/geom/raid/geom_raid.c new file mode 100644 index 000000000000..f73b42e91e85 --- /dev/null +++ b/lib/geom/raid/geom_raid.c @@ -0,0 +1,91 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/raid/g_raid.h> +#include <core/geom.h> +#include <misc/subr.h> + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_RAID_VERSION; + +/* + * Command table for the RAID class, terminated by G_CMD_SENTINEL. + * Each entry gives the verb name, its G_FLAG_* flags, a third field that + * is NULL for every verb here, the option list and the usage string. + * NOTE(review): the NULL third field presumably means no userland handler + * is needed for these verbs -- confirm against struct g_command in + * core/geom.h. + */ +struct g_command class_commands[] = { + { "label", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'o', "fmtopt", G_VAL_OPTIONAL, G_TYPE_STRING }, + { 'S', "size", G_VAL_OPTIONAL, G_TYPE_NUMBER }, + { 's', "strip", G_VAL_OPTIONAL, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-fv] [-o fmtopt] [-S size] [-s stripsize] format label level prov ..." + }, + { "add", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'S', "size", G_VAL_OPTIONAL, G_TYPE_NUMBER }, + { 's', "strip", G_VAL_OPTIONAL, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-fv] [-S size] [-s stripsize] name label level" + }, + { "delete", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name [label|num]" + }, + { "insert", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov ..." + }, + { "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov ..." + }, + { "fail", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov ..." + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name" + }, + G_CMD_SENTINEL +}; + diff --git a/lib/geom/raid/graid.8 b/lib/geom/raid/graid.8 new file mode 100644 index 000000000000..4ef0cd22e703 --- /dev/null +++ b/lib/geom/raid/graid.8 @@ -0,0 +1,318 @@ +.\" Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1.
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd January 23, 2025 +.Dt GRAID 8 +.Os +.Sh NAME +.Nm graid +.Nd "control utility for software RAID devices" +.Sh SYNOPSIS +.Nm +.Cm label +.Op Fl f +.Op Fl o Ar fmtopt +.Op Fl S Ar size +.Op Fl s Ar strip +.Ar format +.Ar label +.Ar level +.Ar prov ... +.Nm +.Cm add +.Op Fl f +.Op Fl S Ar size +.Op Fl s Ar strip +.Ar name +.Ar label +.Ar level +.Nm +.Cm delete +.Op Fl f +.Ar name +.Op Ar label | Ar num +.Nm +.Cm insert +.Ar name +.Ar prov ... +.Nm +.Cm remove +.Ar name +.Ar prov ... +.Nm +.Cm fail +.Ar name +.Ar prov ... +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used to manage software RAID configurations, supported by the +GEOM RAID class. 
+GEOM RAID class uses on-disk metadata to provide access to software-RAID +volumes defined by different RAID BIOSes. +Depending on RAID BIOS type and its metadata format, different subsets of +configurations and features are supported. +To allow booting from RAID volume, the metadata format should match the +RAID BIOS type and its capabilities. +To guarantee that these match, it is recommended to create volumes via the +RAID BIOS interface, while experienced users are free to do it using this +utility. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm label +Create an array with a single volume. +The +.Ar format +argument specifies the on-disk metadata format to use for this array, +such as "Intel". +The +.Ar label +argument specifies the label of the created volume. +The +.Ar level +argument specifies the RAID level of the created volume, such as: +"RAID0", "RAID1", etc. +The subsequent list enumerates providers to use as array components. +The special name "NONE" can be used to reserve space for absent disks. +The order of components can be important, depending on specific RAID level +and metadata format. +.Pp +Additional options include: +.Bl -tag -width ".Fl s Ar strip" +.It Fl f +Enforce specified configuration creation if it is officially unsupported, +but technically can be created. +.It Fl o Ar fmtopt +Specifies metadata format options. +.It Fl S Ar size +Use +.Ar size +bytes on each component for this volume. +Should be used if several volumes per array are planned, or if smaller +components are going to be inserted later. +Defaults to size of the smallest component. +.It Fl s Ar strip +Specifies strip size in bytes. +Defaults to 131072. +.El +.It Cm add +Create another volume on the existing array. +The +.Ar name +argument is the name of the existing array, reported by label command. +The rest of arguments are the same as for the label command. +.It Cm delete +Delete volume(s) from the existing array.
+When the last volume is deleted, the array is also deleted and its metadata +erased. +The +.Ar name +argument is the name of existing array. +Optional +.Ar label +or +.Ar num +arguments allow specifying volume for deletion. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Delete volume(s) even if it is still open. +.El +.It Cm insert +Insert specified provider(s) into specified array instead of the first missing +or failed components. +If there are no such components, mark disk(s) as spare. +.It Cm remove +Remove the specified provider(s) from the specified array and erase metadata. +If there are spare disks present, the removed disk(s) will be replaced by +spares. +.It Cm fail +Mark the given disk(s) as failed, removing from active use unless absolutely +necessary due to exhausted redundancy. +If there are spare disks present, the failed disk(s) will be replaced with one +of them. +.It Cm stop +Stop the given array. +The metadata will not be erased. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Stop the given array even if some of its volumes are opened. +.El +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options include: +.Bl -tag -width ".Fl v" +.It Fl v +Be more verbose. +.El +.Sh SUPPORTED METADATA FORMATS +The GEOM RAID class follows a modular design, allowing different metadata +formats to be used. +Support is currently implemented for the following formats: +.Bl -tag -width "Intel" +.It DDF +The format defined by the SNIA Common RAID Disk Data Format v2.0 specification. +Used by some Adaptec RAID BIOSes and some hardware RAID controllers. +Because of high format flexibility different implementations support +different set of features and have different on-disk metadata layouts. +To provide compatibility, the GEOM RAID class mimics capabilities +of the first detected DDF array.
+Respecting that, it may support different number of disks per volume, +volumes per array, partitions per disk, etc. +The following configurations are supported: RAID0 (2+ disks), RAID1 (2+ disks), +RAID1E (3+ disks), RAID3 (3+ disks), RAID4 (3+ disks), RAID5 (3+ disks), +RAID5E (4+ disks), RAID5EE (4+ disks), RAID5R (3+ disks), RAID6 (4+ disks), +RAIDMDF (4+ disks), RAID10 (4+ disks), SINGLE (1 disk), CONCAT (2+ disks). +.Pp +Format supports two options "BE" and "LE", that mean big-endian byte order +defined by specification (default) and little-endian used by some Adaptec +controllers. +.It Intel +The format used by Intel RAID BIOS. +Supports up to two volumes per array. +Supports configurations: RAID0 (2+ disks), RAID1 (2 disks), +RAID5 (3+ disks), RAID10 (4 disks). +Configurations not supported by Intel RAID BIOS, but enforceable on your own +risk: RAID1 (3+ disks), RAID1E (3+ disks), RAID10 (6+ disks). +.It JMicron +The format used by JMicron RAID BIOS. +Supports one volume per array. +Supports configurations: RAID0 (2+ disks), RAID1 (2 disks), +RAID10 (4 disks), CONCAT (2+ disks). +Configurations not supported by JMicron RAID BIOS, but enforceable on your own +risk: RAID1 (3+ disks), RAID1E (3+ disks), RAID10 (6+ disks), RAID5 (3+ disks). +.It NVIDIA +The format used by NVIDIA MediaShield RAID BIOS. +Supports one volume per array. +Supports configurations: RAID0 (2+ disks), RAID1 (2 disks), +RAID5 (3+ disks), RAID10 (4+ disks), SINGLE (1 disk), CONCAT (2+ disks). +Configurations not supported by NVIDIA MediaShield RAID BIOS, but enforceable +on your own risk: RAID1 (3+ disks). +.It Promise +The format used by Promise and AMD/ATI RAID BIOSes. +Supports multiple volumes per array. +Each disk can be split to be used by up to two arbitrary volumes. +Supports configurations: RAID0 (2+ disks), RAID1 (2 disks), +RAID5 (3+ disks), RAID10 (4 disks), SINGLE (1 disk), CONCAT (2+ disks). 
+Configurations not supported by RAID BIOSes, but enforceable on your +own risk: RAID1 (3+ disks), RAID10 (6+ disks). +.It SiI +The format used by SiliconImage RAID BIOS. +Supports one volume per array. +Supports configurations: RAID0 (2+ disks), RAID1 (2 disks), +RAID5 (3+ disks), RAID10 (4 disks), SINGLE (1 disk), CONCAT (2+ disks). +Configurations not supported by SiliconImage RAID BIOS, but enforceable on your +own risk: RAID1 (3+ disks), RAID10 (6+ disks). +.El +.Sh SUPPORTED RAID LEVELS +The GEOM RAID class follows a modular design, allowing different RAID levels +to be used. +Full support for the following RAID levels is currently implemented: +RAID0, RAID1, RAID1E, RAID10, SINGLE, CONCAT. +The following RAID levels are supported as read-only for volumes in optimal +state (without using redundancy): RAID4, RAID5, RAID5E, RAID5EE, RAID5R, +RAID6, RAIDMDF. +.Sh RAID LEVEL MIGRATION +The GEOM RAID class has no support for RAID level migration, allowed by some +metadata formats. +If you started migration using BIOS or in some other way, make sure to +complete it there. +Do not run GEOM RAID class on migrating volumes under pain of possible data +corruption! +.Sh 2TiB BARRIERS +NVIDIA metadata format does not support volumes above 2TiB. +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm RAID +GEOM class. +.Bl -tag -width indent +.It Va kern.geom.raid.aggressive_spare : No 0 +Use any disks without metadata connected to controllers of the vendor +matching to volume metadata format as spare. +Use it with much care to not lose data if connecting unrelated disk! +.It Va kern.geom.raid.clean_time : No 5 +Mark volume as clean when idle for the specified number of seconds. +.It Va kern.geom.raid.debug : No 0 +Debug level of the +.Nm RAID +GEOM class. +.It Va kern.geom.raid.enable : No 1 +Enable on-disk metadata taste.
+.It Va kern.geom.raid.idle_threshold : No 1000000 +Time in microseconds to consider a volume idle for rebuild purposes. +.It Va kern.geom.raid.name_format : No 0 +Providers name format: 0 -- raid/r{num}, 1 -- raid/{label}. +.It Va kern.geom.raid.read_err_thresh : No 10 +Number of read errors equated to disk failure. +Write errors are always considered as disk failures. +.It Va kern.geom.raid.start_timeout : No 30 +Time to wait for missing array components on startup. +.It Va kern.geom.raid. Ns Ar X Ns Va .enable : No 1 +Enable taste for specific metadata or transformation module. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and non-zero if the command fails. +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 9.0 . +.Sh AUTHORS +.An Alexander Motin Aq Mt mav@FreeBSD.org +.An M. Warner Losh Aq Mt imp@FreeBSD.org diff --git a/lib/geom/raid3/Makefile b/lib/geom/raid3/Makefile new file mode 100644 index 000000000000..f2d9a9233c16 --- /dev/null +++ b/lib/geom/raid3/Makefile @@ -0,0 +1,7 @@ +PACKAGE=geom + +GEOM_CLASS= raid3 + +LIBADD= md + +.include <bsd.lib.mk> diff --git a/lib/geom/raid3/Makefile.depend b/lib/geom/raid3/Makefile.depend new file mode 100644 index 000000000000..27e87393b549 --- /dev/null +++ b/lib/geom/raid3/Makefile.depend @@ -0,0 +1,17 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + lib/libmd \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/raid3/geom_raid3.c b/lib/geom/raid3/geom_raid3.c new file mode 100644 index 000000000000..4967cec75448 --- /dev/null +++ b/lib/geom/raid3/geom_raid3.c @@ -0,0 +1,335 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/raid3/g_raid3.h> +#include <core/geom.h> +#include <misc/subr.h> + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_RAID3_VERSION; + +static void raid3_main(struct gctl_req *req, unsigned f); +static void raid3_clear(struct gctl_req *req); +static void raid3_dump(struct gctl_req *req); +static void raid3_label(struct gctl_req *req); + +/* + * Command table for the RAID3 class, terminated by G_CMD_SENTINEL. + * The "clear", "dump" and "label" entries name raid3_main() in their third + * field; every other entry passes NULL there. + * NOTE(review): a NULL third field presumably means the verb needs no + * userland handler -- confirm against struct g_command in core/geom.h. + */ +struct g_command class_commands[] = { + { "clear", G_FLAG_VERBOSE, raid3_main, G_NULL_OPTS, + "[-v] prov ..."
+ }, + { "configure", G_FLAG_VERBOSE, NULL, + { + { 'a', "autosync", NULL, G_TYPE_BOOL }, + { 'd', "dynamic", NULL, G_TYPE_BOOL }, + { 'f', "failsync", NULL, G_TYPE_BOOL }, + { 'F', "nofailsync", NULL, G_TYPE_BOOL }, + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 'n', "noautosync", NULL, G_TYPE_BOOL }, + { 'r', "round_robin", NULL, G_TYPE_BOOL }, + { 'R', "noround_robin", NULL, G_TYPE_BOOL }, + { 'w', "verify", NULL, G_TYPE_BOOL }, + { 'W', "noverify", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-adfFhnrRvwW] name" + }, + { "dump", 0, raid3_main, G_NULL_OPTS, + "prov ..." + }, + { "insert", G_FLAG_VERBOSE, NULL, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 'n', "number", G_VAL_OPTIONAL, G_TYPE_NUMBER }, /* optional for insert, hence [] in the usage text */ + G_OPT_SENTINEL + }, + "[-hv] [-n number] name prov" + }, + { "label", G_FLAG_VERBOSE, raid3_main, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 'F', "nofailsync", NULL, G_TYPE_BOOL }, + { 'n', "noautosync", NULL, G_TYPE_BOOL }, + { 'r', "round_robin", NULL, G_TYPE_BOOL }, + { 's', "sectorsize", "0", G_TYPE_NUMBER }, + { 'w', "verify", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-hFnrvw] [-s blocksize] name prov prov prov ..." + }, + { "rebuild", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" + }, + { "remove", G_FLAG_VERBOSE, NULL, + { + { 'n', "number", NULL, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-v] <-n number> name" + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..."
+ }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +static void +raid3_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + raid3_label(req); + else if (strcmp(name, "clear") == 0) + raid3_clear(req); + else if (strcmp(name, "dump") == 0) + raid3_dump(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +static void +raid3_label(struct gctl_req *req) +{ + struct g_raid3_metadata md; + u_char sector[512]; + const char *str; + unsigned sectorsize, ssize; + off_t mediasize, msize; + int hardcode, round_robin, verify; + int error, i, nargs; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + if (nargs < 4) { + gctl_error(req, "Too few arguments."); + return; + } + if (bitcount32(nargs - 2) != 1) { + gctl_error(req, "Invalid number of components."); + return; + } + + strlcpy(md.md_magic, G_RAID3_MAGIC, sizeof(md.md_magic)); + md.md_version = G_RAID3_VERSION; + str = gctl_get_ascii(req, "arg0"); + strlcpy(md.md_name, str, sizeof(md.md_name)); + md.md_id = arc4random(); + md.md_all = nargs - 1; + md.md_mflags = 0; + md.md_dflags = 0; + md.md_genid = 0; + md.md_syncid = 1; + md.md_sync_offset = 0; + if (gctl_get_int(req, "noautosync")) + md.md_mflags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC; + if (gctl_get_int(req, "nofailsync")) + md.md_mflags |= G_RAID3_DEVICE_FLAG_NOFAILSYNC; + round_robin = gctl_get_int(req, "round_robin"); + if (round_robin) + md.md_mflags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN; + verify = gctl_get_int(req, "verify"); + if (verify) + md.md_mflags |= G_RAID3_DEVICE_FLAG_VERIFY; + if (round_robin && verify) { + gctl_error(req, "Both '%c' and '%c' options given.", 'r', 'w'); + return; + } + hardcode = gctl_get_int(req, "hardcode"); + + /* + * Calculate sectorsize by finding least common multiple from + 
* sectorsizes of every disk and find the smallest mediasize. + */ + mediasize = 0; + sectorsize = gctl_get_intmax(req, "sectorsize"); + for (i = 1; i < nargs; i++) { + str = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(str); + ssize = g_get_sectorsize(str); + if (msize == 0 || ssize == 0) { + gctl_error(req, "Can't get informations about %s: %s.", + str, strerror(errno)); + return; + } + msize -= ssize; + if (mediasize == 0 || (mediasize > 0 && msize < mediasize)) + mediasize = msize; + if (sectorsize == 0) + sectorsize = ssize; + else + sectorsize = g_lcm(sectorsize, ssize); + } + md.md_mediasize = mediasize * (nargs - 2); + md.md_sectorsize = sectorsize * (nargs - 2); + md.md_mediasize -= (md.md_mediasize % md.md_sectorsize); + + if (md.md_sectorsize > MAXPHYS) { + gctl_error(req, "The blocksize is too big."); + return; + } + + /* + * Clear last sector first, to spoil all components if device exists. + */ + for (i = 1; i < nargs; i++) { + str = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(str, NULL); + if (error != 0) { + gctl_error(req, "Can't store metadata on %s: %s.", str, + strerror(error)); + return; + } + } + + /* + * Ok, store metadata (use disk number as priority). + */ + for (i = 1; i < nargs; i++) { + str = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(str); + ssize = g_get_sectorsize(str); + if (mediasize < msize - ssize) { + fprintf(stderr, + "warning: %s: only %jd bytes from %jd bytes used.\n", + str, (intmax_t)mediasize, (intmax_t)(msize - ssize)); + } + + md.md_no = i - 1; + md.md_provsize = msize; + if (!hardcode) + bzero(md.md_provider, sizeof(md.md_provider)); + else { + if (strncmp(str, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + str += sizeof(_PATH_DEV) - 1; + strlcpy(md.md_provider, str, sizeof(md.md_provider)); + } + if (verify && md.md_no == md.md_all - 1) { + /* + * In "verify" mode, force synchronization of parity + * component on start. 
+ */ + md.md_syncid = 0; + } + raid3_metadata_encode(&md, sector); + error = g_metadata_store(str, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Can't store metadata on %s: %s.\n", + str, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata value stored on %s.\n", str); + } +} + +static void +raid3_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_RAID3_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +static void +raid3_dump(struct gctl_req *req) +{ + struct g_raid3_metadata md, tmpmd; + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_RAID3_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (raid3_metadata_decode((u_char *)&tmpmd, &md) != 0) { + fprintf(stderr, "MD5 hash mismatch for %s, skipping.\n", + name); + gctl_error(req, "Not fully done."); + continue; + } + printf("Metadata on %s:\n", name); + raid3_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/raid3/graid3.8 b/lib/geom/raid3/graid3.8 new file mode 100644 index 000000000000..e1bcdac17f99 --- /dev/null +++ b/lib/geom/raid3/graid3.8 @@ -0,0 +1,254 @@ +.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights 
reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd January 23, 2025 +.Dt GRAID3 8 +.Os +.Sh NAME +.Nm graid3 +.Nd "control utility for RAID3 devices" +.Sh SYNOPSIS +.Nm +.Cm label +.Op Fl Fhnrvw +.Op Fl s Ar blocksize +.Ar name +.Ar prov prov prov ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm configure +.Op Fl adfFhnrRvwW +.Ar name +.Nm +.Cm rebuild +.Op Fl v +.Ar name +.Ar prov +.Nm +.Cm insert +.Op Fl hv +.Op Fl n Ar number +.Ar name +.Ar prov +.Nm +.Cm remove +.Op Fl v +.Fl n Ar number +.Ar name +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for RAID3 array configuration. 
+After a device is created, all components are detected and configured +automatically. +All operations such as failure detection, stale component detection, rebuild +of stale components, etc.\& are also done automatically. +The +.Nm +utility uses on-disk metadata (the provider's last sector) to store all needed +information. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm configure" +.It Cm label +Create a RAID3 device. +The last given component will contain parity data, whilst the others +will all contain regular data. +The number of components must be equal to 3, 5, 9, 17, etc.\& (2^n + 1). +.Pp +Additional options include: +.Bl -tag -width ".Fl h" +.It Fl F +Do not synchronize after a power failure or system crash. +Assumes the device is in a consistent state. +.It Fl h +Hardcode providers' names in metadata. +.It Fl n +Turn off autosynchronization of stale components. +.It Fl r +Use the parity component for reading in round-robin fashion. +Without this option the parity component is not used at all for reading operations +when the device is in a complete state. +With this option specified, random I/O read operations are as much as 40% faster, +but sequential reads are slower. +One cannot use this option if the +.Fl w +option is also specified. +.It Fl s +Manually specify the array block size. The block size will be set to the least +common multiple of all components' sector sizes and the specified value. +Note that the array sector size is the block size multiplied by the number +of regular data components. Big values may decrease performance and compatibility, +as all I/O requests have to be a multiple of the sector size. +.It Fl w +Use the verify reading feature. +When reading from a device in a complete state, also read data from the parity component +and verify the data by comparing XORed regular data with parity data.
+If verification fails, an +.Er EIO +error is returned and the value of the +.Va kern.geom.raid3.stat.parity_mismatch +sysctl is increased. +One cannot use this option if the +.Fl r +option is also specified. +.El +.It Cm clear +Clear metadata on the given providers. +.It Cm configure +Configure the given device. +.Pp +Additional options include: +.Bl -tag -width ".Fl a" +.It Fl a +Turn on autosynchronization of stale components. +.It Fl d +Do not hardcode providers' names in metadata. +.It Fl f +Synchronize the device after a power failure or system crash. +.It Fl F +Do not synchronize after a power failure or system crash. +Assumes the device is in a consistent state. +.It Fl h +Hardcode providers' names in metadata. +.It Fl n +Turn off autosynchronization of stale components. +.It Fl r +Turn on round-robin reading. +.It Fl R +Turn off round-robin reading. +.It Fl w +Turn on verify reading. +.It Fl W +Turn off verify reading. +.El +.It Cm rebuild +Rebuild the given component forcibly. +If autosynchronization was not turned off for the given device, this command +should be unnecessary. +.It Cm insert +Add the given component to the existing array, if one of the components was +removed previously with the +.Cm remove +command or if one component is missing and will not be connected again. +If no number is given, the new component takes the place of the first missing +component. +.Pp +Additional options include: +.Bl -tag -width ".Fl h" +.It Fl h +Hardcode providers' names in metadata. +.El +.It Cm remove +Remove the given component from the given array and clear metadata on it. +.It Cm stop +Stop the given arrays. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Stop the given array even if it is open. +.El +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options include: +.Bl -tag -width ".Fl v" +.It Fl v +Be more verbose.
+.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +Use 3 disks to setup a RAID3 array (with the round-robin reading feature). +Create a file system, mount it, then unmount it and stop device: +.Bd -literal -offset indent +graid3 label -v -r data da0 da1 da2 +newfs /dev/raid3/data +mount /dev/raid3/data /mnt +\&... +umount /mnt +graid3 stop data +graid3 unload +.Ed +.Pp +Create a RAID3 array, but do not use the automatic synchronization feature. +Rebuild parity component: +.Bd -literal -offset indent +graid3 label -n data da0 da1 da2 +graid3 rebuild data da2 +.Ed +.Pp +Replace one data disk with a brand new one: +.Bd -literal -offset indent +graid3 remove -n 0 data +graid3 insert -n 0 data da5 +.Ed +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr umount 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.3 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org +.Sh BUGS +There should be a section with an implementation description. +.Pp +Documentation for sysctls +.Va kern.geom.raid3.* +is missing. diff --git a/lib/geom/shsec/Makefile b/lib/geom/shsec/Makefile new file mode 100644 index 000000000000..493ea5d15acd --- /dev/null +++ b/lib/geom/shsec/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= shsec + +.include <bsd.lib.mk> diff --git a/lib/geom/shsec/Makefile.depend b/lib/geom/shsec/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/shsec/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/shsec/geom_shsec.c b/lib/geom/shsec/geom_shsec.c new file mode 100644 index 000000000000..227bdcf39b7d --- /dev/null +++ b/lib/geom/shsec/geom_shsec.c @@ -0,0 +1,259 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/shsec/g_shsec.h> + +#include "core/geom.h" +#include "misc/subr.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_SHSEC_VERSION; + +static void shsec_main(struct gctl_req *req, unsigned flags); +static void shsec_clear(struct gctl_req *req); +static void shsec_dump(struct gctl_req *req); +static void shsec_label(struct gctl_req *req); + +struct g_command class_commands[] = { + { "clear", G_FLAG_VERBOSE, shsec_main, G_NULL_OPTS, + "[-v] prov ..." + }, + { "dump", 0, shsec_main, G_NULL_OPTS, + "prov ..." + }, + { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, shsec_main, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-hv] name prov prov ..." + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." 
+ }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +static void +shsec_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + shsec_label(req); + else if (strcmp(name, "clear") == 0) + shsec_clear(req); + else if (strcmp(name, "dump") == 0) + shsec_dump(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +static void +shsec_label(struct gctl_req *req) +{ + struct g_shsec_metadata md; + off_t compsize, msize; + u_char sector[512]; + unsigned ssize, secsize; + const char *name; + int error, i, nargs, hardcode; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + if (nargs <= 2) { + gctl_error(req, "Too few arguments."); + return; + } + hardcode = gctl_get_int(req, "hardcode"); + + /* + * Clear last sector first to spoil all components if device exists. + */ + compsize = 0; + secsize = 0; + for (i = 1; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(name); + ssize = g_get_sectorsize(name); + if (msize == 0 || ssize == 0) { + gctl_error(req, "Can't get informations about %s: %s.", + name, strerror(errno)); + return; + } + msize -= ssize; + if (compsize == 0 || (compsize > 0 && msize < compsize)) + compsize = msize; + if (secsize == 0) + secsize = ssize; + else + secsize = g_lcm(secsize, ssize); + + error = g_metadata_clear(name, NULL); + if (error != 0) { + gctl_error(req, "Can't store metadata on %s: %s.", name, + strerror(error)); + return; + } + } + + strlcpy(md.md_magic, G_SHSEC_MAGIC, sizeof(md.md_magic)); + md.md_version = G_SHSEC_VERSION; + name = gctl_get_ascii(req, "arg0"); + strlcpy(md.md_name, name, sizeof(md.md_name)); + md.md_id = arc4random(); + md.md_all = nargs - 1; + + /* + * Ok, store metadata. 
+ */ + for (i = 1; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(name); + ssize = g_get_sectorsize(name); + if (compsize < msize - ssize) { + fprintf(stderr, + "warning: %s: only %jd bytes from %jd bytes used.\n", + name, (intmax_t)compsize, (intmax_t)(msize - ssize)); + } + + md.md_no = i - 1; + md.md_provsize = msize; + if (!hardcode) + bzero(md.md_provider, sizeof(md.md_provider)); + else { + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + name += sizeof(_PATH_DEV) - 1; + strlcpy(md.md_provider, name, sizeof(md.md_provider)); + } + shsec_metadata_encode(&md, sector); + error = g_metadata_store(name, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Can't store metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata value stored on %s.\n", name); + } +} + +static void +shsec_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_SHSEC_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +static void +shsec_metadata_dump(const struct g_shsec_metadata *md) +{ + + printf(" Magic string: %s\n", md->md_magic); + printf(" Metadata version: %u\n", (u_int)md->md_version); + printf(" Device name: %s\n", md->md_name); + printf(" Device ID: %u\n", (u_int)md->md_id); + printf(" Disk number: %u\n", (u_int)md->md_no); + printf("Total number of disks: %u\n", (u_int)md->md_all); + printf(" Hardcoded provider: %s\n", md->md_provider); +} + +static void +shsec_dump(struct gctl_req *req) +{ + struct g_shsec_metadata md, 
tmpmd; + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_SHSEC_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + shsec_metadata_decode((u_char *)&tmpmd, &md); + printf("Metadata on %s:\n", name); + shsec_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/shsec/gshsec.8 b/lib/geom/shsec/gshsec.8 new file mode 100644 index 000000000000..d4477de3a71e --- /dev/null +++ b/lib/geom/shsec/gshsec.8 @@ -0,0 +1,127 @@ +.\" Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd October 1, 2013 +.Dt GSHSEC 8 +.Os +.Sh NAME +.Nm gshsec +.Nd "control utility for shared secret devices" +.Sh SYNOPSIS +.Nm +.Cm label +.Op Fl hv +.Ar name +.Ar prov prov ... +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm dump +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for setting up a device which contains a shared secret. +The secret is shared between the given providers. +To collect the secret, all providers are needed. +If one of the components is missing, there is no way to get any useful data from +the rest of them. +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm label +Set up a shared secret device from the given components with the specified +.Ar name . +Metadata are stored in the last sector of every component. +.It Cm stop +Turn off an existing shared secret device by its +.Ar name . +This command does not touch on-disk metadata! +.It Cm clear +Clear metadata on the given providers. +.It Cm dump +Dump metadata stored on the given providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width ".Fl f" +.It Fl f +Force the removal of the specified shared secret device. 
+.It Fl h +Hardcode providers' names in metadata. +.It Fl v +Be more verbose. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +The following example shows how to create a shared secret device. +The secret will be split between a slice on a local disk and a USB Pen drive. +.Bd -literal -offset indent +gshsec label -v secret /dev/ada0s1 /dev/da0 +newfs /dev/shsec/secret +.Ed +.Pp +From now on, when the USB Pen drive is inserted, it will be automatically +detected and connected, making the secret available via the +.Pa /dev/shsec/secret +device. +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 , +.Xr newfs 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.4 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org diff --git a/lib/geom/stripe/Makefile b/lib/geom/stripe/Makefile new file mode 100644 index 000000000000..762df5f3c872 --- /dev/null +++ b/lib/geom/stripe/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= stripe + +.include <bsd.lib.mk> diff --git a/lib/geom/stripe/Makefile.depend b/lib/geom/stripe/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/stripe/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/stripe/geom_stripe.c b/lib/geom/stripe/geom_stripe.c new file mode 100644 index 000000000000..da3a726db370 --- /dev/null +++ b/lib/geom/stripe/geom_stripe.c @@ -0,0 +1,285 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <assert.h> +#include <libgeom.h> +#include <geom/stripe/g_stripe.h> + +#include "core/geom.h" +#include "misc/subr.h" + + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_STRIPE_VERSION; + +#define GSTRIPE_STRIPESIZE "65536" + +static void stripe_main(struct gctl_req *req, unsigned flags); +static void stripe_clear(struct gctl_req *req); +static void stripe_dump(struct gctl_req *req); +static void stripe_label(struct gctl_req *req); + +struct g_command class_commands[] = { + { "clear", G_FLAG_VERBOSE, stripe_main, G_NULL_OPTS, + "[-v] prov ..." 
+ }, + { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, + { + { 's', "stripesize", GSTRIPE_STRIPESIZE, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-v] [-s stripesize] name prov prov ..." + }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." + }, + { "dump", 0, stripe_main, G_NULL_OPTS, + "prov ..." + }, + { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, stripe_main, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL }, + { 's', "stripesize", GSTRIPE_STRIPESIZE, G_TYPE_NUMBER }, + G_OPT_SENTINEL + }, + "[-hv] [-s stripesize] name prov prov ..." + }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] name ..." + }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +static void +stripe_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + stripe_label(req); + else if (strcmp(name, "clear") == 0) + stripe_clear(req); + else if (strcmp(name, "dump") == 0) + stripe_dump(req); + else + gctl_error(req, "Unknown command: %s.", name); +} + +static void +stripe_label(struct gctl_req *req) +{ + struct g_stripe_metadata md; + intmax_t stripesize; + off_t compsize, msize; + u_char sector[512]; + unsigned ssize, secsize; + const char *name; + int error, i, nargs, hardcode; + + bzero(sector, sizeof(sector)); + nargs = gctl_get_int(req, "nargs"); + if (nargs < 3) { + gctl_error(req, "Too few arguments."); + return; + } + hardcode = gctl_get_int(req, "hardcode"); + + /* + * Clear last sector first to spoil all components if device exists. 
+ */ + compsize = 0; + secsize = 0; + for (i = 1; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(name); + ssize = g_get_sectorsize(name); + if (msize == 0 || ssize == 0) { + gctl_error(req, "Can't get informations about %s: %s.", + name, strerror(errno)); + return; + } + msize -= ssize; + if (compsize == 0 || (compsize > 0 && msize < compsize)) + compsize = msize; + if (secsize == 0) + secsize = ssize; + else + secsize = g_lcm(secsize, ssize); + + error = g_metadata_clear(name, NULL); + if (error != 0) { + gctl_error(req, "Can't store metadata on %s: %s.", name, + strerror(error)); + return; + } + } + + strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); + md.md_version = G_STRIPE_VERSION; + name = gctl_get_ascii(req, "arg0"); + strlcpy(md.md_name, name, sizeof(md.md_name)); + md.md_id = arc4random(); + md.md_all = nargs - 1; + stripesize = gctl_get_intmax(req, "stripesize"); + if ((stripesize % secsize) != 0) { + gctl_error(req, "Stripesize should be multiple of %u.", + secsize); + return; + } + md.md_stripesize = stripesize; + + /* + * Ok, store metadata. 
+ */ + for (i = 1; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + msize = g_get_mediasize(name); + ssize = g_get_sectorsize(name); + if (compsize < msize - ssize) { + fprintf(stderr, + "warning: %s: only %jd bytes from %jd bytes used.\n", + name, (intmax_t)compsize, (intmax_t)(msize - ssize)); + } + + md.md_no = i - 1; + md.md_provsize = msize; + if (!hardcode) + bzero(md.md_provider, sizeof(md.md_provider)); + else { + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + name += sizeof(_PATH_DEV) - 1; + strlcpy(md.md_provider, name, sizeof(md.md_provider)); + } + stripe_metadata_encode(&md, sector); + error = g_metadata_store(name, sector, sizeof(sector)); + if (error != 0) { + fprintf(stderr, "Can't store metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata value stored on %s.\n", name); + } +} + +static void +stripe_clear(struct gctl_req *req) +{ + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_clear(name, G_STRIPE_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +static void +stripe_metadata_dump(const struct g_stripe_metadata *md) +{ + + printf(" Magic string: %s\n", md->md_magic); + printf(" Metadata version: %u\n", (u_int)md->md_version); + printf(" Device name: %s\n", md->md_name); + printf(" Device ID: %u\n", (u_int)md->md_id); + printf(" Disk number: %u\n", (u_int)md->md_no); + printf("Total number of disks: %u\n", (u_int)md->md_all); + printf(" Stripe size: %u\n", (u_int)md->md_stripesize); + printf(" Hardcoded provider: %s\n", md->md_provider); +} + +static void 
+stripe_dump(struct gctl_req *req) +{ + struct g_stripe_metadata md, tmpmd; + const char *name; + int error, i, nargs; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + + for (i = 0; i < nargs; i++) { + name = gctl_get_ascii(req, "arg%d", i); + error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd), + G_STRIPE_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, "Not fully done."); + continue; + } + stripe_metadata_decode((u_char *)&tmpmd, &md); + printf("Metadata on %s:\n", name); + stripe_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/stripe/gstripe.8 b/lib/geom/stripe/gstripe.8 new file mode 100644 index 000000000000..6fd486355a2e --- /dev/null +++ b/lib/geom/stripe/gstripe.8 @@ -0,0 +1,240 @@ +.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd January 23, 2025 +.Dt GSTRIPE 8 +.Os +.Sh NAME +.Nm gstripe +.Nd "control utility for striped devices" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl v +.Op Fl s Ar stripesize +.Ar name +.Ar prov prov ... +.Nm +.Cm destroy +.Op Fl fv +.Ar name ... +.Nm +.Cm label +.Op Fl hv +.Op Fl s Ar stripesize +.Ar name +.Ar prov prov ... +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm dump +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for setting up a stripe on two or more disks. +The striped device can be configured using two different methods: +.Dq manual +or +.Dq automatic . +When using the +.Dq manual +method, no metadata are stored on the devices, so the striped +device has to be configured by hand every time it is needed. +The +.Dq automatic +method uses on-disk metadata to detect devices. +Once devices are labeled, they will be automatically detected and +configured. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm destroy" +.It Cm create +Set up a striped device from the given devices with specified +.Ar name . +This is the +.Dq manual +method and the stripe will not exist after a reboot (see +.Sx DESCRIPTION +above). +The kernel module +.Pa geom_stripe.ko +will be loaded if it is not loaded already. 
+.It Cm label +Set up a striped device from the given devices with the specified +.Ar name . +This is the +.Dq automatic +method, where metadata are stored in every device's last sector. +The kernel module +.Pa geom_stripe.ko +will be loaded if it is not loaded already. +.It Cm stop +Turn off an existing striped device by its +.Ar name . +This command does not touch on-disk metadata! +.It Cm destroy +Same as +.Cm stop . +.It Cm clear +Clear metadata on the given devices. +.It Cm dump +Dump metadata stored on the given devices. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width ".Fl s Ar stripesize" +.It Fl f +Force the removal of the specified striped device. +.It Fl h +Hardcode providers' names in metadata. +.It Fl s Ar stripesize +Specifies size of stripe block in bytes. +The +.Ar stripesize +must be a multiple of the largest sector size of all the providers. +.It Fl v +Be more verbose. +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm STRIPE +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.stripe.debug : No 0 +Debug level of the +.Nm STRIPE +GEOM class. +This can be set to a number between 0 and 3 inclusive. +If set to 0 minimal debug information is printed, and if set to 3 the +maximum amount of debug information is printed. +.It Va kern.geom.stripe.fast : No 0 +If set to a non-zero value enable +.Dq "fast mode" +instead of the normal +.Dq "economic mode" . +Compared to +.Dq "economic mode" , +.Dq "fast mode" +uses more memory, but it is much faster for smaller stripe sizes. +If enough memory cannot be allocated, +.Nm STRIPE +will fall back to +.Dq "economic mode" . +.It Va kern.geom.stripe.maxmem : No 13107200 +Maximum amount of memory that can be consumed by +.Dq "fast mode" +(in bytes). 
+This +.Xr sysctl 8 +variable is read-only and can only be set as a tunable in +.Xr loader.conf 5 . +.It Va kern.geom.stripe.fast_failed +A count of how many times +.Dq "fast mode" +has failed due to an insufficient amount of memory. +If this value is large, you should consider increasing the +.Va kern.geom.stripe.maxmem +value. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +The following example shows how to set up a striped device from four disks with a +128KB stripe size for automatic configuration, +create a file system on it, +and mount it: +.Bd -literal -offset indent +gstripe label -v -s 131072 data /dev/da0 /dev/da1 /dev/da2 /dev/da3 +newfs /dev/stripe/data +mount /dev/stripe/data /mnt +[...] +umount /mnt +gstripe stop data +gstripe unload +.Ed +.Sh COMPATIBILITY +The +.Nm +interleave is in number of bytes, +unlike +.Xr ccdconfig 8 +which uses the number of sectors. +A +.Xr ccdconfig 8 +.Ar ileave +of +.Ql 128 +is 64 KB (128 512B sectors). +The same stripe interleave would be specified as +.Ql 65536 +for +.Nm . +.Sh SEE ALSO +.Xr geom 4 , +.Xr loader.conf 5 , +.Xr ccdconfig 8 , +.Xr geom 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr sysctl 8 , +.Xr umount 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.3 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org diff --git a/lib/geom/union/Makefile b/lib/geom/union/Makefile new file mode 100644 index 000000000000..e2027125c6d1 --- /dev/null +++ b/lib/geom/union/Makefile @@ -0,0 +1,5 @@ +PACKAGE=geom + +GEOM_CLASS= union + +.include <bsd.lib.mk> diff --git a/lib/geom/union/Makefile.depend b/lib/geom/union/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/union/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/union/geom_union.c b/lib/geom/union/geom_union.c new file mode 100644 index 000000000000..e1542e4b891c --- /dev/null +++ b/lib/geom/union/geom_union.c @@ -0,0 +1,83 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <libgeom.h> +#include <geom/union/g_union.h> + +#include "core/geom.h" + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_UNION_VERSION; + +struct g_command class_commands[] = { + { "create", G_FLAG_LOADKLD, NULL, + { + { 'o', "offset", "0", G_TYPE_NUMBER }, + { 's', "size", "0", G_TYPE_NUMBER }, + { 'S', "secsize", "0", G_TYPE_NUMBER }, + { 'v', "verbose", NULL, G_TYPE_BOOL }, + { 'Z', "gunionname", G_VAL_OPTIONAL, G_TYPE_STRING }, + G_OPT_SENTINEL + }, + "[-v] [-o offset] [-s size] [-S secsize] [-Z gunionname] " + "upperdev lowerdev" + }, + { "destroy", 0, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'v', "verbose", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-fv] prov ..." + }, + { "reset", 0, NULL, + { + { 'v', "verbose", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-v] prov ..." + }, + { "commit", 0, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL }, + { 'r', "reboot", NULL, G_TYPE_BOOL }, + { 'v', "verbose", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-frv] prov ..." + }, + { "revert", 0, NULL, + { + { 'v', "verbose", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-v] prov ..." + }, + G_CMD_SENTINEL +}; diff --git a/lib/geom/union/gunion.8 b/lib/geom/union/gunion.8 new file mode 100644 index 000000000000..5cb20e975b21 --- /dev/null +++ b/lib/geom/union/gunion.8 @@ -0,0 +1,318 @@ +.\" +.\" Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com> +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd January 19, 2022 +.Dt GUNION 8 +.Os +.Sh NAME +.Nm gunion +.Nd "control utility for UNION GEOM class" +.Sh SYNOPSIS +.Nm +.Cm create +.Op Fl v +.Op Fl o Ar offset +.Op Fl s Ar size +.Op Fl S Ar secsize +.Op Fl Z Ar gunionname +.Ar upperdev lowerdev +.Nm +.Cm destroy +.Op Fl fv +.Ar prov ... +.Nm +.Cm reset +.Op Fl v +.Ar prov ... +.Nm +.Cm revert +.Op Fl v +.Ar prov ... +.Nm +.Cm commit +.Op Fl frv +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used to track changes to a read-only disk on a writable disk. +Logically, a writable disk is placed over a read-only disk. +Write requests are intercepted and stored on the writable disk. +Read requests are first checked to see if they have been written +on the top (writable disk) and if found are returned. +If they have not been written on the top disk, +then they are read from the lower disk. +.Pp +The +.Nm +utility can be especially useful if you have a large disk with a +corrupted filesystem that you are unsure of how to repair. +You can use +.Nm +to place another disk over the corrupted disk and then attempt +to repair the filesystem. 
+If the repair fails, you can revert all the changes in the upper disk +and be back to the unchanged state of the lower disk thus allowing you +to try another approach to repairing it. +If the repair is successful you can request that all the writes recorded +on the top disk be written to the lower disk. +.Pp +Another use of the +.Nm +utility is to try out upgrades to your system. +Place the upper disk over the disk holding your filesystem that +is to be upgraded and then run the upgrade on it. +If it works, commit it; +if it fails, revert the upgrade. +An example is given below. +.Pp +The upper disk must be at least the size of the disk that it covers. +The union metadata exists only for the +period of time that the union is instantiated, +so it is important to commit the updates before destroying the union. +If the top disk is about 2.5 percent larger for 512 byte sector disks +(or 0.5 percent larger for 4K sector disks) than the disk that it covers, +it is possible (though not currently implemented) to save the union +metadata between instantiations of the union device. +.Pp +If you do not have physical media available to use for the upper layer, the +.Xr md 4 +disk can be used instead. +When used in +.Cm swap +mode the changes are all held in buffer memory. +Pages get pushed out to the swap when the system is under memory pressure, +otherwise they stay in the operating memory. +If long-term persistence is desired, +.Cm vnode +mode can be used in which a regular file is used as backing store. +The disk space used by the file is based on the amount of data that +is written to the top device. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width "destroy" +.It Cm create +Set up a union provider on the two given devices. +The first device given is used as the top device and must be writable. +The second device given is used as the bottom device and need only be readable. 
+The second device may be mounted read-only but it is recommended +that it be unmounted and accessed only through a mount of the union device. +If the operation succeeds, the new provider should appear with name +.Pa /dev/ Ns Ao Ar upperdev Ac Ns - Ns Ao Ar lowerdev Ac Ns Pa .union . +An alternate name can be specified with the +.Fl Z +flag. +The kernel module +.Pa geom_union.ko +will be loaded if it is not loaded already. +.Pp +Additional options include: +.Bl -tag -width "-Z gunionname" +.It Fl o Ar offset +Where to begin on the original provider. +The default is to start at the beginning of the disk (i.e., at offset 0). +This option may be used to skip over partitioning information stored +at the beginning of a disk. +The offset must be a multiple of the sector size. +.It Fl s Ar size +Size of the transparent provider. +The default is to be the same size as the lower disk. +Any extra space at the end of the upper disk may be used to store +union metadata. +.It Fl S Ar secsize +Sector size of the transparent provider. +The default is to be the same sector size as the lower disk. +.It Fl v +Be more verbose. +.It Fl Z Ar gunionname +The name of the new provider. +The suffix +.Dq .union +will be appended to the provider name. +.El +.It Cm destroy +Turn off the given union providers. +.Pp +Additional options include: +.Bl -tag -width "-f" +.It Fl f +Force the removal of the specified provider. +.It Fl v +Be more verbose. +.El +.It Cm revert +Discard all the changes made in the top layer thus reverting to the +original state of the lower device. +The union device may not be mounted or otherwise in use when a +.Cm revert +operation is being done. +.It Cm commit +Write all the changes made in the top device to the lower device +thus committing the lower device to have the same data as the union. 
+.Pp +Additional options include: +.Bl -tag -width "-f" +.It Fl f +The +.Cm commit +command will not allow the lower device to be mounted +or otherwise in use while the +.Cm commit +operation is being done. +However, the +.Fl f +flag may be specified to allow the lower device to be mounted read-only. +To prevent a filesystem panic on the mounted lower-device filesystem, +immediately after the +.Cm commit +operation finishes the lower-device filesystem should be unmounted +and then remounted to update its metadata state. +If the lower-device filesystem is currently being used as the root +filesystem then the +.Fl r +flag should be specified to reboot the system at the completion of the +.Cm commit +operation. +.It Fl r +Reboot the system at the completion of the +.Cm commit +operation. +.It Fl v +Be more verbose. +.El +.It Cm reset +Reset statistics for the given union providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +The following example shows how to create and destroy a +union provider with disks +.Pa /dev/da0p1 +as the read-only disk on the bottom and +.Pa /dev/md0 +as the writable disk on the top. +.Bd -literal -offset indent +gunion create -v md0 da0p1 +mount /dev/md0-da0p1.union /mnt +.Ed +.Pp +Proceed to make changes in /mnt filesystem. +If they are successful and you want to keep them. +.Bd -literal -offset indent +umount /mnt +gunion commit -v md0-da0p1.union +.Ed +.Pp +If they are unsuccessful and you want to roll back. +.Bd -literal -offset indent +umount /mnt +gunion revert -v md0-da0p1.union +.Ed +.Pp +When done eliminate the union. +.Bd -literal -offset indent +umount /mnt +gunion destroy -v md0-da0p1.union +.Ed +.Pp +All uncommitted changes will be discarded when the union is destroyed. 
+.Pp +If you use the name of the full disk, for example +.Pa da0 +and it is labelled, +then a union name will appear for the disk as +.Pa md0-da0.union +as well as for each partition on the disk as +.Pa md0-da0p1.union , +.Pa md0-da0p2.union , +etc. +A commit operation can be done only on +.Pa md0-da0.union +and will commit changes to all the partitions. +If partition level commits are desired, +then a union must be created for each partition. +.Pp +The traffic statistics for the given +union providers can be obtained with the +.Cm list +command. +The example below shows the number of bytes written with +.Xr newfs 8 : +.Bd -literal -offset indent +gunion create md0 da0p1 +newfs /dev/md0-da0p1.union +gunion list +.Ed +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables can be used to control the behavior of the +.Nm UNION +GEOM class. +The default value is shown next to each variable. +.Bl -tag -width indent +.It Va kern.geom.union.debug : No 0 +Debug level of the +.Nm UNION +GEOM class. +This can be set to a number between 0 and 4 inclusive. +If set to 0, no debug information is printed. +If set to 1, all the verbose messages are logged. +If set to 2, additional error-related information is logged. +If set to 3, mapping operations are logged. +If set to 4, the maximum amount of debug information is printed. +.El +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 14.0 . 
+.Sh AUTHORS +.An Marshall Kirk McKusick Aq Mt mckusick@mckusick.com diff --git a/lib/geom/virstor/Makefile b/lib/geom/virstor/Makefile new file mode 100644 index 000000000000..63ea8e5ad31c --- /dev/null +++ b/lib/geom/virstor/Makefile @@ -0,0 +1,9 @@ +PACKAGE=geom +.PATH: ${SRCTOP}/sys/geom/virstor + +GEOM_CLASS= virstor + +SRCS+= binstream.c +SRCS+= g_virstor_md.c + +.include <bsd.lib.mk> diff --git a/lib/geom/virstor/Makefile.depend b/lib/geom/virstor/Makefile.depend new file mode 100644 index 000000000000..0dd05cace3c0 --- /dev/null +++ b/lib/geom/virstor/Makefile.depend @@ -0,0 +1,16 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/virstor/geom_virstor.c b/lib/geom/virstor/geom_virstor.c new file mode 100644 index 000000000000..5f5087e99213 --- /dev/null +++ b/lib/geom/virstor/geom_virstor.c @@ -0,0 +1,541 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2005 Ivan Voras <ivoras@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <errno.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> +#include <fcntl.h> +#include <unistd.h> +#include <libgeom.h> +#include <err.h> +#include <assert.h> + +#include <core/geom.h> +#include <misc/subr.h> + +#include <geom/virstor/g_virstor_md.h> +#include <geom/virstor/g_virstor.h> + +uint32_t lib_version = G_LIB_VERSION; +uint32_t version = G_VIRSTOR_VERSION; + +#define GVIRSTOR_CHUNK_SIZE "4M" +#define GVIRSTOR_VIR_SIZE "2T" + +#if G_LIB_VERSION == 1 +/* Support RELENG_6 */ +#define G_TYPE_BOOL G_TYPE_NONE +#endif + +/* + * virstor_main gets called by the geom(8) utility + */ +static void virstor_main(struct gctl_req *req, unsigned flags); + +struct g_command class_commands[] = { + { "clear", G_FLAG_VERBOSE, virstor_main, G_NULL_OPTS, + "[-v] prov ..." + }, + { "dump", 0, virstor_main, G_NULL_OPTS, + "prov ..." + }, + { "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, virstor_main, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL}, + { 'm', "chunk_size", GVIRSTOR_CHUNK_SIZE, G_TYPE_NUMBER}, + { 's', "vir_size", GVIRSTOR_VIR_SIZE, G_TYPE_NUMBER}, + G_OPT_SENTINEL + }, + "[-h] [-v] [-m chunk_size] [-s vir_size] name provider0 [provider1 ...]" + }, + { "destroy", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL}, + G_OPT_SENTINEL + }, + "[-fv] name ..." 
+ }, + { "stop", G_FLAG_VERBOSE, NULL, + { + { 'f', "force", NULL, G_TYPE_BOOL}, + G_OPT_SENTINEL + }, + "[-fv] name ... (alias for \"destroy\")" + }, + { "add", G_FLAG_VERBOSE, NULL, + { + { 'h', "hardcode", NULL, G_TYPE_BOOL}, + G_OPT_SENTINEL + }, + "[-vh] name prov [prov ...]" + }, + { "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name ..." + }, + G_CMD_SENTINEL +}; + +static int verbose = 0; + +/* Helper functions' declarations */ +static void virstor_clear(struct gctl_req *req); +static void virstor_dump(struct gctl_req *req); +static void virstor_label(struct gctl_req *req); + +/* Dispatcher function (no real work done here, only verbose flag recorder) */ +static void +virstor_main(struct gctl_req *req, unsigned flags) +{ + const char *name; + + if ((flags & G_FLAG_VERBOSE) != 0) + verbose = 1; + + name = gctl_get_ascii(req, "verb"); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", "verb"); + return; + } + if (strcmp(name, "label") == 0) + virstor_label(req); + else if (strcmp(name, "clear") == 0) + virstor_clear(req); + else if (strcmp(name, "dump") == 0) + virstor_dump(req); + else + gctl_error(req, "%s: Unknown command: %s.", __func__, name); + + /* No CTASSERT in userland + CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS); + */ +} + +/* + * Labels a new geom Meaning: parses and checks the parameters, calculates & + * writes metadata to the relevant providers so when the next round of + * "tasting" comes (which will be just after the provider(s) are closed) geom + * can be instantiated with the tasted metadata. + */ +static void +virstor_label(struct gctl_req *req) +{ + struct g_virstor_metadata md; + off_t msize; + unsigned char *sect; + unsigned int i; + size_t ssize, secsize; + const char *name; + char param[32]; + int hardcode, nargs, error; + struct virstor_map_entry *map; + size_t total_chunks, write_max_map_entries; + unsigned int map_chunks; /* Chunks needed by the map (map size). 
*/ + size_t map_size; /* In bytes. */ + ssize_t written; + int fd; + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 2) { + gctl_error(req, "Too few arguments (%d): expecting: name " + "provider0 [provider1 ...]", nargs); + return; + } + + hardcode = gctl_get_int(req, "hardcode"); + + /* + * Initialize constant parts of metadata: magic signature, version, + * name. + */ + bzero(&md, sizeof(md)); + strlcpy(md.md_magic, G_VIRSTOR_MAGIC, sizeof(md.md_magic)); + md.md_version = G_VIRSTOR_VERSION; + name = gctl_get_ascii(req, "arg0"); + if (name == NULL) { + gctl_error(req, "No 'arg%u' argument.", 0); + return; + } + strlcpy(md.md_name, name, sizeof(md.md_name)); + + md.md_virsize = (off_t)gctl_get_intmax(req, "vir_size"); + md.md_chunk_size = gctl_get_intmax(req, "chunk_size"); + md.md_count = nargs - 1; + + if (md.md_virsize == 0 || md.md_chunk_size == 0) { + gctl_error(req, "Virtual size and chunk size must be non-zero"); + return; + } + + msize = secsize = 0; + for (i = 1; i < (unsigned)nargs; i++) { + snprintf(param, sizeof(param), "arg%u", i); + name = gctl_get_ascii(req, "%s", param); + ssize = g_get_sectorsize(name); + if (ssize == 0) + fprintf(stderr, "%s for %s\n", strerror(errno), name); + msize += g_get_mediasize(name); + if (secsize == 0) + secsize = ssize; + else if (secsize != ssize) { + gctl_error(req, "Devices need to have same sector size " + "(%u on %s needs to be %u).", + (u_int)ssize, name, (u_int)secsize); + return; + } + } + + if (secsize == 0) { + gctl_error(req, "Device not specified"); + return; + } + + if (md.md_chunk_size % secsize != 0) { + size_t new_size = roundup(md.md_chunk_size, secsize); + fprintf(stderr, "Resizing chunk size to be a multiple of " + "sector size (%zu bytes).\n", secsize); + fprintf(stderr, "New chunk size: %zu kB\n", new_size / 1024); + md.md_chunk_size = new_size; + } + + if (md.md_virsize % md.md_chunk_size != 0) { + off_t chunk_count = md.md_virsize / md.md_chunk_size; + md.md_virsize = chunk_count * 
md.md_chunk_size; + fprintf(stderr, "Resizing virtual size to be a multiple of " + "chunk size.\n"); + fprintf(stderr, "New virtual size: %zu MB\n", + (size_t)(md.md_virsize / (1024 * 1024))); + } + + total_chunks = md.md_virsize / md.md_chunk_size; + map_size = total_chunks * sizeof(*map); + assert(md.md_virsize % md.md_chunk_size == 0); + + ssize = map_size % secsize; + if (ssize != 0) { + size_t add_chunks = (secsize - ssize) / sizeof(*map); + total_chunks += add_chunks; + md.md_virsize = (off_t)total_chunks * (off_t)md.md_chunk_size; + map_size = total_chunks * sizeof(*map); + fprintf(stderr, "Resizing virtual size to fit virstor " + "structures.\n"); + fprintf(stderr, "New virtual size: %ju MB (%zu new chunks)\n", + (uintmax_t)(md.md_virsize / (1024 * 1024)), add_chunks); + } + + if (verbose) + printf("Total virtual chunks: %zu (%zu MB each), %ju MB total " + "virtual size.\n", + total_chunks, (size_t)(md.md_chunk_size / (1024 * 1024)), + md.md_virsize/(1024 * 1024)); + + if ((off_t)md.md_virsize < msize) + fprintf(stderr, "WARNING: Virtual storage size < Physical " + "available storage (%ju < %ju)\n", md.md_virsize, msize); + + /* Clear last sector first to spoil all components if device exists. 
*/ + if (verbose) + printf("Clearing metadata on"); + + for (i = 1; i < (unsigned)nargs; i++) { + snprintf(param, sizeof(param), "arg%u", i); + name = gctl_get_ascii(req, "%s", param); + + if (verbose) + printf(" %s", name); + + msize = g_get_mediasize(name); + ssize = g_get_sectorsize(name); + if (msize == 0 || ssize == 0) { + gctl_error(req, "Can't retrieve information about " + "%s: %s.", name, strerror(errno)); + return; + } + if (msize < (off_t) MAX(md.md_chunk_size*4, map_size)) + gctl_error(req, "Device %s is too small", name); + error = g_metadata_clear(name, NULL); + if (error != 0) { + gctl_error(req, "Can't clear metadata on %s: %s.", name, + strerror(error)); + return; + } + } + + + /* Write allocation table to the first provider - this needs to be done + * before metadata is written because when kernel tastes it it's too + * late */ + name = gctl_get_ascii(req, "arg1"); /* device with metadata */ + if (verbose) + printf(".\nWriting allocation table to %s...", name); + + /* How many chunks does the map occupy? */ + map_chunks = map_size/md.md_chunk_size; + if (map_size % md.md_chunk_size != 0) + map_chunks++; + if (verbose) { + printf(" (%zu MB, %d chunks) ", map_size/(1024*1024), map_chunks); + fflush(stdout); + } + + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + fd = open(name, O_RDWR); + else { + sprintf(param, "%s%s", _PATH_DEV, name); + fd = open(param, O_RDWR); + } + if (fd < 0) { + gctl_error(req, "Cannot open provider %s to write map", name); + return; + } + + /* + * Initialize and write the map. Don't malloc the whole map at once, + * in case it's large. Use calloc because there might be a need to set + * up chunk flags in the future. 
+ */ + write_max_map_entries = 1024 * 1024 / sizeof(*map); + if (write_max_map_entries > total_chunks) + write_max_map_entries = total_chunks; + map = calloc(write_max_map_entries, sizeof(*map)); + if (map == NULL) { + gctl_error(req, + "Out of memory (need %zu bytes for allocation map)", + write_max_map_entries * sizeof(*map)); + close(fd); + return; + } + for (size_t chunk = 0; chunk < total_chunks; + chunk += write_max_map_entries) { + size_t bytes_to_write, entries_to_write; + + entries_to_write = total_chunks - chunk; + if (entries_to_write > write_max_map_entries) + entries_to_write = write_max_map_entries; + bytes_to_write = entries_to_write * sizeof(*map); + for (size_t off = 0; off < bytes_to_write; off += written) { + written = write(fd, ((char *)map) + off, + bytes_to_write - off); + if (written < 0) { + if (verbose) { + fprintf(stderr, + "\nError writing map at offset " + "%zu of %zu: %s\n", + chunk * sizeof(*map) + off, + map_size, strerror(errno)); + } + gctl_error(req, + "Error writing out allocation map!"); + free(map); + close(fd); + return; + } + } + } + free(map); + map = NULL; + close (fd); + + if (verbose) + printf("\nStoring metadata on "); + + /* + * ID is randomly generated, unique for a geom. This is used to + * recognize all providers belonging to one geom. + */ + md.md_id = arc4random(); + + /* Ok, store metadata. 
*/ + for (i = 1; i < (unsigned)nargs; i++) { + snprintf(param, sizeof(param), "arg%u", i); + name = gctl_get_ascii(req, "%s", param); + + msize = g_get_mediasize(name); + ssize = g_get_sectorsize(name); + + if (verbose) + printf("%s ", name); + + /* this provider's position/type in geom */ + md.no = i - 1; + /* this provider's size */ + md.provsize = msize; + /* chunk allocation info */ + md.chunk_count = md.provsize / md.md_chunk_size; + if (verbose) + printf("(%u chunks) ", md.chunk_count); + /* Check to make sure last sector is unused */ + if ((off_t)(md.chunk_count * md.md_chunk_size) > (off_t)(msize-ssize)) + md.chunk_count--; + md.chunk_next = 0; + if (i != 1) { + md.chunk_reserved = 0; + md.flags = 0; + } else { + md.chunk_reserved = map_chunks * 2; + md.flags = VIRSTOR_PROVIDER_ALLOCATED | + VIRSTOR_PROVIDER_CURRENT; + md.chunk_next = md.chunk_reserved; + if (verbose) + printf("(%u reserved) ", md.chunk_reserved); + } + + if (!hardcode) + bzero(md.provider, sizeof(md.provider)); + else { + /* convert "/dev/something" to "something" */ + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) { + strlcpy(md.provider, name + sizeof(_PATH_DEV) - 1, + sizeof(md.provider)); + } else + strlcpy(md.provider, name, sizeof(md.provider)); + } + sect = calloc(ssize, sizeof(unsigned char)); + if (sect == NULL) + err(1, "Cannot allocate sector of %zu bytes", ssize); + virstor_metadata_encode(&md, sect); + error = g_metadata_store(name, sect, ssize); + free(sect); + if (error != 0) { + if (verbose) + printf("\n"); + fprintf(stderr, "Can't store metadata on %s: %s.\n", + name, strerror(error)); + gctl_error(req, + "Not fully done (error storing metadata)."); + return; + } + } +#if 0 + if (verbose) + printf("\n"); +#endif +} + +/* Clears metadata on given provider(s) IF it's owned by us */ +static void +virstor_clear(struct gctl_req *req) +{ + const char *name; + char param[32]; + unsigned i; + int nargs, error; + int fd; + + nargs = gctl_get_int(req, "nargs"); + if 
(nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + for (i = 0; i < (unsigned)nargs; i++) { + snprintf(param, sizeof(param), "arg%u", i); + name = gctl_get_ascii(req, "%s", param); + + error = g_metadata_clear(name, G_VIRSTOR_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't clear metadata on %s: %s " + "(do I own it?)\n", name, strerror(error)); + gctl_error(req, + "Not fully done (can't clear metadata)."); + continue; + } + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + fd = open(name, O_RDWR); + else { + sprintf(param, "%s%s", _PATH_DEV, name); + fd = open(param, O_RDWR); + } + if (fd < 0) { + gctl_error(req, "Cannot clear header sector for %s", + name); + continue; + } + if (verbose) + printf("Metadata cleared on %s.\n", name); + } +} + +/* Print some metadata information */ +static void +virstor_metadata_dump(const struct g_virstor_metadata *md) +{ + printf(" Magic string: %s\n", md->md_magic); + printf(" Metadata version: %u\n", (u_int) md->md_version); + printf(" Device name: %s\n", md->md_name); + printf(" Device ID: %u\n", (u_int) md->md_id); + printf(" Provider index: %u\n", (u_int) md->no); + printf(" Active providers: %u\n", (u_int) md->md_count); + printf(" Hardcoded provider: %s\n", + md->provider[0] != '\0' ? 
md->provider : "(not hardcoded)"); + printf(" Virtual size: %u MB\n", + (unsigned int)(md->md_virsize/(1024 * 1024))); + printf(" Chunk size: %u kB\n", md->md_chunk_size / 1024); + printf(" Chunks on provider: %u\n", md->chunk_count); + printf(" Chunks free: %u\n", md->chunk_count - md->chunk_next); + printf(" Reserved chunks: %u\n", md->chunk_reserved); +} + +/* Called by geom(8) via gvirstor_main() to dump metadata information */ +static void +virstor_dump(struct gctl_req *req) +{ + struct g_virstor_metadata md; + u_char tmpmd[512]; /* temporary buffer */ + const char *name; + char param[16]; + int nargs, error, i; + + assert(sizeof(tmpmd) >= sizeof(md)); + + nargs = gctl_get_int(req, "nargs"); + if (nargs < 1) { + gctl_error(req, "Too few arguments."); + return; + } + for (i = 0; i < nargs; i++) { + snprintf(param, sizeof(param), "arg%u", i); + name = gctl_get_ascii(req, "%s", param); + + error = g_metadata_read(name, (u_char *) & tmpmd, sizeof(tmpmd), + G_VIRSTOR_MAGIC); + if (error != 0) { + fprintf(stderr, "Can't read metadata from %s: %s.\n", + name, strerror(error)); + gctl_error(req, + "Not fully done (error reading metadata)."); + continue; + } + virstor_metadata_decode((u_char *) & tmpmd, &md); + printf("Metadata on %s:\n", name); + virstor_metadata_dump(&md); + printf("\n"); + } +} diff --git a/lib/geom/virstor/gvirstor.8 b/lib/geom/virstor/gvirstor.8 new file mode 100644 index 000000000000..436b60bef02c --- /dev/null +++ b/lib/geom/virstor/gvirstor.8 @@ -0,0 +1,297 @@ +.\" Copyright (c) 2006-2011 Ivan Voras <ivoras@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd October 1, 2013 +.Dt GVIRSTOR 8 +.Os +.Sh NAME +.Nm gvirstor +.Nd "control utility for virtual data storage devices" +.Sh SYNOPSIS +.Nm +.Cm label +.Op Fl hv +.Op Fl s Ar virsize +.Op Fl m Ar chunksize +.Ar name +.Ar prov ... +.Nm +.Cm stop +.Op Fl fv +.Ar name ... +.Nm +.Cm destroy +.Op Fl fv +.Ar name ... +.Nm +.Cm add +.Op Fl vh +.Ar name prov ... +.Nm +.Cm remove +.Op Fl v +.Ar name prov ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm dump +.Ar prov ... +.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for setting up a virtual storage device of arbitrarily +large size +.Pq for example, several TB , +consisting of an arbitrary number of physical storage devices with a +total size that is equal to or smaller than the virtual size. +Data for the virtual devices will be allocated from physical devices on +demand.
+The idea behind +.Nm +is similar to the concept of Virtual Memory in operating systems, +effectively allowing users to overcommit on storage +.Pq free file system space . +The concept is also known as "thin provisioning" in virtualization +environments, only here it is implemented on the level of physical storage +devices. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm remove" +.It Cm label +Set up a virtual device from the given components with the specified +.Ar name . +Metadata is stored in the last sector of every component. +Argument +.Fl s Ar virsize +is the size of the new virtual device, with the default being set to 2 TiB +.Pq 2097152 MiB . +Argument +.Fl m Ar chunksize +is the chunk size, with the default being set to 4 MiB +.Pq 4096 KiB . +The default arguments are thus +.Qq Fl s Ar 2097152 Fl m Ar 4096 . +.It Cm stop +Turn off an existing virtual device with the given +.Ar name . +This command does not touch on-disk metadata. +As with other GEOM classes, stopped geoms cannot be started manually. +.It Cm destroy +Same as +.Cm stop . +.It Cm add +Adds new components to the existing virtual device with the given +.Ar name . +The specified virstor device must exist and be active +.Pq i.e. module loaded, device present in Pa /dev . +This action can be safely performed while the virstor device is in use +.Pq Qo hot Qc operation . +.It Cm remove +Removes components from the existing virtual device with the given +.Ar name . +Only unallocated providers can be removed. +.It Cm clear +Clear metadata on the given providers. +.It Cm dump +Dump metadata stored on the given providers. +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . +.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options: +.Bl -tag -width ".Fl f" +.It Fl f +Force the removal of the specified virtual device. +.It Fl h +Hardcode providers' names in metadata. +.It Fl v +Be more verbose.
+.El +.Sh EXAMPLES +The following example shows how to create a virtual device of default size +.Pq 2 TiB , +of default chunk +.Pq extent +size +.Pq 4 MiB , +with two physical devices for backing storage. +.Bd -literal -offset indent +.No gvirstor label -v Ar mydata Ar /dev/ada4 Ar /dev/ada6 +.No newfs Ar /dev/virstor/mydata +.Ed +.Pp +From now on, the virtual device will be available via the +.Pa /dev/virstor/mydata +device entry. +To add a new physical device / component to an active virstor device: +.Bd -literal -offset indent +.No gvirstor add Ar mydata Ar ada8 +.Ed +.Pp +This will add the physical storage of +.Ar ada8 +to the +.Pa /dev/virstor/mydata +device. +.Pp +To see the device status information +.Pq including how much physical storage is still available for the virtual device , +use: +.Bd -literal -offset indent +gvirstor list +.Ed +.Pp +All standard +.Xr geom 8 +subcommands +.Pq e.g. Cm status , Cm help +are also supported. +.Sh SYSCTL VARIABLES +.Nm +has several +.Xr sysctl 8 +tunable variables. +.Bd -literal -offset indent +.Va int kern.geom.virstor.debug +.Ed +.Pp +This sysctl controls the verbosity of the kernel module, in the range +1 to 15. +Messages that are marked with higher verbosity levels than this are +suppressed. +The default value is 5 and it is not recommended to set this tunable to less +than 2, because level 1 messages are error events, and level 2 messages +are system warnings. +.Bd -literal -offset indent +.Va int kern.geom.virstor.chunk_watermark +.Ed +.Pp +The value of this sysctl sets the warning watermark level for physical chunk +usage on a single component. +The warning is issued when a virstor component has fewer than this many +free chunks +.Pq default 100 . +.Bd -literal -offset indent +.Va int kern.geom.virstor.component_watermark +.Ed +.Pp +The value of this sysctl sets the warning watermark level for component usage. +The warning is issued when there are fewer than this many unallocated +components +.Pq default is 1 .
+.Pp +All these sysctls are also available as +.Xr loader 8 +tunables. +.Sh DIAGNOSTICS +.Ex -std +.Pp +The +.Nm +kernel module issues log messages with prefixes in a standardized format, +which is useful for log message filtering and dispatching. +Each message line begins with +.Bd -literal -offset indent +.Li GEOM_VIRSTOR[%d]: +.Ed +.Pp +The number +.Pq %d +is the message verbosity / importance level, in the range 1 to 15. +If a message filtering, dispatching or operator alert system is used, it +is recommended that messages with levels 1 and 2 be taken seriously +.Pq for example, to catch out-of-space conditions as set by watermark sysctls . +.Sh SEE ALSO +.Xr geom 4 , +.Xr fstab 5 , +.Xr geom 8 , +.Xr glabel 8 , +.Xr newfs 8 +.Sh HISTORY +The +.Nm +utility first appeared in +.Fx 7.0 . +.Sh AUTHORS +.An Ivan Voras Aq Mt ivoras@FreeBSD.org +.Pp +Sponsored by Google Summer of Code 2006. +.Sh BUGS +Commands +.Cm add +and +.Cm remove +contain unavoidable critical sections which may make the virstor +device unusable if a power failure +.Pq or other disruptive event +happens during their execution. +It is recommended to run them when the system is quiescent. +.Sh ASSUMPTIONS AND INTERACTION WITH FILE SYSTEMS +There are several assumptions that +.Nm +has in its operation: that the size of the virtual storage device will not +change once it is set, and that the sizes of individual physical storage +components will always remain constant during their existence. +For alternative ways to implement virtual or resizable file systems see +.Xr zfs 8 , +.Xr gconcat 8 +and +.Xr growfs 8 . +.Pp +Note that +.Nm +has nontrivial interaction with file systems which initialize a large +number of on-disk structures during newfs.
+If such file systems attempt to spread their structures across the drive +media +.Pq like UFS/UFS2 does , +their efforts will be effectively foiled by sequential allocation of +chunks in +.Nm +and all their structures will be physically allocated at the start +of the first virstor component. +This could have a significant impact on file system performance +.Pq which can in some rare cases be even positive . |