diff options
Diffstat (limited to 'lib/geom/raid3')
-rw-r--r-- | lib/geom/raid3/Makefile | 10 | ||||
-rw-r--r-- | lib/geom/raid3/Makefile.depend | 20 | ||||
-rw-r--r-- | lib/geom/raid3/geom_raid3.c | 338 | ||||
-rw-r--r-- | lib/geom/raid3/graid3.8 | 257 |
4 files changed, 625 insertions, 0 deletions
diff --git a/lib/geom/raid3/Makefile b/lib/geom/raid3/Makefile new file mode 100644 index 000000000000..2fdf4e8f8167 --- /dev/null +++ b/lib/geom/raid3/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +PACKAGE=runtime +.PATH: ${.CURDIR:H:H}/misc + +GEOM_CLASS= raid3 + +LIBADD= md + +.include <bsd.lib.mk> diff --git a/lib/geom/raid3/Makefile.depend b/lib/geom/raid3/Makefile.depend new file mode 100644 index 000000000000..7902e1927044 --- /dev/null +++ b/lib/geom/raid3/Makefile.depend @@ -0,0 +1,20 @@ +# $FreeBSD$ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + gnu/lib/csu \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libgeom \ + lib/libmd \ + sbin/geom/core \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/lib/geom/raid3/geom_raid3.c b/lib/geom/raid3/geom_raid3.c new file mode 100644 index 000000000000..17d3187d5cf4 --- /dev/null +++ b/lib/geom/raid3/geom_raid3.c @@ -0,0 +1,338 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <libgeom.h>
+#include <geom/raid3/g_raid3.h>
+#include <core/geom.h>
+#include <misc/subr.h>
+
+
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_RAID3_VERSION;
+
+static void raid3_main(struct gctl_req *req, unsigned f);
+static void raid3_clear(struct gctl_req *req);
+static void raid3_dump(struct gctl_req *req);
+static void raid3_label(struct gctl_req *req);
+
+/*
+ * Commands understood by the RAID3 class library.
+ *
+ * "clear", "dump" and "label" name raid3_main() as their handler and are
+ * implemented in this file.  The remaining entries carry a NULL handler;
+ * NOTE(review): presumably the geom core forwards those to the kernel
+ * class -- confirm against the core before relying on it.
+ *
+ * The trailing string of every entry is its usage text; keep it in sync
+ * with the option table of the entry and with graid3(8).
+ */
+struct g_command class_commands[] = {
+	{ "clear", G_FLAG_VERBOSE, raid3_main, G_NULL_OPTS,
+	    "[-v] prov ..."
+	},
+	{ "configure", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'a', "autosync", NULL, G_TYPE_BOOL },
+		{ 'd', "dynamic", NULL, G_TYPE_BOOL },
+		{ 'f', "failsync", NULL, G_TYPE_BOOL },
+		{ 'F', "nofailsync", NULL, G_TYPE_BOOL },
+		{ 'h', "hardcode", NULL, G_TYPE_BOOL },
+		{ 'n', "noautosync", NULL, G_TYPE_BOOL },
+		{ 'r', "round_robin", NULL, G_TYPE_BOOL },
+		{ 'R', "noround_robin", NULL, G_TYPE_BOOL },
+		{ 'w', "verify", NULL, G_TYPE_BOOL },
+		{ 'W', "noverify", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-adfFhnrRvwW] name"
+	},
+	{ "dump", 0, raid3_main, G_NULL_OPTS,
+	    "prov ..."
+	},
+	{ "insert", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'h', "hardcode", NULL, G_TYPE_BOOL },
+		/* The component number is optional (G_VAL_OPTIONAL). */
+		{ 'n', "number", G_VAL_OPTIONAL, G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-hv] [-n number] name prov"
+	},
+	{ "label", G_FLAG_VERBOSE, raid3_main,
+	    {
+		{ 'h', "hardcode", NULL, G_TYPE_BOOL },
+		{ 'F', "nofailsync", NULL, G_TYPE_BOOL },
+		{ 'n', "noautosync", NULL, G_TYPE_BOOL },
+		{ 'r', "round_robin", NULL, G_TYPE_BOOL },
+		{ 's', "sectorsize", "0", G_TYPE_NUMBER },
+		{ 'w', "verify", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-hFnrvw] [-s blocksize] name prov prov prov ..."
+	},
+	{ "rebuild", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+	    "[-v] name prov"
+	},
+	{ "remove", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'n', "number", NULL, G_TYPE_NUMBER },
+		G_OPT_SENTINEL
+	    },
+	    "[-v] <-n number> name"
+	},
+	{ "stop", G_FLAG_VERBOSE, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-fv] name ..."
+	},
+	G_CMD_SENTINEL
+};
+
+/* Set by raid3_main() when the request carries G_FLAG_VERBOSE. */
+static int verbose = 0;
+
+/*
+ * Handler shared by the "label", "clear" and "dump" commands.
+ *
+ * Records the verbosity flag, reads the "verb" parameter of the request
+ * and dispatches to the matching raid3_*() routine.  A missing or unknown
+ * verb is reported through gctl_error().
+ */
+static void
+raid3_main(struct gctl_req *req, unsigned flags)
+{
+	const char *name;
+
+	if ((flags & G_FLAG_VERBOSE) != 0)
+		verbose = 1;
+
+	name = gctl_get_ascii(req, "verb");
+	if (name == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+	if (strcmp(name, "label") == 0)
+		raid3_label(req);
+	else if (strcmp(name, "clear") == 0)
+		raid3_clear(req);
+	else if (strcmp(name, "dump") == 0)
+		raid3_dump(req);
+	else
+		gctl_error(req, "Unknown command: %s.", name);
+}
+
+/*
+ * "label": write RAID3 metadata onto a set of providers.
+ *
+ * Request arguments: "arg0" is the device name, "arg1" .. "arg<nargs-1>"
+ * are the providers; the last provider is the parity component.  At least
+ * three providers are required and the number of data components
+ * (nargs - 2) must be a power of two.
+ *
+ * The routine works in three passes over the providers:
+ *  1. query media/sector sizes to derive the array geometry,
+ *  2. clear any metadata already present on every provider,
+ *  3. encode and store the new metadata on every provider.
+ * Errors in passes 1 and 2 abort the command; an error in pass 3 is
+ * reported and the remaining providers are still processed.
+ */
+static void
+raid3_label(struct gctl_req *req)
+{
+	struct g_raid3_metadata md;
+	u_char sector[512];
+	const char *str;
+	unsigned sectorsize, ssize;
+	off_t mediasize, msize;
+	int hardcode, round_robin, verify;
+	int error, i, nargs;
+
+	bzero(sector, sizeof(sector));
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 4) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+	/* Exactly one bit set: the data component count is a power of two. */
+	if (bitcount32(nargs - 2) != 1) {
+		gctl_error(req, "Invalid number of components.");
+		return;
+	}
+
+	strlcpy(md.md_magic, G_RAID3_MAGIC, sizeof(md.md_magic));
+	md.md_version = G_RAID3_VERSION;
+	str = gctl_get_ascii(req, "arg0");
+	strlcpy(md.md_name, str, sizeof(md.md_name));
+	md.md_id = arc4random();
+	md.md_all = nargs - 1;
+	md.md_mflags = 0;
+	md.md_dflags = 0;
+	md.md_genid = 0;
+	md.md_syncid = 1;
+	md.md_sync_offset = 0;
+	if (gctl_get_int(req, "noautosync"))
+		md.md_mflags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
+	if (gctl_get_int(req, "nofailsync"))
+		md.md_mflags |= G_RAID3_DEVICE_FLAG_NOFAILSYNC;
+	round_robin = gctl_get_int(req, "round_robin");
+	if (round_robin)
+		md.md_mflags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN;
+	verify = gctl_get_int(req, "verify");
+	if (verify)
+		md.md_mflags |= G_RAID3_DEVICE_FLAG_VERIFY;
+	/* Round-robin reading and verify reading are mutually exclusive. */
+	if (round_robin && verify) {
+		gctl_error(req, "Both '%c' and '%c' options given.", 'r', 'w');
+		return;
+	}
+	hardcode = gctl_get_int(req, "hardcode");
+
+	/*
+	 * Calculate sectorsize by finding least common multiple from
+	 * sectorsizes of every disk and find the smallest mediasize.
+	 * A non-zero "sectorsize" option takes part in the least common
+	 * multiple as well.
+	 */
+	mediasize = 0;
+	sectorsize = gctl_get_intmax(req, "sectorsize");
+	for (i = 1; i < nargs; i++) {
+		str = gctl_get_ascii(req, "arg%d", i);
+		msize = g_get_mediasize(str);
+		ssize = g_get_sectorsize(str);
+		if (msize == 0 || ssize == 0) {
+			gctl_error(req, "Can't get information about %s: %s.",
+			    str, strerror(errno));
+			return;
+		}
+		/* The last sector of every provider holds the metadata. */
+		msize -= ssize;
+		if (mediasize == 0 || (mediasize > 0 && msize < mediasize))
+			mediasize = msize;
+		if (sectorsize == 0)
+			sectorsize = ssize;
+		else
+			sectorsize = g_lcm(sectorsize, ssize);
+	}
+
+	/*
+	 * Check the limit before multiplying: the array sector size is
+	 * sectorsize * (nargs - 2) and an oversized value could wrap
+	 * around, in the worst case to 0, which would make the modulo
+	 * below divide by zero.
+	 */
+	if (sectorsize == 0 ||
+	    sectorsize > MAXPHYS / (unsigned)(nargs - 2)) {
+		gctl_error(req, "The blocksize is too big.");
+		return;
+	}
+	md.md_mediasize = mediasize * (nargs - 2);
+	md.md_sectorsize = sectorsize * (nargs - 2);
+	/* Round the array size down to a whole number of array sectors. */
+	md.md_mediasize -= (md.md_mediasize % md.md_sectorsize);
+
+	/*
+	 * Clear last sector first, to spoil all components if device exists.
+	 */
+	for (i = 1; i < nargs; i++) {
+		str = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_clear(str, NULL);
+		if (error != 0) {
+			gctl_error(req, "Can't clear metadata on %s: %s.", str,
+			    strerror(error));
+			return;
+		}
+	}
+
+	/*
+	 * Ok, store metadata (use disk number as priority).
+	 */
+	for (i = 1; i < nargs; i++) {
+		str = gctl_get_ascii(req, "arg%d", i);
+		msize = g_get_mediasize(str);
+		ssize = g_get_sectorsize(str);
+		if (mediasize < msize - ssize) {
+			fprintf(stderr,
+			    "warning: %s: only %jd bytes from %jd bytes used.\n",
+			    str, (intmax_t)mediasize, (intmax_t)(msize - ssize));
+		}
+
+		md.md_no = i - 1;
+		md.md_provsize = msize;
+		if (!hardcode)
+			bzero(md.md_provider, sizeof(md.md_provider));
+		else {
+			/* Store the provider name without the /dev/ prefix. */
+			if (strncmp(str, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+				str += sizeof(_PATH_DEV) - 1;
+			strlcpy(md.md_provider, str, sizeof(md.md_provider));
+		}
+		if (verify && md.md_no == md.md_all - 1) {
+			/*
+			 * In "verify" mode, force synchronization of parity
+			 * component on start.
+			 */
+			md.md_syncid = 0;
+		}
+		raid3_metadata_encode(&md, sector);
+		error = g_metadata_store(str, sector, sizeof(sector));
+		if (error != 0) {
+			fprintf(stderr, "Can't store metadata on %s: %s.\n",
+			    str, strerror(error));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		if (verbose)
+			printf("Metadata value stored on %s.\n", str);
+	}
+}
+
+/*
+ * "clear": remove RAID3 metadata from every provider named in the request
+ * ("arg0" .. "arg<nargs-1>").  A failure on one provider is reported on
+ * stderr and flagged through gctl_error(); the remaining providers are
+ * still processed.
+ */
+static void
+raid3_clear(struct gctl_req *req)
+{
+	const char *name;
+	int error, i, nargs;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (i = 0; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		error = g_metadata_clear(name, G_RAID3_MAGIC);
+		if (error != 0) {
+			fprintf(stderr, "Can't clear metadata on %s: %s.\n",
+			    name, strerror(error));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		if (verbose)
+			printf("Metadata cleared on %s.\n", name);
+	}
+}
+
+/*
+ * "dump": read, decode and print the RAID3 metadata of every provider
+ * named in the request.  Providers whose metadata cannot be read or does
+ * not decode are reported and skipped; the command then ends with a
+ * "Not fully done." error.
+ */
+static void
+raid3_dump(struct gctl_req *req)
+{
+	struct g_raid3_metadata md, tmpmd;
+	const char *name;
+	int error, i, nargs;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+
+	for (i = 0; i < nargs; i++) {
+		name = gctl_get_ascii(req, "arg%d", i);
+		/* tmpmd only serves as a raw buffer for the encoded form. */
+		error = g_metadata_read(name, (u_char *)&tmpmd, sizeof(tmpmd),
+		    G_RAID3_MAGIC);
+		if (error != 0) {
+			fprintf(stderr, "Can't read metadata from %s: %s.\n",
+			    name, strerror(error));
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		if (raid3_metadata_decode((u_char *)&tmpmd, &md) != 0) {
+			fprintf(stderr, "MD5 hash mismatch for %s, skipping.\n",
+			    name);
+			gctl_error(req, "Not fully done.");
+			continue;
+		}
+		printf("Metadata on %s:\n", name);
+		raid3_metadata_dump(&md);
+		printf("\n");
+	}
+}
diff --git a/lib/geom/raid3/graid3.8 b/lib/geom/raid3/graid3.8
new file mode 100644
index 000000000000..426c94d17521
--- /dev/null
+++ b/lib/geom/raid3/graid3.8
@@ -0,0 +1,257 @@
+.\" Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" All rights
reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd January 15, 2012 +.Dt GRAID3 8 +.Os +.Sh NAME +.Nm graid3 +.Nd "control utility for RAID3 devices" +.Sh SYNOPSIS +.Nm +.Cm label +.Op Fl Fhnrvw +.Op Fl s Ar blocksize +.Ar name +.Ar prov prov prov ... +.Nm +.Cm clear +.Op Fl v +.Ar prov ... +.Nm +.Cm configure +.Op Fl adfFhnrRvwW +.Ar name +.Nm +.Cm rebuild +.Op Fl v +.Ar name +.Ar prov +.Nm +.Cm insert +.Op Fl hv +.Op Fl n Ar number +.Ar name +.Ar prov +.Nm +.Cm remove +.Op Fl v +.Fl n Ar number +.Ar name +.Nm +.Cm stop +.Op Fl fv +.Ar name ... 
+.Nm +.Cm list +.Nm +.Cm status +.Nm +.Cm load +.Nm +.Cm unload +.Sh DESCRIPTION +The +.Nm +utility is used for RAID3 array configuration. +After a device is created, all components are detected and configured +automatically. +All operations such as failure detection, stale component detection, rebuild +of stale components, etc.\& are also done automatically. +The +.Nm +utility uses on-disk metadata (the provider's last sector) to store all needed +information. +.Pp +The first argument to +.Nm +indicates an action to be performed: +.Bl -tag -width ".Cm configure" +.It Cm label +Create a RAID3 device. +The last given component will contain parity data, whilst the others +will all contain regular data. +The number of components must be equal to 3, 5, 9, 17, etc.\& (2^n + 1). +.Pp +Additional options include: +.Bl -tag -width ".Fl h" +.It Fl F +Do not synchronize after a power failure or system crash. +Assumes the device is in a consistent state. +.It Fl h +Hardcode providers' names in metadata. +.It Fl n +Turn off autosynchronization of stale components. +.It Fl r +Use the parity component for reading in round-robin fashion. +Without this option the parity component is not used at all for reading operations +when the device is in a complete state. +With this option specified, random I/O read operations can be up to 40% faster, +but sequential reads are slower. +One cannot use this option if the +.Fl w +option is also specified. +.It Fl s +Manually specify the array block size. The block size will be set equal to the least +common multiple of all components' sector sizes and the specified value. +Note that the array sector size is calculated as the product of the block size and the number +of regular data components. Big values may decrease performance and compatibility, +as all I/O requests have to be a multiple of the sector size. +.It Fl w +Use the verify reading feature. 
+When reading from a device in a complete state, also read data from the parity component +and verify the data by comparing XORed regular data with parity data. +If verification fails, an +.Er EIO +error is returned and the value of the +.Va kern.geom.raid3.stat.parity_mismatch +sysctl is increased. +One cannot use this option if the +.Fl r +option is also specified. +.El +.It Cm clear +Clear metadata on the given providers. +.It Cm configure +Configure the given device. +.Pp +Additional options include: +.Bl -tag -width ".Fl a" +.It Fl a +Turn on autosynchronization of stale components. +.It Fl d +Do not hardcode providers' names in metadata. +.It Fl f +Synchronize device after a power failure or system crash. +.It Fl F +Do not synchronize after a power failure or system crash. +Assumes the device is in a consistent state. +.It Fl h +Hardcode providers' names in metadata. +.It Fl n +Turn off autosynchronization of stale components. +.It Fl r +Turn on round-robin reading. +.It Fl R +Turn off round-robin reading. +.It Fl w +Turn on verify reading. +.It Fl W +Turn off verify reading. +.El +.It Cm rebuild +Rebuild the given component forcibly. +If autosynchronization was not turned off for the given device, this command +should be unnecessary. +.It Cm insert +Add the given component to the existing array, if one of the components was +removed previously with the +.Cm remove +command or if one component is missing and will not be connected again. +If no number is given, the new component will be added in place of the first missing +component. +.Pp +Additional options include: +.Bl -tag -width ".Fl h" +.It Fl h +Hardcode providers' names in metadata. +.El +.It Cm remove +Remove the given component from the given array and clear metadata on it. +.It Cm stop +Stop the given arrays. +.Pp +Additional options include: +.Bl -tag -width ".Fl f" +.It Fl f +Stop the given array even if it is open. +.El +.It Cm list +See +.Xr geom 8 . +.It Cm status +See +.Xr geom 8 . 
+.It Cm load +See +.Xr geom 8 . +.It Cm unload +See +.Xr geom 8 . +.El +.Pp +Additional options include: +.Bl -tag -width ".Fl v" +.It Fl v +Be more verbose. +.El +.Sh EXIT STATUS +Exit status is 0 on success, and 1 if the command fails. +.Sh EXAMPLES +Use 3 disks to set up a RAID3 array (with the round-robin reading feature). +Create a file system, mount it, then unmount it and stop the device: +.Bd -literal -offset indent +graid3 label -v -r data da0 da1 da2 +newfs /dev/raid3/data +mount /dev/raid3/data /mnt +\&... +umount /mnt +graid3 stop data +graid3 unload +.Ed +.Pp +Create a RAID3 array, but do not use the automatic synchronization feature. +Rebuild parity component: +.Bd -literal -offset indent +graid3 label -n data da0 da1 da2 +graid3 rebuild data da2 +.Ed +.Pp +Replace one data disk with a brand new one: +.Bd -literal -offset indent +graid3 remove -n 0 data +graid3 insert -n 0 data da5 +.Ed +.Sh SEE ALSO +.Xr geom 4 , +.Xr geom 8 , +.Xr gvinum 8 , +.Xr mount 8 , +.Xr newfs 8 , +.Xr umount 8 +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.3 . +.Sh AUTHORS +.An Pawel Jakub Dawidek Aq Mt pjd@FreeBSD.org +.Sh BUGS +There should be a section with an implementation description. +.Pp +Documentation for sysctls +.Va kern.geom.raid3.* +is missing. |