diff options
Diffstat (limited to 'cddl/contrib/opensolaris/cmd/zpool')
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 | 577 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zpool/zpool.8 | 2157 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c | 255 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zpool/zpool_main.c | 6153 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zpool/zpool_util.c | 86 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zpool/zpool_util.h | 73 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c | 1578 |
7 files changed, 10879 insertions, 0 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 b/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 new file mode 100644 index 000000000000..18e242129314 --- /dev/null +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 @@ -0,0 +1,577 @@ +'\" te +.\" Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. +.\" All Rights Reserved. +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" Copyright (c) 2012, 2017 by Delphix. All rights reserved. +.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. +.\" Copyright (c) 2013, Joyent, Inc. All rights reserved. +.\" +.\" $FreeBSD$ +.\" +.Dd June 7, 2017 +.Dt ZPOOL-FEATURES 7 +.Os +.Sh NAME +.Nm zpool-features +.Nd ZFS pool feature descriptions +.Sh DESCRIPTION +ZFS pool on\-disk format versions are specified via "features" which replace +the old on\-disk format numbers (the last supported on\-disk format number is +28). +To enable a feature on a pool use the +.Cm upgrade +subcommand of the +.Xr zpool 8 +command, or set the +.Sy feature@feature_name +property to +.Ar enabled . +.Pp +The pool format does not affect file system version compatibility or the ability +to send file systems between pools. 
+.Pp +Since most features can be enabled independently of each other the on\-disk +format of the pool is specified by the set of all features marked as +.Sy active +on the pool. +If the pool was created by another software version this set may +include unsupported features. +.Ss Identifying features +Every feature has a guid of the form +.Sy com.example:feature_name . +The reverse DNS name ensures that the feature's guid is unique across all ZFS +implementations. +When unsupported features are encountered on a pool they will +be identified by their guids. +Refer to the documentation for the ZFS implementation that created the pool +for information about those features. +.Pp +Each supported feature also has a short name. +By convention a feature's short name is the portion of its guid which follows +the ':' (e.g. +.Sy com.example:feature_name +would have the short name +.Sy feature_name ), +however a feature's short name may differ across ZFS implementations if +following the convention would result in name conflicts. +.Ss Feature states +Features can be in one of three states: +.Bl -tag -width "XXXXXXXX" +.It Sy active +This feature's on\-disk format changes are in effect on the pool. +Support for this feature is required to import the pool in read\-write mode. +If this feature is not read-only compatible, support is also required to +import the pool in read\-only mode (see "Read\-only compatibility"). +.It Sy enabled +An administrator has marked this feature as enabled on the pool, but the +feature's on\-disk format changes have not been made yet. +The pool can still be imported by software that does not support this feature, +but changes may be made to the on\-disk format at any time which will move +the feature to the +.Sy active +state. +Some features may support returning to the +.Sy enabled +state after becoming +.Sy active . +See feature\-specific documentation for details. 
+.It Sy disabled +This feature's on\-disk format changes have not been made and will not be made +unless an administrator moves the feature to the +.Sy enabled +state. +Features cannot be disabled once they have been enabled. +.El +.Pp +The state of supported features is exposed through pool properties of the form +.Sy feature@short_name . +.Ss Read\-only compatibility +Some features may make on\-disk format changes that do not interfere with other +software's ability to read from the pool. +These features are referred to as "read\-only compatible". +If all unsupported features on a pool are read\-only compatible, the pool can +be imported in read\-only mode by setting the +.Sy readonly +property during import (see +.Xr zpool 8 +for details on importing pools). +.Ss Unsupported features +For each unsupported feature enabled on an imported pool a pool property +named +.Sy unsupported@feature_guid +will indicate why the import was allowed despite the unsupported feature. +Possible values for this property are: +.Bl -tag -width "XXXXXXXX" +.It Sy inactive +The feature is in the +.Sy enabled +state and therefore the pool's on\-disk format is still compatible with +software that does not support this feature. +.It Sy readonly +The feature is read\-only compatible and the pool has been imported in +read\-only mode. +.El +.Ss Feature dependencies +Some features depend on other features being enabled in order to function +properly. +Enabling a feature will automatically enable any features it depends on. +.Sh FEATURES +The following features are supported on this system: +.Bl -tag -width "XXXXXXXX" +.It Sy async_destroy +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:async_destroy" +.It GUID Ta com.delphix:async_destroy +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta none +.El +.Pp +Destroying a file system requires traversing all of its data in order to +return its used space to the pool. 
+Without +.Sy async_destroy +the file system is not fully removed until all space has been reclaimed. +If the destroy operation is interrupted by a reboot or power outage the next +attempt to open the pool will need to complete the destroy operation +synchronously. +.Pp +When +.Sy async_destroy +is enabled the file system's data will be reclaimed by a background process, +allowing the destroy operation to complete without traversing the entire file +system. +The background process is able to resume interrupted destroys after the pool +has been opened, eliminating the need to finish interrupted destroys as part +of the open operation. +The amount of space remaining to be reclaimed by the background process is +available through the +.Sy freeing +property. +.Pp +This feature is only +.Sy active +while +.Sy freeing +is non\-zero. +.It Sy empty_bpobj +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:empty_bpobj" +.It GUID Ta com.delphix:empty_bpobj +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta none +.El +.Pp +This feature increases the performance of creating and using a large number +of snapshots of a single filesystem or volume, and also reduces the disk +space required. +.Pp +When there are many snapshots, each snapshot uses many Block Pointer Objects +.Pq bpobj's +to track blocks associated with that snapshot. +However, in common use cases, most of these bpobj's are empty. +This feature allows us to create each bpobj on-demand, thus eliminating the +empty bpobjs. +.Pp +This feature is +.Sy active +while there are any filesystems, volumes, or snapshots which were created +after enabling this feature. +.It Sy filesystem_limits +.Bl -column "READ\-ONLY COMPATIBLE" "com.joyent:filesystem_limits" +.It GUID Ta com.joyent:filesystem_limits +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta extensible_dataset +.El +.Pp +This feature enables filesystem and snapshot limits. 
+These limits can be used +to control how many filesystems and/or snapshots can be created at the point in +the tree on which the limits are set. +.Pp +This feature is +.Sy active +once either of the limit properties has been +set on a dataset. +Once activated the feature is never deactivated. +.It Sy lz4_compress +.Bl -column "READ\-ONLY COMPATIBLE" "org.illumos:lz4_compress" +.It GUID Ta org.illumos:lz4_compress +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta none +.El +.Pp +.Sy lz4 +is a high-performance real-time compression algorithm that +features significantly faster compression and decompression as well as a +higher compression ratio than the older +.Sy lzjb +compression. +Typically, +.Sy lz4 +compression is approximately 50% faster on +compressible data and 200% faster on incompressible data than +.Sy lzjb . +It is also approximately 80% faster on decompression, while +giving approximately 10% better compression ratio. +.Pp +When the +.Sy lz4_compress +feature is set to +.Sy enabled , +the +administrator can turn on +.Sy lz4 +compression on any dataset on the +pool using the +.Xr zfs 8 +command. +Also, all newly written metadata +will be compressed with +.Sy lz4 +algorithm. +Since this feature is not read-only compatible, this +operation will render the pool unimportable on systems without support +for the +.Sy lz4_compress +feature. +Booting off of +.Sy lz4 +-compressed root pools is supported. +.Pp +This feature becomes +.Sy active +as soon as it is enabled and will +never return to being +.Sy enabled . +.It Sy multi_vdev_crash_dump +.Bl -column "READ\-ONLY COMPATIBLE" "com.joyent:multi_vdev_crash_dump" +.It GUID Ta com.joyent:multi_vdev_crash_dump +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta none +.El +.Pp +This feature allows a dump device to be configured with a pool comprised +of multiple vdevs. +Those vdevs may be arranged in any mirrored or raidz +configuration. +.\" TODO: this is not yet supported on FreeBSD. 
+.\" .Pp +.\" When the +.\" .Sy multi_vdev_crash_dump +.\" feature is set to +.\" .Sy enabled , +.\" the administrator can use the +.\" .Xr dumpon 8 +.\" command to configure a +.\" dump device on a pool comprised of multiple vdevs. +.It Sy spacemap_histogram +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:spacemap_histogram" +.It GUID Ta com.delphix:spacemap_histogram +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta none +.El +.Pp +This feature allows ZFS to maintain more information about how free space +is organized within the pool. +If this feature is +.Sy enabled , +ZFS will +set this feature to +.Sy active +when a new space map object is created or +an existing space map is upgraded to the new format. +Once the feature is +.Sy active , +it will remain in that state until the pool is destroyed. +.It Sy extensible_dataset +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:extensible_dataset" +.It GUID Ta com.delphix:extensible_dataset +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta none +.El +.Pp +This feature allows more flexible use of internal ZFS data structures, +and exists for other features to depend on. +.Pp +This feature will be +.Sy active +when the first dependent feature uses it, +and will be returned to the +.Sy enabled +state when all datasets that use +this feature are destroyed. +.It Sy bookmarks +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:bookmarks" +.It GUID Ta com.delphix:bookmarks +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta extensible_dataset +.El +.Pp +This feature enables use of the +.Nm zfs +.Cm bookmark +subcommand. +.Pp +This feature is +.Sy active +while any bookmarks exist in the pool. +All bookmarks in the pool can be listed by running +.Nm zfs +.Cm list +.Fl t No bookmark Fl r Ar poolname . 
+.It Sy enabled_txg +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:enabled_txg" +.It GUID Ta com.delphix:enabled_txg +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta none +.El +.Pp +Once this feature is enabled ZFS records the transaction group number +in which new features are enabled. +This has no user-visible impact, +but other features may depend on this feature. +.Pp +This feature becomes +.Sy active +as soon as it is enabled and will +never return to being +.Sy enabled . +.It Sy hole_birth +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:hole_birth" +.It GUID Ta com.delphix:hole_birth +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta enabled_txg +.El +.Pp +This feature improves performance of incremental sends +.Pq Dq zfs send -i +and receives for objects with many holes. +The most common case of +hole-filled objects is zvols. +.Pp +An incremental send stream from snapshot +.Sy A +to snapshot +.Sy B +contains information about every block that changed between +.Sy A +and +.Sy B . +Blocks which did not change between those snapshots can be +identified and omitted from the stream using a piece of metadata called +the 'block birth time', but birth times are not recorded for holes +.Pq blocks filled only with zeroes . +Since holes created after +.Sy A +cannot be +distinguished from holes created before +.Sy A , +information about every +hole in the entire filesystem or zvol is included in the send stream. +.Pp +For workloads where holes are rare this is not a problem. +However, when +incrementally replicating filesystems or zvols with many holes +.Pq for example a zvol formatted with another filesystem +a lot of time will +be spent sending and receiving unnecessary information about holes that +already exist on the receiving side. +.Pp +Once the +.Sy hole_birth +feature has been enabled the block birth times +of all new holes will be recorded. 
+Incremental sends between snapshots +created after this feature is enabled will use this new metadata to avoid +sending information about holes that already exist on the receiving side. +.Pp +This feature becomes +.Sy active +as soon as it is enabled and will +never return to being +.Sy enabled . +.It Sy embedded_data +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:embedded_data" +.It GUID Ta com.delphix:embedded_data +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta none +.El +.Pp +This feature improves the performance and compression ratio of +highly-compressible blocks. +Blocks whose contents can compress to 112 bytes +or smaller can take advantage of this feature. +.Pp +When this feature is enabled, the contents of highly-compressible blocks are +stored in the block "pointer" itself +.Po a misnomer in this case, as it contains +the compressed data, rather than a pointer to its location on disk +.Pc . +Thus +the space of the block +.Pq one sector, typically 512 bytes or 4KB +is saved, +and no additional i/o is needed to read and write the data block. +.Pp +This feature becomes +.Sy active +as soon as it is enabled and will +never return to being +.Sy enabled . +.It Sy zpool_checkpoint +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:zpool_checkpoint" +.It GUID Ta com.delphix:zpool_checkpoint +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta none +.El +.Pp +This feature enables the "zpool checkpoint" subcommand that can +checkpoint the state of the pool at the time it was issued and later +rewind back to it or discard it. +.Pp +This feature becomes +.Sy active +when the "zpool checkpoint" command is used to checkpoint the pool. +The feature will only return back to being +.Sy enabled +when the pool is rewound or the checkpoint has been discarded. 
+.It Sy large_blocks +.Bl -column "READ\-ONLY COMPATIBLE" "org.open-zfs:large_block" +.It GUID Ta org.open-zfs:large_block +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta extensible_dataset +.El +.Pp +The +.Sy large_blocks +feature allows the record size on a dataset to be +set larger than 128KB. +.Pp +This feature becomes +.Sy active +once a +.Sy recordsize +property has been set larger than 128KB, and will return to being +.Sy enabled +once all filesystems that have ever had their recordsize larger than 128KB +are destroyed. +.Pp +Please note that booting from datasets that have recordsize greater than +128KB is +.Em NOT +supported by the +.Fx +boot loader. +.It Sy sha512 +.Bl -column "READ\-ONLY COMPATIBLE" "org.illumos:sha512" +.It GUID Ta org.illumos:sha512 +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta none +.El +.Pp +The +.Sy sha512 +feature enables the use of the SHA-512/256 truncated hash algorithm +.Pq FIPS 180-4 +for checksum and dedup. +The native 64-bit arithmetic of SHA-512 provides an approximate 50% +performance boost over SHA-256 on 64-bit hardware and is thus a good +minimum-change replacement candidate for systems where hash performance is +important, but these systems cannot for whatever reason utilize the faster +.Sy skein +algorithms. +.Pp +When the +.Sy sha512 +feature is set to +.Sy enabled , +the administrator can turn on the +.Sy sha512 +checksum on any dataset using the +.Dl # zfs set checksum=sha512 Ar dataset +command. +This feature becomes +.Sy active +once a +.Sy checksum +property has been set to +.Sy sha512 , +and will return to being +.Sy enabled +once all filesystems that have ever had their checksum set to +.Sy sha512 +are destroyed. +.It Sy skein +.Bl -column "READ\-ONLY COMPATIBLE" "org.illumos:skein" +.It GUID Ta org.illumos:skein +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta none +.El +.Pp +The +.Sy skein +feature enables the use of the Skein hash algorithm for checksum and dedup. 
+Skein is a high-performance secure hash algorithm that was a finalist in the +NIST SHA-3 competition. +It provides a very high security margin and high performance on 64-bit hardware +.Pq 80% faster than SHA-256 . +This implementation also utilizes the new salted checksumming functionality in +ZFS, which means that the checksum is pre-seeded with a secret 256-bit random +key +.Pq stored on the pool +before being fed the data block to be checksummed. +Thus the produced checksums are unique to a given pool, preventing hash +collision attacks on systems with dedup. +.Pp +When the +.Sy skein +feature is set to +.Sy enabled , +the administrator can turn on the +.Sy skein +checksum on any dataset using the +.Dl # zfs set checksum=skein Ar dataset +command. +This feature becomes +.Sy active +once a +.Sy checksum +property has been set to +.Sy skein , +and will return to being +.Sy enabled +once all filesystems that have ever had their checksum set to +.Sy skein +are destroyed. +.El +.Sh SEE ALSO +.Xr zpool 8 +.Sh AUTHORS +This manual page is a +.Xr mdoc 7 +reimplementation of the +.Tn illumos +manual page +.Em zpool-features(5) , +modified and customized for +.Fx +and licensed under the Common Development and Distribution License +.Pq Tn CDDL . +.Pp +The +.Xr mdoc 7 +implementation of this manual page was initially written by +.An Martin Matuska Aq mm@FreeBSD.org . diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 b/cddl/contrib/opensolaris/cmd/zpool/zpool.8 new file mode 100644 index 000000000000..3e9cd13064ce --- /dev/null +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool.8 @@ -0,0 +1,2157 @@ +'\" te +.\" Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. +.\" Copyright (c) 2013-2014, Xin Li <delphij@FreeBSD.org>. +.\" All Rights Reserved. +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. 
+.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2011, Justin T. Gibbs <gibbs@FreeBSD.org> +.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org> +.\" Copyright (c) 2013 by Delphix. All Rights Reserved. +.\" Copyright 2017 Nexenta Systems, Inc. +.\" Copyright (c) 2017 Datto Inc. +.\" +.\" $FreeBSD$ +.\" +.Dd September 08, 2017 +.Dt ZPOOL 8 +.Os +.Sh NAME +.Nm zpool +.Nd configures ZFS storage pools +.Sh SYNOPSIS +.Nm +.Op Fl \&? +.Nm +.Cm add +.Op Fl fn +.Ar pool vdev ... +.Nm +.Cm attach +.Op Fl f +.Ar pool device new_device +.Nm +.Cm checkpoint +.Op Fl d, -discard +.Ar pool +.Nm +.Cm clear +.Op Fl F Op Fl n +.Ar pool +.Op Ar device +.Nm +.Cm create +.Op Fl fnd +.Op Fl o Ar property Ns = Ns Ar value +.Ar ... +.Op Fl O Ar file-system-property Ns = Ns Ar value +.Ar ... +.Op Fl m Ar mountpoint +.Op Fl R Ar root +.Ar pool vdev ... +.Nm +.Cm destroy +.Op Fl f +.Ar pool +.Nm +.Cm detach +.Ar pool device +.Nm +.Cm export +.Op Fl f +.Ar pool ... +.Nm +.Cm get +.Op Fl Hp +.Op Fl o Ar field Ns Op , Ns Ar ... +.Ar all | property Ns Op , Ns Ar ... +.Ar pool ... +.Nm +.Cm history +.Op Fl il +.Op Ar pool +.Ar ... +.Nm +.Cm import +.Op Fl d Ar dir | Fl c Ar cachefile +.Op Fl D +.Nm +.Cm import +.Op Fl o Ar mntopts +.Op Fl o Ar property Ns = Ns Ar value +.Ar ... 
+.Op Fl -rewind-to-checkpoint +.Op Fl d Ar dir | Fl c Ar cachefile +.Op Fl D +.Op Fl f +.Op Fl m +.Op Fl N +.Op Fl R Ar root +.Op Fl F Op Fl n +.Fl a +.Nm +.Cm import +.Op Fl o Ar mntopts +.Op Fl o Ar property Ns = Ns Ar value +.Ar ... +.Op Fl -rewind-to-checkpoint +.Op Fl d Ar dir | Fl c Ar cachefile +.Op Fl D +.Op Fl f +.Op Fl m +.Op Fl N +.Op Fl R Ar root +.Op Fl F Op Fl n +.Ar pool | id +.Op Ar newpool +.Nm +.Cm iostat +.Op Fl T Cm d Ns | Ns Cm u +.Op Fl v +.Op Ar pool +.Ar ... +.Nm +.Cm labelclear +.Op Fl f +.Ar device +.Nm +.Cm list +.Op Fl Hpv +.Op Fl o Ar property Ns Op , Ns Ar ... +.Op Fl T Cm d Ns | Ns Cm u +.Op Ar pool +.Ar ... +.Op Ar interval Op Ar count +.Nm +.Cm offline +.Op Fl t +.Ar pool device ... +.Nm +.Cm online +.Op Fl e +.Ar pool device ... +.Nm +.Cm reguid +.Ar pool +.Nm +.Cm remove +.Ar pool device ... +.Nm +.Cm reopen +.Ar pool +.Nm +.Cm replace +.Op Fl f +.Ar pool device +.Op Ar new_device +.Nm +.Cm scrub +.Op Fl s | Fl p +.Ar pool ... +.Nm +.Cm set +.Ar property Ns = Ns Ar value pool +.Nm +.Cm split +.Op Fl n +.Op Fl R Ar altroot +.Op Fl o Ar mntopts +.Op Fl o Ar property Ns = Ns Ar value +.Ar pool newpool +.Op Ar device ... +.Nm +.Cm status +.Op Fl vx +.Op Fl T Cm d Ns | Ns Cm u +.Op Ar pool +.Ar ... +.Op Ar interval Op Ar count +.Nm +.Cm upgrade +.Op Fl v +.Nm +.Cm upgrade +.Op Fl V Ar version +.Fl a | Ar pool ... +.Sh DESCRIPTION +The +.Nm +command configures +.Tn ZFS +storage pools. A storage pool is a collection of devices that provides physical +storage and data replication for +.Tn ZFS +datasets. +.Pp +All datasets within a storage pool share the same space. See +.Xr zfs 8 +for information on managing datasets. +.Ss Virtual Devices (vdevs) +A +.Qq virtual device +.Pq No vdev +describes a single device or a collection of devices organized according to +certain performance and fault characteristics. 
The following virtual devices +are supported: +.Bl -tag -width "XXXXXX" +.It Sy disk +A block device, typically located under +.Pa /dev . +.Tn ZFS +can use individual slices or partitions, though the recommended mode of +operation is to use whole disks. A disk can be specified by a full path to the +device or the +.Xr geom 4 +provider name. When given a whole disk, +.Tn ZFS +automatically labels the disk, if necessary. +.It Sy file +A regular file. The use of files as a backing store is strongly discouraged. It +is designed primarily for experimental purposes, as the fault tolerance of a +file is only as good as the file system of which it is a part. A file must be +specified by a full path. +.It Sy mirror +A mirror of two or more devices. Data is replicated in an identical fashion +across all components of a mirror. A mirror with +.Em N +disks of size +.Em X +can hold +.Em X +bytes and can withstand +.Pq Em N-1 +devices failing before data integrity is compromised. +.It Sy raidz +(or +.Sy raidz1 raidz2 raidz3 ) . +A variation on +.Sy RAID-5 +that allows for better distribution of parity and eliminates the +.Qq Sy RAID-5 +write hole (in which data and parity become inconsistent after a power loss). +Data and parity are striped across all disks within a +.No raidz +group. +.Pp +A +.No raidz +group can have single-, double-, or triple-parity, meaning that the +.No raidz +group can sustain one, two, or three failures, respectively, without +losing any data. The +.Sy raidz1 No vdev +type specifies a single-parity +.No raidz +group; the +.Sy raidz2 No vdev +type specifies a double-parity +.No raidz +group; and the +.Sy raidz3 No vdev +type specifies a triple-parity +.No raidz +group. The +.Sy raidz No vdev +type is an alias for +.Sy raidz1 . +.Pp +A +.No raidz +group with +.Em N +disks of size +.Em X +with +.Em P +parity disks can hold approximately +.Sm off +.Pq Em N-P +*X +.Sm on +bytes and can withstand +.Em P +device(s) failing before data integrity is compromised. 
The minimum number of +devices in a +.No raidz +group is one more than the number of parity disks. The +recommended number is between 3 and 9 to help increase performance. +.It Sy spare +A special +.No pseudo- Ns No vdev +which keeps track of available hot spares for a pool. +For more information, see the +.Qq Sx Hot Spares +section. +.It Sy log +A separate-intent log device. If more than one log device is specified, then +writes are load-balanced between devices. Log devices can be mirrored. However, +.No raidz +.No vdev +types are not supported for the intent log. For more information, +see the +.Qq Sx Intent Log +section. +.It Sy cache +A device used to cache storage pool data. A cache device cannot be configured +as a mirror or +.No raidz +group. For more information, see the +.Qq Sx Cache Devices +section. +.El +.Pp +Virtual devices cannot be nested, so a mirror or +.No raidz +virtual device can only +contain files or disks. Mirrors of mirrors (or other combinations) are not +allowed. +.Pp +A pool can have any number of virtual devices at the top of the configuration +(known as +.Qq root +.No vdev Ns s). +Data is dynamically distributed across all top-level devices to balance data +among devices. As new virtual devices are added, +.Tn ZFS +automatically places data on the newly available devices. +.Pp +Virtual devices are specified one at a time on the command line, separated by +whitespace. The keywords +.Qq mirror +and +.Qq raidz +are used to distinguish where a group ends and another begins. For example, the +following creates two root +.No vdev Ns s, +each a mirror of two disks: +.Bd -literal -offset 2n +.Li # Ic zpool create mypool mirror da0 da1 mirror da2 da3 +.Ed +.Ss Device Failure and Recovery +.Tn ZFS +supports a rich set of mechanisms for handling device failure and data +corruption. All metadata and data is checksummed, and +.Tn ZFS +automatically repairs bad data from a good copy when corruption is detected. 
+.Pp +In order to take advantage of these features, a pool must make use of some form +of redundancy, using either mirrored or +.No raidz +groups. While +.Tn ZFS +supports running in a non-redundant configuration, where each root +.No vdev +is simply a disk or file, this is strongly discouraged. A single case of bit +corruption can render some or all of your data unavailable. +.Pp +A pool's health status is described by one of three states: online, degraded, +or faulted. An online pool has all devices operating normally. A degraded pool +is one in which one or more devices have failed, but the data is still +available due to a redundant configuration. A faulted pool has corrupted +metadata, or one or more faulted devices, and insufficient replicas to continue +functioning. +.Pp +The health of the top-level +.No vdev , +such as mirror or +.No raidz +device, is +potentially impacted by the state of its associated +.No vdev Ns s, +or component devices. A top-level +.No vdev +or component device is in one of the following states: +.Bl -tag -width "DEGRADED" +.It Sy DEGRADED +One or more top-level +.No vdev Ns s +is in the degraded state because one or more +component devices are offline. Sufficient replicas exist to continue +functioning. +.Pp +One or more component devices is in the degraded or faulted state, but +sufficient replicas exist to continue functioning. The underlying conditions +are as follows: +.Bl -bullet -offset 2n +.It +The number of checksum errors exceeds acceptable levels and the device is +degraded as an indication that something may be wrong. +.Tn ZFS +continues to use the device as necessary. +.It +The number of +.Tn I/O +errors exceeds acceptable levels. The device could not be +marked as faulted because there are insufficient replicas to continue +functioning. +.El +.It Sy FAULTED +One or more top-level +.No vdev Ns s +is in the faulted state because one or more +component devices are offline. 
Insufficient replicas exist to continue +functioning. +.Pp +One or more component devices is in the faulted state, and insufficient +replicas exist to continue functioning. The underlying conditions are as +follows: +.Bl -bullet -offset 2n +.It +The device could be opened, but the contents did not match expected values. +.It +The number of +.Tn I/O +errors exceeds acceptable levels and the device is faulted to +prevent further use of the device. +.El +.It Sy OFFLINE +The device was explicitly taken offline by the +.Qq Nm Cm offline +command. +.It Sy ONLINE +The device is online and functioning. +.It Sy REMOVED +The device was physically removed while the system was running. Device removal +detection is hardware-dependent and may not be supported on all platforms. +.It Sy UNAVAIL +The device could not be opened. If a pool is imported when a device was +unavailable, then the device will be identified by a unique identifier instead +of its path since the path was never correct in the first place. +.El +.Pp +If a device is removed and later reattached to the system, +.Tn ZFS +attempts to put the device online automatically. Device attach detection is +hardware-dependent and might not be supported on all platforms. +.Ss Hot Spares +.Tn ZFS +allows devices to be associated with pools as +.Qq hot spares . +These devices are not actively used in the pool, but when an active device +fails, it is automatically replaced by a hot spare. To create a pool with hot +spares, specify a +.Qq spare +.No vdev +with any number of devices. For example, +.Bd -literal -offset 2n +.Li # Ic zpool create pool mirror da0 da1 spare da2 da3 +.Ed +.Pp +Spares can be shared across multiple pools, and can be added with the +.Qq Nm Cm add +command and removed with the +.Qq Nm Cm remove +command. Once a spare replacement is initiated, a new "spare" +.No vdev +is created +within the configuration that will remain there until the original device is +replaced. 
At this point, the hot spare becomes available again if another +device fails. +.Pp +If a pool has a shared spare that is currently being used, the pool can not be +exported since other pools may use this shared spare, which may lead to +potential data corruption. +.Pp +An in-progress spare replacement can be cancelled by detaching the hot spare. +If the original faulted device is detached, then the hot spare assumes its +place in the configuration, and is removed from the spare list of all active +pools. +.Pp +Spares cannot replace log devices. +.Pp +This feature requires a userland helper. +FreeBSD provides +.Xr zfsd 8 +for this purpose. +It must be manually enabled by adding +.Va zfsd_enable="YES" +to +.Pa /etc/rc.conf . +.Ss Intent Log +The +.Tn ZFS +Intent Log +.Pq Tn ZIL +satisfies +.Tn POSIX +requirements for synchronous transactions. For instance, databases often +require their transactions to be on stable storage devices when returning from +a system call. +.Tn NFS +and other applications can also use +.Xr fsync 2 +to ensure data stability. By default, the intent log is allocated from blocks +within the main pool. However, it might be possible to get better performance +using separate intent log devices such as +.Tn NVRAM +or a dedicated disk. For example: +.Bd -literal -offset 2n +.Li # Ic zpool create pool da0 da1 log da2 +.Ed +.Pp +Multiple log devices can also be specified, and they can be mirrored. See the +.Sx EXAMPLES +section for an example of mirroring multiple log devices. +.Pp +Log devices can be added, replaced, attached, detached, imported and exported +as part of the larger pool. Mirrored log devices can be removed by specifying +the top-level mirror for the log. +.Ss Cache devices +Devices can be added to a storage pool as "cache devices." These devices +provide an additional layer of caching between main memory and disk. 
For +read-heavy workloads, where the working set size is much larger than what can +be cached in main memory, using cache devices allows much more of this working +set to be served from low latency media. Using cache devices provides the +greatest performance improvement for random read-workloads of mostly static +content. +.Pp +To create a pool with cache devices, specify a "cache" +.No vdev +with any number of devices. For example: +.Bd -literal -offset 2n +.Li # Ic zpool create pool da0 da1 cache da2 da3 +.Ed +.Pp +Cache devices cannot be mirrored or part of a +.No raidz +configuration. If a read +error is encountered on a cache device, that read +.Tn I/O +is reissued to the original storage pool device, which might be part of a +mirrored or +.No raidz +configuration. +.Pp +The content of the cache devices is considered volatile, as is the case with +other system caches. +.Ss Pool checkpoint +Before starting critical procedures that include destructive actions (e.g. +.Nm zfs Cm destroy +), an administrator can checkpoint the pool's state and in the case of a +mistake or failure, rewind the entire pool back to the checkpoint. +Otherwise, the checkpoint can be discarded when the procedure has completed +successfully. +.Pp +A pool checkpoint can be thought of as a pool-wide snapshot and should be used +with care as it contains every part of the pool's state, from properties to vdev +configuration. +Thus, while a pool has a checkpoint, certain operations are not allowed. +Specifically, vdev removal/attach/detach, mirror splitting, and +changing the pool's guid. +Adding a new vdev is supported but in the case of a rewind it will have to be +added again. +Finally, users of this feature should keep in mind that scrubs in a pool that +has a checkpoint do not repair checkpointed data.
+.Pp +To create a checkpoint for a pool: +.Bd -literal +# zpool checkpoint pool +.Ed +.Pp +To later rewind to its checkpointed state, you need to first export it and +then rewind it during import: +.Bd -literal +# zpool export pool +# zpool import --rewind-to-checkpoint pool +.Ed +.Pp +To discard the checkpoint from a pool: +.Bd -literal +# zpool checkpoint -d pool +.Ed +.Pp +Dataset reservations (controlled by the +.Nm reservation +or +.Nm refreservation +zfs properties) may be unenforceable while a checkpoint exists, because the +checkpoint is allowed to consume the dataset's reservation. +Finally, data that is part of the checkpoint but has been freed in the +current state of the pool won't be scanned during a scrub. +.Ss Properties +Each pool has several properties associated with it. Some properties are +read-only statistics while others are configurable and change the behavior of +the pool. The following are read-only properties: +.Bl -tag -width "dedupratio" +.It Sy alloc +Amount of storage space within the pool that has been physically allocated. +.It Sy capacity +Percentage of pool space used. This property can also be referred to by its +shortened column name, "cap". +.It Sy comment +A text string consisting of printable ASCII characters that will be stored +such that it is available even if the pool becomes faulted. An administrator +can provide additional information about a pool using this property. +.It Sy dedupratio +The deduplication ratio specified for a pool, expressed as a multiplier. +For example, a +.Sy dedupratio +value of 1.76 indicates that 1.76 units of data were stored but only 1 unit of disk space was actually consumed. See +.Xr zfs 8 +for a description of the deduplication feature. +.It Sy expandsize +Amount of uninitialized space within the pool or device that can be used to +increase the total capacity of the pool. +Uninitialized space consists of +any space on an EFI labeled vdev which has not been brought online +.Pq i.e. 
zpool online -e . +This space occurs when a LUN is dynamically expanded. +.It Sy fragmentation +The amount of fragmentation in the pool. +.It Sy free +Number of blocks within the pool that are not allocated. +.It Sy freeing +After a file system or snapshot is destroyed, the space it was using is +returned to the pool asynchronously. +.Sy freeing +is the amount of space remaining to be reclaimed. +Over time +.Sy freeing +will decrease while +.Sy free +increases. +.It Sy guid +A unique identifier for the pool. +.It Sy health +The current health of the pool. Health can be +.Qq Sy ONLINE , +.Qq Sy DEGRADED , +.Qq Sy FAULTED , +.Qq Sy OFFLINE , +.Qq Sy REMOVED , +or +.Qq Sy UNAVAIL . +.It Sy size +Total size of the storage pool. +.It Sy unsupported@ Ns Ar feature_guid +Information about unsupported features that are enabled on the pool. +See +.Xr zpool-features 7 +for details. +.It Sy used +Amount of storage space used within the pool. +.El +.Pp +The space usage properties report actual physical space available to the +storage pool. The physical space can be different from the total amount of +space that any contained datasets can actually use. The amount of space used in +a +.No raidz +configuration depends on the characteristics of the data being written. +In addition, +.Tn ZFS +reserves some space for internal accounting that the +.Xr zfs 8 +command takes into account, but the +.Xr zpool 8 +command does not. For non-full pools of a reasonable size, these effects should +be invisible. For small pools, or pools that are close to being completely +full, these discrepancies may become more noticeable. +.Pp +The following property can be set at creation time and import time: +.Bl -tag -width 2n +.It Sy altroot +Alternate root directory. If set, this directory is prepended to any mount +points within the pool. 
This can be used when examining an unknown pool where +the mount points cannot be trusted, or in an alternate boot environment, where +the typical paths are not valid. +.Sy altroot +is not a persistent property. It is valid only while the system is up. +Setting +.Sy altroot +defaults to using +.Cm cachefile=none , +though this may be overridden using an explicit setting. +.El +.Pp +The following property can only be set at import time: +.Bl -tag -width 2n +.It Sy readonly Ns = Ns Cm on No | Cm off +If set to +.Cm on , +the pool will be imported in read-only mode with the following restrictions: +.Bl -bullet -offset 2n +.It +Synchronous data in the intent log will not be accessible +.It +Properties of the pool can not be changed +.It +Datasets of this pool can only be mounted read-only +.It +To write to a read-only pool, an export and import of the pool is required. +.El +.Pp +This property can also be referred to by its shortened column name, +.Sy rdonly . +.El +.Pp +The following properties can be set at creation time and import time, and later +changed with the +.Ic zpool set +command: +.Bl -tag -width 2n +.It Sy autoexpand Ns = Ns Cm on No | Cm off +Controls automatic pool expansion when the underlying LUN is grown. If set to +.Qq Cm on , +the pool will be resized according to the size of the expanded +device. If the device is part of a mirror or +.No raidz +then all devices within that +.No mirror/ Ns No raidz +group must be expanded before the new space is made available to +the pool. The default behavior is +.Qq Cm off . +This property can also be referred to by its shortened column name, +.Sy expand . +.It Sy autoreplace Ns = Ns Cm on No | Cm off +Controls automatic device replacement. If set to +.Qq Cm off , +device replacement must be initiated by the administrator by using the +.Qq Nm Cm replace +command.
If set to +.Qq Cm on , +any new device, found in the same +physical location as a device that previously belonged to the pool, is +automatically formatted and replaced. The default behavior is +.Qq Cm off . +This property can also be referred to by its shortened column name, "replace". +.It Sy bootfs Ns = Ns Ar pool Ns / Ns Ar dataset +Identifies the default bootable dataset for the root pool. This property is +expected to be set mainly by the installation and upgrade programs. +.It Sy cachefile Ns = Ns Ar path No | Cm none +Controls the location where the pool configuration is cached. Discovering +all pools on system startup requires a cached copy of the configuration data +that is stored on the root file system. All pools in this cache are +automatically imported when the system boots. Some environments, such as +install and clustering, need to cache this information in a different location +so that pools are not automatically imported. Setting this property caches the +pool configuration in a different location that can later be imported with +.Qq Nm Cm import Fl c . +Setting it to the special value +.Qq Cm none +creates a temporary pool that is never cached, and the special value +.Cm '' +(empty string) uses the default location. +.It Sy comment Ns = Ns Ar text +A text string consisting of printable ASCII characters that will be stored +such that it is available even if the pool becomes faulted. +An administrator can provide additional information about a pool using this +property. +.It Sy dedupditto Ns = Ns Ar number +Threshold for the number of block ditto copies. If the reference count for a +deduplicated block increases above this number, a new ditto copy of this block +is automatically stored. Default setting is +.Cm 0 +which causes no ditto copies to be created for deduplicated blocks. +The minimum legal nonzero setting is 100.
+.It Sy delegation Ns = Ns Cm on No | Cm off +Controls whether a non-privileged user is granted access based on the dataset +permissions defined on the dataset. See +.Xr zfs 8 +for more information on +.Tn ZFS +delegated administration. +.It Sy failmode Ns = Ns Cm wait No | Cm continue No | Cm panic +Controls the system behavior in the event of catastrophic pool failure. This +condition is typically a result of a loss of connectivity to the underlying +storage device(s) or a failure of all devices within the pool. The behavior of +such an event is determined as follows: +.Bl -tag -width indent +.It Sy wait +Blocks all +.Tn I/O +access until the device connectivity is recovered and the errors are cleared. +This is the default behavior. +.It Sy continue +Returns +.Em EIO +to any new write +.Tn I/O +requests but allows reads to any of the remaining healthy devices. Any write +requests that have yet to be committed to disk would be blocked. +.It Sy panic +Prints out a message to the console and generates a system crash dump. +.El +.It Sy feature@ Ns Ar feature_name Ns = Ns Sy enabled +The value of this property is the current state of +.Ar feature_name . +The only valid value when setting this property is +.Sy enabled +which moves +.Ar feature_name +to the enabled state. +See +.Xr zpool-features 7 +for details on feature states. +.It Sy listsnaps Ns = Ns Cm on No | Cm off +Controls whether information about snapshots associated with this pool is +output when +.Qq Nm zfs Cm list +is run without the +.Fl t +option. The default value is +.Cm off . +.It Sy version Ns = Ns Ar version +The current on-disk version of the pool. This can be increased, but never +decreased. The preferred method of updating pools is with the +.Qq Nm Cm upgrade +command, though this property can be used when a specific version is needed +for backwards compatibility. +Once feature flags are enabled on a pool, this property will no longer have a +value.
+.El +.Sh SUBCOMMANDS +All subcommands that modify state are logged persistently to the pool in their +original form. +.Pp +The +.Nm +command provides subcommands to create and destroy storage pools, add capacity +to storage pools, and provide information about the storage pools. The following +subcommands are supported: +.Bl -tag -width 2n +.It Xo +.Nm +.Op Fl \&? +.Xc +.Pp +Displays a help message. +.It Xo +.Nm +.Cm add +.Op Fl fn +.Ar pool vdev ... +.Xc +.Pp +Adds the specified virtual devices to the given pool. The +.No vdev +specification is described in the +.Qq Sx Virtual Devices +section. The behavior of the +.Fl f +option, and the device checks performed are described in the +.Qq Nm Cm create +subcommand. +.Bl -tag -width indent +.It Fl f +Forces use of +.Ar vdev , +even if they appear in use or specify a conflicting replication level. +Not all devices can be overridden in this manner. +.It Fl n +Displays the configuration that would be used without actually adding the +.Ar vdev Ns s. +The actual pool creation can still fail due to insufficient privileges or device +sharing. +.Pp +Do not add a disk that is currently configured as a quorum device to a zpool. +After a disk is in the pool, that disk can then be configured as a quorum +device. +.El +.It Xo +.Nm +.Cm attach +.Op Fl f +.Ar pool device new_device +.Xc +.Pp +Attaches +.Ar new_device +to an existing +.Sy zpool +device. The existing device cannot be part of a +.No raidz +configuration. If +.Ar device +is not currently part of a mirrored configuration, +.Ar device +automatically transforms into a two-way mirror of +.Ar device No and Ar new_device . +If +.Ar device +is part of a two-way mirror, attaching +.Ar new_device +creates a three-way mirror, and so on. In either case, +.Ar new_device +begins to resilver immediately. +.Bl -tag -width indent +.It Fl f +Forces use of +.Ar new_device , +even if it appears to be in use. Not all devices can be overridden in this +manner.
+.El +.It Xo +.Nm +.Cm checkpoint +.Op Fl d, -discard +.Ar pool +.Xc +Checkpoints the current state of +.Ar pool +, which can be later restored by +.Nm zpool Cm import --rewind-to-checkpoint . +The existence of a checkpoint in a pool prohibits the following +.Nm zpool +commands: +.Cm remove , +.Cm attach , +.Cm detach , +.Cm split , +and +.Cm reguid . +In addition, it may break reservation boundaries if the pool lacks free +space. +The +.Nm zpool Cm status +command indicates the existence of a checkpoint or the progress of discarding a +checkpoint from a pool. +The +.Nm zpool Cm list +command reports how much space the checkpoint takes from the pool. +.Bl -tag -width Ds +.It Fl d, -discard +Discards an existing checkpoint from +.Ar pool . +.El +.It Xo +.Nm +.Cm clear +.Op Fl F Op Fl n +.Ar pool +.Op Ar device +.Xc +.Pp +Clears device errors in a pool. If no arguments are specified, all device +errors within the pool are cleared. If one or more devices is specified, only +those errors associated with the specified device or devices are cleared. +.Bl -tag -width indent +.It Fl F +Initiates recovery mode for an unopenable pool. Attempts to discard the last +few transactions in the pool to return it to an openable state. Not all damaged +pools can be recovered by using this option. If successful, the data from the +discarded transactions is irretrievably lost. +.It Fl n +Used in combination with the +.Fl F +flag. Check whether discarding transactions would make the pool openable, but +do not actually discard any transactions. +.El +.It Xo +.Nm +.Cm create +.Op Fl fnd +.Op Fl o Ar property Ns = Ns Ar value +.Ar ... +.Op Fl O Ar file-system-property Ns = Ns Ar value +.Ar ... +.Op Fl m Ar mountpoint +.Op Fl R Ar root +.Ar pool vdev ... +.Xc +.Pp +Creates a new storage pool containing the virtual devices specified on the +command line. 
The pool name must begin with a letter, and can only contain +alphanumeric characters as well as underscore ("_"), dash ("-"), and period +("."). The pool names "mirror", "raidz", "spare" and "log" are reserved, as are +names beginning with the pattern "c[0-9]". The +.No vdev +specification is described in the +.Qq Sx Virtual Devices +section. +.Pp +The command verifies that each device specified is accessible and not currently +in use by another subsystem. There are some uses, such as being currently +mounted, or specified as the dedicated dump device, that prevent a device from +ever being used by +.Tn ZFS . +Other uses, such as having a preexisting +.Sy UFS +file system, can be overridden with the +.Fl f +option. +.Pp +The command also checks that the replication strategy for the pool is +consistent. An attempt to combine redundant and non-redundant storage in a +single pool, or to mix disks and files, results in an error unless +.Fl f +is specified. The use of differently sized devices within a single +.No raidz +or mirror group is also flagged as an error unless +.Fl f +is specified. +.Pp +Unless the +.Fl R +option is specified, the default mount point is +.Qq Pa /pool . +The mount point must not exist or must be empty, or else the +root dataset cannot be mounted. This can be overridden with the +.Fl m +option. +.Pp +By default all supported features are enabled on the new pool unless the +.Fl d +option is specified. +.Bl -tag -width indent +.It Fl f +Forces use of +.Ar vdev Ns s, +even if they appear in use or specify a conflicting replication level. +Not all devices can be overridden in this manner. +.It Fl n +Displays the configuration that would be used without actually creating the +pool. The actual pool creation can still fail due to insufficient privileges or +device sharing. +.It Fl d +Do not enable any features on the new pool. +Individual features can be enabled by setting their corresponding properties +to +.Sy enabled +with the +.Fl o +option.
+See +.Xr zpool-features 7 +for details about feature properties. +.It Xo +.Fl o Ar property Ns = Ns Ar value +.Op Fl o Ar property Ns = Ns Ar value +.Ar ... +.Xc +Sets the given pool properties. See the +.Qq Sx Properties +section for a list of valid properties that can be set. +.It Xo +.Fl O +.Ar file-system-property Ns = Ns Ar value +.Op Fl O Ar file-system-property Ns = Ns Ar value +.Ar ... +.Xc +Sets the given file system properties in the root file system of the pool. See +.Xr zfs 8 Properties +for a list of valid properties that +can be set. +.It Fl R Ar root +Equivalent to +.Qq Fl o Cm cachefile=none,altroot= Ns Pa root +.It Fl m Ar mountpoint +Sets the mount point for the root dataset. The default mount point is +.Qq Pa /pool +or +.Qq Cm altroot Ns Pa /pool +if +.Sy altroot +is specified. The mount point must be an absolute path, +.Qq Cm legacy , +or +.Qq Cm none . +For more information on dataset mount points, see +.Xr zfs 8 . +.El +.It Xo +.Nm +.Cm destroy +.Op Fl f +.Ar pool +.Xc +.Pp +Destroys the given pool, freeing up any devices for other use. This command +tries to unmount any active datasets before destroying the pool. +.Bl -tag -width indent +.It Fl f +Forces any active datasets contained within the pool to be unmounted. +.El +.It Xo +.Nm +.Cm detach +.Ar pool device +.Xc +.Pp +Detaches +.Ar device +from a mirror. The operation is refused if there are no other valid replicas +of the data. +.It Xo +.Nm +.Cm export +.Op Fl f +.Ar pool ... +.Xc +.Pp +Exports the given pools from the system. All devices are marked as exported, +but are still considered in use by other subsystems. The devices can be moved +between systems (even those of different endianness) and imported as long as a +sufficient number of devices are present. +.Pp +Before exporting the pool, all datasets within the pool are unmounted. A pool +can not be exported if it has a shared spare that is currently being used. 
+.Pp +For pools to be portable, you must give the +.Nm +command whole disks, not just slices, so that +.Tn ZFS +can label the disks with portable +.Sy EFI +labels. Otherwise, disk drivers on platforms of different endianness will not +recognize the disks. +.Bl -tag -width indent +.It Fl f +Forcefully unmount all datasets, using the +.Qq Nm unmount Fl f +command. +.Pp +This command will forcefully export the pool even if it has a shared spare that +is currently being used. This may lead to potential data corruption. +.El +.It Xo +.Nm +.Cm get +.Op Fl Hp +.Op Fl o Ar field Ns Op , Ns Ar ... +.Ar all | property Ns Op , Ns Ar ... +.Ar pool ... +.Xc +.Pp +Retrieves the given list of properties (or all properties if +.Qq Cm all +is used) for the specified storage pool(s). These properties are displayed with +the following fields: +.Bl -column -offset indent "property" +.It name Ta Name of storage pool +.It property Ta Property name +.It value Ta Property value +.It source Ta Property source, either 'default' or 'local'. +.El +.Pp +See the +.Qq Sx Properties +section for more information on the available pool properties. +.It Fl H +Scripted mode. Do not display headers, and separate fields by a single tab +instead of arbitrary space. +.It Fl p +Display numbers in parsable (exact) values. +.It Fl o Ar field +A comma-separated list of columns to display. +.Sy name Ns , Ns +.Sy property Ns , Ns +.Sy value Ns , Ns +.Sy source +is the default value. +.It Xo +.Nm +.Cm history +.Op Fl il +.Op Ar pool +.Ar ... +.Xc +.Pp +Displays the command history of the specified pools or all pools if no pool is +specified. +.Bl -tag -width indent +.It Fl i +Displays internally logged +.Tn ZFS +events in addition to user initiated events. +.It Fl l +Displays log records in long format, which in addition to standard format +includes, the user name, the hostname, and the zone in which the operation was +performed. 
+.El +.It Xo +.Nm +.Cm import +.Op Fl d Ar dir | Fl c Ar cachefile +.Op Fl D +.Xc +.Pp +Lists pools available to import. If the +.Fl d +option is not specified, this command searches for devices in +.Qq Pa /dev . +The +.Fl d +option can be specified multiple times, and all directories are searched. If +the device appears to be part of an exported pool, this command displays a +summary of the pool with the name of the pool, a numeric identifier, as well as +the +.No vdev +layout and current health of the device for each device or file. +Destroyed pools, pools that were previously destroyed with the +.Qq Nm Cm destroy +command, are not listed unless the +.Fl D +option is specified. +.Pp +The numeric identifier is unique, and can be used instead of the pool name when +multiple exported pools of the same name are available. +.Bl -tag -width indent +.It Fl c Ar cachefile +Reads configuration from the given +.Ar cachefile +that was created with the +.Qq Sy cachefile +pool property. This +.Ar cachefile +is used instead of searching for devices. +.It Fl d Ar dir +Searches for devices or files in +.Ar dir . +The +.Fl d +option can be specified multiple times. +.It Fl D +Lists destroyed pools only. +.El +.It Xo +.Nm +.Cm import +.Op Fl o Ar mntopts +.Op Fl o Ar property Ns = Ns Ar value +.Ar ... +.Op Fl d Ar dir | Fl c Ar cachefile +.Op Fl D +.Op Fl f +.Op Fl m +.Op Fl N +.Op Fl R Ar root +.Op Fl F Op Fl n +.Fl a +.Xc +.Pp +Imports all pools found in the search directories. Identical to the previous +command, except that all pools with a sufficient number of devices available +are imported. Destroyed pools, pools that were previously destroyed with the +.Qq Nm Cm destroy +command, will not be imported unless the +.Fl D +option is specified. +.Bl -tag -width indent +.It Fl o Ar mntopts +Comma-separated list of mount options to use when mounting datasets within the +pool. See +.Xr zfs 8 +for a description of dataset properties and mount options. 
+.It Fl o Ar property Ns = Ns Ar value +Sets the specified property on the imported pool. See the +.Qq Sx Properties +section for more information on the available pool properties. +.It Fl c Ar cachefile +Reads configuration from the given +.Ar cachefile +that was created with the +.Qq Sy cachefile +pool property. This +.Ar cachefile +is used instead of searching for devices. +.It Fl d Ar dir +Searches for devices or files in +.Ar dir . +The +.Fl d +option can be specified multiple times. This option is incompatible with the +.Fl c +option. +.It Fl D +Imports destroyed pools only. The +.Fl f +option is also required. +.It Fl f +Forces import, even if the pool appears to be potentially active. +.It Fl m +Allows a pool to import when there is a missing log device. Recent transactions +can be lost because the log device will be discarded. +.It Fl N +Import the pool without mounting any file systems. +.It Fl R Ar root +Sets the +.Qq Sy cachefile +property to +.Qq Cm none +and the +.Qq Sy altroot +property to +.Qq Ar root +.It Fl F +Recovery mode for a non-importable pool. Attempt to return the pool to an +importable state by discarding the last few transactions. Not all damaged pools +can be recovered by using this option. If successful, the data from the +discarded transactions is irretrievably lost. This option is ignored if the +pool is importable or already imported. +.It Fl n +Used with the +.Fl F +recovery option. Determines whether a non-importable pool can be made +importable again, but does not actually perform the pool recovery. For more +details about pool recovery mode, see the +.Fl F +option, above. +.It Fl a +Searches for and imports all pools found. +.El +.It Xo +.Nm +.Cm import +.Op Fl o Ar mntopts +.Op Fl o Ar property Ns = Ns Ar value +.Ar ... +.Op Fl d Ar dir | Fl c Ar cachefile +.Op Fl D +.Op Fl f +.Op Fl m +.Op Fl N +.Op Fl R Ar root +.Op Fl F Op Fl n +.Ar pool | id +.Op Ar newpool +.Xc +.Pp +Imports a specific pool. 
A pool can be identified by its name or the numeric +identifier. If +.Ar newpool +is specified, the pool is imported using the name +.Ar newpool . +Otherwise, it is imported with the same name as its exported name. +.Pp +If a device is removed from a system without running +.Qq Nm Cm export +first, the device appears as potentially active. It cannot be determined if +this was a failed export, or whether the device is really in use from another +host. To import a pool in this state, the +.Fl f +option is required. +.Bl -tag -width indent +.It Fl o Ar mntopts +Comma-separated list of mount options to use when mounting datasets within the +pool. See +.Xr zfs 8 +for a description of dataset properties and mount options. +.It Fl o Ar property Ns = Ns Ar value +Sets the specified property on the imported pool. See the +.Qq Sx Properties +section for more information on the available pool properties. +.It Fl c Ar cachefile +Reads configuration from the given +.Ar cachefile +that was created with the +.Qq Sy cachefile +pool property. This +.Ar cachefile +is used instead of searching for devices. +.It Fl d Ar dir +Searches for devices or files in +.Ar dir . +The +.Fl d +option can be specified multiple times. This option is incompatible with the +.Fl c +option. +.It Fl D +Imports destroyed pools only. The +.Fl f +option is also required. +.It Fl f +Forces import, even if the pool appears to be potentially active. +.It Fl m +Allows a pool to import when there is a missing log device. Recent transactions +can be lost because the log device will be discarded. +.It Fl N +Import the pool without mounting any file systems. +.It Fl R Ar root +Equivalent to +.Qq Fl o Cm cachefile=none,altroot= Ns Pa root +.It Fl F +Recovery mode for a non-importable pool. Attempt to return the pool to an +importable state by discarding the last few transactions. Not all damaged pools +can be recovered by using this option. 
If successful, the data from the +discarded transactions is irretrievably lost. This option is ignored if the +pool is importable or already imported. +.It Fl n +Used with the +.Fl F +recovery option. Determines whether a non-importable pool can be made +importable again, but does not actually perform the pool recovery. For more +details about pool recovery mode, see the +.Fl F +option, above. +.It Fl -rewind-to-checkpoint +Rewinds the pool to the checkpointed state. +Once the pool is imported with this flag, there is no way to undo the rewind. +All changes and data that were written after the checkpoint are lost! +The only exception is when the +.Sy readonly +mounting option is enabled. +In this case, the checkpointed state of the pool is opened and an +administrator can see what the pool would look like if they were +to fully rewind. +.El +.It Xo +.Nm +.Cm iostat +.Op Fl T Cm d Ns | Ns Cm u +.Op Fl v +.Op Ar pool +.Ar ... +.Op Ar interval Op Ar count +.Xc +.Pp +Displays +.Tn I/O +statistics for the given pools. When given an interval, the statistics are +printed every +.Ar interval +seconds until +.Sy Ctrl-C +is pressed. If no +.Ar pools +are specified, statistics for every pool in the system are shown. If +.Ar count +is specified, the command exits after +.Ar count +reports are printed. +.Bl -tag -width indent +.It Fl T Cm d Ns | Ns Cm u +Print a timestamp. +.Pp +Use modifier +.Cm d +for standard date format. See +.Xr date 1 . +Use modifier +.Cm u +for unixtime +.Pq equals Qq Ic date +%s . +.It Fl v +Verbose statistics. Reports usage statistics for individual +.No vdev Ns s +within the pool, in addition to the pool-wide statistics. +.El +.It Xo +.Nm +.Cm labelclear +.Op Fl f +.Ar device +.Xc +.Pp +Removes +.Tn ZFS +label information from the specified +.Ar device . +The +.Ar device +must not be part of an active pool configuration. +.Bl -tag -width indent +.It Fl f +Treat exported or foreign devices as inactive.
+.El +.It Xo +.Nm +.Cm list +.Op Fl Hpv +.Op Fl o Ar property Ns Op , Ns Ar ... +.Op Fl T Cm d Ns | Ns Cm u +.Op Ar pool +.Ar ... +.Op Ar interval Op Ar count +.Xc +.Pp +Lists the given pools along with a health status and space usage. If no +.Ar pools +are specified, all pools in the system are listed. +.Pp +When given an interval, the output is printed every +.Ar interval +seconds until +.Sy Ctrl-C +is pressed. If +.Ar count +is specified, the command exits after +.Ar count +reports are printed. +.Bl -tag -width indent +.It Fl T Cm d Ns | Ns Cm u +Print a timestamp. +.Pp +Use modifier +.Cm d +for standard date format. See +.Xr date 1 . +Use modifier +.Cm u +for unixtime +.Pq equals Qq Ic date +%s . +.It Fl H +Scripted mode. Do not display headers, and separate fields by a single tab +instead of arbitrary space. +.It Fl p +Display numbers in parsable (exact) values. +.It Fl v +Verbose statistics. Reports usage statistics for individual +.Em vdevs +within +the pool, in addition to the pool-wide statistics. +.It Fl o Ar property Ns Op , Ns Ar ... +Comma-separated list of properties to display. See the +.Qq Sx Properties +section for a list of valid properties. The default list is +.Sy name , +.Sy size , +.Sy used , +.Sy available , +.Sy fragmentation , +.Sy expandsize , +.Sy capacity , +.Sy health , +.Sy altroot . +.It Fl T Cm d Ns | Ns Cm u +Print a timestamp. +.Pp +Use modifier +.Cm d +for standard date format. See +.Xr date 1 . +Use modifier +.Cm u +for unixtime +.Pq equals Qq Ic date +%s . +.El +.It Xo +.Nm +.Cm offline +.Op Fl t +.Ar pool device ... +.Xc +.Pp +Takes the specified physical device offline. While the +.Ar device +is offline, no attempt is made to read or write to the device. +.Bl -tag -width indent +.It Fl t +Temporary. Upon reboot, the specified physical device reverts to its previous +state. +.El +.It Xo +.Nm +.Cm online +.Op Fl e +.Ar pool device ... +.Xc +.Pp +Brings the specified physical device online.
+.Pp +This command is not applicable to spares or cache devices. +.Bl -tag -width indent +.It Fl e +Expand the device to use all available space. If the device is part of a mirror +or +.No raidz +then all devices must be expanded before the new space will become +available to the pool. +.El +.It Xo +.Nm +.Cm reguid +.Ar pool +.Xc +.Pp +Generates a new unique identifier for the pool. You must ensure that all +devices in this pool are online and healthy before performing this action. +.It Xo +.Nm +.Cm remove +.Ar pool device ... +.Xc +.Pp +Removes the specified device from the pool. This command currently only +supports removing hot spares, cache, and log devices. A mirrored log device can +be removed by specifying the top-level mirror for the log. Non-log devices that +are part of a mirrored configuration can be removed using the +.Qq Nm Cm detach +command. Non-redundant and +.No raidz +devices cannot be removed from a pool. +.It Xo +.Nm +.Cm reopen +.Ar pool +.Xc +.Pp +Reopen all the vdevs associated with the pool. +.It Xo +.Nm +.Cm replace +.Op Fl f +.Ar pool old_device +.Op Ar new_device +.Xc +.Pp +Replaces +.Ar old_device +with +.Ar new_device . +This is equivalent to attaching +.Ar new_device , +waiting for it to resilver, and then detaching +.Ar old_device . +.Pp +The size of +.Ar new_device +must be greater than or equal to the minimum size +of all the devices in a mirror or +.No raidz +configuration. +.Pp +.Ar new_device +is required if the pool is not redundant. If +.Ar new_device +is not specified, it defaults to +.Ar old_device . +This form of replacement is useful after an existing disk has failed and has +been physically replaced. In this case, the new disk may have the same +.Pa /dev +path as the old device, even though it is actually a different disk. +.Tn ZFS +recognizes this. +.Bl -tag -width indent +.It Fl f +Forces use of +.Ar new_device , +even if it appears to be in use. Not all devices can be overridden in this +manner.
+.El +.It Xo +.Nm +.Cm scrub +.Op Fl s | Fl p +.Ar pool ... +.Xc +.Pp +Begins a scrub or resumes a paused scrub. +The scrub examines all data in the specified pools to verify that it checksums +correctly. +For replicated +.Pq mirror or raidz +devices, ZFS automatically repairs any damage discovered during the scrub. +The +.Nm zpool Cm status +command reports the progress of the scrub and summarizes the results of the +scrub upon completion. +.Pp +Scrubbing and resilvering are very similar operations. +The difference is that resilvering only examines data that ZFS knows to be out +of date +.Po +for example, when attaching a new device to a mirror or replacing an existing +device +.Pc , +whereas scrubbing examines all data to discover silent errors due to hardware +faults or disk failure. +.Pp +Because scrubbing and resilvering are I/O-intensive operations, ZFS only allows +one at a time. +If a scrub is paused, the +.Nm zpool Cm scrub +resumes it. +If a resilver is in progress, ZFS does not allow a scrub to be started until the +resilver completes. +.Bl -tag -width Ds +.It Fl s +Stop scrubbing. +.El +.Bl -tag -width Ds +.It Fl p +Pause scrubbing. +Scrub pause state and progress are periodically synced to disk. +If the system is restarted or pool is exported during a paused scrub, +even after import, scrub will remain paused until it is resumed. +Once resumed the scrub will pick up from the place where it was last +checkpointed to disk. +To resume a paused scrub issue +.Nm zpool Cm scrub +again. +.El +.It Xo +.Nm +.Cm set +.Ar property Ns = Ns Ar value pool +.Xc +.Pp +Sets the given property on the specified pool. See the +.Qq Sx Properties +section for more information on what properties can be set and acceptable +values. +.It Xo +.Nm +.Cm split +.Op Fl n +.Op Fl R Ar altroot +.Op Fl o Ar mntopts +.Op Fl o Ar property Ns = Ns Ar value +.Ar pool newpool +.Op Ar device ... 
+.Xc +.Pp +Splits off one disk from each mirrored top-level +.No vdev +in a pool and creates a new pool from the split-off disks. The original pool +must be made up of one or more mirrors and must not be in the process of +resilvering. The +.Cm split +subcommand chooses the last device in each mirror +.No vdev +unless overridden by a device specification on the command line. +.Pp +When using a +.Ar device +argument, +.Cm split +includes the specified device(s) in a new pool and, should any devices remain +unspecified, assigns the last device in each mirror +.No vdev +to that pool, as it does normally. If you are uncertain about the outcome of a +.Cm split +command, use the +.Fl n +("dry-run") option to ensure your command will have the effect you intend. +.Bl -tag -width indent +.It Fl R Ar altroot +Automatically import the newly created pool after splitting, using the +specified +.Ar altroot +parameter for the new pool's alternate root. See the +.Sy altroot +description in the +.Qq Sx Properties +section, above. +.It Fl n +Displays the configuration that would be created without actually splitting the +pool. The actual pool split could still fail due to insufficient privileges or +device status. +.It Fl o Ar mntopts +Comma-separated list of mount options to use when mounting datasets within the +pool. See +.Xr zfs 8 +for a description of dataset properties and mount options. Valid only in +conjunction with the +.Fl R +option. +.It Fl o Ar property Ns = Ns Ar value +Sets the specified property on the new pool. See the +.Qq Sx Properties +section, above, for more information on the available pool properties. +.El +.It Xo +.Nm +.Cm status +.Op Fl vx +.Op Fl T Cm d Ns | Ns Cm u +.Op Ar pool +.Ar ... +.Op Ar interval Op Ar count +.Xc +.Pp +Displays the detailed health status for the given pools. If no +.Ar pool +is specified, then the status of each pool in the system is displayed. 
For more +information on pool and device health, see the +.Qq Sx Device Failure and Recovery +section. +.Pp +When given an interval, the output is printed every +.Ar interval +seconds until +.Sy Ctrl-C +is pressed. If +.Ar count +is specified, the command exits after +.Ar count +reports are printed. +.Pp +If a scrub or resilver is in progress, this command reports the percentage +done and the estimated time to completion. Both of these are only approximate, +because the amount of data in the pool and the other workloads on the system +can change. +.Bl -tag -width indent +.It Fl x +Only display status for pools that are exhibiting errors or are otherwise +unavailable. +Warnings about pools not using the latest on-disk format, having non-native +block size or disabled features will not be included. +.It Fl v +Displays verbose data error information, printing out a complete list of all +data errors since the last complete pool scrub. +.It Fl T Cm d Ns | Ns Cm u +Print a timestamp. +.Pp +Use modifier +.Cm d +for standard date format. See +.Xr date 1 . +Use modifier +.Cm u +for unixtime +.Pq equals Qq Ic date +%s . +.El +.It Xo +.Nm +.Cm upgrade +.Op Fl v +.Xc +.Pp +Displays pools which do not have all supported features enabled and pools +formatted using a legacy +.Tn ZFS +version number. +These pools can continue to be used, but some features may not be available. +Use +.Nm Cm upgrade Fl a +to enable all features on all pools. +.Bl -tag -width indent +.It Fl v +Displays legacy +.Tn ZFS +versions supported by the current software. +See +.Xr zpool-features 7 +for a description of feature flags features supported by the current software. +.El +.It Xo +.Nm +.Cm upgrade +.Op Fl V Ar version +.Fl a | Ar pool ... +.Xc +.Pp +Enables all supported features on the given pool. +Once this is done, the pool will no longer be accessible on systems that do +not support feature flags. 
+See +.Xr zpool-features 7 +for details on compatibility with systems that support feature flags, but do +not support all features enabled on the pool. +.Bl -tag -width indent +.It Fl a +Enables all supported features on all pools. +.It Fl V Ar version +Upgrade to the specified legacy version. If the +.Fl V +flag is specified, no features will be enabled on the pool. +This option can only be used to increase version number up to the last +supported legacy version number. +.El +.El +.Sh EXIT STATUS +The following exit values are returned: +.Bl -tag -offset 2n -width 2n +.It 0 +Successful completion. +.It 1 +An error occurred. +.It 2 +Invalid command line options were specified. +.El +.Sh EXAMPLES +.Bl -tag -width 0n +.It Sy Example 1 No Creating a RAID-Z Storage Pool +.Pp +The following command creates a pool with a single +.No raidz +root +.No vdev +that consists of six disks. +.Bd -literal -offset 2n +.Li # Ic zpool create tank raidz da0 da1 da2 da3 da4 da5 +.Ed +.It Sy Example 2 No Creating a Mirrored Storage Pool +.Pp +The following command creates a pool with two mirrors, where each mirror +contains two disks. +.Bd -literal -offset 2n +.Li # Ic zpool create tank mirror da0 da1 mirror da2 da3 +.Ed +.It Sy Example 3 No Creating a Tn ZFS No Storage Pool by Using Partitions +.Pp +The following command creates an unmirrored pool using two GPT partitions. +.Bd -literal -offset 2n +.Li # Ic zpool create tank da0p3 da1p3 +.Ed +.It Sy Example 4 No Creating a Tn ZFS No Storage Pool by Using Files +.Pp +The following command creates an unmirrored pool using files. While not +recommended, a pool based on files can be useful for experimental purposes. +.Bd -literal -offset 2n +.Li # Ic zpool create tank /path/to/file/a /path/to/file/b +.Ed +.It Sy Example 5 No Adding a Mirror to a Tn ZFS No Storage Pool +.Pp +The following command adds two mirrored disks to the pool +.Em tank , +assuming the pool is already made up of two-way mirrors. 
The additional space +is immediately available to any datasets within the pool. +.Bd -literal -offset 2n +.Li # Ic zpool add tank mirror da2 da3 +.Ed +.It Sy Example 6 No Listing Available Tn ZFS No Storage Pools +.Pp +The following command lists all available pools on the system. +.Bd -literal -offset 2n +.Li # Ic zpool list +NAME SIZE ALLOC FREE FRAG EXPANDSZ CAP DEDUP HEALTH ALTROOT +pool 2.70T 473G 2.24T 33% - 17% 1.00x ONLINE - +test 1.98G 89.5K 1.98G 48% - 0% 1.00x ONLINE - +.Ed +.It Sy Example 7 No Listing All Properties for a Pool +.Pp +The following command lists all the properties for a pool. +.Bd -literal -offset 2n +.Li # Ic zpool get all pool +pool size 2.70T - +pool capacity 17% - +pool altroot - default +pool health ONLINE - +pool guid 2501120270416322443 default +pool version 28 default +pool bootfs pool/root local +pool delegation on default +pool autoreplace off default +pool cachefile - default +pool failmode wait default +pool listsnapshots off default +pool autoexpand off default +pool dedupditto 0 default +pool dedupratio 1.00x - +pool free 2.24T - +pool allocated 473G - +pool readonly off - +.Ed +.It Sy Example 8 No Destroying a Tn ZFS No Storage Pool +.Pp +The following command destroys the pool +.Qq Em tank +and any datasets contained within. +.Bd -literal -offset 2n +.Li # Ic zpool destroy -f tank +.Ed +.It Sy Example 9 No Exporting a Tn ZFS No Storage Pool +.Pp +The following command exports the devices in pool +.Em tank +so that they can be relocated or later imported. +.Bd -literal -offset 2n +.Li # Ic zpool export tank +.Ed +.It Sy Example 10 No Importing a Tn ZFS No Storage Pool +.Pp +The following command displays available pools, and then imports the pool +.Qq Em tank +for use on the system. +.Pp +The results from this command are similar to the following: +.Bd -literal -offset 2n +.Li # Ic zpool import + + pool: tank + id: 15451357997522795478 + state: ONLINE +action: The pool can be imported using its name or numeric identifier. 
+config: + + tank ONLINE + mirror ONLINE + da0 ONLINE + da1 ONLINE +.Ed +.It Xo +.Sy Example 11 +Upgrading All +.Tn ZFS +Storage Pools to the Current Version +.Xc +.Pp +The following command upgrades all +.Tn ZFS +Storage pools to the current version of +the software. +.Bd -literal -offset 2n +.Li # Ic zpool upgrade -a +This system is currently running ZFS pool version 28. +.Ed +.It Sy Example 12 No Managing Hot Spares +.Pp +The following command creates a new pool with an available hot spare: +.Bd -literal -offset 2n +.Li # Ic zpool create tank mirror da0 da1 spare da2 +.Ed +.Pp +If one of the disks were to fail, the pool would be reduced to the degraded +state. The failed device can be replaced using the following command: +.Bd -literal -offset 2n +.Li # Ic zpool replace tank da0 da2 +.Ed +.Pp +Once the data has been resilvered, the spare is automatically removed and is +made available should another device fail. The hot spare can be permanently +removed from the pool using the following command: +.Bd -literal -offset 2n +.Li # Ic zpool remove tank da2 +.Ed +.It Xo +.Sy Example 13 +Creating a +.Tn ZFS +Pool with Mirrored Separate Intent Logs +.Xc +.Pp +The following command creates a +.Tn ZFS +storage pool consisting of two, two-way +mirrors and mirrored log devices: +.Bd -literal -offset 2n +.Li # Ic zpool create pool mirror da0 da1 mirror da2 da3 log mirror da4 da5 +.Ed +.It Sy Example 14 No Adding Cache Devices to a Tn ZFS No Pool +.Pp +The following command adds two disks for use as cache devices to a +.Tn ZFS +storage pool: +.Bd -literal -offset 2n +.Li # Ic zpool add pool cache da2 da3 +.Ed +.Pp +Once added, the cache devices gradually fill with content from main memory. +Depending on the size of your cache devices, it could take over an hour for +them to fill. 
Capacity and reads can be monitored using the +.Cm iostat +subcommand as follows: +.Bd -literal -offset 2n +.Li # Ic zpool iostat -v pool 5 +.Ed +.It Xo +.Sy Example 15 +Displaying expanded space on a device +.Xc +.Pp +The following command displays the detailed information for the +.Em data +pool. +This pool is comprised of a single +.Em raidz +vdev where one of its +devices increased its capacity by 10GB. +In this example, the pool will not +be able to utilize this extra capacity until all the devices under the +.Em raidz +vdev have been expanded. +.Bd -literal -offset 2n +.Li # Ic zpool list -v data +NAME SIZE ALLOC FREE FRAG EXPANDSZ CAP DEDUP HEALTH ALTROOT +data 23.9G 14.6G 9.30G 48% - 61% 1.00x ONLINE - + raidz1 23.9G 14.6G 9.30G 48% - + ada0 - - - - - + ada1 - - - - 10G + ada2 - - - - - +.Ed +.It Xo +.Sy Example 16 +Removing a Mirrored Log Device +.Xc +.Pp +The following command removes the mirrored log device +.Em mirror-2 . +.Pp +Given this configuration: +.Bd -literal -offset 2n + pool: tank + state: ONLINE + scrub: none requested + config: + + NAME STATE READ WRITE CKSUM + tank ONLINE 0 0 0 + mirror-0 ONLINE 0 0 0 + da0 ONLINE 0 0 0 + da1 ONLINE 0 0 0 + mirror-1 ONLINE 0 0 0 + da2 ONLINE 0 0 0 + da3 ONLINE 0 0 0 + logs + mirror-2 ONLINE 0 0 0 + da4 ONLINE 0 0 0 + da5 ONLINE 0 0 0 +.Ed +.Pp +The command to remove the mirrored log +.Em mirror-2 +is: +.Bd -literal -offset 2n +.Li # Ic zpool remove tank mirror-2 +.Ed +.It Xo +.Sy Example 17 +Recovering a Faulted +.Tn ZFS +Pool +.Xc +.Pp +If a pool is faulted but recoverable, a message indicating this state is +provided by +.Qq Nm Cm status +if the pool was cached (see the +.Fl c Ar cachefile +argument above), or as part of the error output from a failed +.Qq Nm Cm import +of the pool. +.Pp +Recover a cached pool with the +.Qq Nm Cm clear +command: +.Bd -literal -offset 2n +.Li # Ic zpool clear -F data +Pool data returned to its state as of Tue Sep 08 13:23:35 2009. 
+Discarded approximately 29 seconds of transactions. +.Ed +.Pp +If the pool configuration was not cached, use +.Qq Nm Cm import +with the recovery mode flag: +.Bd -literal -offset 2n +.Li # Ic zpool import -F data +Pool data returned to its state as of Tue Sep 08 13:23:35 2009. +Discarded approximately 29 seconds of transactions. +.Ed +.El +.Sh SEE ALSO +.Xr zpool-features 7 , +.Xr zfs 8 , +.Xr zfsd 8 +.Sh AUTHORS +This manual page is a +.Xr mdoc 7 +reimplementation of the +.Tn OpenSolaris +manual page +.Em zpool(1M) , +modified and customized for +.Fx +and licensed under the Common Development and Distribution License +.Pq Tn CDDL . +.Pp +The +.Xr mdoc 7 +implementation of this manual page was initially written by +.An Martin Matuska Aq mm@FreeBSD.org . diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c new file mode 100644 index 000000000000..2f7de933ed41 --- /dev/null +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c @@ -0,0 +1,255 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +/* + * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. + */ + +#include <solaris.h> +#include <libintl.h> +#include <libuutil.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#include <libzfs.h> + +#include "zpool_util.h" + +/* + * Private interface for iterating over pools specified on the command line. + * Most consumers will call for_each_pool, but in order to support iostat, we + * allow fine-grained control through the zpool_list_t interface. + */ + +typedef struct zpool_node { + zpool_handle_t *zn_handle; + uu_avl_node_t zn_avlnode; + int zn_mark; +} zpool_node_t; + +struct zpool_list { + boolean_t zl_findall; + uu_avl_t *zl_avl; + uu_avl_pool_t *zl_pool; + zprop_list_t **zl_proplist; +}; + +/* ARGSUSED */ +static int +zpool_compare(const void *larg, const void *rarg, void *unused) +{ + zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle; + zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle; + const char *lname = zpool_get_name(l); + const char *rname = zpool_get_name(r); + + return (strcmp(lname, rname)); +} + +/* + * Callback function for pool_list_get(). Adds the given pool to the AVL tree + * of known pools. + */ +static int +add_pool(zpool_handle_t *zhp, void *data) +{ + zpool_list_t *zlp = data; + zpool_node_t *node = safe_malloc(sizeof (zpool_node_t)); + uu_avl_index_t idx; + + node->zn_handle = zhp; + uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool); + if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { + if (zlp->zl_proplist && + zpool_expand_proplist(zhp, zlp->zl_proplist) != 0) { + zpool_close(zhp); + free(node); + return (-1); + } + uu_avl_insert(zlp->zl_avl, node, idx); + } else { + zpool_close(zhp); + free(node); + return (-1); + } + + return (0); +} + +/* + * Create a list of pools based on the given arguments. If we're given no + * arguments, then iterate over all pools in the system and add them to the AVL + * tree. 
Otherwise, add only those pools explicitly specified on the command + * line. + */ +zpool_list_t * +pool_list_get(int argc, char **argv, zprop_list_t **proplist, int *err) +{ + zpool_list_t *zlp; + + zlp = safe_malloc(sizeof (zpool_list_t)); + + zlp->zl_pool = uu_avl_pool_create("zfs_pool", sizeof (zpool_node_t), + offsetof(zpool_node_t, zn_avlnode), zpool_compare, UU_DEFAULT); + + if (zlp->zl_pool == NULL) + zpool_no_memory(); + + if ((zlp->zl_avl = uu_avl_create(zlp->zl_pool, NULL, + UU_DEFAULT)) == NULL) + zpool_no_memory(); + + zlp->zl_proplist = proplist; + + if (argc == 0) { + (void) zpool_iter(g_zfs, add_pool, zlp); + zlp->zl_findall = B_TRUE; + } else { + int i; + + for (i = 0; i < argc; i++) { + zpool_handle_t *zhp; + + if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != + NULL) { + if (add_pool(zhp, zlp) != 0) + *err = B_TRUE; + } else { + *err = B_TRUE; + } + } + } + + return (zlp); +} + +/* + * Search for any new pools, adding them to the list. We only add pools when no + * options were given on the command line. Otherwise, we keep the list fixed as + * those that were explicitly specified. + */ +void +pool_list_update(zpool_list_t *zlp) +{ + if (zlp->zl_findall) + (void) zpool_iter(g_zfs, add_pool, zlp); +} + +/* + * Iterate over all pools in the list, executing the callback for each + */ +int +pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func, + void *data) +{ + zpool_node_t *node, *next_node; + int ret = 0; + + for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next_node) { + next_node = uu_avl_next(zlp->zl_avl, node); + if (zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL || + unavail) + ret |= func(node->zn_handle, data); + } + + return (ret); +} + +/* + * Remove the given pool from the list. When running iostat, we want to remove + * those pools that no longer exist. 
+ */ +void +pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp) +{ + zpool_node_t search, *node; + + search.zn_handle = zhp; + if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) { + uu_avl_remove(zlp->zl_avl, node); + zpool_close(node->zn_handle); + free(node); + } +} + +/* + * Free all the handles associated with this list. + */ +void +pool_list_free(zpool_list_t *zlp) +{ + uu_avl_walk_t *walk; + zpool_node_t *node; + + if ((walk = uu_avl_walk_start(zlp->zl_avl, UU_WALK_ROBUST)) == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory")); + exit(1); + } + + while ((node = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(zlp->zl_avl, node); + zpool_close(node->zn_handle); + free(node); + } + + uu_avl_walk_end(walk); + uu_avl_destroy(zlp->zl_avl); + uu_avl_pool_destroy(zlp->zl_pool); + + free(zlp); +} + +/* + * Returns the number of elements in the pool list. + */ +int +pool_list_count(zpool_list_t *zlp) +{ + return (uu_avl_numnodes(zlp->zl_avl)); +} + +/* + * High level function which iterates over all pools given on the command line, + * using the pool_list_* interfaces. + */ +int +for_each_pool(int argc, char **argv, boolean_t unavail, + zprop_list_t **proplist, zpool_iter_f func, void *data) +{ + zpool_list_t *list; + int ret = 0; + + if ((list = pool_list_get(argc, argv, proplist, &ret)) == NULL) + return (1); + + if (pool_list_iter(list, unavail, func, data) != 0) + ret = 1; + + pool_list_free(list); + + return (ret); +} diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c new file mode 100644 index 000000000000..51edeca384e6 --- /dev/null +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c @@ -0,0 +1,6153 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012 by Frederik Wessels. All rights reserved. + * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. + * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved. + * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. + * Copyright 2016 Nexenta Systems, Inc. + * Copyright (c) 2017 Datto Inc. 
+ */ + +#include <solaris.h> +#include <assert.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <libgen.h> +#include <libintl.h> +#include <libuutil.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> +#include <priv.h> +#include <pwd.h> +#include <zone.h> +#include <sys/time.h> +#include <zfs_prop.h> +#include <sys/fs/zfs.h> +#include <sys/stat.h> + +#include <libzfs.h> + +#include "zpool_util.h" +#include "zfs_comutil.h" +#include "zfeature_common.h" + +#include "statcommon.h" + +static int zpool_do_create(int, char **); +static int zpool_do_destroy(int, char **); + +static int zpool_do_add(int, char **); +static int zpool_do_remove(int, char **); +static int zpool_do_labelclear(int, char **); + +static int zpool_do_checkpoint(int, char **); + +static int zpool_do_list(int, char **); +static int zpool_do_iostat(int, char **); +static int zpool_do_status(int, char **); + +static int zpool_do_online(int, char **); +static int zpool_do_offline(int, char **); +static int zpool_do_clear(int, char **); +static int zpool_do_reopen(int, char **); + +static int zpool_do_reguid(int, char **); + +static int zpool_do_attach(int, char **); +static int zpool_do_detach(int, char **); +static int zpool_do_replace(int, char **); +static int zpool_do_split(int, char **); + +static int zpool_do_scrub(int, char **); + +static int zpool_do_import(int, char **); +static int zpool_do_export(int, char **); + +static int zpool_do_upgrade(int, char **); + +static int zpool_do_history(int, char **); + +static int zpool_do_get(int, char **); +static int zpool_do_set(int, char **); + +/* + * These libumem hooks provide a reasonable set of defaults for the allocator's + * debugging facilities. 
+ */ + +#ifdef DEBUG +const char * +_umem_debug_init(void) +{ + return ("default,verbose"); /* $UMEM_DEBUG setting */ +} + +const char * +_umem_logging_init(void) +{ + return ("fail,contents"); /* $UMEM_LOGGING setting */ +} +#endif + +typedef enum { + HELP_ADD, + HELP_ATTACH, + HELP_CLEAR, + HELP_CREATE, + HELP_CHECKPOINT, + HELP_DESTROY, + HELP_DETACH, + HELP_EXPORT, + HELP_HISTORY, + HELP_IMPORT, + HELP_IOSTAT, + HELP_LABELCLEAR, + HELP_LIST, + HELP_OFFLINE, + HELP_ONLINE, + HELP_REPLACE, + HELP_REMOVE, + HELP_SCRUB, + HELP_STATUS, + HELP_UPGRADE, + HELP_GET, + HELP_SET, + HELP_SPLIT, + HELP_REGUID, + HELP_REOPEN +} zpool_help_t; + + +typedef struct zpool_command { + const char *name; + int (*func)(int, char **); + zpool_help_t usage; +} zpool_command_t; + +/* + * Master command table. Each ZFS command has a name, associated function, and + * usage message. The usage messages need to be internationalized, so we have + * to have a function to return the usage message based on a command index. + * + * These commands are organized according to how they are displayed in the usage + * message. An empty command (one with a NULL name) indicates an empty line in + * the generic usage message. 
+ */ +static zpool_command_t command_table[] = { + { "create", zpool_do_create, HELP_CREATE }, + { "destroy", zpool_do_destroy, HELP_DESTROY }, + { NULL }, + { "add", zpool_do_add, HELP_ADD }, + { "remove", zpool_do_remove, HELP_REMOVE }, + { NULL }, + { "labelclear", zpool_do_labelclear, HELP_LABELCLEAR }, + { NULL }, + { "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT }, + { NULL }, + { "list", zpool_do_list, HELP_LIST }, + { "iostat", zpool_do_iostat, HELP_IOSTAT }, + { "status", zpool_do_status, HELP_STATUS }, + { NULL }, + { "online", zpool_do_online, HELP_ONLINE }, + { "offline", zpool_do_offline, HELP_OFFLINE }, + { "clear", zpool_do_clear, HELP_CLEAR }, + { "reopen", zpool_do_reopen, HELP_REOPEN }, + { NULL }, + { "attach", zpool_do_attach, HELP_ATTACH }, + { "detach", zpool_do_detach, HELP_DETACH }, + { "replace", zpool_do_replace, HELP_REPLACE }, + { "split", zpool_do_split, HELP_SPLIT }, + { NULL }, + { "scrub", zpool_do_scrub, HELP_SCRUB }, + { NULL }, + { "import", zpool_do_import, HELP_IMPORT }, + { "export", zpool_do_export, HELP_EXPORT }, + { "upgrade", zpool_do_upgrade, HELP_UPGRADE }, + { "reguid", zpool_do_reguid, HELP_REGUID }, + { NULL }, + { "history", zpool_do_history, HELP_HISTORY }, + { "get", zpool_do_get, HELP_GET }, + { "set", zpool_do_set, HELP_SET }, +}; + +#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) + +static zpool_command_t *current_command; +static char history_str[HIS_MAX_RECORD_LEN]; +static boolean_t log_history = B_TRUE; +static uint_t timestamp_fmt = NODATE; + +static const char * +get_usage(zpool_help_t idx) +{ + switch (idx) { + case HELP_ADD: + return (gettext("\tadd [-fn] <pool> <vdev> ...\n")); + case HELP_ATTACH: + return (gettext("\tattach [-f] <pool> <device> " + "<new-device>\n")); + case HELP_CLEAR: + return (gettext("\tclear [-nF] <pool> [device]\n")); + case HELP_CREATE: + return (gettext("\tcreate [-fnd] [-B] " + "[-o property=value] ... \n" + "\t [-O file-system-property=value] ... 
\n" + "\t [-m mountpoint] [-R root] <pool> <vdev> ...\n")); + case HELP_CHECKPOINT: + return (gettext("\tcheckpoint [--discard] <pool> ...\n")); + case HELP_DESTROY: + return (gettext("\tdestroy [-f] <pool>\n")); + case HELP_DETACH: + return (gettext("\tdetach <pool> <device>\n")); + case HELP_EXPORT: + return (gettext("\texport [-f] <pool> ...\n")); + case HELP_HISTORY: + return (gettext("\thistory [-il] [<pool>] ...\n")); + case HELP_IMPORT: + return (gettext("\timport [-d dir] [-D]\n" + "\timport [-o mntopts] [-o property=value] ... \n" + "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "[-R root] [-F [-n]] -a\n" + "\timport [-o mntopts] [-o property=value] ... \n" + "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "[-R root] [-F [-n]]\n" + "\t [--rewind-to-checkpoint] <pool | id> [newpool]\n")); + case HELP_IOSTAT: + return (gettext("\tiostat [-v] [-T d|u] [pool] ... [interval " + "[count]]\n")); + case HELP_LABELCLEAR: + return (gettext("\tlabelclear [-f] <vdev>\n")); + case HELP_LIST: + return (gettext("\tlist [-Hpv] [-o property[,...]] " + "[-T d|u] [pool] ... [interval [count]]\n")); + case HELP_OFFLINE: + return (gettext("\toffline [-t] <pool> <device> ...\n")); + case HELP_ONLINE: + return (gettext("\tonline [-e] <pool> <device> ...\n")); + case HELP_REPLACE: + return (gettext("\treplace [-f] <pool> <device> " + "[new-device]\n")); + case HELP_REMOVE: + return (gettext("\tremove [-nps] <pool> <device> ...\n")); + case HELP_REOPEN: + return (gettext("\treopen <pool>\n")); + case HELP_SCRUB: + return (gettext("\tscrub [-s | -p] <pool> ...\n")); + case HELP_STATUS: + return (gettext("\tstatus [-vx] [-T d|u] [pool] ... 
[interval " + "[count]]\n")); + case HELP_UPGRADE: + return (gettext("\tupgrade [-v]\n" + "\tupgrade [-V version] <-a | pool ...>\n")); + case HELP_GET: + return (gettext("\tget [-Hp] [-o \"all\" | field[,...]] " + "<\"all\" | property[,...]> <pool> ...\n")); + case HELP_SET: + return (gettext("\tset <property=value> <pool> \n")); + case HELP_SPLIT: + return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n" + "\t [-o property=value] <pool> <newpool> " + "[<device> ...]\n")); + case HELP_REGUID: + return (gettext("\treguid <pool>\n")); + } + + abort(); + /* NOTREACHED */ +} + + +/* + * Callback routine that will print out a pool property value. + */ +static int +print_prop_cb(int prop, void *cb) +{ + FILE *fp = cb; + + (void) fprintf(fp, "\t%-15s ", zpool_prop_to_name(prop)); + + if (zpool_prop_readonly(prop)) + (void) fprintf(fp, " NO "); + else + (void) fprintf(fp, " YES "); + + if (zpool_prop_values(prop) == NULL) + (void) fprintf(fp, "-\n"); + else + (void) fprintf(fp, "%s\n", zpool_prop_values(prop)); + + return (ZPROP_CONT); +} + +/* + * Display usage message. If we're inside a command, display only the usage for + * that command. Otherwise, iterate over the entire command table and display + * a complete usage message. + */ +void +usage(boolean_t requested) +{ + FILE *fp = requested ? 
stdout : stderr; + + if (current_command == NULL) { + int i; + + (void) fprintf(fp, gettext("usage: zpool command args ...\n")); + (void) fprintf(fp, + gettext("where 'command' is one of the following:\n\n")); + + for (i = 0; i < NCOMMAND; i++) { + if (command_table[i].name == NULL) + (void) fprintf(fp, "\n"); + else + (void) fprintf(fp, "%s", + get_usage(command_table[i].usage)); + } + } else { + (void) fprintf(fp, gettext("usage:\n")); + (void) fprintf(fp, "%s", get_usage(current_command->usage)); + } + + if (current_command != NULL && + ((strcmp(current_command->name, "set") == 0) || + (strcmp(current_command->name, "get") == 0) || + (strcmp(current_command->name, "list") == 0))) { + + (void) fprintf(fp, + gettext("\nthe following properties are supported:\n")); + + (void) fprintf(fp, "\n\t%-15s %s %s\n\n", + "PROPERTY", "EDIT", "VALUES"); + + /* Iterate over all properties */ + (void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE, + ZFS_TYPE_POOL); + + (void) fprintf(fp, "\t%-15s ", "feature@..."); + (void) fprintf(fp, "YES disabled | enabled | active\n"); + + (void) fprintf(fp, gettext("\nThe feature@ properties must be " + "appended with a feature name.\nSee zpool-features(7).\n")); + } + + /* + * See comments at end of main(). + */ + if (getenv("ZFS_ABORT") != NULL) { + (void) printf("dumping core by request\n"); + abort(); + } + + exit(requested ? 
0 : 2); +} + +void +print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent, + boolean_t print_logs) +{ + nvlist_t **child; + uint_t c, children; + char *vname; + + if (name != NULL) + (void) printf("\t%*s%s\n", indent, "", name); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE; + + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if ((is_log && !print_logs) || (!is_log && print_logs)) + continue; + + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_vdev_tree(zhp, vname, child[c], indent + 2, + B_FALSE); + free(vname); + } +} + +static boolean_t +prop_list_contains_feature(nvlist_t *proplist) +{ + nvpair_t *nvp; + for (nvp = nvlist_next_nvpair(proplist, NULL); NULL != nvp; + nvp = nvlist_next_nvpair(proplist, nvp)) { + if (zpool_prop_feature(nvpair_name(nvp))) + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Add a property pair (name, string-value) into a property nvlist. + */ +static int +add_prop_list(const char *propname, char *propval, nvlist_t **props, + boolean_t poolprop) +{ + zpool_prop_t prop = ZPROP_INVAL; + zfs_prop_t fprop; + nvlist_t *proplist; + const char *normnm; + char *strval; + + if (*props == NULL && + nvlist_alloc(props, NV_UNIQUE_NAME, 0) != 0) { + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + return (1); + } + + proplist = *props; + + if (poolprop) { + const char *vname = zpool_prop_to_name(ZPOOL_PROP_VERSION); + + if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL && + !zpool_prop_feature(propname)) { + (void) fprintf(stderr, gettext("property '%s' is " + "not a valid pool property\n"), propname); + return (2); + } + + /* + * feature@ properties and version should not be specified + * at the same time. 
+ */ + if ((prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname) && + nvlist_exists(proplist, vname)) || + (prop == ZPOOL_PROP_VERSION && + prop_list_contains_feature(proplist))) { + (void) fprintf(stderr, gettext("'feature@' and " + "'version' properties cannot be specified " + "together\n")); + return (2); + } + + + if (zpool_prop_feature(propname)) + normnm = propname; + else + normnm = zpool_prop_to_name(prop); + } else { + if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { + normnm = zfs_prop_to_name(fprop); + } else { + normnm = propname; + } + } + + if (nvlist_lookup_string(proplist, normnm, &strval) == 0 && + prop != ZPOOL_PROP_CACHEFILE) { + (void) fprintf(stderr, gettext("property '%s' " + "specified multiple times\n"), propname); + return (2); + } + + if (nvlist_add_string(proplist, normnm, propval) != 0) { + (void) fprintf(stderr, gettext("internal " + "error: out of memory\n")); + return (1); + } + + return (0); +} + +/* + * zpool add [-fn] <pool> <vdev> ... + * + * -f Force addition of devices, even if they appear in use + * -n Do not add the devices, but display the resulting layout if + * they were to be added. + * + * Adds the given vdevs to 'pool'. As with create, the bulk of this work is + * handled by get_vdev_spec(), which constructs the nvlist needed to pass to + * libzfs. 
+ */ +int +zpool_do_add(int argc, char **argv) +{ + boolean_t force = B_FALSE; + boolean_t dryrun = B_FALSE; + int c; + nvlist_t *nvroot; + char *poolname; + zpool_boot_label_t boot_type; + uint64_t boot_size; + int ret; + zpool_handle_t *zhp; + nvlist_t *config; + + /* check options */ + while ((c = getopt(argc, argv, "fn")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + case 'n': + dryrun = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing vdev specification\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + + argc--; + argv++; + + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + if ((config = zpool_get_config(zhp, NULL)) == NULL) { + (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), + poolname); + zpool_close(zhp); + return (1); + } + + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + /* pass off to get_vdev_spec for processing */ + boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); + nvroot = make_root_vdev(zhp, force, !force, B_FALSE, dryrun, + boot_type, boot_size, argc, argv); + if (nvroot == NULL) { + zpool_close(zhp); + return (1); + } + + if (dryrun) { + nvlist_t *poolnvroot; + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &poolnvroot) == 0); + + (void) printf(gettext("would update '%s' to the following " + "configuration:\n"), zpool_get_name(zhp)); + + /* print original main pool and new tree */ + print_vdev_tree(zhp, poolname, poolnvroot, 0, B_FALSE); + print_vdev_tree(zhp, NULL, nvroot, 0, B_FALSE); + + /* Do the same for the logs */ + if (num_logs(poolnvroot) > 0) { + 
print_vdev_tree(zhp, "logs", poolnvroot, 0, B_TRUE); + print_vdev_tree(zhp, NULL, nvroot, 0, B_TRUE); + } else if (num_logs(nvroot) > 0) { + print_vdev_tree(zhp, "logs", nvroot, 0, B_TRUE); + } + + ret = 0; + } else { + ret = (zpool_add(zhp, nvroot) != 0); + } + + nvlist_free(nvroot); + zpool_close(zhp); + + return (ret); +} + +/* + * zpool remove <pool> <vdev> ... + * + * Removes the given vdev from the pool. + */ +int +zpool_do_remove(int argc, char **argv) +{ + char *poolname; + int i, ret = 0; + zpool_handle_t *zhp; + boolean_t stop = B_FALSE; + boolean_t noop = B_FALSE; + boolean_t parsable = B_FALSE; + char c; + + /* check options */ + while ((c = getopt(argc, argv, "nps")) != -1) { + switch (c) { + case 'n': + noop = B_TRUE; + break; + case 'p': + parsable = B_TRUE; + break; + case 's': + stop = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + if (stop && noop) { + (void) fprintf(stderr, gettext("stop request ignored\n")); + return (0); + } + + if (stop) { + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + if (zpool_vdev_remove_cancel(zhp) != 0) + ret = 1; + } else { + if (argc < 2) { + (void) fprintf(stderr, gettext("missing device\n")); + usage(B_FALSE); + } + + for (i = 1; i < argc; i++) { + if (noop) { + uint64_t size; + + if (zpool_vdev_indirect_size(zhp, argv[i], + &size) != 0) { + ret = 1; + break; + } + if (parsable) { + (void) printf("%s %llu\n", + argv[i], size); + } else { + char valstr[32]; + zfs_nicenum(size, valstr, + sizeof (valstr)); + (void) printf("Memory that will be " + "used after removing %s: %s\n", + argv[i], valstr); + } + 
} else { + if (zpool_vdev_remove(zhp, argv[i]) != 0) + ret = 1; + } + } + } + + return (ret); +} + +/* + * zpool labelclear [-f] <vdev> + * + * -f Force clearing the label for the vdevs which are members of + * the exported or foreign pools. + * + * Verifies that the vdev is not active and zeros out the label information + * on the device. + */ +int +zpool_do_labelclear(int argc, char **argv) +{ + char vdev[MAXPATHLEN]; + char *name = NULL; + struct stat st; + int c, fd, ret = 0; + nvlist_t *config; + pool_state_t state; + boolean_t inuse = B_FALSE; + boolean_t force = B_FALSE; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + default: + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get vdev name */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing vdev name\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + /* + * Check if we were given absolute path and use it as is. + * Otherwise if the provided vdev name doesn't point to a file, + * try prepending dsk path and appending s0. 
+ */ + (void) strlcpy(vdev, argv[0], sizeof (vdev)); + if (vdev[0] != '/' && stat(vdev, &st) != 0) { + char *s; + + (void) snprintf(vdev, sizeof (vdev), "%s/%s", +#ifdef illumos + ZFS_DISK_ROOT, argv[0]); + if ((s = strrchr(argv[0], 's')) == NULL || + !isdigit(*(s + 1))) + (void) strlcat(vdev, "s0", sizeof (vdev)); +#else + "/dev", argv[0]); +#endif + if (stat(vdev, &st) != 0) { + (void) fprintf(stderr, gettext( + "failed to find device %s, try specifying absolute " + "path instead\n"), argv[0]); + return (1); + } + } + + if ((fd = open(vdev, O_RDWR)) < 0) { + (void) fprintf(stderr, gettext("failed to open %s: %s\n"), + vdev, strerror(errno)); + return (1); + } + + if (zpool_read_label(fd, &config) != 0) { + (void) fprintf(stderr, + gettext("failed to read label from %s\n"), vdev); + return (1); + } + nvlist_free(config); + + ret = zpool_in_use(g_zfs, fd, &state, &name, &inuse); + if (ret != 0) { + (void) fprintf(stderr, + gettext("failed to check state for %s\n"), vdev); + return (1); + } + + if (!inuse) + goto wipe_label; + + switch (state) { + default: + case POOL_STATE_ACTIVE: + case POOL_STATE_SPARE: + case POOL_STATE_L2CACHE: + (void) fprintf(stderr, gettext( + "%s is a member (%s) of pool \"%s\"\n"), + vdev, zpool_pool_state_to_name(state), name); + ret = 1; + goto errout; + + case POOL_STATE_EXPORTED: + if (force) + break; + (void) fprintf(stderr, gettext( + "use '-f' to override the following error:\n" + "%s is a member of exported pool \"%s\"\n"), + vdev, name); + ret = 1; + goto errout; + + case POOL_STATE_POTENTIALLY_ACTIVE: + if (force) + break; + (void) fprintf(stderr, gettext( + "use '-f' to override the following error:\n" + "%s is a member of potentially active pool \"%s\"\n"), + vdev, name); + ret = 1; + goto errout; + + case POOL_STATE_DESTROYED: + /* inuse should never be set for a destroyed pool */ + assert(0); + break; + } + +wipe_label: + ret = zpool_clear_label(fd); + if (ret != 0) { + (void) fprintf(stderr, + gettext("failed to clear label 
for %s\n"), vdev); + } + +errout: + free(name); + (void) close(fd); + + return (ret); +} + +/* + * zpool create [-fnd] [-B] [-o property=value] ... + * [-O file-system-property=value] ... + * [-R root] [-m mountpoint] <pool> <dev> ... + * + * -B Create boot partition. + * -f Force creation, even if devices appear in use + * -n Do not create the pool, but display the resulting layout if it + * were to be created. + * -R Create a pool under an alternate root + * -m Set default mountpoint for the root dataset. By default it's + * '/<pool>' + * -o Set property=value. + * -d Don't automatically enable all supported pool features + * (individual features can be enabled with -o). + * -O Set fsproperty=value in the pool's root file system + * + * Creates the named pool according to the given vdev specification. The + * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. Once + * we get the nvlist back from get_vdev_spec(), we either print out the contents + * (if '-n' was specified), or pass it to libzfs to do the creation. + */ + +#define SYSTEM256 (256 * 1024 * 1024) +int +zpool_do_create(int argc, char **argv) +{ + boolean_t force = B_FALSE; + boolean_t dryrun = B_FALSE; + boolean_t enable_all_pool_feat = B_TRUE; + zpool_boot_label_t boot_type = ZPOOL_NO_BOOT_LABEL; + uint64_t boot_size = 0; + int c; + nvlist_t *nvroot = NULL; + char *poolname; + int ret = 1; + char *altroot = NULL; + char *mountpoint = NULL; + nvlist_t *fsprops = NULL; + nvlist_t *props = NULL; + char *propval; + + /* check options */ + while ((c = getopt(argc, argv, ":fndBR:m:o:O:")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + case 'n': + dryrun = B_TRUE; + break; + case 'd': + enable_all_pool_feat = B_FALSE; + break; + case 'B': +#ifdef illumos + /* + * We should create the system partition. + * Also make sure the size is set. 
+ */ + boot_type = ZPOOL_CREATE_BOOT_LABEL; + if (boot_size == 0) + boot_size = SYSTEM256; + break; +#else + (void) fprintf(stderr, + gettext("option '%c' is not supported\n"), + optopt); + goto badusage; +#endif + case 'R': + altroot = optarg; + if (add_prop_list(zpool_prop_to_name( + ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) + goto errout; + if (nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), + &propval) == 0) + break; + if (add_prop_list(zpool_prop_to_name( + ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) + goto errout; + break; + case 'm': + /* Equivalent to -O mountpoint=optarg */ + mountpoint = optarg; + break; + case 'o': + if ((propval = strchr(optarg, '=')) == NULL) { + (void) fprintf(stderr, gettext("missing " + "'=' for -o option\n")); + goto errout; + } + *propval = '\0'; + propval++; + + if (add_prop_list(optarg, propval, &props, B_TRUE)) + goto errout; + + /* + * Get bootsize value for make_root_vdev(). + */ + if (zpool_name_to_prop(optarg) == ZPOOL_PROP_BOOTSIZE) { + if (zfs_nicestrtonum(g_zfs, propval, + &boot_size) < 0 || boot_size == 0) { + (void) fprintf(stderr, + gettext("bad boot partition size " + "'%s': %s\n"), propval, + libzfs_error_description(g_zfs)); + goto errout; + } + } + + /* + * If the user is creating a pool that doesn't support + * feature flags, don't enable any features. + */ + if (zpool_name_to_prop(optarg) == ZPOOL_PROP_VERSION) { + char *end; + u_longlong_t ver; + + ver = strtoull(propval, &end, 10); + if (*end == '\0' && + ver < SPA_VERSION_FEATURES) { + enable_all_pool_feat = B_FALSE; + } + } + if (zpool_name_to_prop(optarg) == ZPOOL_PROP_ALTROOT) + altroot = propval; + break; + case 'O': + if ((propval = strchr(optarg, '=')) == NULL) { + (void) fprintf(stderr, gettext("missing " + "'=' for -O option\n")); + goto errout; + } + *propval = '\0'; + propval++; + + /* + * Mountpoints are checked and then added later. 
+ * Uniquely among properties, they can be specified + * more than once, to avoid conflict with -m. + */ + if (0 == strcmp(optarg, + zfs_prop_to_name(ZFS_PROP_MOUNTPOINT))) { + mountpoint = propval; + } else if (add_prop_list(optarg, propval, &fsprops, + B_FALSE)) { + goto errout; + } + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + goto badusage; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + goto badusage; + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + goto badusage; + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing vdev specification\n")); + goto badusage; + } + + poolname = argv[0]; + + /* + * As a special case, check for use of '/' in the name, and direct the + * user to use 'zfs create' instead. + */ + if (strchr(poolname, '/') != NULL) { + (void) fprintf(stderr, gettext("cannot create '%s': invalid " + "character '/' in pool name\n"), poolname); + (void) fprintf(stderr, gettext("use 'zfs create' to " + "create a dataset\n")); + goto errout; + } + + /* + * Make sure the bootsize is set when ZPOOL_CREATE_BOOT_LABEL is used, + * and not set otherwise. 
+ */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + const char *propname; + char *strptr, *buf = NULL; + int rv; + + propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); + if (nvlist_lookup_string(props, propname, &strptr) != 0) { + (void) asprintf(&buf, "%" PRIu64, boot_size); + if (buf == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + goto errout; + } + rv = add_prop_list(propname, buf, &props, B_TRUE); + free(buf); + if (rv != 0) + goto errout; + } + } else { + const char *propname; + char *strptr; + + propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); + if (nvlist_lookup_string(props, propname, &strptr) == 0) { + (void) fprintf(stderr, gettext("error: setting boot " + "partition size requires option '-B'\n")); + goto errout; + } + } + + /* pass off to get_vdev_spec for bulk processing */ + nvroot = make_root_vdev(NULL, force, !force, B_FALSE, dryrun, + boot_type, boot_size, argc - 1, argv + 1); + if (nvroot == NULL) + goto errout; + + /* make_root_vdev() allows 0 toplevel children if there are spares */ + if (!zfs_allocatable_devs(nvroot)) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: at least one toplevel vdev must be " + "specified\n")); + goto errout; + } + + if (altroot != NULL && altroot[0] != '/') { + (void) fprintf(stderr, gettext("invalid alternate root '%s': " + "must be an absolute path\n"), altroot); + goto errout; + } + + /* + * Check the validity of the mountpoint and direct the user to use the + * '-m' mountpoint option if it looks like its in use. + * Ignore the checks if the '-f' option is given. 
+ */ + if (!force && (mountpoint == NULL || + (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && + strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0))) { + char buf[MAXPATHLEN]; + DIR *dirp; + + if (mountpoint && mountpoint[0] != '/') { + (void) fprintf(stderr, gettext("invalid mountpoint " + "'%s': must be an absolute path, 'legacy', or " + "'none'\n"), mountpoint); + goto errout; + } + + if (mountpoint == NULL) { + if (altroot != NULL) + (void) snprintf(buf, sizeof (buf), "%s/%s", + altroot, poolname); + else + (void) snprintf(buf, sizeof (buf), "/%s", + poolname); + } else { + if (altroot != NULL) + (void) snprintf(buf, sizeof (buf), "%s%s", + altroot, mountpoint); + else + (void) snprintf(buf, sizeof (buf), "%s", + mountpoint); + } + + if ((dirp = opendir(buf)) == NULL && errno != ENOENT) { + (void) fprintf(stderr, gettext("mountpoint '%s' : " + "%s\n"), buf, strerror(errno)); + (void) fprintf(stderr, gettext("use '-m' " + "option to provide a different default\n")); + goto errout; + } else if (dirp) { + int count = 0; + + while (count < 3 && readdir(dirp) != NULL) + count++; + (void) closedir(dirp); + + if (count > 2) { + (void) fprintf(stderr, gettext("mountpoint " + "'%s' exists and is not empty\n"), buf); + (void) fprintf(stderr, gettext("use '-m' " + "option to provide a " + "different default\n")); + goto errout; + } + } + } + + /* + * Now that the mountpoint's validity has been checked, ensure that + * the property is set appropriately prior to creating the pool. + */ + if (mountpoint != NULL) { + ret = add_prop_list(zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), + mountpoint, &fsprops, B_FALSE); + if (ret != 0) + goto errout; + } + + ret = 1; + if (dryrun) { + /* + * For a dry run invocation, print out a basic message and run + * through all the vdevs in the list and print out in an + * appropriate hierarchy. 
+ */ + (void) printf(gettext("would create '%s' with the " + "following layout:\n\n"), poolname); + + print_vdev_tree(NULL, poolname, nvroot, 0, B_FALSE); + if (num_logs(nvroot) > 0) + print_vdev_tree(NULL, "logs", nvroot, 0, B_TRUE); + + ret = 0; + } else { + /* + * Hand off to libzfs. + */ + if (enable_all_pool_feat) { + spa_feature_t i; + for (i = 0; i < SPA_FEATURES; i++) { + char propname[MAXPATHLEN]; + zfeature_info_t *feat = &spa_feature_table[i]; + + (void) snprintf(propname, sizeof (propname), + "feature@%s", feat->fi_uname); + + /* + * Skip feature if user specified it manually + * on the command line. + */ + if (nvlist_exists(props, propname)) + continue; + + ret = add_prop_list(propname, + ZFS_FEATURE_ENABLED, &props, B_TRUE); + if (ret != 0) + goto errout; + } + } + + ret = 1; + if (zpool_create(g_zfs, poolname, + nvroot, props, fsprops) == 0) { + zfs_handle_t *pool = zfs_open(g_zfs, poolname, + ZFS_TYPE_FILESYSTEM); + if (pool != NULL) { + if (zfs_mount(pool, NULL, 0) == 0) + ret = zfs_shareall(pool); + zfs_close(pool); + } + } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { + (void) fprintf(stderr, gettext("pool name may have " + "been omitted\n")); + } + } + +errout: + nvlist_free(nvroot); + nvlist_free(fsprops); + nvlist_free(props); + return (ret); +badusage: + nvlist_free(fsprops); + nvlist_free(props); + usage(B_FALSE); + return (2); +} + +/* + * zpool destroy <pool> + * + * -f Forcefully unmount any datasets + * + * Destroy the given pool. Automatically unmounts any datasets in the pool. 
+ */ +int +zpool_do_destroy(int argc, char **argv) +{ + boolean_t force = B_FALSE; + int c; + char *pool; + zpool_handle_t *zhp; + int ret; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool argument\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + pool = argv[0]; + + if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { + /* + * As a special case, check for use of '/' in the name, and + * direct the user to use 'zfs destroy' instead. + */ + if (strchr(pool, '/') != NULL) + (void) fprintf(stderr, gettext("use 'zfs destroy' to " + "destroy a dataset\n")); + return (1); + } + + if (zpool_disable_datasets(zhp, force) != 0) { + (void) fprintf(stderr, gettext("could not destroy '%s': " + "could not unmount datasets\n"), zpool_get_name(zhp)); + return (1); + } + + /* The history must be logged as part of the export */ + log_history = B_FALSE; + + ret = (zpool_destroy(zhp, history_str) != 0); + + zpool_close(zhp); + + return (ret); +} + +/* + * zpool export [-f] <pool> ... + * + * -f Forcefully unmount datasets + * + * Export the given pools. By default, the command will attempt to cleanly + * unmount any active datasets within the pool. If the '-f' flag is specified, + * then the datasets will be forcefully unmounted. 
+ */ +int +zpool_do_export(int argc, char **argv) +{ + boolean_t force = B_FALSE; + boolean_t hardforce = B_FALSE; + int c; + zpool_handle_t *zhp; + int ret; + int i; + + /* check options */ + while ((c = getopt(argc, argv, "fF")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + case 'F': + hardforce = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool argument\n")); + usage(B_FALSE); + } + + ret = 0; + for (i = 0; i < argc; i++) { + if ((zhp = zpool_open_canfail(g_zfs, argv[i])) == NULL) { + ret = 1; + continue; + } + + if (zpool_disable_datasets(zhp, force) != 0) { + ret = 1; + zpool_close(zhp); + continue; + } + + /* The history must be logged as part of the export */ + log_history = B_FALSE; + + if (hardforce) { + if (zpool_export_force(zhp, history_str) != 0) + ret = 1; + } else if (zpool_export(zhp, force, history_str) != 0) { + ret = 1; + } + + zpool_close(zhp); + } + + return (ret); +} + +/* + * Given a vdev configuration, determine the maximum width needed for the device + * name column. 
+ */ +static int +max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max) +{ + char *name = zpool_vdev_name(g_zfs, zhp, nv, B_TRUE); + nvlist_t **child; + uint_t c, children; + int ret; + + if (strlen(name) + depth > max) + max = strlen(name) + depth; + + free(name); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if ((ret = max_width(zhp, child[c], depth + 2, + max)) > max) + max = ret; + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if ((ret = max_width(zhp, child[c], depth + 2, + max)) > max) + max = ret; + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if ((ret = max_width(zhp, child[c], depth + 2, + max)) > max) + max = ret; + } + + + return (max); +} + +typedef struct spare_cbdata { + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} spare_cbdata_t; + +static boolean_t +find_vdev(nvlist_t *nv, uint64_t search) +{ + uint64_t guid; + nvlist_t **child; + uint_t c, children; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && + search == guid) + return (B_TRUE); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (find_vdev(child[c], search)) + return (B_TRUE); + } + + return (B_FALSE); +} + +static int +find_spare(zpool_handle_t *zhp, void *data) +{ + spare_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + if (find_vdev(nvroot, cbp->cb_guid)) { + cbp->cb_zhp = zhp; + return (1); + } + + zpool_close(zhp); + return (0); +} + +/* + * Print out configuration state as requested by status_callback. 
+ */ +void +print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, + int namewidth, int depth, boolean_t isspare) +{ + nvlist_t **child; + uint_t c, vsc, children; + pool_scan_stat_t *ps = NULL; + vdev_stat_t *vs; + char rbuf[6], wbuf[6], cbuf[6]; + char *vname; + uint64_t notpresent; + uint64_t ashift; + spare_cbdata_t cb; + const char *state; + char *type; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + children = 0; + + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (strcmp(type, VDEV_TYPE_INDIRECT) == 0) + return; + + state = zpool_state_to_name(vs->vs_state, vs->vs_aux); + if (isspare) { + /* + * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for + * online drives. + */ + if (vs->vs_aux == VDEV_AUX_SPARED) + state = "INUSE"; + else if (vs->vs_state == VDEV_STATE_HEALTHY) + state = "AVAIL"; + } + + (void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth, + name, state); + + if (!isspare) { + zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf)); + zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf)); + zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf)); + (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf); + } + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, + ¬present) == 0 || + vs->vs_state <= VDEV_STATE_CANT_OPEN) { + char *path; + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) + (void) printf(" was %s", path); + } else if (vs->vs_aux != 0) { + (void) printf(" "); + + switch (vs->vs_aux) { + case VDEV_AUX_OPEN_FAILED: + (void) printf(gettext("cannot open")); + break; + + case VDEV_AUX_BAD_GUID_SUM: + (void) printf(gettext("missing device")); + break; + + case VDEV_AUX_NO_REPLICAS: + (void) printf(gettext("insufficient replicas")); + break; + + case VDEV_AUX_VERSION_NEWER: + (void) printf(gettext("newer version")); + break; + + case 
VDEV_AUX_UNSUP_FEAT: + (void) printf(gettext("unsupported feature(s)")); + break; + + case VDEV_AUX_ASHIFT_TOO_BIG: + (void) printf(gettext("unsupported minimum blocksize")); + break; + + case VDEV_AUX_SPARED: + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, + &cb.cb_guid) == 0); + if (zpool_iter(g_zfs, find_spare, &cb) == 1) { + if (strcmp(zpool_get_name(cb.cb_zhp), + zpool_get_name(zhp)) == 0) + (void) printf(gettext("currently in " + "use")); + else + (void) printf(gettext("in use by " + "pool '%s'"), + zpool_get_name(cb.cb_zhp)); + zpool_close(cb.cb_zhp); + } else { + (void) printf(gettext("currently in use")); + } + break; + + case VDEV_AUX_ERR_EXCEEDED: + (void) printf(gettext("too many errors")); + break; + + case VDEV_AUX_IO_FAILURE: + (void) printf(gettext("experienced I/O failures")); + break; + + case VDEV_AUX_BAD_LOG: + (void) printf(gettext("bad intent log")); + break; + + case VDEV_AUX_EXTERNAL: + (void) printf(gettext("external device fault")); + break; + + case VDEV_AUX_SPLIT_POOL: + (void) printf(gettext("split into new pool")); + break; + + case VDEV_AUX_CHILDREN_OFFLINE: + (void) printf(gettext("all children offline")); + break; + + default: + (void) printf(gettext("corrupted data")); + break; + } + } else if (children == 0 && !isspare && + VDEV_STAT_VALID(vs_physical_ashift, vsc) && + vs->vs_configured_ashift < vs->vs_physical_ashift) { + (void) printf( + gettext(" block size: %dB configured, %dB native"), + 1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift); + } + + (void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS, + (uint64_t **)&ps, &c); + + if (ps && ps->pss_state == DSS_SCANNING && + vs->vs_scan_processed != 0 && children == 0) { + (void) printf(gettext(" (%s)"), + (ps->pss_func == POOL_SCAN_RESILVER) ? 
+ "resilvering" : "repairing"); + } + + (void) printf("\n"); + + for (c = 0; c < children; c++) { + uint64_t islog = B_FALSE, ishole = B_FALSE; + + /* Don't print logs or holes here */ + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &islog); + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, + &ishole); + if (islog || ishole) + continue; + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE); + print_status_config(zhp, vname, child[c], + namewidth, depth + 2, isspare); + free(vname); + } +} + + +/* + * Print the configuration of an exported pool. Iterate over all vdevs in the + * pool, printing out the name and status for each one. + */ +void +print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) +{ + nvlist_t **child; + uint_t c, children; + vdev_stat_t *vs; + char *type, *vname; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + if (strcmp(type, VDEV_TYPE_MISSING) == 0 || + strcmp(type, VDEV_TYPE_HOLE) == 0) + return; + + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0); + + (void) printf("\t%*s%-*s", depth, "", namewidth - depth, name); + (void) printf(" %s", zpool_state_to_name(vs->vs_state, vs->vs_aux)); + + if (vs->vs_aux != 0) { + (void) printf(" "); + + switch (vs->vs_aux) { + case VDEV_AUX_OPEN_FAILED: + (void) printf(gettext("cannot open")); + break; + + case VDEV_AUX_BAD_GUID_SUM: + (void) printf(gettext("missing device")); + break; + + case VDEV_AUX_NO_REPLICAS: + (void) printf(gettext("insufficient replicas")); + break; + + case VDEV_AUX_VERSION_NEWER: + (void) printf(gettext("newer version")); + break; + + case VDEV_AUX_UNSUP_FEAT: + (void) printf(gettext("unsupported feature(s)")); + break; + + case VDEV_AUX_ERR_EXCEEDED: + (void) printf(gettext("too many errors")); + break; + + case VDEV_AUX_CHILDREN_OFFLINE: + (void) printf(gettext("all children offline")); + break; + + default: + (void) printf(gettext("corrupted data")); + break; + } 
+ } + (void) printf("\n"); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE; + + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if (is_log) + continue; + + vname = zpool_vdev_name(g_zfs, NULL, child[c], B_TRUE); + print_import_config(vname, child[c], namewidth, depth + 2); + free(vname); + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0) { + (void) printf(gettext("\tcache\n")); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE); + (void) printf("\t %s\n", vname); + free(vname); + } + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + (void) printf(gettext("\tspares\n")); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE); + (void) printf("\t %s\n", vname); + free(vname); + } + } +} + +/* + * Print log vdevs. + * Logs are recorded as top level vdevs in the main pool child array + * but with "is_log" set to 1. 
We use either print_status_config() or + * print_import_config() to print the top level logs then any log + * children (eg mirrored slogs) are printed recursively - which + * works because only the top level vdev is marked "is_log" + */ +static void +print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose) +{ + uint_t c, children; + nvlist_t **child; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, + &children) != 0) + return; + + (void) printf(gettext("\tlogs\n")); + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE; + char *name; + + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if (!is_log) + continue; + name = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE); + if (verbose) + print_status_config(zhp, name, child[c], namewidth, + 2, B_FALSE); + else + print_import_config(name, child[c], namewidth, 2); + free(name); + } +} + +/* + * Display the status for the given pool. + */ +static void +show_import(nvlist_t *config) +{ + uint64_t pool_state; + vdev_stat_t *vs; + char *name; + uint64_t guid; + char *msgid; + nvlist_t *nvroot; + int reason; + const char *health; + uint_t vsc; + int namewidth; + char *comment; + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &pool_state) == 0); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + health = zpool_state_to_name(vs->vs_state, vs->vs_aux); + + reason = zpool_import_status(config, &msgid); + + (void) printf(gettext(" pool: %s\n"), name); + (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); + (void) printf(gettext(" state: %s"), health); + if (pool_state == POOL_STATE_DESTROYED) + (void) printf(gettext(" (DESTROYED)")); + (void) 
printf("\n"); + + switch (reason) { + case ZPOOL_STATUS_MISSING_DEV_R: + case ZPOOL_STATUS_MISSING_DEV_NR: + case ZPOOL_STATUS_BAD_GUID_SUM: + (void) printf(gettext(" status: One or more devices are " + "missing from the system.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_LABEL_R: + case ZPOOL_STATUS_CORRUPT_LABEL_NR: + (void) printf(gettext(" status: One or more devices contains " + "corrupted data.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_DATA: + (void) printf( + gettext(" status: The pool data is corrupted.\n")); + break; + + case ZPOOL_STATUS_OFFLINE_DEV: + (void) printf(gettext(" status: One or more devices " + "are offlined.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_POOL: + (void) printf(gettext(" status: The pool metadata is " + "corrupted.\n")); + break; + + case ZPOOL_STATUS_VERSION_OLDER: + (void) printf(gettext(" status: The pool is formatted using a " + "legacy on-disk version.\n")); + break; + + case ZPOOL_STATUS_VERSION_NEWER: + (void) printf(gettext(" status: The pool is formatted using an " + "incompatible version.\n")); + break; + + case ZPOOL_STATUS_FEAT_DISABLED: + (void) printf(gettext(" status: Some supported features are " + "not enabled on the pool.\n")); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_READ: + (void) printf(gettext("status: The pool uses the following " + "feature(s) not supported on this system:\n")); + zpool_print_unsup_feat(config); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_WRITE: + (void) printf(gettext("status: The pool can only be accessed " + "in read-only mode on this system. 
It\n\tcannot be " + "accessed in read-write mode because it uses the " + "following\n\tfeature(s) not supported on this system:\n")); + zpool_print_unsup_feat(config); + break; + + case ZPOOL_STATUS_HOSTID_MISMATCH: + (void) printf(gettext(" status: The pool was last accessed by " + "another system.\n")); + break; + + case ZPOOL_STATUS_FAULTED_DEV_R: + case ZPOOL_STATUS_FAULTED_DEV_NR: + (void) printf(gettext(" status: One or more devices are " + "faulted.\n")); + break; + + case ZPOOL_STATUS_BAD_LOG: + (void) printf(gettext(" status: An intent log record cannot be " + "read.\n")); + break; + + case ZPOOL_STATUS_RESILVERING: + (void) printf(gettext(" status: One or more devices were being " + "resilvered.\n")); + break; + + case ZPOOL_STATUS_NON_NATIVE_ASHIFT: + (void) printf(gettext("status: One or more devices were " + "configured to use a non-native block size.\n" + "\tExpect reduced performance.\n")); + break; + + default: + /* + * No other status can be seen when importing pools. + */ + assert(reason == ZPOOL_STATUS_OK); + } + + /* + * Print out an action according to the overall state of the pool. + */ + if (vs->vs_state == VDEV_STATE_HEALTHY) { + if (reason == ZPOOL_STATUS_VERSION_OLDER || + reason == ZPOOL_STATUS_FEAT_DISABLED) { + (void) printf(gettext(" action: The pool can be " + "imported using its name or numeric identifier, " + "though\n\tsome features will not be available " + "without an explicit 'zpool upgrade'.\n")); + } else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) { + (void) printf(gettext(" action: The pool can be " + "imported using its name or numeric " + "identifier and\n\tthe '-f' flag.\n")); + } else { + (void) printf(gettext(" action: The pool can be " + "imported using its name or numeric " + "identifier.\n")); + } + } else if (vs->vs_state == VDEV_STATE_DEGRADED) { + (void) printf(gettext(" action: The pool can be imported " + "despite missing or damaged devices. 
The\n\tfault " + "tolerance of the pool may be compromised if imported.\n")); + } else { + switch (reason) { + case ZPOOL_STATUS_VERSION_NEWER: + (void) printf(gettext(" action: The pool cannot be " + "imported. Access the pool on a system running " + "newer\n\tsoftware, or recreate the pool from " + "backup.\n")); + break; + case ZPOOL_STATUS_UNSUP_FEAT_READ: + (void) printf(gettext("action: The pool cannot be " + "imported. Access the pool on a system that " + "supports\n\tthe required feature(s), or recreate " + "the pool from backup.\n")); + break; + case ZPOOL_STATUS_UNSUP_FEAT_WRITE: + (void) printf(gettext("action: The pool cannot be " + "imported in read-write mode. Import the pool " + "with\n" + "\t\"-o readonly=on\", access the pool on a system " + "that supports the\n\trequired feature(s), or " + "recreate the pool from backup.\n")); + break; + case ZPOOL_STATUS_MISSING_DEV_R: + case ZPOOL_STATUS_MISSING_DEV_NR: + case ZPOOL_STATUS_BAD_GUID_SUM: + (void) printf(gettext(" action: The pool cannot be " + "imported. Attach the missing\n\tdevices and try " + "again.\n")); + break; + default: + (void) printf(gettext(" action: The pool cannot be " + "imported due to damaged devices or data.\n")); + } + } + + /* Print the comment attached to the pool. 
*/ + if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) + (void) printf(gettext("comment: %s\n"), comment); + + /* + * If the state is "closed" or "can't open", and the aux state + * is "corrupt data": + */ + if (((vs->vs_state == VDEV_STATE_CLOSED) || + (vs->vs_state == VDEV_STATE_CANT_OPEN)) && + (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)) { + if (pool_state == POOL_STATE_DESTROYED) + (void) printf(gettext("\tThe pool was destroyed, " + "but can be imported using the '-Df' flags.\n")); + else if (pool_state != POOL_STATE_EXPORTED) + (void) printf(gettext("\tThe pool may be active on " + "another system, but can be imported using\n\t" + "the '-f' flag.\n")); + } + + if (msgid != NULL) + (void) printf(gettext(" see: http://illumos.org/msg/%s\n"), + msgid); + + (void) printf(gettext(" config:\n\n")); + + namewidth = max_width(NULL, nvroot, 0, 0); + if (namewidth < 10) + namewidth = 10; + + print_import_config(name, nvroot, namewidth, 0); + if (num_logs(nvroot) > 0) + print_logs(NULL, nvroot, namewidth, B_FALSE); + + if (reason == ZPOOL_STATUS_BAD_GUID_SUM) { + (void) printf(gettext("\n\tAdditional devices are known to " + "be part of this pool, though their\n\texact " + "configuration cannot be determined.\n")); + } +} + +/* + * Perform the import for the given configuration. This passes the heavy + * lifting off to zpool_import_props(), and then mounts the datasets contained + * within the pool. 
+ */ +static int +do_import(nvlist_t *config, const char *newname, const char *mntopts, + nvlist_t *props, int flags) +{ + zpool_handle_t *zhp; + char *name; + uint64_t state; + uint64_t version; + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + + verify(nvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_STATE, &state) == 0); + verify(nvlist_lookup_uint64(config, + ZPOOL_CONFIG_VERSION, &version) == 0); + if (!SPA_VERSION_IS_SUPPORTED(version)) { + (void) fprintf(stderr, gettext("cannot import '%s': pool " + "is formatted using an unsupported ZFS version\n"), name); + return (1); + } else if (state != POOL_STATE_EXPORTED && + !(flags & ZFS_IMPORT_ANY_HOST)) { + uint64_t hostid; + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, + &hostid) == 0) { + if ((unsigned long)hostid != gethostid()) { + char *hostname; + uint64_t timestamp; + time_t t; + + verify(nvlist_lookup_string(config, + ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); + verify(nvlist_lookup_uint64(config, + ZPOOL_CONFIG_TIMESTAMP, ×tamp) == 0); + t = timestamp; + (void) fprintf(stderr, gettext("cannot import " + "'%s': pool may be in use from other " + "system, it was last accessed by %s " + "(hostid: 0x%lx) on %s"), name, hostname, + (unsigned long)hostid, + asctime(localtime(&t))); + (void) fprintf(stderr, gettext("use '-f' to " + "import anyway\n")); + return (1); + } + } else { + (void) fprintf(stderr, gettext("cannot import '%s': " + "pool may be in use from other system\n"), name); + (void) fprintf(stderr, gettext("use '-f' to import " + "anyway\n")); + return (1); + } + } + + if (zpool_import_props(g_zfs, config, newname, props, flags) != 0) + return (1); + + if (newname != NULL) + name = (char *)newname; + + if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) + return (1); + + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && + !(flags & ZFS_IMPORT_ONLY) && + zpool_enable_datasets(zhp, mntopts, 0) != 0) { + zpool_close(zhp); + return (1); + } + + zpool_close(zhp); 
+ return (0); +} + +/* + * zpool checkpoint <pool> + * checkpoint --discard <pool> + * + * -d Discard the checkpoint from a checkpointed + * --discard pool. + * + * Checkpoints the specified pool, by taking a "snapshot" of its + * current state. A pool can only have one checkpoint at a time. + */ +int +zpool_do_checkpoint(int argc, char **argv) +{ + boolean_t discard; + char *pool; + zpool_handle_t *zhp; + int c, err; + + struct option long_options[] = { + {"discard", no_argument, NULL, 'd'}, + {0, 0, 0, 0} + }; + + discard = B_FALSE; + while ((c = getopt_long(argc, argv, ":d", long_options, NULL)) != -1) { + switch (c) { + case 'd': + discard = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool argument\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + pool = argv[0]; + + if ((zhp = zpool_open(g_zfs, pool)) == NULL) { + /* As a special case, check for use of '/' in the name */ + if (strchr(pool, '/') != NULL) + (void) fprintf(stderr, gettext("'zpool checkpoint' " + "doesn't work on datasets. To save the state " + "of a dataset from a specific point in time " + "please use 'zfs snapshot'\n")); + return (1); + } + + if (discard) + err = (zpool_discard_checkpoint(zhp) != 0); + else + err = (zpool_checkpoint(zhp) != 0); + + zpool_close(zhp); + + return (err); +} + +#define CHECKPOINT_OPT 1024 + +/* + * zpool import [-d dir] [-D] + * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * [-d dir | -c cachefile] [-f] -a + * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool] + * + * -c Read pool information from a cachefile instead of searching + * devices. + * + * -d Scan in a specific directory, other than /dev/dsk. 
More than + * one directory can be specified using multiple '-d' options. + * + * -D Scan for previously destroyed pools or import all or only + * specified destroyed pools. + * + * -R Temporarily import the pool, with all mountpoints relative to + * the given root. The pool will remain exported when the machine + * is rebooted. + * + * -V Import even in the presence of faulted vdevs. This is an + * intentionally undocumented option for testing purposes, and + * treats the pool configuration as complete, leaving any bad + * vdevs in the FAULTED state. In other words, it does a verbatim + * import. + * + * -f Force import, even if it appears that the pool is active. + * + * -F Attempt rewind if necessary. + * + * -n See if rewind would work, but don't actually rewind. + * + * -N Import the pool but don't mount datasets. + * + * -T Specify a starting txg to use for import. This is an + * intentionally undocumented option for testing purposes. + * + * -a Import all pools found. + * + * -o Set property=value and/or temporary mount options (without '='). + * + * --rewind-to-checkpoint + * Import the pool and revert back to the checkpoint. + * + * The import command scans for pools to import, and imports pools based on pool + * name and GUID. The pool can also be renamed as part of the import process.
+ */ +int +zpool_do_import(int argc, char **argv) +{ + char **searchdirs = NULL; + int nsearch = 0; + int c; + int err = 0; + nvlist_t *pools = NULL; + boolean_t do_all = B_FALSE; + boolean_t do_destroyed = B_FALSE; + char *mntopts = NULL; + nvpair_t *elem; + nvlist_t *config; + uint64_t searchguid = 0; + char *searchname = NULL; + char *propval; + nvlist_t *found_config; + nvlist_t *policy = NULL; + nvlist_t *props = NULL; + boolean_t first; + int flags = ZFS_IMPORT_NORMAL; + uint32_t rewind_policy = ZPOOL_NO_REWIND; + boolean_t dryrun = B_FALSE; + boolean_t do_rewind = B_FALSE; + boolean_t xtreme_rewind = B_FALSE; + uint64_t pool_state, txg = -1ULL; + char *cachefile = NULL; + importargs_t idata = { 0 }; + char *endptr; + + + struct option long_options[] = { + {"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT}, + {0, 0, 0, 0} + }; + + /* check options */ + while ((c = getopt_long(argc, argv, ":aCc:d:DEfFmnNo:rR:T:VX", + long_options, NULL)) != -1) { + switch (c) { + case 'a': + do_all = B_TRUE; + break; + case 'c': + cachefile = optarg; + break; + case 'd': + if (searchdirs == NULL) { + searchdirs = safe_malloc(sizeof (char *)); + } else { + char **tmp = safe_malloc((nsearch + 1) * + sizeof (char *)); + bcopy(searchdirs, tmp, nsearch * + sizeof (char *)); + free(searchdirs); + searchdirs = tmp; + } + searchdirs[nsearch++] = optarg; + break; + case 'D': + do_destroyed = B_TRUE; + break; + case 'f': + flags |= ZFS_IMPORT_ANY_HOST; + break; + case 'F': + do_rewind = B_TRUE; + break; + case 'm': + flags |= ZFS_IMPORT_MISSING_LOG; + break; + case 'n': + dryrun = B_TRUE; + break; + case 'N': + flags |= ZFS_IMPORT_ONLY; + break; + case 'o': + if ((propval = strchr(optarg, '=')) != NULL) { + *propval = '\0'; + propval++; + if (add_prop_list(optarg, propval, + &props, B_TRUE)) + goto error; + } else { + mntopts = optarg; + } + break; + case 'R': + if (add_prop_list(zpool_prop_to_name( + ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) + goto error; + if 
(nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), + &propval) == 0) + break; + if (add_prop_list(zpool_prop_to_name( + ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) + goto error; + break; + case 'T': + errno = 0; + txg = strtoull(optarg, &endptr, 0); + if (errno != 0 || *endptr != '\0') { + (void) fprintf(stderr, + gettext("invalid txg value\n")); + usage(B_FALSE); + } + rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND; + break; + case 'V': + flags |= ZFS_IMPORT_VERBATIM; + break; + case 'X': + xtreme_rewind = B_TRUE; + break; + case CHECKPOINT_OPT: + flags |= ZFS_IMPORT_CHECKPOINT; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (cachefile && nsearch != 0) { + (void) fprintf(stderr, gettext("-c is incompatible with -d\n")); + usage(B_FALSE); + } + + if ((dryrun || xtreme_rewind) && !do_rewind) { + (void) fprintf(stderr, + gettext("-n or -X only meaningful with -F\n")); + usage(B_FALSE); + } + if (dryrun) + rewind_policy = ZPOOL_TRY_REWIND; + else if (do_rewind) + rewind_policy = ZPOOL_DO_REWIND; + if (xtreme_rewind) + rewind_policy |= ZPOOL_EXTREME_REWIND; + + /* In the future, we can capture further policy and include it here */ + if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || + nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, txg) != 0 || + nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, + rewind_policy) != 0) + goto error; + + if (searchdirs == NULL) { + searchdirs = safe_malloc(sizeof (char *)); + searchdirs[0] = "/dev"; + nsearch = 1; + } + + /* check argument count */ + if (do_all) { + if (argc != 0) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + } else { + if (argc > 2) { + (void) fprintf(stderr, gettext("too many arguments\n")); + 
usage(B_FALSE); + } + + /* + * Check for the SYS_CONFIG privilege. We do this explicitly + * here because otherwise any attempt to discover pools will + * silently fail. + */ + if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) { + (void) fprintf(stderr, gettext("cannot " + "discover pools: permission denied\n")); + free(searchdirs); + nvlist_free(policy); + return (1); + } + } + + /* + * Depending on the arguments given, we do one of the following: + * + * <none> Iterate through all pools and display information about + * each one. + * + * -a Iterate through all pools and try to import each one. + * + * <id> Find the pool that corresponds to the given GUID/pool + * name and import that one. + * + * -D Above options applies only to destroyed pools. + */ + if (argc != 0) { + char *endptr; + + errno = 0; + searchguid = strtoull(argv[0], &endptr, 10); + if (errno != 0 || *endptr != '\0') { + searchname = argv[0]; + searchguid = 0; + } + found_config = NULL; + + /* + * User specified a name or guid. Ensure it's unique. 
+ */ + idata.unique = B_TRUE; + } + + + idata.path = searchdirs; + idata.paths = nsearch; + idata.poolname = searchname; + idata.guid = searchguid; + idata.cachefile = cachefile; + idata.policy = policy; + + pools = zpool_search_import(g_zfs, &idata); + + if (pools != NULL && idata.exists && + (argc == 1 || strcmp(argv[0], argv[1]) == 0)) { + (void) fprintf(stderr, gettext("cannot import '%s': " + "a pool with that name already exists\n"), + argv[0]); + (void) fprintf(stderr, gettext("use the form '%s " + "<pool | id> <newpool>' to give it a new name\n"), + "zpool import"); + err = 1; + } else if (pools == NULL && idata.exists) { + (void) fprintf(stderr, gettext("cannot import '%s': " + "a pool with that name is already created/imported,\n"), + argv[0]); + (void) fprintf(stderr, gettext("and no additional pools " + "with that name were found\n")); + err = 1; + } else if (pools == NULL) { + if (argc != 0) { + (void) fprintf(stderr, gettext("cannot import '%s': " + "no such pool available\n"), argv[0]); + } + err = 1; + } + + if (err == 1) { + free(searchdirs); + nvlist_free(policy); + return (1); + } + + /* + * At this point we have a list of import candidate configs. Even if + * we were searching by pool name or guid, we still need to + * post-process the list to deal with pool state and possible + * duplicate names. 
+ */ + err = 0; + elem = NULL; + first = B_TRUE; + while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { + + verify(nvpair_value_nvlist(elem, &config) == 0); + + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &pool_state) == 0); + if (!do_destroyed && pool_state == POOL_STATE_DESTROYED) + continue; + if (do_destroyed && pool_state != POOL_STATE_DESTROYED) + continue; + + verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY, + policy) == 0); + + if (argc == 0) { + if (first) + first = B_FALSE; + else if (!do_all) + (void) printf("\n"); + + if (do_all) { + err |= do_import(config, NULL, mntopts, + props, flags); + } else { + show_import(config); + } + } else if (searchname != NULL) { + char *name; + + /* + * We are searching for a pool based on name. + */ + verify(nvlist_lookup_string(config, + ZPOOL_CONFIG_POOL_NAME, &name) == 0); + + if (strcmp(name, searchname) == 0) { + if (found_config != NULL) { + (void) fprintf(stderr, gettext( + "cannot import '%s': more than " + "one matching pool\n"), searchname); + (void) fprintf(stderr, gettext( + "import by numeric ID instead\n")); + err = B_TRUE; + } + found_config = config; + } + } else { + uint64_t guid; + + /* + * Search for a pool by guid. + */ + verify(nvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_GUID, &guid) == 0); + + if (guid == searchguid) + found_config = config; + } + } + + /* + * If we were searching for a specific pool, verify that we found a + * pool, and then do the import. + */ + if (argc != 0 && err == 0) { + if (found_config == NULL) { + (void) fprintf(stderr, gettext("cannot import '%s': " + "no such pool available\n"), argv[0]); + err = B_TRUE; + } else { + err |= do_import(found_config, argc == 1 ? NULL : + argv[1], mntopts, props, flags); + } + } + + /* + * If we were just looking for pools, report an error if none were + * found. 
+ */ + if (argc == 0 && first) + (void) fprintf(stderr, + gettext("no pools available to import\n")); + +error: + nvlist_free(props); + nvlist_free(pools); + nvlist_free(policy); + free(searchdirs); + + return (err ? 1 : 0); +} + +typedef struct iostat_cbdata { + boolean_t cb_verbose; + int cb_namewidth; + int cb_iteration; + zpool_list_t *cb_list; +} iostat_cbdata_t; + +static void +print_iostat_separator(iostat_cbdata_t *cb) +{ + int i = 0; + + for (i = 0; i < cb->cb_namewidth; i++) + (void) printf("-"); + (void) printf(" ----- ----- ----- ----- ----- -----\n"); +} + +static void +print_iostat_header(iostat_cbdata_t *cb) +{ + (void) printf("%*s capacity operations bandwidth\n", + cb->cb_namewidth, ""); + (void) printf("%-*s alloc free read write read write\n", + cb->cb_namewidth, "pool"); + print_iostat_separator(cb); +} + +/* + * Display a single statistic. + */ +static void +print_one_stat(uint64_t value) +{ + char buf[64]; + + zfs_nicenum(value, buf, sizeof (buf)); + (void) printf(" %5s", buf); +} + +/* + * Print out all the statistics for the given vdev. This can either be the + * toplevel configuration, or called recursively. If 'name' is NULL, then this + * is a verbose output, and we don't want to display the toplevel pool stats. 
+ */ +void +print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, + nvlist_t *newnv, iostat_cbdata_t *cb, int depth) +{ + nvlist_t **oldchild, **newchild; + uint_t c, children; + vdev_stat_t *oldvs, *newvs; + vdev_stat_t zerovs = { 0 }; + uint64_t tdelta; + double scale; + char *vname; + + if (strcmp(name, VDEV_TYPE_INDIRECT) == 0) + return; + + if (oldnv != NULL) { + verify(nvlist_lookup_uint64_array(oldnv, + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0); + } else { + oldvs = &zerovs; + } + + verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&newvs, &c) == 0); + + if (strlen(name) + depth > cb->cb_namewidth) + (void) printf("%*s%s", depth, "", name); + else + (void) printf("%*s%s%*s", depth, "", name, + (int)(cb->cb_namewidth - strlen(name) - depth), ""); + + tdelta = newvs->vs_timestamp - oldvs->vs_timestamp; + + if (tdelta == 0) + scale = 1.0; + else + scale = (double)NANOSEC / tdelta; + + /* only toplevel vdevs have capacity stats */ + if (newvs->vs_space == 0) { + (void) printf(" - -"); + } else { + print_one_stat(newvs->vs_alloc); + print_one_stat(newvs->vs_space - newvs->vs_alloc); + } + + print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] - + oldvs->vs_ops[ZIO_TYPE_READ]))); + + print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] - + oldvs->vs_ops[ZIO_TYPE_WRITE]))); + + print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] - + oldvs->vs_bytes[ZIO_TYPE_READ]))); + + print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] - + oldvs->vs_bytes[ZIO_TYPE_WRITE]))); + + (void) printf("\n"); + + if (!cb->cb_verbose) + return; + + if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN, + &newchild, &children) != 0) + return; + + if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN, + &oldchild, &c) != 0) + return; + + for (c = 0; c < children; c++) { + uint64_t ishole = B_FALSE, islog = B_FALSE; + + (void) 
nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_HOLE, + &ishole); + + (void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_LOG, + &islog); + + if (ishole || islog) + continue; + + vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE); + print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, + newchild[c], cb, depth + 2); + free(vname); + } + + /* + * Log device section + */ + + if (num_logs(newnv) > 0) { + (void) printf("%-*s - - - - - " + "-\n", cb->cb_namewidth, "logs"); + + for (c = 0; c < children; c++) { + uint64_t islog = B_FALSE; + (void) nvlist_lookup_uint64(newchild[c], + ZPOOL_CONFIG_IS_LOG, &islog); + + if (islog) { + vname = zpool_vdev_name(g_zfs, zhp, newchild[c], + B_FALSE); + print_vdev_stats(zhp, vname, oldnv ? + oldchild[c] : NULL, newchild[c], + cb, depth + 2); + free(vname); + } + } + + } + + /* + * Include level 2 ARC devices in iostat output + */ + if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE, + &newchild, &children) != 0) + return; + + if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE, + &oldchild, &c) != 0) + return; + + if (children > 0) { + (void) printf("%-*s - - - - - " + "-\n", cb->cb_namewidth, "cache"); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, zhp, newchild[c], + B_FALSE); + print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, + newchild[c], cb, depth + 2); + free(vname); + } + } +} + +static int +refresh_iostat(zpool_handle_t *zhp, void *data) +{ + iostat_cbdata_t *cb = data; + boolean_t missing; + + /* + * If the pool has disappeared, remove it from the list and continue. + */ + if (zpool_refresh_stats(zhp, &missing) != 0) + return (-1); + + if (missing) + pool_list_remove(cb->cb_list, zhp); + + return (0); +} + +/* + * Callback to print out the iostats for the given pool. 
+ */ +int +print_iostat(zpool_handle_t *zhp, void *data) +{ + iostat_cbdata_t *cb = data; + nvlist_t *oldconfig, *newconfig; + nvlist_t *oldnvroot, *newnvroot; + + newconfig = zpool_get_config(zhp, &oldconfig); + + if (cb->cb_iteration == 1) + oldconfig = NULL; + + verify(nvlist_lookup_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE, + &newnvroot) == 0); + + if (oldconfig == NULL) + oldnvroot = NULL; + else + verify(nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE, + &oldnvroot) == 0); + + /* + * Print out the statistics for the pool. + */ + print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0); + + if (cb->cb_verbose) + print_iostat_separator(cb); + + return (0); +} + +int +get_namewidth(zpool_handle_t *zhp, void *data) +{ + iostat_cbdata_t *cb = data; + nvlist_t *config, *nvroot; + + if ((config = zpool_get_config(zhp, NULL)) != NULL) { + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (!cb->cb_verbose) + cb->cb_namewidth = strlen(zpool_get_name(zhp)); + else + cb->cb_namewidth = max_width(zhp, nvroot, 0, + cb->cb_namewidth); + } + + /* + * The width must fall into the range [10,38]. The upper limit is the + * maximum we can have and still fit in 80 columns. + */ + if (cb->cb_namewidth < 10) + cb->cb_namewidth = 10; + if (cb->cb_namewidth > 38) + cb->cb_namewidth = 38; + + return (0); +} + +/* + * Parse the input string, get the 'interval' and 'count' value if there is one. 
+ */ +static void +get_interval_count(int *argcp, char **argv, unsigned long *iv, + unsigned long *cnt) +{ + unsigned long interval = 0, count = 0; + int argc = *argcp, errno; + + /* + * Determine if the last argument is an integer or a pool name + */ + if (argc > 0 && isdigit(argv[argc - 1][0])) { + char *end; + + errno = 0; + interval = strtoul(argv[argc - 1], &end, 10); + + if (*end == '\0' && errno == 0) { + if (interval == 0) { + (void) fprintf(stderr, gettext("interval " + "cannot be zero\n")); + usage(B_FALSE); + } + /* + * Ignore the last parameter + */ + argc--; + } else { + /* + * If this is not a valid number, just plow on. The + * user will get a more informative error message later + * on. + */ + interval = 0; + } + } + + /* + * If the last argument is also an integer, then we have both a count + * and an interval. + */ + if (argc > 0 && isdigit(argv[argc - 1][0])) { + char *end; + + errno = 0; + count = interval; + interval = strtoul(argv[argc - 1], &end, 10); + + if (*end == '\0' && errno == 0) { + if (interval == 0) { + (void) fprintf(stderr, gettext("interval " + "cannot be zero\n")); + usage(B_FALSE); + } + + /* + * Ignore the last parameter + */ + argc--; + } else { + interval = 0; + } + } + + *iv = interval; + *cnt = count; + *argcp = argc; +} + +static void +get_timestamp_arg(char c) +{ + if (c == 'u') + timestamp_fmt = UDATE; + else if (c == 'd') + timestamp_fmt = DDATE; + else + usage(B_FALSE); +} + +/* + * zpool iostat [-v] [-T d|u] [pool] ... [interval [count]] + * + * -v Display statistics for individual vdevs + * -T Display a timestamp in date(1) or Unix format + * + * This command can be tricky because we want to be able to deal with pool + * creation/destruction as well as vdev configuration changes. The bulk of this + * processing is handled by the pool_list_* routines in zpool_iter.c. We rely + * on pool_list_update() to detect the addition of new pools. Configuration + * changes are all handled within libzfs. 
+ */ +int +zpool_do_iostat(int argc, char **argv) +{ + int c; + int ret; + int npools; + unsigned long interval = 0, count = 0; + zpool_list_t *list; + boolean_t verbose = B_FALSE; + iostat_cbdata_t cb; + + /* check options */ + while ((c = getopt(argc, argv, "T:v")) != -1) { + switch (c) { + case 'T': + get_timestamp_arg(*optarg); + break; + case 'v': + verbose = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + get_interval_count(&argc, argv, &interval, &count); + + /* + * Construct the list of all interesting pools. + */ + ret = 0; + if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL) + return (1); + + if (pool_list_count(list) == 0 && argc != 0) { + pool_list_free(list); + return (1); + } + + if (pool_list_count(list) == 0 && interval == 0) { + pool_list_free(list); + (void) fprintf(stderr, gettext("no pools available\n")); + return (1); + } + + /* + * Enter the main iostat loop. + */ + cb.cb_list = list; + cb.cb_verbose = verbose; + cb.cb_iteration = 0; + cb.cb_namewidth = 0; + + for (;;) { + pool_list_update(list); + + if ((npools = pool_list_count(list)) == 0) + break; + + /* + * Refresh all statistics. This is done as an explicit step + * before calculating the maximum name width, so that any + * configuration changes are properly accounted for. + */ + (void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb); + + /* + * Iterate over all pools to determine the maximum width + * for the pool / device name column across all pools. + */ + cb.cb_namewidth = 0; + (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb); + + if (timestamp_fmt != NODATE) + print_timestamp(timestamp_fmt); + + /* + * If it's the first time, or verbose mode, print the header. 
+ */ + if (++cb.cb_iteration == 1 || verbose) + print_iostat_header(&cb); + + (void) pool_list_iter(list, B_FALSE, print_iostat, &cb); + + /* + * If there's more than one pool, and we're not in verbose mode + * (which prints a separator for us), then print a separator. + */ + if (npools > 1 && !verbose) + print_iostat_separator(&cb); + + if (verbose) + (void) printf("\n"); + + /* + * Flush the output so that redirection to a file isn't buffered + * indefinitely. + */ + (void) fflush(stdout); + + if (interval == 0) + break; + + if (count != 0 && --count == 0) + break; + + (void) sleep(interval); + } + + pool_list_free(list); + + return (ret); +} + +typedef struct list_cbdata { + boolean_t cb_verbose; + int cb_namewidth; + boolean_t cb_scripted; + zprop_list_t *cb_proplist; + boolean_t cb_literal; +} list_cbdata_t; + +/* + * Given a list of columns to display, output appropriate headers for each one. + */ +static void +print_header(list_cbdata_t *cb) +{ + zprop_list_t *pl = cb->cb_proplist; + char headerbuf[ZPOOL_MAXPROPLEN]; + const char *header; + boolean_t first = B_TRUE; + boolean_t right_justify; + size_t width = 0; + + for (; pl != NULL; pl = pl->pl_next) { + width = pl->pl_width; + if (first && cb->cb_verbose) { + /* + * Reset the width to accommodate the verbose listing + * of devices. 
+ */ + width = cb->cb_namewidth; + } + + if (!first) + (void) printf(" "); + else + first = B_FALSE; + + right_justify = B_FALSE; + if (pl->pl_prop != ZPROP_INVAL) { + header = zpool_prop_column_name(pl->pl_prop); + right_justify = zpool_prop_align_right(pl->pl_prop); + } else { + int i; + + for (i = 0; pl->pl_user_prop[i] != '\0'; i++) + headerbuf[i] = toupper(pl->pl_user_prop[i]); + headerbuf[i] = '\0'; + header = headerbuf; + } + + if (pl->pl_next == NULL && !right_justify) + (void) printf("%s", header); + else if (right_justify) + (void) printf("%*s", width, header); + else + (void) printf("%-*s", width, header); + + } + + (void) printf("\n"); +} + +/* + * Given a pool and a list of properties, print out all the properties according + * to the described layout. + */ +static void +print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) +{ + zprop_list_t *pl = cb->cb_proplist; + boolean_t first = B_TRUE; + char property[ZPOOL_MAXPROPLEN]; + char *propstr; + boolean_t right_justify; + size_t width; + + for (; pl != NULL; pl = pl->pl_next) { + + width = pl->pl_width; + if (first && cb->cb_verbose) { + /* + * Reset the width to accommodate the verbose listing + * of devices. 
+ */ + width = cb->cb_namewidth; + } + + if (!first) { + if (cb->cb_scripted) + (void) printf("\t"); + else + (void) printf(" "); + } else { + first = B_FALSE; + } + + right_justify = B_FALSE; + if (pl->pl_prop != ZPROP_INVAL) { + if (zpool_get_prop(zhp, pl->pl_prop, property, + sizeof (property), NULL, cb->cb_literal) != 0) + propstr = "-"; + else + propstr = property; + + right_justify = zpool_prop_align_right(pl->pl_prop); + } else if ((zpool_prop_feature(pl->pl_user_prop) || + zpool_prop_unsupported(pl->pl_user_prop)) && + zpool_prop_get_feature(zhp, pl->pl_user_prop, property, + sizeof (property)) == 0) { + propstr = property; + } else { + propstr = "-"; + } + + + /* + * If this is being called in scripted mode, or if this is the + * last column and it is left-justified, don't include a width + * format specifier. + */ + if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) + (void) printf("%s", propstr); + else if (right_justify) + (void) printf("%*s", width, propstr); + else + (void) printf("%-*s", width, propstr); + } + + (void) printf("\n"); +} + +static void +print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted, + boolean_t valid) +{ + char propval[64]; + boolean_t fixed; + size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL); + + switch (prop) { + case ZPOOL_PROP_EXPANDSZ: + case ZPOOL_PROP_CHECKPOINT: + if (value == 0) + (void) strlcpy(propval, "-", sizeof (propval)); + else + zfs_nicenum(value, propval, sizeof (propval)); + break; + case ZPOOL_PROP_FRAGMENTATION: + if (value == ZFS_FRAG_INVALID) { + (void) strlcpy(propval, "-", sizeof (propval)); + } else { + (void) snprintf(propval, sizeof (propval), "%llu%%", + value); + } + break; + case ZPOOL_PROP_CAPACITY: + (void) snprintf(propval, sizeof (propval), "%llu%%", value); + break; + default: + zfs_nicenum(value, propval, sizeof (propval)); + } + + if (!valid) + (void) strlcpy(propval, "-", sizeof (propval)); + + if (scripted) + (void) printf("\t%s", propval); + else + 
(void) printf(" %*s", width, propval); +} + +void +print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, + list_cbdata_t *cb, int depth) +{ + nvlist_t **child; + vdev_stat_t *vs; + uint_t c, children; + char *vname; + boolean_t scripted = cb->cb_scripted; + uint64_t islog = B_FALSE; + boolean_t haslog = B_FALSE; + char *dashes = "%-*s - - - - - -\n"; + + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0); + + if (name != NULL) { + boolean_t toplevel = (vs->vs_space != 0); + uint64_t cap; + + if (strcmp(name, VDEV_TYPE_INDIRECT) == 0) + return; + + if (scripted) + (void) printf("\t%s", name); + else if (strlen(name) + depth > cb->cb_namewidth) + (void) printf("%*s%s", depth, "", name); + else + (void) printf("%*s%s%*s", depth, "", name, + (int)(cb->cb_namewidth - strlen(name) - depth), ""); + + /* + * Print the properties for the individual vdevs. Some + * properties are only applicable to toplevel vdevs. The + * 'toplevel' boolean value is passed to the print_one_column() + * to indicate that the value is valid. + */ + print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, scripted, + toplevel); + print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, scripted, + toplevel); + print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, + scripted, toplevel); + print_one_column(ZPOOL_PROP_CHECKPOINT, + vs->vs_checkpoint_space, scripted, toplevel); + print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted, + B_TRUE); + print_one_column(ZPOOL_PROP_FRAGMENTATION, + vs->vs_fragmentation, scripted, + (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel)); + cap = (vs->vs_space == 0) ? 
0 : + (vs->vs_alloc * 100 / vs->vs_space); + print_one_column(ZPOOL_PROP_CAPACITY, cap, scripted, toplevel); + (void) printf("\n"); + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + uint64_t ishole = B_FALSE; + + if (nvlist_lookup_uint64(child[c], + ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole) + continue; + + if (nvlist_lookup_uint64(child[c], + ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog) { + haslog = B_TRUE; + continue; + } + + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + + if (haslog == B_TRUE) { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "log"); + for (c = 0; c < children; c++) { + if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &islog) != 0 || !islog) + continue; + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0 && children > 0) { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "cache"); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, + &children) == 0 && children > 0) { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "spare"); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + } +} + + +/* + * Generic callback function to list a pool. 
+ */ +int +list_callback(zpool_handle_t *zhp, void *data) +{ + list_cbdata_t *cbp = data; + nvlist_t *config; + nvlist_t *nvroot; + + config = zpool_get_config(zhp, NULL); + + print_pool(zhp, cbp); + if (!cbp->cb_verbose) + return (0); + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + print_list_stats(zhp, NULL, nvroot, cbp, 0); + + return (0); +} + +/* + * zpool list [-Hp] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]] + * + * -H Scripted mode. Don't display headers, and separate properties + * by a single tab. + * -o List of properties to display. Defaults to + * "name,size,allocated,free,expandsize,fragmentation,capacity," + * "dedupratio,health,altroot" + * -p Diplay values in parsable (exact) format. + * -T Display a timestamp in date(1) or Unix format + * + * List all pools in the system, whether or not they're healthy. Output space + * statistics for each one, as well as health status summary. + */ +int +zpool_do_list(int argc, char **argv) +{ + int c; + int ret; + list_cbdata_t cb = { 0 }; + static char default_props[] = + "name,size,allocated,free,checkpoint,expandsize,fragmentation," + "capacity,dedupratio,health,altroot"; + char *props = default_props; + unsigned long interval = 0, count = 0; + zpool_list_t *list; + boolean_t first = B_TRUE; + + /* check options */ + while ((c = getopt(argc, argv, ":Ho:pT:v")) != -1) { + switch (c) { + case 'H': + cb.cb_scripted = B_TRUE; + break; + case 'o': + props = optarg; + break; + case 'p': + cb.cb_literal = B_TRUE; + break; + case 'T': + get_timestamp_arg(*optarg); + break; + case 'v': + cb.cb_verbose = B_TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + get_interval_count(&argc, argv, &interval, &count); + + if (zprop_get_list(g_zfs, 
props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0) + usage(B_FALSE); + + for (;;) { + if ((list = pool_list_get(argc, argv, &cb.cb_proplist, + &ret)) == NULL) + return (1); + + if (pool_list_count(list) == 0) + break; + + cb.cb_namewidth = 0; + (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb); + + if (timestamp_fmt != NODATE) + print_timestamp(timestamp_fmt); + + if (!cb.cb_scripted && (first || cb.cb_verbose)) { + print_header(&cb); + first = B_FALSE; + } + ret = pool_list_iter(list, B_TRUE, list_callback, &cb); + + if (interval == 0) + break; + + if (count != 0 && --count == 0) + break; + + pool_list_free(list); + (void) sleep(interval); + } + + if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) { + (void) printf(gettext("no pools available\n")); + ret = 0; + } + + pool_list_free(list); + zprop_free_list(cb.cb_proplist); + return (ret); +} + +static int +zpool_do_attach_or_replace(int argc, char **argv, int replacing) +{ + boolean_t force = B_FALSE; + int c; + nvlist_t *nvroot; + char *poolname, *old_disk, *new_disk; + zpool_handle_t *zhp; + zpool_boot_label_t boot_type; + uint64_t boot_size; + int ret; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + + if (argc < 2) { + (void) fprintf(stderr, + gettext("missing <device> specification\n")); + usage(B_FALSE); + } + + old_disk = argv[1]; + + if (argc < 3) { + if (!replacing) { + (void) fprintf(stderr, + gettext("missing <new_device> specification\n")); + usage(B_FALSE); + } + new_disk = old_disk; + argc -= 1; + argv += 1; + } else { + new_disk = argv[2]; + argc -= 2; + argv += 2; + } + + if (argc > 
1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + if (zpool_get_config(zhp, NULL) == NULL) { + (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), + poolname); + zpool_close(zhp); + return (1); + } + + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); + nvroot = make_root_vdev(zhp, force, B_FALSE, replacing, B_FALSE, + boot_type, boot_size, argc, argv); + if (nvroot == NULL) { + zpool_close(zhp); + return (1); + } + + ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing); + + nvlist_free(nvroot); + zpool_close(zhp); + + return (ret); +} + +/* + * zpool replace [-f] <pool> <device> <new_device> + * + * -f Force attach, even if <new_device> appears to be in use. + * + * Replace <device> with <new_device>. + */ +/* ARGSUSED */ +int +zpool_do_replace(int argc, char **argv) +{ + return (zpool_do_attach_or_replace(argc, argv, B_TRUE)); +} + +/* + * zpool attach [-f] <pool> <device> <new_device> + * + * -f Force attach, even if <new_device> appears to be in use. + * + * Attach <new_device> to the mirror containing <device>. If <device> is not + * part of a mirror, then <device> will be transformed into a mirror of + * <device> and <new_device>. In either case, <new_device> will begin life + * with a DTL of [0, now], and will immediately begin to resilver itself. + */ +int +zpool_do_attach(int argc, char **argv) +{ + return (zpool_do_attach_or_replace(argc, argv, B_FALSE)); +} + +/* + * zpool detach [-f] <pool> <device> + * + * -f Force detach of <device>, even if DTLs argue against it + * (not supported yet) + * + * Detach a device from a mirror. The operation will be refused if <device> + * is the last device in the mirror, or if the DTLs indicate that this device + * has the only valid copy of some data. 
+ */ +/* ARGSUSED */ +int +zpool_do_detach(int argc, char **argv) +{ + int c; + char *poolname, *path; + zpool_handle_t *zhp; + int ret; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } + + if (argc < 2) { + (void) fprintf(stderr, + gettext("missing <device> specification\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + path = argv[1]; + + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + ret = zpool_vdev_detach(zhp, path); + + zpool_close(zhp); + + return (ret); +} + +/* + * zpool split [-n] [-o prop=val] ... + * [-o mntopt] ... + * [-R altroot] <pool> <newpool> [<device> ...] + * + * -n Do not split the pool, but display the resulting layout if + * it were to be split. + * -o Set property=value, or set mount options. + * -R Mount the split-off pool under an alternate root. + * + * Splits the named pool and gives it the new pool name. Devices to be split + * off may be listed, provided that no more than one device is specified + * per top-level vdev mirror. The newly split pool is left in an exported + * state unless -R is specified. + * + * Restrictions: the top-level of the pool pool must only be made up of + * mirrors; all devices in the pool must be healthy; no device may be + * undergoing a resilvering operation. 
+ */ +int +zpool_do_split(int argc, char **argv) +{ + char *srcpool, *newpool, *propval; + char *mntopts = NULL; + splitflags_t flags; + int c, ret = 0; + zpool_handle_t *zhp; + nvlist_t *config, *props = NULL; + + flags.dryrun = B_FALSE; + flags.import = B_FALSE; + + /* check options */ + while ((c = getopt(argc, argv, ":R:no:")) != -1) { + switch (c) { + case 'R': + flags.import = B_TRUE; + if (add_prop_list( + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg, + &props, B_TRUE) != 0) { + nvlist_free(props); + usage(B_FALSE); + } + break; + case 'n': + flags.dryrun = B_TRUE; + break; + case 'o': + if ((propval = strchr(optarg, '=')) != NULL) { + *propval = '\0'; + propval++; + if (add_prop_list(optarg, propval, + &props, B_TRUE) != 0) { + nvlist_free(props); + usage(B_FALSE); + } + } else { + mntopts = optarg; + } + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + break; + } + } + + if (!flags.import && mntopts != NULL) { + (void) fprintf(stderr, gettext("setting mntopts is only " + "valid when importing the pool\n")); + usage(B_FALSE); + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("Missing pool name\n")); + usage(B_FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("Missing new pool name\n")); + usage(B_FALSE); + } + + srcpool = argv[0]; + newpool = argv[1]; + + argc -= 2; + argv += 2; + + if ((zhp = zpool_open(g_zfs, srcpool)) == NULL) + return (1); + + config = split_mirror_vdev(zhp, newpool, props, flags, argc, argv); + if (config == NULL) { + ret = 1; + } else { + if (flags.dryrun) { + (void) printf(gettext("would create '%s' with the " + "following layout:\n\n"), newpool); + print_vdev_tree(NULL, newpool, config, 0, B_FALSE); + } + nvlist_free(config); + } + + zpool_close(zhp); + + if (ret != 0 || flags.dryrun || 
!flags.import) + return (ret); + + /* + * The split was successful. Now we need to open the new + * pool and import it. + */ + if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) + return (1); + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && + zpool_enable_datasets(zhp, mntopts, 0) != 0) { + ret = 1; + (void) fprintf(stderr, gettext("Split was successful, but " + "the datasets could not all be mounted\n")); + (void) fprintf(stderr, gettext("Try doing '%s' with a " + "different altroot\n"), "zpool import"); + } + zpool_close(zhp); + + return (ret); +} + + + +/* + * zpool online <pool> <device> ... + */ +int +zpool_do_online(int argc, char **argv) +{ + int c, i; + char *poolname; + zpool_handle_t *zhp; + int ret = 0; + vdev_state_t newstate; + int flags = 0; + + /* check options */ + while ((c = getopt(argc, argv, "et")) != -1) { + switch (c) { + case 'e': + flags |= ZFS_ONLINE_EXPAND; + break; + case 't': + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing device name\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + for (i = 1; i < argc; i++) { + if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { + if (newstate != VDEV_STATE_HEALTHY) { + (void) printf(gettext("warning: device '%s' " + "onlined, but remains in faulted state\n"), + argv[i]); + if (newstate == VDEV_STATE_FAULTED) + (void) printf(gettext("use 'zpool " + "clear' to restore a faulted " + "device\n")); + else + (void) printf(gettext("use 'zpool " + "replace' to replace devices " + "that are no longer present\n")); + } + } else { + ret = 1; + } + } + + zpool_close(zhp); + + return (ret); +} + +/* + * zpool offline 
[-ft] <pool> <device> ...
 *
 *	-f	Force the device into the offline state, even if doing
 *		so would appear to compromise pool availability.
 *		(not supported yet)
 *
 *	-t	Only take the device off-line temporarily.  The offline
 *		state will not be persistent across reboots.
 */
/* ARGSUSED */
int
zpool_do_offline(int argc, char **argv)
{
	int c, i;
	char *poolname;
	zpool_handle_t *zhp;
	int ret = 0;
	boolean_t istmp = B_FALSE;

	/* check options */
	while ((c = getopt(argc, argv, "ft")) != -1) {
		switch (c) {
		case 't':
			istmp = B_TRUE;
			break;
		case 'f':
			/* -f is accepted by getopt but not supported yet. */
			/* FALLTHROUGH */
		case '?':
			/*
			 * optopt is only guaranteed to be set when getopt()
			 * reports an error, so name the rejected -f through
			 * 'c' instead.
			 */
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    c == '?' ? optopt : c);
			usage(B_FALSE);
		}
	}

	argc -= optind;
	argv += optind;

	/* get pool name and check number of arguments */
	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool name\n"));
		usage(B_FALSE);
	}
	if (argc < 2) {
		(void) fprintf(stderr, gettext("missing device name\n"));
		usage(B_FALSE);
	}

	poolname = argv[0];

	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
		return (1);

	/* Keep going after a failure so every listed device is attempted. */
	for (i = 1; i < argc; i++) {
		if (zpool_vdev_offline(zhp, argv[i], istmp) != 0)
			ret = 1;
	}

	zpool_close(zhp);

	return (ret);
}

/*
 * zpool clear <pool> [device]
 *
 * Clear all errors associated with a pool or a particular device.
+ */ +int +zpool_do_clear(int argc, char **argv) +{ + int c; + int ret = 0; + boolean_t dryrun = B_FALSE; + boolean_t do_rewind = B_FALSE; + boolean_t xtreme_rewind = B_FALSE; + uint32_t rewind_policy = ZPOOL_NO_REWIND; + nvlist_t *policy = NULL; + zpool_handle_t *zhp; + char *pool, *device; + + /* check options */ + while ((c = getopt(argc, argv, "FnX")) != -1) { + switch (c) { + case 'F': + do_rewind = B_TRUE; + break; + case 'n': + dryrun = B_TRUE; + break; + case 'X': + xtreme_rewind = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + + if (argc > 2) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + if ((dryrun || xtreme_rewind) && !do_rewind) { + (void) fprintf(stderr, + gettext("-n or -X only meaningful with -F\n")); + usage(B_FALSE); + } + if (dryrun) + rewind_policy = ZPOOL_TRY_REWIND; + else if (do_rewind) + rewind_policy = ZPOOL_DO_REWIND; + if (xtreme_rewind) + rewind_policy |= ZPOOL_EXTREME_REWIND; + + /* In future, further rewind policy choices can be passed along here */ + if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || + nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, + rewind_policy) != 0) { + return (1); + } + + pool = argv[0]; + device = argc == 2 ? 
argv[1] : NULL; + + if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { + nvlist_free(policy); + return (1); + } + + if (zpool_clear(zhp, device, policy) != 0) + ret = 1; + + zpool_close(zhp); + + nvlist_free(policy); + + return (ret); +} + +/* + * zpool reguid <pool> + */ +int +zpool_do_reguid(int argc, char **argv) +{ + int c; + char *poolname; + zpool_handle_t *zhp; + int ret = 0; + + /* check options */ + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + ret = zpool_reguid(zhp); + + zpool_close(zhp); + return (ret); +} + + +/* + * zpool reopen <pool> + * + * Reopen the pool so that the kernel can update the sizes of all vdevs. 
+ */ +int +zpool_do_reopen(int argc, char **argv) +{ + int c; + int ret = 0; + zpool_handle_t *zhp; + char *pool; + + /* check options */ + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc--; + argv++; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + pool = argv[0]; + if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) + return (1); + + ret = zpool_reopen(zhp); + zpool_close(zhp); + return (ret); +} + +typedef struct scrub_cbdata { + int cb_type; + int cb_argc; + char **cb_argv; + pool_scrub_cmd_t cb_scrub_cmd; +} scrub_cbdata_t; + +static boolean_t +zpool_has_checkpoint(zpool_handle_t *zhp) +{ + nvlist_t *config, *nvroot; + + config = zpool_get_config(zhp, NULL); + + if (config != NULL) { + pool_checkpoint_stat_t *pcs = NULL; + uint_t c; + + nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); + + if (pcs == NULL || pcs->pcs_state == CS_NONE) + return (B_FALSE); + + assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS || + pcs->pcs_state == CS_CHECKPOINT_DISCARDING); + return (B_TRUE); + } + + return (B_FALSE); +} + +int +scrub_callback(zpool_handle_t *zhp, void *data) +{ + scrub_cbdata_t *cb = data; + int err; + + /* + * Ignore faulted pools. 
+ */ + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + (void) fprintf(stderr, gettext("cannot scrub '%s': pool is " + "currently unavailable\n"), zpool_get_name(zhp)); + return (1); + } + + err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd); + + if (err == 0 && zpool_has_checkpoint(zhp) && + cb->cb_type == POOL_SCAN_SCRUB) { + (void) printf(gettext("warning: will not scrub state that " + "belongs to the checkpoint of pool '%s'\n"), + zpool_get_name(zhp)); + } + + return (err != 0); +} + +/* + * zpool scrub [-s | -p] <pool> ... + * + * -s Stop. Stops any in-progress scrub. + * -p Pause. Pause in-progress scrub. + */ +int +zpool_do_scrub(int argc, char **argv) +{ + int c; + scrub_cbdata_t cb; + + cb.cb_type = POOL_SCAN_SCRUB; + cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + + /* check options */ + while ((c = getopt(argc, argv, "sp")) != -1) { + switch (c) { + case 's': + cb.cb_type = POOL_SCAN_NONE; + break; + case 'p': + cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + if (cb.cb_type == POOL_SCAN_NONE && + cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) { + (void) fprintf(stderr, gettext("invalid option combination: " + "-s and -p are mutually exclusive\n")); + usage(B_FALSE); + } + + cb.cb_argc = argc; + cb.cb_argv = argv; + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } + + return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb)); +} + +typedef struct status_cbdata { + int cb_count; + boolean_t cb_allpools; + boolean_t cb_verbose; + boolean_t cb_explain; + boolean_t cb_first; + boolean_t cb_dedup_stats; +} status_cbdata_t; + +/* + * Print out detailed scrub status. 
+ */ +static void +print_scan_status(pool_scan_stat_t *ps) +{ + time_t start, end, pause; + uint64_t elapsed, mins_left, hours_left; + uint64_t pass_exam, examined, total; + uint_t rate; + double fraction_done; + char processed_buf[7], examined_buf[7], total_buf[7], rate_buf[7]; + + (void) printf(gettext(" scan: ")); + + /* If there's never been a scan, there's not much to say. */ + if (ps == NULL || ps->pss_func == POOL_SCAN_NONE || + ps->pss_func >= POOL_SCAN_FUNCS) { + (void) printf(gettext("none requested\n")); + return; + } + + start = ps->pss_start_time; + end = ps->pss_end_time; + pause = ps->pss_pass_scrub_pause; + zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf)); + + assert(ps->pss_func == POOL_SCAN_SCRUB || + ps->pss_func == POOL_SCAN_RESILVER); + /* + * Scan is finished or canceled. + */ + if (ps->pss_state == DSS_FINISHED) { + uint64_t minutes_taken = (end - start) / 60; + char *fmt = NULL; + + if (ps->pss_func == POOL_SCAN_SCRUB) { + fmt = gettext("scrub repaired %s in %lluh%um with " + "%llu errors on %s"); + } else if (ps->pss_func == POOL_SCAN_RESILVER) { + fmt = gettext("resilvered %s in %lluh%um with " + "%llu errors on %s"); + } + /* LINTED */ + (void) printf(fmt, processed_buf, + (u_longlong_t)(minutes_taken / 60), + (uint_t)(minutes_taken % 60), + (u_longlong_t)ps->pss_errors, + ctime((time_t *)&end)); + return; + } else if (ps->pss_state == DSS_CANCELED) { + if (ps->pss_func == POOL_SCAN_SCRUB) { + (void) printf(gettext("scrub canceled on %s"), + ctime(&end)); + } else if (ps->pss_func == POOL_SCAN_RESILVER) { + (void) printf(gettext("resilver canceled on %s"), + ctime(&end)); + } + return; + } + + assert(ps->pss_state == DSS_SCANNING); + + /* + * Scan is in progress. 
+ */ + if (ps->pss_func == POOL_SCAN_SCRUB) { + if (pause == 0) { + (void) printf(gettext("scrub in progress since %s"), + ctime(&start)); + } else { + char buf[32]; + struct tm *p = localtime(&pause); + (void) strftime(buf, sizeof (buf), "%a %b %e %T %Y", p); + (void) printf(gettext("scrub paused since %s\n"), buf); + (void) printf(gettext("\tscrub started on %s"), + ctime(&start)); + } + } else if (ps->pss_func == POOL_SCAN_RESILVER) { + (void) printf(gettext("resilver in progress since %s"), + ctime(&start)); + } + + examined = ps->pss_examined ? ps->pss_examined : 1; + total = ps->pss_to_examine; + fraction_done = (double)examined / total; + + /* elapsed time for this pass */ + elapsed = time(NULL) - ps->pss_pass_start; + elapsed -= ps->pss_pass_scrub_spent_paused; + elapsed = elapsed ? elapsed : 1; + pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; + rate = pass_exam / elapsed; + rate = rate ? rate : 1; + mins_left = ((total - examined) / rate) / 60; + hours_left = mins_left / 60; + + zfs_nicenum(examined, examined_buf, sizeof (examined_buf)); + zfs_nicenum(total, total_buf, sizeof (total_buf)); + + /* + * do not print estimated time if hours_left is more than 30 days + * or we have a paused scrub + */ + if (pause == 0) { + zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); + (void) printf(gettext("\t%s scanned out of %s at %s/s"), + examined_buf, total_buf, rate_buf); + if (hours_left < (30 * 24)) { + (void) printf(gettext(", %lluh%um to go\n"), + (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); + } else { + (void) printf(gettext( + ", (scan is slow, no estimated time)\n")); + } + } else { + (void) printf(gettext("\t%s scanned out of %s\n"), + examined_buf, total_buf); + } + + if (ps->pss_func == POOL_SCAN_RESILVER) { + (void) printf(gettext(" %s resilvered, %.2f%% done\n"), + processed_buf, 100 * fraction_done); + } else if (ps->pss_func == POOL_SCAN_SCRUB) { + (void) printf(gettext(" %s repaired, %.2f%% done\n"), + processed_buf, 100 * 
fraction_done); + } +} + +/* + * As we don't scrub checkpointed blocks, we want to warn the + * user that we skipped scanning some blocks if a checkpoint exists + * or existed at any time during the scan. + */ +static void +print_checkpoint_scan_warning(pool_scan_stat_t *ps, pool_checkpoint_stat_t *pcs) +{ + if (ps == NULL || pcs == NULL) + return; + + if (pcs->pcs_state == CS_NONE || + pcs->pcs_state == CS_CHECKPOINT_DISCARDING) + return; + + assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS); + + if (ps->pss_state == DSS_NONE) + return; + + if ((ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) && + ps->pss_end_time < pcs->pcs_start_time) + return; + + if (ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) { + (void) printf(gettext(" scan warning: skipped blocks " + "that are only referenced by the checkpoint.\n")); + } else { + assert(ps->pss_state == DSS_SCANNING); + (void) printf(gettext(" scan warning: skipping blocks " + "that are only referenced by the checkpoint.\n")); + } +} + +/* + * Print out detailed removal status. + */ +static void +print_removal_status(zpool_handle_t *zhp, pool_removal_stat_t *prs) +{ + char copied_buf[7], examined_buf[7], total_buf[7], rate_buf[7]; + time_t start, end; + nvlist_t *config, *nvroot; + nvlist_t **child; + uint_t children; + char *vdev_name; + + if (prs == NULL || prs->prs_state == DSS_NONE) + return; + + /* + * Determine name of vdev. + */ + config = zpool_get_config(zhp, NULL); + nvroot = fnvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE); + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0); + assert(prs->prs_removing_vdev < children); + vdev_name = zpool_vdev_name(g_zfs, zhp, + child[prs->prs_removing_vdev], B_TRUE); + + (void) printf(gettext("remove: ")); + + start = prs->prs_start_time; + end = prs->prs_end_time; + zfs_nicenum(prs->prs_copied, copied_buf, sizeof (copied_buf)); + + /* + * Removal is finished or canceled. 
+ */ + if (prs->prs_state == DSS_FINISHED) { + uint64_t minutes_taken = (end - start) / 60; + + (void) printf(gettext("Removal of vdev %llu copied %s " + "in %lluh%um, completed on %s"), + (longlong_t)prs->prs_removing_vdev, + copied_buf, + (u_longlong_t)(minutes_taken / 60), + (uint_t)(minutes_taken % 60), + ctime((time_t *)&end)); + } else if (prs->prs_state == DSS_CANCELED) { + (void) printf(gettext("Removal of %s canceled on %s"), + vdev_name, ctime(&end)); + } else { + uint64_t copied, total, elapsed, mins_left, hours_left; + double fraction_done; + uint_t rate; + + assert(prs->prs_state == DSS_SCANNING); + + /* + * Removal is in progress. + */ + (void) printf(gettext( + "Evacuation of %s in progress since %s"), + vdev_name, ctime(&start)); + + copied = prs->prs_copied > 0 ? prs->prs_copied : 1; + total = prs->prs_to_copy; + fraction_done = (double)copied / total; + + /* elapsed time for this pass */ + elapsed = time(NULL) - prs->prs_start_time; + elapsed = elapsed > 0 ? elapsed : 1; + rate = copied / elapsed; + rate = rate > 0 ? 
rate : 1; + mins_left = ((total - copied) / rate) / 60; + hours_left = mins_left / 60; + + zfs_nicenum(copied, examined_buf, sizeof (examined_buf)); + zfs_nicenum(total, total_buf, sizeof (total_buf)); + zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); + + /* + * do not print estimated time if hours_left is more than + * 30 days + */ + (void) printf(gettext(" %s copied out of %s at %s/s, " + "%.2f%% done"), + examined_buf, total_buf, rate_buf, 100 * fraction_done); + if (hours_left < (30 * 24)) { + (void) printf(gettext(", %lluh%um to go\n"), + (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); + } else { + (void) printf(gettext( + ", (copy is slow, no estimated time)\n")); + } + } + + if (prs->prs_mapping_memory > 0) { + char mem_buf[7]; + zfs_nicenum(prs->prs_mapping_memory, mem_buf, sizeof (mem_buf)); + (void) printf(gettext(" %s memory used for " + "removed device mappings\n"), + mem_buf); + } +} + +static void +print_checkpoint_status(pool_checkpoint_stat_t *pcs) +{ + time_t start; + char space_buf[7]; + + if (pcs == NULL || pcs->pcs_state == CS_NONE) + return; + + (void) printf(gettext("checkpoint: ")); + + start = pcs->pcs_start_time; + zfs_nicenum(pcs->pcs_space, space_buf, sizeof (space_buf)); + + if (pcs->pcs_state == CS_CHECKPOINT_EXISTS) { + char *date = ctime(&start); + + /* + * ctime() adds a newline at the end of the generated + * string, thus the weird format specifier and the + * strlen() call used to chop it off from the output. 
+ */ + (void) printf(gettext("created %.*s, consumes %s\n"), + strlen(date) - 1, date, space_buf); + return; + } + + assert(pcs->pcs_state == CS_CHECKPOINT_DISCARDING); + + (void) printf(gettext("discarding, %s remaining.\n"), + space_buf); +} + +static void +print_error_log(zpool_handle_t *zhp) +{ + nvlist_t *nverrlist = NULL; + nvpair_t *elem; + char *pathname; + size_t len = MAXPATHLEN * 2; + + if (zpool_get_errlog(zhp, &nverrlist) != 0) { + (void) printf("errors: List of errors unavailable " + "(insufficient privileges)\n"); + return; + } + + (void) printf("errors: Permanent errors have been " + "detected in the following files:\n\n"); + + pathname = safe_malloc(len); + elem = NULL; + while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) { + nvlist_t *nv; + uint64_t dsobj, obj; + + verify(nvpair_value_nvlist(elem, &nv) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET, + &dsobj) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT, + &obj) == 0); + zpool_obj_to_path(zhp, dsobj, obj, pathname, len); + (void) printf("%7s %s\n", "", pathname); + } + free(pathname); + nvlist_free(nverrlist); +} + +static void +print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares, + int namewidth) +{ + uint_t i; + char *name; + + if (nspares == 0) + return; + + (void) printf(gettext("\tspares\n")); + + for (i = 0; i < nspares; i++) { + name = zpool_vdev_name(g_zfs, zhp, spares[i], B_FALSE); + print_status_config(zhp, name, spares[i], + namewidth, 2, B_TRUE); + free(name); + } +} + +static void +print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache, + int namewidth) +{ + uint_t i; + char *name; + + if (nl2cache == 0) + return; + + (void) printf(gettext("\tcache\n")); + + for (i = 0; i < nl2cache; i++) { + name = zpool_vdev_name(g_zfs, zhp, l2cache[i], B_FALSE); + print_status_config(zhp, name, l2cache[i], + namewidth, 2, B_FALSE); + free(name); + } +} + +static void +print_dedup_stats(nvlist_t *config) +{ + ddt_histogram_t *ddh; + 
ddt_stat_t *dds; + ddt_object_t *ddo; + uint_t c; + + /* + * If the pool was faulted then we may not have been able to + * obtain the config. Otherwise, if we have anything in the dedup + * table continue processing the stats. + */ + if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, + (uint64_t **)&ddo, &c) != 0) + return; + + (void) printf("\n"); + (void) printf(gettext(" dedup: ")); + if (ddo->ddo_count == 0) { + (void) printf(gettext("no DDT entries\n")); + return; + } + + (void) printf("DDT entries %llu, size %llu on disk, %llu in core\n", + (u_longlong_t)ddo->ddo_count, + (u_longlong_t)ddo->ddo_dspace, + (u_longlong_t)ddo->ddo_mspace); + + verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, + (uint64_t **)&dds, &c) == 0); + verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, + (uint64_t **)&ddh, &c) == 0); + zpool_dump_ddt(dds, ddh); +} + +/* + * Display a summary of pool status. Displays a summary such as: + * + * pool: tank + * status: DEGRADED + * reason: One or more devices ... + * see: http://illumos.org/msg/ZFS-xxxx-01 + * config: + * mirror DEGRADED + * c1t0d0 OK + * c2t0d0 UNAVAIL + * + * When given the '-v' option, we print out the complete config. If the '-e' + * option is specified, then we print out error rate information as well. + */ +int +status_callback(zpool_handle_t *zhp, void *data) +{ + status_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + char *msgid; + int reason; + const char *health; + uint_t c; + vdev_stat_t *vs; + + config = zpool_get_config(zhp, NULL); + reason = zpool_get_status(zhp, &msgid); + + cbp->cb_count++; + + /* + * If we were given 'zpool status -x', only report those pools with + * problems. 
+ */ + if (cbp->cb_explain && + (reason == ZPOOL_STATUS_OK || + reason == ZPOOL_STATUS_VERSION_OLDER || + reason == ZPOOL_STATUS_NON_NATIVE_ASHIFT || + reason == ZPOOL_STATUS_FEAT_DISABLED)) { + if (!cbp->cb_allpools) { + (void) printf(gettext("pool '%s' is healthy\n"), + zpool_get_name(zhp)); + if (cbp->cb_first) + cbp->cb_first = B_FALSE; + } + return (0); + } + + if (cbp->cb_first) + cbp->cb_first = B_FALSE; + else + (void) printf("\n"); + + nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0); + health = zpool_state_to_name(vs->vs_state, vs->vs_aux); + + (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp)); + (void) printf(gettext(" state: %s\n"), health); + + switch (reason) { + case ZPOOL_STATUS_MISSING_DEV_R: + (void) printf(gettext("status: One or more devices could not " + "be opened. Sufficient replicas exist for\n\tthe pool to " + "continue functioning in a degraded state.\n")); + (void) printf(gettext("action: Attach the missing device and " + "online it using 'zpool online'.\n")); + break; + + case ZPOOL_STATUS_MISSING_DEV_NR: + (void) printf(gettext("status: One or more devices could not " + "be opened. There are insufficient\n\treplicas for the " + "pool to continue functioning.\n")); + (void) printf(gettext("action: Attach the missing device and " + "online it using 'zpool online'.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_LABEL_R: + (void) printf(gettext("status: One or more devices could not " + "be used because the label is missing or\n\tinvalid. " + "Sufficient replicas exist for the pool to continue\n\t" + "functioning in a degraded state.\n")); + (void) printf(gettext("action: Replace the device using " + "'zpool replace'.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_LABEL_NR: + (void) printf(gettext("status: One or more devices could not " + "be used because the label is missing \n\tor invalid. 
" + "There are insufficient replicas for the pool to " + "continue\n\tfunctioning.\n")); + zpool_explain_recover(zpool_get_handle(zhp), + zpool_get_name(zhp), reason, config); + break; + + case ZPOOL_STATUS_FAILING_DEV: + (void) printf(gettext("status: One or more devices has " + "experienced an unrecoverable error. An\n\tattempt was " + "made to correct the error. Applications are " + "unaffected.\n")); + (void) printf(gettext("action: Determine if the device needs " + "to be replaced, and clear the errors\n\tusing " + "'zpool clear' or replace the device with 'zpool " + "replace'.\n")); + break; + + case ZPOOL_STATUS_OFFLINE_DEV: + (void) printf(gettext("status: One or more devices has " + "been taken offline by the administrator.\n\tSufficient " + "replicas exist for the pool to continue functioning in " + "a\n\tdegraded state.\n")); + (void) printf(gettext("action: Online the device using " + "'zpool online' or replace the device with\n\t'zpool " + "replace'.\n")); + break; + + case ZPOOL_STATUS_REMOVED_DEV: + (void) printf(gettext("status: One or more devices has " + "been removed by the administrator.\n\tSufficient " + "replicas exist for the pool to continue functioning in " + "a\n\tdegraded state.\n")); + (void) printf(gettext("action: Online the device using " + "'zpool online' or replace the device with\n\t'zpool " + "replace'.\n")); + break; + + case ZPOOL_STATUS_RESILVERING: + (void) printf(gettext("status: One or more devices is " + "currently being resilvered. The pool will\n\tcontinue " + "to function, possibly in a degraded state.\n")); + (void) printf(gettext("action: Wait for the resilver to " + "complete.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_DATA: + (void) printf(gettext("status: One or more devices has " + "experienced an error resulting in data\n\tcorruption. " + "Applications may be affected.\n")); + (void) printf(gettext("action: Restore the file in question " + "if possible. 
Otherwise restore the\n\tentire pool from " + "backup.\n")); + break; + + case ZPOOL_STATUS_CORRUPT_POOL: + (void) printf(gettext("status: The pool metadata is corrupted " + "and the pool cannot be opened.\n")); + zpool_explain_recover(zpool_get_handle(zhp), + zpool_get_name(zhp), reason, config); + break; + + case ZPOOL_STATUS_VERSION_OLDER: + (void) printf(gettext("status: The pool is formatted using a " + "legacy on-disk format. The pool can\n\tstill be used, " + "but some features are unavailable.\n")); + (void) printf(gettext("action: Upgrade the pool using 'zpool " + "upgrade'. Once this is done, the\n\tpool will no longer " + "be accessible on software that does not support feature\n" + "\tflags.\n")); + break; + + case ZPOOL_STATUS_VERSION_NEWER: + (void) printf(gettext("status: The pool has been upgraded to a " + "newer, incompatible on-disk version.\n\tThe pool cannot " + "be accessed on this system.\n")); + (void) printf(gettext("action: Access the pool from a system " + "running more recent software, or\n\trestore the pool from " + "backup.\n")); + break; + + case ZPOOL_STATUS_FEAT_DISABLED: + (void) printf(gettext("status: Some supported features are not " + "enabled on the pool. The pool can\n\tstill be used, but " + "some features are unavailable.\n")); + (void) printf(gettext("action: Enable all features using " + "'zpool upgrade'. Once this is done,\n\tthe pool may no " + "longer be accessible by software that does not support\n\t" + "the features. 
See zpool-features(7) for details.\n")); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_READ: + (void) printf(gettext("status: The pool cannot be accessed on " + "this system because it uses the\n\tfollowing feature(s) " + "not supported on this system:\n")); + zpool_print_unsup_feat(config); + (void) printf("\n"); + (void) printf(gettext("action: Access the pool from a system " + "that supports the required feature(s),\n\tor restore the " + "pool from backup.\n")); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_WRITE: + (void) printf(gettext("status: The pool can only be accessed " + "in read-only mode on this system. It\n\tcannot be " + "accessed in read-write mode because it uses the " + "following\n\tfeature(s) not supported on this system:\n")); + zpool_print_unsup_feat(config); + (void) printf("\n"); + (void) printf(gettext("action: The pool cannot be accessed in " + "read-write mode. Import the pool with\n" + "\t\"-o readonly=on\", access the pool from a system that " + "supports the\n\trequired feature(s), or restore the " + "pool from backup.\n")); + break; + + case ZPOOL_STATUS_FAULTED_DEV_R: + (void) printf(gettext("status: One or more devices are " + "faulted in response to persistent errors.\n\tSufficient " + "replicas exist for the pool to continue functioning " + "in a\n\tdegraded state.\n")); + (void) printf(gettext("action: Replace the faulted device, " + "or use 'zpool clear' to mark the device\n\trepaired.\n")); + break; + + case ZPOOL_STATUS_FAULTED_DEV_NR: + (void) printf(gettext("status: One or more devices are " + "faulted in response to persistent errors. There are " + "insufficient replicas for the pool to\n\tcontinue " + "functioning.\n")); + (void) printf(gettext("action: Destroy and re-create the pool " + "from a backup source. 
Manually marking the device\n" + "\trepaired using 'zpool clear' may allow some data " + "to be recovered.\n")); + break; + + case ZPOOL_STATUS_IO_FAILURE_WAIT: + case ZPOOL_STATUS_IO_FAILURE_CONTINUE: + (void) printf(gettext("status: One or more devices are " + "faulted in response to IO failures.\n")); + (void) printf(gettext("action: Make sure the affected devices " + "are connected, then run 'zpool clear'.\n")); + break; + + case ZPOOL_STATUS_BAD_LOG: + (void) printf(gettext("status: An intent log record " + "could not be read.\n" + "\tWaiting for adminstrator intervention to fix the " + "faulted pool.\n")); + (void) printf(gettext("action: Either restore the affected " + "device(s) and run 'zpool online',\n" + "\tor ignore the intent log records by running " + "'zpool clear'.\n")); + break; + + case ZPOOL_STATUS_NON_NATIVE_ASHIFT: + (void) printf(gettext("status: One or more devices are " + "configured to use a non-native block size.\n" + "\tExpect reduced performance.\n")); + (void) printf(gettext("action: Replace affected devices with " + "devices that support the\n\tconfigured block size, or " + "migrate data to a properly configured\n\tpool.\n")); + break; + + default: + /* + * The remaining errors can't actually be generated, yet. 
+ */ + assert(reason == ZPOOL_STATUS_OK); + } + + if (msgid != NULL) + (void) printf(gettext(" see: http://illumos.org/msg/%s\n"), + msgid); + + if (config != NULL) { + int namewidth; + uint64_t nerr; + nvlist_t **spares, **l2cache; + uint_t nspares, nl2cache; + pool_checkpoint_stat_t *pcs = NULL; + pool_scan_stat_t *ps = NULL; + pool_removal_stat_t *prs = NULL; + + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c); + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c); + + print_scan_status(ps); + print_checkpoint_scan_warning(ps, pcs); + print_removal_status(zhp, prs); + print_checkpoint_status(pcs); + + namewidth = max_width(zhp, nvroot, 0, 0); + if (namewidth < 10) + namewidth = 10; + + (void) printf(gettext("config:\n\n")); + (void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth, + "NAME", "STATE", "READ", "WRITE", "CKSUM"); + print_status_config(zhp, zpool_get_name(zhp), nvroot, + namewidth, 0, B_FALSE); + + if (num_logs(nvroot) > 0) + print_logs(zhp, nvroot, namewidth, B_TRUE); + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) + print_l2cache(zhp, l2cache, nl2cache, namewidth); + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) + print_spares(zhp, spares, nspares, namewidth); + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, + &nerr) == 0) { + nvlist_t *nverrlist = NULL; + + /* + * If the approximate error count is small, get a + * precise count by fetching the entire log and + * uniquifying the results. 
+ */ + if (nerr > 0 && nerr < 100 && !cbp->cb_verbose && + zpool_get_errlog(zhp, &nverrlist) == 0) { + nvpair_t *elem; + + elem = NULL; + nerr = 0; + while ((elem = nvlist_next_nvpair(nverrlist, + elem)) != NULL) { + nerr++; + } + } + nvlist_free(nverrlist); + + (void) printf("\n"); + + if (nerr == 0) + (void) printf(gettext("errors: No known data " + "errors\n")); + else if (!cbp->cb_verbose) + (void) printf(gettext("errors: %llu data " + "errors, use '-v' for a list\n"), + (u_longlong_t)nerr); + else + print_error_log(zhp); + } + + if (cbp->cb_dedup_stats) + print_dedup_stats(config); + } else { + (void) printf(gettext("config: The configuration cannot be " + "determined.\n")); + } + + return (0); +} + +/* + * zpool status [-vx] [-T d|u] [pool] ... [interval [count]] + * + * -v Display complete error logs + * -x Display only pools with potential problems + * -D Display dedup status (undocumented) + * -T Display a timestamp in date(1) or Unix format + * + * Describes the health status of all pools or some subset. 
+ */ +int +zpool_do_status(int argc, char **argv) +{ + int c; + int ret; + unsigned long interval = 0, count = 0; + status_cbdata_t cb = { 0 }; + + /* check options */ + while ((c = getopt(argc, argv, "vxDT:")) != -1) { + switch (c) { + case 'v': + cb.cb_verbose = B_TRUE; + break; + case 'x': + cb.cb_explain = B_TRUE; + break; + case 'D': + cb.cb_dedup_stats = B_TRUE; + break; + case 'T': + get_timestamp_arg(*optarg); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + get_interval_count(&argc, argv, &interval, &count); + + if (argc == 0) + cb.cb_allpools = B_TRUE; + + cb.cb_first = B_TRUE; + + for (;;) { + if (timestamp_fmt != NODATE) + print_timestamp(timestamp_fmt); + + ret = for_each_pool(argc, argv, B_TRUE, NULL, + status_callback, &cb); + + if (argc == 0 && cb.cb_count == 0) + (void) printf(gettext("no pools available\n")); + else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) + (void) printf(gettext("all pools are healthy\n")); + + if (ret != 0) + return (ret); + + if (interval == 0) + break; + + if (count != 0 && --count == 0) + break; + + (void) sleep(interval); + } + + return (0); +} + +typedef struct upgrade_cbdata { + boolean_t cb_first; + boolean_t cb_unavail; + char cb_poolname[ZFS_MAX_DATASET_NAME_LEN]; + int cb_argc; + uint64_t cb_version; + char **cb_argv; +} upgrade_cbdata_t; + +#ifdef __FreeBSD__ +static int +is_root_pool(zpool_handle_t *zhp) +{ + static struct statfs sfs; + static char *poolname = NULL; + static boolean_t stated = B_FALSE; + char *slash; + + if (!stated) { + stated = B_TRUE; + if (statfs("/", &sfs) == -1) { + (void) fprintf(stderr, + "Unable to stat root file system: %s.\n", + strerror(errno)); + return (0); + } + if (strcmp(sfs.f_fstypename, "zfs") != 0) + return (0); + poolname = sfs.f_mntfromname; + if ((slash = strchr(poolname, '/')) != NULL) + *slash = '\0'; + } + return (poolname != NULL && strcmp(poolname, 
zpool_get_name(zhp)) == 0); +} + +static void +root_pool_upgrade_check(zpool_handle_t *zhp, char *poolname, int size) +{ + + if (poolname[0] == '\0' && is_root_pool(zhp)) + (void) strlcpy(poolname, zpool_get_name(zhp), size); +} +#endif /* FreeBSD */ + +static int +upgrade_version(zpool_handle_t *zhp, uint64_t version) +{ + int ret; + nvlist_t *config; + uint64_t oldversion; + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &oldversion) == 0); + + assert(SPA_VERSION_IS_SUPPORTED(oldversion)); + assert(oldversion < version); + + ret = zpool_upgrade(zhp, version); + if (ret != 0) + return (ret); + + if (version >= SPA_VERSION_FEATURES) { + (void) printf(gettext("Successfully upgraded " + "'%s' from version %llu to feature flags.\n"), + zpool_get_name(zhp), oldversion); + } else { + (void) printf(gettext("Successfully upgraded " + "'%s' from version %llu to version %llu.\n"), + zpool_get_name(zhp), oldversion, version); + } + + return (0); +} + +static int +upgrade_enable_all(zpool_handle_t *zhp, int *countp) +{ + int i, ret, count; + boolean_t firstff = B_TRUE; + nvlist_t *enabled = zpool_get_features(zhp); + + count = 0; + for (i = 0; i < SPA_FEATURES; i++) { + const char *fname = spa_feature_table[i].fi_uname; + const char *fguid = spa_feature_table[i].fi_guid; + if (!nvlist_exists(enabled, fguid)) { + char *propname; + verify(-1 != asprintf(&propname, "feature@%s", fname)); + ret = zpool_set_prop(zhp, propname, + ZFS_FEATURE_ENABLED); + if (ret != 0) { + free(propname); + return (ret); + } + count++; + + if (firstff) { + (void) printf(gettext("Enabled the " + "following features on '%s':\n"), + zpool_get_name(zhp)); + firstff = B_FALSE; + } + (void) printf(gettext(" %s\n"), fname); + free(propname); + } + } + + if (countp != NULL) + *countp = count; + return (0); +} + +static int +upgrade_cb(zpool_handle_t *zhp, void *arg) +{ + upgrade_cbdata_t *cbp = arg; + nvlist_t *config; + uint64_t version; + boolean_t 
printnl = B_FALSE; + int ret; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + (void) fprintf(stderr, gettext("cannot upgrade '%s': pool is " + "currently unavailable.\n\n"), zpool_get_name(zhp)); + cbp->cb_unavail = B_TRUE; + /* Allow iteration to continue. */ + return (0); + } + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &version) == 0); + + assert(SPA_VERSION_IS_SUPPORTED(version)); + + if (version < cbp->cb_version) { + cbp->cb_first = B_FALSE; + ret = upgrade_version(zhp, cbp->cb_version); + if (ret != 0) + return (ret); +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD__ */ + printnl = B_TRUE; + +#ifdef illumos + /* + * If they did "zpool upgrade -a", then we could + * be doing ioctls to different pools. We need + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). + */ + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; +#endif + } + + if (cbp->cb_version >= SPA_VERSION_FEATURES) { + int count; + ret = upgrade_enable_all(zhp, &count); + if (ret != 0) + return (ret); + + if (count > 0) { + cbp->cb_first = B_FALSE; + printnl = B_TRUE; +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD__ */ + /* + * If they did "zpool upgrade -a", then we could + * be doing ioctls to different pools. We need + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). 
+ */ + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; + } + } + + if (printnl) { + (void) printf(gettext("\n")); + } + + return (0); +} + +static int +upgrade_list_unavail(zpool_handle_t *zhp, void *arg) +{ + upgrade_cbdata_t *cbp = arg; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + if (cbp->cb_first) { + (void) fprintf(stderr, gettext("The following pools " + "are unavailable and cannot be upgraded as this " + "time.\n\n")); + (void) fprintf(stderr, gettext("POOL\n")); + (void) fprintf(stderr, gettext("------------\n")); + cbp->cb_first = B_FALSE; + } + (void) printf(gettext("%s\n"), zpool_get_name(zhp)); + cbp->cb_unavail = B_TRUE; + } + return (0); +} + +static int +upgrade_list_older_cb(zpool_handle_t *zhp, void *arg) +{ + upgrade_cbdata_t *cbp = arg; + nvlist_t *config; + uint64_t version; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + /* + * This will have been reported by upgrade_list_unavail so + * just allow iteration to continue. + */ + cbp->cb_unavail = B_TRUE; + return (0); + } + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &version) == 0); + + assert(SPA_VERSION_IS_SUPPORTED(version)); + + if (version < SPA_VERSION_FEATURES) { + if (cbp->cb_first) { + (void) printf(gettext("The following pools are " + "formatted with legacy version numbers and can\n" + "be upgraded to use feature flags. 
After " + "being upgraded, these pools\nwill no " + "longer be accessible by software that does not " + "support feature\nflags.\n\n")); + (void) printf(gettext("VER POOL\n")); + (void) printf(gettext("--- ------------\n")); + cbp->cb_first = B_FALSE; + } + + (void) printf("%2llu %s\n", (u_longlong_t)version, + zpool_get_name(zhp)); + } + + return (0); +} + +static int +upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) +{ + upgrade_cbdata_t *cbp = arg; + nvlist_t *config; + uint64_t version; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + /* + * This will have been reported by upgrade_list_unavail so + * just allow iteration to continue. + */ + cbp->cb_unavail = B_TRUE; + return (0); + } + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &version) == 0); + + if (version >= SPA_VERSION_FEATURES) { + int i; + boolean_t poolfirst = B_TRUE; + nvlist_t *enabled = zpool_get_features(zhp); + + for (i = 0; i < SPA_FEATURES; i++) { + const char *fguid = spa_feature_table[i].fi_guid; + const char *fname = spa_feature_table[i].fi_uname; + if (!nvlist_exists(enabled, fguid)) { + if (cbp->cb_first) { + (void) printf(gettext("\nSome " + "supported features are not " + "enabled on the following pools. " + "Once a\nfeature is enabled the " + "pool may become incompatible with " + "software\nthat does not support " + "the feature. 
See " + "zpool-features(7) for " + "details.\n\n")); + (void) printf(gettext("POOL " + "FEATURE\n")); + (void) printf(gettext("------" + "---------\n")); + cbp->cb_first = B_FALSE; + } + + if (poolfirst) { + (void) printf(gettext("%s\n"), + zpool_get_name(zhp)); + poolfirst = B_FALSE; + } + + (void) printf(gettext(" %s\n"), fname); + } + } + } + + return (0); +} + +/* ARGSUSED */ +static int +upgrade_one(zpool_handle_t *zhp, void *data) +{ + boolean_t printnl = B_FALSE; + upgrade_cbdata_t *cbp = data; + uint64_t cur_version; + int ret; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + (void) fprintf(stderr, gettext("cannot upgrade '%s': pool is " + "is currently unavailable.\n\n"), zpool_get_name(zhp)); + cbp->cb_unavail = B_TRUE; + return (1); + } + + if (strcmp("log", zpool_get_name(zhp)) == 0) { + (void) printf(gettext("'log' is now a reserved word\n" + "Pool 'log' must be renamed using export and import" + " to upgrade.\n\n")); + return (1); + } + + cur_version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + if (cur_version > cbp->cb_version) { + (void) printf(gettext("Pool '%s' is already formatted " + "using more current version '%llu'.\n\n"), + zpool_get_name(zhp), cur_version); + return (0); + } + + if (cbp->cb_version != SPA_VERSION && cur_version == cbp->cb_version) { + (void) printf(gettext("Pool '%s' is already formatted " + "using version %llu.\n\n"), zpool_get_name(zhp), + cbp->cb_version); + return (0); + } + + if (cur_version != cbp->cb_version) { + printnl = B_TRUE; + ret = upgrade_version(zhp, cbp->cb_version); + if (ret != 0) + return (ret); +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD__ */ + } + + if (cbp->cb_version >= SPA_VERSION_FEATURES) { + int count = 0; + ret = upgrade_enable_all(zhp, &count); + if (ret != 0) + return (ret); + + if (count != 0) { + printnl = B_TRUE; +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + 
sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD __*/ + } else if (cur_version == SPA_VERSION) { + (void) printf(gettext("Pool '%s' already has all " + "supported features enabled.\n\n"), + zpool_get_name(zhp)); + } + } + + if (printnl) { + (void) printf(gettext("\n")); + } + + return (0); +} + +/* + * zpool upgrade + * zpool upgrade -v + * zpool upgrade [-V version] <-a | pool ...> + * + * With no arguments, display downrev'd ZFS pool available for upgrade. + * Individual pools can be upgraded by specifying the pool, and '-a' will + * upgrade all pools. + */ +int +zpool_do_upgrade(int argc, char **argv) +{ + int c; + upgrade_cbdata_t cb = { 0 }; + int ret = 0; + boolean_t showversions = B_FALSE; + boolean_t upgradeall = B_FALSE; + char *end; + + + /* check options */ + while ((c = getopt(argc, argv, ":avV:")) != -1) { + switch (c) { + case 'a': + upgradeall = B_TRUE; + break; + case 'v': + showversions = B_TRUE; + break; + case 'V': + cb.cb_version = strtoll(optarg, &end, 10); + if (*end != '\0' || + !SPA_VERSION_IS_SUPPORTED(cb.cb_version)) { + (void) fprintf(stderr, + gettext("invalid version '%s'\n"), optarg); + usage(B_FALSE); + } + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + cb.cb_argc = argc; + cb.cb_argv = argv; + argc -= optind; + argv += optind; + + if (cb.cb_version == 0) { + cb.cb_version = SPA_VERSION; + } else if (!upgradeall && argc == 0) { + (void) fprintf(stderr, gettext("-V option is " + "incompatible with other arguments\n")); + usage(B_FALSE); + } + + if (showversions) { + if (upgradeall || argc != 0) { + (void) fprintf(stderr, gettext("-v option is " + "incompatible with other arguments\n")); + usage(B_FALSE); + } + } else if (upgradeall) { + if (argc != 0) { + (void) fprintf(stderr, gettext("-a option should not " + "be used along with a pool 
name\n")); + usage(B_FALSE); + } + } + + (void) printf(gettext("This system supports ZFS pool feature " + "flags.\n\n")); + if (showversions) { + int i; + + (void) printf(gettext("The following features are " + "supported:\n\n")); + (void) printf(gettext("FEAT DESCRIPTION\n")); + (void) printf("----------------------------------------------" + "---------------\n"); + for (i = 0; i < SPA_FEATURES; i++) { + zfeature_info_t *fi = &spa_feature_table[i]; + const char *ro = + (fi->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? + " (read-only compatible)" : ""; + + (void) printf("%-37s%s\n", fi->fi_uname, ro); + (void) printf(" %s\n", fi->fi_desc); + } + (void) printf("\n"); + + (void) printf(gettext("The following legacy versions are also " + "supported:\n\n")); + (void) printf(gettext("VER DESCRIPTION\n")); + (void) printf("--- -----------------------------------------" + "---------------\n"); + (void) printf(gettext(" 1 Initial ZFS version\n")); + (void) printf(gettext(" 2 Ditto blocks " + "(replicated metadata)\n")); + (void) printf(gettext(" 3 Hot spares and double parity " + "RAID-Z\n")); + (void) printf(gettext(" 4 zpool history\n")); + (void) printf(gettext(" 5 Compression using the gzip " + "algorithm\n")); + (void) printf(gettext(" 6 bootfs pool property\n")); + (void) printf(gettext(" 7 Separate intent log devices\n")); + (void) printf(gettext(" 8 Delegated administration\n")); + (void) printf(gettext(" 9 refquota and refreservation " + "properties\n")); + (void) printf(gettext(" 10 Cache devices\n")); + (void) printf(gettext(" 11 Improved scrub performance\n")); + (void) printf(gettext(" 12 Snapshot properties\n")); + (void) printf(gettext(" 13 snapused property\n")); + (void) printf(gettext(" 14 passthrough-x aclinherit\n")); + (void) printf(gettext(" 15 user/group space accounting\n")); + (void) printf(gettext(" 16 stmf property support\n")); + (void) printf(gettext(" 17 Triple-parity RAID-Z\n")); + (void) printf(gettext(" 18 Snapshot user holds\n")); + (void) 
printf(gettext(" 19 Log device removal\n")); + (void) printf(gettext(" 20 Compression using zle " + "(zero-length encoding)\n")); + (void) printf(gettext(" 21 Deduplication\n")); + (void) printf(gettext(" 22 Received properties\n")); + (void) printf(gettext(" 23 Slim ZIL\n")); + (void) printf(gettext(" 24 System attributes\n")); + (void) printf(gettext(" 25 Improved scrub stats\n")); + (void) printf(gettext(" 26 Improved snapshot deletion " + "performance\n")); + (void) printf(gettext(" 27 Improved snapshot creation " + "performance\n")); + (void) printf(gettext(" 28 Multiple vdev replacements\n")); + (void) printf(gettext("\nFor more information on a particular " + "version, including supported releases,\n")); + (void) printf(gettext("see the ZFS Administration Guide.\n\n")); + } else if (argc == 0 && upgradeall) { + cb.cb_first = B_TRUE; + ret = zpool_iter(g_zfs, upgrade_cb, &cb); + if (ret == 0 && cb.cb_first) { + if (cb.cb_version == SPA_VERSION) { + (void) printf(gettext("All %spools are already " + "formatted using feature flags.\n\n"), + cb.cb_unavail ? gettext("available ") : ""); + (void) printf(gettext("Every %sfeature flags " + "pool already has all supported features " + "enabled.\n"), + cb.cb_unavail ? gettext("available ") : ""); + } else { + (void) printf(gettext("All pools are already " + "formatted with version %llu or higher.\n"), + cb.cb_version); + } + } + } else if (argc == 0) { + cb.cb_first = B_TRUE; + ret = zpool_iter(g_zfs, upgrade_list_unavail, &cb); + assert(ret == 0); + + if (!cb.cb_first) { + (void) fprintf(stderr, "\n"); + } + + cb.cb_first = B_TRUE; + ret = zpool_iter(g_zfs, upgrade_list_older_cb, &cb); + assert(ret == 0); + + if (cb.cb_first) { + (void) printf(gettext("All %spools are formatted using " + "feature flags.\n\n"), cb.cb_unavail ? 
+ gettext("available ") : ""); + } else { + (void) printf(gettext("\nUse 'zpool upgrade -v' " + "for a list of available legacy versions.\n")); + } + + cb.cb_first = B_TRUE; + ret = zpool_iter(g_zfs, upgrade_list_disabled_cb, &cb); + assert(ret == 0); + + if (cb.cb_first) { + (void) printf(gettext("Every %sfeature flags pool has " + "all supported features enabled.\n"), + cb.cb_unavail ? gettext("available ") : ""); + } else { + (void) printf(gettext("\n")); + } + } else { + ret = for_each_pool(argc, argv, B_TRUE, NULL, + upgrade_one, &cb); + } + + if (cb.cb_poolname[0] != '\0') { + (void) printf( + "If you boot from pool '%s', don't forget to update boot code.\n" + "Assuming you use GPT partitioning and da0 is your boot disk\n" + "the following command will do it:\n" + "\n" + "\tgpart bootcode -b /boot/pmbr -p /boot/gptzfsboot -i 1 da0\n\n", + cb.cb_poolname); + } + + return (ret); +} + +typedef struct hist_cbdata { + boolean_t first; + boolean_t longfmt; + boolean_t internal; +} hist_cbdata_t; + +/* + * Print out the command history for a specific pool. 
+ */ +static int +get_history_one(zpool_handle_t *zhp, void *data) +{ + nvlist_t *nvhis; + nvlist_t **records; + uint_t numrecords; + int ret, i; + hist_cbdata_t *cb = (hist_cbdata_t *)data; + + cb->first = B_FALSE; + + (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); + + if ((ret = zpool_get_history(zhp, &nvhis)) != 0) + return (ret); + + verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, + &records, &numrecords) == 0); + for (i = 0; i < numrecords; i++) { + nvlist_t *rec = records[i]; + char tbuf[30] = ""; + + if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { + time_t tsec; + struct tm t; + + tsec = fnvlist_lookup_uint64(records[i], + ZPOOL_HIST_TIME); + (void) localtime_r(&tsec, &t); + (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); + } + + if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { + (void) printf("%s %s", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { + int ievent = + fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); + if (!cb->internal) + continue; + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { + (void) printf("%s unrecognized record:\n", + tbuf); + dump_nvlist(rec, 4); + continue; + } + (void) printf("%s [internal %s txg:%lld] %s", tbuf, + zfs_history_event_names[ievent], + fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { + if (!cb->internal) + continue; + (void) printf("%s [txg:%lld] %s", tbuf, + fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); + if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { + (void) printf(" %s (%llu)", + fnvlist_lookup_string(rec, + ZPOOL_HIST_DSNAME), + fnvlist_lookup_uint64(rec, + ZPOOL_HIST_DSID)); + } + (void) printf(" %s", fnvlist_lookup_string(rec, + ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) { + if (!cb->internal) + continue; + (void) printf("%s ioctl %s\n", tbuf, + 
fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); + if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { + (void) printf(" input:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_INPUT_NVL), 8); + } + if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { + (void) printf(" output:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_OUTPUT_NVL), 8); + } + if (nvlist_exists(rec, ZPOOL_HIST_ERRNO)) { + (void) printf(" errno: %lld\n", + fnvlist_lookup_int64(rec, + ZPOOL_HIST_ERRNO)); + } + } else { + if (!cb->internal) + continue; + (void) printf("%s unrecognized record:\n", tbuf); + dump_nvlist(rec, 4); + } + + if (!cb->longfmt) { + (void) printf("\n"); + continue; + } + (void) printf(" ["); + if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { + uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); + struct passwd *pwd = getpwuid(who); + (void) printf("user %d ", (int)who); + if (pwd != NULL) + (void) printf("(%s) ", pwd->pw_name); + } + if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { + (void) printf("on %s", + fnvlist_lookup_string(rec, ZPOOL_HIST_HOST)); + } + if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { + (void) printf(":%s", + fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE)); + } + (void) printf("]"); + (void) printf("\n"); + } + (void) printf("\n"); + nvlist_free(nvhis); + + return (ret); +} + +/* + * zpool history <pool> + * + * Displays the history of commands that modified pools. 
+ */ +int +zpool_do_history(int argc, char **argv) +{ + hist_cbdata_t cbdata = { 0 }; + int ret; + int c; + + cbdata.first = B_TRUE; + /* check options */ + while ((c = getopt(argc, argv, "li")) != -1) { + switch (c) { + case 'l': + cbdata.longfmt = B_TRUE; + break; + case 'i': + cbdata.internal = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + argc -= optind; + argv += optind; + + ret = for_each_pool(argc, argv, B_FALSE, NULL, get_history_one, + &cbdata); + + if (argc == 0 && cbdata.first == B_TRUE) { + (void) printf(gettext("no pools available\n")); + return (0); + } + + return (ret); +} + +static int +get_callback(zpool_handle_t *zhp, void *data) +{ + zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; + char value[MAXNAMELEN]; + zprop_source_t srctype; + zprop_list_t *pl; + + for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { + + /* + * Skip the special fake placeholder. This will also skip + * over the name property when 'all' is specified. + */ + if (pl->pl_prop == ZPOOL_PROP_NAME && + pl == cbp->cb_proplist) + continue; + + if (pl->pl_prop == ZPROP_INVAL && + (zpool_prop_feature(pl->pl_user_prop) || + zpool_prop_unsupported(pl->pl_user_prop))) { + srctype = ZPROP_SRC_LOCAL; + + if (zpool_prop_get_feature(zhp, pl->pl_user_prop, + value, sizeof (value)) == 0) { + zprop_print_one_property(zpool_get_name(zhp), + cbp, pl->pl_user_prop, value, srctype, + NULL, NULL); + } + } else { + if (zpool_get_prop(zhp, pl->pl_prop, value, + sizeof (value), &srctype, cbp->cb_literal) != 0) + continue; + + zprop_print_one_property(zpool_get_name(zhp), cbp, + zpool_prop_to_name(pl->pl_prop), value, srctype, + NULL, NULL); + } + } + return (0); +} + +/* + * zpool get [-Hp] [-o "all" | field[,...]] <"all" | property[,...]> <pool> ... + * + * -H Scripted mode. Don't display headers, and separate properties + * by a single tab. + * -o List of columns to display. 
Defaults to + * "name,property,value,source". + * -p Diplay values in parsable (exact) format. + * + * Get properties of pools in the system. Output space statistics + * for each one as well as other attributes. + */ +int +zpool_do_get(int argc, char **argv) +{ + zprop_get_cbdata_t cb = { 0 }; + zprop_list_t fake_name = { 0 }; + int ret; + int c, i; + char *value; + + cb.cb_first = B_TRUE; + + /* + * Set up default columns and sources. + */ + cb.cb_sources = ZPROP_SRC_ALL; + cb.cb_columns[0] = GET_COL_NAME; + cb.cb_columns[1] = GET_COL_PROPERTY; + cb.cb_columns[2] = GET_COL_VALUE; + cb.cb_columns[3] = GET_COL_SOURCE; + cb.cb_type = ZFS_TYPE_POOL; + + /* check options */ + while ((c = getopt(argc, argv, ":Hpo:")) != -1) { + switch (c) { + case 'p': + cb.cb_literal = B_TRUE; + break; + case 'H': + cb.cb_scripted = B_TRUE; + break; + case 'o': + bzero(&cb.cb_columns, sizeof (cb.cb_columns)); + i = 0; + while (*optarg != '\0') { + static char *col_subopts[] = + { "name", "property", "value", "source", + "all", NULL }; + + if (i == ZFS_GET_NCOLS) { + (void) fprintf(stderr, gettext("too " + "many fields given to -o " + "option\n")); + usage(B_FALSE); + } + + switch (getsubopt(&optarg, col_subopts, + &value)) { + case 0: + cb.cb_columns[i++] = GET_COL_NAME; + break; + case 1: + cb.cb_columns[i++] = GET_COL_PROPERTY; + break; + case 2: + cb.cb_columns[i++] = GET_COL_VALUE; + break; + case 3: + cb.cb_columns[i++] = GET_COL_SOURCE; + break; + case 4: + if (i > 0) { + (void) fprintf(stderr, + gettext("\"all\" conflicts " + "with specific fields " + "given to -o option\n")); + usage(B_FALSE); + } + cb.cb_columns[0] = GET_COL_NAME; + cb.cb_columns[1] = GET_COL_PROPERTY; + cb.cb_columns[2] = GET_COL_VALUE; + cb.cb_columns[3] = GET_COL_SOURCE; + i = ZFS_GET_NCOLS; + break; + default: + (void) fprintf(stderr, + gettext("invalid column name " + "'%s'\n"), suboptarg); + usage(B_FALSE); + } + } + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); 
+ usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing property " + "argument\n")); + usage(B_FALSE); + } + + if (zprop_get_list(g_zfs, argv[0], &cb.cb_proplist, + ZFS_TYPE_POOL) != 0) + usage(B_FALSE); + + argc--; + argv++; + + if (cb.cb_proplist != NULL) { + fake_name.pl_prop = ZPOOL_PROP_NAME; + fake_name.pl_width = strlen(gettext("NAME")); + fake_name.pl_next = cb.cb_proplist; + cb.cb_proplist = &fake_name; + } + + ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, + get_callback, &cb); + + if (cb.cb_proplist == &fake_name) + zprop_free_list(fake_name.pl_next); + else + zprop_free_list(cb.cb_proplist); + + return (ret); +} + +typedef struct set_cbdata { + char *cb_propname; + char *cb_value; + boolean_t cb_any_successful; +} set_cbdata_t; + +int +set_callback(zpool_handle_t *zhp, void *data) +{ + int error; + set_cbdata_t *cb = (set_cbdata_t *)data; + + error = zpool_set_prop(zhp, cb->cb_propname, cb->cb_value); + + if (!error) + cb->cb_any_successful = B_TRUE; + + return (error); +} + +int +zpool_do_set(int argc, char **argv) +{ + set_cbdata_t cb = { 0 }; + int error; + + if (argc > 1 && argv[1][0] == '-') { + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + argv[1][1]); + usage(B_FALSE); + } + + if (argc < 2) { + (void) fprintf(stderr, gettext("missing property=value " + "argument\n")); + usage(B_FALSE); + } + + if (argc < 3) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + + if (argc > 3) { + (void) fprintf(stderr, gettext("too many pool names\n")); + usage(B_FALSE); + } + + cb.cb_propname = argv[1]; + cb.cb_value = strchr(cb.cb_propname, '='); + if (cb.cb_value == NULL) { + (void) fprintf(stderr, gettext("missing value in " + "property=value argument\n")); + usage(B_FALSE); + } + + *(cb.cb_value) = '\0'; + cb.cb_value++; + + error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL, + set_callback, &cb); + + return (error); +} + +static int 
+find_command_idx(char *command, int *idx) +{ + int i; + + for (i = 0; i < NCOMMAND; i++) { + if (command_table[i].name == NULL) + continue; + + if (strcmp(command, command_table[i].name) == 0) { + *idx = i; + return (0); + } + } + return (1); +} + +int +main(int argc, char **argv) +{ + int ret = 0; + int i; + char *cmdname; + + (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); + + if ((g_zfs = libzfs_init()) == NULL) { + (void) fprintf(stderr, gettext("internal error: failed to " + "initialize ZFS library\n")); + return (1); + } + + libzfs_print_on_error(g_zfs, B_TRUE); + + opterr = 0; + + /* + * Make sure the user has specified some command. + */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing command\n")); + usage(B_FALSE); + } + + cmdname = argv[1]; + + /* + * Special case '-?' + */ + if (strcmp(cmdname, "-?") == 0) + usage(B_TRUE); + + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); + + /* + * Run the appropriate command. + */ + if (find_command_idx(cmdname, &i) == 0) { + current_command = &command_table[i]; + ret = command_table[i].func(argc - 1, argv + 1); + } else if (strchr(cmdname, '=')) { + verify(find_command_idx("set", &i) == 0); + current_command = &command_table[i]; + ret = command_table[i].func(argc, argv); + } else if (strcmp(cmdname, "freeze") == 0 && argc == 3) { + /* + * 'freeze' is a vile debugging abomination, so we treat + * it as such. + */ + zfs_cmd_t zc = { 0 }; + (void) strlcpy(zc.zc_name, argv[2], sizeof (zc.zc_name)); + return (!!zfs_ioctl(g_zfs, ZFS_IOC_POOL_FREEZE, &zc)); + } else { + (void) fprintf(stderr, gettext("unrecognized " + "command '%s'\n"), cmdname); + usage(B_FALSE); + } + + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + + libzfs_fini(g_zfs); + + /* + * The 'ZFS_ABORT' environment variable causes us to dump core on exit + * for the purposes of running ::findleaks. 
+ */ + if (getenv("ZFS_ABORT") != NULL) { + (void) printf("dumping core by request\n"); + abort(); + } + + return (ret); +} diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c new file mode 100644 index 000000000000..c7a002efb17c --- /dev/null +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <errno.h> +#include <libgen.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#include "zpool_util.h" + +/* + * Utility function to guarantee malloc() success. + */ +void * +safe_malloc(size_t size) +{ + void *data; + + if ((data = calloc(1, size)) == NULL) { + (void) fprintf(stderr, "internal error: out of memory\n"); + exit(1); + } + + return (data); +} + +/* + * Display an out of memory error message and abort the current program. 
+ */ +void +zpool_no_memory(void) +{ + assert(errno == ENOMEM); + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + exit(1); +} + +/* + * Return the number of logs in supplied nvlist + */ +uint_t +num_logs(nvlist_t *nv) +{ + uint_t nlogs = 0; + uint_t c, children; + nvlist_t **child; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return (0); + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE; + + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if (is_log) + nlogs++; + } + return (nlogs); +} diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h new file mode 100644 index 000000000000..8777edc9de17 --- /dev/null +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef ZPOOL_UTIL_H +#define ZPOOL_UTIL_H + +#include <libnvpair.h> +#include <libzfs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Basic utility functions + */ +void *safe_malloc(size_t); +void zpool_no_memory(void); +uint_t num_logs(nvlist_t *nv); + +/* + * Virtual device functions + */ + +nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, + boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, + uint64_t boot_size, int argc, char **argv); +nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, + nvlist_t *props, splitflags_t flags, int argc, char **argv); + +/* + * Pool list functions + */ +int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **, + zpool_iter_f, void *); + +typedef struct zpool_list zpool_list_t; + +zpool_list_t *pool_list_get(int, char **, zprop_list_t **, int *); +void pool_list_update(zpool_list_t *); +int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); +void pool_list_free(zpool_list_t *); +int pool_list_count(zpool_list_t *); +void pool_list_remove(zpool_list_t *, zpool_handle_t *); + +libzfs_handle_t *g_zfs; + +#ifdef __cplusplus +} +#endif + +#endif /* ZPOOL_UTIL_H */ diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c new file mode 100644 index 000000000000..f72e3f7fc97c --- /dev/null +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c @@ -0,0 +1,1578 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. + * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. + */ + +/* + * Functions to convert between a list of vdevs and an nvlist representing the + * configuration. Each entry in the list can be one of: + * + * Device vdevs + * disk=(path=..., devid=...) + * file=(path=...) + * + * Group vdevs + * raidz[1|2]=(...) + * mirror=(...) + * + * Hot spares + * + * While the underlying implementation supports it, group vdevs cannot contain + * other group vdevs. All userland verification of devices is contained within + * this file. If successful, the nvlist returned can be passed directly to the + * kernel; we've done as much verification as possible in userland. + * + * Hot spares are a special case, and passed down as an array of disk vdevs, at + * the same level as the root of the vdev tree. + * + * The only function exported by this file is 'make_root_vdev'. The + * function performs several passes: + * + * 1. Construct the vdev specification. Performs syntax validation and + * makes sure each device is valid. + * 2. Check for devices in use. Using libdiskmgt, makes sure that no + * devices are also in use. Some can be overridden using the 'force' + * flag, others cannot. + * 3. Check for replication errors if the 'force' flag is not specified. + * validates that the replication level is consistent across the + * entire pool. + * 4. Call libzfs to label any whole disks with an EFI label. 
+ */ + +#include <assert.h> +#include <devid.h> +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <libnvpair.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <paths.h> +#include <sys/stat.h> +#include <sys/disk.h> +#include <sys/mntent.h> +#include <libgeom.h> + +#include "zpool_util.h" + +#define BACKUP_SLICE "s2" + +/* + * For any given vdev specification, we can have multiple errors. The + * vdev_error() function keeps track of whether we have seen an error yet, and + * prints out a header if its the first error we've seen. + */ +boolean_t error_seen; +boolean_t is_force; + +/*PRINTFLIKE1*/ +static void +vdev_error(const char *fmt, ...) +{ + va_list ap; + + if (!error_seen) { + (void) fprintf(stderr, gettext("invalid vdev specification\n")); + if (!is_force) + (void) fprintf(stderr, gettext("use '-f' to override " + "the following errors:\n")); + else + (void) fprintf(stderr, gettext("the following errors " + "must be manually repaired:\n")); + error_seen = B_TRUE; + } + + va_start(ap, fmt); + (void) vfprintf(stderr, fmt, ap); + va_end(ap); +} + +#ifdef illumos +static void +libdiskmgt_error(int error) +{ + /* + * ENXIO/ENODEV is a valid error message if the device doesn't live in + * /dev/dsk. Don't bother printing an error message in this case. + */ + if (error == ENXIO || error == ENODEV) + return; + + (void) fprintf(stderr, gettext("warning: device in use checking " + "failed: %s\n"), strerror(error)); +} + +/* + * Validate a device, passing the bulk of the work off to libdiskmgt. 
+ */ +static int +check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) +{ + char *msg; + int error = 0; + dm_who_type_t who; + + if (force) + who = DM_WHO_ZPOOL_FORCE; + else if (isspare) + who = DM_WHO_ZPOOL_SPARE; + else + who = DM_WHO_ZPOOL; + + if (dm_inuse((char *)path, &msg, who, &error) || error) { + if (error != 0) { + libdiskmgt_error(error); + return (0); + } else { + vdev_error("%s", msg); + free(msg); + return (-1); + } + } + + /* + * If we're given a whole disk, ignore overlapping slices since we're + * about to label it anyway. + */ + error = 0; + if (!wholedisk && !force && + (dm_isoverlapping((char *)path, &msg, &error) || error)) { + if (error == 0) { + /* dm_isoverlapping returned -1 */ + vdev_error(gettext("%s overlaps with %s\n"), path, msg); + free(msg); + return (-1); + } else if (error != ENODEV) { + /* libdiskmgt's devcache only handles physical drives */ + libdiskmgt_error(error); + return (0); + } + } + + return (0); +} + + +/* + * Validate a whole disk. Iterate over all slices on the disk and make sure + * that none is in use by calling check_slice(). + */ +static int +check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) +{ + dm_descriptor_t *drive, *media, *slice; + int err = 0; + int i; + int ret; + + /* + * Get the drive associated with this disk. This should never fail, + * because we already have an alias handle open for the device. + */ + if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, + &err)) == NULL || *drive == NULL) { + if (err) + libdiskmgt_error(err); + return (0); + } + + if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, + &err)) == NULL) { + dm_free_descriptors(drive); + if (err) + libdiskmgt_error(err); + return (0); + } + + dm_free_descriptors(drive); + + /* + * It is possible that the user has specified a removable media drive, + * and the media is not present. 
+ */ + if (*media == NULL) { + dm_free_descriptors(media); + vdev_error(gettext("'%s' has no media in drive\n"), name); + return (-1); + } + + if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, + &err)) == NULL) { + dm_free_descriptors(media); + if (err) + libdiskmgt_error(err); + return (0); + } + + dm_free_descriptors(media); + + ret = 0; + + /* + * Iterate over all slices and report any errors. We don't care about + * overlapping slices because we are using the whole disk. + */ + for (i = 0; slice[i] != NULL; i++) { + char *name = dm_get_name(slice[i], &err); + + if (check_slice(name, force, B_TRUE, isspare) != 0) + ret = -1; + + dm_free_name(name); + } + + dm_free_descriptors(slice); + return (ret); +} + +/* + * Validate a device. + */ +static int +check_device(const char *path, boolean_t force, boolean_t isspare) +{ + dm_descriptor_t desc; + int err; + char *dev; + + /* + * For whole disks, libdiskmgt does not include the leading dev path. + */ + dev = strrchr(path, '/'); + assert(dev != NULL); + dev++; + if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) { + err = check_disk(path, desc, force, isspare); + dm_free_descriptor(desc); + return (err); + } + + return (check_slice(path, force, B_FALSE, isspare)); +} +#endif /* illumos */ + +/* + * Check that a file is valid. All we can do in this case is check that it's + * not in use by another pool, and not in use by swap. + */ +static int +check_file(const char *file, boolean_t force, boolean_t isspare) +{ + char *name; + int fd; + int ret = 0; + int err; + pool_state_t state; + boolean_t inuse; + +#ifdef illumos + if (dm_inuse_swap(file, &err)) { + if (err) + libdiskmgt_error(err); + else + vdev_error(gettext("%s is currently used by swap. 
" + "Please see swap(1M).\n"), file); + return (-1); + } +#endif + + if ((fd = open(file, O_RDONLY)) < 0) + return (0); + + if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { + const char *desc; + + switch (state) { + case POOL_STATE_ACTIVE: + desc = gettext("active"); + break; + + case POOL_STATE_EXPORTED: + desc = gettext("exported"); + break; + + case POOL_STATE_POTENTIALLY_ACTIVE: + desc = gettext("potentially active"); + break; + + default: + desc = gettext("unknown"); + break; + } + + /* + * Allow hot spares to be shared between pools. + */ + if (state == POOL_STATE_SPARE && isspare) + return (0); + + if (state == POOL_STATE_ACTIVE || + state == POOL_STATE_SPARE || !force) { + switch (state) { + case POOL_STATE_SPARE: + vdev_error(gettext("%s is reserved as a hot " + "spare for pool %s\n"), file, name); + break; + default: + vdev_error(gettext("%s is part of %s pool " + "'%s'\n"), file, desc, name); + break; + } + ret = -1; + } + + free(name); + } + + (void) close(fd); + return (ret); +} + +static int +check_device(const char *name, boolean_t force, boolean_t isspare) +{ + char path[MAXPATHLEN]; + + if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) != 0) + snprintf(path, sizeof(path), "%s%s", _PATH_DEV, name); + else + strlcpy(path, name, sizeof(path)); + + return (check_file(path, force, isspare)); +} + +/* + * By "whole disk" we mean an entire physical disk (something we can + * label, toggle the write cache on, etc.) as opposed to the full + * capacity of a pseudo-device such as lofi or did. We act as if we + * are labeling the disk, which should be a pretty good test of whether + * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if + * it isn't. 
+ */ +static boolean_t +is_whole_disk(const char *arg) +{ +#ifdef illumos + struct dk_gpt *label; + int fd; + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "%s%s%s", + ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); + if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) + return (B_FALSE); + if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { + (void) close(fd); + return (B_FALSE); + } + efi_free(label); + (void) close(fd); + return (B_TRUE); +#else + int fd; + + fd = g_open(arg, 0); + if (fd >= 0) { + g_close(fd); + return (B_TRUE); + } + return (B_FALSE); +#endif +} + +/* + * Create a leaf vdev. Determine if this is a file or a device. If it's a + * device, fill in the device id to make a complete nvlist. Valid forms for a + * leaf vdev are: + * + * /dev/dsk/xxx Complete disk path + * /xxx Full path to file + * xxx Shorthand for /dev/dsk/xxx + */ +static nvlist_t * +make_leaf_vdev(const char *arg, uint64_t is_log) +{ + char path[MAXPATHLEN]; + struct stat64 statbuf; + nvlist_t *vdev = NULL; + char *type = NULL; + boolean_t wholedisk = B_FALSE; + + /* + * Determine what type of vdev this is, and put the full path into + * 'path'. We detect whether this is a device of file afterwards by + * checking the st_mode of the file. + */ + if (arg[0] == '/') { + /* + * Complete device or file path. Exact type is determined by + * examining the file descriptor afterwards. + */ + wholedisk = is_whole_disk(arg); + if (!wholedisk && (stat64(arg, &statbuf) != 0)) { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + arg, strerror(errno)); + return (NULL); + } + + (void) strlcpy(path, arg, sizeof (path)); + } else { + /* + * This may be a short path for a device, or it could be total + * gibberish. Check to see if it's a known device in + * /dev/dsk/. As part of this check, see if we've been given a + * an entire disk (minus the slice number). 
+ */ + if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + strlcpy(path, arg, sizeof (path)); + else + snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg); + wholedisk = is_whole_disk(path); + if (!wholedisk && (stat64(path, &statbuf) != 0)) { + /* + * If we got ENOENT, then the user gave us + * gibberish, so try to direct them with a + * reasonable error message. Otherwise, + * regurgitate strerror() since it's the best we + * can do. + */ + if (errno == ENOENT) { + (void) fprintf(stderr, + gettext("cannot open '%s': no such " + "GEOM provider\n"), arg); + (void) fprintf(stderr, + gettext("must be a full path or " + "shorthand device name\n")); + return (NULL); + } else { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + path, strerror(errno)); + return (NULL); + } + } + } + +#ifdef __FreeBSD__ + if (S_ISCHR(statbuf.st_mode)) { + statbuf.st_mode &= ~S_IFCHR; + statbuf.st_mode |= S_IFBLK; + wholedisk = B_FALSE; + } +#endif + + /* + * Determine whether this is a device or a file. + */ + if (wholedisk || S_ISBLK(statbuf.st_mode)) { + type = VDEV_TYPE_DISK; + } else if (S_ISREG(statbuf.st_mode)) { + type = VDEV_TYPE_FILE; + } else { + (void) fprintf(stderr, gettext("cannot use '%s': must be a " + "GEOM provider or regular file\n"), path); + return (NULL); + } + + /* + * Finally, we have the complete device or file, and we know that it is + * acceptable to use. Construct the nvlist to describe this vdev. All + * vdevs have a 'path' element, and devices also have a 'devid' element. 
+ */ + verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); + verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); + verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); + verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0); + if (strcmp(type, VDEV_TYPE_DISK) == 0) + verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, + (uint64_t)wholedisk) == 0); + +#ifdef have_devid + /* + * For a whole disk, defer getting its devid until after labeling it. + */ + if (S_ISBLK(statbuf.st_mode) && !wholedisk) { + /* + * Get the devid for the device. + */ + int fd; + ddi_devid_t devid; + char *minor = NULL, *devid_str = NULL; + + if ((fd = open(path, O_RDONLY)) < 0) { + (void) fprintf(stderr, gettext("cannot open '%s': " + "%s\n"), path, strerror(errno)); + nvlist_free(vdev); + return (NULL); + } + + if (devid_get(fd, &devid) == 0) { + if (devid_get_minor_name(fd, &minor) == 0 && + (devid_str = devid_str_encode(devid, minor)) != + NULL) { + verify(nvlist_add_string(vdev, + ZPOOL_CONFIG_DEVID, devid_str) == 0); + } + if (devid_str != NULL) + devid_str_free(devid_str); + if (minor != NULL) + devid_str_free(minor); + devid_free(devid); + } + + (void) close(fd); + } +#endif + + return (vdev); +} + +/* + * Go through and verify the replication level of the pool is consistent. + * Performs the following checks: + * + * For the new spec, verifies that devices in mirrors and raidz are the + * same size. + * + * If the current configuration already has inconsistent replication + * levels, ignore any other potential problems in the new spec. + * + * Otherwise, make sure that the current spec (if there is one) and the new + * spec have consistent replication levels. + */ +typedef struct replication_level { + char *zprl_type; + uint64_t zprl_children; + uint64_t zprl_parity; +} replication_level_t; + +#define ZPOOL_FUZZ (16 * 1024 * 1024) + +/* + * Given a list of toplevel vdevs, return the current replication level. 
If + * the config is inconsistent, then NULL is returned. If 'fatal' is set, then + * an error message will be displayed for each self-inconsistent vdev. + */ +static replication_level_t * +get_replication(nvlist_t *nvroot, boolean_t fatal) +{ + nvlist_t **top; + uint_t t, toplevels; + nvlist_t **child; + uint_t c, children; + nvlist_t *nv; + char *type; + replication_level_t lastrep = {0}; + replication_level_t rep; + replication_level_t *ret; + boolean_t dontreport; + + ret = safe_malloc(sizeof (replication_level_t)); + + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &top, &toplevels) == 0); + + for (t = 0; t < toplevels; t++) { + uint64_t is_log = B_FALSE; + + nv = top[t]; + + /* + * For separate logs we ignore the top level vdev replication + * constraints. + */ + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); + if (is_log) + continue; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, + &type) == 0); + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + /* + * This is a 'file' or 'disk' vdev. + */ + rep.zprl_type = type; + rep.zprl_children = 1; + rep.zprl_parity = 0; + } else { + uint64_t vdev_size; + + /* + * This is a mirror or RAID-Z vdev. Go through and make + * sure the contents are all the same (files vs. disks), + * keeping track of the number of elements in the + * process. + * + * We also check that the size of each vdev (if it can + * be determined) is the same. + */ + rep.zprl_type = type; + rep.zprl_children = 0; + + if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { + verify(nvlist_lookup_uint64(nv, + ZPOOL_CONFIG_NPARITY, + &rep.zprl_parity) == 0); + assert(rep.zprl_parity != 0); + } else { + rep.zprl_parity = 0; + } + + /* + * The 'dontreport' variable indicates that we've + * already reported an error for this spec, so don't + * bother doing it again. 
+ */ + type = NULL; + dontreport = 0; + vdev_size = -1ULL; + for (c = 0; c < children; c++) { + boolean_t is_replacing, is_spare; + nvlist_t *cnv = child[c]; + char *path; + struct stat64 statbuf; + uint64_t size = -1ULL; + char *childtype; + int fd, err; + + rep.zprl_children++; + + verify(nvlist_lookup_string(cnv, + ZPOOL_CONFIG_TYPE, &childtype) == 0); + + /* + * If this is a replacing or spare vdev, then + * get the real first child of the vdev. + */ + is_replacing = strcmp(childtype, + VDEV_TYPE_REPLACING) == 0; + is_spare = strcmp(childtype, + VDEV_TYPE_SPARE) == 0; + if (is_replacing || is_spare) { + nvlist_t **rchild; + uint_t rchildren; + + verify(nvlist_lookup_nvlist_array(cnv, + ZPOOL_CONFIG_CHILDREN, &rchild, + &rchildren) == 0); + assert((is_replacing && rchildren == 2) + || (is_spare && rchildren >= 2)); + cnv = rchild[0]; + + verify(nvlist_lookup_string(cnv, + ZPOOL_CONFIG_TYPE, + &childtype) == 0); + if (strcmp(childtype, + VDEV_TYPE_SPARE) == 0) { + /* We have a replacing vdev with + * a spare child. Get the first + * real child of the spare + */ + verify( + nvlist_lookup_nvlist_array( + cnv, + ZPOOL_CONFIG_CHILDREN, + &rchild, + &rchildren) == 0); + assert(rchildren >= 2); + cnv = rchild[0]; + } + } + + verify(nvlist_lookup_string(cnv, + ZPOOL_CONFIG_PATH, &path) == 0); + + /* + * If we have a raidz/mirror that combines disks + * with files, report it as an error. + */ + if (!dontreport && type != NULL && + strcmp(type, childtype) != 0) { + if (ret != NULL) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication " + "level: %s contains both " + "files and devices\n"), + rep.zprl_type); + else + return (NULL); + dontreport = B_TRUE; + } + + /* + * According to stat(2), the value of 'st_size' + * is undefined for block devices and character + * devices. But there is no effective way to + * determine the real size in userland. + * + * Instead, we'll take advantage of an + * implementation detail of spec_size(). 
If the + * device is currently open, then we (should) + * return a valid size. + * + * If we still don't get a valid size (indicated + * by a size of 0 or MAXOFFSET_T), then ignore + * this device altogether. + */ + if ((fd = open(path, O_RDONLY)) >= 0) { + err = fstat64(fd, &statbuf); + (void) close(fd); + } else { + err = stat64(path, &statbuf); + } + + if (err != 0 || + statbuf.st_size == 0 || + statbuf.st_size == MAXOFFSET_T) + continue; + + size = statbuf.st_size; + + /* + * Also make sure that devices and + * slices have a consistent size. If + * they differ by a significant amount + * (~16MB) then report an error. + */ + if (!dontreport && + (vdev_size != -1ULL && + (labs(size - vdev_size) > + ZPOOL_FUZZ))) { + if (ret != NULL) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "%s contains devices of " + "different sizes\n"), + rep.zprl_type); + else + return (NULL); + dontreport = B_TRUE; + } + + type = childtype; + vdev_size = size; + } + } + + /* + * At this point, we have the replication of the last toplevel + * vdev in 'rep'. Compare it to 'lastrep' to see if its + * different. 
+ */ + if (lastrep.zprl_type != NULL) { + if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) { + if (ret != NULL) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication level: " + "both %s and %s vdevs are " + "present\n"), + lastrep.zprl_type, rep.zprl_type); + else + return (NULL); + } else if (lastrep.zprl_parity != rep.zprl_parity) { + if (ret) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication level: " + "both %llu and %llu device parity " + "%s vdevs are present\n"), + lastrep.zprl_parity, + rep.zprl_parity, + rep.zprl_type); + else + return (NULL); + } else if (lastrep.zprl_children != rep.zprl_children) { + if (ret) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication level: " + "both %llu-way and %llu-way %s " + "vdevs are present\n"), + lastrep.zprl_children, + rep.zprl_children, + rep.zprl_type); + else + return (NULL); + } + } + lastrep = rep; + } + + if (ret != NULL) + *ret = rep; + + return (ret); +} + +/* + * Check the replication level of the vdev spec against the current pool. Calls + * get_replication() to make sure the new spec is self-consistent. If the pool + * has a consistent replication level, then we ignore any errors. Otherwise, + * report any difference between the two. + */ +static int +check_replication(nvlist_t *config, nvlist_t *newroot) +{ + nvlist_t **child; + uint_t children; + replication_level_t *current = NULL, *new; + int ret; + + /* + * If we have a current pool configuration, check to see if it's + * self-consistent. If not, simply return success. 
+ */ + if (config != NULL) { + nvlist_t *nvroot; + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if ((current = get_replication(nvroot, B_FALSE)) == NULL) + return (0); + } + /* + * for spares there may be no children, and therefore no + * replication level to check + */ + if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) || (children == 0)) { + free(current); + return (0); + } + + /* + * If all we have is logs then there's no replication level to check. + */ + if (num_logs(newroot) == children) { + free(current); + return (0); + } + + /* + * Get the replication level of the new vdev spec, reporting any + * inconsistencies found. + */ + if ((new = get_replication(newroot, B_TRUE)) == NULL) { + free(current); + return (-1); + } + + /* + * Check to see if the new vdev spec matches the replication level of + * the current pool. + */ + ret = 0; + if (current != NULL) { + if (strcmp(current->zprl_type, new->zprl_type) != 0) { + vdev_error(gettext( + "mismatched replication level: pool uses %s " + "and new vdev is %s\n"), + current->zprl_type, new->zprl_type); + ret = -1; + } else if (current->zprl_parity != new->zprl_parity) { + vdev_error(gettext( + "mismatched replication level: pool uses %llu " + "device parity and new vdev uses %llu\n"), + current->zprl_parity, new->zprl_parity); + ret = -1; + } else if (current->zprl_children != new->zprl_children) { + vdev_error(gettext( + "mismatched replication level: pool uses %llu-way " + "%s and new vdev uses %llu-way %s\n"), + current->zprl_children, current->zprl_type, + new->zprl_children, new->zprl_type); + ret = -1; + } + } + + free(new); + if (current != NULL) + free(current); + + return (ret); +} + +#ifdef illumos +/* + * Go through and find any whole disks in the vdev specification, labelling them + * as appropriate. When constructing the vdev spec, we were unable to open this + * device in order to provide a devid. 
Now that we have labelled the disk and + * know the pool slice is valid, we can construct the devid now. + * + * If the disk was already labeled with an EFI label, we will have gotten the + * devid already (because we were able to open the whole disk). Otherwise, we + * need to get the devid after we label the disk. + */ +static int +make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type, + uint64_t boot_size) +{ + nvlist_t **child; + uint_t c, children; + char *type, *path, *diskname; + char buf[MAXPATHLEN]; + uint64_t wholedisk; + int fd; + int ret; + int slice; + ddi_devid_t devid; + char *minor = NULL, *devid_str = NULL; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + + if (strcmp(type, VDEV_TYPE_DISK) != 0) + return (0); + + /* + * We have a disk device. Get the path to the device + * and see if it's a whole disk by appending the backup + * slice and stat()ing the device. + */ + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + + diskname = strrchr(path, '/'); + assert(diskname != NULL); + diskname++; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk) != 0 || !wholedisk) { + /* + * This is not whole disk, return error if + * boot partition creation was requested + */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + (void) fprintf(stderr, + gettext("creating boot partition is only " + "supported on whole disk vdevs: %s\n"), + diskname); + return (-1); + } + return (0); + } + + ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type, + boot_size, &slice); + if (ret == -1) + return (ret); + + /* + * Fill in the devid, now that we've labeled the disk. 
+ */ + (void) snprintf(buf, sizeof (buf), "%ss%d", path, slice); + if ((fd = open(buf, O_RDONLY)) < 0) { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + buf, strerror(errno)); + return (-1); + } + + if (devid_get(fd, &devid) == 0) { + if (devid_get_minor_name(fd, &minor) == 0 && + (devid_str = devid_str_encode(devid, minor)) != + NULL) { + verify(nvlist_add_string(nv, + ZPOOL_CONFIG_DEVID, devid_str) == 0); + } + if (devid_str != NULL) + devid_str_free(devid_str); + if (minor != NULL) + devid_str_free(minor); + devid_free(devid); + } + + /* + * Update the path to refer to the pool slice. The presence of + * the 'whole_disk' field indicates to the CLI that we should + * chop off the slice number when displaying the device in + * future output. + */ + verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); + + (void) close(fd); + + return (0); + } + + /* illumos kernel does not support booting from multi-vdev pools. */ + if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) { + if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) { + (void) fprintf(stderr, gettext("boot pool " + "can not have more than one vdev\n")); + return (-1); + } + } + + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) + return (ret); + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) + return (ret); + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0) + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) + return (ret); + } + + return (0); +} +#endif /* illumos */ + +/* + * Determine if the given path is a hot spare within the given configuration. 
+ */ +static boolean_t +is_spare(nvlist_t *config, const char *path) +{ + int fd; + pool_state_t state; + char *name = NULL; + nvlist_t *label; + uint64_t guid, spareguid; + nvlist_t *nvroot; + nvlist_t **spares; + uint_t i, nspares; + boolean_t inuse; + + if ((fd = open(path, O_RDONLY)) < 0) + return (B_FALSE); + + if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || + !inuse || + state != POOL_STATE_SPARE || + zpool_read_label(fd, &label) != 0) { + free(name); + (void) close(fd); + return (B_FALSE); + } + free(name); + (void) close(fd); + + verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); + nvlist_free(label); + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + verify(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &spareguid) == 0); + if (spareguid == guid) + return (B_TRUE); + } + } + + return (B_FALSE); +} + +/* + * Go through and find any devices that are in use. We rely on libdiskmgt for + * the majority of this task. + */ +static boolean_t +is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, + boolean_t replacing, boolean_t isspare) +{ + nvlist_t **child; + uint_t c, children; + char *type, *path; + int ret = 0; + char buf[MAXPATHLEN]; + uint64_t wholedisk; + boolean_t anyinuse = B_FALSE; + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + + /* + * As a generic check, we look to see if this is a replace of a + * hot spare within the same pool. If so, we allow it + * regardless of what libdiskmgt or zpool_in_use() says. 
+ */ + if (replacing) { +#ifdef illumos + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk) == 0 && wholedisk) + (void) snprintf(buf, sizeof (buf), "%ss0", + path); + else +#endif + (void) strlcpy(buf, path, sizeof (buf)); + + if (is_spare(config, buf)) + return (B_FALSE); + } + + if (strcmp(type, VDEV_TYPE_DISK) == 0) + ret = check_device(path, force, isspare); + else if (strcmp(type, VDEV_TYPE_FILE) == 0) + ret = check_file(path, force, isspare); + + return (ret != 0); + } + + for (c = 0; c < children; c++) + if (is_device_in_use(config, child[c], force, replacing, + B_FALSE)) + anyinuse = B_TRUE; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) + for (c = 0; c < children; c++) + if (is_device_in_use(config, child[c], force, replacing, + B_TRUE)) + anyinuse = B_TRUE; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0) + for (c = 0; c < children; c++) + if (is_device_in_use(config, child[c], force, replacing, + B_FALSE)) + anyinuse = B_TRUE; + + return (anyinuse); +} + +static const char * +is_grouping(const char *type, int *mindev, int *maxdev) +{ + if (strncmp(type, "raidz", 5) == 0) { + const char *p = type + 5; + char *end; + long nparity; + + if (*p == '\0') { + nparity = 1; + } else if (*p == '0') { + return (NULL); /* no zero prefixes allowed */ + } else { + errno = 0; + nparity = strtol(p, &end, 10); + if (errno != 0 || nparity < 1 || nparity >= 255 || + *end != '\0') + return (NULL); + } + + if (mindev != NULL) + *mindev = nparity + 1; + if (maxdev != NULL) + *maxdev = 255; + return (VDEV_TYPE_RAIDZ); + } + + if (maxdev != NULL) + *maxdev = INT_MAX; + + if (strcmp(type, "mirror") == 0) { + if (mindev != NULL) + *mindev = 2; + return (VDEV_TYPE_MIRROR); + } + + if (strcmp(type, "spare") == 0) { + if (mindev != NULL) + *mindev = 1; + return (VDEV_TYPE_SPARE); + } + + if (strcmp(type, "log") == 0) { + if (mindev != NULL) + *mindev = 1; + return (VDEV_TYPE_LOG); + } + 
+ if (strcmp(type, "cache") == 0) { + if (mindev != NULL) + *mindev = 1; + return (VDEV_TYPE_L2CACHE); + } + + return (NULL); +} + +/* + * Construct a syntactically valid vdev specification, + * and ensure that all devices and files exist and can be opened. + * Note: we don't bother freeing anything in the error paths + * because the program is just going to exit anyway. + */ +nvlist_t * +construct_spec(int argc, char **argv) +{ + nvlist_t *nvroot, *nv, **top, **spares, **l2cache; + int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache; + const char *type; + uint64_t is_log; + boolean_t seen_logs; + + top = NULL; + toplevels = 0; + spares = NULL; + l2cache = NULL; + nspares = 0; + nlogs = 0; + nl2cache = 0; + is_log = B_FALSE; + seen_logs = B_FALSE; + + while (argc > 0) { + nv = NULL; + + /* + * If it's a mirror or raidz, the subsequent arguments are + * its leaves -- until we encounter the next mirror or raidz. + */ + if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) { + nvlist_t **child = NULL; + int c, children = 0; + + if (strcmp(type, VDEV_TYPE_SPARE) == 0) { + if (spares != NULL) { + (void) fprintf(stderr, + gettext("invalid vdev " + "specification: 'spare' can be " + "specified only once\n")); + return (NULL); + } + is_log = B_FALSE; + } + + if (strcmp(type, VDEV_TYPE_LOG) == 0) { + if (seen_logs) { + (void) fprintf(stderr, + gettext("invalid vdev " + "specification: 'log' can be " + "specified only once\n")); + return (NULL); + } + seen_logs = B_TRUE; + is_log = B_TRUE; + argc--; + argv++; + /* + * A log is not a real grouping device. + * We just set is_log and continue. 
+ */ + continue; + } + + if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { + if (l2cache != NULL) { + (void) fprintf(stderr, + gettext("invalid vdev " + "specification: 'cache' can be " + "specified only once\n")); + return (NULL); + } + is_log = B_FALSE; + } + + if (is_log) { + if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { + (void) fprintf(stderr, + gettext("invalid vdev " + "specification: unsupported 'log' " + "device: %s\n"), type); + return (NULL); + } + nlogs++; + } + + for (c = 1; c < argc; c++) { + if (is_grouping(argv[c], NULL, NULL) != NULL) + break; + children++; + child = realloc(child, + children * sizeof (nvlist_t *)); + if (child == NULL) + zpool_no_memory(); + if ((nv = make_leaf_vdev(argv[c], B_FALSE)) + == NULL) + return (NULL); + child[children - 1] = nv; + } + + if (children < mindev) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: %s requires at least %d " + "devices\n"), argv[0], mindev); + return (NULL); + } + + if (children > maxdev) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: %s supports no more than " + "%d devices\n"), argv[0], maxdev); + return (NULL); + } + + argc -= c; + argv += c; + + if (strcmp(type, VDEV_TYPE_SPARE) == 0) { + spares = child; + nspares = children; + continue; + } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { + l2cache = child; + nl2cache = children; + continue; + } else { + verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, + 0) == 0); + verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, + type) == 0); + verify(nvlist_add_uint64(nv, + ZPOOL_CONFIG_IS_LOG, is_log) == 0); + if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { + verify(nvlist_add_uint64(nv, + ZPOOL_CONFIG_NPARITY, + mindev - 1) == 0); + } + verify(nvlist_add_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, child, + children) == 0); + + for (c = 0; c < children; c++) + nvlist_free(child[c]); + free(child); + } + } else { + /* + * We have a device. Pass off to make_leaf_vdev() to + * construct the appropriate nvlist describing the vdev. 
+ */ + if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL) + return (NULL); + if (is_log) + nlogs++; + argc--; + argv++; + } + + toplevels++; + top = realloc(top, toplevels * sizeof (nvlist_t *)); + if (top == NULL) + zpool_no_memory(); + top[toplevels - 1] = nv; + } + + if (toplevels == 0 && nspares == 0 && nl2cache == 0) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: at least one toplevel vdev must be " + "specified\n")); + return (NULL); + } + + if (seen_logs && nlogs == 0) { + (void) fprintf(stderr, gettext("invalid vdev specification: " + "log requires at least 1 device\n")); + return (NULL); + } + + /* + * Finally, create nvroot and add all top-level vdevs to it. + */ + verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); + verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) == 0); + verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + top, toplevels) == 0); + if (nspares != 0) + verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + spares, nspares) == 0); + if (nl2cache != 0) + verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + l2cache, nl2cache) == 0); + + for (t = 0; t < toplevels; t++) + nvlist_free(top[t]); + for (t = 0; t < nspares; t++) + nvlist_free(spares[t]); + for (t = 0; t < nl2cache; t++) + nvlist_free(l2cache[t]); + if (spares) + free(spares); + if (l2cache) + free(l2cache); + free(top); + + return (nvroot); +} + +nvlist_t * +split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, + splitflags_t flags, int argc, char **argv) +{ + nvlist_t *newroot = NULL, **child; + uint_t c, children; +#ifdef illumos + zpool_boot_label_t boot_type; +#endif + + if (argc > 0) { + if ((newroot = construct_spec(argc, argv)) == NULL) { + (void) fprintf(stderr, gettext("Unable to build a " + "pool from the specified devices\n")); + return (NULL); + } + +#ifdef illumos + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + 
if (!flags.dryrun && + make_disks(zhp, newroot, boot_type, 0) != 0) { + nvlist_free(newroot); + return (NULL); + } +#endif + + /* avoid any tricks in the spec */ + verify(nvlist_lookup_nvlist_array(newroot, + ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); + for (c = 0; c < children; c++) { + char *path; + const char *type; + int min, max; + + verify(nvlist_lookup_string(child[c], + ZPOOL_CONFIG_PATH, &path) == 0); + if ((type = is_grouping(path, &min, &max)) != NULL) { + (void) fprintf(stderr, gettext("Cannot use " + "'%s' as a device for splitting\n"), type); + nvlist_free(newroot); + return (NULL); + } + } + } + + if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) { + nvlist_free(newroot); + return (NULL); + } + + return (newroot); +} + +/* + * Get and validate the contents of the given vdev specification. This ensures + * that the nvlist returned is well-formed, that all the devices exist, and that + * they are not currently in use by any other known consumer. The 'poolconfig' + * parameter is the current configuration of the pool when adding devices + * existing pool, and is used to perform additional checks, such as changing the + * replication level of the pool. It can be 'NULL' to indicate that this is a + * new pool. The 'force' flag controls whether devices should be forcefully + * added, even if they appear in use. + */ +nvlist_t * +make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, + boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, + uint64_t boot_size, int argc, char **argv) +{ + nvlist_t *newroot; + nvlist_t *poolconfig = NULL; + is_force = force; + + /* + * Construct the vdev specification. If this is successful, we know + * that we have a valid specification, and that all devices can be + * opened. 
+ */ + if ((newroot = construct_spec(argc, argv)) == NULL) + return (NULL); + + if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) + return (NULL); + + /* + * Validate each device to make sure that its not shared with another + * subsystem. We do this even if 'force' is set, because there are some + * uses (such as a dedicated dump device) that even '-f' cannot + * override. + */ + if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) { + nvlist_free(newroot); + return (NULL); + } + + /* + * Check the replication level of the given vdevs and report any errors + * found. We include the existing pool spec, if any, as we need to + * catch changes against the existing replication level. + */ + if (check_rep && check_replication(poolconfig, newroot) != 0) { + nvlist_free(newroot); + return (NULL); + } + +#ifdef illumos + /* + * Run through the vdev specification and label any whole disks found. + */ + if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) { + nvlist_free(newroot); + return (NULL); + } +#endif + + return (newroot); +} |