aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/lib/libzfs/libzfs_status.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/lib/libzfs/libzfs_status.c')
-rw-r--r--sys/contrib/openzfs/lib/libzfs/libzfs_status.c508
1 files changed, 508 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c
new file mode 100644
index 000000000000..435937041608
--- /dev/null
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c
@@ -0,0 +1,508 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ */
+
+/*
+ * This file contains the functions which analyze the status of a pool. This
+ * include both the status of an active pool, as well as the status exported
+ * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of
+ * the pool. This status is independent (to a certain degree) from the state of
+ * the pool. A pool's state describes only whether or not it is capable of
+ * providing the necessary fault tolerance for data. The status describes the
+ * overall status of devices. A pool that is online can still have a device
+ * that is experiencing errors.
+ *
+ * Only a subset of the possible faults can be detected using 'zpool status',
+ * and not all possible errors correspond to a FMA message ID. The explanation
+ * is left up to the caller, depending on whether it is a live pool or an
+ * import.
+ */
+
+#include <libzfs.h>
+#include <libzutil.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/systeminfo.h>
+#include "libzfs_impl.h"
+#include "zfeature_common.h"
+
+/*
+ * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines
+ * in include/libzfs.h. Note that there are some status results which go past
+ * the end of this table, and hence have no associated message ID.
+ */
+static char *zfs_msgid_table[] = {
+ "ZFS-8000-14", /* ZPOOL_STATUS_CORRUPT_CACHE */
+ "ZFS-8000-2Q", /* ZPOOL_STATUS_MISSING_DEV_R */
+ "ZFS-8000-3C", /* ZPOOL_STATUS_MISSING_DEV_NR */
+ "ZFS-8000-4J", /* ZPOOL_STATUS_CORRUPT_LABEL_R */
+ "ZFS-8000-5E", /* ZPOOL_STATUS_CORRUPT_LABEL_NR */
+ "ZFS-8000-6X", /* ZPOOL_STATUS_BAD_GUID_SUM */
+ "ZFS-8000-72", /* ZPOOL_STATUS_CORRUPT_POOL */
+ "ZFS-8000-8A", /* ZPOOL_STATUS_CORRUPT_DATA */
+ "ZFS-8000-9P", /* ZPOOL_STATUS_FAILING_DEV */
+ "ZFS-8000-A5", /* ZPOOL_STATUS_VERSION_NEWER */
+ "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_MISMATCH */
+ "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_ACTIVE */
+ "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_REQUIRED */
+ "ZFS-8000-HC", /* ZPOOL_STATUS_IO_FAILURE_WAIT */
+ "ZFS-8000-JQ", /* ZPOOL_STATUS_IO_FAILURE_CONTINUE */
+ "ZFS-8000-MM", /* ZPOOL_STATUS_IO_FAILURE_MMP */
+ "ZFS-8000-K4", /* ZPOOL_STATUS_BAD_LOG */
+ "ZFS-8000-ER", /* ZPOOL_STATUS_ERRATA */
+ /*
+ * The following results have no message ID.
+ * ZPOOL_STATUS_UNSUP_FEAT_READ
+ * ZPOOL_STATUS_UNSUP_FEAT_WRITE
+ * ZPOOL_STATUS_FAULTED_DEV_R
+ * ZPOOL_STATUS_FAULTED_DEV_NR
+ * ZPOOL_STATUS_VERSION_OLDER
+ * ZPOOL_STATUS_FEAT_DISABLED
+ * ZPOOL_STATUS_RESILVERING
+ * ZPOOL_STATUS_OFFLINE_DEV
+ * ZPOOL_STATUS_REMOVED_DEV
+ * ZPOOL_STATUS_REBUILDING
+ * ZPOOL_STATUS_REBUILD_SCRUB
+ * ZPOOL_STATUS_OK
+ */
+};
+
+#define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
+
+/* ARGSUSED */
+static int
+vdev_missing(vdev_stat_t *vs, uint_t vsc)
+{
+ return (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ vs->vs_aux == VDEV_AUX_OPEN_FAILED);
+}
+
+/* ARGSUSED */
+static int
+vdev_faulted(vdev_stat_t *vs, uint_t vsc)
+{
+ return (vs->vs_state == VDEV_STATE_FAULTED);
+}
+
+/* ARGSUSED */
+static int
+vdev_errors(vdev_stat_t *vs, uint_t vsc)
+{
+ return (vs->vs_state == VDEV_STATE_DEGRADED ||
+ vs->vs_read_errors != 0 || vs->vs_write_errors != 0 ||
+ vs->vs_checksum_errors != 0);
+}
+
+/* ARGSUSED */
+static int
+vdev_broken(vdev_stat_t *vs, uint_t vsc)
+{
+ return (vs->vs_state == VDEV_STATE_CANT_OPEN);
+}
+
+/* ARGSUSED */
+static int
+vdev_offlined(vdev_stat_t *vs, uint_t vsc)
+{
+ return (vs->vs_state == VDEV_STATE_OFFLINE);
+}
+
+/* ARGSUSED */
+static int
+vdev_removed(vdev_stat_t *vs, uint_t vsc)
+{
+ return (vs->vs_state == VDEV_STATE_REMOVED);
+}
+
+static int
+vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc)
+{
+ if (getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") != NULL)
+ return (0);
+
+ return (VDEV_STAT_VALID(vs_physical_ashift, vsc) &&
+ vs->vs_configured_ashift < vs->vs_physical_ashift);
+}
+
+/*
+ * Detect if any leaf devices that have seen errors or could not be opened.
+ */
+static boolean_t
+find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t),
+ boolean_t ignore_replacing)
+{
+ nvlist_t **child;
+ vdev_stat_t *vs;
+ uint_t c, vsc, children;
+
+ /*
+ * Ignore problems within a 'replacing' vdev, since we're presumably in
+ * the process of repairing any such errors, and don't want to call them
+ * out again. We'll pick up the fact that a resilver is happening
+ * later.
+ */
+ if (ignore_replacing == B_TRUE) {
+ char *type;
+
+ verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE,
+ &type) == 0);
+ if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
+ return (B_FALSE);
+ }
+
+ if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
+ &children) == 0) {
+ for (c = 0; c < children; c++)
+ if (find_vdev_problem(child[c], func, ignore_replacing))
+ return (B_TRUE);
+ } else {
+ verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &vsc) == 0);
+
+ if (func(vs, vsc) != 0)
+ return (B_TRUE);
+ }
+
+ /*
+ * Check any L2 cache devs
+ */
+ if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child,
+ &children) == 0) {
+ for (c = 0; c < children; c++)
+ if (find_vdev_problem(child[c], func, ignore_replacing))
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Active pool health status.
+ *
+ * To determine the status for a pool, we make several passes over the config,
+ * picking the most egregious error we find. In order of importance, we do the
+ * following:
+ *
+ * - Check for a complete and valid configuration
+ * - Look for any faulted or missing devices in a non-replicated config
+ * - Check for any data errors
+ * - Check for any faulted or missing devices in a replicated config
+ * - Look for any devices showing errors
+ * - Check for any resilvering or rebuilding devices
+ *
+ * There can obviously be multiple errors within a single pool, so this routine
+ * only picks the most damaging of all the current errors to report.
+ */
+static zpool_status_t
+check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
+{
+ nvlist_t *nvroot;
+ vdev_stat_t *vs;
+ pool_scan_stat_t *ps = NULL;
+ uint_t vsc, psc;
+ uint64_t nerr;
+ uint64_t version;
+ uint64_t stateval;
+ uint64_t suspended;
+ uint64_t hostid = 0;
+ uint64_t errata = 0;
+ unsigned long system_hostid = get_system_hostid();
+
+ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
+ &version) == 0);
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+ verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &vsc) == 0);
+ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+ &stateval) == 0);
+
+ /*
+ * Currently resilvering a vdev
+ */
+ (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
+ (uint64_t **)&ps, &psc);
+ if (ps != NULL && ps->pss_func == POOL_SCAN_RESILVER &&
+ ps->pss_state == DSS_SCANNING)
+ return (ZPOOL_STATUS_RESILVERING);
+
+ /*
+ * Currently rebuilding a vdev, check top-level vdevs.
+ */
+ vdev_rebuild_stat_t *vrs = NULL;
+ nvlist_t **child;
+ uint_t c, i, children;
+ uint64_t rebuild_end_time = 0;
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if ((nvlist_lookup_uint64_array(child[c],
+ ZPOOL_CONFIG_REBUILD_STATS,
+ (uint64_t **)&vrs, &i) == 0) && (vrs != NULL)) {
+ uint64_t state = vrs->vrs_state;
+
+ if (state == VDEV_REBUILD_ACTIVE) {
+ return (ZPOOL_STATUS_REBUILDING);
+ } else if (state == VDEV_REBUILD_COMPLETE &&
+ vrs->vrs_end_time > rebuild_end_time) {
+ rebuild_end_time = vrs->vrs_end_time;
+ }
+ }
+ }
+
+ /*
+ * If we can determine when the last scrub was run, and it
+ * was before the last rebuild completed, then recommend
+ * that the pool be scrubbed to verify all checksums. When
+ * ps is NULL we can infer the pool has never been scrubbed.
+ */
+ if (rebuild_end_time > 0) {
+ if (ps != NULL) {
+ if ((ps->pss_state == DSS_FINISHED &&
+ ps->pss_func == POOL_SCAN_SCRUB &&
+ rebuild_end_time > ps->pss_end_time) ||
+ ps->pss_state == DSS_NONE)
+ return (ZPOOL_STATUS_REBUILD_SCRUB);
+ } else {
+ return (ZPOOL_STATUS_REBUILD_SCRUB);
+ }
+ }
+ }
+
+ /*
+ * The multihost property is set and the pool may be active.
+ */
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ vs->vs_aux == VDEV_AUX_ACTIVE) {
+ mmp_state_t mmp_state;
+ nvlist_t *nvinfo;
+
+ nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
+ mmp_state = fnvlist_lookup_uint64(nvinfo,
+ ZPOOL_CONFIG_MMP_STATE);
+
+ if (mmp_state == MMP_STATE_ACTIVE)
+ return (ZPOOL_STATUS_HOSTID_ACTIVE);
+ else if (mmp_state == MMP_STATE_NO_HOSTID)
+ return (ZPOOL_STATUS_HOSTID_REQUIRED);
+ else
+ return (ZPOOL_STATUS_HOSTID_MISMATCH);
+ }
+
+ /*
+ * Pool last accessed by another system.
+ */
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+ if (hostid != 0 && (unsigned long)hostid != system_hostid &&
+ stateval == POOL_STATE_ACTIVE)
+ return (ZPOOL_STATUS_HOSTID_MISMATCH);
+
+ /*
+ * Newer on-disk version.
+ */
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ vs->vs_aux == VDEV_AUX_VERSION_NEWER)
+ return (ZPOOL_STATUS_VERSION_NEWER);
+
+ /*
+ * Unsupported feature(s).
+ */
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ vs->vs_aux == VDEV_AUX_UNSUP_FEAT) {
+ nvlist_t *nvinfo;
+
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+ &nvinfo) == 0);
+ if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY))
+ return (ZPOOL_STATUS_UNSUP_FEAT_WRITE);
+ return (ZPOOL_STATUS_UNSUP_FEAT_READ);
+ }
+
+ /*
+ * Check that the config is complete.
+ */
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
+ return (ZPOOL_STATUS_BAD_GUID_SUM);
+
+ /*
+ * Check whether the pool has suspended.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
+ &suspended) == 0) {
+ uint64_t reason;
+
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON,
+ &reason) == 0 && reason == ZIO_SUSPEND_MMP)
+ return (ZPOOL_STATUS_IO_FAILURE_MMP);
+
+ if (suspended == ZIO_FAILURE_MODE_CONTINUE)
+ return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
+ return (ZPOOL_STATUS_IO_FAILURE_WAIT);
+ }
+
+ /*
+ * Could not read a log.
+ */
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ vs->vs_aux == VDEV_AUX_BAD_LOG) {
+ return (ZPOOL_STATUS_BAD_LOG);
+ }
+
+ /*
+ * Bad devices in non-replicated config.
+ */
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ find_vdev_problem(nvroot, vdev_faulted, B_TRUE))
+ return (ZPOOL_STATUS_FAULTED_DEV_NR);
+
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ find_vdev_problem(nvroot, vdev_missing, B_TRUE))
+ return (ZPOOL_STATUS_MISSING_DEV_NR);
+
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ find_vdev_problem(nvroot, vdev_broken, B_TRUE))
+ return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
+
+ /*
+ * Corrupted pool metadata
+ */
+ if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+ vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
+ return (ZPOOL_STATUS_CORRUPT_POOL);
+
+ /*
+ * Persistent data errors.
+ */
+ if (!isimport) {
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
+ &nerr) == 0 && nerr != 0)
+ return (ZPOOL_STATUS_CORRUPT_DATA);
+ }
+
+ /*
+ * Missing devices in a replicated config.
+ */
+ if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE))
+ return (ZPOOL_STATUS_FAULTED_DEV_R);
+ if (find_vdev_problem(nvroot, vdev_missing, B_TRUE))
+ return (ZPOOL_STATUS_MISSING_DEV_R);
+ if (find_vdev_problem(nvroot, vdev_broken, B_TRUE))
+ return (ZPOOL_STATUS_CORRUPT_LABEL_R);
+
+ /*
+ * Devices with errors
+ */
+ if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE))
+ return (ZPOOL_STATUS_FAILING_DEV);
+
+ /*
+ * Offlined devices
+ */
+ if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE))
+ return (ZPOOL_STATUS_OFFLINE_DEV);
+
+ /*
+ * Removed device
+ */
+ if (find_vdev_problem(nvroot, vdev_removed, B_TRUE))
+ return (ZPOOL_STATUS_REMOVED_DEV);
+
+ /*
+ * Suboptimal, but usable, ashift configuration.
+ */
+ if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE))
+ return (ZPOOL_STATUS_NON_NATIVE_ASHIFT);
+
+ /*
+ * Informational errata available.
+ */
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata);
+ if (errata) {
+ *erratap = errata;
+ return (ZPOOL_STATUS_ERRATA);
+ }
+
+ /*
+ * Outdated, but usable, version
+ */
+ if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION)
+ return (ZPOOL_STATUS_VERSION_OLDER);
+
+ /*
+ * Usable pool with disabled features
+ */
+ if (version >= SPA_VERSION_FEATURES) {
+ int i;
+ nvlist_t *feat;
+
+ if (isimport) {
+ feat = fnvlist_lookup_nvlist(config,
+ ZPOOL_CONFIG_LOAD_INFO);
+ if (nvlist_exists(feat, ZPOOL_CONFIG_ENABLED_FEAT))
+ feat = fnvlist_lookup_nvlist(feat,
+ ZPOOL_CONFIG_ENABLED_FEAT);
+ } else {
+ feat = fnvlist_lookup_nvlist(config,
+ ZPOOL_CONFIG_FEATURE_STATS);
+ }
+
+ for (i = 0; i < SPA_FEATURES; i++) {
+ zfeature_info_t *fi = &spa_feature_table[i];
+ if (!nvlist_exists(feat, fi->fi_guid))
+ return (ZPOOL_STATUS_FEAT_DISABLED);
+ }
+ }
+
+ return (ZPOOL_STATUS_OK);
+}
+
+zpool_status_t
+zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata)
+{
+ zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata);
+ if (msgid != NULL) {
+ if (ret >= NMSGID)
+ *msgid = NULL;
+ else
+ *msgid = zfs_msgid_table[ret];
+ }
+ return (ret);
+}
+
+zpool_status_t
+zpool_import_status(nvlist_t *config, char **msgid, zpool_errata_t *errata)
+{
+ zpool_status_t ret = check_status(config, B_TRUE, errata);
+
+ if (ret >= NMSGID)
+ *msgid = NULL;
+ else
+ *msgid = zfs_msgid_table[ret];
+
+ return (ret);
+}