Diffstat (limited to 'sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c')
-rw-r--r--	sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c	458
1 file changed, 458 insertions(+), 0 deletions(-)
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
new file mode 100644
index 000000000000..a2daa77a61fe
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
@@ -0,0 +1,458 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License Version 1.0 (CDDL-1.0).
+ * You can obtain a copy of the license from the top-level file
+ * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
+ * You may not use this file except in compliance with the license.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
+ * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
+ */
+
+#include <libnvpair.h>
+#include <libzfs.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/list.h>
+#include <sys/time.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dev.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/fs/zfs.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include "zfs_agents.h"
+#include "fmd_api.h"
+#include "../zed_log.h"
+
+/*
+ * agent dispatch code
+ */
+
+static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;
+static list_t agent_events; /* list of pending events */
+static int agent_exiting;
+
+typedef struct agent_event {
+ char ae_class[64];
+ char ae_subclass[32];
+ nvlist_t *ae_nvl;
+ list_node_t ae_node;
+} agent_event_t;
+
+pthread_t g_agents_tid;
+
+libzfs_handle_t *g_zfs_hdl;
+
+/* guid search data */
+typedef enum device_type {
+ DEVICE_TYPE_L2ARC, /* l2arc device */
+ DEVICE_TYPE_SPARE, /* spare device */
+ DEVICE_TYPE_PRIMARY /* any primary pool storage device */
+} device_type_t;
+
+typedef struct guid_search {
+ uint64_t gs_pool_guid;
+ uint64_t gs_vdev_guid;
+ const char *gs_devid;
+ device_type_t gs_vdev_type;
+ uint64_t gs_vdev_expandtime; /* vdev expansion time */
+} guid_search_t;
+
+/*
+ * Walks the vdev tree recursively looking for a matching devid or, when
+ * only a vdev guid is known, a matching vdev guid.  Returns B_TRUE as
+ * soon as a matching device is found, B_FALSE otherwise.
+ */
+static boolean_t
+zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
+{
+ guid_search_t *gsp = arg;
+ const char *path = NULL;
+ uint_t c, children;
+ nvlist_t **child;
+ uint64_t vdev_guid;
+
+ /*
+ * First iterate over any children.
+ */
+ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+ gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
+ return (B_TRUE);
+ }
+ }
+ }
+ /*
+ * Iterate over any spares and cache devices.
+ */
+ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+ gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
+ return (B_TRUE);
+ }
+ }
+ }
+ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+ gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
+ return (B_TRUE);
+ }
+ }
+ }
+ /*
+ * On a devid match, grab the vdev guid and expansion time, if any.
+ */
+ if (gsp->gs_devid != NULL &&
+ (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
+ (strcmp(gsp->gs_devid, path) == 0)) {
+ (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
+ &gsp->gs_vdev_guid);
+ (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
+ &gsp->gs_vdev_expandtime);
+ return (B_TRUE);
+ }
+ /*
+ * Otherwise, on a vdev guid match, grab the devid and expansion
+ * time.  The devid might be missing on removal since it's not in
+ * the blkid cache, and an L2ARC vdev does not carry the pool guid
+ * in its blkid, so this is a special case for L2ARC vdevs.
+ */
+ else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
+ nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
+ gsp->gs_vdev_guid == vdev_guid) {
+ (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
+ &gsp->gs_devid);
+ (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
+ &gsp->gs_vdev_expandtime);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static int
+zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
+{
+ guid_search_t *gsp = arg;
+ nvlist_t *config, *nvl;
+
+ /*
+ * For each vdev in this pool, look for a match by devid
+ */
+ if ((config = zpool_get_config(zhp, NULL)) != NULL) {
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvl) == 0) {
+ (void) zfs_agent_iter_vdev(zhp, nvl, gsp);
+ }
+ }
+ /*
+ * If a match was found, grab the pool guid.
+ */
+ if (gsp->gs_vdev_guid && gsp->gs_devid) {
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ &gsp->gs_pool_guid);
+ }
+
+ zpool_close(zhp);
+ return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0);
+}
+
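/*
 * Illustrative sketch (not part of this commit): how the guid search
 * above is typically driven.  Starting from only a devid, zpool_iter()
 * walks every imported pool and zfs_agent_iter_pool() fills in the vdev
 * and pool guids on a match.  The devid string here is hypothetical.
 */
static void
example_guid_search(libzfs_handle_t *hdl)
{
        guid_search_t search = { 0 };

        search.gs_devid = "scsi-35000c500a1b2c3d4";     /* hypothetical */
        (void) zpool_iter(hdl, zfs_agent_iter_pool, &search);

        if (search.gs_vdev_guid != 0)
                zed_log_msg(LOG_INFO, "found vdev %llu in pool %llu",
                    (u_longlong_t)search.gs_vdev_guid,
                    (u_longlong_t)search.gs_pool_guid);
}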
+void
+zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
+{
+ agent_event_t *event;
+
+ if (subclass == NULL)
+ subclass = "";
+
+ event = malloc(sizeof (agent_event_t));
+ if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
+ if (event)
+ free(event);
+ return;
+ }
+
+ if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
+ class = EC_ZFS;
+ subclass = ESC_ZFS_VDEV_CHECK;
+ }
+
+ /*
+ * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
+ * from the vdev_disk layer after a hot unplug. Fortunately we do
+ * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
+ * proxy so we remap it here for the benefit of the diagnosis engine.
+ * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
+ * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.
+ */
+ if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
+ (strcmp(subclass, ESC_DISK) == 0) &&
+ (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
+ nvlist_exists(nvl, DEV_IDENTIFIER))) {
+ nvlist_t *payload = event->ae_nvl;
+ struct timeval tv;
+ int64_t tod[2];
+ uint64_t pool_guid = 0, vdev_guid = 0;
+ guid_search_t search = { 0 };
+ device_type_t devtype = DEVICE_TYPE_PRIMARY;
+ const char *devid = NULL;
+
+ class = "resource.fs.zfs.removed";
+ subclass = "";
+
+ (void) nvlist_add_string(payload, FM_CLASS, class);
+ (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
+ (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
+ (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
+
+ (void) gettimeofday(&tv, NULL);
+ tod[0] = tv.tv_sec;
+ tod[1] = tv.tv_usec;
+ (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
+
+ /*
+ * If the devid is missing but the vdev guid is available, look
+ * up the devid and pool guid from the vdev guid.  For multipath,
+ * spare, and l2arc devices, ZFS_EV_VDEV_GUID or ZFS_EV_POOL_GUID
+ * may be missing, so look those up as well.
+ */
+ if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
+ if (devid == NULL)
+ search.gs_vdev_guid = vdev_guid;
+ else
+ search.gs_devid = devid;
+ zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
+ if (devid == NULL)
+ devid = search.gs_devid;
+ if (pool_guid == 0)
+ pool_guid = search.gs_pool_guid;
+ if (vdev_guid == 0)
+ vdev_guid = search.gs_vdev_guid;
+ devtype = search.gs_vdev_type;
+ }
+
+ /*
+ * We want to avoid reporting "remove" events coming from
+ * libudev for VDEVs which were expanded recently (10s) and
+ * avoid activating spares in response to partitions being
+ * deleted and created in rapid succession.
+ */
+ if (search.gs_vdev_expandtime != 0 &&
+ search.gs_vdev_expandtime + 10 > tv.tv_sec) {
+ zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
+ "for recently expanded device '%s'", EC_DEV_REMOVE,
+ devid);
+ fnvlist_free(payload);
+ free(event);
+ goto out;
+ }
+
+ (void) nvlist_add_uint64(payload,
+ FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
+ (void) nvlist_add_uint64(payload,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
+ switch (devtype) {
+ case DEVICE_TYPE_L2ARC:
+ (void) nvlist_add_string(payload,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
+ VDEV_TYPE_L2CACHE);
+ break;
+ case DEVICE_TYPE_SPARE:
+ (void) nvlist_add_string(payload,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
+ break;
+ case DEVICE_TYPE_PRIMARY:
+ (void) nvlist_add_string(payload,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
+ break;
+ }
+
+ zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
+ EC_DEV_REMOVE, class);
+ }
+
+ (void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
+ (void) strlcpy(event->ae_subclass, subclass,
+ sizeof (event->ae_subclass));
+
+ (void) pthread_mutex_lock(&agent_lock);
+ list_insert_tail(&agent_events, event);
+ (void) pthread_mutex_unlock(&agent_lock);
+
+out:
+ (void) pthread_cond_signal(&agent_cond);
+}
+
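/*
 * Illustrative sketch (not part of this commit): posting a disk-removal
 * event into the agent queue.  zfs_agent_post_event() duplicates the
 * nvlist via nvlist_dup(), so the caller retains ownership and must
 * free it.  The devid value is hypothetical.
 */
static void
example_post_remove(void)
{
        nvlist_t *nvl = NULL;

        if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
                return;
        (void) nvlist_add_string(nvl, DEV_IDENTIFIER,
            "scsi-35000c500a1b2c3d4");  /* hypothetical devid */
        zfs_agent_post_event(EC_DEV_REMOVE, ESC_DISK, nvl);
        nvlist_free(nvl);
}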
+static void
+zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
+{
+ /*
+ * The diagnosis engine subscribes to the following events.
+ * On illumos these subscriptions reside in:
+ * /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
+ */
+ if (strstr(class, "ereport.fs.zfs.") != NULL ||
+ strstr(class, "resource.fs.zfs.") != NULL ||
+ strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
+ strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
+ strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
+ fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
+ }
+
+ /*
+ * The retire agent subscribes to the following events.
+ * On illumos these subscriptions reside in:
+ * /usr/lib/fm/fmd/plugins/zfs-retire.conf
+ *
+ * NOTE: fault events come directly from our diagnosis engine
+ * and will not pass through the zfs kernel module.
+ */
+ if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
+ strcmp(class, "resource.fs.zfs.removed") == 0 ||
+ strcmp(class, "resource.fs.zfs.statechange") == 0 ||
+ strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
+ fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
+ }
+
+ /*
+ * The SLM module only consumes disk events and vdev check events.
+ *
+ * NOTE: disk events come directly from the disk monitor and will
+ * not pass through the zfs kernel module.
+ */
+ if (strstr(class, "EC_dev_") != NULL ||
+ strcmp(class, EC_ZFS) == 0) {
+ (void) zfs_slm_event(class, subclass, nvl);
+ }
+}
+
+/*
+ * Events are consumed and dispatched from this thread.
+ * An agent can also post an event, so the event list lock
+ * is not held when calling into an agent.
+ * One event is consumed at a time.
+ */
+static void *
+zfs_agent_consumer_thread(void *arg)
+{
+ (void) arg;
+
+ for (;;) {
+ agent_event_t *event;
+
+ (void) pthread_mutex_lock(&agent_lock);
+
+ /* wait for an event to show up */
+ while (!agent_exiting && list_is_empty(&agent_events))
+ (void) pthread_cond_wait(&agent_cond, &agent_lock);
+
+ if (agent_exiting) {
+ (void) pthread_mutex_unlock(&agent_lock);
+ zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
+ "exiting");
+ return (NULL);
+ }
+
+ if ((event = (list_head(&agent_events))) != NULL) {
+ list_remove(&agent_events, event);
+
+ (void) pthread_mutex_unlock(&agent_lock);
+
+ /* dispatch to all event subscribers */
+ zfs_agent_dispatch(event->ae_class, event->ae_subclass,
+ event->ae_nvl);
+
+ nvlist_free(event->ae_nvl);
+ free(event);
+ continue;
+ }
+
+ (void) pthread_mutex_unlock(&agent_lock);
+ }
+
+ return (NULL);
+}
+
+void
+zfs_agent_init(libzfs_handle_t *zfs_hdl)
+{
+ fmd_hdl_t *hdl;
+
+ g_zfs_hdl = zfs_hdl;
+
+ if (zfs_slm_init() != 0)
+ zed_log_die("Failed to initialize zfs slm");
+ zed_log_msg(LOG_INFO, "Add Agent: init");
+
+ hdl = fmd_module_hdl("zfs-diagnosis");
+ _zfs_diagnosis_init(hdl);
+ if (!fmd_module_initialized(hdl))
+ zed_log_die("Failed to initialize zfs diagnosis");
+
+ hdl = fmd_module_hdl("zfs-retire");
+ _zfs_retire_init(hdl);
+ if (!fmd_module_initialized(hdl))
+ zed_log_die("Failed to initialize zfs retire");
+
+ list_create(&agent_events, sizeof (agent_event_t),
+ offsetof(struct agent_event, ae_node));
+
+ if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
+ NULL) != 0) {
+ list_destroy(&agent_events);
+ zed_log_die("Failed to initialize agents");
+ }
+ pthread_setname_np(g_agents_tid, "agents");
+}
+
+void
+zfs_agent_fini(void)
+{
+ fmd_hdl_t *hdl;
+ agent_event_t *event;
+
+ agent_exiting = 1;
+ (void) pthread_cond_signal(&agent_cond);
+
+ /* wait for the agent consumer thread to complete */
+ (void) pthread_join(g_agents_tid, NULL);
+
+ /* drain any pending events */
+ while ((event = (list_head(&agent_events))) != NULL) {
+ list_remove(&agent_events, event);
+ nvlist_free(event->ae_nvl);
+ free(event);
+ }
+
+ list_destroy(&agent_events);
+
+ if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
+ _zfs_retire_fini(hdl);
+ fmd_hdl_unregister(hdl);
+ }
+ if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
+ _zfs_diagnosis_fini(hdl);
+ fmd_hdl_unregister(hdl);
+ }
+
+ zed_log_msg(LOG_INFO, "Add Agent: fini");
+ zfs_slm_fini();
+
+ g_zfs_hdl = NULL;
+}
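/*
 * Illustrative sketch (not part of this commit): the init/post/fini
 * lifecycle as a daemon such as zed would use it.  zfs_agent_init()
 * spawns the "agents" consumer thread, events are posted from the
 * daemon's event loop, and zfs_agent_fini() joins the thread and
 * drains any queued events.
 */
static void
example_lifecycle(libzfs_handle_t *zfs_hdl, nvlist_t *event_nvl)
{
        zfs_agent_init(zfs_hdl);
        zfs_agent_post_event(EC_ZFS, ESC_ZFS_VDEV_CHECK, event_nvl);
        zfs_agent_fini();
}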