aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/cmd/zed/zed_disk_event.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/cmd/zed/zed_disk_event.c')
-rw-r--r--sys/contrib/openzfs/cmd/zed/zed_disk_event.c416
1 files changed, 416 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/cmd/zed/zed_disk_event.c b/sys/contrib/openzfs/cmd/zed/zed_disk_event.c
new file mode 100644
index 000000000000..174d24523253
--- /dev/null
+++ b/sys/contrib/openzfs/cmd/zed/zed_disk_event.c
@@ -0,0 +1,416 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License Version 1.0 (CDDL-1.0).
+ * You can obtain a copy of the license from the top-level file
+ * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
+ * You may not use this file except in compliance with the license.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016, 2017, Intel Corporation.
+ */
+
+#ifdef HAVE_LIBUDEV
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libnvpair.h>
+#include <libudev.h>
+#include <libzfs.h>
+#include <libzutil.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dev.h>
+
+#include "zed_log.h"
+#include "zed_disk_event.h"
+#include "agents/zfs_agents.h"
+
+/*
+ * Portions of ZED need to see disk events for disks belonging to ZFS pools.
+ * A libudev monitor is established to monitor block device actions and pass
+ * them on to internal ZED logic modules. Initially, zfs_mod.c is the only
+ * consumer and is the Linux equivalent for the illumos syseventd ZFS SLM
+ * module responsible for handling disk events for ZFS.
+ */
+
+pthread_t g_mon_tid;
+struct udev *g_udev;
+struct udev_monitor *g_mon;
+
+
+#define DEV_BYID_PATH "/dev/disk/by-id/"
+
+/* 64MB is minimum usable disk for ZFS */
+#define MINIMUM_SECTORS 131072
+
+
+/*
+ * Post disk event to SLM module
+ *
+ * occurs in the context of monitor thread
+ */
+static void
+zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
+{
+ char *strval;
+ uint64_t numval;
+
+ zed_log_msg(LOG_INFO, "zed_disk_event:");
+ zed_log_msg(LOG_INFO, "\tclass: %s", class);
+ zed_log_msg(LOG_INFO, "\tsubclass: %s", subclass);
+ if (nvlist_lookup_string(nvl, DEV_NAME, &strval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %s", DEV_NAME, strval);
+ if (nvlist_lookup_string(nvl, DEV_PATH, &strval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
+ if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
+ if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
+ if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
+ if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
+ if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
+ zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval);
+
+ (void) zfs_agent_post_event(class, subclass, nvl);
+}
+
+/*
+ * dev_event_nvlist: place event schema into an nv pair list
+ *
+ * NAME VALUE (example)
+ * -------------- --------------------------------------------------------
+ * DEV_NAME /dev/sdl
+ * DEV_PATH /devices/pci0000:00/0000:00:03.0/0000:04:00.0/host0/...
+ * DEV_IDENTIFIER ata-Hitachi_HTS725050A9A362_100601PCG420VLJ37DMC
+ * DEV_PHYS_PATH pci-0000:04:00.0-sas-0x4433221101000000-lun-0
+ * DEV_IS_PART ---
+ * DEV_SIZE 500107862016
+ * ZFS_EV_POOL_GUID 17523635698032189180
+ * ZFS_EV_VDEV_GUID 14663607734290803088
+ */
+static nvlist_t *
+dev_event_nvlist(struct udev_device *dev)
+{
+ nvlist_t *nvl;
+ char strval[128];
+ const char *value, *path;
+ uint64_t guid;
+
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
+ return (NULL);
+
+ if (zfs_device_get_devid(dev, strval, sizeof (strval)) == 0)
+ (void) nvlist_add_string(nvl, DEV_IDENTIFIER, strval);
+ if (zfs_device_get_physical(dev, strval, sizeof (strval)) == 0)
+ (void) nvlist_add_string(nvl, DEV_PHYS_PATH, strval);
+ if ((path = udev_device_get_devnode(dev)) != NULL)
+ (void) nvlist_add_string(nvl, DEV_NAME, path);
+ if ((value = udev_device_get_devpath(dev)) != NULL)
+ (void) nvlist_add_string(nvl, DEV_PATH, value);
+ value = udev_device_get_devtype(dev);
+ if ((value != NULL && strcmp("partition", value) == 0) ||
+ (udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER")
+ != NULL)) {
+ (void) nvlist_add_boolean(nvl, DEV_IS_PART);
+ }
+ if ((value = udev_device_get_sysattr_value(dev, "size")) != NULL) {
+ uint64_t numval = DEV_BSIZE;
+
+ numval *= strtoull(value, NULL, 10);
+ (void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
+ }
+
+ /*
+ * Grab the pool and vdev guids from blkid cache
+ */
+ value = udev_device_get_property_value(dev, "ID_FS_UUID");
+ if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
+ (void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, guid);
+
+ value = udev_device_get_property_value(dev, "ID_FS_UUID_SUB");
+ if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
+ (void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, guid);
+
+ /*
+ * Either a vdev guid or a devid must be present for matching
+ */
+ if (!nvlist_exists(nvl, DEV_IDENTIFIER) &&
+ !nvlist_exists(nvl, ZFS_EV_VDEV_GUID)) {
+ nvlist_free(nvl);
+ return (NULL);
+ }
+
+ return (nvl);
+}
+
+/*
+ * Listen for block device uevents
+ */
+static void *
+zed_udev_monitor(void *arg)
+{
+ struct udev_monitor *mon = arg;
+ char *tmp, *tmp2;
+
+ zed_log_msg(LOG_INFO, "Waiting for new udev disk events...");
+
+ while (1) {
+ struct udev_device *dev;
+ const char *action, *type, *part, *sectors;
+ const char *bus, *uuid;
+ const char *class, *subclass;
+ nvlist_t *nvl;
+ boolean_t is_zfs = B_FALSE;
+
+ /* allow a cancellation while blocked (recvmsg) */
+ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+
+ /* blocks at recvmsg until an event occurs */
+ if ((dev = udev_monitor_receive_device(mon)) == NULL) {
+ zed_log_msg(LOG_WARNING, "zed_udev_monitor: receive "
+ "device error %d", errno);
+ continue;
+ }
+
+ /* allow all steps to complete before a cancellation */
+ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+
+ /*
+ * Strongly typed device is the preferred filter
+ */
+ type = udev_device_get_property_value(dev, "ID_FS_TYPE");
+ if (type != NULL && type[0] != '\0') {
+ if (strcmp(type, "zfs_member") == 0) {
+ is_zfs = B_TRUE;
+ } else {
+ /* not ours, so skip */
+ zed_log_msg(LOG_INFO, "zed_udev_monitor: skip "
+ "%s (in use by %s)",
+ udev_device_get_devnode(dev), type);
+ udev_device_unref(dev);
+ continue;
+ }
+ }
+
+ /*
+ * if this is a disk and it is partitioned, then the
+ * zfs label will reside in a DEVTYPE=partition and
+ * we can skip passing this event
+ */
+ type = udev_device_get_property_value(dev, "DEVTYPE");
+ part = udev_device_get_property_value(dev,
+ "ID_PART_TABLE_TYPE");
+ if (type != NULL && type[0] != '\0' &&
+ strcmp(type, "disk") == 0 &&
+ part != NULL && part[0] != '\0') {
+ /* skip and wait for partition event */
+ udev_device_unref(dev);
+ continue;
+ }
+
+ /*
+ * ignore small partitions
+ */
+ sectors = udev_device_get_property_value(dev,
+ "ID_PART_ENTRY_SIZE");
+ if (sectors == NULL)
+ sectors = udev_device_get_sysattr_value(dev, "size");
+ if (sectors != NULL &&
+ strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) {
+ udev_device_unref(dev);
+ continue;
+ }
+
+ /*
+ * If the blkid probe didn't find ZFS, then a persistent
+ * device id string is required in the message schema
+ * for matching with vdevs. Preflight here for expected
+ * udev information.
+ */
+ bus = udev_device_get_property_value(dev, "ID_BUS");
+ uuid = udev_device_get_property_value(dev, "DM_UUID");
+ if (!is_zfs && (bus == NULL && uuid == NULL)) {
+ zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
+ "source", udev_device_get_devnode(dev));
+ udev_device_unref(dev);
+ continue;
+ }
+
+ action = udev_device_get_action(dev);
+ if (strcmp(action, "add") == 0) {
+ class = EC_DEV_ADD;
+ subclass = ESC_DISK;
+ } else if (strcmp(action, "remove") == 0) {
+ class = EC_DEV_REMOVE;
+ subclass = ESC_DISK;
+ } else if (strcmp(action, "change") == 0) {
+ class = EC_DEV_STATUS;
+ subclass = ESC_DEV_DLE;
+ } else {
+ zed_log_msg(LOG_WARNING, "zed_udev_monitor: %s unknown",
+ action);
+ udev_device_unref(dev);
+ continue;
+ }
+
+ /*
+ * Special case an EC_DEV_ADD for multipath devices
+ *
+ * When a multipath device is created, udev reports the
+ * following:
+ *
+ * 1. "add" event of the dm device for the multipath device
+ * (like /dev/dm-3).
+ * 2. "change" event to create the actual multipath device
+ * symlink (like /dev/mapper/mpatha). The event also
+ * passes back the relevant DM vars we care about, like
+ * DM_UUID.
+ * 3. Another "change" event identical to #2 (that we ignore).
+ *
+ * To get the behavior we want, we treat the "change" event
+ * in #2 as a "add" event; as if "/dev/mapper/mpatha" was
+ * a new disk being added.
+ */
+ if (strcmp(class, EC_DEV_STATUS) == 0 &&
+ udev_device_get_property_value(dev, "DM_UUID") &&
+ udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) {
+ tmp = (char *)udev_device_get_devnode(dev);
+ tmp2 = zfs_get_underlying_path(tmp);
+ if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) {
+ /*
+ * We have a real underlying device, which
+ * means that this multipath "change" event is
+ * an "add" event.
+ *
+ * If the multipath device and the underlying
+ * dev are the same name (i.e. /dev/dm-5), then
+ * there is no real underlying disk for this
+ * multipath device, and so this "change" event
+ * really is a multipath removal.
+ */
+ class = EC_DEV_ADD;
+ subclass = ESC_DISK;
+ } else {
+ tmp = (char *)
+ udev_device_get_property_value(dev,
+ "DM_NR_VALID_PATHS");
+ /* treat as a multipath remove */
+ if (tmp != NULL && strcmp(tmp, "0") == 0) {
+ class = EC_DEV_REMOVE;
+ subclass = ESC_DISK;
+ }
+ }
+ free(tmp2);
+ }
+
+ /*
+ * Special case an EC_DEV_ADD for scsi_debug devices
+ *
+ * These devices require a udevadm trigger command after
+ * creation in order to register the vdev_id scsidebug alias
+ * rule (adds a persistent path (phys_path) used for fault
+ * management automated tests in the ZFS test suite.
+ *
+ * After udevadm trigger command, event registers as a "change"
+ * event but needs to instead be handled as another "add" event
+ * to allow for disk labeling and partitioning to occur.
+ */
+ if (strcmp(class, EC_DEV_STATUS) == 0 &&
+ udev_device_get_property_value(dev, "ID_VDEV") &&
+ udev_device_get_property_value(dev, "ID_MODEL")) {
+ const char *id_model, *id_model_sd = "scsi_debug";
+
+ id_model = udev_device_get_property_value(dev,
+ "ID_MODEL");
+ if (strcmp(id_model, id_model_sd) == 0) {
+ class = EC_DEV_ADD;
+ subclass = ESC_DISK;
+ }
+ }
+
+ if ((nvl = dev_event_nvlist(dev)) != NULL) {
+ zed_udev_event(class, subclass, nvl);
+ nvlist_free(nvl);
+ }
+
+ udev_device_unref(dev);
+ }
+
+ return (NULL);
+}
+
+int
+zed_disk_event_init()
+{
+ int fd, fflags;
+
+ if ((g_udev = udev_new()) == NULL) {
+ zed_log_msg(LOG_WARNING, "udev_new failed (%d)", errno);
+ return (-1);
+ }
+
+ /* Set up a udev monitor for block devices */
+ g_mon = udev_monitor_new_from_netlink(g_udev, "udev");
+ udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", "disk");
+ udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block",
+ "partition");
+ udev_monitor_enable_receiving(g_mon);
+
+ /* Make sure monitoring socket is blocking */
+ fd = udev_monitor_get_fd(g_mon);
+ if ((fflags = fcntl(fd, F_GETFL)) & O_NONBLOCK)
+ (void) fcntl(fd, F_SETFL, fflags & ~O_NONBLOCK);
+
+ /* spawn a thread to monitor events */
+ if (pthread_create(&g_mon_tid, NULL, zed_udev_monitor, g_mon) != 0) {
+ udev_monitor_unref(g_mon);
+ udev_unref(g_udev);
+ zed_log_msg(LOG_WARNING, "pthread_create failed");
+ return (-1);
+ }
+
+ zed_log_msg(LOG_INFO, "zed_disk_event_init");
+
+ return (0);
+}
+
+void
+zed_disk_event_fini()
+{
+ /* cancel monitor thread at recvmsg() */
+ (void) pthread_cancel(g_mon_tid);
+ (void) pthread_join(g_mon_tid, NULL);
+
+ /* cleanup udev resources */
+ udev_monitor_unref(g_mon);
+ udev_unref(g_udev);
+
+ zed_log_msg(LOG_INFO, "zed_disk_event_fini");
+}
+
+#else
+
+#include "zed_disk_event.h"
+
+int
+zed_disk_event_init()
+{
+ return (0);
+}
+
+void
+zed_disk_event_fini()
+{
+}
+
+#endif /* HAVE_LIBUDEV */