aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Moeller <ryan@iXsystems.com>2021-06-30 14:37:20 +0000
committerGitHub <noreply@github.com>2021-06-30 14:37:20 +0000
commitcfc564f9b11eed1421789fa018f7b3d141cc18d0 (patch)
treee9713aaeb3126a19d58c912b363f98543f4fefcc
parent4694131a0a05b6db2727c9801f9729b9378d3064 (diff)
downloadsrc-cfc564f9b11eed1421789fa018f7b3d141cc18d0.tar.gz
src-cfc564f9b11eed1421789fa018f7b3d141cc18d0.zip
ZED: Match added disk by pool/vdev GUID if found (#12217)
This enables ZED to auto-online vdevs that are not wholedisk managed by ZFS. Signed-off-by: Ryan Moeller <ryan@iXsystems.com> Reviewed-by: Don Brady <don.brady@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tony Hutter <hutter2@llnl.gov>
-rw-r--r--cmd/zed/agents/zfs_mod.c40
-rw-r--r--cmd/zed/zed_disk_event.c2
-rw-r--r--tests/runfiles/linux.run9
-rwxr-xr-xtests/test-runner/bin/zts-report.py.in1
-rw-r--r--tests/zfs-tests/tests/functional/fault/Makefile.am1
-rwxr-xr-xtests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh94
6 files changed, 137 insertions, 10 deletions
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index b564f2b12a26..3bcdf6e1d718 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -641,6 +641,27 @@ devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice)
}
/*
+ * Given a device guid, find any vdevs with a matching guid.
+ */
+static boolean_t
+guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
+ zfs_process_func_t func, boolean_t is_slice)
+{
+ dev_data_t data = { 0 };
+
+ data.dd_func = func;
+ data.dd_found = B_FALSE;
+ data.dd_pool_guid = pool_guid;
+ data.dd_vdev_guid = vdev_guid;
+ data.dd_islabeled = is_slice;
+ data.dd_new_devid = devid;
+
+ (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
+
+ return (data.dd_found);
+}
+
+/*
* Handle a EC_DEV_ADD.ESC_DISK event.
*
* illumos
@@ -663,15 +684,18 @@ static int
zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
{
char *devpath = NULL, *devid;
+ uint64_t pool_guid = 0, vdev_guid = 0;
boolean_t is_slice;
/*
- * Expecting a devid string and an optional physical location
+ * Expecting a devid string and an optional physical location and guid
*/
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
return (-1);
(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
+ (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
+ (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);
@@ -682,12 +706,16 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
* Iterate over all vdevs looking for a match in the following order:
* 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
* 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
- *
- * For disks, we only want to pay attention to vdevs marked as whole
- * disks or are a multipath device.
+ * 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
*/
- if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
- (void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
+ if (devid_iter(devid, zfs_process_add, is_slice))
+ return (0);
+ if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add,
+ is_slice))
+ return (0);
+ if (vdev_guid != 0)
+ (void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
+ is_slice);
return (0);
}
diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c
index 6ec566cff3ca..94e24236063c 100644
--- a/cmd/zed/zed_disk_event.c
+++ b/cmd/zed/zed_disk_event.c
@@ -72,6 +72,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
+ if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == B_TRUE)
+ zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index c6d4f5f6d34e..642ed824d462 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -98,10 +98,11 @@ tests = ['fallocate_prealloc', 'fallocate_punch-hole']
tags = ['functional', 'fallocate']
[tests/functional/fault:Linux]
-tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
- 'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple',
- 'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
- 'decompress_fault', 'scrub_after_resilver', 'zpool_status_-s']
+tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
+ 'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
+ 'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
+ 'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
+ 'zpool_status_-s']
tags = ['functional', 'fault']
[tests/functional/features/large_dnode:Linux]
diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in
index 27c865ed5c7a..8c3bce13491b 100755
--- a/tests/test-runner/bin/zts-report.py.in
+++ b/tests/test-runner/bin/zts-report.py.in
@@ -323,6 +323,7 @@ if os.environ.get('CI') == 'true':
'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason],
'fault/auto_offline_001_pos': ['SKIP', ci_reason],
'fault/auto_online_001_pos': ['SKIP', ci_reason],
+ 'fault/auto_online_002_pos': ['SKIP', ci_reason],
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
'fault/auto_spare_ashift': ['SKIP', ci_reason],
'fault/auto_spare_shared': ['SKIP', ci_reason],
diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am
index f2fc06877d3b..ba0d7d6992c6 100644
--- a/tests/zfs-tests/tests/functional/fault/Makefile.am
+++ b/tests/zfs-tests/tests/functional/fault/Makefile.am
@@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
auto_offline_001_pos.ksh \
auto_online_001_pos.ksh \
+ auto_online_002_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
auto_spare_002_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh
new file mode 100755
index 000000000000..60185ace34bb
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh
@@ -0,0 +1,94 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
+# Copyright (c) 2019 by Delphix. All rights reserved.
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
+# Now with partitioned vdevs.
+#
+# STRATEGY:
+# 1. Partition a scsi_debug device for simulating removal
+# 2. Create a pool
+# 3. Offline disk
+# 4. ZED polls for an event change for online disk to be automatically
+# added back to the pool.
+#
+verify_runnable "both"
+
+function cleanup
+{
+ poolexists ${TESTPOOL} && destroy_pool ${TESTPOOL}
+ unload_scsi_debug
+}
+
+log_assert "Testing automated auto-online FMA test with partitioned vdev"
+
+log_onexit cleanup
+
+load_scsi_debug ${SDSIZE} ${SDHOSTS} ${SDTGTS} ${SDLUNS} '512b'
+SDDEVICE=$(get_debug_device)
+zpool labelclear -f ${SDDEVICE}
+partition_disk ${SDSIZE} ${SDDEVICE} 1
+part=${SDDEVICE}1
+host=$(get_scsi_host ${SDDEVICE})
+
+block_device_wait /dev/${part}
+log_must zpool create -f ${TESTPOOL} raidz1 ${part} ${DISKS}
+
+# Add some data to the pool
+log_must mkfile ${FSIZE} /${TESTPOOL}/data
+
+remove_disk ${SDDEVICE}
+check_state ${TESTPOOL} "" "degraded" || \
+ log_fail "${TESTPOOL} is not degraded"
+
+# Clear zpool events
+log_must zpool events -c
+
+# Online disk
+insert_disk ${SDDEVICE} ${host}
+
+log_note "Delay for ZED auto-online"
+typeset -i timeout=0
+until is_pool_resilvered ${TESTPOOL}; do
+ if ((timeout++ == MAXTIMEOUT)); then
+ log_fail "Timeout occurred"
+ fi
+ sleep 1
+done
+log_note "Auto-online of ${SDDEVICE} is complete"
+
+# Validate auto-online was successful
+sleep 1
+check_state ${TESTPOOL} "" "online" || \
+ log_fail "${TESTPOOL} is not back online"
+
+log_must zpool destroy ${TESTPOOL}
+
+log_pass "Auto-online with partitioned vdev test successful"