aboutsummaryrefslogtreecommitdiff
path: root/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
blob: c0387e1d323585c7fd273318e0f7c61c01a0a1f5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events_common.kshlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - a physically detached device is
# marked REMOVED and brought back ONLINE when it is reattached
#
# STRATEGY:
# 1. Create a pool
# 2. Simulate physical removal of one device
# 3. Verify the device is removed when detached
# 4. Reattach the device
# 5. Verify the device is onlined
# 6. Repeat the same tests with a spare device:
#    zed will use the spare to handle the removed data device
# 7. Repeat the same tests again with a faulted spare device:
#    with no usable spare, the removed data device should be marked REMOVED
#
# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
# conditions caused by mixing creation/removal events from partitioning the
# disk (zpool create) and events from physically removing it (remove_disk).
#
# NOTE: the test relies on ZED to transition the vdev to the removed state on
# a device removal event.  The ZED does receive a removal notification but only
# relies on it to activate a hot spare.  Additional work is planned to extend an
# existing ioctl interface to allow the ZED to transition the vdev into a
# removed state.
#
verify_runnable "both"

# The removable disk comes from load_scsi_debug, which is only attempted on
# Linux; on any other platform the test is reported as unsupported.
if ! is_linux; then
	log_unsupported "scsi debug module unsupported"
else
	# Add one 512b scsi_debug device (4Kn would generate IO errors)
	# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
	# add 32m of fudge
	load_scsi_debug $(( $MINVDEVSIZE / 1024 / 1024 + 32 )) 1 1 1 "512b"
fi

#
# Exit handler (registered through log_onexit): destroys the test pool,
# deletes the file-backed vdevs and the spare file, then unloads the
# scsi_debug device.
#
function cleanup
{
	destroy_pool $TESTPOOL
	# One rm invocation covers all four backing files; -f keeps it quiet
	# when some (or all) of them were never created.
	rm -f $filedev1 $filedev2 $filedev3 $sparedev
	unload_scsi_debug
}

log_assert "ZED detects physically removed devices"

log_onexit cleanup

# Files used as the regular (non-removable) vdevs and as the hot spare.
filedev1=$TEST_BASE_DIR/file-vdev-1
filedev2=$TEST_BASE_DIR/file-vdev-2
filedev3=$TEST_BASE_DIR/file-vdev-3
sparedev=$TEST_BASE_DIR/file-vdev-spare
# Name reported by get_debug_device: the disk that gets removed/re-inserted.
removedev=$(get_debug_device)

# Pool layouts under test; each one contains $removedev exactly once.
typeset poolconfs=(
	"mirror ${filedev1} ${removedev}"
	"raidz3 ${filedev1} ${filedev2} ${filedev3} ${removedev}"
	"mirror ${filedev1} ${filedev2} special mirror ${filedev3} ${removedev}"
)

# Size every backing file to $MINVDEVSIZE.
for backing_file in $filedev1 $filedev2 $filedev3 $sparedev
do
	log_must truncate -s $MINVDEVSIZE $backing_file
done

# Pool without a spare: for every layout, pull the scsi_debug disk, expect its
# vdev to become REMOVED, plug it back in and expect it to return to ONLINE.
for conf in "${poolconfs[@]}"
do
	# 1. Create a pool
	# $conf is left unquoted on purpose: it has to split into the
	# individual words of the vdev specification.
	log_must zpool create -f $TESTPOOL $conf
	# Wait for the block device before generating removal events, so that
	# creation events from partitioning are not mixed with them.
	block_device_wait ${DEV_DSKDIR}/${removedev}

	# Stop right here if the lookup fails: with an empty or bogus $mntpnt
	# the mkfile below would write outside of the pool (e.g. "/file").
	mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
	    log_fail "get_prop mountpoint /$TESTPOOL"

	# 2. Simulate physical removal of one device
	remove_disk $removedev
	# Write and sync data after the removal -- presumably so the pool
	# issues I/O while the device is missing.
	log_must mkfile 1m $mntpnt/file
	sync_pool $TESTPOOL

	# 3. Verify the device is removed.
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	insert_disk $removedev

	# 5. Verify the device is onlined
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# cleanup: destroy the pool and write a fresh msdos label to the disk
	# before the next layout is tried
	destroy_pool $TESTPOOL
	log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
	block_device_wait ${DEV_DSKDIR}/${removedev}
done

# 6. Repeat the same tests with a spare device: zed will use the spare to handle
#    the removed data device
for conf in "${poolconfs[@]}"
do
	# special vdev can not be replaced by a hot spare
	if [[ $conf = *"special mirror"* ]]; then
		continue
	fi

	# 1. Create a pool with a spare
	# $conf is left unquoted on purpose: it has to split into the
	# individual words of the vdev specification.
	log_must zpool create -f $TESTPOOL $conf
	# Wait for the block device before generating removal events, so that
	# creation events from partitioning are not mixed with them.
	block_device_wait ${DEV_DSKDIR}/${removedev}
	log_must zpool add $TESTPOOL spare $sparedev

	# Stop right here if the lookup fails: with an empty or bogus $mntpnt
	# the mkfile below would write outside of the pool (e.g. "/file").
	mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
	    log_fail "get_prop mountpoint /$TESTPOOL"

	# 2. Simulate physical removal of one device
	remove_disk $removedev
	# Write and sync data after the removal -- presumably so the pool
	# issues I/O while the device is missing.
	log_must mkfile 1m $mntpnt/file
	sync_pool $TESTPOOL

	# 3. Verify the device is handled by the spare.
	log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 4. Reattach the device
	insert_disk $removedev

	# 5. Verify the device is onlined
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# cleanup: destroy the pool and write a fresh msdos label to the disk
	# before the next layout is tried
	destroy_pool $TESTPOOL
	log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
	block_device_wait ${DEV_DSKDIR}/${removedev}
done

# 7. Repeat the same tests again with a faulted spare device: since the spare
#    cannot be used, the removed data device is expected to end up REMOVED
for conf in "${poolconfs[@]}"
do
	# 1. Create a pool with a spare
	# $conf is left unquoted on purpose: it has to split into the
	# individual words of the vdev specification.
	log_must zpool create -f $TESTPOOL $conf
	# Wait for the block device before generating removal events, so that
	# creation events from partitioning are not mixed with them.
	block_device_wait ${DEV_DSKDIR}/${removedev}
	log_must zpool add $TESTPOOL spare $sparedev

	# Stop right here if the lookup fails: with an empty or bogus $mntpnt
	# the mkfile below would write outside of the pool (e.g. "/file").
	mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
	    log_fail "get_prop mountpoint /$TESTPOOL"

	# 2. Fault the spare device making it unavailable
	log_must zpool offline -f $TESTPOOL $sparedev
	log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"

	# 3. Simulate physical removal of one device
	remove_disk $removedev
	# Write and sync data after the removal -- presumably so the pool
	# issues I/O while the device is missing.
	log_must mkfile 1m $mntpnt/file
	sync_pool $TESTPOOL

	# 4. Verify the device is removed
	log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

	# 5. Reattach the device
	insert_disk $removedev

	# 6. Verify the device is onlined
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# cleanup: destroy the pool and write a fresh msdos label to the disk
	# before the next layout is tried
	destroy_pool $TESTPOOL
	log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
	block_device_wait ${DEV_DSKDIR}/${removedev}
done

# Reached only after all three scenario loops have run to completion.
log_pass "ZED detects physically removed devices"