aboutsummaryrefslogtreecommitdiff
path: root/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
diff options
context:
space:
mode:
authorDon Brady <don.brady@delphix.com>2020-09-04 17:34:28 +0000
committerGitHub <noreply@github.com>2020-09-04 17:34:28 +0000
commit4f0728278615eb42fc5022b2817c082f578e225f (patch)
tree598cd2bb948dd3b0eb0469139a33269ae58fc40e /tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
parent3808032489f28c1f36b39c9a3274d5f4b6f9638a (diff)
downloadsrc-4f0728278615eb42fc5022b2817c082f578e225f.tar.gz
src-4f0728278615eb42fc5022b2817c082f578e225f.zip
Avoid posting duplicate zpool events
Duplicate io and checksum ereport events can give the impression that things are worse than they really are. Ideally the zpool events and the corresponding vdev stat error counts in a zpool status should be for unique errors -- not the same error being counted over and over. This can be demonstrated in a simple example. With a single bad block in a datafile and just 5 reads of the file we end up with a degraded vdev, even though there is only one unique error in the pool. The proposed solution to the above issue is to eliminate duplicates when posting events and when updating vdev error stats. We now save recent error events of interest when posting events so that we can easily check for duplicates when posting an error. Reviewed-by: Brad Lewis <brad.lewis@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@delphix.com> Closes #10861
Diffstat (limited to 'tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh')
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh155
1 files changed, 155 insertions, 0 deletions
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
new file mode 100755
index 000000000000..1ba7b1b34496
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh
@@ -0,0 +1,155 @@
+#!/bin/ksh -p
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2020 by Delphix. All rights reserved.
+#
+
+# DESCRIPTION:
+# Verify that duplicate I/O ereport errors are not posted
+#
+# STRATEGY:
+# 1. Create a mirror pool
+# 2. Inject duplicate read/write IO errors and checksum errors
+# 3. Verify there are no duplicate events being posted
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "both"
+
+MOUNTDIR=$TEST_BASE_DIR/mount
+FILEPATH=$MOUNTDIR/badfile
+VDEV1=$TEST_BASE_DIR/vfile1
+VDEV2=$TEST_BASE_DIR/vfile2
+POOL=error_pool
+FILESIZE="10M"
+OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
+RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX)
+
+EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports"
+
+duplicates=false
+
+function cleanup
+{
+ log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX
+
+ log_must zinject -c all
+ if poolexists $POOL ; then
+ destroy_pool $POOL
+ fi
+ log_must rm -f $VDEV1 $VDEV2
+}
+
+log_assert "Duplicate I/O ereport errors are not posted"
+log_note "zevent retain max setting: $RETAIN_MAX"
+
+# Register the exit handler before touching any tunable so the saved
+# value is always restored.
+log_onexit cleanup
+
+# Set our threshold high to avoid dropping events.
+# Checked with log_must, like the matching restore in cleanup, so that a
+# failure to raise the limit is reported instead of silently ignored.
+log_must set_tunable64 ZEVENT_LEN_MAX 20000
+
+# Size the two backing files for the mirror and create the directory the
+# pool will be mounted on.
+log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2
+log_must mkdir -p $MOUNTDIR
+
+#
+# Run one duplicate-ereport scenario on a freshly created mirror pool and
+# record whether any duplicate ereports were observed.
+#
+# $1: test type - corrupt (checksum error), io
+# $2: read, write
+#
+# Sets the global 'duplicates' to true when duplicate ereports are seen;
+# it is never reset to false here, so one failing scenario is enough to
+# fail the whole test. The pool is destroyed again before returning.
+#
+function do_dup_test
+{
+	# Keep per-scenario state local so nothing leaks between invocations
+	# or into the caller's scope.
+	typeset ERR=$1
+	typeset RW=$2
+	typeset ereports actual unique i
+
+	log_note "Testing $ERR $RW ereports"
+	log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL \
+	    mirror $VDEV1 $VDEV2
+	# Start from an empty event queue so only this scenario is counted.
+	log_must zpool events -c
+	log_must zfs set compression=off $POOL
+
+	if [ "$RW" == "read" ] ; then
+		log_must mkfile $FILESIZE $FILEPATH
+
+		# unmount and mount filesystems to purge file from ARC
+		# to force reads to go through error inject handler
+		log_must zfs unmount $POOL
+		log_must zfs mount $POOL
+
+		# all reads from this file get an error
+		if [ "$ERR" == "corrupt" ] ; then
+			log_must zinject -a -t data -e checksum -T read $FILEPATH
+		else
+			log_must zinject -a -t data -e io -T read $FILEPATH
+		fi
+
+		# Read the file a few times to generate some
+		# duplicate errors of the same blocks. dd's output and
+		# exit status are deliberately discarded.
+		# shellcheck disable=SC2034
+		for i in {1..15}; do
+			dd if=$FILEPATH of=/dev/null bs=128K > /dev/null 2>&1
+		done
+		log_must zinject -c all
+	fi
+
+	# Fail every $RW operation on the first side of the mirror.
+	log_must zinject -d $VDEV1 -e $ERR -T $RW -f 100 $POOL
+
+	if [ "$RW" == "write" ] ; then
+		log_must mkfile $FILESIZE $FILEPATH
+		log_must zpool sync $POOL
+	else
+		# scrub twice to generate some duplicates
+		log_must zpool scrub $POOL
+		log_must zpool wait -t scrub $POOL
+		log_must zpool scrub $POOL
+		log_must zpool wait -t scrub $POOL
+	fi
+
+	log_must zinject -c all
+
+	# Wait for the pool to settle down and finish resilvering (if
+	# necessary). We want the errors to stop incrementing before we
+	# check for duplicates.
+	# NOTE(review): not wrapped in log_must, so a failure of this wait
+	# is ignored -- presumably intentional; confirm.
+	zpool wait -t resilver $POOL
+
+	# Sorting puts identical ereport lines next to each other so that
+	# uniq can collapse them.
+	ereports="$($EREPORTS | sort)"
+	if [ -z "$ereports" ] ; then
+		# echo of an empty string still emits a newline, which
+		# wc -l would count as one ereport; report zero instead.
+		actual=0
+		unique=0
+		log_note "no $ERR $RW ereports were collected"
+	else
+		actual=$(echo "$ereports" | wc -l)
+		unique=$(echo "$ereports" | uniq | wc -l)
+	fi
+	log_note "$actual total $ERR $RW ereports where $unique were unique"
+
+	if [ $actual -gt $unique ] ; then
+		log_note "UNEXPECTED -- $((actual-unique)) duplicate $ERR $RW ereports"
+		echo "$ereports"
+		duplicates=true
+	fi
+
+	log_must zpool destroy $POOL
+}
+
+# Run each error-type / operation scenario in turn; every call builds and
+# tears down its own pool.
+do_dup_test 'corrupt' 'read'
+do_dup_test 'io' 'read'
+do_dup_test 'io' 'write'
+
+# Overall verdict: pass only if no scenario flagged a duplicate ereport.
+if [[ "$duplicates" != "true" ]]; then
+	log_pass "Duplicate I/O ereport errors are not posted"
+else
+	log_fail "FAILED -- Duplicate I/O ereport errors encountered"
+fi
+