Mirror of https://git.proxmox.com/git/mirror_zfs, synced 2025-10-31 13:29:32 +00:00
Avoid fault diagnosis if multiple vdevs have errors

When multiple drives are throwing errors, it is likely not a drive failing but rather a failure above the drives, like a controller. The active cases on the drive's peers are now considered as context when making a diagnosis.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Closes #16531
This commit is contained in:
parent f245541e24
commit ddf5f34f06
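Before the patch itself, here is a minimal standalone sketch of the guard this change introduces. It is an illustration only: fault_case_t and count_peer_cases() are hypothetical stand-ins for the real zfs_case_data_t bookkeeping, and the real zfs_other_serd_cases() below consults live SERD engines via fmd_serd_active() rather than a flag.

#include <stdio.h>

/* Hypothetical, simplified stand-in for zfs_case_data_t. */
typedef struct fault_case {
	unsigned long long pool_guid;	/* pool the case belongs to */
	unsigned long long parent_guid;	/* guid of the leaf's parent vdev */
	unsigned long long vdev_guid;	/* leaf vdev being tracked */
	int serd_active;		/* stand-in for fmd_serd_active() */
} fault_case_t;

/*
 * Count sibling cases: same pool, same parent vdev, a different leaf,
 * and an active SERD engine.  Mirrors the intent of
 * zfs_other_serd_cases() in the diff below.
 */
static int
count_peer_cases(const fault_case_t *me, const fault_case_t *all, int n)
{
	int peers = 0;

	for (int i = 0; i < n; i++) {
		const fault_case_t *c = &all[i];

		if (c->pool_guid != me->pool_guid ||
		    c->parent_guid != me->parent_guid ||
		    c->vdev_guid == me->vdev_guid)
			continue;	/* not a sibling of interest */
		if (c->serd_active)
			peers++;
	}
	return (peers);
}

int
main(void)
{
	/* Three leaves under the same raidz parent, all with open cases. */
	fault_case_t cases[] = {
		{ .pool_guid = 1, .parent_guid = 100, .vdev_guid = 11, .serd_active = 1 },
		{ .pool_guid = 1, .parent_guid = 100, .vdev_guid = 12, .serd_active = 1 },
		{ .pool_guid = 1, .parent_guid = 100, .vdev_guid = 13, .serd_active = 1 },
	};

	/* Peers misbehaving too: retire the case instead of faulting. */
	if (count_peer_cases(&cases[0], cases, 3) > 0)
		printf("siblings also erroring -- suspect controller, retire case\n");
	else
		printf("no sibling cases -- fault the vdev\n");
	return (0);
}

With every leaf under the one raidz parent throwing errors, the guard reports two peers and the diagnosis is withheld, which is exactly the behavior the new test script below asserts.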
@@ -71,6 +71,7 @@ typedef struct zfs_case_data {
 	uint64_t	zc_ena;
 	uint64_t	zc_pool_guid;
 	uint64_t	zc_vdev_guid;
+	uint64_t	zc_parent_guid;
 	int		zc_pool_state;
 	char		zc_serd_checksum[MAX_SERDLEN];
 	char		zc_serd_io[MAX_SERDLEN];
@@ -181,10 +182,10 @@ zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp)
 }

 /*
- * count other unique slow-io cases in a pool
+ * Return count of other unique SERD cases under same vdev parent
  */
 static uint_t
-zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
+zfs_other_serd_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
 {
 	zfs_case_t *zcp;
 	uint_t cases = 0;
@@ -206,10 +207,32 @@ zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)

 	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
 	    zcp = uu_list_next(zfs_cases, zcp)) {
-		if (zcp->zc_data.zc_pool_guid == zfs_case->zc_pool_guid &&
-		    zcp->zc_data.zc_vdev_guid != zfs_case->zc_vdev_guid &&
-		    zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
-		    fmd_serd_active(hdl, zcp->zc_data.zc_serd_slow_io)) {
+		zfs_case_data_t *zcd = &zcp->zc_data;
+
+		/*
+		 * must be same pool and parent vdev but different leaf vdev
+		 */
+		if (zcd->zc_pool_guid != zfs_case->zc_pool_guid ||
+		    zcd->zc_parent_guid != zfs_case->zc_parent_guid ||
+		    zcd->zc_vdev_guid == zfs_case->zc_vdev_guid) {
+			continue;
+		}
+
+		/*
+		 * Check if there is another active serd case besides zfs_case
+		 *
+		 * Only one serd engine will be assigned to the case
+		 */
+		if (zcd->zc_serd_checksum[0] == zfs_case->zc_serd_checksum[0] &&
+		    fmd_serd_active(hdl, zcd->zc_serd_checksum)) {
+			cases++;
+		}
+		if (zcd->zc_serd_io[0] == zfs_case->zc_serd_io[0] &&
+		    fmd_serd_active(hdl, zcd->zc_serd_io)) {
+			cases++;
+		}
+		if (zcd->zc_serd_slow_io[0] == zfs_case->zc_serd_slow_io[0] &&
+		    fmd_serd_active(hdl, zcd->zc_serd_slow_io)) {
+			cases++;
+		}
 	}
@@ -502,6 +525,34 @@ zfs_ereport_when(fmd_hdl_t *hdl, nvlist_t *nvl, er_timeval_t *when)
 	}
 }

+/*
+ * Record the specified event in the SERD engine and return a
+ * boolean value indicating whether or not the engine fired as
+ * the result of inserting this event.
+ *
+ * When the pool has similar active cases on other vdevs, then
+ * the fired state is disregarded and the case is retired.
+ */
+static int
+zfs_fm_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep,
+    zfs_case_t *zcp, const char *err_type)
+{
+	int fired = fmd_serd_record(hdl, name, ep);
+	int peers = 0;
+
+	if (fired && (peers = zfs_other_serd_cases(hdl, &zcp->zc_data)) > 0) {
+		fmd_hdl_debug(hdl, "pool %llu is tracking %d other %s cases "
+		    "-- skip faulting the vdev %llu",
+		    (u_longlong_t)zcp->zc_data.zc_pool_guid,
+		    peers, err_type,
+		    (u_longlong_t)zcp->zc_data.zc_vdev_guid);
+		zfs_case_retire(hdl, zcp);
+		fired = 0;
+	}
+
+	return (fired);
+}
+
 /*
  * Main fmd entry point.
  */
@@ -510,7 +561,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 {
 	zfs_case_t *zcp, *dcp;
 	int32_t pool_state;
-	uint64_t ena, pool_guid, vdev_guid;
+	uint64_t ena, pool_guid, vdev_guid, parent_guid;
 	uint64_t checksum_n, checksum_t;
 	uint64_t io_n, io_t;
 	er_timeval_t pool_load;
@@ -600,6 +651,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 	if (nvlist_lookup_uint64(nvl,
 	    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
 		vdev_guid = 0;
+	if (nvlist_lookup_uint64(nvl,
+	    FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, &parent_guid) != 0)
+		parent_guid = 0;
 	if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) != 0)
 		ena = 0;

@@ -710,6 +764,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 		data.zc_ena = ena;
 		data.zc_pool_guid = pool_guid;
 		data.zc_vdev_guid = vdev_guid;
+		data.zc_parent_guid = parent_guid;
 		data.zc_pool_state = (int)pool_state;

 		fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data));
@@ -872,8 +927,10 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 				    SEC2NSEC(io_t));
 				zfs_case_serialize(zcp);
 			}
-			if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep))
+			if (zfs_fm_serd_record(hdl, zcp->zc_data.zc_serd_io,
+			    ep, zcp, "io error")) {
 				checkremove = B_TRUE;
+			}
 		} else if (fmd_nvl_class_match(hdl, nvl,
 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY))) {
 			uint64_t slow_io_n, slow_io_t;
@@ -899,25 +956,10 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 			}
 			/* Pass event to SERD engine and see if this triggers */
 			if (zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
-			    fmd_serd_record(hdl, zcp->zc_data.zc_serd_slow_io,
-			    ep)) {
-				/*
-				 * Ignore a slow io diagnosis when other
-				 * VDEVs in the pool show signs of being slow.
-				 */
-				if (zfs_other_slow_cases(hdl, &zcp->zc_data)) {
-					zfs_case_retire(hdl, zcp);
-					fmd_hdl_debug(hdl, "pool %llu has "
-					    "multiple slow io cases -- skip "
-					    "degrading vdev %llu",
-					    (u_longlong_t)
-					    zcp->zc_data.zc_pool_guid,
-					    (u_longlong_t)
-					    zcp->zc_data.zc_vdev_guid);
-				} else {
-					zfs_case_solve(hdl, zcp,
-					    "fault.fs.zfs.vdev.slow_io");
-				}
+			    zfs_fm_serd_record(hdl,
+			    zcp->zc_data.zc_serd_slow_io, ep, zcp, "slow io")) {
+				zfs_case_solve(hdl, zcp,
+				    "fault.fs.zfs.vdev.slow_io");
 			}
 		} else if (fmd_nvl_class_match(hdl, nvl,
 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
@@ -968,8 +1010,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 				    SEC2NSEC(checksum_t));
 				zfs_case_serialize(zcp);
 			}
-			if (fmd_serd_record(hdl,
-			    zcp->zc_data.zc_serd_checksum, ep)) {
+			if (zfs_fm_serd_record(hdl,
+			    zcp->zc_data.zc_serd_checksum, ep, zcp,
+			    "checksum")) {
 				zfs_case_solve(hdl, zcp,
 				    "fault.fs.zfs.vdev.checksum");
 			}

@@ -109,7 +109,7 @@ tags = ['functional', 'direct']
 [tests/functional/events:Linux]
 tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
     'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
-    'zed_slow_io', 'zed_slow_io_many_vdevs']
+    'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
 tags = ['functional', 'events']

 [tests/functional/fadvise:Linux]

@@ -1490,6 +1490,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/events/setup.ksh \
 	functional/events/zed_cksum_config.ksh \
 	functional/events/zed_cksum_reported.ksh \
+	functional/events/zed_diagnose_multiple.ksh \
 	functional/events/zed_fd_spill.ksh \
 	functional/events/zed_io_config.ksh \
 	functional/events/zed_rc_filter.ksh \
tests/zfs-tests/tests/functional/events/zed_diagnose_multiple.ksh (new executable file, 168 lines)

@@ -0,0 +1,168 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024, Klara Inc.
+#
+
+# DESCRIPTION:
+#	Verify that simultaneous io error events from multiple vdevs
+#	don't generate a fault
+#
+# STRATEGY:
+#	1. Create a pool with a 4 disk raidz vdev
+#	2. Inject io errors
+#	3. Verify that ZED detects the errors but doesn't fault any vdevs
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+TESTDIR="$TEST_BASE_DIR/zed_error_multiple"
+VDEV1="$TEST_BASE_DIR/vdevfile1.$$"
+VDEV2="$TEST_BASE_DIR/vdevfile2.$$"
+VDEV3="$TEST_BASE_DIR/vdevfile3.$$"
+VDEV4="$TEST_BASE_DIR/vdevfile4.$$"
+VDEVS="$VDEV1 $VDEV2 $VDEV3 $VDEV4"
+TESTPOOL="zed_test_pool"
+FILEPATH="$TESTDIR/zed.testfile"
+
+verify_runnable "both"
+
+function cleanup
+{
+	log_must zinject -c all
+
+	# if pool still exists then something failed so log additional info
+	if poolexists $TESTPOOL ; then
+		log_note "$(zpool status -s $TESTPOOL)"
+		echo "=================== zed log search ==================="
+		grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
+		destroy_pool $TESTPOOL
+	fi
+	log_must zed_stop
+
+	log_must rm -f $VDEVS
+}
+
+function start_io_errors
+{
+	for vdev in $VDEVS
+	do
+		log_must zpool set io_n=4 $TESTPOOL $vdev
+		log_must zpool set io_t=60 $TESTPOOL $vdev
+	done
+	zpool sync
+
+	for vdev in $VDEVS
+	do
+		log_must zinject -d $vdev -e io $TESTPOOL
+	done
+	zpool sync
+}
+
+function multiple_slow_vdevs_test
+{
+	log_must truncate -s 1G $VDEVS
+	default_raidz_setup_noexit $VDEVS
+
+	log_must zpool events -c
+	log_must zfs set compression=off $TESTPOOL
+	log_must zfs set primarycache=none $TESTPOOL
+	log_must zfs set recordsize=4K $TESTPOOL
+
+	log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=4
+	zpool sync
+
+	#
+	# Read the file with io errors injected on the disks
+	# This will cause multiple errors on each disk to trip ZED SERD
+	#
+	#   pool: zed_test_pool
+	#  state: ONLINE
+	# status: One or more devices has experienced an unrecoverable error.  An
+	#         attempt was made to correct the error.  Applications are unaffected.
+	# action: Determine if the device needs to be replaced, and clear the errors
+	#         using 'zpool clear' or replace the device with 'zpool replace'.
+	#    see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-9P
+	# config:
+	#
+	#         NAME                            STATE     READ WRITE CKSUM
+	#         zed_test_pool                   ONLINE       0     0     0
+	#           raidz1-0                      ONLINE       0     0     0
+	#             /var/tmp/vdevfile1.1547063  ONLINE     532   561     0
+	#             /var/tmp/vdevfile2.1547063  ONLINE     547   594     0
+	#             /var/tmp/vdevfile3.1547063  ONLINE   1.05K 1.10K     0
+	#             /var/tmp/vdevfile4.1547063  ONLINE   1.05K 1.00K     0
+	#
+
+	start_io_errors
+	dd if=$FILEPATH of=/dev/null bs=1M count=4 2>/dev/null
+	log_must zinject -c all
+
+	# count io error events available for processing
+	typeset -i i=0
+	typeset -i events=0
+	while [[ $i -lt 60 ]]; do
+		events=$(zpool events | grep "ereport\.fs\.zfs\.io" | wc -l)
+		[[ $events -ge "50" ]] && break
+		i=$((i+1))
+		sleep 1
+	done
+	log_note "$events io error events found"
+	if [[ $events -lt "50" ]]; then
+		log_note "bailing: not enough events to complete the test"
+		destroy_pool $TESTPOOL
+		return
+	fi
+
+	#
+	# give slow ZED a chance to process the io error events
+	#
+	typeset -i i=0
+	typeset -i skips=0
+	while [[ $i -lt 75 ]]; do
+		skips=$(grep "retiring case" \
+			$ZEDLET_DIR/zed.log | wc -l)
+		[[ $skips -gt "0" ]] && break
+		i=$((i+1))
+		sleep 1
+	done
+
+	log_note "$skips fault skips in ZED log after $i seconds"
+	[ $skips -gt "0" ] || log_fail "expecting to see skips"
+
+	fault=$(grep "zpool_vdev_fault" $ZEDLET_DIR/zed.log | wc -l)
+	log_note "$fault vdev faults in ZED log"
+	[ $fault -eq "0" ] || \
+		log_fail "expecting no fault events, found $fault"
+
+	destroy_pool $TESTPOOL
+}
+
+log_assert "Test ZED io errors across multiple vdevs"
+log_onexit cleanup
+
+log_must zed_events_drain
+log_must zed_start
+multiple_slow_vdevs_test
+
+log_pass "Test ZED io errors across multiple vdevs"
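A note on exercising this script on its own: assuming the stock OpenZFS test runner, a single-test invocation along the lines of "scripts/zfs-tests.sh -t tests/functional/events/zed_diagnose_multiple" (the exact path form may vary by release) should run just this case. The io_n=4/io_t=60 vdev properties set in start_io_errors() lower the SERD trip point so that a handful of injected errors per leaf within a minute is enough to open a case on every disk at once, which is the condition the new peer-case guard is meant to catch.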

@@ -25,10 +25,10 @@
 #

 # DESCRIPTION:
-#	Verify that delay events from multiple vdevs doesnt degrade
+#	Verify that delay events from multiple vdevs doesn't degrade
 #
 # STRATEGY:
-#	1. Create a pool with a 3 disk raidz vdev
+#	1. Create a pool with a 4 disk raidz vdev
 #	2. Inject slow io errors
 #	3. Verify that ZED detects slow I/Os but doesn't degrade any vdevs
 #