mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-31 22:53:11 +00:00 
			
		
		
		
	 c183d164aa
			
		
	
	
		c183d164aa
		
			
		
	
	
	
	
		
			
			This commit allow spa_load() to drop the spa_namespace_lock so that imports can happen concurrently. Prior to dropping the spa_namespace_lock, the import logic will set the spa_load_thread value to track the thread which is doing the import. Consumers of spa_lookup() retain the same behavior by blocking when either a thread is holding the spa_namespace_lock or the spa_load_thread value is set. This will ensure that critical concurrent operations cannot take place while a pool is being imported. The zpool command is also enhanced to provide multi-threaded support when invoking zpool import -a. Lastly, zinject provides a mechanism to insert artificial delays when importing a pool and new zfs tests are added to verify parallel import functionality. Contributions-by: Don Brady <don.brady@klarasystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Wilson <gwilson@delphix.com> Closes #16093
		
			
				
	
	
		
			1436 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1436 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * CDDL HEADER START
 | |
|  *
 | |
|  * The contents of this file are subject to the terms of the
 | |
|  * Common Development and Distribution License (the "License").
 | |
|  * You may not use this file except in compliance with the License.
 | |
|  *
 | |
|  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | |
|  * or https://opensource.org/licenses/CDDL-1.0.
 | |
|  * See the License for the specific language governing permissions
 | |
|  * and limitations under the License.
 | |
|  *
 | |
|  * When distributing Covered Code, include this CDDL HEADER in each
 | |
|  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | |
|  * If applicable, add the following below this CDDL HEADER, with the
 | |
|  * fields enclosed by brackets "[]" replaced with your own identifying
 | |
|  * information: Portions Copyright [yyyy] [name of copyright owner]
 | |
|  *
 | |
|  * CDDL HEADER END
 | |
|  */
 | |
| /*
 | |
|  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 | |
|  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 | |
|  * Copyright (c) 2017, Intel Corporation.
 | |
|  * Copyright (c) 2023-2024, Klara Inc.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * ZFS Fault Injector
 | |
|  *
 | |
|  * This userland component takes a set of options and uses libzpool to translate
 | |
|  * from a user-visible object type and name to an internal representation.
 | |
|  * There are two basic types of faults: device faults and data faults.
 | |
|  *
 | |
|  *
 | |
|  * DEVICE FAULTS
 | |
|  *
 | |
|  * Errors can be injected into a particular vdev using the '-d' option.  This
 | |
|  * option takes a path or vdev GUID to uniquely identify the device within a
 | |
|  * pool.  There are four types of errors that can be injected, IO, ENXIO,
 | |
|  * ECHILD, and EILSEQ.  These can be controlled through the '-e' option and the
 | |
|  * default is ENXIO.  For EIO failures, any attempt to read data from the device
 | |
|  * will return EIO, but a subsequent attempt to reopen the device will succeed.
 | |
|  * For ENXIO failures, any attempt to read from the device will return EIO, but
 | |
|  * any attempt to reopen the device will also return ENXIO.  The EILSEQ failures
 | |
|  * only apply to read operations (-T read) and will flip a bit after the device
 | |
|  * has read the original data.
 | |
|  *
 | |
|  * For label faults, the -L option must be specified. This allows faults
 | |
|  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
 | |
|  * of all the labels for the specified device.
 | |
|  *
 | |
|  * This form of the command looks like:
 | |
|  *
 | |
|  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
 | |
|  *
 | |
|  *
 | |
|  * DATA FAULTS
 | |
|  *
 | |
|  * We begin with a tuple of the form:
 | |
|  *
 | |
|  * 	<type,level,range,object>
 | |
|  *
 | |
|  * 	type	A string describing the type of data to target.  Each type
 | |
|  * 		implicitly describes how to interpret 'object'. Currently,
 | |
|  * 		the following values are supported:
 | |
|  *
 | |
|  * 		data		User data for a file
 | |
|  * 		dnode		Dnode for a file or directory
 | |
|  *
 | |
|  *		The following MOS objects are special.  Instead of injecting
 | |
|  *		errors on a particular object or blkid, we inject errors across
 | |
|  *		all objects of the given type.
 | |
|  *
 | |
|  * 		mos		Any data in the MOS
 | |
|  * 		mosdir		object directory
 | |
|  * 		config		pool configuration
 | |
|  * 		bpobj		blkptr list
 | |
|  * 		spacemap	spacemap
 | |
|  * 		metaslab	metaslab
 | |
|  * 		errlog		persistent error log
 | |
|  *
 | |
|  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
 | |
|  * 		a range is given, this corresponds to the indirect block
 | |
|  * 		corresponding to the specific range.
 | |
|  *
 | |
|  *	range	A numerical range [start,end) within the object.  Defaults to
 | |
|  *		the full size of the file.
 | |
|  *
 | |
|  * 	object	A string describing the logical location of the object.  For
 | |
|  * 		files and directories (currently the only supported types),
 | |
|  * 		this is the path of the object on disk.
 | |
|  *
 | |
|  * This is translated, via libzpool, into the following internal representation:
 | |
|  *
 | |
|  * 	<type,objset,object,level,range>
 | |
|  *
 | |
|  * These types should be self-explanatory.  This tuple is then passed to the
 | |
|  * kernel via a special ioctl() to initiate fault injection for the given
 | |
|  * object.  Note that 'type' is not strictly necessary for fault injection, but
 | |
|  * is used when translating existing faults into a human-readable string.
 | |
|  *
 | |
|  *
 | |
|  * The command itself takes one of the forms:
 | |
|  *
 | |
|  * 	zinject
 | |
|  * 	zinject <-a | -u pool>
 | |
|  * 	zinject -c <id|all>
 | |
|  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
 | |
|  *	    [-r range] <object>
 | |
|  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
 | |
|  *
 | |
|  * With no arguments, the command prints all currently registered injection
 | |
|  * handlers, with their numeric identifiers.
 | |
|  *
 | |
|  * The '-c' option will clear the given handler, or all handlers if 'all' is
 | |
|  * specified.
 | |
|  *
 | |
|  * The '-e' option takes a string describing the errno to simulate.  This must
 | |
|  * be one of 'io', 'checksum', 'decompress', or 'decrypt'.  In most cases this
 | |
|  * will result in the same behavior, but RAID-Z will produce a different set of
 | |
|  * ereports for this situation.
 | |
|  *
 | |
|  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
 | |
|  * specified, then the ARC cache is flushed appropriately.  If '-u' is
 | |
|  * specified, then the underlying SPA is unloaded.  Either of these flags can be
 | |
|  * specified independently of any other handlers.  The '-m' flag automatically
 | |
|  * does an unmount and remount of the underlying dataset to aid in flushing the
 | |
|  * cache.
 | |
|  *
 | |
|  * The '-f' flag controls the frequency of errors injected, expressed as a
 | |
|  * real number percentage between 0.0001 and 100.  The default is 100.
 | |
|  *
 | |
|  * The this form is responsible for actually injecting the handler into the
 | |
|  * framework.  It takes the arguments described above, translates them to the
 | |
|  * internal tuple using libzpool, and then issues an ioctl() to register the
 | |
|  * handler.
 | |
|  *
 | |
|  * The final form can target a specific bookmark, regardless of whether a
 | |
|  * human-readable interface has been designed.  It allows developers to specify
 | |
|  * a particular block by number.
 | |
|  */
 | |
| 
 | |
| #include <errno.h>
 | |
| #include <fcntl.h>
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| #include <strings.h>
 | |
| #include <unistd.h>
 | |
| 
 | |
| #include <sys/fs/zfs.h>
 | |
| #include <sys/mount.h>
 | |
| 
 | |
| #include <libzfs.h>
 | |
| 
 | |
| #undef verify	/* both libzfs.h and zfs_context.h want to define this */
 | |
| 
 | |
| #include "zinject.h"
 | |
| 
 | |
| libzfs_handle_t *g_zfs;
 | |
| int zfs_fd;
 | |
| 
 | |
| static const char *const errtable[TYPE_INVAL] = {
 | |
| 	"data",
 | |
| 	"dnode",
 | |
| 	"mos",
 | |
| 	"mosdir",
 | |
| 	"metaslab",
 | |
| 	"config",
 | |
| 	"bpobj",
 | |
| 	"spacemap",
 | |
| 	"errlog",
 | |
| 	"uber",
 | |
| 	"nvlist",
 | |
| 	"pad1",
 | |
| 	"pad2"
 | |
| };
 | |
| 
 | |
| static err_type_t
 | |
| name_to_type(const char *arg)
 | |
| {
 | |
| 	int i;
 | |
| 	for (i = 0; i < TYPE_INVAL; i++)
 | |
| 		if (strcmp(errtable[i], arg) == 0)
 | |
| 			return (i);
 | |
| 
 | |
| 	return (TYPE_INVAL);
 | |
| }
 | |
| 
 | |
| static const char *
 | |
| type_to_name(uint64_t type)
 | |
| {
 | |
| 	switch (type) {
 | |
| 	case DMU_OT_OBJECT_DIRECTORY:
 | |
| 		return ("mosdir");
 | |
| 	case DMU_OT_OBJECT_ARRAY:
 | |
| 		return ("metaslab");
 | |
| 	case DMU_OT_PACKED_NVLIST:
 | |
| 		return ("config");
 | |
| 	case DMU_OT_BPOBJ:
 | |
| 		return ("bpobj");
 | |
| 	case DMU_OT_SPACE_MAP:
 | |
| 		return ("spacemap");
 | |
| 	case DMU_OT_ERROR_LOG:
 | |
| 		return ("errlog");
 | |
| 	default:
 | |
| 		return ("-");
 | |
| 	}
 | |
| }
 | |
| 
 | |
| struct errstr {
 | |
| 	int		err;
 | |
| 	const char	*str;
 | |
| };
 | |
| static const struct errstr errstrtable[] = {
 | |
| 	{ EIO,		"io" },
 | |
| 	{ ECKSUM,	"checksum" },
 | |
| 	{ EINVAL,	"decompress" },
 | |
| 	{ EACCES,	"decrypt" },
 | |
| 	{ ENXIO,	"nxio" },
 | |
| 	{ ECHILD,	"dtl" },
 | |
| 	{ EILSEQ,	"corrupt" },
 | |
| 	{ ENOSYS,	"noop" },
 | |
| 	{ 0, NULL },
 | |
| };
 | |
| 
 | |
| static int
 | |
| str_to_err(const char *str)
 | |
| {
 | |
| 	for (int i = 0; errstrtable[i].str != NULL; i++)
 | |
| 		if (strcasecmp(errstrtable[i].str, str) == 0)
 | |
| 			return (errstrtable[i].err);
 | |
| 	return (-1);
 | |
| }
 | |
| static const char *
 | |
| err_to_str(int err)
 | |
| {
 | |
| 	for (int i = 0; errstrtable[i].str != NULL; i++)
 | |
| 		if (errstrtable[i].err == err)
 | |
| 			return (errstrtable[i].str);
 | |
| 	return ("[unknown]");
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Print usage message.
 | |
|  */
 | |
| void
 | |
| usage(void)
 | |
| {
 | |
| 	(void) printf(
 | |
| 	    "usage:\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tList all active injection records.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -c <id|all>\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tClear the particular record (if given a numeric ID), or\n"
 | |
| 	    "\t\tall records if 'all' is specified.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -p <function name> pool\n"
 | |
| 	    "\t\tInject a panic fault at the specified function. Only \n"
 | |
| 	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
 | |
| 	    "\t\tspa_vdev_exit() will trigger a panic.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
 | |
| 	    "\t\t[-T <read|write|free|claim|flush|all>] [-f frequency] pool\n\n"
 | |
| 	    "\t\tInject a fault into a particular device or the device's\n"
 | |
| 	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n "
 | |
| 	    "\t\t'pad1', or 'pad2'.\n"
 | |
| 	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n"
 | |
| 	    "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n"
 | |
| 	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
 | |
| 	    "\t\tdevice error injection to a percentage of the IOs.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
 | |
| 	    "\t\tPerform a specific action on a particular device.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -d device -D latency:lanes pool\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tAdd an artificial delay to IO requests on a particular\n"
 | |
| 	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
 | |
| 	    "\t\tmilliseconds to complete. Each delay has an associated\n"
 | |
| 	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
 | |
| 	    "\t\tIO requests that can be processed.\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
 | |
| 	    "\t\tthe device will only be able to service a single IO request\n"
 | |
| 	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
 | |
| 	    "\t\tif only a single request is submitted every 10 ms, the\n"
 | |
| 	    "\t\taverage latency will be 10 ms; but if more than one request\n"
 | |
| 	    "\t\tis submitted every 10 ms, the average latency will be more\n"
 | |
| 	    "\t\tthan 10 ms.\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
 | |
| 	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
 | |
| 	    "\t\ttwo requests at a time, each with a minimum latency of\n"
 | |
| 	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
 | |
| 	    "\t\tthe average latency will be 10 ms; but if more than two\n"
 | |
| 	    "\t\trequests are submitted every 10 ms, the average latency\n"
 | |
| 	    "\t\twill be more than 10 ms.\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tAlso note, these delays are additive. So two invocations\n"
 | |
| 	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
 | |
| 	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
 | |
| 	    "\t\tlanes with differing target latencies. For example, an\n"
 | |
| 	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
 | |
| 	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
 | |
| 	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -P import|export -s <seconds> pool\n"
 | |
| 	    "\t\tAdd an artificial delay to a future pool import or export,\n"
 | |
| 	    "\t\tsuch that the operation takes a minimum of supplied seconds\n"
 | |
| 	    "\t\tto complete.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
 | |
| 	    "\t\tCause the pool to stop writing blocks yet not\n"
 | |
| 	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
 | |
| 	    "\t\tthat fails to honor cache flush requests.\n"
 | |
| 	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
 | |
| 	    "\t\tat the end of the duration.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject -b objset:object:level:blkid pool\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
 | |
| 	    "\t\tspecified by the remaining tuple.  Each number is in\n"
 | |
| 	    "\t\thexadecimal, and only one block can be specified.\n"
 | |
| 	    "\n"
 | |
| 	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
 | |
| 	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
 | |
| 	    "\n"
 | |
| 	    "\t\tInject an error into the object specified by the '-t' option\n"
 | |
| 	    "\t\tand the object descriptor.  The 'object' parameter is\n"
 | |
| 	    "\t\tinterpreted depending on the '-t' option.\n"
 | |
| 	    "\n"
 | |
| 	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
 | |
| 	    "\t\t-e\tInject a specific error.  Must be one of 'io',\n"
 | |
| 	    "\t\t\t'checksum', 'decompress', or 'decrypt'.  Default is 'io'.\n"
 | |
| 	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
 | |
| 	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
 | |
| 	    "\t\t\tseparated by commas (ex. '0,2').\n"
 | |
| 	    "\t\t-l\tInject error at a particular block level. Default is "
 | |
| 	    "0.\n"
 | |
| 	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
 | |
| 	    "\t\t-r\tInject error over a particular logical range of an\n"
 | |
| 	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
 | |
| 	    "\t\t\trange according to the object's properties.\n"
 | |
| 	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
 | |
| 	    "\t\t\tassociated object.\n"
 | |
| 	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
 | |
| 	    "\t\t\ta pool object.\n"
 | |
| 	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
 | |
| 	    "\t\t\ta percentage between 0.0001 and 100.\n"
 | |
| 	    "\n"
 | |
| 	    "\t-t data\t\tInject an error into the plain file contents of a\n"
 | |
| 	    "\t\t\tfile.  The object must be specified as a complete path\n"
 | |
| 	    "\t\t\tto a file on a ZFS filesystem.\n"
 | |
| 	    "\n"
 | |
| 	    "\t-t dnode\tInject an error into the metadnode in the block\n"
 | |
| 	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
 | |
| 	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
 | |
| 	    "\t\t\tis specified as a complete path to a file or directory\n"
 | |
| 	    "\t\t\ton a ZFS filesystem.\n"
 | |
| 	    "\n"
 | |
| 	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
 | |
| 	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
 | |
| 	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
 | |
| 	    "\t\t\tthe poolname.\n");
 | |
| }
 | |
| 
 | |
| static int
 | |
| iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
 | |
|     void *data)
 | |
| {
 | |
| 	zfs_cmd_t zc = {"\0"};
 | |
| 	int ret;
 | |
| 
 | |
| 	while (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
 | |
| 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
 | |
| 		    &zc.zc_inject_record, data)) != 0)
 | |
| 			return (ret);
 | |
| 
 | |
| 	if (errno != ENOENT) {
 | |
| 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
 | |
| 		    strerror(errno));
 | |
| 		return (-1);
 | |
| 	}
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| print_data_handler(int id, const char *pool, zinject_record_t *record,
 | |
|     void *data)
 | |
| {
 | |
| 	int *count = data;
 | |
| 
 | |
| 	if (record->zi_guid != 0 || record->zi_func[0] != '\0' ||
 | |
| 	    record->zi_duration != 0) {
 | |
| 		return (0);
 | |
| 	}
 | |
| 
 | |
| 	if (*count == 0) {
 | |
| 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  "
 | |
| 		    "%-15s\n", "ID", "POOL", "OBJSET", "OBJECT", "TYPE",
 | |
| 		    "LVL", "DVAs", "RANGE");
 | |
| 		(void) printf("---  ---------------  ------  "
 | |
| 		    "------  --------  ---  ----  ---------------\n");
 | |
| 	}
 | |
| 
 | |
| 	*count += 1;
 | |
| 
 | |
| 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %-3d  0x%02x  ",
 | |
| 	    id, pool, (u_longlong_t)record->zi_objset,
 | |
| 	    (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
 | |
| 	    record->zi_level, record->zi_dvas);
 | |
| 
 | |
| 
 | |
| 	if (record->zi_start == 0 &&
 | |
| 	    record->zi_end == -1ULL)
 | |
| 		(void) printf("all\n");
 | |
| 	else
 | |
| 		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
 | |
| 		    (u_longlong_t)record->zi_end);
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| print_device_handler(int id, const char *pool, zinject_record_t *record,
 | |
|     void *data)
 | |
| {
 | |
| 	static const char *iotypestr[] = {
 | |
| 	    "null", "read", "write", "free", "claim", "flush", "trim", "all",
 | |
| 	};
 | |
| 
 | |
| 	int *count = data;
 | |
| 
 | |
| 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
 | |
| 		return (0);
 | |
| 
 | |
| 	if (record->zi_cmd == ZINJECT_DELAY_IO)
 | |
| 		return (0);
 | |
| 
 | |
| 	if (*count == 0) {
 | |
| 		(void) printf("%3s  %-15s  %-16s  %-5s  %-10s  %-9s\n",
 | |
| 		    "ID", "POOL", "GUID", "TYPE", "ERROR", "FREQ");
 | |
| 		(void) printf(
 | |
| 		    "---  ---------------  ----------------  "
 | |
| 		    "-----  ----------  ---------\n");
 | |
| 	}
 | |
| 
 | |
| 	*count += 1;
 | |
| 
 | |
| 	double freq = record->zi_freq == 0 ? 100.0f :
 | |
| 	    (((double)record->zi_freq) / ZI_PERCENTAGE_MAX) * 100.0f;
 | |
| 
 | |
| 	(void) printf("%3d  %-15s  %llx  %-5s  %-10s  %8.4f%%\n", id, pool,
 | |
| 	    (u_longlong_t)record->zi_guid, iotypestr[record->zi_iotype],
 | |
| 	    err_to_str(record->zi_error), freq);
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| print_delay_handler(int id, const char *pool, zinject_record_t *record,
 | |
|     void *data)
 | |
| {
 | |
| 	int *count = data;
 | |
| 
 | |
| 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
 | |
| 		return (0);
 | |
| 
 | |
| 	if (record->zi_cmd != ZINJECT_DELAY_IO)
 | |
| 		return (0);
 | |
| 
 | |
| 	if (*count == 0) {
 | |
| 		(void) printf("%3s  %-15s  %-15s  %-15s  %s\n",
 | |
| 		    "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
 | |
| 		(void) printf("---  ---------------  ---------------  "
 | |
| 		    "---------------  ----------------\n");
 | |
| 	}
 | |
| 
 | |
| 	*count += 1;
 | |
| 
 | |
| 	(void) printf("%3d  %-15s  %-15llu  %-15llu  %llx\n", id, pool,
 | |
| 	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
 | |
| 	    (u_longlong_t)record->zi_nlanes,
 | |
| 	    (u_longlong_t)record->zi_guid);
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| print_panic_handler(int id, const char *pool, zinject_record_t *record,
 | |
|     void *data)
 | |
| {
 | |
| 	int *count = data;
 | |
| 
 | |
| 	if (record->zi_func[0] == '\0')
 | |
| 		return (0);
 | |
| 
 | |
| 	if (*count == 0) {
 | |
| 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
 | |
| 		(void) printf("---  ---------------  ----------------\n");
 | |
| 	}
 | |
| 
 | |
| 	*count += 1;
 | |
| 
 | |
| 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| print_pool_delay_handler(int id, const char *pool, zinject_record_t *record,
 | |
|     void *data)
 | |
| {
 | |
| 	int *count = data;
 | |
| 
 | |
| 	if (record->zi_cmd != ZINJECT_DELAY_IMPORT &&
 | |
| 	    record->zi_cmd != ZINJECT_DELAY_EXPORT) {
 | |
| 		return (0);
 | |
| 	}
 | |
| 
 | |
| 	if (*count == 0) {
 | |
| 		(void) printf("%3s  %-19s  %-11s  %s\n",
 | |
| 		    "ID", "POOL", "DELAY (sec)", "COMMAND");
 | |
| 		(void) printf("---  -------------------  -----------"
 | |
| 		    "  -------\n");
 | |
| 	}
 | |
| 
 | |
| 	*count += 1;
 | |
| 
 | |
| 	(void) printf("%3d  %-19s  %-11llu  %s\n",
 | |
| 	    id, pool, (u_longlong_t)record->zi_duration,
 | |
| 	    record->zi_cmd == ZINJECT_DELAY_IMPORT ? "import": "export");
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Print all registered error handlers.  Returns the number of handlers
 | |
|  * registered.
 | |
|  */
 | |
| static int
 | |
| print_all_handlers(void)
 | |
| {
 | |
| 	int count = 0, total = 0;
 | |
| 
 | |
| 	(void) iter_handlers(print_device_handler, &count);
 | |
| 	if (count > 0) {
 | |
| 		total += count;
 | |
| 		(void) printf("\n");
 | |
| 		count = 0;
 | |
| 	}
 | |
| 
 | |
| 	(void) iter_handlers(print_delay_handler, &count);
 | |
| 	if (count > 0) {
 | |
| 		total += count;
 | |
| 		(void) printf("\n");
 | |
| 		count = 0;
 | |
| 	}
 | |
| 
 | |
| 	(void) iter_handlers(print_data_handler, &count);
 | |
| 	if (count > 0) {
 | |
| 		total += count;
 | |
| 		(void) printf("\n");
 | |
| 		count = 0;
 | |
| 	}
 | |
| 
 | |
| 	(void) iter_handlers(print_pool_delay_handler, &count);
 | |
| 	if (count > 0) {
 | |
| 		total += count;
 | |
| 		(void) printf("\n");
 | |
| 		count = 0;
 | |
| 	}
 | |
| 
 | |
| 	(void) iter_handlers(print_panic_handler, &count);
 | |
| 
 | |
| 	return (count + total);
 | |
| }
 | |
| 
 | |
| static int
 | |
| cancel_one_handler(int id, const char *pool, zinject_record_t *record,
 | |
|     void *data)
 | |
| {
 | |
| 	(void) pool, (void) record, (void) data;
 | |
| 	zfs_cmd_t zc = {"\0"};
 | |
| 
 | |
| 	zc.zc_guid = (uint64_t)id;
 | |
| 
 | |
| 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
 | |
| 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
 | |
| 		    id, strerror(errno));
 | |
| 		return (1);
 | |
| 	}
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Remove all fault injection handlers.
 | |
|  */
 | |
| static int
 | |
| cancel_all_handlers(void)
 | |
| {
 | |
| 	int ret = iter_handlers(cancel_one_handler, NULL);
 | |
| 
 | |
| 	if (ret == 0)
 | |
| 		(void) printf("removed all registered handlers\n");
 | |
| 
 | |
| 	return (ret);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Remove a specific fault injection handler.
 | |
|  */
 | |
| static int
 | |
| cancel_handler(int id)
 | |
| {
 | |
| 	zfs_cmd_t zc = {"\0"};
 | |
| 
 | |
| 	zc.zc_guid = (uint64_t)id;
 | |
| 
 | |
| 	if (zfs_ioctl(g_zfs, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
 | |
| 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
 | |
| 		    id, strerror(errno));
 | |
| 		return (1);
 | |
| 	}
 | |
| 
 | |
| 	(void) printf("removed handler %d\n", id);
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Register a new fault injection handler.
 | |
|  */
 | |
| static int
 | |
| register_handler(const char *pool, int flags, zinject_record_t *record,
 | |
|     int quiet)
 | |
| {
 | |
| 	zfs_cmd_t zc = {"\0"};
 | |
| 
 | |
| 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
 | |
| 	zc.zc_inject_record = *record;
 | |
| 	zc.zc_guid = flags;
 | |
| 
 | |
| 	if (zfs_ioctl(g_zfs, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
 | |
| 		const char *errmsg = strerror(errno);
 | |
| 
 | |
| 		switch (errno) {
 | |
| 		case EDOM:
 | |
| 			errmsg = "block level exceeds max level of object";
 | |
| 			break;
 | |
| 		case EEXIST:
 | |
| 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
 | |
| 				errmsg = "pool already imported";
 | |
| 			if (record->zi_cmd == ZINJECT_DELAY_EXPORT)
 | |
| 				errmsg = "a handler already exists";
 | |
| 			break;
 | |
| 		case ENOENT:
 | |
| 			/* import delay injector running on older zfs module */
 | |
| 			if (record->zi_cmd == ZINJECT_DELAY_IMPORT)
 | |
| 				errmsg = "import delay injector not supported";
 | |
| 			break;
 | |
| 		default:
 | |
| 			break;
 | |
| 		}
 | |
| 		(void) fprintf(stderr, "failed to add handler: %s\n", errmsg);
 | |
| 		return (1);
 | |
| 	}
 | |
| 
 | |
| 	if (flags & ZINJECT_NULL)
 | |
| 		return (0);
 | |
| 
 | |
| 	if (quiet) {
 | |
| 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
 | |
| 	} else {
 | |
| 		(void) printf("Added handler %llu with the following "
 | |
| 		    "properties:\n", (u_longlong_t)zc.zc_guid);
 | |
| 		(void) printf("  pool: %s\n", pool);
 | |
| 		if (record->zi_guid) {
 | |
| 			(void) printf("  vdev: %llx\n",
 | |
| 			    (u_longlong_t)record->zi_guid);
 | |
| 		} else if (record->zi_func[0] != '\0') {
 | |
| 			(void) printf("  panic function: %s\n",
 | |
| 			    record->zi_func);
 | |
| 		} else if (record->zi_duration > 0) {
 | |
| 			(void) printf(" time: %lld seconds\n",
 | |
| 			    (u_longlong_t)record->zi_duration);
 | |
| 		} else if (record->zi_duration < 0) {
 | |
| 			(void) printf(" txgs: %lld \n",
 | |
| 			    (u_longlong_t)-record->zi_duration);
 | |
| 		} else if (record->zi_timer > 0) {
 | |
| 			(void) printf(" timer: %lld ms\n",
 | |
| 			    (u_longlong_t)NSEC2MSEC(record->zi_timer));
 | |
| 		} else {
 | |
| 			(void) printf("objset: %llu\n",
 | |
| 			    (u_longlong_t)record->zi_objset);
 | |
| 			(void) printf("object: %llu\n",
 | |
| 			    (u_longlong_t)record->zi_object);
 | |
| 			(void) printf("  type: %llu\n",
 | |
| 			    (u_longlong_t)record->zi_type);
 | |
| 			(void) printf(" level: %d\n", record->zi_level);
 | |
| 			if (record->zi_start == 0 &&
 | |
| 			    record->zi_end == -1ULL)
 | |
| 				(void) printf(" range: all\n");
 | |
| 			else
 | |
| 				(void) printf(" range: [%llu, %llu)\n",
 | |
| 				    (u_longlong_t)record->zi_start,
 | |
| 				    (u_longlong_t)record->zi_end);
 | |
| 			(void) printf("  dvas: 0x%x\n", record->zi_dvas);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| perform_action(const char *pool, zinject_record_t *record, int cmd)
 | |
| {
 | |
| 	zfs_cmd_t zc = {"\0"};
 | |
| 
 | |
| 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
 | |
| 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
 | |
| 	zc.zc_guid = record->zi_guid;
 | |
| 	zc.zc_cookie = cmd;
 | |
| 
 | |
| 	if (zfs_ioctl(g_zfs, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 | |
| 		return (0);
 | |
| 
 | |
| 	return (1);
 | |
| }
 | |
| 
 | |
| static int
 | |
| parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
 | |
| {
 | |
| 	unsigned long scan_delay;
 | |
| 	unsigned long scan_nlanes;
 | |
| 
 | |
| 	if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2)
 | |
| 		return (1);
 | |
| 
 | |
| 	/*
 | |
| 	 * We explicitly disallow a delay of zero here, because we key
 | |
| 	 * off this value being non-zero in translate_device(), to
 | |
| 	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
 | |
| 	 */
 | |
| 	if (scan_delay == 0)
 | |
| 		return (1);
 | |
| 
 | |
| 	/*
 | |
| 	 * The units for the CLI delay parameter is milliseconds, but
 | |
| 	 * the data passed to the kernel is interpreted as nanoseconds.
 | |
| 	 * Thus we scale the milliseconds to nanoseconds here, and this
 | |
| 	 * nanosecond value is used to pass the delay to the kernel.
 | |
| 	 */
 | |
| 	*delay = MSEC2NSEC(scan_delay);
 | |
| 	*nlanes = scan_nlanes;
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| parse_frequency(const char *str, uint32_t *percent)
 | |
| {
 | |
| 	double val;
 | |
| 	char *post;
 | |
| 
 | |
| 	val = strtod(str, &post);
 | |
| 	if (post == NULL || *post != '\0')
 | |
| 		return (EINVAL);
 | |
| 
 | |
| 	/* valid range is [0.0001, 100.0] */
 | |
| 	val /= 100.0f;
 | |
| 	if (val < 0.000001f || val > 1.0f)
 | |
| 		return (ERANGE);
 | |
| 
 | |
| 	/* convert to an integer for use by kernel */
 | |
| 	*percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * This function converts a string specifier for DVAs into a bit mask.
 | |
|  * The dva's provided by the user should be 0 indexed and separated by
 | |
|  * a comma. For example:
 | |
|  *	"1"	-> 0b0010  (0x2)
 | |
|  *	"0,1"	-> 0b0011  (0x3)
 | |
|  *	"0,1,2"	-> 0b0111  (0x7)
 | |
|  */
 | |
| static int
 | |
| parse_dvas(const char *str, uint32_t *dvas_out)
 | |
| {
 | |
| 	const char *c = str;
 | |
| 	uint32_t mask = 0;
 | |
| 	boolean_t need_delim = B_FALSE;
 | |
| 
 | |
| 	/* max string length is 5 ("0,1,2") */
 | |
| 	if (strlen(str) > 5 || strlen(str) == 0)
 | |
| 		return (EINVAL);
 | |
| 
 | |
| 	while (*c != '\0') {
 | |
| 		switch (*c) {
 | |
| 		case '0':
 | |
| 		case '1':
 | |
| 		case '2':
 | |
| 			/* check for pipe between DVAs */
 | |
| 			if (need_delim)
 | |
| 				return (EINVAL);
 | |
| 
 | |
| 			/* check if this DVA has been set already */
 | |
| 			if (mask & (1 << ((*c) - '0')))
 | |
| 				return (EINVAL);
 | |
| 
 | |
| 			mask |= (1 << ((*c) - '0'));
 | |
| 			need_delim = B_TRUE;
 | |
| 			break;
 | |
| 		case ',':
 | |
| 			need_delim = B_FALSE;
 | |
| 			break;
 | |
| 		default:
 | |
| 			/* check for invalid character */
 | |
| 			return (EINVAL);
 | |
| 		}
 | |
| 		c++;
 | |
| 	}
 | |
| 
 | |
| 	/* check for dangling delimiter */
 | |
| 	if (!need_delim)
 | |
| 		return (EINVAL);
 | |
| 
 | |
| 	*dvas_out = mask;
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| int
 | |
| main(int argc, char **argv)
 | |
| {
 | |
| 	int c;
 | |
| 	char *range = NULL;
 | |
| 	char *cancel = NULL;
 | |
| 	char *end;
 | |
| 	char *raw = NULL;
 | |
| 	char *device = NULL;
 | |
| 	int level = 0;
 | |
| 	int quiet = 0;
 | |
| 	int error = 0;
 | |
| 	int domount = 0;
 | |
| 	int io_type = ZIO_TYPES;
 | |
| 	int action = VDEV_STATE_UNKNOWN;
 | |
| 	err_type_t type = TYPE_INVAL;
 | |
| 	err_type_t label = TYPE_INVAL;
 | |
| 	zinject_record_t record = { 0 };
 | |
| 	char pool[MAXNAMELEN] = "";
 | |
| 	char dataset[MAXNAMELEN] = "";
 | |
| 	zfs_handle_t *zhp = NULL;
 | |
| 	int nowrites = 0;
 | |
| 	int dur_txg = 0;
 | |
| 	int dur_secs = 0;
 | |
| 	int ret;
 | |
| 	int flags = 0;
 | |
| 	uint32_t dvas = 0;
 | |
| 
 | |
| 	if ((g_zfs = libzfs_init()) == NULL) {
 | |
| 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
 | |
| 		return (1);
 | |
| 	}
 | |
| 
 | |
| 	libzfs_print_on_error(g_zfs, B_TRUE);
 | |
| 
 | |
| 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
 | |
| 		(void) fprintf(stderr, "failed to open ZFS device\n");
 | |
| 		libzfs_fini(g_zfs);
 | |
| 		return (1);
 | |
| 	}
 | |
| 
 | |
| 	if (argc == 1) {
 | |
| 		/*
 | |
| 		 * No arguments.  Print the available handlers.  If there are no
 | |
| 		 * available handlers, direct the user to '-h' for help
 | |
| 		 * information.
 | |
| 		 */
 | |
| 		if (print_all_handlers() == 0) {
 | |
| 			(void) printf("No handlers registered.\n");
 | |
| 			(void) printf("Run 'zinject -h' for usage "
 | |
| 			    "information.\n");
 | |
| 		}
 | |
| 		libzfs_fini(g_zfs);
 | |
| 		return (0);
 | |
| 	}
 | |
| 
 | |
| 	while ((c = getopt(argc, argv,
 | |
| 	    ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) {
 | |
| 		switch (c) {
 | |
| 		case 'a':
 | |
| 			flags |= ZINJECT_FLUSH_ARC;
 | |
| 			break;
 | |
| 		case 'A':
 | |
| 			if (strcasecmp(optarg, "degrade") == 0) {
 | |
| 				action = VDEV_STATE_DEGRADED;
 | |
| 			} else if (strcasecmp(optarg, "fault") == 0) {
 | |
| 				action = VDEV_STATE_FAULTED;
 | |
| 			} else {
 | |
| 				(void) fprintf(stderr, "invalid action '%s': "
 | |
| 				    "must be 'degrade' or 'fault'\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'b':
 | |
| 			raw = optarg;
 | |
| 			break;
 | |
| 		case 'c':
 | |
| 			cancel = optarg;
 | |
| 			break;
 | |
| 		case 'C':
 | |
| 			ret = parse_dvas(optarg, &dvas);
 | |
| 			if (ret != 0) {
 | |
| 				(void) fprintf(stderr, "invalid DVA list '%s': "
 | |
| 				    "DVAs should be 0 indexed and separated by "
 | |
| 				    "commas.\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'd':
 | |
| 			device = optarg;
 | |
| 			break;
 | |
| 		case 'D':
 | |
| 			errno = 0;
 | |
| 			ret = parse_delay(optarg, &record.zi_timer,
 | |
| 			    &record.zi_nlanes);
 | |
| 			if (ret != 0) {
 | |
| 
 | |
| 				(void) fprintf(stderr, "invalid i/o delay "
 | |
| 				    "value: '%s'\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'e':
 | |
| 			error = str_to_err(optarg);
 | |
| 			if (error < 0) {
 | |
| 				(void) fprintf(stderr, "invalid error type "
 | |
| 				    "'%s': must be one of: io decompress "
 | |
| 				    "decrypt nxio dtl corrupt noop\n",
 | |
| 				    optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'f':
 | |
| 			ret = parse_frequency(optarg, &record.zi_freq);
 | |
| 			if (ret != 0) {
 | |
| 				(void) fprintf(stderr, "%sfrequency value must "
 | |
| 				    "be in the range [0.0001, 100.0]\n",
 | |
| 				    ret == EINVAL ? "invalid value: " :
 | |
| 				    ret == ERANGE ? "out of range: " : "");
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'F':
 | |
| 			record.zi_failfast = B_TRUE;
 | |
| 			break;
 | |
| 		case 'g':
 | |
| 			dur_txg = 1;
 | |
| 			record.zi_duration = (int)strtol(optarg, &end, 10);
 | |
| 			if (record.zi_duration <= 0 || *end != '\0') {
 | |
| 				(void) fprintf(stderr, "invalid duration '%s': "
 | |
| 				    "must be a positive integer\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			/* store duration of txgs as its negative */
 | |
| 			record.zi_duration *= -1;
 | |
| 			break;
 | |
| 		case 'h':
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (0);
 | |
| 		case 'I':
 | |
| 			/* default duration, if one hasn't yet been defined */
 | |
| 			nowrites = 1;
 | |
| 			if (dur_secs == 0 && dur_txg == 0)
 | |
| 				record.zi_duration = 30;
 | |
| 			break;
 | |
| 		case 'l':
 | |
| 			level = (int)strtol(optarg, &end, 10);
 | |
| 			if (*end != '\0') {
 | |
| 				(void) fprintf(stderr, "invalid level '%s': "
 | |
| 				    "must be an integer\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'm':
 | |
| 			domount = 1;
 | |
| 			break;
 | |
| 		case 'p':
 | |
| 			(void) strlcpy(record.zi_func, optarg,
 | |
| 			    sizeof (record.zi_func));
 | |
| 			record.zi_cmd = ZINJECT_PANIC;
 | |
| 			break;
 | |
| 		case 'P':
 | |
| 			if (strcasecmp(optarg, "import") == 0) {
 | |
| 				record.zi_cmd = ZINJECT_DELAY_IMPORT;
 | |
| 			} else if (strcasecmp(optarg, "export") == 0) {
 | |
| 				record.zi_cmd = ZINJECT_DELAY_EXPORT;
 | |
| 			} else {
 | |
| 				(void) fprintf(stderr, "invalid command '%s': "
 | |
| 				    "must be 'import' or 'export'\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'q':
 | |
| 			quiet = 1;
 | |
| 			break;
 | |
| 		case 'r':
 | |
| 			range = optarg;
 | |
| 			flags |= ZINJECT_CALC_RANGE;
 | |
| 			break;
 | |
| 		case 's':
 | |
| 			dur_secs = 1;
 | |
| 			record.zi_duration = (int)strtol(optarg, &end, 10);
 | |
| 			if (record.zi_duration <= 0 || *end != '\0') {
 | |
| 				(void) fprintf(stderr, "invalid duration '%s': "
 | |
| 				    "must be a positive integer\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'T':
 | |
| 			if (strcasecmp(optarg, "read") == 0) {
 | |
| 				io_type = ZIO_TYPE_READ;
 | |
| 			} else if (strcasecmp(optarg, "write") == 0) {
 | |
| 				io_type = ZIO_TYPE_WRITE;
 | |
| 			} else if (strcasecmp(optarg, "free") == 0) {
 | |
| 				io_type = ZIO_TYPE_FREE;
 | |
| 			} else if (strcasecmp(optarg, "claim") == 0) {
 | |
| 				io_type = ZIO_TYPE_CLAIM;
 | |
| 			} else if (strcasecmp(optarg, "flush") == 0) {
 | |
| 				io_type = ZIO_TYPE_FLUSH;
 | |
| 			} else if (strcasecmp(optarg, "all") == 0) {
 | |
| 				io_type = ZIO_TYPES;
 | |
| 			} else {
 | |
| 				(void) fprintf(stderr, "invalid I/O type "
 | |
| 				    "'%s': must be 'read', 'write', 'free', "
 | |
| 				    "'claim', 'flush' or 'all'\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 't':
 | |
| 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
 | |
| 			    !MOS_TYPE(type)) {
 | |
| 				(void) fprintf(stderr, "invalid type '%s'\n",
 | |
| 				    optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case 'u':
 | |
| 			flags |= ZINJECT_UNLOAD_SPA;
 | |
| 			break;
 | |
| 		case 'L':
 | |
| 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
 | |
| 			    !LABEL_TYPE(type)) {
 | |
| 				(void) fprintf(stderr, "invalid label type "
 | |
| 				    "'%s'\n", optarg);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			break;
 | |
| 		case ':':
 | |
| 			(void) fprintf(stderr, "option -%c requires an "
 | |
| 			    "operand\n", optopt);
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		case '?':
 | |
| 			(void) fprintf(stderr, "invalid option '%c'\n",
 | |
| 			    optopt);
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	argc -= optind;
 | |
| 	argv += optind;
 | |
| 
 | |
| 	if (record.zi_duration != 0 && record.zi_cmd == 0)
 | |
| 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
 | |
| 
 | |
| 	if (cancel != NULL) {
 | |
| 		/*
 | |
| 		 * '-c' is invalid with any other options.
 | |
| 		 */
 | |
| 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
 | |
| 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
 | |
| 		    record.zi_freq > 0 || dvas != 0) {
 | |
| 			(void) fprintf(stderr, "cancel (-c) incompatible with "
 | |
| 			    "any other options\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 		if (argc != 0) {
 | |
| 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (strcmp(cancel, "all") == 0) {
 | |
| 			return (cancel_all_handlers());
 | |
| 		} else {
 | |
| 			int id = (int)strtol(cancel, &end, 10);
 | |
| 			if (*end != '\0') {
 | |
| 				(void) fprintf(stderr, "invalid handle id '%s':"
 | |
| 				    " must be an integer or 'all'\n", cancel);
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 			return (cancel_handler(id));
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (device != NULL) {
 | |
| 		/*
 | |
| 		 * Device (-d) injection uses a completely different mechanism
 | |
| 		 * for doing injection, so handle it separately here.
 | |
| 		 */
 | |
| 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
 | |
| 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
 | |
| 		    dvas != 0) {
 | |
| 			(void) fprintf(stderr, "device (-d) incompatible with "
 | |
| 			    "data error injection\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (argc != 1) {
 | |
| 			(void) fprintf(stderr, "device (-d) injection requires "
 | |
| 			    "a single pool name\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		(void) strlcpy(pool, argv[0], sizeof (pool));
 | |
| 		dataset[0] = '\0';
 | |
| 
 | |
| 		if (error == ECKSUM) {
 | |
| 			(void) fprintf(stderr, "device error type must be "
 | |
| 			    "'io', 'nxio' or 'corrupt'\n");
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 
 | |
| 		if (error == EILSEQ &&
 | |
| 		    (record.zi_freq == 0 || io_type != ZIO_TYPE_READ)) {
 | |
| 			(void) fprintf(stderr, "device corrupt errors require "
 | |
| 			    "io type read and a frequency value\n");
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 
 | |
| 		record.zi_iotype = io_type;
 | |
| 		if (translate_device(pool, device, label, &record) != 0) {
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 
 | |
| 		if (record.zi_nlanes) {
 | |
| 			switch (io_type) {
 | |
| 			case ZIO_TYPE_READ:
 | |
| 			case ZIO_TYPE_WRITE:
 | |
| 			case ZIO_TYPES:
 | |
| 				break;
 | |
| 			default:
 | |
| 				(void) fprintf(stderr, "I/O type for a delay "
 | |
| 				    "must be 'read' or 'write'\n");
 | |
| 				usage();
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if (!error)
 | |
| 			error = ENXIO;
 | |
| 
 | |
| 		if (action != VDEV_STATE_UNKNOWN)
 | |
| 			return (perform_action(pool, &record, action));
 | |
| 
 | |
| 	} else if (raw != NULL) {
 | |
| 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
 | |
| 		    record.zi_cmd != ZINJECT_UNINITIALIZED ||
 | |
| 		    record.zi_freq > 0 || dvas != 0) {
 | |
| 			(void) fprintf(stderr, "raw (-b) format with "
 | |
| 			    "any other options\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (argc != 1) {
 | |
| 			(void) fprintf(stderr, "raw (-b) format expects a "
 | |
| 			    "single pool name\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		(void) strlcpy(pool, argv[0], sizeof (pool));
 | |
| 		dataset[0] = '\0';
 | |
| 
 | |
| 		if (error == ENXIO) {
 | |
| 			(void) fprintf(stderr, "data error type must be "
 | |
| 			    "'checksum' or 'io'\n");
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 
 | |
| 		record.zi_cmd = ZINJECT_DATA_FAULT;
 | |
| 		if (translate_raw(raw, &record) != 0) {
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 		if (!error)
 | |
| 			error = EIO;
 | |
| 	} else if (record.zi_cmd == ZINJECT_PANIC) {
 | |
| 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
 | |
| 		    level != 0 || device != NULL || record.zi_freq > 0 ||
 | |
| 		    dvas != 0) {
 | |
| 			(void) fprintf(stderr, "%s incompatible with other "
 | |
| 			    "options\n", "import|export delay (-P)");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (argc < 1 || argc > 2) {
 | |
| 			(void) fprintf(stderr, "panic (-p) injection requires "
 | |
| 			    "a single pool name and an optional id\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		(void) strlcpy(pool, argv[0], sizeof (pool));
 | |
| 		if (argv[1] != NULL)
 | |
| 			record.zi_type = atoi(argv[1]);
 | |
| 		dataset[0] = '\0';
 | |
| 	} else if (record.zi_cmd == ZINJECT_DELAY_IMPORT ||
 | |
| 	    record.zi_cmd == ZINJECT_DELAY_EXPORT) {
 | |
| 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
 | |
| 		    level != 0 || device != NULL || record.zi_freq > 0 ||
 | |
| 		    dvas != 0) {
 | |
| 			(void) fprintf(stderr, "%s incompatible with other "
 | |
| 			    "options\n", "import|export delay (-P)");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (argc != 1 || record.zi_duration <= 0) {
 | |
| 			(void) fprintf(stderr, "import|export delay (-P) "
 | |
| 			    "injection requires a duration (-s) and a single "
 | |
| 			    "pool name\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		(void) strlcpy(pool, argv[0], sizeof (pool));
 | |
| 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
 | |
| 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
 | |
| 		    level != 0 || record.zi_freq > 0 || dvas != 0) {
 | |
| 			(void) fprintf(stderr, "hardware failure (-I) "
 | |
| 			    "incompatible with other options\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (nowrites == 0) {
 | |
| 			(void) fprintf(stderr, "-s or -g meaningless "
 | |
| 			    "without -I (ignore writes)\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		} else if (dur_secs && dur_txg) {
 | |
| 			(void) fprintf(stderr, "choose a duration either "
 | |
| 			    "in seconds (-s) or a number of txgs (-g) "
 | |
| 			    "but not both\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		} else if (argc != 1) {
 | |
| 			(void) fprintf(stderr, "ignore writes (-I) "
 | |
| 			    "injection requires a single pool name\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		(void) strlcpy(pool, argv[0], sizeof (pool));
 | |
| 		dataset[0] = '\0';
 | |
| 	} else if (type == TYPE_INVAL) {
 | |
| 		if (flags == 0) {
 | |
| 			(void) fprintf(stderr, "at least one of '-b', '-d', "
 | |
| 			    "'-t', '-a', '-p', '-I' or '-u' "
 | |
| 			    "must be specified\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
 | |
| 			(void) strlcpy(pool, argv[0], sizeof (pool));
 | |
| 			dataset[0] = '\0';
 | |
| 		} else if (argc != 0) {
 | |
| 			(void) fprintf(stderr, "extraneous argument for "
 | |
| 			    "'-f'\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		flags |= ZINJECT_NULL;
 | |
| 	} else {
 | |
| 		if (argc != 1) {
 | |
| 			(void) fprintf(stderr, "missing object\n");
 | |
| 			usage();
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (2);
 | |
| 		}
 | |
| 
 | |
| 		if (error == ENXIO || error == EILSEQ) {
 | |
| 			(void) fprintf(stderr, "data error type must be "
 | |
| 			    "'checksum' or 'io'\n");
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 
 | |
| 		if (dvas != 0) {
 | |
| 			if (error == EACCES || error == EINVAL) {
 | |
| 				(void) fprintf(stderr, "the '-C' option may "
 | |
| 				    "not be used with logical data errors "
 | |
| 				    "'decrypt' and 'decompress'\n");
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 
 | |
| 			record.zi_dvas = dvas;
 | |
| 		}
 | |
| 
 | |
| 		if (error == EACCES) {
 | |
| 			if (type != TYPE_DATA) {
 | |
| 				(void) fprintf(stderr, "decryption errors "
 | |
| 				    "may only be injected for 'data' types\n");
 | |
| 				libzfs_fini(g_zfs);
 | |
| 				return (1);
 | |
| 			}
 | |
| 
 | |
| 			record.zi_cmd = ZINJECT_DECRYPT_FAULT;
 | |
| 			/*
 | |
| 			 * Internally, ZFS actually uses ECKSUM for decryption
 | |
| 			 * errors since EACCES is used to indicate the key was
 | |
| 			 * not found.
 | |
| 			 */
 | |
| 			error = ECKSUM;
 | |
| 		} else {
 | |
| 			record.zi_cmd = ZINJECT_DATA_FAULT;
 | |
| 		}
 | |
| 
 | |
| 		if (translate_record(type, argv[0], range, level, &record, pool,
 | |
| 		    dataset) != 0) {
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 		if (!error)
 | |
| 			error = EIO;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
 | |
| 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
 | |
| 	 * time we access the pool.
 | |
| 	 */
 | |
| 	if (dataset[0] != '\0' && domount) {
 | |
| 		if ((zhp = zfs_open(g_zfs, dataset,
 | |
| 		    ZFS_TYPE_DATASET)) == NULL) {
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 		if (zfs_unmount(zhp, NULL, 0) != 0) {
 | |
| 			libzfs_fini(g_zfs);
 | |
| 			return (1);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	record.zi_error = error;
 | |
| 
 | |
| 	ret = register_handler(pool, flags, &record, quiet);
 | |
| 
 | |
| 	if (dataset[0] != '\0' && domount)
 | |
| 		ret = (zfs_mount(zhp, NULL, 0) != 0);
 | |
| 
 | |
| 	libzfs_fini(g_zfs);
 | |
| 
 | |
| 	return (ret);
 | |
| }
 |