xref: /illumos-gate/usr/src/cmd/zinject/zinject.c (revision 97e81309)
1ea8dc4b6Seschrock /*
2ea8dc4b6Seschrock  * CDDL HEADER START
3ea8dc4b6Seschrock  *
4ea8dc4b6Seschrock  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7ea8dc4b6Seschrock  *
8ea8dc4b6Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ea8dc4b6Seschrock  * or http://www.opensolaris.org/os/licensing.
10ea8dc4b6Seschrock  * See the License for the specific language governing permissions
11ea8dc4b6Seschrock  * and limitations under the License.
12ea8dc4b6Seschrock  *
13ea8dc4b6Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
14ea8dc4b6Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ea8dc4b6Seschrock  * If applicable, add the following below this CDDL HEADER, with the
16ea8dc4b6Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
17ea8dc4b6Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
18ea8dc4b6Seschrock  *
19ea8dc4b6Seschrock  * CDDL HEADER END
20ea8dc4b6Seschrock  */
21ea8dc4b6Seschrock /*
2298d1cbfeSGeorge Wilson  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23*97e81309SPrakash Surya  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24ea8dc4b6Seschrock  */
25ea8dc4b6Seschrock 
26ea8dc4b6Seschrock /*
27ea8dc4b6Seschrock  * ZFS Fault Injector
28ea8dc4b6Seschrock  *
29ea8dc4b6Seschrock  * This userland component takes a set of options and uses libzpool to translate
30ea8dc4b6Seschrock  * from a user-visible object type and name to an internal representation.
31ea8dc4b6Seschrock  * There are two basic types of faults: device faults and data faults.
32ea8dc4b6Seschrock  *
33ea8dc4b6Seschrock  *
34ea8dc4b6Seschrock  * DEVICE FAULTS
35ea8dc4b6Seschrock  *
36ea8dc4b6Seschrock  * Errors can be injected into a particular vdev using the '-d' option.  This
37ea8dc4b6Seschrock  * option takes a path or vdev GUID to uniquely identify the device within a
38ea8dc4b6Seschrock  * pool.  There are two types of errors that can be injected, EIO and ENXIO,
3921bf64a7Sgw  * that can be controlled through the '-e' option.  The default is ENXIO.  For
40ea8dc4b6Seschrock  * EIO failures, any attempt to read data from the device will return EIO, but
41ea8dc4b6Seschrock  * subsequent attempt to reopen the device will succeed.  For ENXIO failures,
42ea8dc4b6Seschrock  * any attempt to read from the device will return EIO, but any attempt to
43ea8dc4b6Seschrock  * reopen the device will also return ENXIO.
4421bf64a7Sgw  * For label faults, the -L option must be specified. This allows faults
4598d1cbfeSGeorge Wilson  * to be injected into either the nvlist, uberblock, pad1, or pad2 region
4698d1cbfeSGeorge Wilson  * of all the labels for the specified device.
47ea8dc4b6Seschrock  *
48ea8dc4b6Seschrock  * This form of the command looks like:
49ea8dc4b6Seschrock  *
5098d1cbfeSGeorge Wilson  * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
51ea8dc4b6Seschrock  *
52ea8dc4b6Seschrock  *
53ea8dc4b6Seschrock  * DATA FAULTS
54ea8dc4b6Seschrock  *
55ea8dc4b6Seschrock  * We begin with a tuple of the form:
56ea8dc4b6Seschrock  *
57ea8dc4b6Seschrock  * 	<type,level,range,object>
58ea8dc4b6Seschrock  *
59ea8dc4b6Seschrock  * 	type	A string describing the type of data to target.  Each type
60ea8dc4b6Seschrock  * 		implicitly describes how to interpret 'object'. Currently,
61ea8dc4b6Seschrock  * 		the following values are supported:
62ea8dc4b6Seschrock  *
63ea8dc4b6Seschrock  * 		data		User data for a file
64ea8dc4b6Seschrock  * 		dnode		Dnode for a file or directory
65ea8dc4b6Seschrock  *
66ea8dc4b6Seschrock  *		The following MOS objects are special.  Instead of injecting
67ea8dc4b6Seschrock  *		errors on a particular object or blkid, we inject errors across
68ea8dc4b6Seschrock  *		all objects of the given type.
69ea8dc4b6Seschrock  *
70ea8dc4b6Seschrock  * 		mos		Any data in the MOS
71ea8dc4b6Seschrock  * 		mosdir		object directory
72ea8dc4b6Seschrock  * 		config		pool configuration
73cde58dbcSMatthew Ahrens  * 		bpobj		blkptr list
74ea8dc4b6Seschrock  * 		spacemap	spacemap
75ea8dc4b6Seschrock  * 		metaslab	metaslab
76ea8dc4b6Seschrock  * 		errlog		persistent error log
77ea8dc4b6Seschrock  *
78ea8dc4b6Seschrock  * 	level	Object level.  Defaults to '0', not applicable to all types.  If
79ea8dc4b6Seschrock  * 		a range is given, this corresponds to the indirect block
80ea8dc4b6Seschrock  * 		corresponding to the specific range.
81ea8dc4b6Seschrock  *
82ea8dc4b6Seschrock  *	range	A numerical range [start,end) within the object.  Defaults to
83ea8dc4b6Seschrock  *		the full size of the file.
84ea8dc4b6Seschrock  *
85ea8dc4b6Seschrock  * 	object	A string describing the logical location of the object.  For
86ea8dc4b6Seschrock  * 		files and directories (currently the only supported types),
87ea8dc4b6Seschrock  * 		this is the path of the object on disk.
88ea8dc4b6Seschrock  *
89ea8dc4b6Seschrock  * This is translated, via libzpool, into the following internal representation:
90ea8dc4b6Seschrock  *
91ea8dc4b6Seschrock  * 	<type,objset,object,level,range>
92ea8dc4b6Seschrock  *
93ea8dc4b6Seschrock  * These types should be self-explanatory.  This tuple is then passed to the
94ea8dc4b6Seschrock  * kernel via a special ioctl() to initiate fault injection for the given
95ea8dc4b6Seschrock  * object.  Note that 'type' is not strictly necessary for fault injection, but
96ea8dc4b6Seschrock  * is used when translating existing faults into a human-readable string.
97ea8dc4b6Seschrock  *
98ea8dc4b6Seschrock  *
99ea8dc4b6Seschrock  * The command itself takes one of the forms:
100ea8dc4b6Seschrock  *
101ea8dc4b6Seschrock  * 	zinject
102ea8dc4b6Seschrock  * 	zinject <-a | -u pool>
103ea8dc4b6Seschrock  * 	zinject -c <id|all>
104ea8dc4b6Seschrock  * 	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
105ea8dc4b6Seschrock  *	    [-r range] <object>
106ea8dc4b6Seschrock  * 	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
107ea8dc4b6Seschrock  *
108ea8dc4b6Seschrock  * With no arguments, the command prints all currently registered injection
109ea8dc4b6Seschrock  * handlers, with their numeric identifiers.
110ea8dc4b6Seschrock  *
111ea8dc4b6Seschrock  * The '-c' option will clear the given handler, or all handlers if 'all' is
112ea8dc4b6Seschrock  * specified.
113ea8dc4b6Seschrock  *
114ea8dc4b6Seschrock  * The '-e' option takes a string describing the errno to simulate.  This must
115ea8dc4b6Seschrock  * be either 'io' or 'checksum'.  In most cases this will result in the same
116ea8dc4b6Seschrock  * behavior, but RAID-Z will produce a different set of ereports for this
117ea8dc4b6Seschrock  * situation.
118ea8dc4b6Seschrock  *
119ea8dc4b6Seschrock  * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
120ea8dc4b6Seschrock  * specified, then the ARC cache is flushed appropriately.  If '-u' is
121ea8dc4b6Seschrock  * specified, then the underlying SPA is unloaded.  Either of these flags can be
122ea8dc4b6Seschrock  * specified independently of any other handlers.  The '-m' flag automatically
123ea8dc4b6Seschrock  * does an unmount and remount of the underlying dataset to aid in flushing the
124ea8dc4b6Seschrock  * cache.
125ea8dc4b6Seschrock  *
 * The '-f' flag controls the frequency of errors injected, expressed as an
127ea8dc4b6Seschrock  * integer percentage between 1 and 100.  The default is 100.
128ea8dc4b6Seschrock  *
 * The '-t' form is responsible for actually injecting the handler into the
130ea8dc4b6Seschrock  * framework.  It takes the arguments described above, translates them to the
131ea8dc4b6Seschrock  * internal tuple using libzpool, and then issues an ioctl() to register the
132ea8dc4b6Seschrock  * handler.
133ea8dc4b6Seschrock  *
134ea8dc4b6Seschrock  * The final form can target a specific bookmark, regardless of whether a
135ea8dc4b6Seschrock  * human-readable interface has been designed.  It allows developers to specify
136ea8dc4b6Seschrock  * a particular block by number.
137ea8dc4b6Seschrock  */
138ea8dc4b6Seschrock 
139ea8dc4b6Seschrock #include <errno.h>
140ea8dc4b6Seschrock #include <fcntl.h>
141ea8dc4b6Seschrock #include <stdio.h>
142ea8dc4b6Seschrock #include <stdlib.h>
143ea8dc4b6Seschrock #include <strings.h>
144ea8dc4b6Seschrock #include <unistd.h>
145ea8dc4b6Seschrock 
146ea8dc4b6Seschrock #include <sys/fs/zfs.h>
147ea8dc4b6Seschrock #include <sys/mount.h>
148ea8dc4b6Seschrock 
149ea8dc4b6Seschrock #include <libzfs.h>
150ea8dc4b6Seschrock 
151ea8dc4b6Seschrock #undef verify	/* both libzfs.h and zfs_context.h want to define this */
152ea8dc4b6Seschrock 
153ea8dc4b6Seschrock #include "zinject.h"
154ea8dc4b6Seschrock 
15599653d4eSeschrock libzfs_handle_t *g_zfs;
156ea8dc4b6Seschrock int zfs_fd;
157ea8dc4b6Seschrock 
158ea8dc4b6Seschrock #define	ECKSUM	EBADE
159ea8dc4b6Seschrock 
160ea8dc4b6Seschrock static const char *errtable[TYPE_INVAL] = {
161ea8dc4b6Seschrock 	"data",
162ea8dc4b6Seschrock 	"dnode",
163ea8dc4b6Seschrock 	"mos",
164ea8dc4b6Seschrock 	"mosdir",
165ea8dc4b6Seschrock 	"metaslab",
166ea8dc4b6Seschrock 	"config",
167cde58dbcSMatthew Ahrens 	"bpobj",
168ea8dc4b6Seschrock 	"spacemap",
16921bf64a7Sgw 	"errlog",
17021bf64a7Sgw 	"uber",
17198d1cbfeSGeorge Wilson 	"nvlist",
17298d1cbfeSGeorge Wilson 	"pad1",
17398d1cbfeSGeorge Wilson 	"pad2"
174ea8dc4b6Seschrock };
175ea8dc4b6Seschrock 
176ea8dc4b6Seschrock static err_type_t
177ea8dc4b6Seschrock name_to_type(const char *arg)
178ea8dc4b6Seschrock {
179ea8dc4b6Seschrock 	int i;
180ea8dc4b6Seschrock 	for (i = 0; i < TYPE_INVAL; i++)
181ea8dc4b6Seschrock 		if (strcmp(errtable[i], arg) == 0)
182ea8dc4b6Seschrock 			return (i);
183ea8dc4b6Seschrock 
184ea8dc4b6Seschrock 	return (TYPE_INVAL);
185ea8dc4b6Seschrock }
186ea8dc4b6Seschrock 
187ea8dc4b6Seschrock static const char *
188ea8dc4b6Seschrock type_to_name(uint64_t type)
189ea8dc4b6Seschrock {
190ea8dc4b6Seschrock 	switch (type) {
191ea8dc4b6Seschrock 	case DMU_OT_OBJECT_DIRECTORY:
192ea8dc4b6Seschrock 		return ("mosdir");
193ea8dc4b6Seschrock 	case DMU_OT_OBJECT_ARRAY:
194ea8dc4b6Seschrock 		return ("metaslab");
195ea8dc4b6Seschrock 	case DMU_OT_PACKED_NVLIST:
196ea8dc4b6Seschrock 		return ("config");
197cde58dbcSMatthew Ahrens 	case DMU_OT_BPOBJ:
198cde58dbcSMatthew Ahrens 		return ("bpobj");
199ea8dc4b6Seschrock 	case DMU_OT_SPACE_MAP:
200ea8dc4b6Seschrock 		return ("spacemap");
201ea8dc4b6Seschrock 	case DMU_OT_ERROR_LOG:
202ea8dc4b6Seschrock 		return ("errlog");
203ea8dc4b6Seschrock 	default:
204ea8dc4b6Seschrock 		return ("-");
205ea8dc4b6Seschrock 	}
206ea8dc4b6Seschrock }
207ea8dc4b6Seschrock 
208ea8dc4b6Seschrock 
/*
 * Print the usage message to stdout.
 *
 * The text is constant, so it is written with fputs() rather than being
 * passed as a printf() format, and it is split into one call per section so
 * that no single string literal exceeds the 4095-character minimum that a
 * conforming C compiler is required to accept.
 */
void
usage(void)
{
	/* Listing, clearing and panic injection. */
	(void) fputs(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n", stdout);

	/* Device and label faults, and device state changes. */
	(void) fputs(
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t    [-T <read|write|free|claim|all>] pool\n"
	    "\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> pool\n"
	    "\n"
	    "\t\tPerform a specific action on a particular device\n"
	    "\n", stdout);

	/* Artificial IO delays. */
	(void) fputs(
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n", stdout);

	/* Ignored writes and raw bookmark injection. */
	(void) fputs(
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n", stdout);

	/* Object-based injection and its options. */
	(void) fputs(
	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
	    "\t    [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be either 'io' or\n"
	    "\t\t\t'checksum'.  Default is 'io'.\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 1 and 100.\n"
	    "\n", stdout);

	/* The individual '-t' types. */
	(void) fputs(
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n", stdout);
}
329ea8dc4b6Seschrock 
330ea8dc4b6Seschrock static int
331ea8dc4b6Seschrock iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
332ea8dc4b6Seschrock     void *data)
333ea8dc4b6Seschrock {
334f4c46b1eSYuri Pankov 	zfs_cmd_t zc = { 0 };
335ea8dc4b6Seschrock 	int ret;
336ea8dc4b6Seschrock 
337ea8dc4b6Seschrock 	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
338ea8dc4b6Seschrock 		if ((ret = func((int)zc.zc_guid, zc.zc_name,
339ea8dc4b6Seschrock 		    &zc.zc_inject_record, data)) != 0)
340ea8dc4b6Seschrock 			return (ret);
341ea8dc4b6Seschrock 
34254a91118SChris Kirby 	if (errno != ENOENT) {
34354a91118SChris Kirby 		(void) fprintf(stderr, "Unable to list handlers: %s\n",
34454a91118SChris Kirby 		    strerror(errno));
34554a91118SChris Kirby 		return (-1);
34654a91118SChris Kirby 	}
34754a91118SChris Kirby 
348ea8dc4b6Seschrock 	return (0);
349ea8dc4b6Seschrock }
350ea8dc4b6Seschrock 
351ea8dc4b6Seschrock static int
352ea8dc4b6Seschrock print_data_handler(int id, const char *pool, zinject_record_t *record,
353ea8dc4b6Seschrock     void *data)
354ea8dc4b6Seschrock {
355ea8dc4b6Seschrock 	int *count = data;
356ea8dc4b6Seschrock 
35788ecc943SGeorge Wilson 	if (record->zi_guid != 0 || record->zi_func[0] != '\0')
358ea8dc4b6Seschrock 		return (0);
359ea8dc4b6Seschrock 
360ea8dc4b6Seschrock 	if (*count == 0) {
361ea8dc4b6Seschrock 		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-15s\n",
362ea8dc4b6Seschrock 		    "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL",  "RANGE");
363ea8dc4b6Seschrock 		(void) printf("---  ---------------  ------  "
364ea8dc4b6Seschrock 		    "------  --------  ---  ---------------\n");
365ea8dc4b6Seschrock 	}
366ea8dc4b6Seschrock 
367ea8dc4b6Seschrock 	*count += 1;
368ea8dc4b6Seschrock 
369ea8dc4b6Seschrock 	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %3d  ", id, pool,
370ea8dc4b6Seschrock 	    (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
371ea8dc4b6Seschrock 	    type_to_name(record->zi_type), record->zi_level);
372ea8dc4b6Seschrock 
373ea8dc4b6Seschrock 	if (record->zi_start == 0 &&
374ea8dc4b6Seschrock 	    record->zi_end == -1ULL)
375ea8dc4b6Seschrock 		(void) printf("all\n");
376ea8dc4b6Seschrock 	else
377ea8dc4b6Seschrock 		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
378ea8dc4b6Seschrock 		    (u_longlong_t)record->zi_end);
379ea8dc4b6Seschrock 
380ea8dc4b6Seschrock 	return (0);
381ea8dc4b6Seschrock }
382ea8dc4b6Seschrock 
383ea8dc4b6Seschrock static int
384ea8dc4b6Seschrock print_device_handler(int id, const char *pool, zinject_record_t *record,
385ea8dc4b6Seschrock     void *data)
386ea8dc4b6Seschrock {
387ea8dc4b6Seschrock 	int *count = data;
388ea8dc4b6Seschrock 
38988ecc943SGeorge Wilson 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
390ea8dc4b6Seschrock 		return (0);
391ea8dc4b6Seschrock 
392*97e81309SPrakash Surya 	if (record->zi_cmd == ZINJECT_DELAY_IO)
393*97e81309SPrakash Surya 		return (0);
394*97e81309SPrakash Surya 
395ea8dc4b6Seschrock 	if (*count == 0) {
396ea8dc4b6Seschrock 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
397ea8dc4b6Seschrock 		(void) printf("---  ---------------  ----------------\n");
398ea8dc4b6Seschrock 	}
399ea8dc4b6Seschrock 
400ea8dc4b6Seschrock 	*count += 1;
401ea8dc4b6Seschrock 
402ea8dc4b6Seschrock 	(void) printf("%3d  %-15s  %llx\n", id, pool,
403ea8dc4b6Seschrock 	    (u_longlong_t)record->zi_guid);
404ea8dc4b6Seschrock 
405ea8dc4b6Seschrock 	return (0);
406ea8dc4b6Seschrock }
407ea8dc4b6Seschrock 
408*97e81309SPrakash Surya static int
409*97e81309SPrakash Surya print_delay_handler(int id, const char *pool, zinject_record_t *record,
410*97e81309SPrakash Surya     void *data)
411*97e81309SPrakash Surya {
412*97e81309SPrakash Surya 	int *count = data;
413*97e81309SPrakash Surya 
414*97e81309SPrakash Surya 	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
415*97e81309SPrakash Surya 		return (0);
416*97e81309SPrakash Surya 
417*97e81309SPrakash Surya 	if (record->zi_cmd != ZINJECT_DELAY_IO)
418*97e81309SPrakash Surya 		return (0);
419*97e81309SPrakash Surya 
420*97e81309SPrakash Surya 	if (*count == 0) {
421*97e81309SPrakash Surya 		(void) printf("%3s  %-15s  %-15s  %-15s  %s\n",
422*97e81309SPrakash Surya 		    "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
423*97e81309SPrakash Surya 		(void) printf("---  ---------------  ---------------  "
424*97e81309SPrakash Surya 		    "---------------  ----------------\n");
425*97e81309SPrakash Surya 	}
426*97e81309SPrakash Surya 
427*97e81309SPrakash Surya 	*count += 1;
428*97e81309SPrakash Surya 
429*97e81309SPrakash Surya 	(void) printf("%3d  %-15s  %-15llu  %-15llu  %llx\n", id, pool,
430*97e81309SPrakash Surya 	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
431*97e81309SPrakash Surya 	    (u_longlong_t)record->zi_nlanes,
432*97e81309SPrakash Surya 	    (u_longlong_t)record->zi_guid);
433*97e81309SPrakash Surya 
434*97e81309SPrakash Surya 	return (0);
435*97e81309SPrakash Surya }
436*97e81309SPrakash Surya 
43788ecc943SGeorge Wilson static int
43888ecc943SGeorge Wilson print_panic_handler(int id, const char *pool, zinject_record_t *record,
43988ecc943SGeorge Wilson     void *data)
44088ecc943SGeorge Wilson {
44188ecc943SGeorge Wilson 	int *count = data;
44288ecc943SGeorge Wilson 
44388ecc943SGeorge Wilson 	if (record->zi_func[0] == '\0')
44488ecc943SGeorge Wilson 		return (0);
44588ecc943SGeorge Wilson 
44688ecc943SGeorge Wilson 	if (*count == 0) {
44788ecc943SGeorge Wilson 		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
44888ecc943SGeorge Wilson 		(void) printf("---  ---------------  ----------------\n");
44988ecc943SGeorge Wilson 	}
45088ecc943SGeorge Wilson 
45188ecc943SGeorge Wilson 	*count += 1;
45288ecc943SGeorge Wilson 
45388ecc943SGeorge Wilson 	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
45488ecc943SGeorge Wilson 
45588ecc943SGeorge Wilson 	return (0);
45688ecc943SGeorge Wilson }
45788ecc943SGeorge Wilson 
458ea8dc4b6Seschrock /*
459ea8dc4b6Seschrock  * Print all registered error handlers.  Returns the number of handlers
460ea8dc4b6Seschrock  * registered.
461ea8dc4b6Seschrock  */
462ea8dc4b6Seschrock static int
463ea8dc4b6Seschrock print_all_handlers(void)
464ea8dc4b6Seschrock {
465cb04b873SMark J Musante 	int count = 0, total = 0;
466ea8dc4b6Seschrock 
467ea8dc4b6Seschrock 	(void) iter_handlers(print_device_handler, &count);
468cb04b873SMark J Musante 	if (count > 0) {
469cb04b873SMark J Musante 		total += count;
470cb04b873SMark J Musante 		(void) printf("\n");
471cb04b873SMark J Musante 		count = 0;
472cb04b873SMark J Musante 	}
473cb04b873SMark J Musante 
474*97e81309SPrakash Surya 	(void) iter_handlers(print_delay_handler, &count);
475*97e81309SPrakash Surya 	if (count > 0) {
476*97e81309SPrakash Surya 		total += count;
477*97e81309SPrakash Surya 		(void) printf("\n");
478*97e81309SPrakash Surya 		count = 0;
479*97e81309SPrakash Surya 	}
480*97e81309SPrakash Surya 
481ea8dc4b6Seschrock 	(void) iter_handlers(print_data_handler, &count);
482cb04b873SMark J Musante 	if (count > 0) {
483cb04b873SMark J Musante 		total += count;
484cb04b873SMark J Musante 		(void) printf("\n");
485cb04b873SMark J Musante 		count = 0;
486cb04b873SMark J Musante 	}
487cb04b873SMark J Musante 
48888ecc943SGeorge Wilson 	(void) iter_handlers(print_panic_handler, &count);
489ea8dc4b6Seschrock 
490cb04b873SMark J Musante 	return (count + total);
491ea8dc4b6Seschrock }
492ea8dc4b6Seschrock 
493ea8dc4b6Seschrock /* ARGSUSED */
494ea8dc4b6Seschrock static int
495ea8dc4b6Seschrock cancel_one_handler(int id, const char *pool, zinject_record_t *record,
496ea8dc4b6Seschrock     void *data)
497ea8dc4b6Seschrock {
498f4c46b1eSYuri Pankov 	zfs_cmd_t zc = { 0 };
499ea8dc4b6Seschrock 
500ea8dc4b6Seschrock 	zc.zc_guid = (uint64_t)id;
501ea8dc4b6Seschrock 
502ea8dc4b6Seschrock 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
503ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
504ea8dc4b6Seschrock 		    id, strerror(errno));
505ea8dc4b6Seschrock 		return (1);
506ea8dc4b6Seschrock 	}
507ea8dc4b6Seschrock 
508ea8dc4b6Seschrock 	return (0);
509ea8dc4b6Seschrock }
510ea8dc4b6Seschrock 
511ea8dc4b6Seschrock /*
512ea8dc4b6Seschrock  * Remove all fault injection handlers.
513ea8dc4b6Seschrock  */
514ea8dc4b6Seschrock static int
515ea8dc4b6Seschrock cancel_all_handlers(void)
516ea8dc4b6Seschrock {
517ea8dc4b6Seschrock 	int ret = iter_handlers(cancel_one_handler, NULL);
518ea8dc4b6Seschrock 
51954a91118SChris Kirby 	if (ret == 0)
52054a91118SChris Kirby 		(void) printf("removed all registered handlers\n");
521ea8dc4b6Seschrock 
522ea8dc4b6Seschrock 	return (ret);
523ea8dc4b6Seschrock }
524ea8dc4b6Seschrock 
525ea8dc4b6Seschrock /*
526ea8dc4b6Seschrock  * Remove a specific fault injection handler.
527ea8dc4b6Seschrock  */
528ea8dc4b6Seschrock static int
529ea8dc4b6Seschrock cancel_handler(int id)
530ea8dc4b6Seschrock {
531f4c46b1eSYuri Pankov 	zfs_cmd_t zc = { 0 };
532ea8dc4b6Seschrock 
533ea8dc4b6Seschrock 	zc.zc_guid = (uint64_t)id;
534ea8dc4b6Seschrock 
535ea8dc4b6Seschrock 	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
536ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
537ea8dc4b6Seschrock 		    id, strerror(errno));
538ea8dc4b6Seschrock 		return (1);
539ea8dc4b6Seschrock 	}
540ea8dc4b6Seschrock 
541ea8dc4b6Seschrock 	(void) printf("removed handler %d\n", id);
542ea8dc4b6Seschrock 
543ea8dc4b6Seschrock 	return (0);
544ea8dc4b6Seschrock }
545ea8dc4b6Seschrock 
546ea8dc4b6Seschrock /*
547ea8dc4b6Seschrock  * Register a new fault injection handler.
548ea8dc4b6Seschrock  */
549ea8dc4b6Seschrock static int
550ea8dc4b6Seschrock register_handler(const char *pool, int flags, zinject_record_t *record,
551ea8dc4b6Seschrock     int quiet)
552ea8dc4b6Seschrock {
553f4c46b1eSYuri Pankov 	zfs_cmd_t zc = { 0 };
554ea8dc4b6Seschrock 
555ea8dc4b6Seschrock 	(void) strcpy(zc.zc_name, pool);
556ea8dc4b6Seschrock 	zc.zc_inject_record = *record;
557ea8dc4b6Seschrock 	zc.zc_guid = flags;
558ea8dc4b6Seschrock 
559ea8dc4b6Seschrock 	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
560ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to add handler: %s\n",
561ea8dc4b6Seschrock 		    strerror(errno));
562ea8dc4b6Seschrock 		return (1);
563ea8dc4b6Seschrock 	}
564ea8dc4b6Seschrock 
565ea8dc4b6Seschrock 	if (flags & ZINJECT_NULL)
566ea8dc4b6Seschrock 		return (0);
567ea8dc4b6Seschrock 
568ea8dc4b6Seschrock 	if (quiet) {
569ea8dc4b6Seschrock 		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
570ea8dc4b6Seschrock 	} else {
571ea8dc4b6Seschrock 		(void) printf("Added handler %llu with the following "
572ea8dc4b6Seschrock 		    "properties:\n", (u_longlong_t)zc.zc_guid);
573ea8dc4b6Seschrock 		(void) printf("  pool: %s\n", pool);
574ea8dc4b6Seschrock 		if (record->zi_guid) {
575ea8dc4b6Seschrock 			(void) printf("  vdev: %llx\n",
576ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_guid);
57788ecc943SGeorge Wilson 		} else if (record->zi_func[0] != '\0') {
57888ecc943SGeorge Wilson 			(void) printf("  panic function: %s\n",
57988ecc943SGeorge Wilson 			    record->zi_func);
580468c413aSTim Haley 		} else if (record->zi_duration > 0) {
581468c413aSTim Haley 			(void) printf(" time: %lld seconds\n",
582468c413aSTim Haley 			    (u_longlong_t)record->zi_duration);
583468c413aSTim Haley 		} else if (record->zi_duration < 0) {
584468c413aSTim Haley 			(void) printf(" txgs: %lld \n",
585468c413aSTim Haley 			    (u_longlong_t)-record->zi_duration);
586ea8dc4b6Seschrock 		} else {
587ea8dc4b6Seschrock 			(void) printf("objset: %llu\n",
588ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_objset);
589ea8dc4b6Seschrock 			(void) printf("object: %llu\n",
590ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_object);
591ea8dc4b6Seschrock 			(void) printf("  type: %llu\n",
592ea8dc4b6Seschrock 			    (u_longlong_t)record->zi_type);
593ea8dc4b6Seschrock 			(void) printf(" level: %d\n", record->zi_level);
594ea8dc4b6Seschrock 			if (record->zi_start == 0 &&
595ea8dc4b6Seschrock 			    record->zi_end == -1ULL)
596ea8dc4b6Seschrock 				(void) printf(" range: all\n");
597ea8dc4b6Seschrock 			else
598ea8dc4b6Seschrock 				(void) printf(" range: [%llu, %llu)\n",
599ea8dc4b6Seschrock 				    (u_longlong_t)record->zi_start,
600ea8dc4b6Seschrock 				    (u_longlong_t)record->zi_end);
601ea8dc4b6Seschrock 		}
602ea8dc4b6Seschrock 	}
603ea8dc4b6Seschrock 
604ea8dc4b6Seschrock 	return (0);
605ea8dc4b6Seschrock }
606ea8dc4b6Seschrock 
6078f18d1faSGeorge Wilson int
6088f18d1faSGeorge Wilson perform_action(const char *pool, zinject_record_t *record, int cmd)
6098f18d1faSGeorge Wilson {
610f4c46b1eSYuri Pankov 	zfs_cmd_t zc = { 0 };
6118f18d1faSGeorge Wilson 
6128f18d1faSGeorge Wilson 	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
6138f18d1faSGeorge Wilson 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
6148f18d1faSGeorge Wilson 	zc.zc_guid = record->zi_guid;
6158f18d1faSGeorge Wilson 	zc.zc_cookie = cmd;
6168f18d1faSGeorge Wilson 
6178f18d1faSGeorge Wilson 	if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
6188f18d1faSGeorge Wilson 		return (0);
6198f18d1faSGeorge Wilson 
6208f18d1faSGeorge Wilson 	return (1);
6218f18d1faSGeorge Wilson }
6228f18d1faSGeorge Wilson 
/*
 * Parse the operand of the i/o delay option, which has the form
 * "<delay>:<nlanes>" where both fields are unsigned decimal integers and
 * <delay> is expressed in milliseconds.
 *
 * On success, *delay receives the delay converted to nanoseconds, *nlanes
 * receives the lane count, and 0 is returned.  On any malformed input
 * (missing field, sign, whitespace, trailing characters, out-of-range
 * value, or a delay of zero) 1 is returned and the outputs are untouched.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	/*
	 * Largest millisecond count whose nanosecond equivalent (10^6 ns
	 * per ms) still fits in 63 bits, so the scaling below cannot
	 * overflow whether the conversion is done signed or unsigned.
	 */
	const unsigned long long max_msec = (~0ULL >> 1) / 1000000ULL;
	unsigned long long scan_delay;
	unsigned long long scan_nlanes;
	char *lanes_str;
	char *end;

	/*
	 * strtoull() skips leading whitespace and accepts a sign (silently
	 * negating the value), so insist that each field starts with a digit.
	 */
	if (str[0] < '0' || str[0] > '9')
		return (1);

	errno = 0;
	scan_delay = strtoull(str, &end, 10);
	if (errno != 0 || *end != ':')
		return (1);

	lanes_str = end + 1;
	if (lanes_str[0] < '0' || lanes_str[0] > '9')
		return (1);

	errno = 0;
	scan_nlanes = strtoull(lanes_str, &end, 10);
	if (errno != 0 || *end != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0 || scan_delay > max_msec)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
651*97e81309SPrakash Surya 
652ea8dc4b6Seschrock int
653ea8dc4b6Seschrock main(int argc, char **argv)
654ea8dc4b6Seschrock {
655ea8dc4b6Seschrock 	int c;
656ea8dc4b6Seschrock 	char *range = NULL;
657ea8dc4b6Seschrock 	char *cancel = NULL;
658ea8dc4b6Seschrock 	char *end;
659ea8dc4b6Seschrock 	char *raw = NULL;
660ea8dc4b6Seschrock 	char *device = NULL;
661ea8dc4b6Seschrock 	int level = 0;
662ea8dc4b6Seschrock 	int quiet = 0;
663ea8dc4b6Seschrock 	int error = 0;
664ea8dc4b6Seschrock 	int domount = 0;
6658f18d1faSGeorge Wilson 	int io_type = ZIO_TYPES;
6668f18d1faSGeorge Wilson 	int action = VDEV_STATE_UNKNOWN;
667ea8dc4b6Seschrock 	err_type_t type = TYPE_INVAL;
66821bf64a7Sgw 	err_type_t label = TYPE_INVAL;
669ea8dc4b6Seschrock 	zinject_record_t record = { 0 };
670ea8dc4b6Seschrock 	char pool[MAXNAMELEN];
671ea8dc4b6Seschrock 	char dataset[MAXNAMELEN];
672ea8dc4b6Seschrock 	zfs_handle_t *zhp;
673468c413aSTim Haley 	int nowrites = 0;
674468c413aSTim Haley 	int dur_txg = 0;
675468c413aSTim Haley 	int dur_secs = 0;
676ea8dc4b6Seschrock 	int ret;
677ea8dc4b6Seschrock 	int flags = 0;
678ea8dc4b6Seschrock 
67999653d4eSeschrock 	if ((g_zfs = libzfs_init()) == NULL) {
68099653d4eSeschrock 		(void) fprintf(stderr, "internal error: failed to "
68199653d4eSeschrock 		    "initialize ZFS library\n");
68299653d4eSeschrock 		return (1);
68399653d4eSeschrock 	}
68499653d4eSeschrock 
68599653d4eSeschrock 	libzfs_print_on_error(g_zfs, B_TRUE);
68699653d4eSeschrock 
687ea8dc4b6Seschrock 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
688ea8dc4b6Seschrock 		(void) fprintf(stderr, "failed to open ZFS device\n");
689ea8dc4b6Seschrock 		return (1);
690ea8dc4b6Seschrock 	}
691ea8dc4b6Seschrock 
692ea8dc4b6Seschrock 	if (argc == 1) {
693ea8dc4b6Seschrock 		/*
694ea8dc4b6Seschrock 		 * No arguments.  Print the available handlers.  If there are no
695ea8dc4b6Seschrock 		 * available handlers, direct the user to '-h' for help
696ea8dc4b6Seschrock 		 * information.
697ea8dc4b6Seschrock 		 */
698ea8dc4b6Seschrock 		if (print_all_handlers() == 0) {
699ea8dc4b6Seschrock 			(void) printf("No handlers registered.\n");
700ea8dc4b6Seschrock 			(void) printf("Run 'zinject -h' for usage "
701ea8dc4b6Seschrock 			    "information.\n");
702ea8dc4b6Seschrock 		}
703ea8dc4b6Seschrock 
704ea8dc4b6Seschrock 		return (0);
705ea8dc4b6Seschrock 	}
706ea8dc4b6Seschrock 
7078f18d1faSGeorge Wilson 	while ((c = getopt(argc, argv,
708283b8460SGeorge.Wilson 	    ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
709ea8dc4b6Seschrock 		switch (c) {
710ea8dc4b6Seschrock 		case 'a':
711ea8dc4b6Seschrock 			flags |= ZINJECT_FLUSH_ARC;
712ea8dc4b6Seschrock 			break;
7138f18d1faSGeorge Wilson 		case 'A':
7148f18d1faSGeorge Wilson 			if (strcasecmp(optarg, "degrade") == 0) {
7158f18d1faSGeorge Wilson 				action = VDEV_STATE_DEGRADED;
7168f18d1faSGeorge Wilson 			} else if (strcasecmp(optarg, "fault") == 0) {
7178f18d1faSGeorge Wilson 				action = VDEV_STATE_FAULTED;
7188f18d1faSGeorge Wilson 			} else {
7198f18d1faSGeorge Wilson 				(void) fprintf(stderr, "invalid action '%s': "
7208f18d1faSGeorge Wilson 				    "must be 'degrade' or 'fault'\n", optarg);
7218f18d1faSGeorge Wilson 				usage();
7228f18d1faSGeorge Wilson 				return (1);
7238f18d1faSGeorge Wilson 			}
7248f18d1faSGeorge Wilson 			break;
725ea8dc4b6Seschrock 		case 'b':
726ea8dc4b6Seschrock 			raw = optarg;
727ea8dc4b6Seschrock 			break;
728ea8dc4b6Seschrock 		case 'c':
729ea8dc4b6Seschrock 			cancel = optarg;
730ea8dc4b6Seschrock 			break;
731ea8dc4b6Seschrock 		case 'd':
732ea8dc4b6Seschrock 			device = optarg;
733ea8dc4b6Seschrock 			break;
734283b8460SGeorge.Wilson 		case 'D':
735*97e81309SPrakash Surya 			ret = parse_delay(optarg, &record.zi_timer,
736*97e81309SPrakash Surya 			    &record.zi_nlanes);
737*97e81309SPrakash Surya 			if (ret != 0) {
738283b8460SGeorge.Wilson 				(void) fprintf(stderr, "invalid i/o delay "
739283b8460SGeorge.Wilson 				    "value: '%s'\n", optarg);
740283b8460SGeorge.Wilson 				usage();
741283b8460SGeorge.Wilson 				return (1);
742283b8460SGeorge.Wilson 			}
743283b8460SGeorge.Wilson 			break;
744ea8dc4b6Seschrock 		case 'e':
745ea8dc4b6Seschrock 			if (strcasecmp(optarg, "io") == 0) {
746ea8dc4b6Seschrock 				error = EIO;
747ea8dc4b6Seschrock 			} else if (strcasecmp(optarg, "checksum") == 0) {
748ea8dc4b6Seschrock 				error = ECKSUM;
749ea8dc4b6Seschrock 			} else if (strcasecmp(optarg, "nxio") == 0) {
750ea8dc4b6Seschrock 				error = ENXIO;
751cb04b873SMark J Musante 			} else if (strcasecmp(optarg, "dtl") == 0) {
752cb04b873SMark J Musante 				error = ECHILD;
753ea8dc4b6Seschrock 			} else {
754ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid error type "
755ea8dc4b6Seschrock 				    "'%s': must be 'io', 'checksum' or "
756ea8dc4b6Seschrock 				    "'nxio'\n", optarg);
757ea8dc4b6Seschrock 				usage();
758ea8dc4b6Seschrock 				return (1);
759ea8dc4b6Seschrock 			}
760ea8dc4b6Seschrock 			break;
761ea8dc4b6Seschrock 		case 'f':
762ea8dc4b6Seschrock 			record.zi_freq = atoi(optarg);
763ea8dc4b6Seschrock 			if (record.zi_freq < 1 || record.zi_freq > 100) {
764ea8dc4b6Seschrock 				(void) fprintf(stderr, "frequency range must "
765ea8dc4b6Seschrock 				    "be in the range (0, 100]\n");
766ea8dc4b6Seschrock 				return (1);
767ea8dc4b6Seschrock 			}
768ea8dc4b6Seschrock 			break;
7698956713aSEric Schrock 		case 'F':
7708956713aSEric Schrock 			record.zi_failfast = B_TRUE;
7718956713aSEric Schrock 			break;
772468c413aSTim Haley 		case 'g':
773468c413aSTim Haley 			dur_txg = 1;
774468c413aSTim Haley 			record.zi_duration = (int)strtol(optarg, &end, 10);
775468c413aSTim Haley 			if (record.zi_duration <= 0 || *end != '\0') {
776468c413aSTim Haley 				(void) fprintf(stderr, "invalid duration '%s': "
777468c413aSTim Haley 				    "must be a positive integer\n", optarg);
778468c413aSTim Haley 				usage();
779468c413aSTim Haley 				return (1);
780468c413aSTim Haley 			}
781468c413aSTim Haley 			/* store duration of txgs as its negative */
782468c413aSTim Haley 			record.zi_duration *= -1;
783468c413aSTim Haley 			break;
784ea8dc4b6Seschrock 		case 'h':
785ea8dc4b6Seschrock 			usage();
786ea8dc4b6Seschrock 			return (0);
787468c413aSTim Haley 		case 'I':
788468c413aSTim Haley 			/* default duration, if one hasn't yet been defined */
789468c413aSTim Haley 			nowrites = 1;
790468c413aSTim Haley 			if (dur_secs == 0 && dur_txg == 0)
791468c413aSTim Haley 				record.zi_duration = 30;
792468c413aSTim Haley 			break;
793ea8dc4b6Seschrock 		case 'l':
794ea8dc4b6Seschrock 			level = (int)strtol(optarg, &end, 10);
795ea8dc4b6Seschrock 			if (*end != '\0') {
796ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid level '%s': "
797ea8dc4b6Seschrock 				    "must be an integer\n", optarg);
798ea8dc4b6Seschrock 				usage();
799ea8dc4b6Seschrock 				return (1);
800ea8dc4b6Seschrock 			}
801ea8dc4b6Seschrock 			break;
802ea8dc4b6Seschrock 		case 'm':
803ea8dc4b6Seschrock 			domount = 1;
804ea8dc4b6Seschrock 			break;
80588ecc943SGeorge Wilson 		case 'p':
80688ecc943SGeorge Wilson 			(void) strlcpy(record.zi_func, optarg,
80788ecc943SGeorge Wilson 			    sizeof (record.zi_func));
808283b8460SGeorge.Wilson 			record.zi_cmd = ZINJECT_PANIC;
80988ecc943SGeorge Wilson 			break;
810ea8dc4b6Seschrock 		case 'q':
811ea8dc4b6Seschrock 			quiet = 1;
812ea8dc4b6Seschrock 			break;
813ea8dc4b6Seschrock 		case 'r':
814ea8dc4b6Seschrock 			range = optarg;
815ea8dc4b6Seschrock 			break;
816468c413aSTim Haley 		case 's':
817468c413aSTim Haley 			dur_secs = 1;
818468c413aSTim Haley 			record.zi_duration = (int)strtol(optarg, &end, 10);
819468c413aSTim Haley 			if (record.zi_duration <= 0 || *end != '\0') {
820468c413aSTim Haley 				(void) fprintf(stderr, "invalid duration '%s': "
821468c413aSTim Haley 				    "must be a positive integer\n", optarg);
822468c413aSTim Haley 				usage();
823468c413aSTim Haley 				return (1);
824468c413aSTim Haley 			}
825468c413aSTim Haley 			break;
8268f18d1faSGeorge Wilson 		case 'T':
8278f18d1faSGeorge Wilson 			if (strcasecmp(optarg, "read") == 0) {
8288f18d1faSGeorge Wilson 				io_type = ZIO_TYPE_READ;
8298f18d1faSGeorge Wilson 			} else if (strcasecmp(optarg, "write") == 0) {
8308f18d1faSGeorge Wilson 				io_type = ZIO_TYPE_WRITE;
8318f18d1faSGeorge Wilson 			} else if (strcasecmp(optarg, "free") == 0) {
8328f18d1faSGeorge Wilson 				io_type = ZIO_TYPE_FREE;
8338f18d1faSGeorge Wilson 			} else if (strcasecmp(optarg, "claim") == 0) {
8348f18d1faSGeorge Wilson 				io_type = ZIO_TYPE_CLAIM;
8358f18d1faSGeorge Wilson 			} else if (strcasecmp(optarg, "all") == 0) {
8368f18d1faSGeorge Wilson 				io_type = ZIO_TYPES;
8378f18d1faSGeorge Wilson 			} else {
8388f18d1faSGeorge Wilson 				(void) fprintf(stderr, "invalid I/O type "
8398f18d1faSGeorge Wilson 				    "'%s': must be 'read', 'write', 'free', "
8408f18d1faSGeorge Wilson 				    "'claim' or 'all'\n", optarg);
8418f18d1faSGeorge Wilson 				usage();
8428f18d1faSGeorge Wilson 				return (1);
8438f18d1faSGeorge Wilson 			}
8448f18d1faSGeorge Wilson 			break;
845ea8dc4b6Seschrock 		case 't':
84621bf64a7Sgw 			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
84721bf64a7Sgw 			    !MOS_TYPE(type)) {
848ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid type '%s'\n",
849ea8dc4b6Seschrock 				    optarg);
850ea8dc4b6Seschrock 				usage();
851ea8dc4b6Seschrock 				return (1);
852ea8dc4b6Seschrock 			}
853ea8dc4b6Seschrock 			break;
854ea8dc4b6Seschrock 		case 'u':
855ea8dc4b6Seschrock 			flags |= ZINJECT_UNLOAD_SPA;
856ea8dc4b6Seschrock 			break;
85721bf64a7Sgw 		case 'L':
85821bf64a7Sgw 			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
85921bf64a7Sgw 			    !LABEL_TYPE(type)) {
86021bf64a7Sgw 				(void) fprintf(stderr, "invalid label type "
86121bf64a7Sgw 				    "'%s'\n", optarg);
86221bf64a7Sgw 				usage();
86321bf64a7Sgw 				return (1);
86421bf64a7Sgw 			}
86521bf64a7Sgw 			break;
866ea8dc4b6Seschrock 		case ':':
867ea8dc4b6Seschrock 			(void) fprintf(stderr, "option -%c requires an "
868ea8dc4b6Seschrock 			    "operand\n", optopt);
869ea8dc4b6Seschrock 			usage();
870ea8dc4b6Seschrock 			return (1);
871ea8dc4b6Seschrock 		case '?':
872ea8dc4b6Seschrock 			(void) fprintf(stderr, "invalid option '%c'\n",
873ea8dc4b6Seschrock 			    optopt);
874ea8dc4b6Seschrock 			usage();
875ea8dc4b6Seschrock 			return (2);
876ea8dc4b6Seschrock 		}
877ea8dc4b6Seschrock 	}
878ea8dc4b6Seschrock 
879ea8dc4b6Seschrock 	argc -= optind;
880ea8dc4b6Seschrock 	argv += optind;
881ea8dc4b6Seschrock 
882283b8460SGeorge.Wilson 	if (record.zi_duration != 0)
883283b8460SGeorge.Wilson 		record.zi_cmd = ZINJECT_IGNORED_WRITES;
884283b8460SGeorge.Wilson 
885ea8dc4b6Seschrock 	if (cancel != NULL) {
886ea8dc4b6Seschrock 		/*
887ea8dc4b6Seschrock 		 * '-c' is invalid with any other options.
888ea8dc4b6Seschrock 		 */
889ea8dc4b6Seschrock 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
890283b8460SGeorge.Wilson 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
891ea8dc4b6Seschrock 			(void) fprintf(stderr, "cancel (-c) incompatible with "
892ea8dc4b6Seschrock 			    "any other options\n");
893ea8dc4b6Seschrock 			usage();
894ea8dc4b6Seschrock 			return (2);
895ea8dc4b6Seschrock 		}
896ea8dc4b6Seschrock 		if (argc != 0) {
897ea8dc4b6Seschrock 			(void) fprintf(stderr, "extraneous argument to '-c'\n");
898ea8dc4b6Seschrock 			usage();
899ea8dc4b6Seschrock 			return (2);
900ea8dc4b6Seschrock 		}
901ea8dc4b6Seschrock 
902ea8dc4b6Seschrock 		if (strcmp(cancel, "all") == 0) {
903ea8dc4b6Seschrock 			return (cancel_all_handlers());
904ea8dc4b6Seschrock 		} else {
905ea8dc4b6Seschrock 			int id = (int)strtol(cancel, &end, 10);
906ea8dc4b6Seschrock 			if (*end != '\0') {
907ea8dc4b6Seschrock 				(void) fprintf(stderr, "invalid handle id '%s':"
908ea8dc4b6Seschrock 				    " must be an integer or 'all'\n", cancel);
909ea8dc4b6Seschrock 				usage();
910ea8dc4b6Seschrock 				return (1);
911ea8dc4b6Seschrock 			}
912ea8dc4b6Seschrock 			return (cancel_handler(id));
913ea8dc4b6Seschrock 		}
914ea8dc4b6Seschrock 	}
915ea8dc4b6Seschrock 
916ea8dc4b6Seschrock 	if (device != NULL) {
917ea8dc4b6Seschrock 		/*
918ea8dc4b6Seschrock 		 * Device (-d) injection uses a completely different mechanism
919ea8dc4b6Seschrock 		 * for doing injection, so handle it separately here.
920ea8dc4b6Seschrock 		 */
921ea8dc4b6Seschrock 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
922283b8460SGeorge.Wilson 		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
923ea8dc4b6Seschrock 			(void) fprintf(stderr, "device (-d) incompatible with "
924ea8dc4b6Seschrock 			    "data error injection\n");
925ea8dc4b6Seschrock 			usage();
926ea8dc4b6Seschrock 			return (2);
927ea8dc4b6Seschrock 		}
928ea8dc4b6Seschrock 
929ea8dc4b6Seschrock 		if (argc != 1) {
930ea8dc4b6Seschrock 			(void) fprintf(stderr, "device (-d) injection requires "
931ea8dc4b6Seschrock 			    "a single pool name\n");
932ea8dc4b6Seschrock 			usage();
933ea8dc4b6Seschrock 			return (2);
934ea8dc4b6Seschrock 		}
935ea8dc4b6Seschrock 
936ea8dc4b6Seschrock 		(void) strcpy(pool, argv[0]);
937ea8dc4b6Seschrock 		dataset[0] = '\0';
938ea8dc4b6Seschrock 
939ea8dc4b6Seschrock 		if (error == ECKSUM) {
940ea8dc4b6Seschrock 			(void) fprintf(stderr, "device error type must be "
941ea8dc4b6Seschrock 			    "'io' or 'nxio'\n");
942ea8dc4b6Seschrock 			return (1);
943ea8dc4b6Seschrock 		}
944ea8dc4b6Seschrock 
9458f18d1faSGeorge Wilson 		record.zi_iotype = io_type;
94621bf64a7Sgw 		if (translate_device(pool, device, label, &record) != 0)
947ea8dc4b6Seschrock 			return (1);
948ea8dc4b6Seschrock 		if (!error)
949ea8dc4b6Seschrock 			error = ENXIO;
9508f18d1faSGeorge Wilson 
9518f18d1faSGeorge Wilson 		if (action != VDEV_STATE_UNKNOWN)
9528f18d1faSGeorge Wilson 			return (perform_action(pool, &record, action));
9538f18d1faSGeorge Wilson 
954ea8dc4b6Seschrock 	} else if (raw != NULL) {
95588ecc943SGeorge Wilson 		if (range != NULL || type != TYPE_INVAL || level != 0 ||
956283b8460SGeorge.Wilson 		    record.zi_cmd != ZINJECT_UNINITIALIZED) {
957ea8dc4b6Seschrock 			(void) fprintf(stderr, "raw (-b) format with "
958ea8dc4b6Seschrock 			    "any other options\n");
959ea8dc4b6Seschrock 			usage();
960ea8dc4b6Seschrock 			return (2);
961ea8dc4b6Seschrock 		}
962ea8dc4b6Seschrock 
963ea8dc4b6Seschrock 		if (argc != 1) {
964ea8dc4b6Seschrock 			(void) fprintf(stderr, "raw (-b) format expects a "
965ea8dc4b6Seschrock 			    "single pool name\n");
966ea8dc4b6Seschrock 			usage();
967ea8dc4b6Seschrock 			return (2);
968ea8dc4b6Seschrock 		}
969ea8dc4b6Seschrock 
970ea8dc4b6Seschrock 		(void) strcpy(pool, argv[0]);
971ea8dc4b6Seschrock 		dataset[0] = '\0';
972ea8dc4b6Seschrock 
973ea8dc4b6Seschrock 		if (error == ENXIO) {
974ea8dc4b6Seschrock 			(void) fprintf(stderr, "data error type must be "
975ea8dc4b6Seschrock 			    "'checksum' or 'io'\n");
976ea8dc4b6Seschrock 			return (1);
977ea8dc4b6Seschrock 		}
978ea8dc4b6Seschrock 
979283b8460SGeorge.Wilson 		record.zi_cmd = ZINJECT_DATA_FAULT;
980ea8dc4b6Seschrock 		if (translate_raw(raw, &record) != 0)
981ea8dc4b6Seschrock 			return (1);
982ea8dc4b6Seschrock 		if (!error)
983ea8dc4b6Seschrock 			error = EIO;
984283b8460SGeorge.Wilson 	} else if (record.zi_cmd == ZINJECT_PANIC) {
98588ecc943SGeorge Wilson 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
986283b8460SGeorge.Wilson 		    level != 0 || device != NULL) {
98788ecc943SGeorge Wilson 			(void) fprintf(stderr, "panic (-p) incompatible with "
98888ecc943SGeorge Wilson 			    "other options\n");
98988ecc943SGeorge Wilson 			usage();
99088ecc943SGeorge Wilson 			return (2);
99188ecc943SGeorge Wilson 		}
99288ecc943SGeorge Wilson 
9931195e687SMark J Musante 		if (argc < 1 || argc > 2) {
99488ecc943SGeorge Wilson 			(void) fprintf(stderr, "panic (-p) injection requires "
9951195e687SMark J Musante 			    "a single pool name and an optional id\n");
99688ecc943SGeorge Wilson 			usage();
99788ecc943SGeorge Wilson 			return (2);
99888ecc943SGeorge Wilson 		}
99988ecc943SGeorge Wilson 
1000468c413aSTim Haley 		(void) strcpy(pool, argv[0]);
10011195e687SMark J Musante 		if (argv[1] != NULL)
10021195e687SMark J Musante 			record.zi_type = atoi(argv[1]);
1003468c413aSTim Haley 		dataset[0] = '\0';
1004283b8460SGeorge.Wilson 	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1005468c413aSTim Haley 		if (nowrites == 0) {
1006468c413aSTim Haley 			(void) fprintf(stderr, "-s or -g meaningless "
1007468c413aSTim Haley 			    "without -I (ignore writes)\n");
1008468c413aSTim Haley 			usage();
1009468c413aSTim Haley 			return (2);
1010468c413aSTim Haley 		} else if (dur_secs && dur_txg) {
1011468c413aSTim Haley 			(void) fprintf(stderr, "choose a duration either "
1012468c413aSTim Haley 			    "in seconds (-s) or a number of txgs (-g) "
1013468c413aSTim Haley 			    "but not both\n");
1014468c413aSTim Haley 			usage();
1015468c413aSTim Haley 			return (2);
1016468c413aSTim Haley 		} else if (argc != 1) {
1017468c413aSTim Haley 			(void) fprintf(stderr, "ignore writes (-I) "
1018468c413aSTim Haley 			    "injection requires a single pool name\n");
1019468c413aSTim Haley 			usage();
1020468c413aSTim Haley 			return (2);
1021468c413aSTim Haley 		}
1022468c413aSTim Haley 
102388ecc943SGeorge Wilson 		(void) strcpy(pool, argv[0]);
102488ecc943SGeorge Wilson 		dataset[0] = '\0';
1025ea8dc4b6Seschrock 	} else if (type == TYPE_INVAL) {
1026ea8dc4b6Seschrock 		if (flags == 0) {
1027ea8dc4b6Seschrock 			(void) fprintf(stderr, "at least one of '-b', '-d', "
1028468c413aSTim Haley 			    "'-t', '-a', '-p', '-I' or '-u' "
1029468c413aSTim Haley 			    "must be specified\n");
1030ea8dc4b6Seschrock 			usage();
1031ea8dc4b6Seschrock 			return (2);
1032ea8dc4b6Seschrock 		}
1033ea8dc4b6Seschrock 
1034ea8dc4b6Seschrock 		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1035ea8dc4b6Seschrock 			(void) strcpy(pool, argv[0]);
1036ea8dc4b6Seschrock 			dataset[0] = '\0';
1037ea8dc4b6Seschrock 		} else if (argc != 0) {
1038ea8dc4b6Seschrock 			(void) fprintf(stderr, "extraneous argument for "
1039ea8dc4b6Seschrock 			    "'-f'\n");
1040ea8dc4b6Seschrock 			usage();
1041ea8dc4b6Seschrock 			return (2);
1042ea8dc4b6Seschrock 		}
1043ea8dc4b6Seschrock 
1044ea8dc4b6Seschrock 		flags |= ZINJECT_NULL;
1045ea8dc4b6Seschrock 	} else {
1046ea8dc4b6Seschrock 		if (argc != 1) {
1047ea8dc4b6Seschrock 			(void) fprintf(stderr, "missing object\n");
1048ea8dc4b6Seschrock 			usage();
1049ea8dc4b6Seschrock 			return (2);
1050ea8dc4b6Seschrock 		}
1051ea8dc4b6Seschrock 
1052ea8dc4b6Seschrock 		if (error == ENXIO) {
1053ea8dc4b6Seschrock 			(void) fprintf(stderr, "data error type must be "
1054ea8dc4b6Seschrock 			    "'checksum' or 'io'\n");
1055ea8dc4b6Seschrock 			return (1);
1056ea8dc4b6Seschrock 		}
1057ea8dc4b6Seschrock 
1058283b8460SGeorge.Wilson 		record.zi_cmd = ZINJECT_DATA_FAULT;
1059ea8dc4b6Seschrock 		if (translate_record(type, argv[0], range, level, &record, pool,
1060ea8dc4b6Seschrock 		    dataset) != 0)
1061ea8dc4b6Seschrock 			return (1);
1062ea8dc4b6Seschrock 		if (!error)
1063ea8dc4b6Seschrock 			error = EIO;
1064ea8dc4b6Seschrock 	}
1065ea8dc4b6Seschrock 
1066ea8dc4b6Seschrock 	/*
1067ea8dc4b6Seschrock 	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
1068ea8dc4b6Seschrock 	 * unload the pool, so that we trigger spa-wide reopen of metadata next
1069ea8dc4b6Seschrock 	 * time we access the pool.
1070ea8dc4b6Seschrock 	 */
1071ea8dc4b6Seschrock 	if (dataset[0] != '\0' && domount) {
1072990b4856Slling 		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
1073ea8dc4b6Seschrock 			return (1);
1074ea8dc4b6Seschrock 
1075ea8dc4b6Seschrock 		if (zfs_unmount(zhp, NULL, 0) != 0)
1076ea8dc4b6Seschrock 			return (1);
1077ea8dc4b6Seschrock 	}
1078ea8dc4b6Seschrock 
1079ea8dc4b6Seschrock 	record.zi_error = error;
1080ea8dc4b6Seschrock 
1081ea8dc4b6Seschrock 	ret = register_handler(pool, flags, &record, quiet);
1082ea8dc4b6Seschrock 
1083ea8dc4b6Seschrock 	if (dataset[0] != '\0' && domount)
1084ea8dc4b6Seschrock 		ret = (zfs_mount(zhp, NULL, 0) != 0);
1085ea8dc4b6Seschrock 
108699653d4eSeschrock 	libzfs_fini(g_zfs);
108799653d4eSeschrock 
1088ea8dc4b6Seschrock 	return (ret);
1089ea8dc4b6Seschrock }
1090