1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 */
25
26/*
27 * ZFS Fault Injector
28 *
29 * This userland component takes a set of options and uses libzpool to translate
30 * from a user-visible object type and name to an internal representation.
31 * There are two basic types of faults: device faults and data faults.
32 *
33 *
34 * DEVICE FAULTS
35 *
36 * Errors can be injected into a particular vdev using the '-d' option.  This
37 * option takes a path or vdev GUID to uniquely identify the device within a
38 * pool.  There are two types of errors that can be injected, EIO and ENXIO,
39 * that can be controlled through the '-e' option.  The default is ENXIO.  For
 * EIO failures, any attempt to read data from the device will return EIO, but
 * a subsequent attempt to reopen the device will succeed.  For ENXIO failures,
 * any attempt to read from the device will return EIO, and any attempt to
 * reopen the device will return ENXIO.
44 * For label faults, the -L option must be specified. This allows faults
45 * to be injected into either the nvlist, uberblock, pad1, or pad2 region
46 * of all the labels for the specified device.
47 *
48 * This form of the command looks like:
49 *
50 *	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
51 *
52 *
53 * DATA FAULTS
54 *
55 * We begin with a tuple of the form:
56 *
57 *	<type,level,range,object>
58 *
59 *	type	A string describing the type of data to target.  Each type
60 *		implicitly describes how to interpret 'object'. Currently,
61 *		the following values are supported:
62 *
63 *		data		User data for a file
64 *		dnode		Dnode for a file or directory
65 *
66 *		The following MOS objects are special.  Instead of injecting
67 *		errors on a particular object or blkid, we inject errors across
68 *		all objects of the given type.
69 *
70 *		mos		Any data in the MOS
71 *		mosdir		object directory
72 *		config		pool configuration
73 *		bpobj		blkptr list
74 *		spacemap	spacemap
75 *		metaslab	metaslab
76 *		errlog		persistent error log
77 *
78 *	level	Object level.  Defaults to '0', not applicable to all types.  If
79 *		a range is given, this corresponds to the indirect block
80 *		corresponding to the specific range.
81 *
82 *	range	A numerical range [start,end) within the object.  Defaults to
83 *		the full size of the file.
84 *
85 *	object	A string describing the logical location of the object.  For
86 *		files and directories (currently the only supported types),
87 *		this is the path of the object on disk.
88 *
89 * This is translated, via libzpool, into the following internal representation:
90 *
91 *	<type,objset,object,level,range>
92 *
93 * These types should be self-explanatory.  This tuple is then passed to the
94 * kernel via a special ioctl() to initiate fault injection for the given
95 * object.  Note that 'type' is not strictly necessary for fault injection, but
96 * is used when translating existing faults into a human-readable string.
97 *
98 *
99 * The command itself takes one of the forms:
100 *
101 *	zinject
102 *	zinject <-a | -u pool>
103 *	zinject -c <id|all>
104 *	zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
105 *	    [-r range] <object>
106 *	zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
107 *
108 * With no arguments, the command prints all currently registered injection
109 * handlers, with their numeric identifiers.
110 *
111 * The '-c' option will clear the given handler, or all handlers if 'all' is
112 * specified.
113 *
114 * The '-e' option takes a string describing the errno to simulate.  This must
115 * be one of 'io', 'checksum', or 'decrypt'.  In most cases this will result
116 * in the same behavior, but RAID-Z will produce a different set of ereports
117 * for this situation.
118 *
119 * The '-a', '-u', and '-m' flags toggle internal flush behavior.  If '-a' is
120 * specified, then the ARC cache is flushed appropriately.  If '-u' is
121 * specified, then the underlying SPA is unloaded.  Either of these flags can be
122 * specified independently of any other handlers.  The '-m' flag automatically
123 * does an unmount and remount of the underlying dataset to aid in flushing the
124 * cache.
125 *
 * The '-f' flag controls the frequency of errors injected, expressed as an
 * integer percentage between 1 and 100.  The default is 100.
128 *
 * This form is responsible for actually injecting the handler into the
130 * framework.  It takes the arguments described above, translates them to the
131 * internal tuple using libzpool, and then issues an ioctl() to register the
132 * handler.
133 *
134 * The final form can target a specific bookmark, regardless of whether a
135 * human-readable interface has been designed.  It allows developers to specify
136 * a particular block by number.
137 */
138
139#include <errno.h>
140#include <fcntl.h>
141#include <stdio.h>
142#include <stdlib.h>
143#include <strings.h>
144#include <unistd.h>
145
146#include <sys/fs/zfs.h>
147#include <sys/mount.h>
148
149#include <libzfs.h>
150
151#undef verify	/* both libzfs.h and zfs_context.h want to define this */
152
153#include "zinject.h"
154
/* libzfs library handle; main() assigns it from libzfs_init(). */
libzfs_handle_t *g_zfs;
/* Descriptor for ZFS_DEV, opened in main() and used for the ioctl()s below. */
int zfs_fd;

/* errno value selected by '-e checksum'. */
#define	ECKSUM	EBADE
159
/*
 * Names accepted by the '-t' (object type) and '-L' (label region) options.
 * name_to_type() returns the index of the matching entry as an err_type_t,
 * so the order here presumably mirrors the err_type_t enumeration declared
 * in zinject.h -- verify before reordering or inserting entries.
 */
static const char *errtable[TYPE_INVAL] = {
	"data",
	"dnode",
	"mos",
	"mosdir",
	"metaslab",
	"config",
	"bpobj",
	"spacemap",
	"errlog",
	"uber",
	"nvlist",
	"pad1",
	"pad2"
};
175
176static err_type_t
177name_to_type(const char *arg)
178{
179	int i;
180	for (i = 0; i < TYPE_INVAL; i++)
181		if (strcmp(errtable[i], arg) == 0)
182			return (i);
183
184	return (TYPE_INVAL);
185}
186
187static const char *
188type_to_name(uint64_t type)
189{
190	switch (type) {
191	case DMU_OT_OBJECT_DIRECTORY:
192		return ("mosdir");
193	case DMU_OT_OBJECT_ARRAY:
194		return ("metaslab");
195	case DMU_OT_PACKED_NVLIST:
196		return ("config");
197	case DMU_OT_BPOBJ:
198		return ("bpobj");
199	case DMU_OT_SPACE_MAP:
200		return ("spacemap");
201	case DMU_OT_ERROR_LOG:
202		return ("errlog");
203	default:
204		return ("-");
205	}
206}
207
208
/*
 * Print the usage message to stdout.
 *
 * The text is emitted with a single printf() call; each string literal
 * below is one line (or part of one line) of the output.  The error names
 * listed for '-e' match what the option parser in main() accepts.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t    [-T <read|write|free|claim|all>] pool\n"
	    "\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> pool\n"
	    "\n"
	    "\t\tPerform a specific action on a particular device\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration.  Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds.  The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple.  Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-C dvas] [-e errno] [-l level]\n"
	    "\t\t[-r range] [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor.  The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode.  Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error.  Must be one of 'io', "
	    "'checksum',\n"
	    "\t\t\tor 'decrypt'.  Default is 'io'.\n"
	    "\t\t-C\tInject the given error only into specific DVAs. The\n"
	    "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n"
	    "\t\t\tseparated by commas (ex. '0,2').\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject.  Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache.  Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool.  Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time.  Expressed as\n"
	    "\t\t\ta percentage between 1 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile.  The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory.  The\n"
	    "\t\t\t'-r' option is incompatible with this mode.  The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype.  Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog.  The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
333
334static int
335iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
336    void *data)
337{
338	zfs_cmd_t zc = { 0 };
339	int ret;
340
341	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
342		if ((ret = func((int)zc.zc_guid, zc.zc_name,
343		    &zc.zc_inject_record, data)) != 0)
344			return (ret);
345
346	if (errno != ENOENT) {
347		(void) fprintf(stderr, "Unable to list handlers: %s\n",
348		    strerror(errno));
349		return (-1);
350	}
351
352	return (0);
353}
354
355static int
356print_data_handler(int id, const char *pool, zinject_record_t *record,
357    void *data)
358{
359	int *count = data;
360
361	if (record->zi_guid != 0 || record->zi_func[0] != '\0')
362		return (0);
363
364	if (*count == 0) {
365		(void) printf("%3s  %-15s  %-6s  %-6s  %-8s  %3s  %-4s  ",
366		    "%-15s\n", "ID", "POOL", "OBJSET", "OBJECT", "TYPE",
367		    "LVL", "DVAs", "RANGE");
368		(void) printf("---  ---------------  ------  "
369		    "------  --------  ---  ---- ----------------\n");
370	}
371
372	*count += 1;
373
374	(void) printf("%3d  %-15s  %-6llu  %-6llu  %-8s  %-3d  0x%02x  ",
375	    id, pool, (u_longlong_t)record->zi_objset,
376	    (u_longlong_t)record->zi_object, type_to_name(record->zi_type),
377	    record->zi_level, record->zi_dvas);
378
379	if (record->zi_start == 0 &&
380	    record->zi_end == -1ULL)
381		(void) printf("all\n");
382	else
383		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
384		    (u_longlong_t)record->zi_end);
385
386	return (0);
387}
388
389static int
390print_device_handler(int id, const char *pool, zinject_record_t *record,
391    void *data)
392{
393	int *count = data;
394
395	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
396		return (0);
397
398	if (record->zi_cmd == ZINJECT_DELAY_IO)
399		return (0);
400
401	if (*count == 0) {
402		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "GUID");
403		(void) printf("---  ---------------  ----------------\n");
404	}
405
406	*count += 1;
407
408	(void) printf("%3d  %-15s  %llx\n", id, pool,
409	    (u_longlong_t)record->zi_guid);
410
411	return (0);
412}
413
414static int
415print_delay_handler(int id, const char *pool, zinject_record_t *record,
416    void *data)
417{
418	int *count = data;
419
420	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
421		return (0);
422
423	if (record->zi_cmd != ZINJECT_DELAY_IO)
424		return (0);
425
426	if (*count == 0) {
427		(void) printf("%3s  %-15s  %-15s  %-15s  %s\n",
428		    "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
429		(void) printf("---  ---------------  ---------------  "
430		    "---------------  ----------------\n");
431	}
432
433	*count += 1;
434
435	(void) printf("%3d  %-15s  %-15llu  %-15llu  %llx\n", id, pool,
436	    (u_longlong_t)NSEC2MSEC(record->zi_timer),
437	    (u_longlong_t)record->zi_nlanes,
438	    (u_longlong_t)record->zi_guid);
439
440	return (0);
441}
442
443static int
444print_panic_handler(int id, const char *pool, zinject_record_t *record,
445    void *data)
446{
447	int *count = data;
448
449	if (record->zi_func[0] == '\0')
450		return (0);
451
452	if (*count == 0) {
453		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
454		(void) printf("---  ---------------  ----------------\n");
455	}
456
457	*count += 1;
458
459	(void) printf("%3d  %-15s  %s\n", id, pool, record->zi_func);
460
461	return (0);
462}
463
464/*
465 * Print all registered error handlers.  Returns the number of handlers
466 * registered.
467 */
468static int
469print_all_handlers(void)
470{
471	int count = 0, total = 0;
472
473	(void) iter_handlers(print_device_handler, &count);
474	if (count > 0) {
475		total += count;
476		(void) printf("\n");
477		count = 0;
478	}
479
480	(void) iter_handlers(print_delay_handler, &count);
481	if (count > 0) {
482		total += count;
483		(void) printf("\n");
484		count = 0;
485	}
486
487	(void) iter_handlers(print_data_handler, &count);
488	if (count > 0) {
489		total += count;
490		(void) printf("\n");
491		count = 0;
492	}
493
494	(void) iter_handlers(print_panic_handler, &count);
495
496	return (count + total);
497}
498
499/* ARGSUSED */
500static int
501cancel_one_handler(int id, const char *pool, zinject_record_t *record,
502    void *data)
503{
504	zfs_cmd_t zc = { 0 };
505
506	zc.zc_guid = (uint64_t)id;
507
508	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
509		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
510		    id, strerror(errno));
511		return (1);
512	}
513
514	return (0);
515}
516
517/*
518 * Remove all fault injection handlers.
519 */
520static int
521cancel_all_handlers(void)
522{
523	int ret = iter_handlers(cancel_one_handler, NULL);
524
525	if (ret == 0)
526		(void) printf("removed all registered handlers\n");
527
528	return (ret);
529}
530
531/*
532 * Remove a specific fault injection handler.
533 */
534static int
535cancel_handler(int id)
536{
537	zfs_cmd_t zc = { 0 };
538
539	zc.zc_guid = (uint64_t)id;
540
541	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
542		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
543		    id, strerror(errno));
544		return (1);
545	}
546
547	(void) printf("removed handler %d\n", id);
548
549	return (0);
550}
551
552/*
553 * Register a new fault injection handler.
554 */
555static int
556register_handler(const char *pool, int flags, zinject_record_t *record,
557    int quiet)
558{
559	zfs_cmd_t zc = { 0 };
560
561	(void) strcpy(zc.zc_name, pool);
562	zc.zc_inject_record = *record;
563	zc.zc_guid = flags;
564
565	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
566		(void) fprintf(stderr, "failed to add handler: %s\n",
567		    strerror(errno));
568		return (1);
569	}
570
571	if (flags & ZINJECT_NULL)
572		return (0);
573
574	if (quiet) {
575		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
576	} else {
577		(void) printf("Added handler %llu with the following "
578		    "properties:\n", (u_longlong_t)zc.zc_guid);
579		(void) printf("  pool: %s\n", pool);
580		if (record->zi_guid) {
581			(void) printf("  vdev: %llx\n",
582			    (u_longlong_t)record->zi_guid);
583		} else if (record->zi_func[0] != '\0') {
584			(void) printf("  panic function: %s\n",
585			    record->zi_func);
586		} else if (record->zi_duration > 0) {
587			(void) printf(" time: %lld seconds\n",
588			    (u_longlong_t)record->zi_duration);
589		} else if (record->zi_duration < 0) {
590			(void) printf(" txgs: %lld \n",
591			    (u_longlong_t)-record->zi_duration);
592		} else {
593			(void) printf("objset: %llu\n",
594			    (u_longlong_t)record->zi_objset);
595			(void) printf("object: %llu\n",
596			    (u_longlong_t)record->zi_object);
597			(void) printf("  type: %llu\n",
598			    (u_longlong_t)record->zi_type);
599			(void) printf(" level: %d\n", record->zi_level);
600			if (record->zi_start == 0 &&
601			    record->zi_end == -1ULL)
602				(void) printf(" range: all\n");
603			else
604				(void) printf(" range: [%llu, %llu)\n",
605				    (u_longlong_t)record->zi_start,
606				    (u_longlong_t)record->zi_end);
607			(void) printf("  dvas: 0x%x\n", record->zi_dvas);
608		}
609	}
610
611	return (0);
612}
613
614int
615perform_action(const char *pool, zinject_record_t *record, int cmd)
616{
617	zfs_cmd_t zc = { 0 };
618
619	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
620	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
621	zc.zc_guid = record->zi_guid;
622	zc.zc_cookie = cmd;
623
624	if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
625		return (0);
626
627	return (1);
628}
629
/*
 * Parse a '-D' argument of the form "latency:lanes".
 *
 *	str	the option argument
 *	delay	out: latency converted from milliseconds with MSEC2NSEC()
 *	nlanes	out: number of lanes
 *
 * Returns 0 on success.  Returns 1, leaving the outputs untouched, if the
 * string is not exactly two unsigned numbers separated by a colon or if
 * the latency is zero.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long scan_delay;
	unsigned long scan_nlanes;
	char trailing;

	/*
	 * The final %c only converts when something follows the lane
	 * count, so a result of exactly 2 means both numbers were read
	 * and nothing (e.g. "10:1xyz") trails them.
	 */
	if (sscanf(str, "%lu:%lu%c", &scan_delay, &scan_nlanes,
	    &trailing) != 2)
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
658
/*
 * This function converts a string specifier for DVAs into a bit mask.
 * The DVAs provided by the user should be 0 indexed and separated by
 * a comma. For example:
 *     "1"     -> 0b0010  (0x2)
 *     "0,1"   -> 0b0011  (0x3)
 *     "0,1,2" -> 0b0111  (0x7)
 *
 * Only the indices 0, 1 and 2 are accepted, each at most once and in any
 * order.  Returns 0 and stores the mask in *dvas_out on success.  Returns
 * EINVAL, leaving *dvas_out untouched, for an empty string, an unknown
 * character, a repeated index, or a leading, trailing or doubled comma.
 */
static int
parse_dvas(const char *str, uint32_t *dvas_out)
{
	size_t len = strlen(str);
	uint32_t mask = 0;
	size_t i;

	/*
	 * A well-formed list alternates one-character indices and commas,
	 * beginning and ending with an index, so its length is odd and at
	 * most 5 ("0,1,2").
	 */
	if (len == 0 || len > 5 || (len % 2) == 0)
		return (EINVAL);

	for (i = 0; i < len; i++) {
		uint32_t bit;

		/* odd positions must hold the delimiter */
		if (i % 2 == 1) {
			if (str[i] != ',')
				return (EINVAL);
			continue;
		}

		/* even positions must hold a DVA index */
		if (str[i] < '0' || str[i] > '2')
			return (EINVAL);

		/* check if this DVA has been set already */
		bit = 1U << (str[i] - '0');
		if (mask & bit)
			return (EINVAL);

		mask |= bit;
	}

	*dvas_out = mask;
	return (0);
}
711
712int
713main(int argc, char **argv)
714{
715	int c;
716	char *range = NULL;
717	char *cancel = NULL;
718	char *end;
719	char *raw = NULL;
720	char *device = NULL;
721	int level = 0;
722	int quiet = 0;
723	int error = 0;
724	int domount = 0;
725	int io_type = ZIO_TYPES;
726	int action = VDEV_STATE_UNKNOWN;
727	err_type_t type = TYPE_INVAL;
728	err_type_t label = TYPE_INVAL;
729	zinject_record_t record = { 0 };
730	char pool[MAXNAMELEN];
731	char dataset[MAXNAMELEN];
732	zfs_handle_t *zhp;
733	int nowrites = 0;
734	int dur_txg = 0;
735	int dur_secs = 0;
736	int ret;
737	int flags = 0;
738	uint32_t dvas = 0;
739
740	if ((g_zfs = libzfs_init()) == NULL) {
741		(void) fprintf(stderr, "internal error: failed to "
742		    "initialize ZFS library\n");
743		return (1);
744	}
745
746	libzfs_print_on_error(g_zfs, B_TRUE);
747
748	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
749		(void) fprintf(stderr, "failed to open ZFS device\n");
750		return (1);
751	}
752
753	if (argc == 1) {
754		/*
755		 * No arguments.  Print the available handlers.  If there are no
756		 * available handlers, direct the user to '-h' for help
757		 * information.
758		 */
759		if (print_all_handlers() == 0) {
760			(void) printf("No handlers registered.\n");
761			(void) printf("Run 'zinject -h' for usage "
762			    "information.\n");
763		}
764
765		return (0);
766	}
767
768	while ((c = getopt(argc, argv,
769	    ":aA:b:C:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
770		switch (c) {
771		case 'a':
772			flags |= ZINJECT_FLUSH_ARC;
773			break;
774		case 'A':
775			if (strcasecmp(optarg, "degrade") == 0) {
776				action = VDEV_STATE_DEGRADED;
777			} else if (strcasecmp(optarg, "fault") == 0) {
778				action = VDEV_STATE_FAULTED;
779			} else {
780				(void) fprintf(stderr, "invalid action '%s': "
781				    "must be 'degrade' or 'fault'\n", optarg);
782				usage();
783				return (1);
784			}
785			break;
786		case 'b':
787			raw = optarg;
788			break;
789		case 'c':
790			cancel = optarg;
791			break;
792		case 'C':
793			ret = parse_dvas(optarg, &dvas);
794			if (ret != 0) {
795				(void) fprintf(stderr, "invalid DVA list '%s': "
796				    "DVAs should be 0 indexed and separated by "
797				    "commas.\n", optarg);
798				usage();
799				libzfs_fini(g_zfs);
800				return (1);
801			}
802			break;
803		case 'd':
804			device = optarg;
805			break;
806		case 'D':
807			ret = parse_delay(optarg, &record.zi_timer,
808			    &record.zi_nlanes);
809			if (ret != 0) {
810				(void) fprintf(stderr, "invalid i/o delay "
811				    "value: '%s'\n", optarg);
812				usage();
813				return (1);
814			}
815			break;
816		case 'e':
817			if (strcasecmp(optarg, "io") == 0) {
818				error = EIO;
819			} else if (strcasecmp(optarg, "checksum") == 0) {
820				error = ECKSUM;
821			} else if (strcasecmp(optarg, "decrypt") == 0) {
822				error = EACCES;
823			} else if (strcasecmp(optarg, "nxio") == 0) {
824				error = ENXIO;
825			} else if (strcasecmp(optarg, "dtl") == 0) {
826				error = ECHILD;
827			} else {
828				(void) fprintf(stderr, "invalid error type "
829				    "'%s': must be 'io', 'checksum' or "
830				    "'nxio'\n", optarg);
831				usage();
832				return (1);
833			}
834			break;
835		case 'f':
836			record.zi_freq = atoi(optarg);
837			if (record.zi_freq < 1 || record.zi_freq > 100) {
838				(void) fprintf(stderr, "frequency range must "
839				    "be in the range (0, 100]\n");
840				return (1);
841			}
842			break;
843		case 'F':
844			record.zi_failfast = B_TRUE;
845			break;
846		case 'g':
847			dur_txg = 1;
848			record.zi_duration = (int)strtol(optarg, &end, 10);
849			if (record.zi_duration <= 0 || *end != '\0') {
850				(void) fprintf(stderr, "invalid duration '%s': "
851				    "must be a positive integer\n", optarg);
852				usage();
853				return (1);
854			}
855			/* store duration of txgs as its negative */
856			record.zi_duration *= -1;
857			break;
858		case 'h':
859			usage();
860			return (0);
861		case 'I':
862			/* default duration, if one hasn't yet been defined */
863			nowrites = 1;
864			if (dur_secs == 0 && dur_txg == 0)
865				record.zi_duration = 30;
866			break;
867		case 'l':
868			level = (int)strtol(optarg, &end, 10);
869			if (*end != '\0') {
870				(void) fprintf(stderr, "invalid level '%s': "
871				    "must be an integer\n", optarg);
872				usage();
873				return (1);
874			}
875			break;
876		case 'm':
877			domount = 1;
878			break;
879		case 'p':
880			(void) strlcpy(record.zi_func, optarg,
881			    sizeof (record.zi_func));
882			record.zi_cmd = ZINJECT_PANIC;
883			break;
884		case 'q':
885			quiet = 1;
886			break;
887		case 'r':
888			range = optarg;
889			break;
890		case 's':
891			dur_secs = 1;
892			record.zi_duration = (int)strtol(optarg, &end, 10);
893			if (record.zi_duration <= 0 || *end != '\0') {
894				(void) fprintf(stderr, "invalid duration '%s': "
895				    "must be a positive integer\n", optarg);
896				usage();
897				return (1);
898			}
899			break;
900		case 'T':
901			if (strcasecmp(optarg, "read") == 0) {
902				io_type = ZIO_TYPE_READ;
903			} else if (strcasecmp(optarg, "write") == 0) {
904				io_type = ZIO_TYPE_WRITE;
905			} else if (strcasecmp(optarg, "free") == 0) {
906				io_type = ZIO_TYPE_FREE;
907			} else if (strcasecmp(optarg, "claim") == 0) {
908				io_type = ZIO_TYPE_CLAIM;
909			} else if (strcasecmp(optarg, "all") == 0) {
910				io_type = ZIO_TYPES;
911			} else {
912				(void) fprintf(stderr, "invalid I/O type "
913				    "'%s': must be 'read', 'write', 'free', "
914				    "'claim' or 'all'\n", optarg);
915				usage();
916				return (1);
917			}
918			break;
919		case 't':
920			if ((type = name_to_type(optarg)) == TYPE_INVAL &&
921			    !MOS_TYPE(type)) {
922				(void) fprintf(stderr, "invalid type '%s'\n",
923				    optarg);
924				usage();
925				return (1);
926			}
927			break;
928		case 'u':
929			flags |= ZINJECT_UNLOAD_SPA;
930			break;
931		case 'L':
932			if ((label = name_to_type(optarg)) == TYPE_INVAL &&
933			    !LABEL_TYPE(type)) {
934				(void) fprintf(stderr, "invalid label type "
935				    "'%s'\n", optarg);
936				usage();
937				return (1);
938			}
939			break;
940		case ':':
941			(void) fprintf(stderr, "option -%c requires an "
942			    "operand\n", optopt);
943			usage();
944			return (1);
945		case '?':
946			(void) fprintf(stderr, "invalid option '%c'\n",
947			    optopt);
948			usage();
949			return (2);
950		}
951	}
952
953	argc -= optind;
954	argv += optind;
955
956	if (record.zi_duration != 0)
957		record.zi_cmd = ZINJECT_IGNORED_WRITES;
958
959	if (cancel != NULL) {
960		/*
961		 * '-c' is invalid with any other options.
962		 */
963		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
964		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
965		    record.zi_freq > 0 || dvas != 0) {
966			(void) fprintf(stderr, "cancel (-c) incompatible with "
967			    "any other options\n");
968			usage();
969			return (2);
970		}
971		if (argc != 0) {
972			(void) fprintf(stderr, "extraneous argument to '-c'\n");
973			usage();
974			return (2);
975		}
976
977		if (strcmp(cancel, "all") == 0) {
978			return (cancel_all_handlers());
979		} else {
980			int id = (int)strtol(cancel, &end, 10);
981			if (*end != '\0') {
982				(void) fprintf(stderr, "invalid handle id '%s':"
983				    " must be an integer or 'all'\n", cancel);
984				usage();
985				return (1);
986			}
987			return (cancel_handler(id));
988		}
989	}
990
991	if (device != NULL) {
992		/*
993		 * Device (-d) injection uses a completely different mechanism
994		 * for doing injection, so handle it separately here.
995		 */
996		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
997		    level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
998		    dvas != 0) {
999			(void) fprintf(stderr, "device (-d) incompatible with "
1000			    "data error injection\n");
1001			usage();
1002			return (2);
1003		}
1004
1005		if (argc != 1) {
1006			(void) fprintf(stderr, "device (-d) injection requires "
1007			    "a single pool name\n");
1008			usage();
1009			return (2);
1010		}
1011
1012		(void) strcpy(pool, argv[0]);
1013		dataset[0] = '\0';
1014
1015		if (error == ECKSUM) {
1016			(void) fprintf(stderr, "device error type must be "
1017			    "'io' or 'nxio'\n");
1018			return (1);
1019		}
1020
1021		record.zi_iotype = io_type;
1022		if (translate_device(pool, device, label, &record) != 0)
1023			return (1);
1024		if (!error)
1025			error = ENXIO;
1026
1027		if (action != VDEV_STATE_UNKNOWN)
1028			return (perform_action(pool, &record, action));
1029
1030	} else if (raw != NULL) {
1031		if (range != NULL || type != TYPE_INVAL || level != 0 ||
1032		    record.zi_cmd != ZINJECT_UNINITIALIZED ||
1033		    record.zi_freq > 0 || dvas != 0) {
1034			(void) fprintf(stderr, "raw (-b) format with "
1035			    "any other options\n");
1036			usage();
1037			return (2);
1038		}
1039
1040		if (argc != 1) {
1041			(void) fprintf(stderr, "raw (-b) format expects a "
1042			    "single pool name\n");
1043			usage();
1044			return (2);
1045		}
1046
1047		(void) strcpy(pool, argv[0]);
1048		dataset[0] = '\0';
1049
1050		if (error == ENXIO) {
1051			(void) fprintf(stderr, "data error type must be "
1052			    "'checksum' or 'io'\n");
1053			return (1);
1054		}
1055
1056		record.zi_cmd = ZINJECT_DATA_FAULT;
1057		if (translate_raw(raw, &record) != 0)
1058			return (1);
1059		if (!error)
1060			error = EIO;
1061	} else if (record.zi_cmd == ZINJECT_PANIC) {
1062		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1063		    level != 0 || device != NULL || record.zi_freq > 0 ||
1064		    dvas != 0) {
1065			(void) fprintf(stderr, "panic (-p) incompatible with "
1066			    "other options\n");
1067			usage();
1068			return (2);
1069		}
1070
1071		if (argc < 1 || argc > 2) {
1072			(void) fprintf(stderr, "panic (-p) injection requires "
1073			    "a single pool name and an optional id\n");
1074			usage();
1075			return (2);
1076		}
1077
1078		(void) strcpy(pool, argv[0]);
1079		if (argv[1] != NULL)
1080			record.zi_type = atoi(argv[1]);
1081		dataset[0] = '\0';
1082	} else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
1083		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
1084		    level != 0 || record.zi_freq > 0 || dvas != 0) {
1085			(void) fprintf(stderr, "hardware failure (-I) "
1086			    "incompatible with other options\n");
1087			usage();
1088			libzfs_fini(g_zfs);
1089			return (2);
1090		}
1091
1092		if (nowrites == 0) {
1093			(void) fprintf(stderr, "-s or -g meaningless "
1094			    "without -I (ignore writes)\n");
1095			usage();
1096			return (2);
1097		} else if (dur_secs && dur_txg) {
1098			(void) fprintf(stderr, "choose a duration either "
1099			    "in seconds (-s) or a number of txgs (-g) "
1100			    "but not both\n");
1101			usage();
1102			return (2);
1103		} else if (argc != 1) {
1104			(void) fprintf(stderr, "ignore writes (-I) "
1105			    "injection requires a single pool name\n");
1106			usage();
1107			return (2);
1108		}
1109
1110		(void) strcpy(pool, argv[0]);
1111		dataset[0] = '\0';
1112	} else if (type == TYPE_INVAL) {
1113		if (flags == 0) {
1114			(void) fprintf(stderr, "at least one of '-b', '-d', "
1115			    "'-t', '-a', '-p', '-I' or '-u' "
1116			    "must be specified\n");
1117			usage();
1118			return (2);
1119		}
1120
1121		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1122			(void) strcpy(pool, argv[0]);
1123			dataset[0] = '\0';
1124		} else if (argc != 0) {
1125			(void) fprintf(stderr, "extraneous argument for "
1126			    "'-f'\n");
1127			usage();
1128			return (2);
1129		}
1130
1131		flags |= ZINJECT_NULL;
1132	} else {
1133		if (argc != 1) {
1134			(void) fprintf(stderr, "missing object\n");
1135			usage();
1136			return (2);
1137		}
1138
1139		if (error == ENXIO) {
1140			(void) fprintf(stderr, "data error type must be "
1141			    "'checksum' or 'io'\n");
1142			return (1);
1143		}
1144
1145		if (dvas != 0) {
1146			if (error == EACCES || error == EINVAL) {
1147				(void) fprintf(stderr, "the '-C' option may "
1148				    "not be used with logical data errors "
1149				    "'decrypt' and 'decompress'\n");
1150				record.zi_dvas = dvas;
1151			}
1152		}
1153
1154		record.zi_cmd = ZINJECT_DATA_FAULT;
1155
1156		if (error == EACCES) {
1157			if (type != TYPE_DATA) {
1158				(void) fprintf(stderr, "decryption errors "
1159				    "may only be injected for 'data' types\n");
1160				libzfs_fini(g_zfs);
1161				return (1);
1162			}
1163
1164			record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1165			/*
1166			 * Internally, ZFS actually uses ECKSUM for decryption
1167			 * errors since EACCES is used to indicate the key was
1168			 * not found.
1169			 */
1170			error = ECKSUM;
1171		}
1172
1173		if (translate_record(type, argv[0], range, level, &record, pool,
1174		    dataset) != 0)
1175			return (1);
1176		if (!error)
1177			error = EIO;
1178	}
1179
1180	/*
1181	 * If this is pool-wide metadata, unmount everything.  The ioctl() will
1182	 * unload the pool, so that we trigger spa-wide reopen of metadata next
1183	 * time we access the pool.
1184	 */
1185	if (dataset[0] != '\0' && domount) {
1186		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
1187			return (1);
1188
1189		if (zfs_unmount(zhp, NULL, 0) != 0)
1190			return (1);
1191	}
1192
1193	record.zi_error = error;
1194
1195	ret = register_handler(pool, flags, &record, quiet);
1196
1197	if (dataset[0] != '\0' && domount)
1198		ret = (zfs_mount(zhp, NULL, 0) != 0);
1199
1200	libzfs_fini(g_zfs);
1201
1202	return (ret);
1203}
1204