xref: /illumos-gate/usr/src/uts/common/fs/zfs/zio_inject.c (revision 874395d5)
1ea8dc4b6Seschrock /*
2ea8dc4b6Seschrock  * CDDL HEADER START
3ea8dc4b6Seschrock  *
4ea8dc4b6Seschrock  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7ea8dc4b6Seschrock  *
8ea8dc4b6Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ea8dc4b6Seschrock  * or http://www.opensolaris.org/os/licensing.
10ea8dc4b6Seschrock  * See the License for the specific language governing permissions
11ea8dc4b6Seschrock  * and limitations under the License.
12ea8dc4b6Seschrock  *
13ea8dc4b6Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
14ea8dc4b6Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ea8dc4b6Seschrock  * If applicable, add the following below this CDDL HEADER, with the
16ea8dc4b6Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
17ea8dc4b6Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
18ea8dc4b6Seschrock  *
19ea8dc4b6Seschrock  * CDDL HEADER END
20ea8dc4b6Seschrock  */
21ea8dc4b6Seschrock /*
22*874395d5Smaybee  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23ea8dc4b6Seschrock  * Use is subject to license terms.
24ea8dc4b6Seschrock  */
25ea8dc4b6Seschrock 
26ea8dc4b6Seschrock #pragma ident	"%Z%%M%	%I%	%E% SMI"
27ea8dc4b6Seschrock 
28ea8dc4b6Seschrock /*
29ea8dc4b6Seschrock  * ZFS fault injection
30ea8dc4b6Seschrock  *
31ea8dc4b6Seschrock  * To handle fault injection, we keep track of a series of zinject_record_t
32ea8dc4b6Seschrock  * structures which describe which logical block(s) should be injected with a
33ea8dc4b6Seschrock  * fault.  These are kept in a global list.  Each record corresponds to a given
34ea8dc4b6Seschrock  * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
35ea8dc4b6Seschrock  * or exported while the injection record exists.
36ea8dc4b6Seschrock  *
37ea8dc4b6Seschrock  * Device level injection is done using the 'zi_guid' field.  If this is set, it
38ea8dc4b6Seschrock  * means that the error is destined for a particular device, not a piece of
39ea8dc4b6Seschrock  * data.
40ea8dc4b6Seschrock  *
41ea8dc4b6Seschrock  * This is a rather poor data structure and algorithm, but we don't expect more
42ea8dc4b6Seschrock  * than a few faults at any one time, so it should be sufficient for our needs.
43ea8dc4b6Seschrock  */
44ea8dc4b6Seschrock 
45ea8dc4b6Seschrock #include <sys/arc.h>
46ea8dc4b6Seschrock #include <sys/zio_impl.h>
47ea8dc4b6Seschrock #include <sys/zfs_ioctl.h>
48ea8dc4b6Seschrock #include <sys/spa_impl.h>
49ea8dc4b6Seschrock #include <sys/vdev_impl.h>
50ea8dc4b6Seschrock 
/*
 * Global switch for fault injection.  It is bumped once for every handler
 * that zio_inject_fault() registers and dropped once for every handler that
 * zio_clear_fault() removes, so it is non-zero exactly while at least one
 * handler is (being) registered.
 */
uint32_t zio_injection_enabled;
52ea8dc4b6Seschrock 
53ea8dc4b6Seschrock typedef struct inject_handler {
54ea8dc4b6Seschrock 	int			zi_id;
55ea8dc4b6Seschrock 	spa_t			*zi_spa;
56ea8dc4b6Seschrock 	zinject_record_t	zi_record;
57ea8dc4b6Seschrock 	list_node_t		zi_link;
58ea8dc4b6Seschrock } inject_handler_t;
59ea8dc4b6Seschrock 
60ea8dc4b6Seschrock static list_t inject_handlers;
61ea8dc4b6Seschrock static krwlock_t inject_lock;
62ea8dc4b6Seschrock static int inject_next_id = 1;
63ea8dc4b6Seschrock 
64ea8dc4b6Seschrock /*
65ea8dc4b6Seschrock  * Returns true if the given record matches the I/O in progress.
66ea8dc4b6Seschrock  */
67ea8dc4b6Seschrock static boolean_t
68ea8dc4b6Seschrock zio_match_handler(zbookmark_t *zb, uint64_t type,
69ea8dc4b6Seschrock     zinject_record_t *record, int error)
70ea8dc4b6Seschrock {
71ea8dc4b6Seschrock 	/*
72ea8dc4b6Seschrock 	 * Check for a match against the MOS, which is based on type
73ea8dc4b6Seschrock 	 */
74ea8dc4b6Seschrock 	if (zb->zb_objset == 0 && record->zi_objset == 0 &&
75ea8dc4b6Seschrock 	    record->zi_object == 0) {
76ea8dc4b6Seschrock 		if (record->zi_type == DMU_OT_NONE ||
77ea8dc4b6Seschrock 		    type == record->zi_type)
78ea8dc4b6Seschrock 			return (record->zi_freq == 0 ||
79ea8dc4b6Seschrock 			    spa_get_random(100) < record->zi_freq);
80ea8dc4b6Seschrock 		else
81ea8dc4b6Seschrock 			return (B_FALSE);
82ea8dc4b6Seschrock 	}
83ea8dc4b6Seschrock 
84ea8dc4b6Seschrock 	/*
85ea8dc4b6Seschrock 	 * Check for an exact match.
86ea8dc4b6Seschrock 	 */
87ea8dc4b6Seschrock 	if (zb->zb_objset == record->zi_objset &&
88ea8dc4b6Seschrock 	    zb->zb_object == record->zi_object &&
89ea8dc4b6Seschrock 	    zb->zb_level == record->zi_level &&
90ea8dc4b6Seschrock 	    zb->zb_blkid >= record->zi_start &&
91ea8dc4b6Seschrock 	    zb->zb_blkid <= record->zi_end &&
92ea8dc4b6Seschrock 	    error == record->zi_error)
93ea8dc4b6Seschrock 		return (record->zi_freq == 0 ||
94ea8dc4b6Seschrock 		    spa_get_random(100) < record->zi_freq);
95ea8dc4b6Seschrock 
96ea8dc4b6Seschrock 	return (B_FALSE);
97ea8dc4b6Seschrock }
98ea8dc4b6Seschrock 
99ea8dc4b6Seschrock /*
100ea8dc4b6Seschrock  * Determine if the I/O in question should return failure.  Returns the errno
101ea8dc4b6Seschrock  * to be returned to the caller.
102ea8dc4b6Seschrock  */
103ea8dc4b6Seschrock int
104ea8dc4b6Seschrock zio_handle_fault_injection(zio_t *zio, int error)
105ea8dc4b6Seschrock {
106ea8dc4b6Seschrock 	int ret = 0;
107ea8dc4b6Seschrock 	inject_handler_t *handler;
108ea8dc4b6Seschrock 
109ea8dc4b6Seschrock 	/*
110ea8dc4b6Seschrock 	 * Ignore I/O not associated with any logical data.
111ea8dc4b6Seschrock 	 */
112ea8dc4b6Seschrock 	if (zio->io_logical == NULL)
113ea8dc4b6Seschrock 		return (0);
114ea8dc4b6Seschrock 
115ea8dc4b6Seschrock 	/*
116ea8dc4b6Seschrock 	 * Currently, we only support fault injection on reads.
117ea8dc4b6Seschrock 	 */
118ea8dc4b6Seschrock 	if (zio->io_type != ZIO_TYPE_READ)
119ea8dc4b6Seschrock 		return (0);
120ea8dc4b6Seschrock 
121ea8dc4b6Seschrock 	rw_enter(&inject_lock, RW_READER);
122ea8dc4b6Seschrock 
123ea8dc4b6Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
124ea8dc4b6Seschrock 	    handler = list_next(&inject_handlers, handler)) {
125ea8dc4b6Seschrock 
126ea8dc4b6Seschrock 		/* Ignore errors not destined for this pool */
127ea8dc4b6Seschrock 		if (zio->io_spa != handler->zi_spa)
128ea8dc4b6Seschrock 			continue;
129ea8dc4b6Seschrock 
130ea8dc4b6Seschrock 		/* Ignore device errors */
131ea8dc4b6Seschrock 		if (handler->zi_record.zi_guid != 0)
132ea8dc4b6Seschrock 			continue;
133ea8dc4b6Seschrock 
134ea8dc4b6Seschrock 		/* If this handler matches, return EIO */
135ea8dc4b6Seschrock 		if (zio_match_handler(&zio->io_logical->io_bookmark,
136ea8dc4b6Seschrock 		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
137ea8dc4b6Seschrock 		    &handler->zi_record, error)) {
138ea8dc4b6Seschrock 			ret = error;
139ea8dc4b6Seschrock 			break;
140ea8dc4b6Seschrock 		}
141ea8dc4b6Seschrock 	}
142ea8dc4b6Seschrock 
143ea8dc4b6Seschrock 	rw_exit(&inject_lock);
144ea8dc4b6Seschrock 
145ea8dc4b6Seschrock 	return (ret);
146ea8dc4b6Seschrock }
147ea8dc4b6Seschrock 
148ea8dc4b6Seschrock int
149ea8dc4b6Seschrock zio_handle_device_injection(vdev_t *vd, int error)
150ea8dc4b6Seschrock {
151ea8dc4b6Seschrock 	inject_handler_t *handler;
152ea8dc4b6Seschrock 	int ret = 0;
153ea8dc4b6Seschrock 
154ea8dc4b6Seschrock 	rw_enter(&inject_lock, RW_READER);
155ea8dc4b6Seschrock 
156ea8dc4b6Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
157ea8dc4b6Seschrock 	    handler = list_next(&inject_handlers, handler)) {
158ea8dc4b6Seschrock 
159ea8dc4b6Seschrock 		if (vd->vdev_guid == handler->zi_record.zi_guid) {
160ea8dc4b6Seschrock 			if (handler->zi_record.zi_error == error) {
161ea8dc4b6Seschrock 				/*
162ea8dc4b6Seschrock 				 * For a failed open, pretend like the device
163ea8dc4b6Seschrock 				 * has gone away.
164ea8dc4b6Seschrock 				 */
165ea8dc4b6Seschrock 				if (error == ENXIO)
166ea8dc4b6Seschrock 					vd->vdev_stat.vs_aux =
167ea8dc4b6Seschrock 					    VDEV_AUX_OPEN_FAILED;
168ea8dc4b6Seschrock 				ret = error;
169ea8dc4b6Seschrock 				break;
170ea8dc4b6Seschrock 			}
171ea8dc4b6Seschrock 			if (handler->zi_record.zi_error == ENXIO) {
172ea8dc4b6Seschrock 				ret = EIO;
173ea8dc4b6Seschrock 				break;
174ea8dc4b6Seschrock 			}
175ea8dc4b6Seschrock 		}
176ea8dc4b6Seschrock 	}
177ea8dc4b6Seschrock 
178ea8dc4b6Seschrock 	rw_exit(&inject_lock);
179ea8dc4b6Seschrock 
180ea8dc4b6Seschrock 	return (ret);
181ea8dc4b6Seschrock }
182ea8dc4b6Seschrock 
183ea8dc4b6Seschrock /*
184ea8dc4b6Seschrock  * Create a new handler for the given record.  We add it to the list, adding
185ea8dc4b6Seschrock  * a reference to the spa_t in the process.  We increment zio_injection_enabled,
186ea8dc4b6Seschrock  * which is the switch to trigger all fault injection.
187ea8dc4b6Seschrock  */
188ea8dc4b6Seschrock int
189ea8dc4b6Seschrock zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
190ea8dc4b6Seschrock {
191ea8dc4b6Seschrock 	inject_handler_t *handler;
192ea8dc4b6Seschrock 	int error;
193ea8dc4b6Seschrock 	spa_t *spa;
194ea8dc4b6Seschrock 
195ea8dc4b6Seschrock 	/*
196ea8dc4b6Seschrock 	 * If this is pool-wide metadata, make sure we unload the corresponding
197ea8dc4b6Seschrock 	 * spa_t, so that the next attempt to load it will trigger the fault.
198ea8dc4b6Seschrock 	 * We call spa_reset() to unload the pool appropriately.
199ea8dc4b6Seschrock 	 */
200ea8dc4b6Seschrock 	if (flags & ZINJECT_UNLOAD_SPA)
201ea8dc4b6Seschrock 		if ((error = spa_reset(name)) != 0)
202ea8dc4b6Seschrock 			return (error);
203ea8dc4b6Seschrock 
204ea8dc4b6Seschrock 	if (!(flags & ZINJECT_NULL)) {
205ea8dc4b6Seschrock 		/*
206ea8dc4b6Seschrock 		 * spa_inject_ref() will add an injection reference, which will
207ea8dc4b6Seschrock 		 * prevent the pool from being removed from the namespace while
208ea8dc4b6Seschrock 		 * still allowing it to be unloaded.
209ea8dc4b6Seschrock 		 */
210ea8dc4b6Seschrock 		if ((spa = spa_inject_addref(name)) == NULL)
211ea8dc4b6Seschrock 			return (ENOENT);
212ea8dc4b6Seschrock 
213ea8dc4b6Seschrock 		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
214ea8dc4b6Seschrock 
215ea8dc4b6Seschrock 		rw_enter(&inject_lock, RW_WRITER);
216ea8dc4b6Seschrock 
217ea8dc4b6Seschrock 		*id = handler->zi_id = inject_next_id++;
218ea8dc4b6Seschrock 		handler->zi_spa = spa;
219ea8dc4b6Seschrock 		handler->zi_record = *record;
220ea8dc4b6Seschrock 		list_insert_tail(&inject_handlers, handler);
221ea8dc4b6Seschrock 		atomic_add_32(&zio_injection_enabled, 1);
222ea8dc4b6Seschrock 
223ea8dc4b6Seschrock 		rw_exit(&inject_lock);
224ea8dc4b6Seschrock 	}
225ea8dc4b6Seschrock 
226ea8dc4b6Seschrock 	/*
227ea8dc4b6Seschrock 	 * Flush the ARC, so that any attempts to read this data will end up
228ea8dc4b6Seschrock 	 * going to the ZIO layer.  Note that this is a little overkill, but
229ea8dc4b6Seschrock 	 * we don't have the necessary ARC interfaces to do anything else, and
230ea8dc4b6Seschrock 	 * fault injection isn't a performance critical path.
231ea8dc4b6Seschrock 	 */
232ea8dc4b6Seschrock 	if (flags & ZINJECT_FLUSH_ARC)
233*874395d5Smaybee 		arc_flush(NULL);
234ea8dc4b6Seschrock 
235ea8dc4b6Seschrock 	return (0);
236ea8dc4b6Seschrock }
237ea8dc4b6Seschrock 
238ea8dc4b6Seschrock /*
239ea8dc4b6Seschrock  * Returns the next record with an ID greater than that supplied to the
240ea8dc4b6Seschrock  * function.  Used to iterate over all handlers in the system.
241ea8dc4b6Seschrock  */
242ea8dc4b6Seschrock int
243ea8dc4b6Seschrock zio_inject_list_next(int *id, char *name, size_t buflen,
244ea8dc4b6Seschrock     zinject_record_t *record)
245ea8dc4b6Seschrock {
246ea8dc4b6Seschrock 	inject_handler_t *handler;
247ea8dc4b6Seschrock 	int ret;
248ea8dc4b6Seschrock 
249ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
250ea8dc4b6Seschrock 	rw_enter(&inject_lock, RW_READER);
251ea8dc4b6Seschrock 
252ea8dc4b6Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
253ea8dc4b6Seschrock 	    handler = list_next(&inject_handlers, handler))
254ea8dc4b6Seschrock 		if (handler->zi_id > *id)
255ea8dc4b6Seschrock 			break;
256ea8dc4b6Seschrock 
257ea8dc4b6Seschrock 	if (handler) {
258ea8dc4b6Seschrock 		*record = handler->zi_record;
259ea8dc4b6Seschrock 		*id = handler->zi_id;
260ea8dc4b6Seschrock 		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
261ea8dc4b6Seschrock 		ret = 0;
262ea8dc4b6Seschrock 	} else {
263ea8dc4b6Seschrock 		ret = ENOENT;
264ea8dc4b6Seschrock 	}
265ea8dc4b6Seschrock 
266ea8dc4b6Seschrock 	rw_exit(&inject_lock);
267ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
268ea8dc4b6Seschrock 
269ea8dc4b6Seschrock 	return (ret);
270ea8dc4b6Seschrock }
271ea8dc4b6Seschrock 
272ea8dc4b6Seschrock /*
273ea8dc4b6Seschrock  * Clear the fault handler with the given identifier, or return ENOENT if none
274ea8dc4b6Seschrock  * exists.
275ea8dc4b6Seschrock  */
276ea8dc4b6Seschrock int
277ea8dc4b6Seschrock zio_clear_fault(int id)
278ea8dc4b6Seschrock {
279ea8dc4b6Seschrock 	inject_handler_t *handler;
280ea8dc4b6Seschrock 	int ret;
281ea8dc4b6Seschrock 
282ea8dc4b6Seschrock 	rw_enter(&inject_lock, RW_WRITER);
283ea8dc4b6Seschrock 
284ea8dc4b6Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
285ea8dc4b6Seschrock 	    handler = list_next(&inject_handlers, handler))
286ea8dc4b6Seschrock 		if (handler->zi_id == id)
287ea8dc4b6Seschrock 			break;
288ea8dc4b6Seschrock 
289ea8dc4b6Seschrock 	if (handler == NULL) {
290ea8dc4b6Seschrock 		ret = ENOENT;
291ea8dc4b6Seschrock 	} else {
292ea8dc4b6Seschrock 		list_remove(&inject_handlers, handler);
293ea8dc4b6Seschrock 		spa_inject_delref(handler->zi_spa);
294ea8dc4b6Seschrock 		kmem_free(handler, sizeof (inject_handler_t));
295ea8dc4b6Seschrock 		atomic_add_32(&zio_injection_enabled, -1);
296ea8dc4b6Seschrock 		ret = 0;
297ea8dc4b6Seschrock 	}
298ea8dc4b6Seschrock 
299ea8dc4b6Seschrock 	rw_exit(&inject_lock);
300ea8dc4b6Seschrock 
301ea8dc4b6Seschrock 	return (ret);
302ea8dc4b6Seschrock }
303ea8dc4b6Seschrock 
304ea8dc4b6Seschrock void
305ea8dc4b6Seschrock zio_inject_init(void)
306ea8dc4b6Seschrock {
307ea8dc4b6Seschrock 	list_create(&inject_handlers, sizeof (inject_handler_t),
308ea8dc4b6Seschrock 	    offsetof(inject_handler_t, zi_link));
309ea8dc4b6Seschrock }
310ea8dc4b6Seschrock 
311ea8dc4b6Seschrock void
312ea8dc4b6Seschrock zio_inject_fini(void)
313ea8dc4b6Seschrock {
314ea8dc4b6Seschrock 	list_destroy(&inject_handlers);
315ea8dc4b6Seschrock }
316