2ea8dc4beschrock * CDDL HEADER START
3ea8dc4beschrock *
4ea8dc4beschrock * The contents of this file are subject to the terms of the
5ea8dc4beschrock * Common Development and Distribution License (the "License").
6ea8dc4beschrock * You may not use this file except in compliance with the License.
7ea8dc4beschrock *
8ea8dc4beschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ea8dc4beschrock * or http://www.opensolaris.org/os/licensing.
10ea8dc4beschrock * See the License for the specific language governing permissions
11ea8dc4beschrock * and limitations under the License.
12ea8dc4beschrock *
13ea8dc4beschrock * When distributing Covered Code, include this CDDL HEADER in each
14ea8dc4beschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ea8dc4beschrock * If applicable, add the following below this CDDL HEADER, with the
16ea8dc4beschrock * fields enclosed by brackets "[]" replaced with your own identifying
17ea8dc4beschrock * information: Portions Copyright [yyyy] [name of copyright owner]
18ea8dc4beschrock *
19ea8dc4beschrock * CDDL HEADER END
20ea8dc4beschrock */
2298d1cbfGeorge Wilson * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2397e8130Prakash Surya * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24b853d39Don Brady * Copyright (c) 2017, Intel Corporation.
25ea8dc4beschrock */
28ea8dc4beschrock * ZFS fault injection
29ea8dc4beschrock *
30ea8dc4beschrock * To handle fault injection, we keep track of a series of zinject_record_t
31ea8dc4beschrock * structures which describe which logical block(s) should be injected with a
32ea8dc4beschrock * fault.  These are kept in a global list.  Each record corresponds to a given
33ea8dc4beschrock * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
34ea8dc4beschrock * or exported while the injection record exists.
35ea8dc4beschrock *
36ea8dc4beschrock * Device level injection is done using the 'zi_guid' field.  If this is set, it
37ea8dc4beschrock * means that the error is destined for a particular device, not a piece of
38ea8dc4beschrock * data.
39ea8dc4beschrock *
40ea8dc4beschrock * This is a rather poor data structure and algorithm, but we don't expect more
41ea8dc4beschrock * than a few faults at any one time, so it should be sufficient for our needs.
42ea8dc4beschrock */
44ea8dc4beschrock#include <sys/arc.h>
45ea8dc4beschrock#include <sys/zio_impl.h>
46ea8dc4beschrock#include <sys/zfs_ioctl.h>
47ea8dc4beschrock#include <sys/vdev_impl.h>
48b24ab67Jeff Bonwick#include <sys/dmu_objset.h>
49d8ab6e1Don Brady#include <sys/dsl_dataset.h>
5021bf64agw#include <sys/fs/zfs.h>
/*
 * NOTE(review): this variable is not read or written in the part of the
 * file visible here; presumably it tells the rest of the I/O code that
 * injection handlers are registered -- confirm against its users.
 */
uint32_t zio_injection_enabled = 0;
5497e8130Prakash Surya/*
5597e8130Prakash Surya * Data describing each zinject handler registered on the system, and
5697e8130Prakash Surya * contains the list node linking the handler in the global zinject
5797e8130Prakash Surya * handler list.
5897e8130Prakash Surya */
59ea8dc4beschrocktypedef struct inject_handler {
60ea8dc4beschrock	int			zi_id;
61ea8dc4beschrock	spa_t			*zi_spa;
62ea8dc4beschrock	zinject_record_t	zi_record;
6397e8130Prakash Surya	uint64_t		*zi_lanes;
6497e8130Prakash Surya	int			zi_next_lane;
65ea8dc4beschrock	list_node_t		zi_link;
66ea8dc4beschrock} inject_handler_t;
6897e8130Prakash Surya/*
6997e8130Prakash Surya * List of all zinject handlers registered on the system, protected by
7097e8130Prakash Surya * the inject_lock defined below.
7197e8130Prakash Surya */
72ea8dc4beschrockstatic list_t inject_handlers;
7397e8130Prakash Surya
7497e8130Prakash Surya/*
7597e8130Prakash Surya * This protects insertion into, and traversal of, the inject handler
7697e8130Prakash Surya * list defined above; as well as the inject_delay_count. Any time a
7797e8130Prakash Surya * handler is inserted or removed from the list, this lock should be
7897e8130Prakash Surya * taken as a RW_WRITER; and any time traversal is done over the list
7997e8130Prakash Surya * (without modification to it) this lock should be taken as a RW_READER.
8097e8130Prakash Surya */
81ea8dc4beschrockstatic krwlock_t inject_lock;
8297e8130Prakash Surya
8397e8130Prakash Surya/*
8497e8130Prakash Surya * This holds the number of zinject delay handlers that have been
8597e8130Prakash Surya * registered on the system. It is protected by the inject_lock defined
8697e8130Prakash Surya * above. Thus modifications to this count must be a RW_WRITER of the
8797e8130Prakash Surya * inject_lock, and reads of this count must be (at least) a RW_READER
8897e8130Prakash Surya * of the lock.
8997e8130Prakash Surya */
9097e8130Prakash Suryastatic int inject_delay_count = 0;
9197e8130Prakash Surya
9297e8130Prakash Surya/*
9397e8130Prakash Surya * This lock is used only in zio_handle_io_delay(), refer to the comment
9497e8130Prakash Surya * in that function for more details.
9597e8130Prakash Surya */
9697e8130Prakash Suryastatic kmutex_t inject_delay_mtx;
9797e8130Prakash Surya
9897e8130Prakash Surya/*
9997e8130Prakash Surya * Used to assign unique identifying numbers to each new zinject handler.
10097e8130Prakash Surya */
101ea8dc4beschrockstatic int inject_next_id = 1;
104b853d39Don Brady * Test if the requested frequency was triggered
105b853d39Don Brady */
106b853d39Don Bradystatic boolean_t
107b853d39Don Bradyfreq_triggered(uint32_t frequency)
108b853d39Don Brady{
109b853d39Don Brady	/*
110b853d39Don Brady	 * zero implies always (100%)
111b853d39Don Brady	 */
112b853d39Don Brady	if (frequency == 0)
113b853d39Don Brady		return (B_TRUE);
114b853d39Don Brady
115b853d39Don Brady	/*
116b853d39Don Brady	 * Note: we still handle legacy (unscaled) frequecy values
117b853d39Don Brady	 */
118b853d39Don Brady	uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;
119b853d39Don Brady
120b853d39Don Brady	return (spa_get_random(maximum) < frequency);
121b853d39Don Brady}
122b853d39Don Brady
123b853d39Don Brady/*
124ea8dc4beschrock * Returns true if the given record matches the I/O in progress.
125ea8dc4beschrock */
126ea8dc4beschrockstatic boolean_t
12712a8814Tom Caputizio_match_handler(zbookmark_phys_t *zb, uint64_t type, int dva,
128ea8dc4beschrock    zinject_record_t *record, int error)
130ea8dc4beschrock	/*
131ea8dc4beschrock	 * Check for a match against the MOS, which is based on type
132ea8dc4beschrock	 */
133b24ab67Jeff Bonwick	if (zb->zb_objset == DMU_META_OBJSET &&
134b24ab67Jeff Bonwick	    record->zi_objset == DMU_META_OBJSET &&
135b24ab67Jeff Bonwick	    record->zi_object == DMU_META_DNODE_OBJECT) {
136ea8dc4beschrock		if (record->zi_type == DMU_OT_NONE ||
137ea8dc4beschrock		    type == record->zi_type)
138b853d39Don Brady			return (freq_triggered(record->zi_freq));
139ea8dc4beschrock		else
140ea8dc4beschrock			return (B_FALSE);
141ea8dc4beschrock	}
143ea8dc4beschrock	/*
144ea8dc4beschrock	 * Check for an exact match.
145ea8dc4beschrock	 */
146ea8dc4beschrock	if (zb->zb_objset == record->zi_objset &&
147ea8dc4beschrock	    zb->zb_object == record->zi_object &&
148ea8dc4beschrock	    zb->zb_level == record->zi_level &&
149ea8dc4beschrock	    zb->zb_blkid >= record->zi_start &&
150ea8dc4beschrock	    zb->zb_blkid <= record->zi_end &&
15112a8814Tom Caputi	    (record->zi_dvas == 0 || (record->zi_dvas & (1ULL << dva))) &&
15212a8814Tom Caputi	    error == record->zi_error) {
153b853d39Don Brady		return (freq_triggered(record->zi_freq));
15412a8814Tom Caputi	}
156ea8dc4beschrock	return (B_FALSE);
16088ecc94George Wilson * Panic the system when a config change happens in the function
16188ecc94George Wilson * specified by tag.
16288ecc94George Wilson */
16388ecc94George Wilsonvoid
1641195e68Mark J Musantezio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type)
16588ecc94George Wilson{
16688ecc94George Wilson	inject_handler_t *handler;
16788ecc94George Wilson
16888ecc94George Wilson	rw_enter(&inject_lock, RW_READER);
16988ecc94George Wilson
17088ecc94George Wilson	for (handler = list_head(&inject_handlers); handler != NULL;
17188ecc94George Wilson	    handler = list_next(&inject_handlers, handler)) {
17288ecc94George Wilson
17388ecc94George Wilson		if (spa != handler->zi_spa)
17488ecc94George Wilson			continue;
17588ecc94George Wilson
1761195e68Mark J Musante		if (handler->zi_record.zi_type == type &&
1771195e68Mark J Musante		    strcmp(tag, handler->zi_record.zi_func) == 0)
17888ecc94George Wilson			panic("Panic requested in function %s\n", tag);
17988ecc94George Wilson	}
18088ecc94George Wilson
18188ecc94George Wilson	rw_exit(&inject_lock);
18288ecc94George Wilson}
18388ecc94George Wilson
18412a8814Tom Caputi
18512a8814Tom Caputi/*
18612a8814Tom Caputi * If this is a physical I/O for a vdev child determine which DVA it is
18712a8814Tom Caputi * for. We iterate backwards through the DVAs matching on the offset so
18812a8814Tom Caputi * that we end up with ZI_NO_DVA (-1) if we don't find a match.
18912a8814Tom Caputi */
19012a8814Tom Caputistatic int
19112a8814Tom Caputizio_match_dva(zio_t *zio)
19212a8814Tom Caputi{
19312a8814Tom Caputi	int i = ZI_NO_DVA;
19412a8814Tom Caputi
19512a8814Tom Caputi	if (zio->io_bp != NULL && zio->io_vd != NULL &&
19612a8814Tom Caputi	    zio->io_child_type == ZIO_CHILD_VDEV) {
19712a8814Tom Caputi		for (i = BP_GET_NDVAS(zio->io_bp) - 1; i >= 0; i--) {
19812a8814Tom Caputi			dva_t *dva = &zio->io_bp->blk_dva[i];
19912a8814Tom Caputi			uint64_t off = DVA_GET_OFFSET(dva);
20012a8814Tom Caputi			vdev_t *vd = vdev_lookup_top(zio->io_spa,
20112a8814Tom Caputi			    DVA_GET_VDEV(dva));
20212a8814Tom Caputi
20312a8814Tom Caputi			/* Compensate for vdev label added to leaves */
20412a8814Tom Caputi			if (zio->io_vd->vdev_ops->vdev_op_leaf)
20512a8814Tom Caputi				off += VDEV_LABEL_START_SIZE;
20612a8814Tom Caputi
20712a8814Tom Caputi			if (zio->io_vd == vd && zio->io_offset == off)
20812a8814Tom Caputi				break;
20912a8814Tom Caputi		}
21012a8814Tom Caputi	}
21112a8814Tom Caputi
21212a8814Tom Caputi	return (i);
21312a8814Tom Caputi}
21412a8814Tom Caputi
21512a8814Tom Caputi
21688ecc94George Wilson/*
217eb63303Tom Caputi * Inject a decryption failure. Decryption failures can occur in
218eb63303Tom Caputi * both the ARC and the ZIO layers.
219eb63303Tom Caputi */
220eb63303Tom Caputiint
221eb63303Tom Caputizio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb,
222eb63303Tom Caputi    uint64_t type, int error)
223eb63303Tom Caputi{
224eb63303Tom Caputi	int ret = 0;
225eb63303Tom Caputi	inject_handler_t *handler;
226eb63303Tom Caputi
227eb63303Tom Caputi	rw_enter(&inject_lock, RW_READER);
228eb63303Tom Caputi
229eb63303Tom Caputi	for (handler = list_head(&inject_handlers); handler != NULL;
230eb63303Tom Caputi	    handler = list_next(&inject_handlers, handler)) {
231eb63303Tom Caputi
232eb63303Tom Caputi		if (spa != handler->zi_spa ||
233eb63303Tom Caputi		    handler->zi_record.zi_cmd != ZINJECT_DECRYPT_FAULT)
234eb63303Tom Caputi			continue;
235eb63303Tom Caputi
236eb63303Tom Caputi		if (zio_match_handler((zbookmark_phys_t *)zb, type, ZI_NO_DVA,
237eb63303Tom Caputi		    &handler->zi_record, error)) {
238eb63303Tom Caputi			ret = error;
239eb63303Tom Caputi			break;
240eb63303Tom Caputi		}
241eb63303Tom Caputi	}
242eb63303Tom Caputi
243eb63303Tom Caputi	rw_exit(&inject_lock);
244eb63303Tom Caputi	return (ret);
245eb63303Tom Caputi}
246eb63303Tom Caputi
247eb63303Tom Caputi/*
248ea8dc4beschrock * Determine if the I/O in question should return failure.  Returns the errno
249ea8dc4beschrock * to be returned to the caller.
250ea8dc4beschrock */
252ea8dc4beschrockzio_handle_fault_injection(zio_t *zio, int error)
254ea8dc4beschrock	int ret = 0;
255ea8dc4beschrock	inject_handler_t *handler;
257ea8dc4beschrock	/*
258ea8dc4beschrock	 * Ignore I/O not associated with any logical data.
259ea8dc4beschrock	 */
260ea8dc4beschrock	if (zio->io_logical == NULL)
261ea8dc4beschrock		return (0);
263ea8dc4beschrock	/*
264ea8dc4beschrock	 * Currently, we only support fault injection on reads.
265ea8dc4beschrock	 */
266ea8dc4beschrock	if (zio->io_type != ZIO_TYPE_READ)
267ea8dc4beschrock		return (0);
269ea8dc4beschrock	rw_enter(&inject_lock, RW_READER);
271ea8dc4beschrock	for (handler = list_head(&inject_handlers); handler != NULL;
272ea8dc4beschrock	    handler = list_next(&inject_handlers, handler)) {
274283b846George.Wilson		if (zio->io_spa != handler->zi_spa ||
275283b846George.Wilson		    handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT)
276ea8dc4beschrock			continue;
27812a8814Tom Caputi		/* If this handler matches, return the specified error */
279ea8dc4beschrock		if (zio_match_handler(&zio->io_logical->io_bookmark,
280ea8dc4beschrock		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
28112a8814Tom Caputi		    zio_match_dva(zio), &handler->zi_record, error)) {
282ea8dc4beschrock			ret = error;
283ea8dc4beschrock			break;
284ea8dc4beschrock		}
285ea8dc4beschrock	}
287ea8dc4beschrock	rw_exit(&inject_lock);
289ea8dc4beschrock	return (ret);
29321bf64agw * Determine if the zio is part of a label update and has an injection
29421bf64agw * handler associated with that portion of the label. Currently, we
29521bf64agw * allow error injection in either the nvlist or the uberblock region of
29621bf64agw * of the vdev label.
29721bf64agw */
29921bf64agwzio_handle_label_injection(zio_t *zio, int error)
30121bf64agw	inject_handler_t *handler;
30221bf64agw	vdev_t *vd = zio->io_vd;
30321bf64agw	uint64_t offset = zio->io_offset;
30421bf64agw	int label;
30521bf64agw	int ret = 0;
3078f18d1fGeorge Wilson	if (offset >= VDEV_LABEL_START_SIZE &&
30821bf64agw	    offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
30921bf64agw		return (0);
31121bf64agw	rw_enter(&inject_lock, RW_READER);
31321bf64agw	for (handler = list_head(&inject_handlers); handler != NULL;
31421bf64agw	    handler = list_next(&inject_handlers, handler)) {
31521bf64agw		uint64_t start = handler->zi_record.zi_start;
31621bf64agw		uint64_t end = handler->zi_record.zi_end;
318283b846George.Wilson		if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT)
31921bf64agw			continue;
32121bf64agw		/*
32221bf64agw		 * The injection region is the relative offsets within a
32321bf64agw		 * vdev label. We must determine the label which is being
32421bf64agw		 * updated and adjust our region accordingly.
32521bf64agw		 */
32621bf64agw		label = vdev_label_number(vd->vdev_psize, offset);
32721bf64agw		start = vdev_label_offset(vd->vdev_psize, label, start);
32821bf64agw		end = vdev_label_offset(vd->vdev_psize, label, end);
33021bf64agw		if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
33121bf64agw		    (offset >= start && offset <= end)) {
33221bf64agw			ret = error;
33321bf64agw			break;
33421bf64agw		}
33521bf64agw	}
33621bf64agw	rw_exit(&inject_lock);
33721bf64agw	return (ret);
3428956713Eric Schrockzio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)