xref: /illumos-gate/usr/src/uts/common/fs/zfs/zio.c (revision bbf21555)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright (c) 2017, Intel Corporation.
 * Copyright 2020 Joyent, Inc.
 */

#include <sys/sysmacros.h>
#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_trim.h>
#include <sys/zio_impl.h>
#include <sys/zio_compress.h>
#include <sys/zio_checksum.h>
#include <sys/dmu_objset.h>
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/blkptr.h>
#include <sys/zfeature.h>
#include <sys/time.h>
#include <sys/dsl_scan.h>
#include <sys/metaslab_impl.h>
#include <sys/abd.h>
#include <sys/cityhash.h>
#include <sys/dsl_crypt.h>
#include <sys/stdbool.h>

/*
 * ==========================================================================
 * I/O type descriptions
 * ==========================================================================
 */
const char *zio_type_name[ZIO_TYPES] = {
	"zio_null", "zio_read", "zio_write", "zio_free", "zio_claim",
	"zio_ioctl", "z_trim"
};

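/*
 * Controls the zio DVA (allocation) throttle, which limits the number of
 * in-flight allocating writes queued to each top-level vdev so that
 * allocations stay balanced across metaslab groups; see zio_dva_throttle()
 * and the allocation queue logic in metaslab.c.
 */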
boolean_t zio_dva_throttle_enabled = B_TRUE;

/*
 * ==========================================================================
 * I/O kmem caches
 * ==========================================================================
 */
kmem_cache_t *zio_cache;
kmem_cache_t *zio_link_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];

#ifdef _KERNEL
extern vmem_t *zio_alloc_arena;
#endif

#define	ZIO_PIPELINE_CONTINUE		0x100
#define	ZIO_PIPELINE_STOP		0x101

/* Mark IOs as "slow" if they take longer than 30 seconds */
int zio_slow_io_ms = (30 * MILLISEC);

#define	BP_SPANB(indblkshift, level) \
	(((uint64_t)1) << ((level) * ((indblkshift) - SPA_BLKPTRSHIFT)))
#define	COMPARE_META_LEVEL	0x80000000ul
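
/*
 * BP_SPANB(indblkshift, level) is the number of level-0 blocks spanned by a
 * single block pointer at the given indirection level: each indirect block
 * holds 2^(indblkshift - SPA_BLKPTRSHIFT) block pointers, so the span grows
 * by that factor per level.  COMPARE_META_LEVEL is a sentinel level used by
 * the bookmark-comparison code later in this file when ordering meta-dnode
 * blocks.
 */
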
/*
 * The following actions directly affect the spa's sync-to-convergence logic.
 * The values below define the sync pass when we start performing the action.
 * Care should be taken when changing these values as they directly impact
 * spa_sync() performance. Tuning these values may introduce subtle performance
 * pathologies and should only be done in the context of performance analysis.
 * These tunables will eventually be removed and replaced with #defines once
 * enough analysis has been done to determine optimal values.
 *
 * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that
 * regular blocks are not deferred.
 */
int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */
int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */
int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */

/*
 * An allocating zio is one that either currently has the DVA allocate
 * stage set or will have it later in its lifetime.
 */
#define	IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE)

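/*
 * If set, a zio that is requeued to a taskq while still at the start of its
 * pipeline cuts in line (is dispatched to the front of the taskq) rather
 * than going to the back; see zio_taskq_dispatch().
 */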
boolean_t	zio_requeue_io_start_cut_in_line = B_TRUE;

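/*
 * zio buffer caches for sizes above this limit are created with KMC_NODEBUG
 * (no kmem debugging) to limit the overhead of auditing large buffers; see
 * the cflags computation in zio_init() below.
 */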
#ifdef ZFS_DEBUG
int zio_buf_debug_limit = 16384;
#else
int zio_buf_debug_limit = 0;
#endif

static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t);

void
zio_init(void)
{
	size_t c;
	vmem_t *data_alloc_arena = NULL;

#ifdef _KERNEL
	data_alloc_arena = zio_alloc_arena;
#endif
	zio_cache = kmem_cache_create("zio_cache",
	    sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	zio_link_cache = kmem_cache_create("zio_link_cache",
	    sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	/*
	 * For small buffers, we want a cache for each multiple of
	 * SPA_MINBLOCKSIZE.  For larger buffers, we want a cache
	 * for each quarter-power of 2.
	 */
	for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
		size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
		size_t p2 = size;
		size_t align = 0;
		size_t cflags = (size > zio_buf_debug_limit) ? KMC_NODEBUG : 0;

		while (!ISP2(p2))
			p2 &= p2 - 1;

#ifndef _KERNEL
		/*
		 * If we are using watchpoints, put each buffer on its own page,
		 * to eliminate the performance overhead of trapping to the
		 * kernel when modifying a non-watched buffer that shares the
		 * page with a watched buffer.
		 */
		if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
			continue;
#endif
		if (size <= 4 * SPA_MINBLOCKSIZE) {
			align = SPA_MINBLOCKSIZE;
		} else if (IS_P2ALIGNED(size, p2 >> 2)) {
			align = MIN(p2 >> 2, PAGESIZE);
		}

		if (align != 0) {
			char name[36];
			(void) sprintf(name, "zio_buf_%lu", (ulong_t)size);
			zio_buf_cache[c] = kmem_cache_create(name, size,
			    align, NULL, NULL, NULL, NULL, NULL, cflags);

			/*
			 * Since zio_data bufs do not appear in crash dumps, we
			 * pass KMC_NOTOUCH so that no allocator metadata is
			 * stored with the buffers.
			 */
			(void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size);
			zio_data_buf_cache[c] = kmem_cache_create(name, size,
			    align, NULL, NULL, NULL, NULL, data_alloc_arena,
			    cflags | KMC_NOTOUCH);
		}
	}

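	/*
	 * Fill in the gaps: any size class for which no cache was created
	 * above is pointed at the next-larger cache that does exist, so
	 * every possible buffer size maps to a usable cache.
	 */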
	while (--c != 0) {
		ASSERT(zio_buf_cache[c] != NULL);
		if (zio_buf_cache[c - 1] == NULL)
			zio_buf_cache[c - 1] = zio_buf_cache[c];

		ASSERT(zio_data_buf_cache[c] != NULL);
		if (zio_data_buf_cache[c - 1] == NULL)
			zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
	}

	zio_inject_init();
}

void
zio_fini(void)
{
	size_t c;
	kmem_cache_t *last_cache = NULL;
	kmem_cache_t *last_data_cache = NULL;

	for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
		if (zio_buf_cache[c] != last_cache) {
			last_cache = zio_buf_cache[c];
			kmem_cache_destroy(zio_buf_cache[c]);
		}
		zio_buf_cache[c] = NULL;

		if (zio_data_buf_cache[c] != last_data_cache) {
			last_data_cache = zio_data_buf_cache[c];
			kmem_cache_destroy(zio_data_buf_cache[c]);
		}
		zio_data_buf_cache[c] = NULL;
	}

	kmem_cache_destroy(zio_link_cache);
	kmem_cache_destroy(zio_cache);

	zio_inject_fini();
}

/*
 * ==========================================================================
 * Allocate and free I/O buffers
 * ==========================================================================
 */

/*
 * Use zio_buf_alloc to allocate ZFS metadata.  This data will appear in a
 * crashdump if the kernel panics, so use it judiciously.  Obviously, it's
 * useful to inspect ZFS metadata, but if possible, we should avoid keeping
 * excess / transient data in-core during a crashdump.
 */
void *
zio_buf_alloc(size_t size)
{
	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

	return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
}
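
/*
 * Illustrative example of the size-to-cache mapping above: with
 * SPA_MINBLOCKSHIFT == 9 (512-byte SPA_MINBLOCKSIZE), a 3000-byte request
 * computes c = (3000 - 1) >> 9 == 5, i.e. the 3072-byte size class, so the
 * caller receives a buffer of (at least) 3072 bytes.
 */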

/*
 * Use zio_data_buf_alloc to allocate data.  The data will not appear in a
 * crashdump if the kernel panics.  This exists so that we will limit the
 * amount of ZFS data that shows up in a kernel crashdump.  (Thus reducing the
 * amount of kernel heap dumped to disk when the kernel panics)
 */
void *
zio_data_buf_alloc(size_t size)
{
	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

	return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
}

void
zio_buf_free(void *buf, size_t size)
{
	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

	kmem_cache_free(zio_buf_cache[c], buf);
}

void
zio_data_buf_free(void *buf, size_t size)
{
	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;

	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);

	kmem_cache_free(zio_data_buf_cache[c], buf);
}

/* ARGSUSED */
static void
zio_abd_free(void *abd, size_t size)
{
	abd_free((abd_t *)abd);
}

/*
 * ==========================================================================
 * Push and pop I/O transform buffers
 * ==========================================================================
 */
void
zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize,
    zio_transform_func_t *transform)
{
	zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP);

	/*
	 * Ensure that anyone expecting this zio to contain a linear ABD isn't
	 * going to get a nasty surprise when they try to access the data.
	 */
	IMPLY(abd_is_linear(zio->io_abd), abd_is_linear(data));

	zt->zt_orig_abd = zio->io_abd;
	zt->zt_orig_size = zio->io_size;
	zt->zt_bufsize = bufsize;
	zt->zt_transform = transform;

	zt->zt_next = zio->io_transform_stack;
	zio->io_transform_stack = zt;

	zio->io_abd = data;
	zio->io_size = size;
}

void
zio_pop_transforms(zio_t *zio)
{
	zio_transform_t *zt;

	while ((zt = zio->io_transform_stack) != NULL) {
		if (zt->zt_transform != NULL)
			zt->zt_transform(zio,
			    zt->zt_orig_abd, zt->zt_orig_size);

		if (zt->zt_bufsize != 0)
			abd_free(zio->io_abd);

		zio->io_abd = zt->zt_orig_abd;
		zio->io_size = zt->zt_orig_size;
		zio->io_transform_stack = zt->zt_next;

		kmem_free(zt, sizeof (zio_transform_t));
	}
}
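
/*
 * Illustrative example of the transform stack: when reading a compressed
 * block, the smaller physical data is read into a temporary abd pushed with
 * a zio_decompress transform.  zio_pop_transforms() then decompresses that
 * data back into the original abd, frees the temporary buffer (zt_bufsize is
 * nonzero), and restores io_abd and io_size.  Transforms are popped in LIFO
 * order, undoing them in reverse of the order in which they were applied.
 */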

/*
 * ==========================================================================
 * I/O transform callbacks for subblocks, decompression, and decryption
 * ==========================================================================
 */
static void
zio_subblock(zio_t *zio, abd_t *data, uint64_t size)
{
	ASSERT(zio->io_size > size);

	if (zio->io_type == ZIO_TYPE_READ)
		abd_copy(data, zio->io_abd, size);
}

static void
zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
{
	if (zio->io_error == 0) {
		void *tmp = abd_borrow_buf(data, size);
		int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
		    zio->io_abd, tmp, zio->io_size, size);
		abd_return_buf_copy(data, tmp, size);

		if (ret != 0)
			zio->io_error = SET_ERROR(EIO);
	}
}

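/*
 * Read-side decryption callback.  Three cases are handled below: indirect
 * blocks carry a checksum of their embedded MACs that can be verified
 * without an encryption key; authenticated-only blocks just have their MAC
 * verified; all other encrypted blocks are decrypted into the caller's
 * buffer via spa_do_crypt_abd().
 */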
static void
zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
{
	int ret;
	void *tmp;
	blkptr_t *bp = zio->io_bp;
	spa_t *spa = zio->io_spa;
	uint64_t dsobj = zio->io_bookmark.zb_objset;
	uint64_t lsize = BP_GET_LSIZE(bp);
	dmu_object_type_t ot = BP_GET_TYPE(bp);
	uint8_t salt[ZIO_DATA_SALT_LEN];
	uint8_t iv[ZIO_DATA_IV_LEN];
	uint8_t mac[ZIO_DATA_MAC_LEN];
	boolean_t no_crypt = B_FALSE;

	ASSERT(BP_USES_CRYPT(bp));
	ASSERT3U(size, !=, 0);

	if (zio->io_error != 0)
		return;

	/*
	 * Verify the cksum of MACs stored in an indirect bp. It will always
	 * be possible to verify this since it does not require an encryption
	 * key.
	 */
	if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {
		zio_crypt_decode_mac_bp(bp, mac);

		if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
			/*
			 * We haven't decompressed the data yet, but
			 * zio_crypt_do_indirect_mac_checksum() requires
			 * decompressed data to be able to parse out the MACs
			 * from the indirect block. We decompress it now and
			 * throw away the result after we are finished.
			 */
			tmp = zio_buf_alloc(lsize);
			ret = zio_decompress_data(BP_GET_COMPRESS(bp),
			    zio->io_abd, tmp, zio->io_size, lsize);
			if (ret != 0) {
				ret = SET_ERROR(EIO);
				goto error;
			}
			ret = zio_crypt_do_indirect_mac_checksum(B_FALSE,
			    tmp, lsize, BP_SHOULD_BYTESWAP(bp), mac);
			zio_buf_free(tmp, lsize);
		} else {
			ret = zio_crypt_do_indirect_mac_checksum_abd(B_FALSE,
			    zio->io_abd, size, BP_SHOULD_BYTESWAP(bp), mac);
		}
		abd_copy(data, zio->io_abd, size);

		if (ret != 0)
			goto error;

		return;
	}

	/*
	 * If this is an authenticated block, just check the MAC. It would be
	 * nice to separate this out into its own flag, but for the moment
	 * enum zio_flag is out of bits.
	 */
	if (BP_IS_AUTHENTICATED(bp)) {
		if (ot == DMU_OT_OBJSET) {
			ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa,
			    dsobj, zio->io_abd, size, BP_SHOULD_BYTESWAP(bp));
		} else {
			zio_crypt_decode_mac_bp(bp, mac);
			ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj,
			    zio->io_abd, size, mac);
		}
		abd_copy(data, zio->io_abd, size);

		if (zio_injection_enabled && ot != DMU_OT_DNODE && ret == 0) {
			ret = zio_handle_decrypt_injection(spa,
			    &zio->io_bookmark, ot, ECKSUM);
		}
		if (ret != 0)
			goto error;

		return;
	}

	zio_crypt_decode_params_bp(bp, salt, iv);

	if (ot == DMU_OT_INTENT_LOG) {
		tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t));
		zio_crypt_decode_mac_zil(tmp, mac);
		abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t));
	} else {
		zio_crypt_decode_mac_bp(bp, mac);
	}

	ret = spa_do_crypt_abd(B_FALSE, spa, &zio->io_bookmark, BP_GET_TYPE(bp),
	    BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp), salt, iv, mac, size, data,
	    zio->io_abd, &no_crypt);
	if (no_crypt)
		abd_copy(data, zio->io_abd, size);

	if (ret != 0)
		goto error;

	return;

error: