/* spa.h revision b24ab6762772a3f6a89393947930c7fa61306783 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26fa9e406ahrens#ifndef _SYS_SPA_H
27fa9e406ahrens#define	_SYS_SPA_H
29fa9e406ahrens#include <sys/avl.h>
30fa9e406ahrens#include <sys/zfs_context.h>
31fa9e406ahrens#include <sys/nvpair.h>
32fa9e406ahrens#include <sys/sysmacros.h>
33fa9e406ahrens#include <sys/types.h>
34fa9e406ahrens#include <sys/fs/zfs.h>
36fa9e406ahrens#ifdef	__cplusplus
37fa9e406ahrensextern "C" {
/*
 * Forward references that lots of things need.
 *
 * These only name the types; the structures themselves are incomplete
 * here, so this header can pass them around by pointer only.
 */
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct metaslab_group metaslab_group_t;
typedef struct metaslab_class metaslab_class_t;
typedef struct zio zio_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t;
struct dsl_pool;
/*
 * General-purpose 32-bit and 64-bit bitfield encodings.
 *
 * A field is 'len' bits wide and starts at bit 'low' of the word 'x'.
 * P2PHASE() comes from <sys/sysmacros.h>; it is used here to keep only
 * the low 'len' bits of a value.  'len' must be smaller than the word
 * width, because the mask is built with (1 << len).
 *
 *	BFnn_DECODE / BFnn_GET	extract the field from x
 *	BFnn_ENCODE		mask a value and move it into field position
 *	BFnn_SET		replace the field in the lvalue x with val
 *	BFnn_GET_SB / _SET_SB	same, for values stored as
 *				(value >> shift) - bias
 */
#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))

#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)

/*
 * XOR-ing x with ENCODE(old_field ^ val) flips exactly the bits in which
 * the old field and val differ, leaving every other bit of x untouched.
 * 'x' and 'low' are parenthesized so that expression arguments (for
 * example a conditional expression for 'low') group as intended.
 */
#define	BF32_SET(x, low, len, val)	\
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
#define	BF64_SET(x, low, len, val)	\
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))

#define	BF32_GET_SB(x, low, len, shift, bias)	\
	((BF32_GET(x, low, len) + (bias)) << (shift))
#define	BF64_GET_SB(x, low, len, shift, bias)	\
	((BF64_GET(x, low, len) + (bias)) << (shift))

#define	BF32_SET_SB(x, low, len, shift, bias, val)	\
	BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define	BF64_SET_SB(x, low, len, shift, bias, val)	\
	BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
/*
 * We currently support nine block sizes, from 512 bytes to 128K.
 * We could go higher, but the benefits are near-zero and the cost
 * of COWing a giant block to modify one byte would become excessive.
 *
 * The limits are expressed as log2 of the block size in bytes.
 */
#define	SPA_MINBLOCKSHIFT	9
#define	SPA_MAXBLOCKSHIFT	17
/*
 * Size of block to hold the configuration data (a packed nvlist)
 */
#define	SPA_CONFIG_BLOCKSIZE	(1 << 14)

/*
 * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
 * The ASIZE encoding should be at least 64 times larger (6 more bits)
 * to support up to 4-way RAID-Z mirror mode with worst-case gang block
 * overhead, three DVAs per bp, plus one more bit in case we do anything
 * else that expands the ASIZE.
 */
#define	SPA_LSIZEBITS		16	/* LSIZE up to 32M (2^16 * 512)	*/
#define	SPA_PSIZEBITS		16	/* PSIZE up to 32M (2^16 * 512)	*/
#define	SPA_ASIZEBITS		24	/* ASIZE up to 64 times larger	*/
/*
 * All SPA data is represented by 128-bit data virtual addresses (DVAs).
 * The members of the dva_t should be considered opaque outside the SPA.
 */
typedef struct dva {
	uint64_t	dva_word[2];
} dva_t;
/*
 * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
 */
typedef struct zio_cksum {
	uint64_t	zc_word[4];
} zio_cksum_t;
/*
 * Each block is described by its DVAs, time of birth, checksum, etc.
 * The word-by-word, bit-by-bit layout of the blkptr is as follows:
 *
 *	64	56	48	40	32	24	16	8	0
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 0	|		vdev1		| GRID  |	  ASIZE		|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 1	|G|			 offset1				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 2	|		vdev2		| GRID  |	  ASIZE		|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 3	|G|			 offset2				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 4	|		vdev3		| GRID  |	  ASIZE		|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 5	|G|			 offset3				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 6	|BDX|lvl| type	| cksum | comp	|     PSIZE	|     LSIZE	|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 7	|			padding					|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 8	|			padding					|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 9	|			physical birth txg			|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * a	|			logical birth txg			|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * b	|			fill count				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * c	|			checksum[0]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * d	|			checksum[1]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * e	|			checksum[2]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * f	|			checksum[3]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * Legend:
 *
 * vdev		virtual device ID
 * offset	offset into virtual device
 * LSIZE	logical size
 * PSIZE	physical size (after compression)
 * ASIZE	allocated size (including RAID-Z parity and gang block headers)
 * GRID		RAID-Z layout information (reserved for future use)
 * cksum	checksum function
 * comp		compression function
 * G		gang block indicator
 * B		byteorder (endianness)
 * D		dedup
 * X		unused
 * lvl		level of indirection
 * type		DMU object type
 * phys birth	txg of block allocation; zero if same as logical birth txg
 * log. birth	transaction group in which the block was logically born
 * fill count	number of non-zero blocks under this bp
 * checksum[4]	256-bit checksum of the data this bp describes
 */
#define	SPA_BLKPTRSHIFT	7		/* blkptr_t is 128 bytes	*/
#define	SPA_DVAS_PER_BP	3		/* Number of DVAs in a bp	*/
187b24ab67Jeff Bonwicktypedef struct blkptr {
188b24ab67Jeff Bonwick	dva_t		blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
189b24ab67Jeff Bonwick	uint64_t	blk_prop;	/* size, compression, type, etc	    */
190b24ab67Jeff Bonwick	uint64_t	blk_pad[2];	/* Extra space for the future	    */
191b24ab67Jeff Bonwick	uint64_t	blk_phys_birth;	/* txg when block was allocated	    */
192b24ab67Jeff Bonwick	uint64_t	blk_birth;	/* transaction group at birth	    */
193b24ab67Jeff Bonwick	uint64_t	blk_fill;	/* fill count			    */
194b24ab67Jeff Bonwick	zio_cksum_t	blk_cksum;	/* 256-bit checksum		    */
195b24ab67Jeff Bonwick} blkptr_t;
196b24ab67Jeff Bonwick
/*
 * Macros to get and set fields in a bp or DVA.
 *
 * Word 0 of a DVA holds ASIZE (low SPA_ASIZEBITS bits), GRID (next 8
 * bits) and the vdev ID (high 32 bits); word 1 holds the offset (low 63
 * bits) and the gang bit (bit 63).  ASIZE and offset are stored shifted
 * right by SPA_MINBLOCKSHIFT, so the getters return bytes and the setters
 * take bytes.
 */
#define	DVA_GET_ASIZE(dva)	\
	BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_ASIZE(dva, x)	\
	BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
	SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GRID(dva)	BF64_GET((dva)->dva_word[0], 24, 8)
#define	DVA_SET_GRID(dva, x)	BF64_SET((dva)->dva_word[0], 24, 8, x)

#define	DVA_GET_VDEV(dva)	BF64_GET((dva)->dva_word[0], 32, 32)
#define	DVA_SET_VDEV(dva, x)	BF64_SET((dva)->dva_word[0], 32, 32, x)

#define	DVA_GET_OFFSET(dva)	\
	BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_OFFSET(dva, x)	\
	BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GANG(dva)	BF64_GET((dva)->dva_word[1], 63, 1)
#define	DVA_SET_GANG(dva, x)	BF64_SET((dva)->dva_word[1], 63, 1, x)
/*
 * Accessors for the fields packed into blk_prop.
 *
 * LSIZE and PSIZE are stored as (bytes >> SPA_MINBLOCKSHIFT) - 1, so a
 * stored value of zero decodes to one minimum-sized block.
 */
#define	BP_GET_LSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_LSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_PSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_PSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_COMPRESS(bp)		BF64_GET((bp)->blk_prop, 32, 8)
#define	BP_SET_COMPRESS(bp, x)		BF64_SET((bp)->blk_prop, 32, 8, x)

#define	BP_GET_CHECKSUM(bp)		BF64_GET((bp)->blk_prop, 40, 8)
#define	BP_SET_CHECKSUM(bp, x)		BF64_SET((bp)->blk_prop, 40, 8, x)

#define	BP_GET_TYPE(bp)			BF64_GET((bp)->blk_prop, 48, 8)
#define	BP_SET_TYPE(bp, x)		BF64_SET((bp)->blk_prop, 48, 8, x)

#define	BP_GET_LEVEL(bp)		BF64_GET((bp)->blk_prop, 56, 5)
#define	BP_SET_LEVEL(bp, x)		BF64_SET((bp)->blk_prop, 56, 5, x)

#define	BP_GET_PROP_BIT_61(bp)		BF64_GET((bp)->blk_prop, 61, 1)
#define	BP_SET_PROP_BIT_61(bp, x)	BF64_SET((bp)->blk_prop, 61, 1, x)

#define	BP_GET_DEDUP(bp)		BF64_GET((bp)->blk_prop, 62, 1)
#define	BP_SET_DEDUP(bp, x)		BF64_SET((bp)->blk_prop, 62, 1, x)

/* The getter widens the single stored bit to all-zeroes or all-ones. */
#define	BP_GET_BYTEORDER(bp)		(0 - BF64_GET((bp)->blk_prop, 63, 1))
#define	BP_SET_BYTEORDER(bp, x)		BF64_SET((bp)->blk_prop, 63, 1, x)

/*
 * blk_phys_birth is kept at zero whenever the physical birth txg equals
 * the logical one, so BP_PHYSICAL_BIRTH() falls back to blk_birth in
 * that case.  BP_SET_BIRTH() expands to a brace-enclosed statement and
 * evaluates 'logical' and 'physical' more than once.
 */
#define	BP_PHYSICAL_BIRTH(bp)		\
	((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)

#define	BP_SET_BIRTH(bp, logical, physical)	\
{						\
	(bp)->blk_birth = (logical);		\
	(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}
/* Sum of the allocated sizes of all three DVAs of the bp. */
#define	BP_GET_ASIZE(bp)	\
	(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
		DVA_GET_ASIZE(&(bp)->blk_dva[2]))
/*
 * Yields PSIZE when the bp is above level 0 or its object type is
 * flagged ot_metadata in dmu_ot[], and LSIZE otherwise.
 *
 * The expansion is a plain parenthesized expression with no trailing
 * semicolon, so it can be used as an operand or a function argument.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
/* Number of DVAs in the bp whose allocated size is non-zero (0..3). */
#define	BP_GET_NDVAS(bp)	\
	(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
	!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
	!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
/* Number of DVAs in the bp that have the gang bit set (0..3). */
#define	BP_COUNT_GANG(bp)	\
	(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
	DVA_GET_GANG(&(bp)->blk_dva[1]) + \
	DVA_GET_GANG(&(bp)->blk_dva[2]))
/* True when both words of the two DVAs match. */
#define	DVA_EQUAL(dva1, dva2)	\
	((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
	(dva1)->dva_word[0] == (dva2)->dva_word[0])
/*
 * Two bps compare equal when they have the same physical birth txg and
 * all three DVAs match; no other field is examined.
 */
#define	BP_EQUAL(bp1, bp2)	\
	(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) &&	\
	DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) &&	\
	DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) &&	\
	DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))

/*
 * Branch-free comparison of two checksums passed by value (not by
 * pointer): the four word differences are OR-ed together and the
 * result is zero only if every pair of words is equal.
 */
#define	ZIO_CHECKSUM_EQUAL(zc1, zc2) \
	(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
	((zc1).zc_word[1] - (zc2).zc_word[1]) | \
	((zc1).zc_word[2] - (zc2).zc_word[2]) | \
	((zc1).zc_word[3] - (zc2).zc_word[3])))
/* A DVA counts as valid when its allocated size is non-zero. */
#define	DVA_IS_VALID(dva)	(DVA_GET_ASIZE(dva) != 0)
/*
 * Store the four 64-bit words w0..w3 into the checksum that 'zcp'
 * points to.  Expands to a brace-enclosed statement.
 */
#define	ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3)	\
{						\
	(zcp)->zc_word[0] = (w0);		\
	(zcp)->zc_word[1] = (w1);		\
	(zcp)->zc_word[2] = (w2);		\
	(zcp)->zc_word[3] = (w3);		\
}
/*
 * BP_IDENTITY yields a pointer to the first DVA of the bp; BP_IS_GANG
 * reports that DVA's gang bit.  A bp whose logical birth txg is zero
 * is treated as a hole.
 */
#define	BP_IDENTITY(bp)		(&(bp)->blk_dva[0])
#define	BP_IS_GANG(bp)		DVA_GET_GANG(BP_IDENTITY(bp))
#define	BP_IS_HOLE(bp)		((bp)->blk_birth == 0)
/*
 * Clear every member of the bp, one field at a time.  Expands to a
 * brace-enclosed statement.
 */
#define	BP_ZERO(bp)				\
{						\
	(bp)->blk_dva[0].dva_word[0] = 0;	\
	(bp)->blk_dva[0].dva_word[1] = 0;	\
	(bp)->blk_dva[1].dva_word[0] = 0;	\
	(bp)->blk_dva[1].dva_word[1] = 0;	\
	(bp)->blk_dva[2].dva_word[0] = 0;	\
	(bp)->blk_dva[2].dva_word[1] = 0;	\
	(bp)->blk_prop = 0;			\
	(bp)->blk_pad[0] = 0;			\
	(bp)->blk_pad[1] = 0;			\
	(bp)->blk_phys_birth = 0;		\
	(bp)->blk_birth = 0;			\
	(bp)->blk_fill = 0;			\
	ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0);	\
}
/*
 * Note: the byteorder is either 0 or -1, both of which are palindromes.
 * This simplifies the endianness handling a bit.
 */
#ifdef _BIG_ENDIAN
#define	ZFS_HOST_BYTEORDER	(0ULL)
#else
#define	ZFS_HOST_BYTEORDER	(-1ULL)
#endif
/* Size in bytes of the buffer that blkptr formatting writes into. */
#define	BP_SPRINTF_LEN	320
/*
 * This macro allows code sharing between zfs, libzpool, and mdb.
 * 'func' is either snprintf() or mdb_snprintf().
 * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
 *
 * 'buf' is treated as a buffer of BP_SPRINTF_LEN bytes.  'type',
 * 'checksum' and 'compress' are strings that are printed verbatim.
 * A NULL bp is rendered as "<NULL>" and a hole as "<hole>".
 *
 * The macro expands to a brace-enclosed statement that declares its own
 * locals (copyname, size, len, copies, d, dva) and evaluates 'bp' many
 * times, so pass a simple pointer expression without side effects.
 */
#define	SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress)	\
{									\
	static const char *copyname[] =					\
	    { "zero", "single", "double", "triple" };			\
	int size = BP_SPRINTF_LEN;					\
	int len = 0;							\
	int copies = 0;							\
									\
	if ((bp) == NULL) {						\
		len = func((buf) + len, size - len, "<NULL>");		\
	} else if (BP_IS_HOLE(bp)) {					\
		len = func((buf) + len, size - len, "<hole>");		\
	} else {							\
		for (int d = 0; d < BP_GET_NDVAS(bp); d++) {		\
			const dva_t *dva = &(bp)->blk_dva[d];		\
			if (DVA_IS_VALID(dva))				\
				copies++;				\
			len += func((buf) + len, size - len,		\
			    "DVA[%d]=<%llu:%llx:%llx>%c", d,		\
			    (u_longlong_t)DVA_GET_VDEV(dva),		\
			    (u_longlong_t)DVA_GET_OFFSET(dva),		\
			    (u_longlong_t)DVA_GET_ASIZE(dva),		\
			    ws);					\
		}							\
		/*							\
		 * For a gang bp, one copy is discounted when the	\
		 * third DVA's asize is at most half the second's.	\
		 */							\
		if (BP_IS_GANG(bp) &&					\
		    DVA_GET_ASIZE(&(bp)->blk_dva[2]) <=			\
		    DVA_GET_ASIZE(&(bp)->blk_dva[1]) / 2)		\
			copies--;					\
		len += func((buf) + len, size - len,			\
		    "[L%llu %s] %s %s %s %s %s %s%c"			\
		    "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c"	\
		    "cksum=%llx:%llx:%llx:%llx",			\
		    (u_longlong_t)BP_GET_LEVEL(bp),			\
		    type,						\
		    checksum,						\
		    compress,						\
		    BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE",		\
		    BP_IS_GANG(bp) ? "gang" : "contiguous",		\
		    BP_GET_DEDUP(bp) ? "dedup" : "unique",		\
		    copyname[copies],					\
		    ws,							\
		    (u_longlong_t)BP_GET_LSIZE(bp),			\
		    (u_longlong_t)BP_GET_PSIZE(bp),			\
		    (u_longlong_t)(bp)->blk_birth,			\
		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp),		\
		    (u_longlong_t)(bp)->blk_fill,			\
		    ws,							\
		    (u_longlong_t)(bp)->blk_cksum.zc_word[0],		\
		    (u_longlong_t)(bp)->blk_cksum.zc_word[1],		\
		    (u_longlong_t)(bp)->blk_cksum.zc_word[2],		\
		    (u_longlong_t)(bp)->blk_cksum.zc_word[3]);		\
	}								\
	ASSERT(len < size);						\
}

398fa9e406ahrens#include <sys/dmu.h>
/*
 * Yields ARC_BUFC_METADATA when the bp is above level 0 or its object
 * type is flagged ot_metadata in dmu_ot[], and ARC_BUFC_DATA otherwise.
 *
 * The expansion is a plain parenthesized expression with no trailing
 * semicolon, so it can be used as an operand or a function argument.
 */
#define	BP_GET_BUFC_TYPE(bp)						\
	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
404fa9e406ahrens/* state manipulation functions */
405fa9e406ahrensextern int spa_open(const char *pool, spa_t **, void *tag);
406468c413Tim Haleyextern int spa_open_rewind(const char *pool, spa_t **, void *tag,
407468c413Tim Haley    nvlist_t *policy, nvlist_t **config);
408ea8dc4beschrockextern int spa_get_stats(const char *pool, nvlist_t **config,
409ea8dc4beschrock    char *altroot, size_t buflen);
410990b485llingextern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
4110a48a24timh    const char *history_str, nvlist_t *zplprops);
412051aabetaylorextern int spa_import_rootpool(char *devpath, char *devid);
413990b485llingextern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
4146809eb4Eric Schrockextern int spa_import_verbatim(const char *, nvlist_t *, nvlist_t *);
415fa9e406ahrensextern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
416fa9e406ahrensextern int spa_destroy(char *pool);
417394ab0cGeorge Wilsonextern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
418394ab0cGeorge Wilson    boolean_t hardforce);
419ea8dc4beschrockextern int spa_reset(char *pool);
420ea8dc4beschrockextern void spa_async_request(spa_t *spa, int flag);
421088f389ahrensextern void spa_async_unrequest(spa_t *spa, int flag);
422ea8dc4beschrockextern void spa_async_suspend(spa_t *spa);
423ea8dc4beschrockextern void spa_async_resume(spa_t *spa);
424ea8dc4beschrockextern spa_t *spa_inject_addref(char *pool);
425ea8dc4beschrockextern void spa_inject_delref(spa_t *spa);
/* Async request flags; each one is a distinct bit so they can be OR-ed. */
#define	SPA_ASYNC_CONFIG_UPDATE	0x01
#define	SPA_ASYNC_REMOVE	0x02
#define	SPA_ASYNC_PROBE		0x04
#define	SPA_ASYNC_RESILVER_DONE	0x08
#define	SPA_ASYNC_RESILVER	0x10
#define	SPA_ASYNC_AUTOEXPAND	0x20
434fa9e406ahrens/* device manipulation */
435fa9e406ahrensextern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
436ea8dc4beschrockextern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
437fa9e406ahrens    int replacing);
4388ad4d6dJeff Bonwickextern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
4398ad4d6dJeff Bonwick    int replace_done);
44099653d4eschrockextern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
441c67d967eschrockextern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
4426809eb4Eric Schrockextern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
44499653d4eschrock/* spare state (which is global across all pools) */
44539c2341eschrockextern void spa_spare_add(vdev_t *vd);
44639c2341eschrockextern void spa_spare_remove(vdev_t *vd);
44789a89ebllingextern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt);
44839c2341eschrockextern void spa_spare_activate(vdev_t *vd);
450fa94a07brendan/* L2ARC state (which is global across all pools) */
451fa94a07brendanextern void spa_l2cache_add(vdev_t *vd);
452fa94a07brendanextern void spa_l2cache_remove(vdev_t *vd);
453fa94a07brendanextern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
454fa94a07brendanextern void spa_l2cache_activate(vdev_t *vd);
455fa94a07brendanextern void spa_l2cache_drop(spa_t *spa);
457fa9e406ahrens/* scrubbing */
458088f389ahrensextern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
460fa9e406ahrens/* spa syncing */
461fa9e406ahrensextern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
462fa9e406ahrensextern void spa_sync_allpools(void);
/* Sync-pass thresholds: each behaviour applies after the given pass. */
#define	SYNC_PASS_DEFERRED_FREE	1	/* defer frees after this pass */
#define	SYNC_PASS_DONT_COMPRESS	4	/* don't compress after this pass */
#define	SYNC_PASS_REWRITE	1	/* rewrite new bps after this pass */

4683a737e0brendan/* spa namespace global mutex */
4693a737e0brendanextern kmutex_t spa_namespace_lock;
472fa9e406ahrens * SPA configuration functions in spa_config.c
473fa9e406ahrens */
4750373e76bonwick#define	SPA_CONFIG_UPDATE_POOL	0
4760373e76bonwick#define	SPA_CONFIG_UPDATE_VDEVS	1
478c5904d1eschrockextern void spa_config_sync(spa_t *, boolean_t, boolean_t);
479fa9e406ahrensextern void spa_config_load(void);
480fa9e406ahrensextern nvlist_t *spa_all_configs(uint64_t *);
481fa9e406ahrensextern void spa_config_set(spa_t *spa, nvlist_t *config);
482fa9e406ahrensextern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
483fa9e406ahrens    int getstats);
4840373e76bonwickextern void spa_config_update(spa_t *spa, int what);
487fa9e406ahrens * Miscellaneous SPA routines in spa_misc.c
488fa9e406ahrens */
490fa9e406ahrens/* Namespace manipulation */
491fa9e406ahrensextern spa_t *spa_lookup(const char *name);
492468c413Tim Haleyextern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
493fa9e406ahrensextern void spa_remove(spa_t *spa);
494fa9e406ahrensextern spa_t *spa_next(spa_t *prev);
496fa9e406ahrens/* Refcount functions */
497fa9e406ahrensextern void spa_open_ref(spa_t *spa, void *tag);
498fa9e406ahrensextern void spa_close(spa_t *spa, void *tag);
499fa9e406ahrensextern boolean_t spa_refcount_zero(spa_t *spa);
/*
 * Bits naming the individual pool configuration locks.  SCL_LOCKS is
 * the number of lock bits and SCL_ALL is the mask with all of them set.
 */
#define	SCL_NONE	0x00
#define	SCL_CONFIG	0x01
#define	SCL_STATE	0x02
#define	SCL_L2ARC	0x04		/* hack until L2ARC 2.0 */
#define	SCL_ALLOC	0x08
#define	SCL_ZIO		0x10
#define	SCL_FREE	0x20
#define	SCL_VDEV	0x40
#define	SCL_LOCKS	7
#define	SCL_ALL		((1 << SCL_LOCKS) - 1)
#define	SCL_STATE_ALL	(SCL_STATE | SCL_L2ARC | SCL_ZIO)

513e14bb32Jeff Bonwick/* Pool configuration locks */
514e14bb32Jeff Bonwickextern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
515e14bb32Jeff Bonwickextern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
516e14bb32Jeff Bonwickextern void spa_config_exit(spa_t *spa, int locks, void *tag);
517e14bb32Jeff Bonwickextern int spa_config_held(spa_t *spa, int locks, krw_t rw);
519fa9e406ahrens/* Pool vdev add/remove lock */
520fa9e406ahrensextern uint64_t spa_vdev_enter(spa_t *spa);
52188ecc94George Wilsonextern uint64_t spa_vdev_config_enter(spa_t *spa);
52288ecc94George Wilsonextern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
52388ecc94George Wilson    int error, char *tag);
524fa9e406ahrensextern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
526e14bb32Jeff Bonwick/* Pool vdev state change lock */
5278f18d1fGeorge Wilsonextern void spa_vdev_state_enter(spa_t *spa, int oplock);
528e14bb32Jeff Bonwickextern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
529e14bb32Jeff Bonwick
/* Log state */
typedef enum spa_log_state {
	SPA_LOG_UNKNOWN = 0,	/* unknown log state */
	SPA_LOG_MISSING,	/* missing log(s) */
	SPA_LOG_CLEAR,		/* clear the log(s) */
	SPA_LOG_GOOD		/* log(s) are good */
} spa_log_state_t;

538b24ab67Jeff Bonwickextern spa_log_state_t spa_get_log_state(spa_t *spa);
539b24ab67Jeff Bonwickextern void spa_set_log_state(spa_t *spa, spa_log_state_t state);
540b24ab67Jeff Bonwick
541b24ab67Jeff Bonwick/* Log claim callback */
542b24ab67Jeff Bonwickextern void spa_claim_notify(zio_t *zio);
543b24ab67Jeff Bonwick
544fa9e406ahrens/* Accessor functions */
54588b7b0fMatthew Ahrensextern boolean_t spa_shutting_down(spa_t *spa);
546fa9e406ahrensextern struct dsl_pool *spa_get_dsl(spa_t *spa);
547fa9e406ahrensextern blkptr_t *spa_get_rootblkptr(spa_t *spa);
548fa9e406ahrensextern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
549fa9e406ahrensextern void spa_altroot(spa_t *, char *, size_t);
550fa9e406ahrensextern int spa_sync_pass(spa_t *spa);
551fa9e406ahrensextern char *spa_name(spa_t *spa);
552fa9e406ahrensextern uint64_t spa_guid(spa_t *spa);
553fa9e406ahrensextern uint64_t spa_last_synced_txg(spa_t *spa);
554fa9e406ahrensextern uint64_t spa_first_txg(spa_t *spa);
555b24ab67Jeff Bonwickextern uint64_t spa_syncing_txg(spa_t *spa);
55699653d4eschrockextern uint64_t spa_version(spa_t *spa);
55788b7b0fMatthew Ahrensextern pool_state_t spa_state(spa_t *spa);
558fa9e406ahrensextern uint64_t spa_freeze_txg(spa_t *spa);
559fa9e406ahrensextern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
56044cd46cbillmextern uint64_t spa_version(spa_t *spa);
561b24ab67Jeff Bonwickextern boolean_t spa_deflate(spa_t *spa);
562b24ab67Jeff Bonwickextern metaslab_class_t *spa_normal_class(spa_t *spa);
563b24ab67Jeff Bonwickextern metaslab_class_t *spa_log_class(spa_t *spa);
56444cd46cbillmextern int spa_max_replication(spa_t *spa);
565fa9e406ahrensextern int spa_busy(void);
5660a4e951gwextern uint8_t spa_get_failmode(spa_t *spa);
567e14bb32Jeff Bonwickextern boolean_t spa_suspended(spa_t *spa);
568b24ab67Jeff Bonwickextern uint64_t spa_bootfs(spa_t *spa);
569b24ab67Jeff Bonwickextern uint64_t spa_delegation(spa_t *spa);
570b24ab67Jeff Bonwickextern objset_t *spa_meta_objset(spa_t *spa);
571b24ab67Jeff Bonwickextern enum zio_checksum spa_dedup_checksum(spa_t *spa);
573fa9e406ahrens/* Miscellaneous support routines */
574fa9e406ahrensextern int spa_rename(const char *oldname, const char *newname);
575fa9e406ahrensextern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
576fa9e406ahrensextern char *spa_strdup(const char *);
577fa9e406ahrensextern void spa_strfree(char *);
578fa9e406ahrensextern uint64_t spa_get_random(uint64_t range);
579b24ab67Jeff Bonwickextern void sprintf_blkptr(char *buf, const blkptr_t *bp);
580fa9e406ahrensextern void spa_freeze(spa_t *spa);
581990b485llingextern void spa_upgrade(spa_t *spa, uint64_t version);
582fa9e406ahrensextern void spa_evict_all(void);
583c5904d1eschrockextern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
584c5904d1eschrock    boolean_t l2cache);
58599653d4eschrockextern boolean_t spa_has_spare(spa_t *, uint64_t guid);
586b24ab67Jeff Bonwickextern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva);
587b24ab67Jeff Bonwickextern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp);
588b24ab67Jeff Bonwickextern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
5896ce0521perrinextern boolean_t spa_has_slogs(spa_t *spa);
590bf82a41eschrockextern boolean_t spa_is_root(spa_t *spa);
5918ad4d6dJeff Bonwickextern boolean_t spa_writeable(spa_t *spa);
592468c413Tim Haleyextern void spa_rewind_data_to_nvlist(spa_t *spa, nvlist_t *to);
593468c413Tim Haley
5948ad4d6dJeff Bonwickextern int spa_mode(spa_t *spa);
595ca45db4Chris Kirbyextern uint64_t strtonum(const char *str, char **nptr);
/* history logging */
typedef enum history_log_type {
	LOG_CMD_POOL_CREATE,
	LOG_CMD_NORMAL,
	LOG_INTERNAL
} history_log_type_t;
604ecd6cf8markstypedef struct history_arg {
605ecd6cf8marks	const char *ha_history_str;
606ecd6cf8marks	history_log_type_t ha_log_type;
607ecd6cf8marks	history_internal_events_t ha_event;
608ecd6cf8marks	char ha_zone[MAXPATHLEN];
609ecd6cf8marks} history_arg_t;
611ecd6cf8marksextern char *spa_his_ievent_table[];
61306eeb2aekextern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
61406eeb2aekextern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
61506eeb2aek    char *his_buf);
61606eeb2aekextern int spa_history_log(spa_t *spa, const char *his_buf,
617ecd6cf8marks    history_log_type_t what);
618c8e1f6dMark J Musanteextern void spa_history_internal_log(history_internal_events_t event,
619c8e1f6dMark J Musante    spa_t *spa, dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
620c8e1f6dMark J Musanteextern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
622ea8dc4beschrock/* error handling */
623ea8dc4beschrockstruct zbookmark;
624b24ab67Jeff Bonwickextern void spa_log_error(spa_t *spa, zio_t *zio);
625ea8dc4beschrockextern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
626b24ab67Jeff Bonwick    zio_t *zio, uint64_t stateoroffset, uint64_t length);
6273d7072feschrockextern void zfs_post_remove(spa_t *spa, vdev_t *vd);
628069f55eEric Schrockextern void zfs_post_state_change(spa_t *spa, vdev_t *vd);
6293d7072feschrockextern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
630ea8dc4beschrockextern uint64_t spa_get_errlog_size(spa_t *spa);
631ea8dc4beschrockextern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
632ea8dc4beschrockextern void spa_errlog_rotate(spa_t *spa);
633ea8dc4beschrockextern void spa_errlog_drain(spa_t *spa);
634ea8dc4beschrockextern void spa_errlog_sync(spa_t *spa, uint64_t txg);
635ea8dc4beschrockextern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
/* vdev cache */
extern void vdev_cache_stat_init(void);
extern void vdev_cache_stat_fini(void);
/* Initialization and termination */
extern void spa_init(int flags);
extern void spa_fini(void);
/* Declared with (void) so the compiler checks that no arguments are passed. */
extern void spa_boot_init(void);
646b1b8ab3lling/* properties */
647990b485llingextern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
648990b485llingextern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
649990b485llingextern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
650379c004Eric Schrockextern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
6523d7072feschrock/* asynchronous event notification */
6533d7072feschrockextern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
/*
 * dprintf_bp(bp, fmt, ...): debug-print a message followed by the
 * formatted block pointer.
 *
 * With ZFS_DEBUG defined, and only when ZFS_DEBUG_DPRINTF is set in
 * zfs_flags, a BP_SPRINTF_LEN-byte scratch buffer is allocated, filled
 * by sprintf_blkptr(), appended to the message and freed again.
 * Without ZFS_DEBUG the macro expands to nothing, so its arguments are
 * not evaluated.
 */
#ifdef ZFS_DEBUG
#define	dprintf_bp(bp, fmt, ...) do {				\
	if (zfs_flags & ZFS_DEBUG_DPRINTF) {			\
	char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP);	\
	sprintf_blkptr(__blkbuf, (bp));				\
	dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf);		\
	kmem_free(__blkbuf, BP_SPRINTF_LEN);			\
	} \
_NOTE(CONSTCOND) } while (0)
#else
#define	dprintf_bp(bp, fmt, ...)
#endif
extern int spa_mode_global;			/* mode, e.g. FREAD | FWRITE */
670fa9e406ahrens#ifdef	__cplusplus
674fa9e406ahrens#endif	/* _SYS_SPA_H */