spa.h revision c8e1f6d2e1adb3a8cc413859aaa8c61813665ac6
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bdstevel@tonic-gate
267c478bdstevel@tonic-gate#ifndef _SYS_SPA_H
277c478bdstevel@tonic-gate#define	_SYS_SPA_H
287c478bdstevel@tonic-gate
290209230gjelinek#include <sys/avl.h>
307c478bdstevel@tonic-gate#include <sys/zfs_context.h>
317c478bdstevel@tonic-gate#include <sys/nvpair.h>
327c478bdstevel@tonic-gate#include <sys/sysmacros.h>
337c478bdstevel@tonic-gate#include <sys/types.h>
347c478bdstevel@tonic-gate#include <sys/fs/zfs.h>
357c478bdstevel@tonic-gate
367c478bdstevel@tonic-gate#ifdef	__cplusplus
377c478bdstevel@tonic-gateextern "C" {
387c478bdstevel@tonic-gate#endif
397c478bdstevel@tonic-gate
/*
 * Forward references that lots of things need.
 *
 * Only the struct tags are introduced here; none of these structures is
 * defined in this header, so the types are usable through pointers only.
 */
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
struct dsl_pool;

/*
 * General-purpose 32-bit and 64-bit bitfield encodings.
 *
 * A field is the 'len' bits of 'x' that start at bit 'low'.  The masking
 * is done with P2PHASE(), which is not defined in this file (presumably it
 * comes from <sys/sysmacros.h> -- confirm).
 *
 *	BFnn_DECODE, BFnn_GET	read the field, shifted down to bit 0
 *	BFnn_ENCODE		truncate a value to 'len' bits and shift it
 *				up to bit 'low'
 *	BFnn_SET		replace the field in 'x' with 'val'
 *	BFnn_GET_SB		read the field, add 'bias', then shift the
 *				result left by 'shift'
 *	BFnn_SET_SB		inverse of GET_SB: stores (val >> shift) - bias
 *
 * BFnn_SET modifies 'x' in place and evaluates 'x' and 'low' more than
 * once, so neither argument may have side effects.  The masks are built as
 * 1U << len (32-bit) or 1ULL << len (64-bit), so 'len' must be smaller
 * than the word width.
 */
#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))

#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)

/*
 * XOR the difference between the current field and 'val' back into 'x'.
 * 'x' and 'low' are parenthesized so that an argument such as
 * "cond ? 8 : 0" is shifted as a unit.
 */
#define	BF32_SET(x, low, len, val)	\
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
#define	BF64_SET(x, low, len, val)	\
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))

#define	BF32_GET_SB(x, low, len, shift, bias)	\
	((BF32_GET(x, low, len) + (bias)) << (shift))
#define	BF64_GET_SB(x, low, len, shift, bias)	\
	((BF64_GET(x, low, len) + (bias)) << (shift))

#define	BF32_SET_SB(x, low, len, shift, bias, val)	\
	BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define	BF64_SET_SB(x, low, len, shift, bias, val)	\
	BF64_SET(x, low, len, ((val) >> (shift)) - (bias))

/*
 * We currently support nine block sizes, from 512 bytes to 128K.
 * We could go higher, but the benefits are near-zero and the cost
 * of COWing a giant block to modify one byte would become excessive.
 *
 * The sizes are the powers of two from 2^SPA_MINBLOCKSHIFT through
 * 2^SPA_MAXBLOCKSHIFT; SPA_BLOCKSIZES is how many of them there are.
 */
#define	SPA_MINBLOCKSHIFT	9
#define	SPA_MAXBLOCKSHIFT	17
#define	SPA_MINBLOCKSIZE	(1ULL << SPA_MINBLOCKSHIFT)
#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)

#define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)

/*
 * Size of block to hold the configuration data (a packed nvlist)
 */
#define	SPA_CONFIG_BLOCKSIZE	(1 << 14)

/*
 * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
 * The ASIZE encoding should be at least 64 times larger (6 more bits)
 * to support up to 4-way RAID-Z mirror mode with worst-case gang block
 * overhead, three DVAs per bp, plus one more bit in case we do anything
 * else that expands the ASIZE.
 */
#define	SPA_LSIZEBITS		16	/* LSIZE up to 32M (2^16 * 512)	*/
#define	SPA_PSIZEBITS		16	/* PSIZE up to 32M (2^16 * 512)	*/
#define	SPA_ASIZEBITS		24	/* ASIZE: 8 more bits than PSIZE */

/*
 * All SPA data is represented by 128-bit data virtual addresses (DVAs).
 * The members of the dva_t should be considered opaque outside the SPA.
 */
typedef struct dva {
	uint64_t	dva_word[2];	/* two 64-bit words = 128 bits	*/
} dva_t;

/*
 * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
 */
typedef struct zio_cksum {
	uint64_t	zc_word[4];	/* four 64-bit words = 256 bits	*/
} zio_cksum_t;

1197c478bdstevel@tonic-gate/*
1207c478bdstevel@tonic-gate * Each block is described by its DVAs, time of birth, checksum, etc.
1217c478bdstevel@tonic-gate * The word-by-word, bit-by-bit layout of the blkptr is as follows:
1227c478bdstevel@tonic-gate *
1237c478bdstevel@tonic-gate *	64	56	48	40	32	24	16	8	0
1247c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1257c478bdstevel@tonic-gate * 0	|		vdev1		| GRID  |	  ASIZE		|
1267c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1277c478bdstevel@tonic-gate * 1	|G|			 offset1				|
1287c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1297c478bdstevel@tonic-gate * 2	|		vdev2		| GRID  |	  ASIZE		|
1307c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1317c478bdstevel@tonic-gate * 3	|G|			 offset2				|
1327c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1337c478bdstevel@tonic-gate * 4	|		vdev3		| GRID  |	  ASIZE		|
1347c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1357c478bdstevel@tonic-gate * 5	|G|			 offset3				|
1367c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1377c478bdstevel@tonic-gate * 6	|E| lvl | type	| cksum | comp	|     PSIZE	|     LSIZE	|
1387c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1397c478bdstevel@tonic-gate * 7	|			padding					|
1407c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1417c478bdstevel@tonic-gate * 8	|			padding					|
1427c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1437c478bdstevel@tonic-gate * 9	|			padding					|
1447c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1457c478bdstevel@tonic-gate * a	|			birth txg				|
1467c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1477c478bdstevel@tonic-gate * b	|			fill count				|
1487c478bdstevel@tonic-gate *	+-------+-------+-------+-------+-------+-------+-------+-------+
1497c478bdstevel@tonic-gate * c	|			checksum[0]				|
150532877crd *	+-------+-------+-------+-------+-------+-------+-------+-------+
151532877crd * d	|			checksum[1]				|
152532877crd *	+-------+-------+-------+-------+-------+-------+-------+-------+
153532877crd * e	|			checksum[2]				|
154532877crd *	+-------+-------+-------+-------+-------+-------+-------+-------+
155532877crd * f	|			checksum[3]				|
156532877crd *	+-------+-------+-------+-------+-------+-------+-------+-------+
157532877crd *
158532877crd * Legend:
159532877crd *
160532877crd * vdev		virtual device ID
161532877crd * offset	offset into virtual device
162532877crd * LSIZE	logical size
163532877crd * PSIZE	physical size (after compression)
164532877crd * ASIZE	allocated size (including RAID-Z parity and gang block headers)
165532877crd * GRID		RAID-Z layout information (reserved for future use)
166532877crd * cksum	checksum function
167532877crd * comp		compression function
168532877crd * G		gang block indicator
169532877crd * E		endianness
170532877crd * type		DMU object type
171532877crd * lvl		level of indirection
172532877crd * birth txg	transaction group in which the block was born
173532877crd * fill count	number of non-zero blocks under this bp
174532877crd * checksum[4]	256-bit checksum of the data this bp describes
175532877crd */
176532877crdtypedef struct blkptr {
177532877crd	dva_t		blk_dva[3];	/* 128-bit Data Virtual Address	*/
178532877crd	uint64_t	blk_prop;	/* size, compression, type, etc	*/
179532877crd	uint64_t	blk_pad[3];	/* Extra space for the future	*/
180532877crd	uint64_t	blk_birth;	/* transaction group at birth	*/
181532877crd	uint64_t	blk_fill;	/* fill count			*/
182532877crd	zio_cksum_t	blk_cksum;	/* 256-bit checksum		*/
183532877crd} blkptr_t;
184532877crd
#define	SPA_BLKPTRSHIFT	7		/* blkptr_t is 128 bytes	*/
#define	SPA_DVAS_PER_BP	3		/* Number of DVAs in a bp	*/

/*
 * Macros to get and set fields in a bp or DVA.
 *
 * DVA accessors take a pointer to a dva_t.  Field placement:
 *
 *	dva_word[0]	bits  0-23	ASIZE, stored in units of
 *					(1 << SPA_MINBLOCKSHIFT) bytes
 *			bits 24-31	GRID
 *			bits 32-63	VDEV
 *	dva_word[1]	bits  0-62	OFFSET, stored in units of
 *					(1 << SPA_MINBLOCKSHIFT) bytes
 *			bit  63		GANG
 *
 * The setters modify the word in place and evaluate 'dva' more than once.
 */
#define	DVA_GET_ASIZE(dva)	\
	BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_ASIZE(dva, x)	\
	BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GRID(dva)	BF64_GET((dva)->dva_word[0], 24, 8)
#define	DVA_SET_GRID(dva, x)	BF64_SET((dva)->dva_word[0], 24, 8, x)

#define	DVA_GET_VDEV(dva)	BF64_GET((dva)->dva_word[0], 32, 32)
#define	DVA_SET_VDEV(dva, x)	BF64_SET((dva)->dva_word[0], 32, 32, x)

#define	DVA_GET_OFFSET(dva)	\
	BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_OFFSET(dva, x)	\
	BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GANG(dva)	BF64_GET((dva)->dva_word[1], 63, 1)
#define	DVA_SET_GANG(dva, x)	BF64_SET((dva)->dva_word[1], 63, 1, x)

/*
 * Accessors for the fields packed into blk_prop; each takes a pointer to a
 * blkptr_t.  Field placement within blk_prop:
 *
 *	bits  0-15	LSIZE	stored as (size >> SPA_MINBLOCKSHIFT) - 1
 *	bits 16-31	PSIZE	stored as (size >> SPA_MINBLOCKSHIFT) - 1
 *	bits 32-39	compression function
 *	bits 40-47	checksum function
 *	bits 48-55	type
 *	bits 56-60	level
 *	bit  63		byte order
 *
 * BP_GET_LSIZE yields 0 for a hole (BP_IS_HOLE) instead of decoding the
 * field.  The setters modify blk_prop in place and evaluate 'bp' more
 * than once.
 */
#define	BP_GET_LSIZE(bp)	\
	(BP_IS_HOLE(bp) ? 0 : \
	BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
#define	BP_SET_LSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_PSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_PSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_COMPRESS(bp)	BF64_GET((bp)->blk_prop, 32, 8)
#define	BP_SET_COMPRESS(bp, x)	BF64_SET((bp)->blk_prop, 32, 8, x)

#define	BP_GET_CHECKSUM(bp)	BF64_GET((bp)->blk_prop, 40, 8)
#define	BP_SET_CHECKSUM(bp, x)	BF64_SET((bp)->blk_prop, 40, 8, x)

#define	BP_GET_TYPE(bp)		BF64_GET((bp)->blk_prop, 48, 8)
#define	BP_SET_TYPE(bp, x)	BF64_SET((bp)->blk_prop, 48, 8, x)

#define	BP_GET_LEVEL(bp)	BF64_GET((bp)->blk_prop, 56, 5)
#define	BP_SET_LEVEL(bp, x)	BF64_SET((bp)->blk_prop, 56, 5, x)

/* Negating the single bit yields either 0 or all ones. */
#define	BP_GET_BYTEORDER(bp)	(0 - BF64_GET((bp)->blk_prop, 63, 1))
#define	BP_SET_BYTEORDER(bp, x)	BF64_SET((bp)->blk_prop, 63, 1, x)

/* Sum of the ASIZE fields of all three DVAs. */
#define	BP_GET_ASIZE(bp)	\
	(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
		DVA_GET_ASIZE(&(bp)->blk_dva[2]))

/*
 * Evaluates to BP_GET_PSIZE(bp) when the bp's level is above zero or
 * dmu_ot[] marks its type as metadata, and to BP_GET_LSIZE(bp) otherwise.
 * The expansion is a plain expression (no trailing semicolon), so it can
 * be used inside larger expressions.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))

/* Number of DVAs in the bp whose ASIZE is non-zero (0 to 3). */
#define	BP_GET_NDVAS(bp)	\
	(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
	!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
	!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))

/* Number of DVAs in the bp that have the gang bit set (0 to 3). */
#define	BP_COUNT_GANG(bp)	\
	(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
	DVA_GET_GANG(&(bp)->blk_dva[1]) + \
	DVA_GET_GANG(&(bp)->blk_dva[2]))

/* True when both 64-bit words of the two DVAs (given by pointer) match. */
#define	DVA_EQUAL(dva1, dva2)	\
	((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
	(dva1)->dva_word[0] == (dva2)->dva_word[0])

/*
 * True when all four words of the two checksums match.  The arguments are
 * checksum objects, not pointers.  The word differences are OR-ed
 * together, so the result is zero only if every word is equal.
 */
#define	ZIO_CHECKSUM_EQUAL(zc1, zc2) \
	(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
	((zc1).zc_word[1] - (zc2).zc_word[1]) | \
	((zc1).zc_word[2] - (zc2).zc_word[2]) | \
	((zc1).zc_word[3] - (zc2).zc_word[3])))

/* A DVA (given by pointer) counts as valid when its ASIZE is non-zero. */
#define	DVA_IS_VALID(dva)	(DVA_GET_ASIZE(dva) != 0)

/*
 * Store the four words w0..w3 into the checksum that zcp points to.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon is exactly one statement and can be used as the body of an
 * if/else.  zcp is evaluated four times, so it must not have side effects.
 */
#define	ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3)	\
do {						\
	(zcp)->zc_word[0] = (w0);		\
	(zcp)->zc_word[1] = (w1);		\
	(zcp)->zc_word[2] = (w2);		\
	(zcp)->zc_word[3] = (w3);		\
_NOTE(CONSTCOND) } while (0)

/*
 * BP_IDENTITY	pointer to the bp's first DVA
 * BP_IS_GANG	gang bit of that first DVA
 * BP_IS_HOLE	true when the birth txg is zero
 * BP_IS_OLDER	true when the bp is not a hole and was born before txg
 */
#define	BP_IDENTITY(bp)		(&(bp)->blk_dva[0])
#define	BP_IS_GANG(bp)		DVA_GET_GANG(BP_IDENTITY(bp))
#define	BP_IS_HOLE(bp)		((bp)->blk_birth == 0)
#define	BP_IS_OLDER(bp, txg)	(!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))

/*
 * Clear every field of the blkptr that bp points to: all three DVAs,
 * blk_prop, the padding words, birth txg, fill count and checksum.
 *
 * Wrapped in do { } while (0) so that "BP_ZERO(bp);" is a single statement
 * and is safe as the body of an if/else.  bp is evaluated many times, so
 * it must not have side effects.
 */
#define	BP_ZERO(bp)				\
do {						\
	(bp)->blk_dva[0].dva_word[0] = 0;	\
	(bp)->blk_dva[0].dva_word[1] = 0;	\
	(bp)->blk_dva[1].dva_word[0] = 0;	\
	(bp)->blk_dva[1].dva_word[1] = 0;	\
	(bp)->blk_dva[2].dva_word[0] = 0;	\
	(bp)->blk_dva[2].dva_word[1] = 0;	\
	(bp)->blk_prop = 0;			\
	(bp)->blk_pad[0] = 0;			\
	(bp)->blk_pad[1] = 0;			\
	(bp)->blk_pad[2] = 0;			\
	(bp)->blk_birth = 0;			\
	(bp)->blk_fill = 0;			\
	ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0);	\
_NOTE(CONSTCOND) } while (0)

#define	BLK_FILL_ALREADY_FREED	(-1ULL)

/*
 * Note: the byteorder is either 0 or -1, both of which are palindromes.
 * This simplifies the endianness handling a bit.
 */
#ifdef _BIG_ENDIAN
#define	ZFS_HOST_BYTEORDER	(0ULL)
#else
#define	ZFS_HOST_BYTEORDER	(-1ULL)
#endif

/* True when the bp's recorded byte order differs from the host's. */
#define	BP_SHOULD_BYTESWAP(bp)	(BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)

/* Buffer length used with sprintf_blkptr() by dprintf_bp(). */
#define	BP_SPRINTF_LEN	320

3127c478bdstevel@tonic-gate#include <sys/dmu.h>
3137c478bdstevel@tonic-gate
/*
 * Evaluates to ARC_BUFC_METADATA when the bp's level is above zero or
 * dmu_ot[] marks its type as metadata, and to ARC_BUFC_DATA otherwise.
 * The expansion is a plain expression (no trailing semicolon).
 */
#define	BP_GET_BUFC_TYPE(bp)						\
	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
3177c478bdstevel@tonic-gate/*
3187c478bdstevel@tonic-gate * Routines found in spa.c
3197c478bdstevel@tonic-gate */
3207c478bdstevel@tonic-gate
3217c478bdstevel@tonic-gate/* state manipulation functions */
3227c478bdstevel@tonic-gateextern int spa_open(const char *pool, spa_t **, void *tag);
3237c478bdstevel@tonic-gateextern int spa_get_stats(const char *pool, nvlist_t **config,
3247c478bdstevel@tonic-gate    char *altroot, size_t buflen);
3257c478bdstevel@tonic-gateextern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
3267c478bdstevel@tonic-gate    const char *history_str, nvlist_t *zplprops);
3277c478bdstevel@tonic-gateextern int spa_import_rootpool(char *devpath, char *devid);
3287c478bdstevel@tonic-gateextern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
3297c478bdstevel@tonic-gateextern int spa_import_verbatim(const char *, nvlist_t *, nvlist_t *);
3307c478bdstevel@tonic-gateextern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
3317c478bdstevel@tonic-gateextern int spa_destroy(char *pool);
3327c478bdstevel@tonic-gateextern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
3337c478bdstevel@tonic-gate    boolean_t hardforce);
3347c478bdstevel@tonic-gateextern int spa_reset(char *pool);
3357c478bdstevel@tonic-gateextern void spa_async_request(spa_t *spa, int flag);
3367c478bdstevel@tonic-gateextern void spa_async_unrequest(spa_t *spa, int flag);
3377c478bdstevel@tonic-gateextern void spa_async_suspend(spa_t *spa);
3387c478bdstevel@tonic-gateextern void spa_async_resume(spa_t *spa);
3397c478bdstevel@tonic-gateextern spa_t *spa_inject_addref(char *pool);
3407c478bdstevel@tonic-gateextern void spa_inject_delref(spa_t *spa);
3417c478bdstevel@tonic-gate
/* Single-bit flag values; each occupies a distinct bit so they can be OR-ed. */
#define	SPA_ASYNC_CONFIG_UPDATE	0x01
#define	SPA_ASYNC_REMOVE	0x02
#define	SPA_ASYNC_PROBE		0x04
#define	SPA_ASYNC_RESILVER_DONE	0x08
#define	SPA_ASYNC_RESILVER	0x10
#define	SPA_ASYNC_AUTOEXPAND	0x20

3497c478bdstevel@tonic-gate/* device manipulation */
3507c478bdstevel@tonic-gateextern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
3517c478bdstevel@tonic-gateextern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
3527c478bdstevel@tonic-gate    int replacing);
3537c478bdstevel@tonic-gateextern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
3547c478bdstevel@tonic-gate    int replace_done);
3557c478bdstevel@tonic-gateextern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
3567c478bdstevel@tonic-gateextern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
3577c478bdstevel@tonic-gateextern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
3587c478bdstevel@tonic-gate
3597c478bdstevel@tonic-gate/* spare state (which is global across all pools) */
3607c478bdstevel@tonic-gateextern void spa_spare_add(vdev_t *vd);
3617c478bdstevel@tonic-gateextern void spa_spare_remove(vdev_t *vd);
3627c478bdstevel@tonic-gateextern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt);
3637c478bdstevel@tonic-gateextern void spa_spare_activate(vdev_t *vd);
3647c478bdstevel@tonic-gate
3657c478bdstevel@tonic-gate/* L2ARC state (which is global across all pools) */
3667c478bdstevel@tonic-gateextern void spa_l2cache_add(vdev_t *vd);
3677c478bdstevel@tonic-gateextern void spa_l2cache_remove(vdev_t *vd);
3687c478bdstevel@tonic-gateextern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
3697c478bdstevel@tonic-gateextern void spa_l2cache_activate(vdev_t *vd);
3707c478bdstevel@tonic-gateextern void spa_l2cache_drop(spa_t *spa);
3717c478bdstevel@tonic-gateextern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
3727c478bdstevel@tonic-gate
3737c478bdstevel@tonic-gate/* scrubbing */
3747c478bdstevel@tonic-gateextern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
3757c478bdstevel@tonic-gate
3767c478bdstevel@tonic-gate/* spa syncing */
3777c478bdstevel@tonic-gateextern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
3787c478bdstevel@tonic-gateextern void spa_sync_allpools(void);
3797c478bdstevel@tonic-gate
3807c478bdstevel@tonic-gate/* spa namespace global mutex */
3817c478bdstevel@tonic-gateextern kmutex_t spa_namespace_lock;
3827c478bdstevel@tonic-gate
3837c478bdstevel@tonic-gate/*
3847c478bdstevel@tonic-gate * SPA configuration functions in spa_config.c
3857c478bdstevel@tonic-gate */
3867c478bdstevel@tonic-gate
3877c478bdstevel@tonic-gate#define	SPA_CONFIG_UPDATE_POOL	0
3887c478bdstevel@tonic-gate#define	SPA_CONFIG_UPDATE_VDEVS	1
3897c478bdstevel@tonic-gate
3907c478bdstevel@tonic-gateextern void spa_config_sync(spa_t *, boolean_t, boolean_t);
3917c478bdstevel@tonic-gateextern void spa_config_load(void);
3927c478bdstevel@tonic-gateextern nvlist_t *spa_all_configs(uint64_t *);
3937c478bdstevel@tonic-gateextern void spa_config_set(spa_t *spa, nvlist_t *config);
3947c478bdstevel@tonic-gateextern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
3957c478bdstevel@tonic-gate    int getstats);
3967c478bdstevel@tonic-gateextern void spa_config_update(spa_t *spa, int what);
3977c478bdstevel@tonic-gateextern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
3987c478bdstevel@tonic-gate
3997c478bdstevel@tonic-gate/*
4007c478bdstevel@tonic-gate * Miscellaneous SPA routines in spa_misc.c
4017c478bdstevel@tonic-gate */
4027c478bdstevel@tonic-gate
4037c478bdstevel@tonic-gate/* Namespace manipulation */
4047c478bdstevel@tonic-gateextern spa_t *spa_lookup(const char *name);
4057c478bdstevel@tonic-gateextern spa_t *spa_add(const char *name, const char *altroot);
4067c478bdstevel@tonic-gateextern void spa_remove(spa_t *spa);
4077c478bdstevel@tonic-gateextern spa_t *spa_next(spa_t *prev);
4087c478bdstevel@tonic-gate
4097c478bdstevel@tonic-gate/* Refcount functions */
4107c478bdstevel@tonic-gateextern void spa_open_ref(spa_t *spa, void *tag);
4117c478bdstevel@tonic-gateextern void spa_close(spa_t *spa, void *tag);
4127c478bdstevel@tonic-gateextern boolean_t spa_refcount_zero(spa_t *spa);
4137c478bdstevel@tonic-gate
/*
 * Lock-selection bits.  There are SCL_LOCKS distinct bits; SCL_ALL is the
 * mask of all of them and SCL_STATE_ALL is the STATE, L2ARC and ZIO subset.
 */
#define	SCL_CONFIG	0x01
#define	SCL_STATE	0x02
#define	SCL_L2ARC	0x04		/* hack until L2ARC 2.0 */
#define	SCL_ALLOC	0x08
#define	SCL_ZIO		0x10
#define	SCL_FREE	0x20
#define	SCL_VDEV	0x40
#define	SCL_LOCKS	7
#define	SCL_ALL		((1 << SCL_LOCKS) - 1)
#define	SCL_STATE_ALL	(SCL_STATE | SCL_L2ARC | SCL_ZIO)

4257c478bdstevel@tonic-gate/* Pool configuration locks */
4267c478bdstevel@tonic-gateextern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
4277c478bdstevel@tonic-gateextern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
4287c478bdstevel@tonic-gateextern void spa_config_exit(spa_t *spa, int locks, void *tag);
4297c478bdstevel@tonic-gateextern int spa_config_held(spa_t *spa, int locks, krw_t rw);
4307c478bdstevel@tonic-gate
4317c478bdstevel@tonic-gate/* Pool vdev add/remove lock */
4327c478bdstevel@tonic-gateextern uint64_t spa_vdev_enter(spa_t *spa);
4337c478bdstevel@tonic-gateextern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
4347c478bdstevel@tonic-gate
4357c478bdstevel@tonic-gate/* Pool vdev state change lock */
4367c478bdstevel@tonic-gateextern void spa_vdev_state_enter(spa_t *spa);
4377c478bdstevel@tonic-gateextern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
4387c478bdstevel@tonic-gate
4397c478bdstevel@tonic-gate/* Accessor functions */
4407c478bdstevel@tonic-gateextern boolean_t spa_shutting_down(spa_t *spa);
4417c478bdstevel@tonic-gateextern struct dsl_pool *spa_get_dsl(spa_t *spa);
4427c478bdstevel@tonic-gateextern blkptr_t *spa_get_rootblkptr(spa_t *spa);
4437c478bdstevel@tonic-gateextern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
4447c478bdstevel@tonic-gateextern void spa_altroot(spa_t *, char *, size_t);
4457c478bdstevel@tonic-gateextern int spa_sync_pass(spa_t *spa);
4467c478bdstevel@tonic-gateextern char *spa_name(spa_t *spa);
4477c478bdstevel@tonic-gateextern uint64_t spa_guid(spa_t *spa);
4487c478bdstevel@tonic-gateextern uint64_t spa_last_synced_txg(spa_t *spa);
4497c478bdstevel@tonic-gateextern uint64_t spa_first_txg(spa_t *spa);
4507c478bdstevel@tonic-gateextern uint64_t spa_version(spa_t *spa);
4517c478bdstevel@tonic-gateextern pool_state_t spa_state(spa_t *spa);
4527c478bdstevel@tonic-gateextern uint64_t spa_freeze_txg(spa_t *spa);
4537c478bdstevel@tonic-gateextern uint64_t spa_get_alloc(spa_t *spa);
4547c478bdstevel@tonic-gateextern uint64_t spa_get_space(spa_t *spa);
4557c478bdstevel@tonic-gateextern uint64_t spa_get_dspace(spa_t *spa);
4567c478bdstevel@tonic-gateextern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
4577c478bdstevel@tonic-gateextern uint64_t spa_version(spa_t *spa);
4587c478bdstevel@tonic-gateextern int spa_max_replication(spa_t *spa);
4597c478bdstevel@tonic-gateextern int spa_busy(void);
4607c478bdstevel@tonic-gateextern uint8_t spa_get_failmode(spa_t *spa);
4617c478bdstevel@tonic-gateextern boolean_t spa_suspended(spa_t *spa);
4627c478bdstevel@tonic-gate
4637c478bdstevel@tonic-gate/* Miscellaneous support routines */
4647c478bdstevel@tonic-gateextern int spa_rename(const char *oldname, const char *newname);
4657c478bdstevel@tonic-gateextern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
4667c478bdstevel@tonic-gateextern char *spa_strdup(const char *);
4677c478bdstevel@tonic-gateextern void spa_strfree(char *);
4687c478bdstevel@tonic-gateextern uint64_t spa_get_random(uint64_t range);
4697c478bdstevel@tonic-gateextern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
4707c478bdstevel@tonic-gateextern void spa_freeze(spa_t *spa);
4717c478bdstevel@tonic-gateextern void spa_upgrade(spa_t *spa, uint64_t version);
4727c478bdstevel@tonic-gateextern void spa_evict_all(void);
4737c478bdstevel@tonic-gateextern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
4747c478bdstevel@tonic-gate    boolean_t l2cache);
4757c478bdstevel@tonic-gateextern boolean_t spa_has_spare(spa_t *, uint64_t guid);
4767c478bdstevel@tonic-gateextern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
4777c478bdstevel@tonic-gateextern boolean_t spa_has_slogs(spa_t *spa);
4787c478bdstevel@tonic-gateextern boolean_t spa_is_root(spa_t *spa);
4797c478bdstevel@tonic-gateextern boolean_t spa_writeable(spa_t *spa);
4807c478bdstevel@tonic-gateextern int spa_mode(spa_t *spa);
4817c478bdstevel@tonic-gate
4827c478bdstevel@tonic-gate/* history logging */
4837c478bdstevel@tonic-gatetypedef enum history_log_type {
4847c478bdstevel@tonic-gate	LOG_CMD_POOL_CREATE,
4857c478bdstevel@tonic-gate	LOG_CMD_NORMAL,
4867c478bdstevel@tonic-gate	LOG_INTERNAL
4877c478bdstevel@tonic-gate} history_log_type_t;
4887c478bdstevel@tonic-gate
4897c478bdstevel@tonic-gatetypedef struct history_arg {
4907c478bdstevel@tonic-gate	const char *ha_history_str;
4917c478bdstevel@tonic-gate	history_log_type_t ha_log_type;
4927c478bdstevel@tonic-gate	history_internal_events_t ha_event;
4937c478bdstevel@tonic-gate	char ha_zone[MAXPATHLEN];
4947c478bdstevel@tonic-gate} history_arg_t;
4957c478bdstevel@tonic-gate
4967c478bdstevel@tonic-gateextern char *spa_his_ievent_table[];
4977c478bdstevel@tonic-gate
4987c478bdstevel@tonic-gateextern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
4997c478bdstevel@tonic-gateextern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
5007c478bdstevel@tonic-gate    char *his_buf);
5017c478bdstevel@tonic-gateextern int spa_history_log(spa_t *spa, const char *his_buf,
5027c478bdstevel@tonic-gate    history_log_type_t what);
5037c478bdstevel@tonic-gateextern void spa_history_internal_log(history_internal_events_t event,
5047c478bdstevel@tonic-gate    spa_t *spa, dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
5057c478bdstevel@tonic-gateextern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
5067c478bdstevel@tonic-gate
5077c478bdstevel@tonic-gate/* error handling */
5087c478bdstevel@tonic-gatestruct zbookmark;
5097c478bdstevel@tonic-gatestruct zio;
5107c478bdstevel@tonic-gateextern void spa_log_error(spa_t *spa, struct zio *zio);
5117c478bdstevel@tonic-gateextern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
51255c01d4Menno Lageman    struct zio *zio, uint64_t stateoroffset, uint64_t length);
5137c478bdstevel@tonic-gateextern void zfs_post_remove(spa_t *spa, vdev_t *vd);
5147c478bdstevel@tonic-gateextern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
5157c478bdstevel@tonic-gateextern uint64_t spa_get_errlog_size(spa_t *spa);
5167c478bdstevel@tonic-gateextern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
5177c478bdstevel@tonic-gateextern void spa_errlog_rotate(spa_t *spa);
5187c478bdstevel@tonic-gateextern void spa_errlog_drain(spa_t *spa);
5197c478bdstevel@tonic-gateextern void spa_errlog_sync(spa_t *spa, uint64_t txg);
5207c478bdstevel@tonic-gateextern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
5217c478bdstevel@tonic-gate
/* vdev cache */
extern void vdev_cache_stat_init(void);
extern void vdev_cache_stat_fini(void);

/* Initialization and termination */
extern void spa_init(int flags);
extern void spa_fini(void);
/* (void), not (): an empty list would leave the arguments unchecked in C */
extern void spa_boot_init(void);

5317c478bdstevel@tonic-gate/* properties */
5327c478bdstevel@tonic-gateextern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
5337c478bdstevel@tonic-gateextern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
5347c478bdstevel@tonic-gateextern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
5357c478bdstevel@tonic-gateextern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
5367c478bdstevel@tonic-gate
5377c478bdstevel@tonic-gate/* asynchronous event notification */
5387c478bdstevel@tonic-gateextern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
5397c478bdstevel@tonic-gate
/*
 * dprintf_bp(bp, fmt, ...): debug print of a format string followed by a
 * rendering of the block pointer bp.
 *
 * With ZFS_DEBUG defined, and only when ZFS_DEBUG_DPRINTF is set in
 * zfs_flags, a BP_SPRINTF_LEN-byte buffer is allocated, filled by
 * sprintf_blkptr(), appended to the dprintf() output as " %s\n", and
 * freed again.  At least one argument must follow fmt, because
 * __VA_ARGS__ is followed by a comma in the expansion.
 *
 * Without ZFS_DEBUG the macro expands to nothing and its arguments are
 * not evaluated.
 */
#ifdef ZFS_DEBUG
#define	dprintf_bp(bp, fmt, ...) do {					\
	if (zfs_flags & ZFS_DEBUG_DPRINTF) {				\
		char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP);	\
		sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp));		\
		dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf);		\
		kmem_free(__blkbuf, BP_SPRINTF_LEN);			\
	}								\
_NOTE(CONSTCOND) } while (0)
#else
#define	dprintf_bp(bp, fmt, ...)
#endif

extern int spa_mode_global;			/* mode, e.g. FREAD | FWRITE */

5557c478bdstevel@tonic-gate#ifdef	__cplusplus
5567c478bdstevel@tonic-gate}
5577c478bdstevel@tonic-gate#endif
5587c478bdstevel@tonic-gate
5597c478bdstevel@tonic-gate#endif	/* _SYS_SPA_H */
5607c478bdstevel@tonic-gate