/*
 *  GRUB  --  GRand Unified Bootloader
 *  Copyright (C) 1999,2000,2001,2002,2003,2004  Free Software Foundation, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_SPA_H
#define	_SYS_SPA_H

/*
 * General-purpose 32-bit and 64-bit bitfield encodings.
 *
 * These macros rely on P2PHASE(x, align) and on the uint64_t type, neither
 * of which is defined in this header; the including file must provide them.
 * NOTE(review): P2PHASE is presumably the usual ((x) & ((align) - 1)) --
 * confirm against the definition used by the including file.
 *
 * In all of them, 'low' is the bit position of the field's least
 * significant bit and 'len' is the field width in bits.
 */
#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))

#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)

/*
 * Store 'val' into the field of lvalue 'x'.  The field is updated by
 * XOR-ing in the difference between its current contents and 'val', so
 * bits outside the field are left untouched.  'x' and 'low' are each
 * evaluated more than once: do not pass expressions with side effects.
 */
#define	BF32_SET(x, low, len, val)	\
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
#define	BF64_SET(x, low, len, val)	\
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))

/*
 * "Shift and bias" variants: the stored field holds (val >> shift) - bias,
 * and the getter returns (field + bias) << shift.
 */
#define	BF32_GET_SB(x, low, len, shift, bias)	\
	((BF32_GET(x, low, len) + (bias)) << (shift))
#define	BF64_GET_SB(x, low, len, shift, bias)	\
	((BF64_GET(x, low, len) + (bias)) << (shift))

#define	BF32_SET_SB(x, low, len, shift, bias, val)	\
	BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define	BF64_SET_SB(x, low, len, shift, bias, val)	\
	BF64_SET(x, low, len, ((val) >> (shift)) - (bias))

/*
 * We currently support nine block sizes, from 512 bytes to 128K.
 * We could go higher, but the benefits are near-zero and the cost
 * of COWing a giant block to modify one byte would become excessive.
 */
#define	SPA_MINBLOCKSHIFT	9
#define	SPA_MAXBLOCKSHIFT	17
#define	SPA_MINBLOCKSIZE	(1ULL << SPA_MINBLOCKSHIFT)
#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)

#define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)

/*
 * Size of block to hold the configuration data (a packed nvlist)
 */
#define	SPA_CONFIG_BLOCKSIZE	(1 << 14)

/*
 * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
 * The ASIZE encoding should be at least 64 times larger (6 more bits)
 * to support up to 4-way RAID-Z mirror mode with worst-case gang block
 * overhead, three DVAs per bp, plus one more bit in case we do anything
 * else that expands the ASIZE.
 */
#define	SPA_LSIZEBITS		16	/* LSIZE up to 32M (2^16 * 512)	*/
#define	SPA_PSIZEBITS		16	/* PSIZE up to 32M (2^16 * 512)	*/
#define	SPA_ASIZEBITS		24	/* ASIZE up to 64 times larger	*/

/*
 * All SPA data is represented by 128-bit data virtual addresses (DVAs).
 * The members of the dva_t should be considered opaque outside the SPA.
 */
typedef struct dva {
	uint64_t	dva_word[2];
} dva_t;

/*
 * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
 */
typedef struct zio_cksum {
	uint64_t	zc_word[4];
} zio_cksum_t;

/*
 * Each block is described by its DVAs, time of birth, checksum, etc.
 * The word-by-word, bit-by-bit layout of the blkptr is as follows:
 *
 *   64      56      48      40      32      24      16      8       0
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 0 |             vdev1             | GRID  |         ASIZE         |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 1 |G|                           offset1                           |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 2 |             vdev2             | GRID  |         ASIZE         |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 3 |G|                           offset2                           |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 4 |             vdev3             | GRID  |         ASIZE         |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 5 |G|                           offset3                           |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 6 |BDX|lvl| type  | cksum | comp  |     PSIZE     |     LSIZE     |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 7 |                            padding                            |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 8 |                            padding                            |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * 9 |                      physical birth txg                       |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * a |                       logical birth txg                       |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * b |                          fill count                           |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * c |                          checksum[0]                          |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * d |                          checksum[1]                          |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * e |                          checksum[2]                          |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 * f |                          checksum[3]                          |
 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * Legend:
 *
 * vdev		virtual device ID
 * offset	offset into virtual device
 * LSIZE	logical size
 * PSIZE	physical size (after compression)
 * ASIZE	allocated size (including RAID-Z parity and gang block headers)
 * GRID		RAID-Z layout information (reserved for future use)
 * cksum	checksum function
 * comp		compression function
 * G		gang block indicator
 * B		byteorder (endianness)
 * D		dedup
 * X		unused
 * lvl		level of indirection
 * type		DMU object type
 * phys birth	txg of block allocation; zero if same as logical birth txg
 * log. birth	transaction group in which the block was logically born
 * fill count	number of non-zero blocks under this bp
 * checksum[4]	256-bit checksum of the data this bp describes
 */
#define	SPA_BLKPTRSHIFT	7		/* blkptr_t is 128 bytes	*/
#define	SPA_DVAS_PER_BP	3		/* Number of DVAs in a bp	*/

typedef struct blkptr {
	dva_t		blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
	uint64_t	blk_prop;	/* size, compression, type, etc	    */
	uint64_t	blk_pad[2];	/* Extra space for the future	    */
	uint64_t	blk_phys_birth;	/* txg when block was allocated	    */
	uint64_t	blk_birth;	/* transaction group at birth	    */
	uint64_t	blk_fill;	/* fill count			    */
	zio_cksum_t	blk_cksum;	/* 256-bit checksum		    */
} blkptr_t;

/*
 * Macros to get and set fields in a bp or DVA.
 *
 * 'dva' and 'bp' are pointers.  The size and offset accessors work in
 * bytes: the stored field is the byte value shifted right by
 * SPA_MINBLOCKSHIFT (and, for LSIZE/PSIZE, biased by one).
 */
#define	DVA_GET_ASIZE(dva)	\
	BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_ASIZE(dva, x)	\
	BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
	SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GRID(dva)	BF64_GET((dva)->dva_word[0], 24, 8)
#define	DVA_SET_GRID(dva, x)	BF64_SET((dva)->dva_word[0], 24, 8, x)

#define	DVA_GET_VDEV(dva)	BF64_GET((dva)->dva_word[0], 32, 32)
#define	DVA_SET_VDEV(dva, x)	BF64_SET((dva)->dva_word[0], 32, 32, x)

#define	DVA_GET_OFFSET(dva)	\
	BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_OFFSET(dva, x)	\
	BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GANG(dva)	BF64_GET((dva)->dva_word[1], 63, 1)
#define	DVA_SET_GANG(dva, x)	BF64_SET((dva)->dva_word[1], 63, 1, x)

#define	BP_GET_LSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_LSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_PSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_PSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_COMPRESS(bp)		BF64_GET((bp)->blk_prop, 32, 8)
#define	BP_SET_COMPRESS(bp, x)		BF64_SET((bp)->blk_prop, 32, 8, x)

#define	BP_GET_CHECKSUM(bp)		BF64_GET((bp)->blk_prop, 40, 8)
#define	BP_SET_CHECKSUM(bp, x)		BF64_SET((bp)->blk_prop, 40, 8, x)

#define	BP_GET_TYPE(bp)			BF64_GET((bp)->blk_prop, 48, 8)
#define	BP_SET_TYPE(bp, x)		BF64_SET((bp)->blk_prop, 48, 8, x)

#define	BP_GET_LEVEL(bp)		BF64_GET((bp)->blk_prop, 56, 5)
#define	BP_SET_LEVEL(bp, x)		BF64_SET((bp)->blk_prop, 56, 5, x)

#define	BP_GET_PROP_BIT_61(bp)		BF64_GET((bp)->blk_prop, 61, 1)
#define	BP_SET_PROP_BIT_61(bp, x)	BF64_SET((bp)->blk_prop, 61, 1, x)

#define	BP_GET_DEDUP(bp)		BF64_GET((bp)->blk_prop, 62, 1)
#define	BP_SET_DEDUP(bp, x)		BF64_SET((bp)->blk_prop, 62, 1, x)

/* Yields 0 when bit 63 is clear and all-ones (-1) when it is set. */
#define	BP_GET_BYTEORDER(bp)	(0 - BF64_GET((bp)->blk_prop, 63, 1))
#define	BP_SET_BYTEORDER(bp, x)	BF64_SET((bp)->blk_prop, 63, 1, x)

/* A zero blk_phys_birth means "same as the logical birth txg". */
#define	BP_PHYSICAL_BIRTH(bp)		\
	((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)

/*
 * Record both birth txgs, storing 0 for the physical one when it equals
 * the logical one.  Expands to a brace block, not an expression, and
 * evaluates 'logical' and 'physical' twice.
 */
#define	BP_SET_BIRTH(bp, logical, physical)	\
{						\
	(bp)->blk_birth = (logical);		\
	(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}

/* Sum of the allocated sizes of all three DVAs. */
#define	BP_GET_ASIZE(bp)	\
	(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
		DVA_GET_ASIZE(&(bp)->blk_dva[2]))

/*
 * PSIZE for indirect blocks (level > 0) and for object types flagged as
 * metadata in dmu_ot[] (declared elsewhere), LSIZE otherwise.  This is an
 * expression: it must not carry a trailing semicolon.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))

/* Number of DVAs with a non-zero allocated size. */
#define	BP_GET_NDVAS(bp)	\
	(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
	!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
	!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))

/* Number of DVAs with the gang bit set. */
#define	BP_COUNT_GANG(bp)	\
	(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
	DVA_GET_GANG(&(bp)->blk_dva[1]) + \
	DVA_GET_GANG(&(bp)->blk_dva[2]))

#define	DVA_EQUAL(dva1, dva2)	\
	((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
	(dva1)->dva_word[0] == (dva2)->dva_word[0])

/* Two bps are equal when physical birth and all three DVAs match. */
#define	BP_EQUAL(bp1, bp2)	\
	(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) &&	\
	DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) &&	\
	DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) &&	\
	DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))

/*
 * Takes zio_cksum_t objects (not pointers).  OR-ing the word-wise
 * differences gives zero only when every word matches.
 */
#define	ZIO_CHECKSUM_EQUAL(zc1, zc2) \
	(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
	((zc1).zc_word[1] - (zc2).zc_word[1]) | \
	((zc1).zc_word[2] - (zc2).zc_word[2]) | \
	((zc1).zc_word[3] - (zc2).zc_word[3])))

#define	DVA_IS_VALID(dva)	(DVA_GET_ASIZE(dva) != 0)

/* Takes a pointer to a zio_cksum_t.  Expands to a brace block. */
#define	ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3)	\
{						\
	(zcp)->zc_word[0] = (w0);		\
	(zcp)->zc_word[1] = (w1);		\
	(zcp)->zc_word[2] = (w2);		\
	(zcp)->zc_word[3] = (w3);		\
}

#define	BP_IDENTITY(bp)		(&(bp)->blk_dva[0])
#define	BP_IS_GANG(bp)		DVA_GET_GANG(BP_IDENTITY(bp))
#define	BP_IS_HOLE(bp)		((bp)->blk_birth == 0)

/* Clear every field of the bp.  Expands to a brace block. */
#define	BP_ZERO(bp)				\
{						\
	(bp)->blk_dva[0].dva_word[0] = 0;	\
	(bp)->blk_dva[0].dva_word[1] = 0;	\
	(bp)->blk_dva[1].dva_word[0] = 0;	\
	(bp)->blk_dva[1].dva_word[1] = 0;	\
	(bp)->blk_dva[2].dva_word[0] = 0;	\
	(bp)->blk_dva[2].dva_word[1] = 0;	\
	(bp)->blk_prop = 0;			\
	(bp)->blk_pad[0] = 0;			\
	(bp)->blk_pad[1] = 0;			\
	(bp)->blk_phys_birth = 0;		\
	(bp)->blk_birth = 0;			\
	(bp)->blk_fill = 0;			\
	ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0);	\
}

/*
 * Note: the byteorder is either 0 or -1, both of which are palindromes.
 * This simplifies the endianness handling a bit.
 */
#ifdef _BIG_ENDIAN
#define	ZFS_HOST_BYTEORDER	(0ULL)
#else
#define	ZFS_HOST_BYTEORDER	(-1ULL)
#endif

#define	BP_SHOULD_BYTESWAP(bp)	(BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)

#define	BP_SPRINTF_LEN	320

#endif	/* _SYS_SPA_H */