/*
 * GRUB -- GRand Unified Bootloader
 * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
 */

/*
 * The zfs plug-in routines for GRUB are:
 *
 * zfs_mount() - locates a valid uberblock of the root pool and reads
 *		in its MOS at the memory address MOS.
 *
 * zfs_open() - locates a plain file object by following the MOS
 *		and places its dnode at the memory address DNODE.
38b1b8ab34Slling * 39b1b8ab34Slling * zfs_read() - read in the data blocks pointed by the DNODE. 40b1b8ab34Slling * 41b1b8ab34Slling * ZFS_SCRATCH is used as a working area. 42b1b8ab34Slling * 43b1b8ab34Slling * (memory addr) MOS DNODE ZFS_SCRATCH 44b1b8ab34Slling * | | | 45b1b8ab34Slling * +-------V---------V----------V---------------+ 46b1b8ab34Slling * memory | | dnode | dnode | scratch | 47b1b8ab34Slling * | | 512B | 512B | area | 48b1b8ab34Slling * +--------------------------------------------+ 49b1b8ab34Slling */ 50b1b8ab34Slling 51b1b8ab34Slling #ifdef FSYS_ZFS 52b1b8ab34Slling 53b1b8ab34Slling #include "shared.h" 54b1b8ab34Slling #include "filesys.h" 55b1b8ab34Slling #include "fsys_zfs.h" 56b1b8ab34Slling 57b1b8ab34Slling /* cache for a file block of the currently zfs_open()-ed file */ 58b1b8ab34Slling static void *file_buf = NULL; 59b1b8ab34Slling static uint64_t file_start = 0; 60b1b8ab34Slling static uint64_t file_end = 0; 61b1b8ab34Slling 62b1b8ab34Slling /* cache for a dnode block */ 63b1b8ab34Slling static dnode_phys_t *dnode_buf = NULL; 64b1b8ab34Slling static dnode_phys_t *dnode_mdn = NULL; 65b1b8ab34Slling static uint64_t dnode_start = 0; 66b1b8ab34Slling static uint64_t dnode_end = 0; 67b1b8ab34Slling 68e23347b1SEric Taylor static uint64_t pool_guid = 0; 69051aabe6Staylor static uberblock_t current_uberblock; 70b1b8ab34Slling static char *stackbase; 71b1b8ab34Slling 72b1b8ab34Slling decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = 73b1b8ab34Slling { 7415e6edf1Sgw {"inherit", 0}, /* ZIO_COMPRESS_INHERIT */ 75b1b8ab34Slling {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ 7615e6edf1Sgw {"off", 0}, /* ZIO_COMPRESS_OFF */ 7715e6edf1Sgw {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ 78a6f561b4SSašo Kiselkov {"empty", 0}, /* ZIO_COMPRESS_EMPTY */ 79a6f561b4SSašo Kiselkov {"gzip-1", 0}, /* ZIO_COMPRESS_GZIP_1 */ 80a6f561b4SSašo Kiselkov {"gzip-2", 0}, /* ZIO_COMPRESS_GZIP_2 */ 81a6f561b4SSašo Kiselkov {"gzip-3", 0}, /* ZIO_COMPRESS_GZIP_3 */ 
82a6f561b4SSašo Kiselkov {"gzip-4", 0}, /* ZIO_COMPRESS_GZIP_4 */ 83a6f561b4SSašo Kiselkov {"gzip-5", 0}, /* ZIO_COMPRESS_GZIP_5 */ 84a6f561b4SSašo Kiselkov {"gzip-6", 0}, /* ZIO_COMPRESS_GZIP_6 */ 85a6f561b4SSašo Kiselkov {"gzip-7", 0}, /* ZIO_COMPRESS_GZIP_7 */ 86a6f561b4SSašo Kiselkov {"gzip-8", 0}, /* ZIO_COMPRESS_GZIP_8 */ 87a6f561b4SSašo Kiselkov {"gzip-9", 0}, /* ZIO_COMPRESS_GZIP_9 */ 88a6f561b4SSašo Kiselkov {"zle", 0}, /* ZIO_COMPRESS_ZLE */ 89a6f561b4SSašo Kiselkov {"lz4", lz4_decompress} /* ZIO_COMPRESS_LZ4 */ 90b1b8ab34Slling }; 91b1b8ab34Slling 92cd9c78d9SLin Ling static int zio_read_data(blkptr_t *bp, void *buf, char *stack); 93cd9c78d9SLin Ling 94b1b8ab34Slling /* 95b1b8ab34Slling * Our own version of bcmp(). 96b1b8ab34Slling */ 97b1b8ab34Slling static int 98b1b8ab34Slling zfs_bcmp(const void *s1, const void *s2, size_t n) 99b1b8ab34Slling { 100b1b8ab34Slling const uchar_t *ps1 = s1; 101b1b8ab34Slling const uchar_t *ps2 = s2; 102b1b8ab34Slling 103b1b8ab34Slling if (s1 != s2 && n != 0) { 104b1b8ab34Slling do { 105b1b8ab34Slling if (*ps1++ != *ps2++) 106b1b8ab34Slling return (1); 107b1b8ab34Slling } while (--n != 0); 108b1b8ab34Slling } 109b1b8ab34Slling 110b1b8ab34Slling return (0); 111b1b8ab34Slling } 112b1b8ab34Slling 113b1b8ab34Slling /* 114b1b8ab34Slling * Our own version of log2(). Same thing as highbit()-1. 
115b1b8ab34Slling */ 116b1b8ab34Slling static int 117b1b8ab34Slling zfs_log2(uint64_t num) 118b1b8ab34Slling { 119b1b8ab34Slling int i = 0; 120b1b8ab34Slling 121b1b8ab34Slling while (num > 1) { 122b1b8ab34Slling i++; 123b1b8ab34Slling num = num >> 1; 124b1b8ab34Slling } 125b1b8ab34Slling 126b1b8ab34Slling return (i); 127b1b8ab34Slling } 128b1b8ab34Slling 129b1b8ab34Slling /* Checksum Functions */ 130b1b8ab34Slling static void 131b1b8ab34Slling zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) 132b1b8ab34Slling { 133b1b8ab34Slling ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 134b1b8ab34Slling } 135b1b8ab34Slling 136b1b8ab34Slling /* Checksum Table and Values */ 137b1b8ab34Slling zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 138ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "inherit"}, 139ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "on"}, 140ad135b5dSChristopher Siden {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"}, 141ad135b5dSChristopher Siden {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, 142ad135b5dSChristopher Siden {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, 143ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "zilog"}, 144ad135b5dSChristopher Siden {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, 145ad135b5dSChristopher Siden {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, 146ad135b5dSChristopher Siden {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}, 147ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "zilog2"}, 14845818ee1SMatthew Ahrens {{zio_checksum_off, zio_checksum_off}, 0, 0, "noparity"}, 14945818ee1SMatthew Ahrens {{zio_checksum_SHA512, NULL}, 0, 0, "SHA512"} 150b1b8ab34Slling }; 151b1b8ab34Slling 152b1b8ab34Slling /* 153b1b8ab34Slling * zio_checksum_verify: Provides support for checksum verification. 154b1b8ab34Slling * 15545818ee1SMatthew Ahrens * Fletcher2, Fletcher4, SHA-256 and SHA-512/256 are supported. 
156b1b8ab34Slling * 157b1b8ab34Slling * Return: 158b1b8ab34Slling * -1 = Failure 159b1b8ab34Slling * 0 = Success 160b1b8ab34Slling */ 161b1b8ab34Slling static int 162b1b8ab34Slling zio_checksum_verify(blkptr_t *bp, char *data, int size) 163b1b8ab34Slling { 164b1b8ab34Slling zio_cksum_t zc = bp->blk_cksum; 165cd9c78d9SLin Ling uint32_t checksum = BP_GET_CHECKSUM(bp); 166b1b8ab34Slling int byteswap = BP_SHOULD_BYTESWAP(bp); 1676e1f5caaSNeil Perrin zio_eck_t *zec = (zio_eck_t *)(data + size) - 1; 168b1b8ab34Slling zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 169b1b8ab34Slling zio_cksum_t actual_cksum, expected_cksum; 170b1b8ab34Slling 1715d7b4d43SMatthew Ahrens if (byteswap) { 1725d7b4d43SMatthew Ahrens grub_printf("byteswap not supported\n"); 173b1b8ab34Slling return (-1); 1745d7b4d43SMatthew Ahrens } 175b1b8ab34Slling 1765d7b4d43SMatthew Ahrens if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) { 1775d7b4d43SMatthew Ahrens grub_printf("checksum algorithm %u not supported\n", checksum); 178b1b8ab34Slling return (-1); 1795d7b4d43SMatthew Ahrens } 180b1b8ab34Slling 1816e1f5caaSNeil Perrin if (ci->ci_eck) { 1826e1f5caaSNeil Perrin expected_cksum = zec->zec_cksum; 1836e1f5caaSNeil Perrin zec->zec_cksum = zc; 184cd9c78d9SLin Ling ci->ci_func[0](data, size, &actual_cksum); 1856e1f5caaSNeil Perrin zec->zec_cksum = expected_cksum; 186b1b8ab34Slling zc = expected_cksum; 187b1b8ab34Slling } else { 188b1b8ab34Slling ci->ci_func[byteswap](data, size, &actual_cksum); 189b1b8ab34Slling } 190b1b8ab34Slling 191b1b8ab34Slling if ((actual_cksum.zc_word[0] - zc.zc_word[0]) | 192b1b8ab34Slling (actual_cksum.zc_word[1] - zc.zc_word[1]) | 193b1b8ab34Slling (actual_cksum.zc_word[2] - zc.zc_word[2]) | 194b1b8ab34Slling (actual_cksum.zc_word[3] - zc.zc_word[3])) 195b1b8ab34Slling return (-1); 196b1b8ab34Slling 197b1b8ab34Slling return (0); 198b1b8ab34Slling } 199b1b8ab34Slling 200b1b8ab34Slling /* 201e23347b1SEric Taylor * vdev_label_start returns the physical disk 
offset (in bytes) of 202e23347b1SEric Taylor * label "l". 203b1b8ab34Slling */ 204e7cbe64fSgw static uint64_t 205e23347b1SEric Taylor vdev_label_start(uint64_t psize, int l) 206b1b8ab34Slling { 207e23347b1SEric Taylor return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 208b1b8ab34Slling 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); 209b1b8ab34Slling } 210b1b8ab34Slling 211b1b8ab34Slling /* 212b1b8ab34Slling * vdev_uberblock_compare takes two uberblock structures and returns an integer 213b1b8ab34Slling * indicating the more recent of the two. 214b1b8ab34Slling * Return Value = 1 if ub2 is more recent 215b1b8ab34Slling * Return Value = -1 if ub1 is more recent 216b1b8ab34Slling * The most recent uberblock is determined using its transaction number and 217b1b8ab34Slling * timestamp. The uberblock with the highest transaction number is 218b1b8ab34Slling * considered "newer". If the transaction numbers of the two blocks match, the 219b1b8ab34Slling * timestamps are compared to determine the "newer" of the two. 220b1b8ab34Slling */ 221b1b8ab34Slling static int 222b1b8ab34Slling vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) 223b1b8ab34Slling { 224b1b8ab34Slling if (ub1->ub_txg < ub2->ub_txg) 225b1b8ab34Slling return (-1); 226b1b8ab34Slling if (ub1->ub_txg > ub2->ub_txg) 227b1b8ab34Slling return (1); 228b1b8ab34Slling 229b1b8ab34Slling if (ub1->ub_timestamp < ub2->ub_timestamp) 230b1b8ab34Slling return (-1); 231b1b8ab34Slling if (ub1->ub_timestamp > ub2->ub_timestamp) 232b1b8ab34Slling return (1); 233b1b8ab34Slling 234b1b8ab34Slling return (0); 235b1b8ab34Slling } 236b1b8ab34Slling 237b1b8ab34Slling /* 238b1b8ab34Slling * Three pieces of information are needed to verify an uberblock: the magic 239b1b8ab34Slling * number, the version number, and the checksum. 
240b1b8ab34Slling * 241b1b8ab34Slling * Return: 242b1b8ab34Slling * 0 - Success 243b1b8ab34Slling * -1 - Failure 244b1b8ab34Slling */ 245b1b8ab34Slling static int 24681b2d573SHans Rosenfeld uberblock_verify(uberblock_t *uber, uint64_t ub_size, uint64_t offset) 247b1b8ab34Slling { 248b1b8ab34Slling blkptr_t bp; 249b1b8ab34Slling 250b1b8ab34Slling BP_ZERO(&bp); 251b1b8ab34Slling BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); 252b1b8ab34Slling BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); 253b1b8ab34Slling ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); 254b1b8ab34Slling 25581b2d573SHans Rosenfeld if (zio_checksum_verify(&bp, (char *)uber, ub_size) != 0) 256b1b8ab34Slling return (-1); 257b1b8ab34Slling 258b1b8ab34Slling if (uber->ub_magic == UBERBLOCK_MAGIC && 259ad135b5dSChristopher Siden SPA_VERSION_IS_SUPPORTED(uber->ub_version)) 260b1b8ab34Slling return (0); 261b1b8ab34Slling 262b1b8ab34Slling return (-1); 263b1b8ab34Slling } 264b1b8ab34Slling 265b1b8ab34Slling /* 266b1b8ab34Slling * Find the best uberblock. 267b1b8ab34Slling * Return: 268b1b8ab34Slling * Success - Pointer to the best uberblock. 
269b1b8ab34Slling * Failure - NULL 270b1b8ab34Slling */ 27181b2d573SHans Rosenfeld static uberblock_t * 27281b2d573SHans Rosenfeld find_bestub(char *ub_array, uint64_t ashift, uint64_t sector) 273b1b8ab34Slling { 27481b2d573SHans Rosenfeld uberblock_t *ubbest = NULL; 27581b2d573SHans Rosenfeld uberblock_t *ubnext; 27681b2d573SHans Rosenfeld uint64_t offset, ub_size; 277e23347b1SEric Taylor int i; 278b1b8ab34Slling 27981b2d573SHans Rosenfeld ub_size = VDEV_UBERBLOCK_SIZE(ashift); 28081b2d573SHans Rosenfeld 28181b2d573SHans Rosenfeld for (i = 0; i < VDEV_UBERBLOCK_COUNT(ashift); i++) { 28281b2d573SHans Rosenfeld ubnext = (uberblock_t *)ub_array; 28381b2d573SHans Rosenfeld ub_array += ub_size; 284e23347b1SEric Taylor offset = (sector << SPA_MINBLOCKSHIFT) + 28581b2d573SHans Rosenfeld VDEV_UBERBLOCK_OFFSET(ashift, i); 28681b2d573SHans Rosenfeld 28781b2d573SHans Rosenfeld if (uberblock_verify(ubnext, ub_size, offset) != 0) 28881b2d573SHans Rosenfeld continue; 28981b2d573SHans Rosenfeld 29081b2d573SHans Rosenfeld if (ubbest == NULL || 29181b2d573SHans Rosenfeld vdev_uberblock_compare(ubnext, ubbest) > 0) 29281b2d573SHans Rosenfeld ubbest = ubnext; 293b1b8ab34Slling } 294b1b8ab34Slling 295b1b8ab34Slling return (ubbest); 296b1b8ab34Slling } 297b1b8ab34Slling 298b1b8ab34Slling /* 299cd9c78d9SLin Ling * Read a block of data based on the gang block address dva, 300cd9c78d9SLin Ling * and put its data in buf. 
301b1b8ab34Slling * 302b1b8ab34Slling * Return: 303b1b8ab34Slling * 0 - success 304cd9c78d9SLin Ling * 1 - failure 305b1b8ab34Slling */ 306b1b8ab34Slling static int 307cd9c78d9SLin Ling zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack) 308b1b8ab34Slling { 309cd9c78d9SLin Ling zio_gbh_phys_t *zio_gb; 310b1b8ab34Slling uint64_t offset, sector; 311cd9c78d9SLin Ling blkptr_t tmpbp; 312cd9c78d9SLin Ling int i; 313b1b8ab34Slling 314cd9c78d9SLin Ling zio_gb = (zio_gbh_phys_t *)stack; 315cd9c78d9SLin Ling stack += SPA_GANGBLOCKSIZE; 316cd9c78d9SLin Ling offset = DVA_GET_OFFSET(dva); 317ad135b5dSChristopher Siden sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); 318b1b8ab34Slling 319cd9c78d9SLin Ling /* read in the gang block header */ 320cd9c78d9SLin Ling if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) { 321cd9c78d9SLin Ling grub_printf("failed to read in a gang block header\n"); 322cd9c78d9SLin Ling return (1); 323cd9c78d9SLin Ling } 324cd9c78d9SLin Ling 325cd9c78d9SLin Ling /* self checksuming the gang block header */ 326cd9c78d9SLin Ling BP_ZERO(&tmpbp); 327cd9c78d9SLin Ling BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER); 328cd9c78d9SLin Ling BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER); 329cd9c78d9SLin Ling ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva), 330cd9c78d9SLin Ling DVA_GET_OFFSET(dva), bp->blk_birth, 0); 331cd9c78d9SLin Ling if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) { 332cd9c78d9SLin Ling grub_printf("failed to checksum a gang block header\n"); 333cd9c78d9SLin Ling return (1); 334cd9c78d9SLin Ling } 335cd9c78d9SLin Ling 336cd9c78d9SLin Ling for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { 33743466aaeSMax Grossman if (BP_IS_HOLE(&zio_gb->zg_blkptr[i])) 338cd9c78d9SLin Ling continue; 339cd9c78d9SLin Ling 340cd9c78d9SLin Ling if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack)) 341cd9c78d9SLin Ling return (1); 342cd9c78d9SLin Ling buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]); 343cd9c78d9SLin Ling } 
344cd9c78d9SLin Ling 345cd9c78d9SLin Ling return (0); 346cd9c78d9SLin Ling } 347cd9c78d9SLin Ling 348cd9c78d9SLin Ling /* 349cd9c78d9SLin Ling * Read in a block of raw data to buf. 350cd9c78d9SLin Ling * 351cd9c78d9SLin Ling * Return: 352cd9c78d9SLin Ling * 0 - success 353cd9c78d9SLin Ling * 1 - failure 354cd9c78d9SLin Ling */ 355cd9c78d9SLin Ling static int 356cd9c78d9SLin Ling zio_read_data(blkptr_t *bp, void *buf, char *stack) 357cd9c78d9SLin Ling { 358cd9c78d9SLin Ling int i, psize; 359cd9c78d9SLin Ling 360cd9c78d9SLin Ling psize = BP_GET_PSIZE(bp); 361ae8180dbSlling 362b1b8ab34Slling /* pick a good dva from the block pointer */ 363b1b8ab34Slling for (i = 0; i < SPA_DVAS_PER_BP; i++) { 364cd9c78d9SLin Ling uint64_t offset, sector; 365b1b8ab34Slling 366b1b8ab34Slling if (bp->blk_dva[i].dva_word[0] == 0 && 367b1b8ab34Slling bp->blk_dva[i].dva_word[1] == 0) 368b1b8ab34Slling continue; 369b1b8ab34Slling 370cd9c78d9SLin Ling if (DVA_GET_GANG(&bp->blk_dva[i])) { 371*d94527b3SDan Kimmel if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) != 0) 372*d94527b3SDan Kimmel continue; 373b1b8ab34Slling } else { 374cd9c78d9SLin Ling /* read in a data block */ 375cd9c78d9SLin Ling offset = DVA_GET_OFFSET(&bp->blk_dva[i]); 376ad135b5dSChristopher Siden sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); 377*d94527b3SDan Kimmel if (devread(sector, 0, psize, buf) == 0) 378*d94527b3SDan Kimmel continue; 379*d94527b3SDan Kimmel } 380*d94527b3SDan Kimmel 381*d94527b3SDan Kimmel /* verify that the checksum matches */ 382*d94527b3SDan Kimmel if (zio_checksum_verify(bp, buf, psize) == 0) { 383*d94527b3SDan Kimmel return (0); 384b1b8ab34Slling } 385b1b8ab34Slling } 386b1b8ab34Slling 387*d94527b3SDan Kimmel grub_printf("could not read block due to EIO or ECKSUM\n"); 388cd9c78d9SLin Ling return (1); 389cd9c78d9SLin Ling } 390cd9c78d9SLin Ling 3915d7b4d43SMatthew Ahrens /* 3925d7b4d43SMatthew Ahrens * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be 3935d7b4d43SMatthew Ahrens * 
more than BPE_PAYLOAD_SIZE bytes). 3945d7b4d43SMatthew Ahrens */ 3955d7b4d43SMatthew Ahrens static void 3965d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(const blkptr_t *bp, void *buf) 3975d7b4d43SMatthew Ahrens { 3985d7b4d43SMatthew Ahrens int psize, i; 3995d7b4d43SMatthew Ahrens uint8_t *buf8 = buf; 4005d7b4d43SMatthew Ahrens uint64_t w = 0; 4015d7b4d43SMatthew Ahrens const uint64_t *bp64 = (const uint64_t *)bp; 4025d7b4d43SMatthew Ahrens 4035d7b4d43SMatthew Ahrens psize = BPE_GET_PSIZE(bp); 4045d7b4d43SMatthew Ahrens 4055d7b4d43SMatthew Ahrens /* 4065d7b4d43SMatthew Ahrens * Decode the words of the block pointer into the byte array. 4075d7b4d43SMatthew Ahrens * Low bits of first word are the first byte (little endian). 4085d7b4d43SMatthew Ahrens */ 4095d7b4d43SMatthew Ahrens for (i = 0; i < psize; i++) { 4105d7b4d43SMatthew Ahrens if (i % sizeof (w) == 0) { 4115d7b4d43SMatthew Ahrens /* beginning of a word */ 4125d7b4d43SMatthew Ahrens w = *bp64; 4135d7b4d43SMatthew Ahrens bp64++; 4145d7b4d43SMatthew Ahrens if (!BPE_IS_PAYLOADWORD(bp, bp64)) 4155d7b4d43SMatthew Ahrens bp64++; 4165d7b4d43SMatthew Ahrens } 4175d7b4d43SMatthew Ahrens buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY); 4185d7b4d43SMatthew Ahrens } 4195d7b4d43SMatthew Ahrens } 4205d7b4d43SMatthew Ahrens 4215d7b4d43SMatthew Ahrens /* 4225d7b4d43SMatthew Ahrens * Fill in the buffer with the (decompressed) payload of the embedded 4235d7b4d43SMatthew Ahrens * blkptr_t. Takes into account compression and byteorder (the payload is 4245d7b4d43SMatthew Ahrens * treated as a stream of bytes). 4255d7b4d43SMatthew Ahrens * Return 0 on success, or ENOSPC if it won't fit in the buffer. 
4265d7b4d43SMatthew Ahrens */ 4275d7b4d43SMatthew Ahrens static int 4285d7b4d43SMatthew Ahrens decode_embedded_bp(const blkptr_t *bp, void *buf) 4295d7b4d43SMatthew Ahrens { 4305d7b4d43SMatthew Ahrens int comp; 4315d7b4d43SMatthew Ahrens int lsize, psize; 4325d7b4d43SMatthew Ahrens uint8_t *dst = buf; 4335d7b4d43SMatthew Ahrens uint64_t w = 0; 4345d7b4d43SMatthew Ahrens 4355d7b4d43SMatthew Ahrens lsize = BPE_GET_LSIZE(bp); 4365d7b4d43SMatthew Ahrens psize = BPE_GET_PSIZE(bp); 4375d7b4d43SMatthew Ahrens comp = BP_GET_COMPRESS(bp); 4385d7b4d43SMatthew Ahrens 4395d7b4d43SMatthew Ahrens if (comp != ZIO_COMPRESS_OFF) { 4405d7b4d43SMatthew Ahrens uint8_t dstbuf[BPE_PAYLOAD_SIZE]; 4415d7b4d43SMatthew Ahrens 4425d7b4d43SMatthew Ahrens if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || 4435d7b4d43SMatthew Ahrens decomp_table[comp].decomp_func == NULL) { 4445d7b4d43SMatthew Ahrens grub_printf("compression algorithm not supported\n"); 4455d7b4d43SMatthew Ahrens return (ERR_FSYS_CORRUPT); 4465d7b4d43SMatthew Ahrens } 4475d7b4d43SMatthew Ahrens 4485d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(bp, dstbuf); 4495d7b4d43SMatthew Ahrens decomp_table[comp].decomp_func(dstbuf, buf, psize, lsize); 4505d7b4d43SMatthew Ahrens } else { 4515d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(bp, buf); 4525d7b4d43SMatthew Ahrens } 4535d7b4d43SMatthew Ahrens 4545d7b4d43SMatthew Ahrens return (0); 4555d7b4d43SMatthew Ahrens } 4565d7b4d43SMatthew Ahrens 457cd9c78d9SLin Ling /* 458cd9c78d9SLin Ling * Read in a block of data, verify its checksum, decompress if needed, 459cd9c78d9SLin Ling * and put the uncompressed data in buf. 
460cd9c78d9SLin Ling * 461cd9c78d9SLin Ling * Return: 462cd9c78d9SLin Ling * 0 - success 463cd9c78d9SLin Ling * errnum - failure 464cd9c78d9SLin Ling */ 465cd9c78d9SLin Ling static int 466cd9c78d9SLin Ling zio_read(blkptr_t *bp, void *buf, char *stack) 467cd9c78d9SLin Ling { 468cd9c78d9SLin Ling int lsize, psize, comp; 469cd9c78d9SLin Ling char *retbuf; 470cd9c78d9SLin Ling 4715d7b4d43SMatthew Ahrens if (BP_IS_EMBEDDED(bp)) { 4725d7b4d43SMatthew Ahrens if (BPE_GET_ETYPE(bp) != BP_EMBEDDED_TYPE_DATA) { 4735d7b4d43SMatthew Ahrens grub_printf("unsupported embedded BP (type=%u)\n", 4745d7b4d43SMatthew Ahrens (int)BPE_GET_ETYPE(bp)); 4755d7b4d43SMatthew Ahrens return (ERR_FSYS_CORRUPT); 4765d7b4d43SMatthew Ahrens } 4775d7b4d43SMatthew Ahrens return (decode_embedded_bp(bp, buf)); 4785d7b4d43SMatthew Ahrens } 4795d7b4d43SMatthew Ahrens 480cd9c78d9SLin Ling comp = BP_GET_COMPRESS(bp); 481cd9c78d9SLin Ling lsize = BP_GET_LSIZE(bp); 482cd9c78d9SLin Ling psize = BP_GET_PSIZE(bp); 483cd9c78d9SLin Ling 484cd9c78d9SLin Ling if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || 485cd9c78d9SLin Ling (comp != ZIO_COMPRESS_OFF && 486cd9c78d9SLin Ling decomp_table[comp].decomp_func == NULL)) { 487cd9c78d9SLin Ling grub_printf("compression algorithm not supported\n"); 488cd9c78d9SLin Ling return (ERR_FSYS_CORRUPT); 489cd9c78d9SLin Ling } 490cd9c78d9SLin Ling 491cd9c78d9SLin Ling if ((char *)buf < stack && ((char *)buf) + lsize > stack) { 4925d7b4d43SMatthew Ahrens grub_printf("not enough memory to fit %u bytes on stack\n", 4935d7b4d43SMatthew Ahrens lsize); 494cd9c78d9SLin Ling return (ERR_WONT_FIT); 495cd9c78d9SLin Ling } 496cd9c78d9SLin Ling 497cd9c78d9SLin Ling retbuf = buf; 498cd9c78d9SLin Ling if (comp != ZIO_COMPRESS_OFF) { 499cd9c78d9SLin Ling buf = stack; 500cd9c78d9SLin Ling stack += psize; 501cd9c78d9SLin Ling } 502cd9c78d9SLin Ling 503ad135b5dSChristopher Siden if (zio_read_data(bp, buf, stack) != 0) { 504cd9c78d9SLin Ling grub_printf("zio_read_data failed\n"); 
505cd9c78d9SLin Ling return (ERR_FSYS_CORRUPT); 506cd9c78d9SLin Ling } 507cd9c78d9SLin Ling 508a6f561b4SSašo Kiselkov if (comp != ZIO_COMPRESS_OFF) { 509a6f561b4SSašo Kiselkov if (decomp_table[comp].decomp_func(buf, retbuf, psize, 510a6f561b4SSašo Kiselkov lsize) != 0) { 511a6f561b4SSašo Kiselkov grub_printf("zio_read decompression failed\n"); 512a6f561b4SSašo Kiselkov return (ERR_FSYS_CORRUPT); 513a6f561b4SSašo Kiselkov } 514a6f561b4SSašo Kiselkov } 515cd9c78d9SLin Ling 516cd9c78d9SLin Ling return (0); 517b1b8ab34Slling } 518b1b8ab34Slling 519b1b8ab34Slling /* 520b1b8ab34Slling * Get the block from a block id. 521b1b8ab34Slling * push the block onto the stack. 522b1b8ab34Slling * 523b1b8ab34Slling * Return: 524b1b8ab34Slling * 0 - success 525b1b8ab34Slling * errnum - failure 526b1b8ab34Slling */ 527b1b8ab34Slling static int 528b1b8ab34Slling dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack) 529b1b8ab34Slling { 530b1b8ab34Slling int idx, level; 531b1b8ab34Slling blkptr_t *bp_array = dn->dn_blkptr; 532b1b8ab34Slling int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 533b1b8ab34Slling blkptr_t *bp, *tmpbuf; 534b1b8ab34Slling 535b1b8ab34Slling bp = (blkptr_t *)stack; 536b1b8ab34Slling stack += sizeof (blkptr_t); 537b1b8ab34Slling 538b1b8ab34Slling tmpbuf = (blkptr_t *)stack; 539b1b8ab34Slling stack += 1<<dn->dn_indblkshift; 540b1b8ab34Slling 541b1b8ab34Slling for (level = dn->dn_nlevels - 1; level >= 0; level--) { 542b1b8ab34Slling idx = (blkid >> (epbs * level)) & ((1<<epbs)-1); 543b1b8ab34Slling *bp = bp_array[idx]; 544b1b8ab34Slling if (level == 0) 545b1b8ab34Slling tmpbuf = buf; 546ae8180dbSlling if (BP_IS_HOLE(bp)) { 547ae8180dbSlling grub_memset(buf, 0, 548ae8180dbSlling dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); 549ae8180dbSlling break; 550ae8180dbSlling } else if (errnum = zio_read(bp, tmpbuf, stack)) { 551b1b8ab34Slling return (errnum); 552ae8180dbSlling } 553b1b8ab34Slling 554b1b8ab34Slling bp_array = tmpbuf; 555b1b8ab34Slling } 
556b1b8ab34Slling 557b1b8ab34Slling return (0); 558b1b8ab34Slling } 559b1b8ab34Slling 560b1b8ab34Slling /* 561b1b8ab34Slling * mzap_lookup: Looks up property described by "name" and returns the value 562b1b8ab34Slling * in "value". 563b1b8ab34Slling * 564b1b8ab34Slling * Return: 565b1b8ab34Slling * 0 - success 566b1b8ab34Slling * errnum - failure 567b1b8ab34Slling */ 568b1b8ab34Slling static int 569ad135b5dSChristopher Siden mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name, 570b1b8ab34Slling uint64_t *value) 571b1b8ab34Slling { 572b1b8ab34Slling int i, chunks; 573b1b8ab34Slling mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; 574b1b8ab34Slling 575ad135b5dSChristopher Siden chunks = objsize / MZAP_ENT_LEN - 1; 576b1b8ab34Slling for (i = 0; i < chunks; i++) { 577b1b8ab34Slling if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { 578b1b8ab34Slling *value = mzap_ent[i].mze_value; 579b1b8ab34Slling return (0); 580b1b8ab34Slling } 581b1b8ab34Slling } 582b1b8ab34Slling 583b1b8ab34Slling return (ERR_FSYS_CORRUPT); 584b1b8ab34Slling } 585b1b8ab34Slling 586b1b8ab34Slling static uint64_t 587b1b8ab34Slling zap_hash(uint64_t salt, const char *name) 588b1b8ab34Slling { 589b1b8ab34Slling static uint64_t table[256]; 590b1b8ab34Slling const uint8_t *cp; 591b1b8ab34Slling uint8_t c; 592b1b8ab34Slling uint64_t crc = salt; 593b1b8ab34Slling 594b1b8ab34Slling if (table[128] == 0) { 595b1b8ab34Slling uint64_t *ct; 596b1b8ab34Slling int i, j; 597b1b8ab34Slling for (i = 0; i < 256; i++) { 598b1b8ab34Slling for (ct = table + i, *ct = i, j = 8; j > 0; j--) 599b1b8ab34Slling *ct = (*ct >> 1) ^ (-(*ct & 1) & 600b1b8ab34Slling ZFS_CRC64_POLY); 601b1b8ab34Slling } 602b1b8ab34Slling } 603b1b8ab34Slling 604b1b8ab34Slling if (crc == 0 || table[128] != ZFS_CRC64_POLY) { 605b1b8ab34Slling errnum = ERR_FSYS_CORRUPT; 606b1b8ab34Slling return (0); 607b1b8ab34Slling } 608b1b8ab34Slling 609b1b8ab34Slling for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++) 610b1b8ab34Slling crc = (crc >> 
8) ^ table[(crc ^ c) & 0xFF]; 611b1b8ab34Slling 612b1b8ab34Slling /* 613b1b8ab34Slling * Only use 28 bits, since we need 4 bits in the cookie for the 614b1b8ab34Slling * collision differentiator. We MUST use the high bits, since 615ad135b5dSChristopher Siden * those are the ones that we first pay attention to when 616ad135b5dSChristopher Siden * choosing the bucket. 617b1b8ab34Slling */ 618b24ab676SJeff Bonwick crc &= ~((1ULL << (64 - 28)) - 1); 619b1b8ab34Slling 620b1b8ab34Slling return (crc); 621b1b8ab34Slling } 622b1b8ab34Slling 623b1b8ab34Slling /* 624b1b8ab34Slling * Only to be used on 8-bit arrays. 625b1b8ab34Slling * array_len is actual len in bytes (not encoded le_value_length). 626b1b8ab34Slling * buf is null-terminated. 627b1b8ab34Slling */ 628b1b8ab34Slling static int 629b1b8ab34Slling zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk, 630b1b8ab34Slling int array_len, const char *buf) 631b1b8ab34Slling { 632b1b8ab34Slling int bseen = 0; 633b1b8ab34Slling 634b1b8ab34Slling while (bseen < array_len) { 635b1b8ab34Slling struct zap_leaf_array *la = 636b1b8ab34Slling &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array; 637b1b8ab34Slling int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); 638b1b8ab34Slling 639b1b8ab34Slling if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) 640b1b8ab34Slling return (0); 641b1b8ab34Slling 642b1b8ab34Slling if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0) 643b1b8ab34Slling break; 644b1b8ab34Slling chunk = la->la_next; 645b1b8ab34Slling bseen += toread; 646b1b8ab34Slling } 647b1b8ab34Slling return (bseen == array_len); 648b1b8ab34Slling } 649b1b8ab34Slling 650b1b8ab34Slling /* 651b1b8ab34Slling * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the 652b1b8ab34Slling * value for the property "name". 
653b1b8ab34Slling * 654b1b8ab34Slling * Return: 655b1b8ab34Slling * 0 - success 656b1b8ab34Slling * errnum - failure 657b1b8ab34Slling */ 658e7cbe64fSgw static int 659b1b8ab34Slling zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h, 660b1b8ab34Slling const char *name, uint64_t *value) 661b1b8ab34Slling { 662b1b8ab34Slling uint16_t chunk; 663b1b8ab34Slling struct zap_leaf_entry *le; 664b1b8ab34Slling 665b1b8ab34Slling /* Verify if this is a valid leaf block */ 666b1b8ab34Slling if (l->l_hdr.lh_block_type != ZBT_LEAF) 667b1b8ab34Slling return (ERR_FSYS_CORRUPT); 668b1b8ab34Slling if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC) 669b1b8ab34Slling return (ERR_FSYS_CORRUPT); 670b1b8ab34Slling 671b1b8ab34Slling for (chunk = l->l_hash[LEAF_HASH(blksft, h)]; 672b1b8ab34Slling chunk != CHAIN_END; chunk = le->le_next) { 673b1b8ab34Slling 674b1b8ab34Slling if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) 675b1b8ab34Slling return (ERR_FSYS_CORRUPT); 676b1b8ab34Slling 677b1b8ab34Slling le = ZAP_LEAF_ENTRY(l, blksft, chunk); 678b1b8ab34Slling 679b1b8ab34Slling /* Verify the chunk entry */ 680b1b8ab34Slling if (le->le_type != ZAP_CHUNK_ENTRY) 681b1b8ab34Slling return (ERR_FSYS_CORRUPT); 682b1b8ab34Slling 683b1b8ab34Slling if (le->le_hash != h) 684b1b8ab34Slling continue; 685b1b8ab34Slling 686b1b8ab34Slling if (zap_leaf_array_equal(l, blksft, le->le_name_chunk, 687b1b8ab34Slling le->le_name_length, name)) { 688b1b8ab34Slling 689b1b8ab34Slling struct zap_leaf_array *la; 690b1b8ab34Slling uint8_t *ip; 691b1b8ab34Slling 692b1b8ab34Slling if (le->le_int_size != 8 || le->le_value_length != 1) 693e37b211cStaylor return (ERR_FSYS_CORRUPT); 694b1b8ab34Slling 695b1b8ab34Slling /* get the uint64_t property value */ 696b1b8ab34Slling la = &ZAP_LEAF_CHUNK(l, blksft, 697b1b8ab34Slling le->le_value_chunk).l_array; 698b1b8ab34Slling ip = la->la_array; 699b1b8ab34Slling 700b1b8ab34Slling *value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 701b1b8ab34Slling (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 
32 | 702b1b8ab34Slling (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 703b1b8ab34Slling (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 704b1b8ab34Slling 705b1b8ab34Slling return (0); 706b1b8ab34Slling } 707b1b8ab34Slling } 708b1b8ab34Slling 709b1b8ab34Slling return (ERR_FSYS_CORRUPT); 710b1b8ab34Slling } 711b1b8ab34Slling 712b1b8ab34Slling /* 713b1b8ab34Slling * Fat ZAP lookup 714b1b8ab34Slling * 715b1b8ab34Slling * Return: 716b1b8ab34Slling * 0 - success 717b1b8ab34Slling * errnum - failure 718b1b8ab34Slling */ 719e7cbe64fSgw static int 720b1b8ab34Slling fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, 721ad135b5dSChristopher Siden const char *name, uint64_t *value, char *stack) 722b1b8ab34Slling { 723b1b8ab34Slling zap_leaf_phys_t *l; 724b1b8ab34Slling uint64_t hash, idx, blkid; 725b1b8ab34Slling int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT); 726b1b8ab34Slling 727b1b8ab34Slling /* Verify if this is a fat zap header block */ 728b24ab676SJeff Bonwick if (zap->zap_magic != (uint64_t)ZAP_MAGIC || 729b24ab676SJeff Bonwick zap->zap_flags != 0) 730b1b8ab34Slling return (ERR_FSYS_CORRUPT); 731b1b8ab34Slling 732b1b8ab34Slling hash = zap_hash(zap->zap_salt, name); 733b1b8ab34Slling if (errnum) 734b1b8ab34Slling return (errnum); 735b1b8ab34Slling 736b1b8ab34Slling /* get block id from index */ 737b1b8ab34Slling if (zap->zap_ptrtbl.zt_numblks != 0) { 738b1b8ab34Slling /* external pointer tables not supported */ 739b1b8ab34Slling return (ERR_FSYS_CORRUPT); 740b1b8ab34Slling } 741b1b8ab34Slling idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift); 742b1b8ab34Slling blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))]; 743b1b8ab34Slling 744b1b8ab34Slling /* Get the leaf block */ 745b1b8ab34Slling l = (zap_leaf_phys_t *)stack; 746b1b8ab34Slling stack += 1<<blksft; 747051aabe6Staylor if ((1<<blksft) < sizeof (zap_leaf_phys_t)) 748e37b211cStaylor return (ERR_FSYS_CORRUPT); 749b1b8ab34Slling if (errnum = dmu_read(zap_dnode, blkid, l, stack)) 750b1b8ab34Slling return 
(errnum); 751b1b8ab34Slling 752b1b8ab34Slling return (zap_leaf_lookup(l, blksft, hash, name, value)); 753b1b8ab34Slling } 754b1b8ab34Slling 755b1b8ab34Slling /* 756b1b8ab34Slling * Read in the data of a zap object and find the value for a matching 757b1b8ab34Slling * property name. 758b1b8ab34Slling * 759b1b8ab34Slling * Return: 760b1b8ab34Slling * 0 - success 761b1b8ab34Slling * errnum - failure 762b1b8ab34Slling */ 763b1b8ab34Slling static int 764ad135b5dSChristopher Siden zap_lookup(dnode_phys_t *zap_dnode, const char *name, uint64_t *val, 765ad135b5dSChristopher Siden char *stack) 766b1b8ab34Slling { 767b1b8ab34Slling uint64_t block_type; 768b1b8ab34Slling int size; 769b1b8ab34Slling void *zapbuf; 770b1b8ab34Slling 771b1b8ab34Slling /* Read in the first block of the zap object data. */ 772b1b8ab34Slling zapbuf = stack; 773b1b8ab34Slling size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; 774b1b8ab34Slling stack += size; 7750a586ceaSMark Shellenbaum 776ad135b5dSChristopher Siden if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) != 0) 777b1b8ab34Slling return (errnum); 778b1b8ab34Slling 779b1b8ab34Slling block_type = *((uint64_t *)zapbuf); 780b1b8ab34Slling 781b1b8ab34Slling if (block_type == ZBT_MICRO) { 782b1b8ab34Slling return (mzap_lookup(zapbuf, size, name, val)); 783b1b8ab34Slling } else if (block_type == ZBT_HEADER) { 784b1b8ab34Slling /* this is a fat zap */ 785b1b8ab34Slling return (fzap_lookup(zap_dnode, zapbuf, name, 786b1b8ab34Slling val, stack)); 787b1b8ab34Slling } 788b1b8ab34Slling 789b1b8ab34Slling return (ERR_FSYS_CORRUPT); 790b1b8ab34Slling } 791b1b8ab34Slling 792ad135b5dSChristopher Siden typedef struct zap_attribute { 793ad135b5dSChristopher Siden int za_integer_length; 794ad135b5dSChristopher Siden uint64_t za_num_integers; 795ad135b5dSChristopher Siden uint64_t za_first_integer; 796ad135b5dSChristopher Siden char *za_name; 797ad135b5dSChristopher Siden } zap_attribute_t; 798ad135b5dSChristopher Siden 799ad135b5dSChristopher Siden 
typedef int (zap_cb_t)(zap_attribute_t *za, void *arg, char *stack); 800ad135b5dSChristopher Siden 801ad135b5dSChristopher Siden static int 802ad135b5dSChristopher Siden zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack) 803ad135b5dSChristopher Siden { 804ad135b5dSChristopher Siden uint32_t size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; 805ad135b5dSChristopher Siden zap_attribute_t za; 806ad135b5dSChristopher Siden int i; 807ad135b5dSChristopher Siden mzap_phys_t *mzp = (mzap_phys_t *)stack; 808ad135b5dSChristopher Siden stack += size; 809ad135b5dSChristopher Siden 810ad135b5dSChristopher Siden if ((errnum = dmu_read(zap_dnode, 0, mzp, stack)) != 0) 811ad135b5dSChristopher Siden return (errnum); 812ad135b5dSChristopher Siden 813ad135b5dSChristopher Siden /* 814ad135b5dSChristopher Siden * Iteration over fatzap objects has not yet been implemented. 815ad135b5dSChristopher Siden * If we encounter a pool in which there are more features for 816ad135b5dSChristopher Siden * read than can fit inside a microzap (i.e., more than 2048 817ad135b5dSChristopher Siden * features for read), we can add support for fatzap iteration. 818ad135b5dSChristopher Siden * For now, fail. 
819ad135b5dSChristopher Siden */ 820ad135b5dSChristopher Siden if (mzp->mz_block_type != ZBT_MICRO) { 821ad135b5dSChristopher Siden grub_printf("feature information stored in fatzap, pool " 822ad135b5dSChristopher Siden "version not supported\n"); 823ad135b5dSChristopher Siden return (1); 824ad135b5dSChristopher Siden } 825ad135b5dSChristopher Siden 826ad135b5dSChristopher Siden za.za_integer_length = 8; 827ad135b5dSChristopher Siden za.za_num_integers = 1; 828ad135b5dSChristopher Siden for (i = 0; i < size / MZAP_ENT_LEN - 1; i++) { 829ad135b5dSChristopher Siden mzap_ent_phys_t *mzep = &mzp->mz_chunk[i]; 830ad135b5dSChristopher Siden int err; 831ad135b5dSChristopher Siden 832ad135b5dSChristopher Siden za.za_first_integer = mzep->mze_value; 833ad135b5dSChristopher Siden za.za_name = mzep->mze_name; 834ad135b5dSChristopher Siden err = cb(&za, arg, stack); 835ad135b5dSChristopher Siden if (err != 0) 836ad135b5dSChristopher Siden return (err); 837ad135b5dSChristopher Siden } 838ad135b5dSChristopher Siden 839ad135b5dSChristopher Siden return (0); 840ad135b5dSChristopher Siden } 841ad135b5dSChristopher Siden 842b1b8ab34Slling /* 843b1b8ab34Slling * Get the dnode of an object number from the metadnode of an object set. 
844b1b8ab34Slling * 845b1b8ab34Slling * Input 846b1b8ab34Slling * mdn - metadnode to get the object dnode 847b1b8ab34Slling * objnum - object number for the object dnode 8485d7b4d43SMatthew Ahrens * type - if nonzero, object must be of this type 849b1b8ab34Slling * buf - data buffer that holds the returning dnode 850b1b8ab34Slling * stack - scratch area 851b1b8ab34Slling * 852b1b8ab34Slling * Return: 853b1b8ab34Slling * 0 - success 854b1b8ab34Slling * errnum - failure 855b1b8ab34Slling */ 856b1b8ab34Slling static int 857b1b8ab34Slling dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf, 858b1b8ab34Slling char *stack) 859b1b8ab34Slling { 860b1b8ab34Slling uint64_t blkid, blksz; /* the block id this object dnode is in */ 861b1b8ab34Slling int epbs; /* shift of number of dnodes in a block */ 862b1b8ab34Slling int idx; /* index within a block */ 863b1b8ab34Slling dnode_phys_t *dnbuf; 864b1b8ab34Slling 865b1b8ab34Slling blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT; 866b1b8ab34Slling epbs = zfs_log2(blksz) - DNODE_SHIFT; 867b1b8ab34Slling blkid = objnum >> epbs; 868b1b8ab34Slling idx = objnum & ((1<<epbs)-1); 869b1b8ab34Slling 870b1b8ab34Slling if (dnode_buf != NULL && dnode_mdn == mdn && 871b1b8ab34Slling objnum >= dnode_start && objnum < dnode_end) { 872b1b8ab34Slling grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE); 873b1b8ab34Slling VERIFY_DN_TYPE(buf, type); 874b1b8ab34Slling return (0); 875b1b8ab34Slling } 876b1b8ab34Slling 877b1b8ab34Slling if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) { 878b1b8ab34Slling dnbuf = dnode_buf; 879b1b8ab34Slling dnode_mdn = mdn; 880b1b8ab34Slling dnode_start = blkid << epbs; 881b1b8ab34Slling dnode_end = (blkid + 1) << epbs; 882b1b8ab34Slling } else { 883b1b8ab34Slling dnbuf = (dnode_phys_t *)stack; 884b1b8ab34Slling stack += blksz; 885b1b8ab34Slling } 886b1b8ab34Slling 887b1b8ab34Slling if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)) 888b1b8ab34Slling return (errnum); 889b1b8ab34Slling 
890b1b8ab34Slling grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); 891b1b8ab34Slling VERIFY_DN_TYPE(buf, type); 892b1b8ab34Slling 893b1b8ab34Slling return (0); 894b1b8ab34Slling } 895b1b8ab34Slling 896b1b8ab34Slling /* 897eb2bd662Svikram * Check if this is a special file that resides at the top 898eb2bd662Svikram * dataset of the pool. Currently this is the GRUB menu, 899eb2bd662Svikram * boot signature and boot signature backup. 900b1b8ab34Slling * str starts with '/'. 901b1b8ab34Slling */ 902b1b8ab34Slling static int 903eb2bd662Svikram is_top_dataset_file(char *str) 904b1b8ab34Slling { 905b1b8ab34Slling char *tptr; 906b1b8ab34Slling 907b1b8ab34Slling if ((tptr = grub_strstr(str, "menu.lst")) && 908b1b8ab34Slling (tptr[8] == '\0' || tptr[8] == ' ') && 909b1b8ab34Slling *(tptr-1) == '/') 910b1b8ab34Slling return (1); 911b1b8ab34Slling 912eb2bd662Svikram if (grub_strncmp(str, BOOTSIGN_DIR"/", 9131183b401Svikram grub_strlen(BOOTSIGN_DIR) + 1) == 0) 914eb2bd662Svikram return (1); 915eb2bd662Svikram 916eb2bd662Svikram if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0) 917eb2bd662Svikram return (1); 918eb2bd662Svikram 919b1b8ab34Slling return (0); 920b1b8ab34Slling } 921b1b8ab34Slling 922ad135b5dSChristopher Siden static int 923ad135b5dSChristopher Siden check_feature(zap_attribute_t *za, void *arg, char *stack) 924ad135b5dSChristopher Siden { 925ad135b5dSChristopher Siden const char **names = arg; 926ad135b5dSChristopher Siden int i; 927ad135b5dSChristopher Siden 928ad135b5dSChristopher Siden if (za->za_first_integer == 0) 929ad135b5dSChristopher Siden return (0); 930ad135b5dSChristopher Siden 931ad135b5dSChristopher Siden for (i = 0; names[i] != NULL; i++) { 932ad135b5dSChristopher Siden if (grub_strcmp(za->za_name, names[i]) == 0) { 933ad135b5dSChristopher Siden return (0); 934ad135b5dSChristopher Siden } 935ad135b5dSChristopher Siden } 936ad135b5dSChristopher Siden grub_printf("missing feature for read '%s'\n", za->za_name); 937ad135b5dSChristopher Siden return 
(ERR_NEWER_VERSION); 938ad135b5dSChristopher Siden } 939ad135b5dSChristopher Siden 940b1b8ab34Slling /* 941b1b8ab34Slling * Get the file dnode for a given file name where mdn is the meta dnode 942b1b8ab34Slling * for this ZFS object set. When found, place the file dnode in dn. 943b1b8ab34Slling * The 'path' argument will be mangled. 944b1b8ab34Slling * 945b1b8ab34Slling * Return: 946b1b8ab34Slling * 0 - success 947b1b8ab34Slling * errnum - failure 948b1b8ab34Slling */ 949b1b8ab34Slling static int 950b1b8ab34Slling dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn, 951b1b8ab34Slling char *stack) 952b1b8ab34Slling { 953e7437265Sahrens uint64_t objnum, version; 954b1b8ab34Slling char *cname, ch; 955b1b8ab34Slling 956b1b8ab34Slling if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, 957b1b8ab34Slling dn, stack)) 958b1b8ab34Slling return (errnum); 959b1b8ab34Slling 960e7437265Sahrens if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)) 961e7437265Sahrens return (errnum); 962e7437265Sahrens if (version > ZPL_VERSION) 963e7437265Sahrens return (-1); 964e7437265Sahrens 965b1b8ab34Slling if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)) 966b1b8ab34Slling return (errnum); 967b1b8ab34Slling 968b1b8ab34Slling if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, 969b1b8ab34Slling dn, stack)) 970b1b8ab34Slling return (errnum); 971b1b8ab34Slling 972b1b8ab34Slling /* skip leading slashes */ 973b1b8ab34Slling while (*path == '/') 974b1b8ab34Slling path++; 975b1b8ab34Slling 976ad135b5dSChristopher Siden while (*path && !grub_isspace(*path)) { 977b1b8ab34Slling 978b1b8ab34Slling /* get the next component name */ 979b1b8ab34Slling cname = path; 980ad135b5dSChristopher Siden while (*path && !grub_isspace(*path) && *path != '/') 981b1b8ab34Slling path++; 982b1b8ab34Slling ch = *path; 983b1b8ab34Slling *path = 0; /* ensure null termination */ 984b1b8ab34Slling 985b1b8ab34Slling if (errnum = zap_lookup(dn, cname, &objnum, stack)) 
986b1b8ab34Slling return (errnum); 987b1b8ab34Slling 988e7437265Sahrens objnum = ZFS_DIRENT_OBJ(objnum); 989b1b8ab34Slling if (errnum = dnode_get(mdn, objnum, 0, dn, stack)) 990b1b8ab34Slling return (errnum); 991b1b8ab34Slling 992b1b8ab34Slling *path = ch; 993b1b8ab34Slling while (*path == '/') 994b1b8ab34Slling path++; 995b1b8ab34Slling } 996b1b8ab34Slling 997b1b8ab34Slling /* We found the dnode for this file. Verify if it is a plain file. */ 998b1b8ab34Slling VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS); 999b1b8ab34Slling 1000b1b8ab34Slling return (0); 1001b1b8ab34Slling } 1002b1b8ab34Slling 1003b1b8ab34Slling /* 1004b1b8ab34Slling * Get the default 'bootfs' property value from the rootpool. 1005b1b8ab34Slling * 1006b1b8ab34Slling * Return: 1007b1b8ab34Slling * 0 - success 1008b1b8ab34Slling * errnum -failure 1009b1b8ab34Slling */ 1010b1b8ab34Slling static int 1011b1b8ab34Slling get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack) 1012b1b8ab34Slling { 1013b1b8ab34Slling uint64_t objnum = 0; 1014b1b8ab34Slling dnode_phys_t *dn = (dnode_phys_t *)stack; 1015b1b8ab34Slling stack += DNODE_SIZE; 1016b1b8ab34Slling 1017ae8180dbSlling if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 1018b1b8ab34Slling DMU_OT_OBJECT_DIRECTORY, dn, stack)) 1019ae8180dbSlling return (errnum); 1020b1b8ab34Slling 1021b1b8ab34Slling /* 1022b1b8ab34Slling * find the object number for 'pool_props', and get the dnode 1023b1b8ab34Slling * of the 'pool_props'. 
1024b1b8ab34Slling */ 1025b1b8ab34Slling if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack)) 1026b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 1027b1b8ab34Slling 1028ae8180dbSlling if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)) 1029ae8180dbSlling return (errnum); 1030b1b8ab34Slling 1031b1b8ab34Slling if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) 1032b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 1033b1b8ab34Slling 1034b1b8ab34Slling if (!objnum) 1035b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 1036b1b8ab34Slling 1037b1b8ab34Slling *obj = objnum; 1038b1b8ab34Slling return (0); 1039b1b8ab34Slling } 1040b1b8ab34Slling 1041ad135b5dSChristopher Siden /* 1042ad135b5dSChristopher Siden * List of pool features that the grub implementation of ZFS supports for 1043ad135b5dSChristopher Siden * read. Note that features that are only required for write do not need 1044ad135b5dSChristopher Siden * to be listed here since grub opens pools in read-only mode. 104533915f34SRichard Lowe * 104633915f34SRichard Lowe * When this list is updated the version number in usr/src/grub/capability 104733915f34SRichard Lowe * must be incremented to ensure the new grub gets installed. 1048ad135b5dSChristopher Siden */ 1049ad135b5dSChristopher Siden static const char *spa_feature_names[] = { 1050a6f561b4SSašo Kiselkov "org.illumos:lz4_compress", 105143466aaeSMax Grossman "com.delphix:hole_birth", 10522acef22dSMatthew Ahrens "com.delphix:extensible_dataset", 10535d7b4d43SMatthew Ahrens "com.delphix:embedded_data", 1054b5152584SMatthew Ahrens "org.open-zfs:large_blocks", 105545818ee1SMatthew Ahrens "org.illumos:sha512", 1056ad135b5dSChristopher Siden NULL 1057ad135b5dSChristopher Siden }; 1058ad135b5dSChristopher Siden 1059ad135b5dSChristopher Siden /* 1060ad135b5dSChristopher Siden * Checks whether the MOS features that are active are supported by this 1061ad135b5dSChristopher Siden * (GRUB's) implementation of ZFS. 
1062ad135b5dSChristopher Siden * 1063ad135b5dSChristopher Siden * Return: 1064ad135b5dSChristopher Siden * 0: Success. 1065ad135b5dSChristopher Siden * errnum: Failure. 1066ad135b5dSChristopher Siden */ 1067ad135b5dSChristopher Siden static int 1068ad135b5dSChristopher Siden check_mos_features(dnode_phys_t *mosmdn, char *stack) 1069ad135b5dSChristopher Siden { 1070ad135b5dSChristopher Siden uint64_t objnum; 1071ad135b5dSChristopher Siden dnode_phys_t *dn; 1072ad135b5dSChristopher Siden uint8_t error = 0; 1073ad135b5dSChristopher Siden 1074ad135b5dSChristopher Siden dn = (dnode_phys_t *)stack; 1075ad135b5dSChristopher Siden stack += DNODE_SIZE; 1076ad135b5dSChristopher Siden 1077ad135b5dSChristopher Siden if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 1078ad135b5dSChristopher Siden DMU_OT_OBJECT_DIRECTORY, dn, stack)) != 0) 1079ad135b5dSChristopher Siden return (errnum); 1080ad135b5dSChristopher Siden 1081ad135b5dSChristopher Siden /* 1082ad135b5dSChristopher Siden * Find the object number for 'features_for_read' and retrieve its 1083ad135b5dSChristopher Siden * corresponding dnode. Note that we don't check features_for_write 1084ad135b5dSChristopher Siden * because GRUB is not opening the pool for write. 
1085ad135b5dSChristopher Siden */ 1086ad135b5dSChristopher Siden if ((errnum = zap_lookup(dn, DMU_POOL_FEATURES_FOR_READ, &objnum, 1087ad135b5dSChristopher Siden stack)) != 0) 1088ad135b5dSChristopher Siden return (errnum); 1089ad135b5dSChristopher Siden 1090ad135b5dSChristopher Siden if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA, 1091ad135b5dSChristopher Siden dn, stack)) != 0) 1092ad135b5dSChristopher Siden return (errnum); 1093ad135b5dSChristopher Siden 1094ad135b5dSChristopher Siden return (zap_iterate(dn, check_feature, spa_feature_names, stack)); 1095ad135b5dSChristopher Siden } 1096ad135b5dSChristopher Siden 1097b1b8ab34Slling /* 1098b1b8ab34Slling * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), 1099b1b8ab34Slling * e.g. pool/rootfs, or a given object number (obj), e.g. the object number 1100b1b8ab34Slling * of pool/rootfs. 1101b1b8ab34Slling * 1102b1b8ab34Slling * If no fsname and no obj are given, return the DSL_DIR metadnode. 1103b1b8ab34Slling * If fsname is given, return its metadnode and its matching object number. 1104b1b8ab34Slling * If only obj is given, return the metadnode for this object number. 
1105b1b8ab34Slling * 1106b1b8ab34Slling * Return: 1107b1b8ab34Slling * 0 - success 1108b1b8ab34Slling * errnum - failure 1109b1b8ab34Slling */ 1110b1b8ab34Slling static int 1111b1b8ab34Slling get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, uint64_t *obj, 1112b1b8ab34Slling dnode_phys_t *mdn, char *stack) 1113b1b8ab34Slling { 1114b1b8ab34Slling uint64_t objnum, headobj; 1115b1b8ab34Slling char *cname, ch; 1116b1b8ab34Slling blkptr_t *bp; 1117b1b8ab34Slling objset_phys_t *osp; 1118fe3e2633SEric Taylor int issnapshot = 0; 1119fe3e2633SEric Taylor char *snapname; 1120b1b8ab34Slling 1121b1b8ab34Slling if (fsname == NULL && obj) { 1122b1b8ab34Slling headobj = *obj; 1123b1b8ab34Slling goto skip; 1124b1b8ab34Slling } 1125b1b8ab34Slling 1126b1b8ab34Slling if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 1127b1b8ab34Slling DMU_OT_OBJECT_DIRECTORY, mdn, stack)) 1128b1b8ab34Slling return (errnum); 1129b1b8ab34Slling 1130b1b8ab34Slling if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, 1131b1b8ab34Slling stack)) 1132b1b8ab34Slling return (errnum); 1133b1b8ab34Slling 11342acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, objnum, 0, mdn, stack)) 1135b1b8ab34Slling return (errnum); 1136b1b8ab34Slling 1137b1b8ab34Slling if (fsname == NULL) { 1138b1b8ab34Slling headobj = 1139b1b8ab34Slling ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; 1140b1b8ab34Slling goto skip; 1141b1b8ab34Slling } 1142b1b8ab34Slling 1143b1b8ab34Slling /* take out the pool name */ 1144ad135b5dSChristopher Siden while (*fsname && !grub_isspace(*fsname) && *fsname != '/') 1145b1b8ab34Slling fsname++; 1146b1b8ab34Slling 1147ad135b5dSChristopher Siden while (*fsname && !grub_isspace(*fsname)) { 1148b1b8ab34Slling uint64_t childobj; 1149b1b8ab34Slling 1150b1b8ab34Slling while (*fsname == '/') 1151b1b8ab34Slling fsname++; 1152b1b8ab34Slling 1153b1b8ab34Slling cname = fsname; 1154ad135b5dSChristopher Siden while (*fsname && !grub_isspace(*fsname) && *fsname != '/') 1155b1b8ab34Slling 
fsname++; 1156b1b8ab34Slling ch = *fsname; 1157b1b8ab34Slling *fsname = 0; 1158b1b8ab34Slling 1159fe3e2633SEric Taylor snapname = cname; 1160ad135b5dSChristopher Siden while (*snapname && !grub_isspace(*snapname) && *snapname != 1161ad135b5dSChristopher Siden '@') 1162fe3e2633SEric Taylor snapname++; 1163fe3e2633SEric Taylor if (*snapname == '@') { 1164fe3e2633SEric Taylor issnapshot = 1; 1165fe3e2633SEric Taylor *snapname = 0; 1166fe3e2633SEric Taylor } 1167b1b8ab34Slling childobj = 1168b1b8ab34Slling ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; 1169b1b8ab34Slling if (errnum = dnode_get(mosmdn, childobj, 1170b1b8ab34Slling DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)) 1171b1b8ab34Slling return (errnum); 1172b1b8ab34Slling 1173ae8180dbSlling if (zap_lookup(mdn, cname, &objnum, stack)) 1174ae8180dbSlling return (ERR_FILESYSTEM_NOT_FOUND); 1175b1b8ab34Slling 11762acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, objnum, 0, 1177b1b8ab34Slling mdn, stack)) 1178b1b8ab34Slling return (errnum); 1179b1b8ab34Slling 1180b1b8ab34Slling *fsname = ch; 1181fe3e2633SEric Taylor if (issnapshot) 1182fe3e2633SEric Taylor *snapname = '@'; 1183b1b8ab34Slling } 1184b1b8ab34Slling headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; 1185b1b8ab34Slling if (obj) 1186b1b8ab34Slling *obj = headobj; 1187b1b8ab34Slling 1188b1b8ab34Slling skip: 11892acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, headobj, 0, mdn, stack)) 1190b1b8ab34Slling return (errnum); 1191fe3e2633SEric Taylor if (issnapshot) { 1192fe3e2633SEric Taylor uint64_t snapobj; 1193fe3e2633SEric Taylor 1194fe3e2633SEric Taylor snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> 1195fe3e2633SEric Taylor ds_snapnames_zapobj; 1196b1b8ab34Slling 1197fe3e2633SEric Taylor if (errnum = dnode_get(mosmdn, snapobj, 1198fe3e2633SEric Taylor DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)) 1199fe3e2633SEric Taylor return (errnum); 1200fe3e2633SEric Taylor if (zap_lookup(mdn, snapname + 1, &headobj, stack)) 1201fe3e2633SEric 
Taylor return (ERR_FILESYSTEM_NOT_FOUND); 12022acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, headobj, 0, mdn, stack)) 1203fe3e2633SEric Taylor return (errnum); 1204fe3e2633SEric Taylor if (obj) 1205fe3e2633SEric Taylor *obj = headobj; 1206fe3e2633SEric Taylor } 1207b1b8ab34Slling 1208b1b8ab34Slling bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; 1209b1b8ab34Slling osp = (objset_phys_t *)stack; 1210b1b8ab34Slling stack += sizeof (objset_phys_t); 1211b1b8ab34Slling if (errnum = zio_read(bp, osp, stack)) 1212b1b8ab34Slling return (errnum); 1213b1b8ab34Slling 1214b1b8ab34Slling grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); 1215b1b8ab34Slling 1216b1b8ab34Slling return (0); 1217b1b8ab34Slling } 1218b1b8ab34Slling 1219b1b8ab34Slling /* 1220e7cbe64fSgw * For a given XDR packed nvlist, verify the first 4 bytes and move on. 1221b1b8ab34Slling * 1222e7cbe64fSgw * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : 1223b1b8ab34Slling * 1224b1b8ab34Slling * encoding method/host endian (4 bytes) 1225b1b8ab34Slling * nvl_version (4 bytes) 1226b1b8ab34Slling * nvl_nvflag (4 bytes) 1227b1b8ab34Slling * encoded nvpairs: 1228b1b8ab34Slling * encoded size of the nvpair (4 bytes) 1229b1b8ab34Slling * decoded size of the nvpair (4 bytes) 1230b1b8ab34Slling * name string size (4 bytes) 1231b1b8ab34Slling * name string data (sizeof(NV_ALIGN4(string)) 1232b1b8ab34Slling * data type (4 bytes) 1233b1b8ab34Slling * # of elements in the nvpair (4 bytes) 1234b1b8ab34Slling * data 1235b1b8ab34Slling * 2 zero's for the last nvpair 1236b1b8ab34Slling * (end of the entire list) (8 bytes) 1237b1b8ab34Slling * 1238b1b8ab34Slling * Return: 1239b1b8ab34Slling * 0 - success 1240b1b8ab34Slling * 1 - failure 1241b1b8ab34Slling */ 1242e7cbe64fSgw static int 1243e7cbe64fSgw nvlist_unpack(char *nvlist, char **out) 1244b1b8ab34Slling { 1245b1b8ab34Slling /* Verify if the 1st and 2nd byte in the nvlist are valid. 
*/ 1246b1b8ab34Slling if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) 1247b1b8ab34Slling return (1); 1248b1b8ab34Slling 1249ad135b5dSChristopher Siden *out = nvlist + 4; 1250e7cbe64fSgw return (0); 1251e7cbe64fSgw } 1252e7cbe64fSgw 1253e7cbe64fSgw static char * 1254e7cbe64fSgw nvlist_array(char *nvlist, int index) 1255e7cbe64fSgw { 1256e7cbe64fSgw int i, encode_size; 1257e7cbe64fSgw 1258e7cbe64fSgw for (i = 0; i < index; i++) { 1259e7cbe64fSgw /* skip the header, nvl_version, and nvl_nvflag */ 1260e7cbe64fSgw nvlist = nvlist + 4 * 2; 1261e7cbe64fSgw 1262e7cbe64fSgw while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) 1263e7cbe64fSgw nvlist += encode_size; /* goto the next nvpair */ 1264e7cbe64fSgw 1265e7cbe64fSgw nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ 1266e7cbe64fSgw } 1267e7cbe64fSgw 1268e7cbe64fSgw return (nvlist); 1269e7cbe64fSgw } 1270e7cbe64fSgw 1271ad135b5dSChristopher Siden /* 1272ad135b5dSChristopher Siden * The nvlist_next_nvpair() function returns a handle to the next nvpair in the 1273ad135b5dSChristopher Siden * list following nvpair. If nvpair is NULL, the first pair is returned. If 1274ad135b5dSChristopher Siden * nvpair is the last pair in the nvlist, NULL is returned. 
1275ad135b5dSChristopher Siden */ 1276ad135b5dSChristopher Siden static char * 1277ad135b5dSChristopher Siden nvlist_next_nvpair(char *nvl, char *nvpair) 1278ad135b5dSChristopher Siden { 1279ad135b5dSChristopher Siden char *cur, *prev; 1280ad135b5dSChristopher Siden int encode_size; 1281ad135b5dSChristopher Siden 1282ad135b5dSChristopher Siden if (nvl == NULL) 1283ad135b5dSChristopher Siden return (NULL); 1284ad135b5dSChristopher Siden 1285ad135b5dSChristopher Siden if (nvpair == NULL) { 1286ad135b5dSChristopher Siden /* skip over nvl_version and nvl_nvflag */ 1287ad135b5dSChristopher Siden nvpair = nvl + 4 * 2; 1288ad135b5dSChristopher Siden } else { 1289ad135b5dSChristopher Siden /* skip to the next nvpair */ 1290ad135b5dSChristopher Siden encode_size = BSWAP_32(*(uint32_t *)nvpair); 1291ad135b5dSChristopher Siden nvpair += encode_size; 1292ad135b5dSChristopher Siden } 1293ad135b5dSChristopher Siden 1294ad135b5dSChristopher Siden /* 8 bytes of 0 marks the end of the list */ 1295ad135b5dSChristopher Siden if (*(uint64_t *)nvpair == 0) 1296ad135b5dSChristopher Siden return (NULL); 1297ad135b5dSChristopher Siden 1298ad135b5dSChristopher Siden return (nvpair); 1299ad135b5dSChristopher Siden } 1300ad135b5dSChristopher Siden 1301ad135b5dSChristopher Siden /* 1302ad135b5dSChristopher Siden * This function returns 0 on success and 1 on failure. On success, a string 1303ad135b5dSChristopher Siden * containing the name of nvpair is saved in buf. 
1304ad135b5dSChristopher Siden */ 1305e7cbe64fSgw static int 1306ad135b5dSChristopher Siden nvpair_name(char *nvp, char *buf, int buflen) 1307ad135b5dSChristopher Siden { 1308ad135b5dSChristopher Siden int len; 1309ad135b5dSChristopher Siden 1310ad135b5dSChristopher Siden /* skip over encode/decode size */ 1311ad135b5dSChristopher Siden nvp += 4 * 2; 1312ad135b5dSChristopher Siden 1313ad135b5dSChristopher Siden len = BSWAP_32(*(uint32_t *)nvp); 1314ad135b5dSChristopher Siden if (buflen < len + 1) 1315ad135b5dSChristopher Siden return (1); 1316ad135b5dSChristopher Siden 1317ad135b5dSChristopher Siden grub_memmove(buf, nvp + 4, len); 1318ad135b5dSChristopher Siden buf[len] = '\0'; 1319ad135b5dSChristopher Siden 1320ad135b5dSChristopher Siden return (0); 1321ad135b5dSChristopher Siden } 1322ad135b5dSChristopher Siden 1323ad135b5dSChristopher Siden /* 1324ad135b5dSChristopher Siden * This function retrieves the value of the nvpair in the form of enumerated 1325ad135b5dSChristopher Siden * type data_type_t. This is used to determine the appropriate type to pass to 1326ad135b5dSChristopher Siden * nvpair_value(). 
1327ad135b5dSChristopher Siden */ 1328ad135b5dSChristopher Siden static int 1329ad135b5dSChristopher Siden nvpair_type(char *nvp) 1330e7cbe64fSgw { 1331ad135b5dSChristopher Siden int name_len, type; 1332ad135b5dSChristopher Siden 1333ad135b5dSChristopher Siden /* skip over encode/decode size */ 1334ad135b5dSChristopher Siden nvp += 4 * 2; 1335ad135b5dSChristopher Siden 1336ad135b5dSChristopher Siden /* skip over name_len */ 1337ad135b5dSChristopher Siden name_len = BSWAP_32(*(uint32_t *)nvp); 1338ad135b5dSChristopher Siden nvp += 4; 1339ad135b5dSChristopher Siden 1340ad135b5dSChristopher Siden /* skip over name */ 1341ad135b5dSChristopher Siden nvp = nvp + ((name_len + 3) & ~3); /* align */ 1342ad135b5dSChristopher Siden 1343ad135b5dSChristopher Siden type = BSWAP_32(*(uint32_t *)nvp); 1344ad135b5dSChristopher Siden 1345ad135b5dSChristopher Siden return (type); 1346ad135b5dSChristopher Siden } 1347ad135b5dSChristopher Siden 1348ad135b5dSChristopher Siden static int 1349ad135b5dSChristopher Siden nvpair_value(char *nvp, void *val, int valtype, int *nelmp) 1350ad135b5dSChristopher Siden { 1351ad135b5dSChristopher Siden int name_len, type, slen; 1352ad135b5dSChristopher Siden char *strval = val; 1353e7cbe64fSgw uint64_t *intval = val; 1354e7cbe64fSgw 1355ad135b5dSChristopher Siden /* skip over encode/decode size */ 1356ad135b5dSChristopher Siden nvp += 4 * 2; 1357b1b8ab34Slling 1358ad135b5dSChristopher Siden /* skip over name_len */ 1359ad135b5dSChristopher Siden name_len = BSWAP_32(*(uint32_t *)nvp); 1360ad135b5dSChristopher Siden nvp += 4; 1361b1b8ab34Slling 1362ad135b5dSChristopher Siden /* skip over name */ 1363ad135b5dSChristopher Siden nvp = nvp + ((name_len + 3) & ~3); /* align */ 1364b1b8ab34Slling 1365ad135b5dSChristopher Siden /* skip over type */ 1366ad135b5dSChristopher Siden type = BSWAP_32(*(uint32_t *)nvp); 1367ad135b5dSChristopher Siden nvp += 4; 1368b1b8ab34Slling 1369ad135b5dSChristopher Siden if (type == valtype) { 1370ad135b5dSChristopher Siden int 
nelm; 1371b1b8ab34Slling 1372ad135b5dSChristopher Siden nelm = BSWAP_32(*(uint32_t *)nvp); 1373ad135b5dSChristopher Siden if (valtype != DATA_TYPE_BOOLEAN && nelm < 1) 1374ad135b5dSChristopher Siden return (1); 1375ad135b5dSChristopher Siden nvp += 4; 1376b1b8ab34Slling 1377ad135b5dSChristopher Siden switch (valtype) { 1378ad135b5dSChristopher Siden case DATA_TYPE_BOOLEAN: 1379ad135b5dSChristopher Siden return (0); 1380b1b8ab34Slling 1381ad135b5dSChristopher Siden case DATA_TYPE_STRING: 1382ad135b5dSChristopher Siden slen = BSWAP_32(*(uint32_t *)nvp); 1383ad135b5dSChristopher Siden nvp += 4; 1384ad135b5dSChristopher Siden grub_memmove(strval, nvp, slen); 1385ad135b5dSChristopher Siden strval[slen] = '\0'; 1386ad135b5dSChristopher Siden return (0); 1387b1b8ab34Slling 1388ad135b5dSChristopher Siden case DATA_TYPE_UINT64: 1389ad135b5dSChristopher Siden *intval = BSWAP_64(*(uint64_t *)nvp); 1390ad135b5dSChristopher Siden return (0); 1391e7cbe64fSgw 1392ad135b5dSChristopher Siden case DATA_TYPE_NVLIST: 1393ad135b5dSChristopher Siden *(void **)val = (void *)nvp; 1394ad135b5dSChristopher Siden return (0); 1395e7cbe64fSgw 1396ad135b5dSChristopher Siden case DATA_TYPE_NVLIST_ARRAY: 1397ad135b5dSChristopher Siden *(void **)val = (void *)nvp; 1398ad135b5dSChristopher Siden if (nelmp) 1399ad135b5dSChristopher Siden *nelmp = nelm; 1400ad135b5dSChristopher Siden return (0); 1401b1b8ab34Slling } 1402b1b8ab34Slling } 1403b1b8ab34Slling 1404b1b8ab34Slling return (1); 1405b1b8ab34Slling } 1406b1b8ab34Slling 1407ad135b5dSChristopher Siden static int 1408ad135b5dSChristopher Siden nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, 1409ad135b5dSChristopher Siden int *nelmp) 1410ad135b5dSChristopher Siden { 1411ad135b5dSChristopher Siden char *nvpair; 1412ad135b5dSChristopher Siden 1413ad135b5dSChristopher Siden for (nvpair = nvlist_next_nvpair(nvlist, NULL); 1414ad135b5dSChristopher Siden nvpair != NULL; 1415ad135b5dSChristopher Siden nvpair = 
nvlist_next_nvpair(nvlist, nvpair)) { 1416ad135b5dSChristopher Siden int name_len = BSWAP_32(*(uint32_t *)(nvpair + 4 * 2)); 1417ad135b5dSChristopher Siden char *nvp_name = nvpair + 4 * 3; 1418ad135b5dSChristopher Siden 1419ad135b5dSChristopher Siden if ((grub_strncmp(nvp_name, name, name_len) == 0) && 1420ad135b5dSChristopher Siden nvpair_type(nvpair) == valtype) { 1421ad135b5dSChristopher Siden return (nvpair_value(nvpair, val, valtype, nelmp)); 1422ad135b5dSChristopher Siden } 1423ad135b5dSChristopher Siden } 1424ad135b5dSChristopher Siden return (1); 1425ad135b5dSChristopher Siden } 1426ad135b5dSChristopher Siden 1427b1b8ab34Slling /* 1428e7cbe64fSgw * Check if this vdev is online and is in a good state. 1429e7cbe64fSgw */ 1430e7cbe64fSgw static int 1431e7cbe64fSgw vdev_validate(char *nv) 1432e7cbe64fSgw { 1433e7cbe64fSgw uint64_t ival; 1434e7cbe64fSgw 1435e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival, 1436e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0 || 1437e7cbe64fSgw nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival, 1438e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0 || 1439e7cbe64fSgw nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival, 1440e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0) 1441e7cbe64fSgw return (ERR_DEV_VALUES); 1442e7cbe64fSgw 1443e7cbe64fSgw return (0); 1444e7cbe64fSgw } 1445e7cbe64fSgw 1446e7cbe64fSgw /* 144721ecdf64SLin Ling * Get a valid vdev pathname/devid from the boot device. 1448ffb5616eSLin Ling * The caller should already allocate MAXPATHLEN memory for bootpath and devid. 
1449e7cbe64fSgw */ 145021ecdf64SLin Ling static int 145121ecdf64SLin Ling vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath, 145221ecdf64SLin Ling int is_spare) 1453e7cbe64fSgw { 1454e7cbe64fSgw char type[16]; 1455e7cbe64fSgw 1456e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING, 1457e7cbe64fSgw NULL)) 1458e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1459e7cbe64fSgw 1460ad135b5dSChristopher Siden if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) { 1461ffb5616eSLin Ling uint64_t guid; 1462ffb5616eSLin Ling 1463ffb5616eSLin Ling if (vdev_validate(nv) != 0) 1464ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1465ffb5616eSLin Ling 1466ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID, 1467ffb5616eSLin Ling &guid, DATA_TYPE_UINT64, NULL) != 0) 1468ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1469ffb5616eSLin Ling 1470ffb5616eSLin Ling if (guid != inguid) 1471e7cbe64fSgw return (ERR_NO_BOOTPATH); 1472e7cbe64fSgw 147321ecdf64SLin Ling /* for a spare vdev, pick the disk labeled with "is_spare" */ 147421ecdf64SLin Ling if (is_spare) { 147521ecdf64SLin Ling uint64_t spare = 0; 147621ecdf64SLin Ling (void) nvlist_lookup_value(nv, ZPOOL_CONFIG_IS_SPARE, 147721ecdf64SLin Ling &spare, DATA_TYPE_UINT64, NULL); 147821ecdf64SLin Ling if (!spare) 147921ecdf64SLin Ling return (ERR_NO_BOOTPATH); 148021ecdf64SLin Ling } 148121ecdf64SLin Ling 1482ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, 1483ffb5616eSLin Ling bootpath, DATA_TYPE_STRING, NULL) != 0) 1484ffb5616eSLin Ling bootpath[0] = '\0'; 1485ffb5616eSLin Ling 1486ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID, 1487ffb5616eSLin Ling devid, DATA_TYPE_STRING, NULL) != 0) 1488ffb5616eSLin Ling devid[0] = '\0'; 1489ffb5616eSLin Ling 1490ad135b5dSChristopher Siden if (grub_strlen(bootpath) >= MAXPATHLEN || 1491ad135b5dSChristopher Siden grub_strlen(devid) >= MAXPATHLEN) 1492ffb5616eSLin Ling return (ERR_WONT_FIT); 1493ffb5616eSLin Ling 
1494ffb5616eSLin Ling return (0); 1495ffb5616eSLin Ling 1496ad135b5dSChristopher Siden } else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0 || 1497ad135b5dSChristopher Siden grub_strcmp(type, VDEV_TYPE_REPLACING) == 0 || 1498ad135b5dSChristopher Siden (is_spare = (grub_strcmp(type, VDEV_TYPE_SPARE) == 0))) { 1499e7cbe64fSgw int nelm, i; 1500e7cbe64fSgw char *child; 1501e7cbe64fSgw 1502e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child, 1503e7cbe64fSgw DATA_TYPE_NVLIST_ARRAY, &nelm)) 1504e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1505e7cbe64fSgw 1506e7cbe64fSgw for (i = 0; i < nelm; i++) { 1507e7cbe64fSgw char *child_i; 1508e7cbe64fSgw 1509e7cbe64fSgw child_i = nvlist_array(child, i); 1510ffb5616eSLin Ling if (vdev_get_bootpath(child_i, inguid, devid, 151121ecdf64SLin Ling bootpath, is_spare) == 0) 1512ffb5616eSLin Ling return (0); 1513e7cbe64fSgw } 1514e7cbe64fSgw } 1515e7cbe64fSgw 1516ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1517e7cbe64fSgw } 1518e7cbe64fSgw 1519e7cbe64fSgw /* 1520e7cbe64fSgw * Check the disk label information and retrieve needed vdev name-value pairs. 1521b1b8ab34Slling * 1522b1b8ab34Slling * Return: 1523b1b8ab34Slling * 0 - success 1524e7cbe64fSgw * ERR_* - failure 1525b1b8ab34Slling */ 1526ad135b5dSChristopher Siden static int 1527e23347b1SEric Taylor check_pool_label(uint64_t sector, char *stack, char *outdevid, 1528ad135b5dSChristopher Siden char *outpath, uint64_t *outguid, uint64_t *outashift, uint64_t *outversion) 1529b1b8ab34Slling { 1530b1b8ab34Slling vdev_phys_t *vdev; 1531e23347b1SEric Taylor uint64_t pool_state, txg = 0; 1532ad135b5dSChristopher Siden char *nvlist, *nv, *features; 1533051aabe6Staylor uint64_t diskguid; 1534b1b8ab34Slling 1535e23347b1SEric Taylor sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT); 1536b1b8ab34Slling 1537b1b8ab34Slling /* Read in the vdev name-value pair list (112K). 
*/ 1538b1b8ab34Slling if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0) 1539b1b8ab34Slling return (ERR_READ); 1540b1b8ab34Slling 1541b1b8ab34Slling vdev = (vdev_phys_t *)stack; 1542e4c3b53dStaylor stack += sizeof (vdev_phys_t); 1543b1b8ab34Slling 1544e7cbe64fSgw if (nvlist_unpack(vdev->vp_nvlist, &nvlist)) 1545b1b8ab34Slling return (ERR_FSYS_CORRUPT); 1546e7cbe64fSgw 1547e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state, 1548e7cbe64fSgw DATA_TYPE_UINT64, NULL)) 1549e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1550e7cbe64fSgw 1551e7cbe64fSgw if (pool_state == POOL_STATE_DESTROYED) 1552e7cbe64fSgw return (ERR_FILESYSTEM_NOT_FOUND); 1553e7cbe64fSgw 1554e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME, 1555e7cbe64fSgw current_rootpool, DATA_TYPE_STRING, NULL)) 1556e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1557e7cbe64fSgw 1558e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg, 1559e7cbe64fSgw DATA_TYPE_UINT64, NULL)) 1560e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1561e7cbe64fSgw 1562e7cbe64fSgw /* not an active device */ 1563e7cbe64fSgw if (txg == 0) 1564e7cbe64fSgw return (ERR_NO_BOOTPATH); 1565e7cbe64fSgw 1566ad135b5dSChristopher Siden if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, outversion, 1567fe3e2633SEric Taylor DATA_TYPE_UINT64, NULL)) 1568fe3e2633SEric Taylor return (ERR_FSYS_CORRUPT); 1569ad135b5dSChristopher Siden if (!SPA_VERSION_IS_SUPPORTED(*outversion)) 1570fe3e2633SEric Taylor return (ERR_NEWER_VERSION); 1571e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, 1572e7cbe64fSgw DATA_TYPE_NVLIST, NULL)) 1573e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1574051aabe6Staylor if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, 1575051aabe6Staylor DATA_TYPE_UINT64, NULL)) 1576051aabe6Staylor return (ERR_FSYS_CORRUPT); 157781b2d573SHans Rosenfeld if (nvlist_lookup_value(nv, ZPOOL_CONFIG_ASHIFT, outashift, 157881b2d573SHans Rosenfeld DATA_TYPE_UINT64, NULL) != 0) 
157981b2d573SHans Rosenfeld return (ERR_FSYS_CORRUPT); 158021ecdf64SLin Ling if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0)) 1581e7cbe64fSgw return (ERR_NO_BOOTPATH); 1582e23347b1SEric Taylor if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid, 1583e23347b1SEric Taylor DATA_TYPE_UINT64, NULL)) 1584e23347b1SEric Taylor return (ERR_FSYS_CORRUPT); 1585ad135b5dSChristopher Siden 1586ad135b5dSChristopher Siden if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, 1587ad135b5dSChristopher Siden &features, DATA_TYPE_NVLIST, NULL) == 0) { 1588ad135b5dSChristopher Siden char *nvp; 1589ad135b5dSChristopher Siden char *name = stack; 1590ad135b5dSChristopher Siden stack += MAXNAMELEN; 1591ad135b5dSChristopher Siden 1592ad135b5dSChristopher Siden for (nvp = nvlist_next_nvpair(features, NULL); 1593ad135b5dSChristopher Siden nvp != NULL; 1594ad135b5dSChristopher Siden nvp = nvlist_next_nvpair(features, nvp)) { 1595ad135b5dSChristopher Siden zap_attribute_t za; 1596ad135b5dSChristopher Siden 1597ad135b5dSChristopher Siden if (nvpair_name(nvp, name, MAXNAMELEN) != 0) 1598ad135b5dSChristopher Siden return (ERR_FSYS_CORRUPT); 1599ad135b5dSChristopher Siden 1600ad135b5dSChristopher Siden za.za_integer_length = 8; 1601ad135b5dSChristopher Siden za.za_num_integers = 1; 1602ad135b5dSChristopher Siden za.za_first_integer = 1; 1603ad135b5dSChristopher Siden za.za_name = name; 1604ad135b5dSChristopher Siden if (check_feature(&za, spa_feature_names, stack) != 0) 1605ad135b5dSChristopher Siden return (ERR_NEWER_VERSION); 1606ad135b5dSChristopher Siden } 1607ad135b5dSChristopher Siden } 1608ad135b5dSChristopher Siden 1609e7cbe64fSgw return (0); 1610b1b8ab34Slling } 1611b1b8ab34Slling 1612b1b8ab34Slling /* 1613b1b8ab34Slling * zfs_mount() locates a valid uberblock of the root pool and read in its MOS 1614b1b8ab34Slling * to the memory address MOS. 
1615b1b8ab34Slling * 1616b1b8ab34Slling * Return: 1617b1b8ab34Slling * 1 - success 1618b1b8ab34Slling * 0 - failure 1619b1b8ab34Slling */ 1620b1b8ab34Slling int 1621b1b8ab34Slling zfs_mount(void) 1622b1b8ab34Slling { 162381b2d573SHans Rosenfeld char *stack, *ub_array; 1624b1b8ab34Slling int label = 0; 162581b2d573SHans Rosenfeld uberblock_t *ubbest; 1626b1b8ab34Slling objset_phys_t *osp; 1627051aabe6Staylor char tmp_bootpath[MAXNAMELEN]; 1628051aabe6Staylor char tmp_devid[MAXNAMELEN]; 1629ad135b5dSChristopher Siden uint64_t tmp_guid, ashift, version; 1630e23347b1SEric Taylor uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT; 1631bbe6aa77SJan Setje-Eilers int err = errnum; /* preserve previous errnum state */ 1632051aabe6Staylor 1633051aabe6Staylor /* if it's our first time here, zero the best uberblock out */ 1634e23347b1SEric Taylor if (best_drive == 0 && best_part == 0 && find_best_root) { 1635e37b211cStaylor grub_memset(¤t_uberblock, 0, sizeof (uberblock_t)); 1636e23347b1SEric Taylor pool_guid = 0; 1637e23347b1SEric Taylor } 1638b1b8ab34Slling 1639b1b8ab34Slling stackbase = ZFS_SCRATCH; 1640b1b8ab34Slling stack = stackbase; 164181b2d573SHans Rosenfeld ub_array = stack; 1642b1b8ab34Slling stack += VDEV_UBERBLOCK_RING; 1643b1b8ab34Slling 1644b1b8ab34Slling osp = (objset_phys_t *)stack; 1645b1b8ab34Slling stack += sizeof (objset_phys_t); 1646e23347b1SEric Taylor adjpl = P2ALIGN(adjpl, (uint64_t)sizeof (vdev_label_t)); 1647b1b8ab34Slling 1648e23347b1SEric Taylor for (label = 0; label < VDEV_LABELS; label++) { 164998c507c4SJan Setje-Eilers 165098c507c4SJan Setje-Eilers /* 165198c507c4SJan Setje-Eilers * some eltorito stacks don't give us a size and 165298c507c4SJan Setje-Eilers * we end up setting the size to MAXUINT, further 165398c507c4SJan Setje-Eilers * some of these devices stop working once a single 165498c507c4SJan Setje-Eilers * read past the end has been issued. 
Checking 165598c507c4SJan Setje-Eilers * for a maximum part_length and skipping the backup 165698c507c4SJan Setje-Eilers * labels at the end of the slice/partition/device 165798c507c4SJan Setje-Eilers * avoids breaking down on such devices. 165898c507c4SJan Setje-Eilers */ 165998c507c4SJan Setje-Eilers if (part_length == MAXUINT && label == 2) 166098c507c4SJan Setje-Eilers break; 166198c507c4SJan Setje-Eilers 1662e23347b1SEric Taylor uint64_t sector = vdev_label_start(adjpl, 1663e23347b1SEric Taylor label) >> SPA_MINBLOCKSHIFT; 1664b1b8ab34Slling 1665b1b8ab34Slling /* Read in the uberblock ring (128K). */ 1666e23347b1SEric Taylor if (devread(sector + 166781b2d573SHans Rosenfeld ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT), 166881b2d573SHans Rosenfeld 0, VDEV_UBERBLOCK_RING, ub_array) == 0) 1669b1b8ab34Slling continue; 1670b1b8ab34Slling 167181b2d573SHans Rosenfeld if (check_pool_label(sector, stack, tmp_devid, 1672ad135b5dSChristopher Siden tmp_bootpath, &tmp_guid, &ashift, &version)) 167381b2d573SHans Rosenfeld continue; 167481b2d573SHans Rosenfeld 167581b2d573SHans Rosenfeld if (pool_guid == 0) 167681b2d573SHans Rosenfeld pool_guid = tmp_guid; 167781b2d573SHans Rosenfeld 167881b2d573SHans Rosenfeld if ((ubbest = find_bestub(ub_array, ashift, sector)) == NULL || 167981b2d573SHans Rosenfeld zio_read(&ubbest->ub_rootbp, osp, stack) != 0) 168081b2d573SHans Rosenfeld continue; 168181b2d573SHans Rosenfeld 168281b2d573SHans Rosenfeld VERIFY_OS_TYPE(osp, DMU_OST_META); 168381b2d573SHans Rosenfeld 1684ad135b5dSChristopher Siden if (version >= SPA_VERSION_FEATURES && 1685ad135b5dSChristopher Siden check_mos_features(&osp->os_meta_dnode, stack) != 0) 1686ad135b5dSChristopher Siden continue; 1687ad135b5dSChristopher Siden 168881b2d573SHans Rosenfeld if (find_best_root && ((pool_guid != tmp_guid) || 168981b2d573SHans Rosenfeld vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0)) 169081b2d573SHans Rosenfeld continue; 169181b2d573SHans Rosenfeld 
169281b2d573SHans Rosenfeld /* Got the MOS. Save it at the memory addr MOS. */ 169381b2d573SHans Rosenfeld grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); 169481b2d573SHans Rosenfeld grub_memmove(¤t_uberblock, ubbest, sizeof (uberblock_t)); 169581b2d573SHans Rosenfeld grub_memmove(current_bootpath, tmp_bootpath, MAXNAMELEN); 169681b2d573SHans Rosenfeld grub_memmove(current_devid, tmp_devid, grub_strlen(tmp_devid)); 169781b2d573SHans Rosenfeld is_zfs_mount = 1; 169881b2d573SHans Rosenfeld return (1); 1699b1b8ab34Slling } 1700b1b8ab34Slling 1701bbe6aa77SJan Setje-Eilers /* 1702bbe6aa77SJan Setje-Eilers * While some fs impls. (tftp) rely on setting and keeping 1703bbe6aa77SJan Setje-Eilers * global errnums set, others won't reset it and will break 1704bbe6aa77SJan Setje-Eilers * when issuing rawreads. The goal here is to simply not 1705bbe6aa77SJan Setje-Eilers * have zfs mount attempts impact the previous state. 1706bbe6aa77SJan Setje-Eilers */ 1707bbe6aa77SJan Setje-Eilers errnum = err; 1708b1b8ab34Slling return (0); 1709b1b8ab34Slling } 1710b1b8ab34Slling 1711b1b8ab34Slling /* 1712b1b8ab34Slling * zfs_open() locates a file in the rootpool by following the 1713b1b8ab34Slling * MOS and places the dnode of the file in the memory address DNODE. 
1714b1b8ab34Slling * 1715b1b8ab34Slling * Return: 1716b1b8ab34Slling * 1 - success 1717b1b8ab34Slling * 0 - failure 1718b1b8ab34Slling */ 1719b1b8ab34Slling int 1720b1b8ab34Slling zfs_open(char *filename) 1721b1b8ab34Slling { 1722b1b8ab34Slling char *stack; 1723b1b8ab34Slling dnode_phys_t *mdn; 1724b1b8ab34Slling 1725b1b8ab34Slling file_buf = NULL; 1726b1b8ab34Slling stackbase = ZFS_SCRATCH; 1727b1b8ab34Slling stack = stackbase; 1728b1b8ab34Slling 1729b1b8ab34Slling mdn = (dnode_phys_t *)stack; 1730b1b8ab34Slling stack += sizeof (dnode_phys_t); 1731b1b8ab34Slling 1732b1b8ab34Slling dnode_mdn = NULL; 1733b1b8ab34Slling dnode_buf = (dnode_phys_t *)stack; 1734b1b8ab34Slling stack += 1<<DNODE_BLOCK_SHIFT; 1735b1b8ab34Slling 1736b1b8ab34Slling /* 1737b1b8ab34Slling * menu.lst is placed at the root pool filesystem level, 1738b1b8ab34Slling * do not goto 'current_bootfs'. 1739b1b8ab34Slling */ 1740eb2bd662Svikram if (is_top_dataset_file(filename)) { 1741b1b8ab34Slling if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)) 1742b1b8ab34Slling return (0); 1743b1b8ab34Slling 1744b1b8ab34Slling current_bootfs_obj = 0; 1745b1b8ab34Slling } else { 1746b1b8ab34Slling if (current_bootfs[0] == '\0') { 1747b1b8ab34Slling /* Get the default root filesystem object number */ 1748ae8180dbSlling if (errnum = get_default_bootfsobj(MOS, 1749ae8180dbSlling ¤t_bootfs_obj, stack)) 1750b1b8ab34Slling return (0); 1751b1b8ab34Slling 1752b1b8ab34Slling if (errnum = get_objset_mdn(MOS, NULL, 1753b1b8ab34Slling ¤t_bootfs_obj, mdn, stack)) 1754b1b8ab34Slling return (0); 1755b1b8ab34Slling } else { 1756b35c6776Staylor if (errnum = get_objset_mdn(MOS, current_bootfs, 1757b35c6776Staylor ¤t_bootfs_obj, mdn, stack)) { 1758051aabe6Staylor grub_memset(current_bootfs, 0, MAXNAMELEN); 1759b1b8ab34Slling return (0); 1760b35c6776Staylor } 1761b1b8ab34Slling } 1762b1b8ab34Slling } 1763b1b8ab34Slling 1764b1b8ab34Slling if (dnode_get_path(mdn, filename, DNODE, stack)) { 1765b1b8ab34Slling errnum = 
ERR_FILE_NOT_FOUND; 1766b1b8ab34Slling return (0); 1767b1b8ab34Slling } 1768b1b8ab34Slling 1769b1b8ab34Slling /* get the file size and set the file position to 0 */ 17700a586ceaSMark Shellenbaum 17710a586ceaSMark Shellenbaum /* 17720a586ceaSMark Shellenbaum * For DMU_OT_SA we will need to locate the SIZE attribute 17730a586ceaSMark Shellenbaum * attribute, which could be either in the bonus buffer 17740a586ceaSMark Shellenbaum * or the "spill" block. 17750a586ceaSMark Shellenbaum */ 17760a586ceaSMark Shellenbaum if (DNODE->dn_bonustype == DMU_OT_SA) { 17770a586ceaSMark Shellenbaum sa_hdr_phys_t *sahdrp; 17780a586ceaSMark Shellenbaum int hdrsize; 17790a586ceaSMark Shellenbaum 17800a586ceaSMark Shellenbaum if (DNODE->dn_bonuslen != 0) { 17810a586ceaSMark Shellenbaum sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE); 17820a586ceaSMark Shellenbaum } else { 17830a586ceaSMark Shellenbaum if (DNODE->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { 17840a586ceaSMark Shellenbaum blkptr_t *bp = &DNODE->dn_spill; 17850a586ceaSMark Shellenbaum void *buf; 17860a586ceaSMark Shellenbaum 17870a586ceaSMark Shellenbaum buf = (void *)stack; 17880a586ceaSMark Shellenbaum stack += BP_GET_LSIZE(bp); 17890a586ceaSMark Shellenbaum 17900a586ceaSMark Shellenbaum /* reset errnum to rawread() failure */ 17910a586ceaSMark Shellenbaum errnum = 0; 17920a586ceaSMark Shellenbaum if (zio_read(bp, buf, stack) != 0) { 17930a586ceaSMark Shellenbaum return (0); 17940a586ceaSMark Shellenbaum } 17950a586ceaSMark Shellenbaum sahdrp = buf; 17960a586ceaSMark Shellenbaum } else { 17970a586ceaSMark Shellenbaum errnum = ERR_FSYS_CORRUPT; 17980a586ceaSMark Shellenbaum return (0); 17990a586ceaSMark Shellenbaum } 18000a586ceaSMark Shellenbaum } 18010a586ceaSMark Shellenbaum hdrsize = SA_HDR_SIZE(sahdrp); 18020a586ceaSMark Shellenbaum filemax = *(uint64_t *)((char *)sahdrp + hdrsize + 18030a586ceaSMark Shellenbaum SA_SIZE_OFFSET); 18040a586ceaSMark Shellenbaum } else { 18050a586ceaSMark Shellenbaum filemax = ((znode_phys_t 
*)DN_BONUS(DNODE))->zp_size; 18060a586ceaSMark Shellenbaum } 1807b1b8ab34Slling filepos = 0; 1808b1b8ab34Slling 1809b1b8ab34Slling dnode_buf = NULL; 1810b1b8ab34Slling return (1); 1811b1b8ab34Slling } 1812b1b8ab34Slling 1813b1b8ab34Slling /* 1814b1b8ab34Slling * zfs_read reads in the data blocks pointed by the DNODE. 1815b1b8ab34Slling * 1816b1b8ab34Slling * Return: 1817b1b8ab34Slling * len - the length successfully read in to the buffer 1818b1b8ab34Slling * 0 - failure 1819b1b8ab34Slling */ 1820b1b8ab34Slling int 1821b1b8ab34Slling zfs_read(char *buf, int len) 1822b1b8ab34Slling { 1823b1b8ab34Slling char *stack; 1824b1b8ab34Slling int blksz, length, movesize; 1825b1b8ab34Slling 1826b1b8ab34Slling if (file_buf == NULL) { 1827b1b8ab34Slling file_buf = stackbase; 1828b1b8ab34Slling stackbase += SPA_MAXBLOCKSIZE; 1829b1b8ab34Slling file_start = file_end = 0; 1830b1b8ab34Slling } 1831b1b8ab34Slling stack = stackbase; 1832b1b8ab34Slling 1833b1b8ab34Slling /* 1834b1b8ab34Slling * If offset is in memory, move it into the buffer provided and return. 1835b1b8ab34Slling */ 1836b1b8ab34Slling if (filepos >= file_start && filepos+len <= file_end) { 1837b1b8ab34Slling grub_memmove(buf, file_buf + filepos - file_start, len); 1838b1b8ab34Slling filepos += len; 1839b1b8ab34Slling return (len); 1840b1b8ab34Slling } 1841b1b8ab34Slling 1842b1b8ab34Slling blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT; 1843b1b8ab34Slling 1844b5152584SMatthew Ahrens /* 1845b5152584SMatthew Ahrens * Note: for GRUB, SPA_MAXBLOCKSIZE is 128KB. There is not enough 1846b5152584SMatthew Ahrens * memory to allocate the new max blocksize (16MB), so while 1847b5152584SMatthew Ahrens * GRUB understands the large_blocks on-disk feature, it can't 1848b5152584SMatthew Ahrens * actually read large blocks. 
1849b5152584SMatthew Ahrens */ 1850b5152584SMatthew Ahrens if (blksz > SPA_MAXBLOCKSIZE) { 1851b5152584SMatthew Ahrens grub_printf("blocks larger than 128K are not supported\n"); 1852b5152584SMatthew Ahrens return (0); 1853b5152584SMatthew Ahrens } 1854b5152584SMatthew Ahrens 1855b1b8ab34Slling /* 1856b1b8ab34Slling * Entire Dnode is too big to fit into the space available. We 1857b1b8ab34Slling * will need to read it in chunks. This could be optimized to 1858b1b8ab34Slling * read in as large a chunk as there is space available, but for 1859b1b8ab34Slling * now, this only reads in one data block at a time. 1860b1b8ab34Slling */ 1861b1b8ab34Slling length = len; 1862b1b8ab34Slling while (length) { 1863b1b8ab34Slling /* 1864b1b8ab34Slling * Find requested blkid and the offset within that block. 1865b1b8ab34Slling */ 1866b1b8ab34Slling uint64_t blkid = filepos / blksz; 1867b1b8ab34Slling 1868b1b8ab34Slling if (errnum = dmu_read(DNODE, blkid, file_buf, stack)) 1869b1b8ab34Slling return (0); 1870b1b8ab34Slling 1871b1b8ab34Slling file_start = blkid * blksz; 1872b1b8ab34Slling file_end = file_start + blksz; 1873b1b8ab34Slling 1874b1b8ab34Slling movesize = MIN(length, file_end - filepos); 1875b1b8ab34Slling 1876b1b8ab34Slling grub_memmove(buf, file_buf + filepos - file_start, 1877b1b8ab34Slling movesize); 1878b1b8ab34Slling buf += movesize; 1879b1b8ab34Slling length -= movesize; 1880b1b8ab34Slling filepos += movesize; 1881b1b8ab34Slling } 1882b1b8ab34Slling 1883b1b8ab34Slling return (len); 1884b1b8ab34Slling } 1885b1b8ab34Slling 1886b1b8ab34Slling /* 1887b1b8ab34Slling * No-Op 1888b1b8ab34Slling */ 1889b1b8ab34Slling int 1890b1b8ab34Slling zfs_embed(int *start_sector, int needed_sectors) 1891b1b8ab34Slling { 1892b1b8ab34Slling return (1); 1893b1b8ab34Slling } 1894b1b8ab34Slling 1895b1b8ab34Slling #endif /* FSYS_ZFS */ 1896