1b1b8ab34Slling /* 2b1b8ab34Slling * GRUB -- GRand Unified Bootloader 3b1b8ab34Slling * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. 4b1b8ab34Slling * 5b1b8ab34Slling * This program is free software; you can redistribute it and/or modify 6b1b8ab34Slling * it under the terms of the GNU General Public License as published by 7b1b8ab34Slling * the Free Software Foundation; either version 2 of the License, or 8b1b8ab34Slling * (at your option) any later version. 9b1b8ab34Slling * 10b1b8ab34Slling * This program is distributed in the hope that it will be useful, 11b1b8ab34Slling * but WITHOUT ANY WARRANTY; without even the implied warranty of 12b1b8ab34Slling * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13b1b8ab34Slling * GNU General Public License for more details. 14b1b8ab34Slling * 15b1b8ab34Slling * You should have received a copy of the GNU General Public License 16b1b8ab34Slling * along with this program; if not, write to the Free Software 17b1b8ab34Slling * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18b1b8ab34Slling */ 19ad135b5dSChristopher Siden 20b1b8ab34Slling /* 216e1f5caaSNeil Perrin * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 22b1b8ab34Slling * Use is subject to license terms. 23b1b8ab34Slling */ 24b1b8ab34Slling 25ad135b5dSChristopher Siden /* 26d94527b3SDan Kimmel * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 27a6f561b4SSašo Kiselkov * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 28*c3d26abcSMatthew Ahrens * Copyright (c) 2014 Integros [integros.com] 29ad135b5dSChristopher Siden */ 30ad135b5dSChristopher Siden 31b1b8ab34Slling /* 32b1b8ab34Slling * The zfs plug-in routines for GRUB are: 33b1b8ab34Slling * 34b1b8ab34Slling * zfs_mount() - locates a valid uberblock of the root pool and reads 35b1b8ab34Slling * in its MOS at the memory address MOS. 36b1b8ab34Slling * 37b1b8ab34Slling * zfs_open() - locates a plain file object by following the MOS 38b1b8ab34Slling * and places its dnode at the memory address DNODE. 39b1b8ab34Slling * 40b1b8ab34Slling * zfs_read() - read in the data blocks pointed by the DNODE. 41b1b8ab34Slling * 42b1b8ab34Slling * ZFS_SCRATCH is used as a working area. 43b1b8ab34Slling * 44b1b8ab34Slling * (memory addr) MOS DNODE ZFS_SCRATCH 45b1b8ab34Slling * | | | 46b1b8ab34Slling * +-------V---------V----------V---------------+ 47b1b8ab34Slling * memory | | dnode | dnode | scratch | 48b1b8ab34Slling * | | 512B | 512B | area | 49b1b8ab34Slling * +--------------------------------------------+ 50b1b8ab34Slling */ 51b1b8ab34Slling 52b1b8ab34Slling #ifdef FSYS_ZFS 53b1b8ab34Slling 54b1b8ab34Slling #include "shared.h" 55b1b8ab34Slling #include "filesys.h" 56b1b8ab34Slling #include "fsys_zfs.h" 57b1b8ab34Slling 58b1b8ab34Slling /* cache for a file block of the currently zfs_open()-ed file */ 59b1b8ab34Slling static void *file_buf = NULL; 60b1b8ab34Slling static uint64_t file_start = 0; 61b1b8ab34Slling static uint64_t file_end = 0; 62b1b8ab34Slling 63b1b8ab34Slling /* cache for a dnode block */ 64b1b8ab34Slling static dnode_phys_t *dnode_buf = NULL; 65b1b8ab34Slling static dnode_phys_t *dnode_mdn = NULL; 66b1b8ab34Slling static uint64_t dnode_start = 0; 67b1b8ab34Slling static uint64_t dnode_end = 0; 68b1b8ab34Slling 69e23347b1SEric Taylor static uint64_t pool_guid = 0; 70051aabe6Staylor static uberblock_t current_uberblock; 71b1b8ab34Slling static char *stackbase; 72b1b8ab34Slling 73b1b8ab34Slling decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = 74b1b8ab34Slling { 7515e6edf1Sgw {"inherit", 0}, /* ZIO_COMPRESS_INHERIT */ 76b1b8ab34Slling {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ 7715e6edf1Sgw {"off", 0}, /* ZIO_COMPRESS_OFF */ 7815e6edf1Sgw {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ 79a6f561b4SSašo Kiselkov {"empty", 0}, /* ZIO_COMPRESS_EMPTY */ 80a6f561b4SSašo Kiselkov {"gzip-1", 0}, /* ZIO_COMPRESS_GZIP_1 */ 81a6f561b4SSašo Kiselkov {"gzip-2", 0}, /* ZIO_COMPRESS_GZIP_2 */ 82a6f561b4SSašo Kiselkov {"gzip-3", 0}, /* ZIO_COMPRESS_GZIP_3 */ 83a6f561b4SSašo Kiselkov {"gzip-4", 0}, /* ZIO_COMPRESS_GZIP_4 */ 84a6f561b4SSašo Kiselkov {"gzip-5", 0}, /* ZIO_COMPRESS_GZIP_5 */ 85a6f561b4SSašo Kiselkov {"gzip-6", 0}, /* ZIO_COMPRESS_GZIP_6 */ 86a6f561b4SSašo Kiselkov {"gzip-7", 0}, /* ZIO_COMPRESS_GZIP_7 */ 87a6f561b4SSašo Kiselkov {"gzip-8", 0}, /* ZIO_COMPRESS_GZIP_8 */ 88a6f561b4SSašo Kiselkov {"gzip-9", 0}, /* ZIO_COMPRESS_GZIP_9 */ 89a6f561b4SSašo Kiselkov {"zle", 0}, /* ZIO_COMPRESS_ZLE */ 90a6f561b4SSašo Kiselkov {"lz4", lz4_decompress} /* ZIO_COMPRESS_LZ4 */ 91b1b8ab34Slling }; 92b1b8ab34Slling 93cd9c78d9SLin Ling static int zio_read_data(blkptr_t *bp, void *buf, char *stack); 94cd9c78d9SLin Ling 95b1b8ab34Slling /* 96b1b8ab34Slling * Our own version of bcmp(). 97b1b8ab34Slling */ 98b1b8ab34Slling static int 99b1b8ab34Slling zfs_bcmp(const void *s1, const void *s2, size_t n) 100b1b8ab34Slling { 101b1b8ab34Slling const uchar_t *ps1 = s1; 102b1b8ab34Slling const uchar_t *ps2 = s2; 103b1b8ab34Slling 104b1b8ab34Slling if (s1 != s2 && n != 0) { 105b1b8ab34Slling do { 106b1b8ab34Slling if (*ps1++ != *ps2++) 107b1b8ab34Slling return (1); 108b1b8ab34Slling } while (--n != 0); 109b1b8ab34Slling } 110b1b8ab34Slling 111b1b8ab34Slling return (0); 112b1b8ab34Slling } 113b1b8ab34Slling 114b1b8ab34Slling /* 115b1b8ab34Slling * Our own version of log2(). Same thing as highbit()-1. 116b1b8ab34Slling */ 117b1b8ab34Slling static int 118b1b8ab34Slling zfs_log2(uint64_t num) 119b1b8ab34Slling { 120b1b8ab34Slling int i = 0; 121b1b8ab34Slling 122b1b8ab34Slling while (num > 1) { 123b1b8ab34Slling i++; 124b1b8ab34Slling num = num >> 1; 125b1b8ab34Slling } 126b1b8ab34Slling 127b1b8ab34Slling return (i); 128b1b8ab34Slling } 129b1b8ab34Slling 130b1b8ab34Slling /* Checksum Functions */ 131b1b8ab34Slling static void 132b1b8ab34Slling zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) 133b1b8ab34Slling { 134b1b8ab34Slling ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 135b1b8ab34Slling } 136b1b8ab34Slling 137b1b8ab34Slling /* Checksum Table and Values */ 138b1b8ab34Slling zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 139ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "inherit"}, 140ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "on"}, 141ad135b5dSChristopher Siden {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"}, 142ad135b5dSChristopher Siden {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, 143ad135b5dSChristopher Siden {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, 144ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "zilog"}, 145ad135b5dSChristopher Siden {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, 146ad135b5dSChristopher Siden {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, 147ad135b5dSChristopher Siden {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}, 148ad135b5dSChristopher Siden {{NULL, NULL}, 0, 0, "zilog2"}, 14945818ee1SMatthew Ahrens {{zio_checksum_off, zio_checksum_off}, 0, 0, "noparity"}, 15045818ee1SMatthew Ahrens {{zio_checksum_SHA512, NULL}, 0, 0, "SHA512"} 151b1b8ab34Slling }; 152b1b8ab34Slling 153b1b8ab34Slling /* 154b1b8ab34Slling * zio_checksum_verify: Provides support for checksum verification. 155b1b8ab34Slling * 15645818ee1SMatthew Ahrens * Fletcher2, Fletcher4, SHA-256 and SHA-512/256 are supported. 157b1b8ab34Slling * 158b1b8ab34Slling * Return: 159b1b8ab34Slling * -1 = Failure 160b1b8ab34Slling * 0 = Success 161b1b8ab34Slling */ 162b1b8ab34Slling static int 163b1b8ab34Slling zio_checksum_verify(blkptr_t *bp, char *data, int size) 164b1b8ab34Slling { 165b1b8ab34Slling zio_cksum_t zc = bp->blk_cksum; 166cd9c78d9SLin Ling uint32_t checksum = BP_GET_CHECKSUM(bp); 167b1b8ab34Slling int byteswap = BP_SHOULD_BYTESWAP(bp); 1686e1f5caaSNeil Perrin zio_eck_t *zec = (zio_eck_t *)(data + size) - 1; 169b1b8ab34Slling zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 170b1b8ab34Slling zio_cksum_t actual_cksum, expected_cksum; 171b1b8ab34Slling 1725d7b4d43SMatthew Ahrens if (byteswap) { 1735d7b4d43SMatthew Ahrens grub_printf("byteswap not supported\n"); 174b1b8ab34Slling return (-1); 1755d7b4d43SMatthew Ahrens } 176b1b8ab34Slling 1775d7b4d43SMatthew Ahrens if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) { 1785d7b4d43SMatthew Ahrens grub_printf("checksum algorithm %u not supported\n", checksum); 179b1b8ab34Slling return (-1); 1805d7b4d43SMatthew Ahrens } 181b1b8ab34Slling 1826e1f5caaSNeil Perrin if (ci->ci_eck) { 1836e1f5caaSNeil Perrin expected_cksum = zec->zec_cksum; 1846e1f5caaSNeil Perrin zec->zec_cksum = zc; 185cd9c78d9SLin Ling ci->ci_func[0](data, size, &actual_cksum); 1866e1f5caaSNeil Perrin zec->zec_cksum = expected_cksum; 187b1b8ab34Slling zc = expected_cksum; 188b1b8ab34Slling } else { 189b1b8ab34Slling ci->ci_func[byteswap](data, size, &actual_cksum); 190b1b8ab34Slling } 191b1b8ab34Slling 192b1b8ab34Slling if ((actual_cksum.zc_word[0] - zc.zc_word[0]) | 193b1b8ab34Slling (actual_cksum.zc_word[1] - zc.zc_word[1]) | 194b1b8ab34Slling (actual_cksum.zc_word[2] - zc.zc_word[2]) | 195b1b8ab34Slling (actual_cksum.zc_word[3] - zc.zc_word[3])) 196b1b8ab34Slling return (-1); 197b1b8ab34Slling 198b1b8ab34Slling return (0); 199b1b8ab34Slling } 200b1b8ab34Slling 201b1b8ab34Slling /* 202e23347b1SEric Taylor * vdev_label_start returns the physical disk offset (in bytes) of 203e23347b1SEric Taylor * label "l". 204b1b8ab34Slling */ 205e7cbe64fSgw static uint64_t 206e23347b1SEric Taylor vdev_label_start(uint64_t psize, int l) 207b1b8ab34Slling { 208e23347b1SEric Taylor return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 209b1b8ab34Slling 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); 210b1b8ab34Slling } 211b1b8ab34Slling 212b1b8ab34Slling /* 213b1b8ab34Slling * vdev_uberblock_compare takes two uberblock structures and returns an integer 214b1b8ab34Slling * indicating the more recent of the two. 215b1b8ab34Slling * Return Value = 1 if ub2 is more recent 216b1b8ab34Slling * Return Value = -1 if ub1 is more recent 217b1b8ab34Slling * The most recent uberblock is determined using its transaction number and 218b1b8ab34Slling * timestamp. The uberblock with the highest transaction number is 219b1b8ab34Slling * considered "newer". If the transaction numbers of the two blocks match, the 220b1b8ab34Slling * timestamps are compared to determine the "newer" of the two. 221b1b8ab34Slling */ 222b1b8ab34Slling static int 223b1b8ab34Slling vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) 224b1b8ab34Slling { 225b1b8ab34Slling if (ub1->ub_txg < ub2->ub_txg) 226b1b8ab34Slling return (-1); 227b1b8ab34Slling if (ub1->ub_txg > ub2->ub_txg) 228b1b8ab34Slling return (1); 229b1b8ab34Slling 230b1b8ab34Slling if (ub1->ub_timestamp < ub2->ub_timestamp) 231b1b8ab34Slling return (-1); 232b1b8ab34Slling if (ub1->ub_timestamp > ub2->ub_timestamp) 233b1b8ab34Slling return (1); 234b1b8ab34Slling 235b1b8ab34Slling return (0); 236b1b8ab34Slling } 237b1b8ab34Slling 238b1b8ab34Slling /* 239b1b8ab34Slling * Three pieces of information are needed to verify an uberblock: the magic 240b1b8ab34Slling * number, the version number, and the checksum. 241b1b8ab34Slling * 242b1b8ab34Slling * Return: 243b1b8ab34Slling * 0 - Success 244b1b8ab34Slling * -1 - Failure 245b1b8ab34Slling */ 246b1b8ab34Slling static int 24781b2d573SHans Rosenfeld uberblock_verify(uberblock_t *uber, uint64_t ub_size, uint64_t offset) 248b1b8ab34Slling { 249b1b8ab34Slling blkptr_t bp; 250b1b8ab34Slling 251b1b8ab34Slling BP_ZERO(&bp); 252b1b8ab34Slling BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); 253b1b8ab34Slling BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); 254b1b8ab34Slling ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); 255b1b8ab34Slling 25681b2d573SHans Rosenfeld if (zio_checksum_verify(&bp, (char *)uber, ub_size) != 0) 257b1b8ab34Slling return (-1); 258b1b8ab34Slling 259b1b8ab34Slling if (uber->ub_magic == UBERBLOCK_MAGIC && 260ad135b5dSChristopher Siden SPA_VERSION_IS_SUPPORTED(uber->ub_version)) 261b1b8ab34Slling return (0); 262b1b8ab34Slling 263b1b8ab34Slling return (-1); 264b1b8ab34Slling } 265b1b8ab34Slling 266b1b8ab34Slling /* 267b1b8ab34Slling * Find the best uberblock. 268b1b8ab34Slling * Return: 269b1b8ab34Slling * Success - Pointer to the best uberblock. 270b1b8ab34Slling * Failure - NULL 271b1b8ab34Slling */ 27281b2d573SHans Rosenfeld static uberblock_t * 27381b2d573SHans Rosenfeld find_bestub(char *ub_array, uint64_t ashift, uint64_t sector) 274b1b8ab34Slling { 27581b2d573SHans Rosenfeld uberblock_t *ubbest = NULL; 27681b2d573SHans Rosenfeld uberblock_t *ubnext; 27781b2d573SHans Rosenfeld uint64_t offset, ub_size; 278e23347b1SEric Taylor int i; 279b1b8ab34Slling 28081b2d573SHans Rosenfeld ub_size = VDEV_UBERBLOCK_SIZE(ashift); 28181b2d573SHans Rosenfeld 28281b2d573SHans Rosenfeld for (i = 0; i < VDEV_UBERBLOCK_COUNT(ashift); i++) { 28381b2d573SHans Rosenfeld ubnext = (uberblock_t *)ub_array; 28481b2d573SHans Rosenfeld ub_array += ub_size; 285e23347b1SEric Taylor offset = (sector << SPA_MINBLOCKSHIFT) + 28681b2d573SHans Rosenfeld VDEV_UBERBLOCK_OFFSET(ashift, i); 28781b2d573SHans Rosenfeld 28881b2d573SHans Rosenfeld if (uberblock_verify(ubnext, ub_size, offset) != 0) 28981b2d573SHans Rosenfeld continue; 29081b2d573SHans Rosenfeld 29181b2d573SHans Rosenfeld if (ubbest == NULL || 29281b2d573SHans Rosenfeld vdev_uberblock_compare(ubnext, ubbest) > 0) 29381b2d573SHans Rosenfeld ubbest = ubnext; 294b1b8ab34Slling } 295b1b8ab34Slling 296b1b8ab34Slling return (ubbest); 297b1b8ab34Slling } 298b1b8ab34Slling 299b1b8ab34Slling /* 300cd9c78d9SLin Ling * Read a block of data based on the gang block address dva, 301cd9c78d9SLin Ling * and put its data in buf. 302b1b8ab34Slling * 303b1b8ab34Slling * Return: 304b1b8ab34Slling * 0 - success 305cd9c78d9SLin Ling * 1 - failure 306b1b8ab34Slling */ 307b1b8ab34Slling static int 308cd9c78d9SLin Ling zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack) 309b1b8ab34Slling { 310cd9c78d9SLin Ling zio_gbh_phys_t *zio_gb; 311b1b8ab34Slling uint64_t offset, sector; 312cd9c78d9SLin Ling blkptr_t tmpbp; 313cd9c78d9SLin Ling int i; 314b1b8ab34Slling 315cd9c78d9SLin Ling zio_gb = (zio_gbh_phys_t *)stack; 316cd9c78d9SLin Ling stack += SPA_GANGBLOCKSIZE; 317cd9c78d9SLin Ling offset = DVA_GET_OFFSET(dva); 318ad135b5dSChristopher Siden sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); 319b1b8ab34Slling 320cd9c78d9SLin Ling /* read in the gang block header */ 321cd9c78d9SLin Ling if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) { 322cd9c78d9SLin Ling grub_printf("failed to read in a gang block header\n"); 323cd9c78d9SLin Ling return (1); 324cd9c78d9SLin Ling } 325cd9c78d9SLin Ling 326cd9c78d9SLin Ling /* self checksuming the gang block header */ 327cd9c78d9SLin Ling BP_ZERO(&tmpbp); 328cd9c78d9SLin Ling BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER); 329cd9c78d9SLin Ling BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER); 330cd9c78d9SLin Ling ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva), 331cd9c78d9SLin Ling DVA_GET_OFFSET(dva), bp->blk_birth, 0); 332cd9c78d9SLin Ling if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) { 333cd9c78d9SLin Ling grub_printf("failed to checksum a gang block header\n"); 334cd9c78d9SLin Ling return (1); 335cd9c78d9SLin Ling } 336cd9c78d9SLin Ling 337cd9c78d9SLin Ling for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { 33843466aaeSMax Grossman if (BP_IS_HOLE(&zio_gb->zg_blkptr[i])) 339cd9c78d9SLin Ling continue; 340cd9c78d9SLin Ling 341cd9c78d9SLin Ling if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack)) 342cd9c78d9SLin Ling return (1); 343cd9c78d9SLin Ling buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]); 344cd9c78d9SLin Ling } 345cd9c78d9SLin Ling 346cd9c78d9SLin Ling return (0); 347cd9c78d9SLin Ling } 348cd9c78d9SLin Ling 349cd9c78d9SLin Ling /* 350cd9c78d9SLin Ling * Read in a block of raw data to buf. 351cd9c78d9SLin Ling * 352cd9c78d9SLin Ling * Return: 353cd9c78d9SLin Ling * 0 - success 354cd9c78d9SLin Ling * 1 - failure 355cd9c78d9SLin Ling */ 356cd9c78d9SLin Ling static int 357cd9c78d9SLin Ling zio_read_data(blkptr_t *bp, void *buf, char *stack) 358cd9c78d9SLin Ling { 359cd9c78d9SLin Ling int i, psize; 360cd9c78d9SLin Ling 361cd9c78d9SLin Ling psize = BP_GET_PSIZE(bp); 362ae8180dbSlling 363b1b8ab34Slling /* pick a good dva from the block pointer */ 364b1b8ab34Slling for (i = 0; i < SPA_DVAS_PER_BP; i++) { 365cd9c78d9SLin Ling uint64_t offset, sector; 366b1b8ab34Slling 367b1b8ab34Slling if (bp->blk_dva[i].dva_word[0] == 0 && 368b1b8ab34Slling bp->blk_dva[i].dva_word[1] == 0) 369b1b8ab34Slling continue; 370b1b8ab34Slling 371cd9c78d9SLin Ling if (DVA_GET_GANG(&bp->blk_dva[i])) { 372d94527b3SDan Kimmel if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) != 0) 373d94527b3SDan Kimmel continue; 374b1b8ab34Slling } else { 375cd9c78d9SLin Ling /* read in a data block */ 376cd9c78d9SLin Ling offset = DVA_GET_OFFSET(&bp->blk_dva[i]); 377ad135b5dSChristopher Siden sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); 378d94527b3SDan Kimmel if (devread(sector, 0, psize, buf) == 0) 379d94527b3SDan Kimmel continue; 380d94527b3SDan Kimmel } 381d94527b3SDan Kimmel 382d94527b3SDan Kimmel /* verify that the checksum matches */ 383d94527b3SDan Kimmel if (zio_checksum_verify(bp, buf, psize) == 0) { 384d94527b3SDan Kimmel return (0); 385b1b8ab34Slling } 386b1b8ab34Slling } 387b1b8ab34Slling 388d94527b3SDan Kimmel grub_printf("could not read block due to EIO or ECKSUM\n"); 389cd9c78d9SLin Ling return (1); 390cd9c78d9SLin Ling } 391cd9c78d9SLin Ling 3925d7b4d43SMatthew Ahrens /* 3935d7b4d43SMatthew Ahrens * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be 3945d7b4d43SMatthew Ahrens * more than BPE_PAYLOAD_SIZE bytes). 3955d7b4d43SMatthew Ahrens */ 3965d7b4d43SMatthew Ahrens static void 3975d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(const blkptr_t *bp, void *buf) 3985d7b4d43SMatthew Ahrens { 3995d7b4d43SMatthew Ahrens int psize, i; 4005d7b4d43SMatthew Ahrens uint8_t *buf8 = buf; 4015d7b4d43SMatthew Ahrens uint64_t w = 0; 4025d7b4d43SMatthew Ahrens const uint64_t *bp64 = (const uint64_t *)bp; 4035d7b4d43SMatthew Ahrens 4045d7b4d43SMatthew Ahrens psize = BPE_GET_PSIZE(bp); 4055d7b4d43SMatthew Ahrens 4065d7b4d43SMatthew Ahrens /* 4075d7b4d43SMatthew Ahrens * Decode the words of the block pointer into the byte array. 4085d7b4d43SMatthew Ahrens * Low bits of first word are the first byte (little endian). 4095d7b4d43SMatthew Ahrens */ 4105d7b4d43SMatthew Ahrens for (i = 0; i < psize; i++) { 4115d7b4d43SMatthew Ahrens if (i % sizeof (w) == 0) { 4125d7b4d43SMatthew Ahrens /* beginning of a word */ 4135d7b4d43SMatthew Ahrens w = *bp64; 4145d7b4d43SMatthew Ahrens bp64++; 4155d7b4d43SMatthew Ahrens if (!BPE_IS_PAYLOADWORD(bp, bp64)) 4165d7b4d43SMatthew Ahrens bp64++; 4175d7b4d43SMatthew Ahrens } 4185d7b4d43SMatthew Ahrens buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY); 4195d7b4d43SMatthew Ahrens } 4205d7b4d43SMatthew Ahrens } 4215d7b4d43SMatthew Ahrens 4225d7b4d43SMatthew Ahrens /* 4235d7b4d43SMatthew Ahrens * Fill in the buffer with the (decompressed) payload of the embedded 4245d7b4d43SMatthew Ahrens * blkptr_t. Takes into account compression and byteorder (the payload is 4255d7b4d43SMatthew Ahrens * treated as a stream of bytes). 4265d7b4d43SMatthew Ahrens * Return 0 on success, or ENOSPC if it won't fit in the buffer. 4275d7b4d43SMatthew Ahrens */ 4285d7b4d43SMatthew Ahrens static int 4295d7b4d43SMatthew Ahrens decode_embedded_bp(const blkptr_t *bp, void *buf) 4305d7b4d43SMatthew Ahrens { 4315d7b4d43SMatthew Ahrens int comp; 4325d7b4d43SMatthew Ahrens int lsize, psize; 4335d7b4d43SMatthew Ahrens uint8_t *dst = buf; 4345d7b4d43SMatthew Ahrens uint64_t w = 0; 4355d7b4d43SMatthew Ahrens 4365d7b4d43SMatthew Ahrens lsize = BPE_GET_LSIZE(bp); 4375d7b4d43SMatthew Ahrens psize = BPE_GET_PSIZE(bp); 4385d7b4d43SMatthew Ahrens comp = BP_GET_COMPRESS(bp); 4395d7b4d43SMatthew Ahrens 4405d7b4d43SMatthew Ahrens if (comp != ZIO_COMPRESS_OFF) { 4415d7b4d43SMatthew Ahrens uint8_t dstbuf[BPE_PAYLOAD_SIZE]; 4425d7b4d43SMatthew Ahrens 4435d7b4d43SMatthew Ahrens if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || 4445d7b4d43SMatthew Ahrens decomp_table[comp].decomp_func == NULL) { 4455d7b4d43SMatthew Ahrens grub_printf("compression algorithm not supported\n"); 4465d7b4d43SMatthew Ahrens return (ERR_FSYS_CORRUPT); 4475d7b4d43SMatthew Ahrens } 4485d7b4d43SMatthew Ahrens 4495d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(bp, dstbuf); 4505d7b4d43SMatthew Ahrens decomp_table[comp].decomp_func(dstbuf, buf, psize, lsize); 4515d7b4d43SMatthew Ahrens } else { 4525d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(bp, buf); 4535d7b4d43SMatthew Ahrens } 4545d7b4d43SMatthew Ahrens 4555d7b4d43SMatthew Ahrens return (0); 4565d7b4d43SMatthew Ahrens } 4575d7b4d43SMatthew Ahrens 458cd9c78d9SLin Ling /* 459cd9c78d9SLin Ling * Read in a block of data, verify its checksum, decompress if needed, 460cd9c78d9SLin Ling * and put the uncompressed data in buf. 461cd9c78d9SLin Ling * 462cd9c78d9SLin Ling * Return: 463cd9c78d9SLin Ling * 0 - success 464cd9c78d9SLin Ling * errnum - failure 465cd9c78d9SLin Ling */ 466cd9c78d9SLin Ling static int 467cd9c78d9SLin Ling zio_read(blkptr_t *bp, void *buf, char *stack) 468cd9c78d9SLin Ling { 469cd9c78d9SLin Ling int lsize, psize, comp; 470cd9c78d9SLin Ling char *retbuf; 471cd9c78d9SLin Ling 4725d7b4d43SMatthew Ahrens if (BP_IS_EMBEDDED(bp)) { 4735d7b4d43SMatthew Ahrens if (BPE_GET_ETYPE(bp) != BP_EMBEDDED_TYPE_DATA) { 4745d7b4d43SMatthew Ahrens grub_printf("unsupported embedded BP (type=%u)\n", 4755d7b4d43SMatthew Ahrens (int)BPE_GET_ETYPE(bp)); 4765d7b4d43SMatthew Ahrens return (ERR_FSYS_CORRUPT); 4775d7b4d43SMatthew Ahrens } 4785d7b4d43SMatthew Ahrens return (decode_embedded_bp(bp, buf)); 4795d7b4d43SMatthew Ahrens } 4805d7b4d43SMatthew Ahrens 481cd9c78d9SLin Ling comp = BP_GET_COMPRESS(bp); 482cd9c78d9SLin Ling lsize = BP_GET_LSIZE(bp); 483cd9c78d9SLin Ling psize = BP_GET_PSIZE(bp); 484cd9c78d9SLin Ling 485cd9c78d9SLin Ling if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || 486cd9c78d9SLin Ling (comp != ZIO_COMPRESS_OFF && 487cd9c78d9SLin Ling decomp_table[comp].decomp_func == NULL)) { 488cd9c78d9SLin Ling grub_printf("compression algorithm not supported\n"); 489cd9c78d9SLin Ling return (ERR_FSYS_CORRUPT); 490cd9c78d9SLin Ling } 491cd9c78d9SLin Ling 492cd9c78d9SLin Ling if ((char *)buf < stack && ((char *)buf) + lsize > stack) { 4935d7b4d43SMatthew Ahrens grub_printf("not enough memory to fit %u bytes on stack\n", 4945d7b4d43SMatthew Ahrens lsize); 495cd9c78d9SLin Ling return (ERR_WONT_FIT); 496cd9c78d9SLin Ling } 497cd9c78d9SLin Ling 498cd9c78d9SLin Ling retbuf = buf; 499cd9c78d9SLin Ling if (comp != ZIO_COMPRESS_OFF) { 500cd9c78d9SLin Ling buf = stack; 501cd9c78d9SLin Ling stack += psize; 502cd9c78d9SLin Ling } 503cd9c78d9SLin Ling 504ad135b5dSChristopher Siden if (zio_read_data(bp, buf, stack) != 0) { 505cd9c78d9SLin Ling grub_printf("zio_read_data failed\n"); 506cd9c78d9SLin Ling return (ERR_FSYS_CORRUPT); 507cd9c78d9SLin Ling } 508cd9c78d9SLin Ling 509a6f561b4SSašo Kiselkov if (comp != ZIO_COMPRESS_OFF) { 510a6f561b4SSašo Kiselkov if (decomp_table[comp].decomp_func(buf, retbuf, psize, 511a6f561b4SSašo Kiselkov lsize) != 0) { 512a6f561b4SSašo Kiselkov grub_printf("zio_read decompression failed\n"); 513a6f561b4SSašo Kiselkov return (ERR_FSYS_CORRUPT); 514a6f561b4SSašo Kiselkov } 515a6f561b4SSašo Kiselkov } 516cd9c78d9SLin Ling 517cd9c78d9SLin Ling return (0); 518b1b8ab34Slling } 519b1b8ab34Slling 520b1b8ab34Slling /* 521b1b8ab34Slling * Get the block from a block id. 522b1b8ab34Slling * push the block onto the stack. 523b1b8ab34Slling * 524b1b8ab34Slling * Return: 525b1b8ab34Slling * 0 - success 526b1b8ab34Slling * errnum - failure 527b1b8ab34Slling */ 528b1b8ab34Slling static int 529b1b8ab34Slling dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack) 530b1b8ab34Slling { 531b1b8ab34Slling int idx, level; 532b1b8ab34Slling blkptr_t *bp_array = dn->dn_blkptr; 533b1b8ab34Slling int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 534b1b8ab34Slling blkptr_t *bp, *tmpbuf; 535b1b8ab34Slling 536b1b8ab34Slling bp = (blkptr_t *)stack; 537b1b8ab34Slling stack += sizeof (blkptr_t); 538b1b8ab34Slling 539b1b8ab34Slling tmpbuf = (blkptr_t *)stack; 540b1b8ab34Slling stack += 1<<dn->dn_indblkshift; 541b1b8ab34Slling 542b1b8ab34Slling for (level = dn->dn_nlevels - 1; level >= 0; level--) { 543b1b8ab34Slling idx = (blkid >> (epbs * level)) & ((1<<epbs)-1); 544b1b8ab34Slling *bp = bp_array[idx]; 545b1b8ab34Slling if (level == 0) 546b1b8ab34Slling tmpbuf = buf; 547ae8180dbSlling if (BP_IS_HOLE(bp)) { 548ae8180dbSlling grub_memset(buf, 0, 549ae8180dbSlling dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); 550ae8180dbSlling break; 551ae8180dbSlling } else if (errnum = zio_read(bp, tmpbuf, stack)) { 552b1b8ab34Slling return (errnum); 553ae8180dbSlling } 554b1b8ab34Slling 555b1b8ab34Slling bp_array = tmpbuf; 556b1b8ab34Slling } 557b1b8ab34Slling 558b1b8ab34Slling return (0); 559b1b8ab34Slling } 560b1b8ab34Slling 561b1b8ab34Slling /* 562b1b8ab34Slling * mzap_lookup: Looks up property described by "name" and returns the value 563b1b8ab34Slling * in "value". 564b1b8ab34Slling * 565b1b8ab34Slling * Return: 566b1b8ab34Slling * 0 - success 567b1b8ab34Slling * errnum - failure 568b1b8ab34Slling */ 569b1b8ab34Slling static int 570ad135b5dSChristopher Siden mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name, 5719a686fbcSPaul Dagnelie uint64_t *value) 572b1b8ab34Slling { 573b1b8ab34Slling int i, chunks; 574b1b8ab34Slling mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; 575b1b8ab34Slling 576ad135b5dSChristopher Siden chunks = objsize / MZAP_ENT_LEN - 1; 577b1b8ab34Slling for (i = 0; i < chunks; i++) { 578b1b8ab34Slling if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { 579b1b8ab34Slling *value = mzap_ent[i].mze_value; 580b1b8ab34Slling return (0); 581b1b8ab34Slling } 582b1b8ab34Slling } 583b1b8ab34Slling 584b1b8ab34Slling return (ERR_FSYS_CORRUPT); 585b1b8ab34Slling } 586b1b8ab34Slling 587b1b8ab34Slling static uint64_t 588b1b8ab34Slling zap_hash(uint64_t salt, const char *name) 589b1b8ab34Slling { 590b1b8ab34Slling static uint64_t table[256]; 591b1b8ab34Slling const uint8_t *cp; 592b1b8ab34Slling uint8_t c; 593b1b8ab34Slling uint64_t crc = salt; 594b1b8ab34Slling 595b1b8ab34Slling if (table[128] == 0) { 596b1b8ab34Slling uint64_t *ct; 597b1b8ab34Slling int i, j; 598b1b8ab34Slling for (i = 0; i < 256; i++) { 599b1b8ab34Slling for (ct = table + i, *ct = i, j = 8; j > 0; j--) 600b1b8ab34Slling *ct = (*ct >> 1) ^ (-(*ct & 1) & 601b1b8ab34Slling ZFS_CRC64_POLY); 602b1b8ab34Slling } 603b1b8ab34Slling } 604b1b8ab34Slling 605b1b8ab34Slling if (crc == 0 || table[128] != ZFS_CRC64_POLY) { 606b1b8ab34Slling errnum = ERR_FSYS_CORRUPT; 607b1b8ab34Slling return (0); 608b1b8ab34Slling } 609b1b8ab34Slling 610b1b8ab34Slling for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++) 611b1b8ab34Slling crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF]; 612b1b8ab34Slling 613b1b8ab34Slling /* 614b1b8ab34Slling * Only use 28 bits, since we need 4 bits in the cookie for the 615b1b8ab34Slling * collision differentiator. We MUST use the high bits, since 616ad135b5dSChristopher Siden * those are the ones that we first pay attention to when 617ad135b5dSChristopher Siden * choosing the bucket. 618b1b8ab34Slling */ 619b24ab676SJeff Bonwick crc &= ~((1ULL << (64 - 28)) - 1); 620b1b8ab34Slling 621b1b8ab34Slling return (crc); 622b1b8ab34Slling } 623b1b8ab34Slling 624b1b8ab34Slling /* 625b1b8ab34Slling * Only to be used on 8-bit arrays. 626b1b8ab34Slling * array_len is actual len in bytes (not encoded le_value_length). 627b1b8ab34Slling * buf is null-terminated. 628b1b8ab34Slling */ 629b1b8ab34Slling static int 630b1b8ab34Slling zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk, 631b1b8ab34Slling int array_len, const char *buf) 632b1b8ab34Slling { 633b1b8ab34Slling int bseen = 0; 634b1b8ab34Slling 635b1b8ab34Slling while (bseen < array_len) { 636b1b8ab34Slling struct zap_leaf_array *la = 637b1b8ab34Slling &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array; 638b1b8ab34Slling int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); 639b1b8ab34Slling 640b1b8ab34Slling if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) 641b1b8ab34Slling return (0); 642b1b8ab34Slling 643b1b8ab34Slling if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0) 644b1b8ab34Slling break; 645b1b8ab34Slling chunk = la->la_next; 646b1b8ab34Slling bseen += toread; 647b1b8ab34Slling } 648b1b8ab34Slling return (bseen == array_len); 649b1b8ab34Slling } 650b1b8ab34Slling 651b1b8ab34Slling /* 652b1b8ab34Slling * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the 653b1b8ab34Slling * value for the property "name". 654b1b8ab34Slling * 655b1b8ab34Slling * Return: 656b1b8ab34Slling * 0 - success 657b1b8ab34Slling * errnum - failure 658b1b8ab34Slling */ 659e7cbe64fSgw static int 660b1b8ab34Slling zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h, 661b1b8ab34Slling const char *name, uint64_t *value) 662b1b8ab34Slling { 663b1b8ab34Slling uint16_t chunk; 664b1b8ab34Slling struct zap_leaf_entry *le; 665b1b8ab34Slling 666b1b8ab34Slling /* Verify if this is a valid leaf block */ 667b1b8ab34Slling if (l->l_hdr.lh_block_type != ZBT_LEAF) 668b1b8ab34Slling return (ERR_FSYS_CORRUPT); 669b1b8ab34Slling if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC) 670b1b8ab34Slling return (ERR_FSYS_CORRUPT); 671b1b8ab34Slling 672b1b8ab34Slling for (chunk = l->l_hash[LEAF_HASH(blksft, h)]; 673b1b8ab34Slling chunk != CHAIN_END; chunk = le->le_next) { 674b1b8ab34Slling 675b1b8ab34Slling if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) 676b1b8ab34Slling return (ERR_FSYS_CORRUPT); 677b1b8ab34Slling 678b1b8ab34Slling le = ZAP_LEAF_ENTRY(l, blksft, chunk); 679b1b8ab34Slling 680b1b8ab34Slling /* Verify the chunk entry */ 681b1b8ab34Slling if (le->le_type != ZAP_CHUNK_ENTRY) 682b1b8ab34Slling return (ERR_FSYS_CORRUPT); 683b1b8ab34Slling 684b1b8ab34Slling if (le->le_hash != h) 685b1b8ab34Slling continue; 686b1b8ab34Slling 687b1b8ab34Slling if (zap_leaf_array_equal(l, blksft, le->le_name_chunk, 688b1b8ab34Slling le->le_name_length, name)) { 689b1b8ab34Slling 690b1b8ab34Slling struct zap_leaf_array *la; 691b1b8ab34Slling uint8_t *ip; 692b1b8ab34Slling 693b1b8ab34Slling if (le->le_int_size != 8 || le->le_value_length != 1) 694e37b211cStaylor return (ERR_FSYS_CORRUPT); 695b1b8ab34Slling 696b1b8ab34Slling /* get the uint64_t property value */ 697b1b8ab34Slling la = &ZAP_LEAF_CHUNK(l, blksft, 698b1b8ab34Slling le->le_value_chunk).l_array; 699b1b8ab34Slling ip = la->la_array; 700b1b8ab34Slling 701b1b8ab34Slling *value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 702b1b8ab34Slling (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | 703b1b8ab34Slling (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 704b1b8ab34Slling (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 705b1b8ab34Slling 706b1b8ab34Slling return (0); 707b1b8ab34Slling } 708b1b8ab34Slling } 709b1b8ab34Slling 710b1b8ab34Slling return (ERR_FSYS_CORRUPT); 711b1b8ab34Slling } 712b1b8ab34Slling 713b1b8ab34Slling /* 714b1b8ab34Slling * Fat ZAP lookup 715b1b8ab34Slling * 716b1b8ab34Slling * Return: 717b1b8ab34Slling * 0 - success 718b1b8ab34Slling * errnum - failure 719b1b8ab34Slling */ 720e7cbe64fSgw static int 721b1b8ab34Slling fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, 722ad135b5dSChristopher Siden const char *name, uint64_t *value, char *stack) 723b1b8ab34Slling { 724b1b8ab34Slling zap_leaf_phys_t *l; 725b1b8ab34Slling uint64_t hash, idx, blkid; 726b1b8ab34Slling int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT); 727b1b8ab34Slling 728b1b8ab34Slling /* Verify if this is a fat zap header block */ 729b24ab676SJeff Bonwick if (zap->zap_magic != (uint64_t)ZAP_MAGIC || 730b24ab676SJeff Bonwick zap->zap_flags != 0) 731b1b8ab34Slling return (ERR_FSYS_CORRUPT); 732b1b8ab34Slling 733b1b8ab34Slling hash = zap_hash(zap->zap_salt, name); 734b1b8ab34Slling if (errnum) 735b1b8ab34Slling return (errnum); 736b1b8ab34Slling 737b1b8ab34Slling /* get block id from index */ 738b1b8ab34Slling if (zap->zap_ptrtbl.zt_numblks != 0) { 739b1b8ab34Slling /* external pointer tables not supported */ 740b1b8ab34Slling return (ERR_FSYS_CORRUPT); 741b1b8ab34Slling } 742b1b8ab34Slling idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift); 743b1b8ab34Slling blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))]; 744b1b8ab34Slling 745b1b8ab34Slling /* Get the leaf block */ 746b1b8ab34Slling l = (zap_leaf_phys_t *)stack; 747b1b8ab34Slling stack += 1<<blksft; 748051aabe6Staylor if ((1<<blksft) < sizeof (zap_leaf_phys_t)) 749e37b211cStaylor return (ERR_FSYS_CORRUPT); 750b1b8ab34Slling if (errnum = dmu_read(zap_dnode, blkid, l, stack)) 751b1b8ab34Slling return (errnum); 752b1b8ab34Slling 753b1b8ab34Slling return (zap_leaf_lookup(l, blksft, hash, name, value)); 754b1b8ab34Slling } 755b1b8ab34Slling 756b1b8ab34Slling /* 757b1b8ab34Slling * Read in the data of a zap object and find the value for a matching 758b1b8ab34Slling * property name. 759b1b8ab34Slling * 760b1b8ab34Slling * Return: 761b1b8ab34Slling * 0 - success 762b1b8ab34Slling * errnum - failure 763b1b8ab34Slling */ 764b1b8ab34Slling static int 765ad135b5dSChristopher Siden zap_lookup(dnode_phys_t *zap_dnode, const char *name, uint64_t *val, 766ad135b5dSChristopher Siden char *stack) 767b1b8ab34Slling { 768b1b8ab34Slling uint64_t block_type; 769b1b8ab34Slling int size; 770b1b8ab34Slling void *zapbuf; 771b1b8ab34Slling 772b1b8ab34Slling /* Read in the first block of the zap object data. */ 773b1b8ab34Slling zapbuf = stack; 774b1b8ab34Slling size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; 775b1b8ab34Slling stack += size; 7760a586ceaSMark Shellenbaum 777ad135b5dSChristopher Siden if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) != 0) 778b1b8ab34Slling return (errnum); 779b1b8ab34Slling 780b1b8ab34Slling block_type = *((uint64_t *)zapbuf); 781b1b8ab34Slling 782b1b8ab34Slling if (block_type == ZBT_MICRO) { 783b1b8ab34Slling return (mzap_lookup(zapbuf, size, name, val)); 784b1b8ab34Slling } else if (block_type == ZBT_HEADER) { 785b1b8ab34Slling /* this is a fat zap */ 786b1b8ab34Slling return (fzap_lookup(zap_dnode, zapbuf, name, 787b1b8ab34Slling val, stack)); 788b1b8ab34Slling } 789b1b8ab34Slling 790b1b8ab34Slling return (ERR_FSYS_CORRUPT); 791b1b8ab34Slling } 792b1b8ab34Slling 793ad135b5dSChristopher Siden typedef struct zap_attribute { 794ad135b5dSChristopher Siden int za_integer_length; 795ad135b5dSChristopher Siden uint64_t za_num_integers; 796ad135b5dSChristopher Siden uint64_t za_first_integer; 797ad135b5dSChristopher Siden char *za_name; 798ad135b5dSChristopher Siden } zap_attribute_t; 799ad135b5dSChristopher Siden 800ad135b5dSChristopher Siden typedef int (zap_cb_t)(zap_attribute_t *za, void *arg, char *stack); 801ad135b5dSChristopher Siden 802ad135b5dSChristopher Siden static int 803ad135b5dSChristopher Siden zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack) 804ad135b5dSChristopher Siden { 805ad135b5dSChristopher Siden uint32_t size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; 806ad135b5dSChristopher Siden zap_attribute_t za; 807ad135b5dSChristopher Siden int i; 808ad135b5dSChristopher Siden mzap_phys_t *mzp = (mzap_phys_t *)stack; 809ad135b5dSChristopher Siden stack += size; 810ad135b5dSChristopher Siden 811ad135b5dSChristopher Siden if ((errnum = dmu_read(zap_dnode, 0, mzp, stack)) != 0) 812ad135b5dSChristopher Siden return (errnum); 813ad135b5dSChristopher Siden 814ad135b5dSChristopher Siden /* 815ad135b5dSChristopher Siden * Iteration over fatzap objects has not yet been implemented. 816ad135b5dSChristopher Siden * If we encounter a pool in which there are more features for 817ad135b5dSChristopher Siden * read than can fit inside a microzap (i.e., more than 2048 818ad135b5dSChristopher Siden * features for read), we can add support for fatzap iteration. 819ad135b5dSChristopher Siden * For now, fail. 820ad135b5dSChristopher Siden */ 821ad135b5dSChristopher Siden if (mzp->mz_block_type != ZBT_MICRO) { 822ad135b5dSChristopher Siden grub_printf("feature information stored in fatzap, pool " 823ad135b5dSChristopher Siden "version not supported\n"); 824ad135b5dSChristopher Siden return (1); 825ad135b5dSChristopher Siden } 826ad135b5dSChristopher Siden 827ad135b5dSChristopher Siden za.za_integer_length = 8; 828ad135b5dSChristopher Siden za.za_num_integers = 1; 829ad135b5dSChristopher Siden for (i = 0; i < size / MZAP_ENT_LEN - 1; i++) { 830ad135b5dSChristopher Siden mzap_ent_phys_t *mzep = &mzp->mz_chunk[i]; 831ad135b5dSChristopher Siden int err; 832ad135b5dSChristopher Siden 833ad135b5dSChristopher Siden za.za_first_integer = mzep->mze_value; 834ad135b5dSChristopher Siden za.za_name = mzep->mze_name; 835ad135b5dSChristopher Siden err = cb(&za, arg, stack); 836ad135b5dSChristopher Siden if (err != 0) 837ad135b5dSChristopher Siden return (err); 838ad135b5dSChristopher Siden } 839ad135b5dSChristopher Siden 840ad135b5dSChristopher Siden return (0); 841ad135b5dSChristopher Siden } 842ad135b5dSChristopher Siden 843b1b8ab34Slling /* 844b1b8ab34Slling * Get the dnode of an object number from the metadnode of an object set. 845b1b8ab34Slling * 846b1b8ab34Slling * Input 847b1b8ab34Slling * mdn - metadnode to get the object dnode 848b1b8ab34Slling * objnum - object number for the object dnode 8495d7b4d43SMatthew Ahrens * type - if nonzero, object must be of this type 850b1b8ab34Slling * buf - data buffer that holds the returning dnode 851b1b8ab34Slling * stack - scratch area 852b1b8ab34Slling * 853b1b8ab34Slling * Return: 854b1b8ab34Slling * 0 - success 855b1b8ab34Slling * errnum - failure 856b1b8ab34Slling */ 857b1b8ab34Slling static int 858b1b8ab34Slling dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf, 8599a686fbcSPaul Dagnelie char *stack) 860b1b8ab34Slling { 861b1b8ab34Slling uint64_t blkid, blksz; /* the block id this object dnode is in */ 862b1b8ab34Slling int epbs; /* shift of number of dnodes in a block */ 863b1b8ab34Slling int idx; /* index within a block */ 864b1b8ab34Slling dnode_phys_t *dnbuf; 865b1b8ab34Slling 866b1b8ab34Slling blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT; 867b1b8ab34Slling epbs = zfs_log2(blksz) - DNODE_SHIFT; 868b1b8ab34Slling blkid = objnum >> epbs; 869b1b8ab34Slling idx = objnum & ((1<<epbs)-1); 870b1b8ab34Slling 871b1b8ab34Slling if (dnode_buf != NULL && dnode_mdn == mdn && 872b1b8ab34Slling objnum >= dnode_start && objnum < dnode_end) { 873b1b8ab34Slling grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE); 874b1b8ab34Slling VERIFY_DN_TYPE(buf, type); 875b1b8ab34Slling return (0); 876b1b8ab34Slling } 877b1b8ab34Slling 878b1b8ab34Slling if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) { 879b1b8ab34Slling dnbuf = dnode_buf; 880b1b8ab34Slling dnode_mdn = mdn; 881b1b8ab34Slling dnode_start = blkid << epbs; 882b1b8ab34Slling dnode_end = (blkid + 1) << epbs; 883b1b8ab34Slling } else { 884b1b8ab34Slling dnbuf = (dnode_phys_t *)stack; 885b1b8ab34Slling stack += blksz; 886b1b8ab34Slling } 887b1b8ab34Slling 888b1b8ab34Slling if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)) 889b1b8ab34Slling return (errnum); 890b1b8ab34Slling 891b1b8ab34Slling grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); 892b1b8ab34Slling VERIFY_DN_TYPE(buf, type); 893b1b8ab34Slling 894b1b8ab34Slling return (0); 895b1b8ab34Slling } 896b1b8ab34Slling 897b1b8ab34Slling /* 898eb2bd662Svikram * Check if this is a special file that resides at the top 899eb2bd662Svikram * dataset of the pool. Currently this is the GRUB menu, 900eb2bd662Svikram * boot signature and boot signature backup. 901b1b8ab34Slling * str starts with '/'. 902b1b8ab34Slling */ 903b1b8ab34Slling static int 904eb2bd662Svikram is_top_dataset_file(char *str) 905b1b8ab34Slling { 906b1b8ab34Slling char *tptr; 907b1b8ab34Slling 908b1b8ab34Slling if ((tptr = grub_strstr(str, "menu.lst")) && 909b1b8ab34Slling (tptr[8] == '\0' || tptr[8] == ' ') && 910b1b8ab34Slling *(tptr-1) == '/') 911b1b8ab34Slling return (1); 912b1b8ab34Slling 913eb2bd662Svikram if (grub_strncmp(str, BOOTSIGN_DIR"/", 9141183b401Svikram grub_strlen(BOOTSIGN_DIR) + 1) == 0) 915eb2bd662Svikram return (1); 916eb2bd662Svikram 917eb2bd662Svikram if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0) 918eb2bd662Svikram return (1); 919eb2bd662Svikram 920b1b8ab34Slling return (0); 921b1b8ab34Slling } 922b1b8ab34Slling 923ad135b5dSChristopher Siden static int 924ad135b5dSChristopher Siden check_feature(zap_attribute_t *za, void *arg, char *stack) 925ad135b5dSChristopher Siden { 926ad135b5dSChristopher Siden const char **names = arg; 927ad135b5dSChristopher Siden int i; 928ad135b5dSChristopher Siden 929ad135b5dSChristopher Siden if (za->za_first_integer == 0) 930ad135b5dSChristopher Siden return (0); 931ad135b5dSChristopher Siden 932ad135b5dSChristopher Siden for (i = 0; names[i] != NULL; i++) { 933ad135b5dSChristopher Siden if (grub_strcmp(za->za_name, names[i]) == 0) { 934ad135b5dSChristopher Siden return (0); 935ad135b5dSChristopher Siden } 936ad135b5dSChristopher Siden } 937ad135b5dSChristopher Siden grub_printf("missing feature for read '%s'\n", za->za_name); 938ad135b5dSChristopher Siden return (ERR_NEWER_VERSION); 939ad135b5dSChristopher Siden } 940ad135b5dSChristopher Siden 941b1b8ab34Slling /* 942b1b8ab34Slling * Get the file dnode for a given file name where mdn is the meta dnode 943b1b8ab34Slling * for this ZFS object set. When found, place the file dnode in dn. 944b1b8ab34Slling * The 'path' argument will be mangled. 945b1b8ab34Slling * 946b1b8ab34Slling * Return: 947b1b8ab34Slling * 0 - success 948b1b8ab34Slling * errnum - failure 949b1b8ab34Slling */ 950b1b8ab34Slling static int 951b1b8ab34Slling dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn, 952b1b8ab34Slling char *stack) 953b1b8ab34Slling { 954e7437265Sahrens uint64_t objnum, version; 955b1b8ab34Slling char *cname, ch; 956b1b8ab34Slling 957b1b8ab34Slling if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, 958b1b8ab34Slling dn, stack)) 959b1b8ab34Slling return (errnum); 960b1b8ab34Slling 961e7437265Sahrens if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)) 962e7437265Sahrens return (errnum); 963e7437265Sahrens if (version > ZPL_VERSION) 964e7437265Sahrens return (-1); 965e7437265Sahrens 966b1b8ab34Slling if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)) 967b1b8ab34Slling return (errnum); 968b1b8ab34Slling 969b1b8ab34Slling if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, 970b1b8ab34Slling dn, stack)) 971b1b8ab34Slling return (errnum); 972b1b8ab34Slling 973b1b8ab34Slling /* skip leading slashes */ 974b1b8ab34Slling while (*path == '/') 975b1b8ab34Slling path++; 976b1b8ab34Slling 977ad135b5dSChristopher Siden while (*path && !grub_isspace(*path)) { 978b1b8ab34Slling 979b1b8ab34Slling /* get the next component name */ 980b1b8ab34Slling cname = path; 981ad135b5dSChristopher Siden while (*path && !grub_isspace(*path) && *path != '/') 982b1b8ab34Slling path++; 983b1b8ab34Slling ch = *path; 984b1b8ab34Slling *path = 0; /* ensure null termination */ 985b1b8ab34Slling 986b1b8ab34Slling if (errnum = zap_lookup(dn, cname, &objnum, stack)) 987b1b8ab34Slling return (errnum); 988b1b8ab34Slling 989e7437265Sahrens objnum = ZFS_DIRENT_OBJ(objnum); 990b1b8ab34Slling if (errnum = dnode_get(mdn, objnum, 0, dn, stack)) 991b1b8ab34Slling return (errnum); 992b1b8ab34Slling 993b1b8ab34Slling *path = ch; 994b1b8ab34Slling while (*path == '/') 995b1b8ab34Slling path++; 996b1b8ab34Slling } 997b1b8ab34Slling 998b1b8ab34Slling /* We found the dnode for this file. Verify if it is a plain file. */ 999b1b8ab34Slling VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS); 1000b1b8ab34Slling 1001b1b8ab34Slling return (0); 1002b1b8ab34Slling } 1003b1b8ab34Slling 1004b1b8ab34Slling /* 1005b1b8ab34Slling * Get the default 'bootfs' property value from the rootpool. 1006b1b8ab34Slling * 1007b1b8ab34Slling * Return: 1008b1b8ab34Slling * 0 - success 1009b1b8ab34Slling * errnum -failure 1010b1b8ab34Slling */ 1011b1b8ab34Slling static int 1012b1b8ab34Slling get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack) 1013b1b8ab34Slling { 1014b1b8ab34Slling uint64_t objnum = 0; 1015b1b8ab34Slling dnode_phys_t *dn = (dnode_phys_t *)stack; 1016b1b8ab34Slling stack += DNODE_SIZE; 1017b1b8ab34Slling 1018ae8180dbSlling if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 1019b1b8ab34Slling DMU_OT_OBJECT_DIRECTORY, dn, stack)) 1020ae8180dbSlling return (errnum); 1021b1b8ab34Slling 1022b1b8ab34Slling /* 1023b1b8ab34Slling * find the object number for 'pool_props', and get the dnode 1024b1b8ab34Slling * of the 'pool_props'. 1025b1b8ab34Slling */ 1026b1b8ab34Slling if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack)) 1027b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 1028b1b8ab34Slling 1029ae8180dbSlling if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)) 1030ae8180dbSlling return (errnum); 1031b1b8ab34Slling 1032b1b8ab34Slling if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) 1033b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 1034b1b8ab34Slling 1035b1b8ab34Slling if (!objnum) 1036b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 1037b1b8ab34Slling 1038b1b8ab34Slling *obj = objnum; 1039b1b8ab34Slling return (0); 1040b1b8ab34Slling } 1041b1b8ab34Slling 1042ad135b5dSChristopher Siden /* 1043ad135b5dSChristopher Siden * List of pool features that the grub implementation of ZFS supports for 1044ad135b5dSChristopher Siden * read. Note that features that are only required for write do not need 1045ad135b5dSChristopher Siden * to be listed here since grub opens pools in read-only mode. 104633915f34SRichard Lowe * 104733915f34SRichard Lowe * When this list is updated the version number in usr/src/grub/capability 104833915f34SRichard Lowe * must be incremented to ensure the new grub gets installed. 1049ad135b5dSChristopher Siden */ 1050ad135b5dSChristopher Siden static const char *spa_feature_names[] = { 1051a6f561b4SSašo Kiselkov "org.illumos:lz4_compress", 105243466aaeSMax Grossman "com.delphix:hole_birth", 10532acef22dSMatthew Ahrens "com.delphix:extensible_dataset", 10545d7b4d43SMatthew Ahrens "com.delphix:embedded_data", 1055b5152584SMatthew Ahrens "org.open-zfs:large_blocks", 105645818ee1SMatthew Ahrens "org.illumos:sha512", 1057ad135b5dSChristopher Siden NULL 1058ad135b5dSChristopher Siden }; 1059ad135b5dSChristopher Siden 1060ad135b5dSChristopher Siden /* 1061ad135b5dSChristopher Siden * Checks whether the MOS features that are active are supported by this 1062ad135b5dSChristopher Siden * (GRUB's) implementation of ZFS. 1063ad135b5dSChristopher Siden * 1064ad135b5dSChristopher Siden * Return: 1065ad135b5dSChristopher Siden * 0: Success. 1066ad135b5dSChristopher Siden * errnum: Failure. 1067ad135b5dSChristopher Siden */ 1068ad135b5dSChristopher Siden static int 1069ad135b5dSChristopher Siden check_mos_features(dnode_phys_t *mosmdn, char *stack) 1070ad135b5dSChristopher Siden { 1071ad135b5dSChristopher Siden uint64_t objnum; 1072ad135b5dSChristopher Siden dnode_phys_t *dn; 1073ad135b5dSChristopher Siden uint8_t error = 0; 1074ad135b5dSChristopher Siden 1075ad135b5dSChristopher Siden dn = (dnode_phys_t *)stack; 1076ad135b5dSChristopher Siden stack += DNODE_SIZE; 1077ad135b5dSChristopher Siden 1078ad135b5dSChristopher Siden if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 1079ad135b5dSChristopher Siden DMU_OT_OBJECT_DIRECTORY, dn, stack)) != 0) 1080ad135b5dSChristopher Siden return (errnum); 1081ad135b5dSChristopher Siden 1082ad135b5dSChristopher Siden /* 1083ad135b5dSChristopher Siden * Find the object number for 'features_for_read' and retrieve its 1084ad135b5dSChristopher Siden * corresponding dnode. Note that we don't check features_for_write 1085ad135b5dSChristopher Siden * because GRUB is not opening the pool for write. 1086ad135b5dSChristopher Siden */ 1087ad135b5dSChristopher Siden if ((errnum = zap_lookup(dn, DMU_POOL_FEATURES_FOR_READ, &objnum, 1088ad135b5dSChristopher Siden stack)) != 0) 1089ad135b5dSChristopher Siden return (errnum); 1090ad135b5dSChristopher Siden 1091ad135b5dSChristopher Siden if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA, 1092ad135b5dSChristopher Siden dn, stack)) != 0) 1093ad135b5dSChristopher Siden return (errnum); 1094ad135b5dSChristopher Siden 1095ad135b5dSChristopher Siden return (zap_iterate(dn, check_feature, spa_feature_names, stack)); 1096ad135b5dSChristopher Siden } 1097ad135b5dSChristopher Siden 1098b1b8ab34Slling /* 1099b1b8ab34Slling * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), 1100b1b8ab34Slling * e.g. pool/rootfs, or a given object number (obj), e.g. the object number 1101b1b8ab34Slling * of pool/rootfs. 1102b1b8ab34Slling * 1103b1b8ab34Slling * If no fsname and no obj are given, return the DSL_DIR metadnode. 1104b1b8ab34Slling * If fsname is given, return its metadnode and its matching object number. 1105b1b8ab34Slling * If only obj is given, return the metadnode for this object number. 1106b1b8ab34Slling * 1107b1b8ab34Slling * Return: 1108b1b8ab34Slling * 0 - success 1109b1b8ab34Slling * errnum - failure 1110b1b8ab34Slling */ 1111b1b8ab34Slling static int 1112b1b8ab34Slling get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, uint64_t *obj, 1113b1b8ab34Slling dnode_phys_t *mdn, char *stack) 1114b1b8ab34Slling { 1115b1b8ab34Slling uint64_t objnum, headobj; 1116b1b8ab34Slling char *cname, ch; 1117b1b8ab34Slling blkptr_t *bp; 1118b1b8ab34Slling objset_phys_t *osp; 1119fe3e2633SEric Taylor int issnapshot = 0; 1120fe3e2633SEric Taylor char *snapname; 1121b1b8ab34Slling 1122b1b8ab34Slling if (fsname == NULL && obj) { 1123b1b8ab34Slling headobj = *obj; 1124b1b8ab34Slling goto skip; 1125b1b8ab34Slling } 1126b1b8ab34Slling 1127b1b8ab34Slling if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 1128b1b8ab34Slling DMU_OT_OBJECT_DIRECTORY, mdn, stack)) 1129b1b8ab34Slling return (errnum); 1130b1b8ab34Slling 1131b1b8ab34Slling if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, 1132b1b8ab34Slling stack)) 1133b1b8ab34Slling return (errnum); 1134b1b8ab34Slling 11352acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, objnum, 0, mdn, stack)) 1136b1b8ab34Slling return (errnum); 1137b1b8ab34Slling 1138b1b8ab34Slling if (fsname == NULL) { 1139b1b8ab34Slling headobj = 1140b1b8ab34Slling ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; 1141b1b8ab34Slling goto skip; 1142b1b8ab34Slling } 1143b1b8ab34Slling 1144b1b8ab34Slling /* take out the pool name */ 1145ad135b5dSChristopher Siden while (*fsname && !grub_isspace(*fsname) && *fsname != '/') 1146b1b8ab34Slling fsname++; 1147b1b8ab34Slling 1148ad135b5dSChristopher Siden while (*fsname && !grub_isspace(*fsname)) { 1149b1b8ab34Slling uint64_t childobj; 1150b1b8ab34Slling 1151b1b8ab34Slling while (*fsname == '/') 1152b1b8ab34Slling fsname++; 1153b1b8ab34Slling 1154b1b8ab34Slling cname = fsname; 1155ad135b5dSChristopher Siden while (*fsname && !grub_isspace(*fsname) && *fsname != '/') 1156b1b8ab34Slling fsname++; 1157b1b8ab34Slling ch = *fsname; 1158b1b8ab34Slling *fsname = 0; 1159b1b8ab34Slling 1160fe3e2633SEric Taylor snapname = cname; 1161ad135b5dSChristopher Siden while (*snapname && !grub_isspace(*snapname) && *snapname != 1162ad135b5dSChristopher Siden '@') 1163fe3e2633SEric Taylor snapname++; 1164fe3e2633SEric Taylor if (*snapname == '@') { 1165fe3e2633SEric Taylor issnapshot = 1; 1166fe3e2633SEric Taylor *snapname = 0; 1167fe3e2633SEric Taylor } 1168b1b8ab34Slling childobj = 1169b1b8ab34Slling ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; 1170b1b8ab34Slling if (errnum = dnode_get(mosmdn, childobj, 1171b1b8ab34Slling DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)) 1172b1b8ab34Slling return (errnum); 1173b1b8ab34Slling 1174ae8180dbSlling if (zap_lookup(mdn, cname, &objnum, stack)) 1175ae8180dbSlling return (ERR_FILESYSTEM_NOT_FOUND); 1176b1b8ab34Slling 11772acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, objnum, 0, 1178b1b8ab34Slling mdn, stack)) 1179b1b8ab34Slling return (errnum); 1180b1b8ab34Slling 1181b1b8ab34Slling *fsname = ch; 1182fe3e2633SEric Taylor if (issnapshot) 1183fe3e2633SEric Taylor *snapname = '@'; 1184b1b8ab34Slling } 1185b1b8ab34Slling headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; 1186b1b8ab34Slling if (obj) 1187b1b8ab34Slling *obj = headobj; 1188b1b8ab34Slling 1189b1b8ab34Slling skip: 11902acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, headobj, 0, mdn, stack)) 1191b1b8ab34Slling return (errnum); 1192fe3e2633SEric Taylor if (issnapshot) { 1193fe3e2633SEric Taylor uint64_t snapobj; 1194fe3e2633SEric Taylor 1195fe3e2633SEric Taylor snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> 1196fe3e2633SEric Taylor ds_snapnames_zapobj; 1197b1b8ab34Slling 1198fe3e2633SEric Taylor if (errnum = dnode_get(mosmdn, snapobj, 1199fe3e2633SEric Taylor DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)) 1200fe3e2633SEric Taylor return (errnum); 1201fe3e2633SEric Taylor if (zap_lookup(mdn, snapname + 1, &headobj, stack)) 1202fe3e2633SEric Taylor return (ERR_FILESYSTEM_NOT_FOUND); 12032acef22dSMatthew Ahrens if (errnum = dnode_get(mosmdn, headobj, 0, mdn, stack)) 1204fe3e2633SEric Taylor return (errnum); 1205fe3e2633SEric Taylor if (obj) 1206fe3e2633SEric Taylor *obj = headobj; 1207fe3e2633SEric Taylor } 1208b1b8ab34Slling 1209b1b8ab34Slling bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; 1210b1b8ab34Slling osp = (objset_phys_t *)stack; 1211b1b8ab34Slling stack += sizeof (objset_phys_t); 1212b1b8ab34Slling if (errnum = zio_read(bp, osp, stack)) 1213b1b8ab34Slling return (errnum); 1214b1b8ab34Slling 1215b1b8ab34Slling grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); 1216b1b8ab34Slling 1217b1b8ab34Slling return (0); 1218b1b8ab34Slling } 1219b1b8ab34Slling 1220b1b8ab34Slling /* 1221e7cbe64fSgw * For a given XDR packed nvlist, verify the first 4 bytes and move on. 1222b1b8ab34Slling * 1223e7cbe64fSgw * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : 1224b1b8ab34Slling * 1225b1b8ab34Slling * encoding method/host endian (4 bytes) 1226b1b8ab34Slling * nvl_version (4 bytes) 1227b1b8ab34Slling * nvl_nvflag (4 bytes) 1228b1b8ab34Slling * encoded nvpairs: 1229b1b8ab34Slling * encoded size of the nvpair (4 bytes) 1230b1b8ab34Slling * decoded size of the nvpair (4 bytes) 1231b1b8ab34Slling * name string size (4 bytes) 1232b1b8ab34Slling * name string data (sizeof(NV_ALIGN4(string)) 1233b1b8ab34Slling * data type (4 bytes) 1234b1b8ab34Slling * # of elements in the nvpair (4 bytes) 1235b1b8ab34Slling * data 1236b1b8ab34Slling * 2 zero's for the last nvpair 1237b1b8ab34Slling * (end of the entire list) (8 bytes) 1238b1b8ab34Slling * 1239b1b8ab34Slling * Return: 1240b1b8ab34Slling * 0 - success 1241b1b8ab34Slling * 1 - failure 1242b1b8ab34Slling */ 1243e7cbe64fSgw static int 1244e7cbe64fSgw nvlist_unpack(char *nvlist, char **out) 1245b1b8ab34Slling { 1246b1b8ab34Slling /* Verify if the 1st and 2nd byte in the nvlist are valid. */ 1247b1b8ab34Slling if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) 1248b1b8ab34Slling return (1); 1249b1b8ab34Slling 1250ad135b5dSChristopher Siden *out = nvlist + 4; 1251e7cbe64fSgw return (0); 1252e7cbe64fSgw } 1253e7cbe64fSgw 1254e7cbe64fSgw static char * 1255e7cbe64fSgw nvlist_array(char *nvlist, int index) 1256e7cbe64fSgw { 1257e7cbe64fSgw int i, encode_size; 1258e7cbe64fSgw 1259e7cbe64fSgw for (i = 0; i < index; i++) { 1260e7cbe64fSgw /* skip the header, nvl_version, and nvl_nvflag */ 1261e7cbe64fSgw nvlist = nvlist + 4 * 2; 1262e7cbe64fSgw 1263e7cbe64fSgw while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) 1264e7cbe64fSgw nvlist += encode_size; /* goto the next nvpair */ 1265e7cbe64fSgw 1266e7cbe64fSgw nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ 1267e7cbe64fSgw } 1268e7cbe64fSgw 1269e7cbe64fSgw return (nvlist); 1270e7cbe64fSgw } 1271e7cbe64fSgw 1272ad135b5dSChristopher Siden /* 1273ad135b5dSChristopher Siden * The nvlist_next_nvpair() function returns a handle to the next nvpair in the 1274ad135b5dSChristopher Siden * list following nvpair. If nvpair is NULL, the first pair is returned. If 1275ad135b5dSChristopher Siden * nvpair is the last pair in the nvlist, NULL is returned. 1276ad135b5dSChristopher Siden */ 1277ad135b5dSChristopher Siden static char * 1278ad135b5dSChristopher Siden nvlist_next_nvpair(char *nvl, char *nvpair) 1279ad135b5dSChristopher Siden { 1280ad135b5dSChristopher Siden char *cur, *prev; 1281ad135b5dSChristopher Siden int encode_size; 1282ad135b5dSChristopher Siden 1283ad135b5dSChristopher Siden if (nvl == NULL) 1284ad135b5dSChristopher Siden return (NULL); 1285ad135b5dSChristopher Siden 1286ad135b5dSChristopher Siden if (nvpair == NULL) { 1287ad135b5dSChristopher Siden /* skip over nvl_version and nvl_nvflag */ 1288ad135b5dSChristopher Siden nvpair = nvl + 4 * 2; 1289ad135b5dSChristopher Siden } else { 1290ad135b5dSChristopher Siden /* skip to the next nvpair */ 1291ad135b5dSChristopher Siden encode_size = BSWAP_32(*(uint32_t *)nvpair); 1292ad135b5dSChristopher Siden nvpair += encode_size; 1293ad135b5dSChristopher Siden } 1294ad135b5dSChristopher Siden 1295ad135b5dSChristopher Siden /* 8 bytes of 0 marks the end of the list */ 1296ad135b5dSChristopher Siden if (*(uint64_t *)nvpair == 0) 1297ad135b5dSChristopher Siden return (NULL); 1298ad135b5dSChristopher Siden 1299ad135b5dSChristopher Siden return (nvpair); 1300ad135b5dSChristopher Siden } 1301ad135b5dSChristopher Siden 1302ad135b5dSChristopher Siden /* 1303ad135b5dSChristopher Siden * This function returns 0 on success and 1 on failure. On success, a string 1304ad135b5dSChristopher Siden * containing the name of nvpair is saved in buf. 1305ad135b5dSChristopher Siden */ 1306e7cbe64fSgw static int 1307ad135b5dSChristopher Siden nvpair_name(char *nvp, char *buf, int buflen) 1308ad135b5dSChristopher Siden { 1309ad135b5dSChristopher Siden int len; 1310ad135b5dSChristopher Siden 1311ad135b5dSChristopher Siden /* skip over encode/decode size */ 1312ad135b5dSChristopher Siden nvp += 4 * 2; 1313ad135b5dSChristopher Siden 1314ad135b5dSChristopher Siden len = BSWAP_32(*(uint32_t *)nvp); 1315ad135b5dSChristopher Siden if (buflen < len + 1) 1316ad135b5dSChristopher Siden return (1); 1317ad135b5dSChristopher Siden 1318ad135b5dSChristopher Siden grub_memmove(buf, nvp + 4, len); 1319ad135b5dSChristopher Siden buf[len] = '\0'; 1320ad135b5dSChristopher Siden 1321ad135b5dSChristopher Siden return (0); 1322ad135b5dSChristopher Siden } 1323ad135b5dSChristopher Siden 1324ad135b5dSChristopher Siden /* 1325ad135b5dSChristopher Siden * This function retrieves the value of the nvpair in the form of enumerated 1326ad135b5dSChristopher Siden * type data_type_t. This is used to determine the appropriate type to pass to 1327ad135b5dSChristopher Siden * nvpair_value(). 1328ad135b5dSChristopher Siden */ 1329ad135b5dSChristopher Siden static int 1330ad135b5dSChristopher Siden nvpair_type(char *nvp) 1331e7cbe64fSgw { 1332ad135b5dSChristopher Siden int name_len, type; 1333ad135b5dSChristopher Siden 1334ad135b5dSChristopher Siden /* skip over encode/decode size */ 1335ad135b5dSChristopher Siden nvp += 4 * 2; 1336ad135b5dSChristopher Siden 1337ad135b5dSChristopher Siden /* skip over name_len */ 1338ad135b5dSChristopher Siden name_len = BSWAP_32(*(uint32_t *)nvp); 1339ad135b5dSChristopher Siden nvp += 4; 1340ad135b5dSChristopher Siden 1341ad135b5dSChristopher Siden /* skip over name */ 1342ad135b5dSChristopher Siden nvp = nvp + ((name_len + 3) & ~3); /* align */ 1343ad135b5dSChristopher Siden 1344ad135b5dSChristopher Siden type = BSWAP_32(*(uint32_t *)nvp); 1345ad135b5dSChristopher Siden 1346ad135b5dSChristopher Siden return (type); 1347ad135b5dSChristopher Siden } 1348ad135b5dSChristopher Siden 1349ad135b5dSChristopher Siden static int 1350ad135b5dSChristopher Siden nvpair_value(char *nvp, void *val, int valtype, int *nelmp) 1351ad135b5dSChristopher Siden { 1352ad135b5dSChristopher Siden int name_len, type, slen; 1353ad135b5dSChristopher Siden char *strval = val; 1354e7cbe64fSgw uint64_t *intval = val; 1355e7cbe64fSgw 1356ad135b5dSChristopher Siden /* skip over encode/decode size */ 1357ad135b5dSChristopher Siden nvp += 4 * 2; 1358b1b8ab34Slling 1359ad135b5dSChristopher Siden /* skip over name_len */ 1360ad135b5dSChristopher Siden name_len = BSWAP_32(*(uint32_t *)nvp); 1361ad135b5dSChristopher Siden nvp += 4; 1362b1b8ab34Slling 1363ad135b5dSChristopher Siden /* skip over name */ 1364ad135b5dSChristopher Siden nvp = nvp + ((name_len + 3) & ~3); /* align */ 1365b1b8ab34Slling 1366ad135b5dSChristopher Siden /* skip over type */ 1367ad135b5dSChristopher Siden type = BSWAP_32(*(uint32_t *)nvp); 1368ad135b5dSChristopher Siden nvp += 4; 1369b1b8ab34Slling 1370ad135b5dSChristopher Siden if (type == valtype) { 1371ad135b5dSChristopher Siden int nelm; 1372b1b8ab34Slling 1373ad135b5dSChristopher Siden nelm = BSWAP_32(*(uint32_t *)nvp); 1374ad135b5dSChristopher Siden if (valtype != DATA_TYPE_BOOLEAN && nelm < 1) 1375ad135b5dSChristopher Siden return (1); 1376ad135b5dSChristopher Siden nvp += 4; 1377b1b8ab34Slling 1378ad135b5dSChristopher Siden switch (valtype) { 1379ad135b5dSChristopher Siden case DATA_TYPE_BOOLEAN: 1380ad135b5dSChristopher Siden return (0); 1381b1b8ab34Slling 1382ad135b5dSChristopher Siden case DATA_TYPE_STRING: 1383ad135b5dSChristopher Siden slen = BSWAP_32(*(uint32_t *)nvp); 1384ad135b5dSChristopher Siden nvp += 4; 1385ad135b5dSChristopher Siden grub_memmove(strval, nvp, slen); 1386ad135b5dSChristopher Siden strval[slen] = '\0'; 1387ad135b5dSChristopher Siden return (0); 1388b1b8ab34Slling 1389ad135b5dSChristopher Siden case DATA_TYPE_UINT64: 1390ad135b5dSChristopher Siden *intval = BSWAP_64(*(uint64_t *)nvp); 1391ad135b5dSChristopher Siden return (0); 1392e7cbe64fSgw 1393ad135b5dSChristopher Siden case DATA_TYPE_NVLIST: 1394ad135b5dSChristopher Siden *(void **)val = (void *)nvp; 1395ad135b5dSChristopher Siden return (0); 1396e7cbe64fSgw 1397ad135b5dSChristopher Siden case DATA_TYPE_NVLIST_ARRAY: 1398ad135b5dSChristopher Siden *(void **)val = (void *)nvp; 1399ad135b5dSChristopher Siden if (nelmp) 1400ad135b5dSChristopher Siden *nelmp = nelm; 1401ad135b5dSChristopher Siden return (0); 1402b1b8ab34Slling } 1403b1b8ab34Slling } 1404b1b8ab34Slling 1405b1b8ab34Slling return (1); 1406b1b8ab34Slling } 1407b1b8ab34Slling 1408ad135b5dSChristopher Siden static int 1409ad135b5dSChristopher Siden nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, 1410ad135b5dSChristopher Siden int *nelmp) 1411ad135b5dSChristopher Siden { 1412ad135b5dSChristopher Siden char *nvpair; 1413ad135b5dSChristopher Siden 1414ad135b5dSChristopher Siden for (nvpair = nvlist_next_nvpair(nvlist, NULL); 1415ad135b5dSChristopher Siden nvpair != NULL; 1416ad135b5dSChristopher Siden nvpair = nvlist_next_nvpair(nvlist, nvpair)) { 1417ad135b5dSChristopher Siden int name_len = BSWAP_32(*(uint32_t *)(nvpair + 4 * 2)); 1418ad135b5dSChristopher Siden char *nvp_name = nvpair + 4 * 3; 1419ad135b5dSChristopher Siden 1420ad135b5dSChristopher Siden if ((grub_strncmp(nvp_name, name, name_len) == 0) && 1421ad135b5dSChristopher Siden nvpair_type(nvpair) == valtype) { 1422ad135b5dSChristopher Siden return (nvpair_value(nvpair, val, valtype, nelmp)); 1423ad135b5dSChristopher Siden } 1424ad135b5dSChristopher Siden } 1425ad135b5dSChristopher Siden return (1); 1426ad135b5dSChristopher Siden } 1427ad135b5dSChristopher Siden 1428b1b8ab34Slling /* 1429e7cbe64fSgw * Check if this vdev is online and is in a good state. 1430e7cbe64fSgw */ 1431e7cbe64fSgw static int 1432e7cbe64fSgw vdev_validate(char *nv) 1433e7cbe64fSgw { 1434e7cbe64fSgw uint64_t ival; 1435e7cbe64fSgw 1436e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival, 1437e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0 || 1438e7cbe64fSgw nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival, 1439e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0 || 1440e7cbe64fSgw nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival, 1441e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0) 1442e7cbe64fSgw return (ERR_DEV_VALUES); 1443e7cbe64fSgw 1444e7cbe64fSgw return (0); 1445e7cbe64fSgw } 1446e7cbe64fSgw 1447e7cbe64fSgw /* 144821ecdf64SLin Ling * Get a valid vdev pathname/devid from the boot device. 1449ffb5616eSLin Ling * The caller should already allocate MAXPATHLEN memory for bootpath and devid. 1450e7cbe64fSgw */ 145121ecdf64SLin Ling static int 145221ecdf64SLin Ling vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath, 145321ecdf64SLin Ling int is_spare) 1454e7cbe64fSgw { 1455e7cbe64fSgw char type[16]; 1456e7cbe64fSgw 1457e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING, 1458e7cbe64fSgw NULL)) 1459e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1460e7cbe64fSgw 1461ad135b5dSChristopher Siden if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) { 1462ffb5616eSLin Ling uint64_t guid; 1463ffb5616eSLin Ling 1464ffb5616eSLin Ling if (vdev_validate(nv) != 0) 1465ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1466ffb5616eSLin Ling 1467ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID, 1468ffb5616eSLin Ling &guid, DATA_TYPE_UINT64, NULL) != 0) 1469ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1470ffb5616eSLin Ling 1471ffb5616eSLin Ling if (guid != inguid) 1472e7cbe64fSgw return (ERR_NO_BOOTPATH); 1473e7cbe64fSgw 147421ecdf64SLin Ling /* for a spare vdev, pick the disk labeled with "is_spare" */ 147521ecdf64SLin Ling if (is_spare) { 147621ecdf64SLin Ling uint64_t spare = 0; 147721ecdf64SLin Ling (void) nvlist_lookup_value(nv, ZPOOL_CONFIG_IS_SPARE, 147821ecdf64SLin Ling &spare, DATA_TYPE_UINT64, NULL); 147921ecdf64SLin Ling if (!spare) 148021ecdf64SLin Ling return (ERR_NO_BOOTPATH); 148121ecdf64SLin Ling } 148221ecdf64SLin Ling 1483ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, 1484ffb5616eSLin Ling bootpath, DATA_TYPE_STRING, NULL) != 0) 1485ffb5616eSLin Ling bootpath[0] = '\0'; 1486ffb5616eSLin Ling 1487ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID, 1488ffb5616eSLin Ling devid, DATA_TYPE_STRING, NULL) != 0) 1489ffb5616eSLin Ling devid[0] = '\0'; 1490ffb5616eSLin Ling 1491ad135b5dSChristopher Siden if (grub_strlen(bootpath) >= MAXPATHLEN || 1492ad135b5dSChristopher Siden grub_strlen(devid) >= MAXPATHLEN) 1493ffb5616eSLin Ling return (ERR_WONT_FIT); 1494ffb5616eSLin Ling 1495ffb5616eSLin Ling return (0); 1496ffb5616eSLin Ling 1497ad135b5dSChristopher Siden } else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0 || 1498ad135b5dSChristopher Siden grub_strcmp(type, VDEV_TYPE_REPLACING) == 0 || 1499ad135b5dSChristopher Siden (is_spare = (grub_strcmp(type, VDEV_TYPE_SPARE) == 0))) { 1500e7cbe64fSgw int nelm, i; 1501e7cbe64fSgw char *child; 1502e7cbe64fSgw 1503e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child, 1504e7cbe64fSgw DATA_TYPE_NVLIST_ARRAY, &nelm)) 1505e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1506e7cbe64fSgw 1507e7cbe64fSgw for (i = 0; i < nelm; i++) { 1508e7cbe64fSgw char *child_i; 1509e7cbe64fSgw 1510e7cbe64fSgw child_i = nvlist_array(child, i); 1511ffb5616eSLin Ling if (vdev_get_bootpath(child_i, inguid, devid, 151221ecdf64SLin Ling bootpath, is_spare) == 0) 1513ffb5616eSLin Ling return (0); 1514e7cbe64fSgw } 1515e7cbe64fSgw } 1516e7cbe64fSgw 1517ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1518e7cbe64fSgw } 1519e7cbe64fSgw 1520e7cbe64fSgw /* 1521e7cbe64fSgw * Check the disk label information and retrieve needed vdev name-value pairs. 1522b1b8ab34Slling * 1523b1b8ab34Slling * Return: 1524b1b8ab34Slling * 0 - success 1525e7cbe64fSgw * ERR_* - failure 1526b1b8ab34Slling */ 1527ad135b5dSChristopher Siden static int 1528e23347b1SEric Taylor check_pool_label(uint64_t sector, char *stack, char *outdevid, 1529ad135b5dSChristopher Siden char *outpath, uint64_t *outguid, uint64_t *outashift, uint64_t *outversion) 1530b1b8ab34Slling { 1531b1b8ab34Slling vdev_phys_t *vdev; 1532e23347b1SEric Taylor uint64_t pool_state, txg = 0; 1533ad135b5dSChristopher Siden char *nvlist, *nv, *features; 1534051aabe6Staylor uint64_t diskguid; 1535b1b8ab34Slling 1536e23347b1SEric Taylor sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT); 1537b1b8ab34Slling 1538b1b8ab34Slling /* Read in the vdev name-value pair list (112K). */ 1539b1b8ab34Slling if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0) 1540b1b8ab34Slling return (ERR_READ); 1541b1b8ab34Slling 1542b1b8ab34Slling vdev = (vdev_phys_t *)stack; 1543e4c3b53dStaylor stack += sizeof (vdev_phys_t); 1544b1b8ab34Slling 1545e7cbe64fSgw if (nvlist_unpack(vdev->vp_nvlist, &nvlist)) 1546b1b8ab34Slling return (ERR_FSYS_CORRUPT); 1547e7cbe64fSgw 1548e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state, 1549e7cbe64fSgw DATA_TYPE_UINT64, NULL)) 1550e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1551e7cbe64fSgw 1552e7cbe64fSgw if (pool_state == POOL_STATE_DESTROYED) 1553e7cbe64fSgw return (ERR_FILESYSTEM_NOT_FOUND); 1554e7cbe64fSgw 1555e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME, 1556e7cbe64fSgw current_rootpool, DATA_TYPE_STRING, NULL)) 1557e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1558e7cbe64fSgw 1559e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg, 1560e7cbe64fSgw DATA_TYPE_UINT64, NULL)) 1561e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1562e7cbe64fSgw 1563e7cbe64fSgw /* not an active device */ 1564e7cbe64fSgw if (txg == 0) 1565e7cbe64fSgw return (ERR_NO_BOOTPATH); 1566e7cbe64fSgw 1567ad135b5dSChristopher Siden if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, outversion, 1568fe3e2633SEric Taylor DATA_TYPE_UINT64, NULL)) 1569fe3e2633SEric Taylor return (ERR_FSYS_CORRUPT); 1570ad135b5dSChristopher Siden if (!SPA_VERSION_IS_SUPPORTED(*outversion)) 1571fe3e2633SEric Taylor return (ERR_NEWER_VERSION); 1572e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, 1573e7cbe64fSgw DATA_TYPE_NVLIST, NULL)) 1574e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1575051aabe6Staylor if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, 1576051aabe6Staylor DATA_TYPE_UINT64, NULL)) 1577051aabe6Staylor return (ERR_FSYS_CORRUPT); 157881b2d573SHans Rosenfeld if (nvlist_lookup_value(nv, ZPOOL_CONFIG_ASHIFT, outashift, 157981b2d573SHans Rosenfeld DATA_TYPE_UINT64, NULL) != 0) 158081b2d573SHans Rosenfeld return (ERR_FSYS_CORRUPT); 158121ecdf64SLin Ling if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0)) 1582e7cbe64fSgw return (ERR_NO_BOOTPATH); 1583e23347b1SEric Taylor if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid, 1584e23347b1SEric Taylor DATA_TYPE_UINT64, NULL)) 1585e23347b1SEric Taylor return (ERR_FSYS_CORRUPT); 1586ad135b5dSChristopher Siden 1587ad135b5dSChristopher Siden if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, 1588ad135b5dSChristopher Siden &features, DATA_TYPE_NVLIST, NULL) == 0) { 1589ad135b5dSChristopher Siden char *nvp; 1590ad135b5dSChristopher Siden char *name = stack; 1591ad135b5dSChristopher Siden stack += MAXNAMELEN; 1592ad135b5dSChristopher Siden 1593ad135b5dSChristopher Siden for (nvp = nvlist_next_nvpair(features, NULL); 1594ad135b5dSChristopher Siden nvp != NULL; 1595ad135b5dSChristopher Siden nvp = nvlist_next_nvpair(features, nvp)) { 1596ad135b5dSChristopher Siden zap_attribute_t za; 1597ad135b5dSChristopher Siden 1598ad135b5dSChristopher Siden if (nvpair_name(nvp, name, MAXNAMELEN) != 0) 1599ad135b5dSChristopher Siden return (ERR_FSYS_CORRUPT); 1600ad135b5dSChristopher Siden 1601ad135b5dSChristopher Siden za.za_integer_length = 8; 1602ad135b5dSChristopher Siden za.za_num_integers = 1; 1603ad135b5dSChristopher Siden za.za_first_integer = 1; 1604ad135b5dSChristopher Siden za.za_name = name; 1605ad135b5dSChristopher Siden if (check_feature(&za, spa_feature_names, stack) != 0) 1606ad135b5dSChristopher Siden return (ERR_NEWER_VERSION); 1607ad135b5dSChristopher Siden } 1608ad135b5dSChristopher Siden } 1609ad135b5dSChristopher Siden 1610e7cbe64fSgw return (0); 1611b1b8ab34Slling } 1612b1b8ab34Slling 1613b1b8ab34Slling /* 1614b1b8ab34Slling * zfs_mount() locates a valid uberblock of the root pool and read in its MOS 1615b1b8ab34Slling * to the memory address MOS. 1616b1b8ab34Slling * 1617b1b8ab34Slling * Return: 1618b1b8ab34Slling * 1 - success 1619b1b8ab34Slling * 0 - failure 1620b1b8ab34Slling */ 1621b1b8ab34Slling int 1622b1b8ab34Slling zfs_mount(void) 1623b1b8ab34Slling { 162481b2d573SHans Rosenfeld char *stack, *ub_array; 1625b1b8ab34Slling int label = 0; 162681b2d573SHans Rosenfeld uberblock_t *ubbest; 1627b1b8ab34Slling objset_phys_t *osp; 1628051aabe6Staylor char tmp_bootpath[MAXNAMELEN]; 1629051aabe6Staylor char tmp_devid[MAXNAMELEN]; 1630ad135b5dSChristopher Siden uint64_t tmp_guid, ashift, version; 1631e23347b1SEric Taylor uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT; 1632bbe6aa77SJan Setje-Eilers int err = errnum; /* preserve previous errnum state */ 1633051aabe6Staylor 1634051aabe6Staylor /* if it's our first time here, zero the best uberblock out */ 1635e23347b1SEric Taylor if (best_drive == 0 && best_part == 0 && find_best_root) { 1636e37b211cStaylor grub_memset(¤t_uberblock, 0, sizeof (uberblock_t)); 1637e23347b1SEric Taylor pool_guid = 0; 1638e23347b1SEric Taylor } 1639b1b8ab34Slling 1640b1b8ab34Slling stackbase = ZFS_SCRATCH; 1641b1b8ab34Slling stack = stackbase; 164281b2d573SHans Rosenfeld ub_array = stack; 1643b1b8ab34Slling stack += VDEV_UBERBLOCK_RING; 1644b1b8ab34Slling 1645b1b8ab34Slling osp = (objset_phys_t *)stack; 1646b1b8ab34Slling stack += sizeof (objset_phys_t); 1647e23347b1SEric Taylor adjpl = P2ALIGN(adjpl, (uint64_t)sizeof (vdev_label_t)); 1648b1b8ab34Slling 1649e23347b1SEric Taylor for (label = 0; label < VDEV_LABELS; label++) { 165098c507c4SJan Setje-Eilers 165198c507c4SJan Setje-Eilers /* 165298c507c4SJan Setje-Eilers * some eltorito stacks don't give us a size and 165398c507c4SJan Setje-Eilers * we end up setting the size to MAXUINT, further 165498c507c4SJan Setje-Eilers * some of these devices stop working once a single 165598c507c4SJan Setje-Eilers * read past the end has been issued. Checking 165698c507c4SJan Setje-Eilers * for a maximum part_length and skipping the backup 165798c507c4SJan Setje-Eilers * labels at the end of the slice/partition/device 165898c507c4SJan Setje-Eilers * avoids breaking down on such devices. 165998c507c4SJan Setje-Eilers */ 166098c507c4SJan Setje-Eilers if (part_length == MAXUINT && label == 2) 166198c507c4SJan Setje-Eilers break; 166298c507c4SJan Setje-Eilers 1663e23347b1SEric Taylor uint64_t sector = vdev_label_start(adjpl, 1664e23347b1SEric Taylor label) >> SPA_MINBLOCKSHIFT; 1665b1b8ab34Slling 1666b1b8ab34Slling /* Read in the uberblock ring (128K). */ 1667e23347b1SEric Taylor if (devread(sector + 166881b2d573SHans Rosenfeld ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT), 166981b2d573SHans Rosenfeld 0, VDEV_UBERBLOCK_RING, ub_array) == 0) 1670b1b8ab34Slling continue; 1671b1b8ab34Slling 167281b2d573SHans Rosenfeld if (check_pool_label(sector, stack, tmp_devid, 1673ad135b5dSChristopher Siden tmp_bootpath, &tmp_guid, &ashift, &version)) 167481b2d573SHans Rosenfeld continue; 167581b2d573SHans Rosenfeld 167681b2d573SHans Rosenfeld if (pool_guid == 0) 167781b2d573SHans Rosenfeld pool_guid = tmp_guid; 167881b2d573SHans Rosenfeld 167981b2d573SHans Rosenfeld if ((ubbest = find_bestub(ub_array, ashift, sector)) == NULL || 168081b2d573SHans Rosenfeld zio_read(&ubbest->ub_rootbp, osp, stack) != 0) 168181b2d573SHans Rosenfeld continue; 168281b2d573SHans Rosenfeld 168381b2d573SHans Rosenfeld VERIFY_OS_TYPE(osp, DMU_OST_META); 168481b2d573SHans Rosenfeld 1685ad135b5dSChristopher Siden if (version >= SPA_VERSION_FEATURES && 1686ad135b5dSChristopher Siden check_mos_features(&osp->os_meta_dnode, stack) != 0) 1687ad135b5dSChristopher Siden continue; 1688ad135b5dSChristopher Siden 168981b2d573SHans Rosenfeld if (find_best_root && ((pool_guid != tmp_guid) || 169081b2d573SHans Rosenfeld vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0)) 169181b2d573SHans Rosenfeld continue; 169281b2d573SHans Rosenfeld 169381b2d573SHans Rosenfeld /* Got the MOS. Save it at the memory addr MOS. */ 169481b2d573SHans Rosenfeld grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); 169581b2d573SHans Rosenfeld grub_memmove(¤t_uberblock, ubbest, sizeof (uberblock_t)); 169681b2d573SHans Rosenfeld grub_memmove(current_bootpath, tmp_bootpath, MAXNAMELEN); 169781b2d573SHans Rosenfeld grub_memmove(current_devid, tmp_devid, grub_strlen(tmp_devid)); 169881b2d573SHans Rosenfeld is_zfs_mount = 1; 169981b2d573SHans Rosenfeld return (1); 1700b1b8ab34Slling } 1701b1b8ab34Slling 1702bbe6aa77SJan Setje-Eilers /* 1703bbe6aa77SJan Setje-Eilers * While some fs impls. (tftp) rely on setting and keeping 1704bbe6aa77SJan Setje-Eilers * global errnums set, others won't reset it and will break 1705bbe6aa77SJan Setje-Eilers * when issuing rawreads. The goal here is to simply not 1706bbe6aa77SJan Setje-Eilers * have zfs mount attempts impact the previous state. 1707bbe6aa77SJan Setje-Eilers */ 1708bbe6aa77SJan Setje-Eilers errnum = err; 1709b1b8ab34Slling return (0); 1710b1b8ab34Slling } 1711b1b8ab34Slling 1712b1b8ab34Slling /* 1713b1b8ab34Slling * zfs_open() locates a file in the rootpool by following the 1714b1b8ab34Slling * MOS and places the dnode of the file in the memory address DNODE. 1715b1b8ab34Slling * 1716b1b8ab34Slling * Return: 1717b1b8ab34Slling * 1 - success 1718b1b8ab34Slling * 0 - failure 1719b1b8ab34Slling */ 1720b1b8ab34Slling int 1721b1b8ab34Slling zfs_open(char *filename) 1722b1b8ab34Slling { 1723b1b8ab34Slling char *stack; 1724b1b8ab34Slling dnode_phys_t *mdn; 1725b1b8ab34Slling 1726b1b8ab34Slling file_buf = NULL; 1727b1b8ab34Slling stackbase = ZFS_SCRATCH; 1728b1b8ab34Slling stack = stackbase; 1729b1b8ab34Slling 1730b1b8ab34Slling mdn = (dnode_phys_t *)stack; 1731b1b8ab34Slling stack += sizeof (dnode_phys_t); 1732b1b8ab34Slling 1733b1b8ab34Slling dnode_mdn = NULL; 1734b1b8ab34Slling dnode_buf = (dnode_phys_t *)stack; 1735b1b8ab34Slling stack += 1<<DNODE_BLOCK_SHIFT; 1736b1b8ab34Slling 1737b1b8ab34Slling /* 1738b1b8ab34Slling * menu.lst is placed at the root pool filesystem level, 1739b1b8ab34Slling * do not goto 'current_bootfs'. 1740b1b8ab34Slling */ 1741eb2bd662Svikram if (is_top_dataset_file(filename)) { 1742b1b8ab34Slling if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)) 1743b1b8ab34Slling return (0); 1744b1b8ab34Slling 1745b1b8ab34Slling current_bootfs_obj = 0; 1746b1b8ab34Slling } else { 1747b1b8ab34Slling if (current_bootfs[0] == '\0') { 1748b1b8ab34Slling /* Get the default root filesystem object number */ 1749ae8180dbSlling if (errnum = get_default_bootfsobj(MOS, 1750ae8180dbSlling ¤t_bootfs_obj, stack)) 1751b1b8ab34Slling return (0); 1752b1b8ab34Slling 1753b1b8ab34Slling if (errnum = get_objset_mdn(MOS, NULL, 1754b1b8ab34Slling ¤t_bootfs_obj, mdn, stack)) 1755b1b8ab34Slling return (0); 1756b1b8ab34Slling } else { 1757b35c6776Staylor if (errnum = get_objset_mdn(MOS, current_bootfs, 1758b35c6776Staylor ¤t_bootfs_obj, mdn, stack)) { 1759051aabe6Staylor grub_memset(current_bootfs, 0, MAXNAMELEN); 1760b1b8ab34Slling return (0); 1761b35c6776Staylor } 1762b1b8ab34Slling } 1763b1b8ab34Slling } 1764b1b8ab34Slling 1765b1b8ab34Slling if (dnode_get_path(mdn, filename, DNODE, stack)) { 1766b1b8ab34Slling errnum = ERR_FILE_NOT_FOUND; 1767b1b8ab34Slling return (0); 1768b1b8ab34Slling } 1769b1b8ab34Slling 1770b1b8ab34Slling /* get the file size and set the file position to 0 */ 17710a586ceaSMark Shellenbaum 17720a586ceaSMark Shellenbaum /* 17730a586ceaSMark Shellenbaum * For DMU_OT_SA we will need to locate the SIZE attribute 17740a586ceaSMark Shellenbaum * attribute, which could be either in the bonus buffer 17750a586ceaSMark Shellenbaum * or the "spill" block. 17760a586ceaSMark Shellenbaum */ 17770a586ceaSMark Shellenbaum if (DNODE->dn_bonustype == DMU_OT_SA) { 17780a586ceaSMark Shellenbaum sa_hdr_phys_t *sahdrp; 17790a586ceaSMark Shellenbaum int hdrsize; 17800a586ceaSMark Shellenbaum 17810a586ceaSMark Shellenbaum if (DNODE->dn_bonuslen != 0) { 17820a586ceaSMark Shellenbaum sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE); 17830a586ceaSMark Shellenbaum } else { 17840a586ceaSMark Shellenbaum if (DNODE->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { 17850a586ceaSMark Shellenbaum blkptr_t *bp = &DNODE->dn_spill; 17860a586ceaSMark Shellenbaum void *buf; 17870a586ceaSMark Shellenbaum 17880a586ceaSMark Shellenbaum buf = (void *)stack; 17890a586ceaSMark Shellenbaum stack += BP_GET_LSIZE(bp); 17900a586ceaSMark Shellenbaum 17910a586ceaSMark Shellenbaum /* reset errnum to rawread() failure */ 17920a586ceaSMark Shellenbaum errnum = 0; 17930a586ceaSMark Shellenbaum if (zio_read(bp, buf, stack) != 0) { 17940a586ceaSMark Shellenbaum return (0); 17950a586ceaSMark Shellenbaum } 17960a586ceaSMark Shellenbaum sahdrp = buf; 17970a586ceaSMark Shellenbaum } else { 17980a586ceaSMark Shellenbaum errnum = ERR_FSYS_CORRUPT; 17990a586ceaSMark Shellenbaum return (0); 18000a586ceaSMark Shellenbaum } 18010a586ceaSMark Shellenbaum } 18020a586ceaSMark Shellenbaum hdrsize = SA_HDR_SIZE(sahdrp); 18030a586ceaSMark Shellenbaum filemax = *(uint64_t *)((char *)sahdrp + hdrsize + 18040a586ceaSMark Shellenbaum SA_SIZE_OFFSET); 18050a586ceaSMark Shellenbaum } else { 18060a586ceaSMark Shellenbaum filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size; 18070a586ceaSMark Shellenbaum } 1808b1b8ab34Slling filepos = 0; 1809b1b8ab34Slling 1810b1b8ab34Slling dnode_buf = NULL; 1811b1b8ab34Slling return (1); 1812b1b8ab34Slling } 1813b1b8ab34Slling 1814b1b8ab34Slling /* 1815b1b8ab34Slling * zfs_read reads in the data blocks pointed by the DNODE. 1816b1b8ab34Slling * 1817b1b8ab34Slling * Return: 1818b1b8ab34Slling * len - the length successfully read in to the buffer 1819b1b8ab34Slling * 0 - failure 1820b1b8ab34Slling */ 1821b1b8ab34Slling int 1822b1b8ab34Slling zfs_read(char *buf, int len) 1823b1b8ab34Slling { 1824b1b8ab34Slling char *stack; 1825b1b8ab34Slling int blksz, length, movesize; 1826b1b8ab34Slling 1827b1b8ab34Slling if (file_buf == NULL) { 1828b1b8ab34Slling file_buf = stackbase; 1829b1b8ab34Slling stackbase += SPA_MAXBLOCKSIZE; 1830b1b8ab34Slling file_start = file_end = 0; 1831b1b8ab34Slling } 1832b1b8ab34Slling stack = stackbase; 1833b1b8ab34Slling 1834b1b8ab34Slling /* 1835b1b8ab34Slling * If offset is in memory, move it into the buffer provided and return. 1836b1b8ab34Slling */ 1837b1b8ab34Slling if (filepos >= file_start && filepos+len <= file_end) { 1838b1b8ab34Slling grub_memmove(buf, file_buf + filepos - file_start, len); 1839b1b8ab34Slling filepos += len; 1840b1b8ab34Slling return (len); 1841b1b8ab34Slling } 1842b1b8ab34Slling 1843b1b8ab34Slling blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT; 1844b1b8ab34Slling 1845b5152584SMatthew Ahrens /* 1846b5152584SMatthew Ahrens * Note: for GRUB, SPA_MAXBLOCKSIZE is 128KB. There is not enough 1847b5152584SMatthew Ahrens * memory to allocate the new max blocksize (16MB), so while 1848b5152584SMatthew Ahrens * GRUB understands the large_blocks on-disk feature, it can't 1849b5152584SMatthew Ahrens * actually read large blocks. 1850b5152584SMatthew Ahrens */ 1851b5152584SMatthew Ahrens if (blksz > SPA_MAXBLOCKSIZE) { 1852b5152584SMatthew Ahrens grub_printf("blocks larger than 128K are not supported\n"); 1853b5152584SMatthew Ahrens return (0); 1854b5152584SMatthew Ahrens } 1855b5152584SMatthew Ahrens 1856b1b8ab34Slling /* 1857b1b8ab34Slling * Entire Dnode is too big to fit into the space available. We 1858b1b8ab34Slling * will need to read it in chunks. This could be optimized to 1859b1b8ab34Slling * read in as large a chunk as there is space available, but for 1860b1b8ab34Slling * now, this only reads in one data block at a time. 1861b1b8ab34Slling */ 1862b1b8ab34Slling length = len; 1863b1b8ab34Slling while (length) { 1864b1b8ab34Slling /* 1865b1b8ab34Slling * Find requested blkid and the offset within that block. 1866b1b8ab34Slling */ 1867b1b8ab34Slling uint64_t blkid = filepos / blksz; 1868b1b8ab34Slling 1869b1b8ab34Slling if (errnum = dmu_read(DNODE, blkid, file_buf, stack)) 1870b1b8ab34Slling return (0); 1871b1b8ab34Slling 1872b1b8ab34Slling file_start = blkid * blksz; 1873b1b8ab34Slling file_end = file_start + blksz; 1874b1b8ab34Slling 1875b1b8ab34Slling movesize = MIN(length, file_end - filepos); 1876b1b8ab34Slling 1877b1b8ab34Slling grub_memmove(buf, file_buf + filepos - file_start, 1878b1b8ab34Slling movesize); 1879b1b8ab34Slling buf += movesize; 1880b1b8ab34Slling length -= movesize; 1881b1b8ab34Slling filepos += movesize; 1882b1b8ab34Slling } 1883b1b8ab34Slling 1884b1b8ab34Slling return (len); 1885b1b8ab34Slling } 1886b1b8ab34Slling 1887b1b8ab34Slling /* 1888b1b8ab34Slling * No-Op 1889b1b8ab34Slling */ 1890b1b8ab34Slling int 1891b1b8ab34Slling zfs_embed(int *start_sector, int needed_sectors) 1892b1b8ab34Slling { 1893b1b8ab34Slling return (1); 1894b1b8ab34Slling } 1895b1b8ab34Slling 1896b1b8ab34Slling #endif /* FSYS_ZFS */ 1897