1b1b8ab34Slling /* 2b1b8ab34Slling * GRUB -- GRand Unified Bootloader 3b1b8ab34Slling * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. 4b1b8ab34Slling * 5b1b8ab34Slling * This program is free software; you can redistribute it and/or modify 6b1b8ab34Slling * it under the terms of the GNU General Public License as published by 7b1b8ab34Slling * the Free Software Foundation; either version 2 of the License, or 8b1b8ab34Slling * (at your option) any later version. 9b1b8ab34Slling * 10b1b8ab34Slling * This program is distributed in the hope that it will be useful, 11b1b8ab34Slling * but WITHOUT ANY WARRANTY; without even the implied warranty of 12b1b8ab34Slling * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13b1b8ab34Slling * GNU General Public License for more details. 14b1b8ab34Slling * 15b1b8ab34Slling * You should have received a copy of the GNU General Public License 16b1b8ab34Slling * along with this program; if not, write to the Free Software 17b1b8ab34Slling * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18b1b8ab34Slling */ 19b1b8ab34Slling /* 206e1f5caaSNeil Perrin * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 21b1b8ab34Slling * Use is subject to license terms. 22b1b8ab34Slling */ 23b1b8ab34Slling 24b1b8ab34Slling /* 25b1b8ab34Slling * The zfs plug-in routines for GRUB are: 26b1b8ab34Slling * 27b1b8ab34Slling * zfs_mount() - locates a valid uberblock of the root pool and reads 28b1b8ab34Slling * in its MOS at the memory address MOS. 29b1b8ab34Slling * 30b1b8ab34Slling * zfs_open() - locates a plain file object by following the MOS 31b1b8ab34Slling * and places its dnode at the memory address DNODE. 32b1b8ab34Slling * 33b1b8ab34Slling * zfs_read() - read in the data blocks pointed by the DNODE. 34b1b8ab34Slling * 35b1b8ab34Slling * ZFS_SCRATCH is used as a working area. 36b1b8ab34Slling * 37b1b8ab34Slling * (memory addr) MOS DNODE ZFS_SCRATCH 38b1b8ab34Slling * | | | 39b1b8ab34Slling * +-------V---------V----------V---------------+ 40b1b8ab34Slling * memory | | dnode | dnode | scratch | 41b1b8ab34Slling * | | 512B | 512B | area | 42b1b8ab34Slling * +--------------------------------------------+ 43b1b8ab34Slling */ 44b1b8ab34Slling 45b1b8ab34Slling #ifdef FSYS_ZFS 46b1b8ab34Slling 47b1b8ab34Slling #include "shared.h" 48b1b8ab34Slling #include "filesys.h" 49b1b8ab34Slling #include "fsys_zfs.h" 50b1b8ab34Slling 51b1b8ab34Slling /* cache for a file block of the currently zfs_open()-ed file */ 52b1b8ab34Slling static void *file_buf = NULL; 53b1b8ab34Slling static uint64_t file_start = 0; 54b1b8ab34Slling static uint64_t file_end = 0; 55b1b8ab34Slling 56b1b8ab34Slling /* cache for a dnode block */ 57b1b8ab34Slling static dnode_phys_t *dnode_buf = NULL; 58b1b8ab34Slling static dnode_phys_t *dnode_mdn = NULL; 59b1b8ab34Slling static uint64_t dnode_start = 0; 60b1b8ab34Slling static uint64_t dnode_end = 0; 61b1b8ab34Slling 62e23347b1SEric Taylor static uint64_t pool_guid = 0; 63051aabe6Staylor static uberblock_t current_uberblock; 64b1b8ab34Slling static char *stackbase; 65b1b8ab34Slling 66b1b8ab34Slling decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = 67b1b8ab34Slling { 6815e6edf1Sgw {"inherit", 0}, /* ZIO_COMPRESS_INHERIT */ 69b1b8ab34Slling {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ 7015e6edf1Sgw {"off", 0}, /* ZIO_COMPRESS_OFF */ 7115e6edf1Sgw {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ 7215e6edf1Sgw {"empty", 0} /* ZIO_COMPRESS_EMPTY */ 73b1b8ab34Slling }; 74b1b8ab34Slling 75cd9c78d9SLin Ling static int zio_read_data(blkptr_t *bp, void *buf, char *stack); 76cd9c78d9SLin Ling 77b1b8ab34Slling /* 78b1b8ab34Slling * Our own version of bcmp(). 79b1b8ab34Slling */ 80b1b8ab34Slling static int 81b1b8ab34Slling zfs_bcmp(const void *s1, const void *s2, size_t n) 82b1b8ab34Slling { 83b1b8ab34Slling const uchar_t *ps1 = s1; 84b1b8ab34Slling const uchar_t *ps2 = s2; 85b1b8ab34Slling 86b1b8ab34Slling if (s1 != s2 && n != 0) { 87b1b8ab34Slling do { 88b1b8ab34Slling if (*ps1++ != *ps2++) 89b1b8ab34Slling return (1); 90b1b8ab34Slling } while (--n != 0); 91b1b8ab34Slling } 92b1b8ab34Slling 93b1b8ab34Slling return (0); 94b1b8ab34Slling } 95b1b8ab34Slling 96b1b8ab34Slling /* 97b1b8ab34Slling * Our own version of log2(). Same thing as highbit()-1. 98b1b8ab34Slling */ 99b1b8ab34Slling static int 100b1b8ab34Slling zfs_log2(uint64_t num) 101b1b8ab34Slling { 102b1b8ab34Slling int i = 0; 103b1b8ab34Slling 104b1b8ab34Slling while (num > 1) { 105b1b8ab34Slling i++; 106b1b8ab34Slling num = num >> 1; 107b1b8ab34Slling } 108b1b8ab34Slling 109b1b8ab34Slling return (i); 110b1b8ab34Slling } 111b1b8ab34Slling 112b1b8ab34Slling /* Checksum Functions */ 113b1b8ab34Slling static void 114b1b8ab34Slling zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) 115b1b8ab34Slling { 116b1b8ab34Slling ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 117b1b8ab34Slling } 118b1b8ab34Slling 119b1b8ab34Slling /* Checksum Table and Values */ 120b1b8ab34Slling zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 121b1b8ab34Slling NULL, NULL, 0, 0, "inherit", 122b1b8ab34Slling NULL, NULL, 0, 0, "on", 123b1b8ab34Slling zio_checksum_off, zio_checksum_off, 0, 0, "off", 124b1b8ab34Slling zio_checksum_SHA256, zio_checksum_SHA256, 1, 1, "label", 125b1b8ab34Slling zio_checksum_SHA256, zio_checksum_SHA256, 1, 1, "gang_header", 1266e1f5caaSNeil Perrin NULL, NULL, 0, 0, "zilog", 127b1b8ab34Slling fletcher_2_native, fletcher_2_byteswap, 0, 0, "fletcher2", 128b1b8ab34Slling fletcher_4_native, fletcher_4_byteswap, 1, 0, "fletcher4", 129b1b8ab34Slling zio_checksum_SHA256, zio_checksum_SHA256, 1, 0, "SHA256", 1306e1f5caaSNeil Perrin NULL, NULL, 0, 0, "zilog2", 131b1b8ab34Slling }; 132b1b8ab34Slling 133b1b8ab34Slling /* 134b1b8ab34Slling * zio_checksum_verify: Provides support for checksum verification. 135b1b8ab34Slling * 136b1b8ab34Slling * Fletcher2, Fletcher4, and SHA256 are supported. 137b1b8ab34Slling * 138b1b8ab34Slling * Return: 139b1b8ab34Slling * -1 = Failure 140b1b8ab34Slling * 0 = Success 141b1b8ab34Slling */ 142b1b8ab34Slling static int 143b1b8ab34Slling zio_checksum_verify(blkptr_t *bp, char *data, int size) 144b1b8ab34Slling { 145b1b8ab34Slling zio_cksum_t zc = bp->blk_cksum; 146cd9c78d9SLin Ling uint32_t checksum = BP_GET_CHECKSUM(bp); 147b1b8ab34Slling int byteswap = BP_SHOULD_BYTESWAP(bp); 1486e1f5caaSNeil Perrin zio_eck_t *zec = (zio_eck_t *)(data + size) - 1; 149b1b8ab34Slling zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 150b1b8ab34Slling zio_cksum_t actual_cksum, expected_cksum; 151b1b8ab34Slling 152b1b8ab34Slling /* byteswap is not supported */ 153b1b8ab34Slling if (byteswap) 154b1b8ab34Slling return (-1); 155b1b8ab34Slling 156b1b8ab34Slling if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) 157b1b8ab34Slling return (-1); 158b1b8ab34Slling 1596e1f5caaSNeil Perrin if (ci->ci_eck) { 1606e1f5caaSNeil Perrin expected_cksum = zec->zec_cksum; 1616e1f5caaSNeil Perrin zec->zec_cksum = zc; 162cd9c78d9SLin Ling ci->ci_func[0](data, size, &actual_cksum); 1636e1f5caaSNeil Perrin zec->zec_cksum = expected_cksum; 164b1b8ab34Slling zc = expected_cksum; 165b1b8ab34Slling 166b1b8ab34Slling } else { 167b1b8ab34Slling ci->ci_func[byteswap](data, size, &actual_cksum); 168b1b8ab34Slling } 169b1b8ab34Slling 170b1b8ab34Slling if ((actual_cksum.zc_word[0] - zc.zc_word[0]) | 171b1b8ab34Slling (actual_cksum.zc_word[1] - zc.zc_word[1]) | 172b1b8ab34Slling (actual_cksum.zc_word[2] - zc.zc_word[2]) | 173b1b8ab34Slling (actual_cksum.zc_word[3] - zc.zc_word[3])) 174b1b8ab34Slling return (-1); 175b1b8ab34Slling 176b1b8ab34Slling return (0); 177b1b8ab34Slling } 178b1b8ab34Slling 179b1b8ab34Slling /* 180e23347b1SEric Taylor * vdev_label_start returns the physical disk offset (in bytes) of 181e23347b1SEric Taylor * label "l". 182b1b8ab34Slling */ 183e7cbe64fSgw static uint64_t 184e23347b1SEric Taylor vdev_label_start(uint64_t psize, int l) 185b1b8ab34Slling { 186e23347b1SEric Taylor return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 187b1b8ab34Slling 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); 188b1b8ab34Slling } 189b1b8ab34Slling 190b1b8ab34Slling /* 191b1b8ab34Slling * vdev_uberblock_compare takes two uberblock structures and returns an integer 192b1b8ab34Slling * indicating the more recent of the two. 193b1b8ab34Slling * Return Value = 1 if ub2 is more recent 194b1b8ab34Slling * Return Value = -1 if ub1 is more recent 195b1b8ab34Slling * The most recent uberblock is determined using its transaction number and 196b1b8ab34Slling * timestamp. The uberblock with the highest transaction number is 197b1b8ab34Slling * considered "newer". If the transaction numbers of the two blocks match, the 198b1b8ab34Slling * timestamps are compared to determine the "newer" of the two. 199b1b8ab34Slling */ 200b1b8ab34Slling static int 201b1b8ab34Slling vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) 202b1b8ab34Slling { 203b1b8ab34Slling if (ub1->ub_txg < ub2->ub_txg) 204b1b8ab34Slling return (-1); 205b1b8ab34Slling if (ub1->ub_txg > ub2->ub_txg) 206b1b8ab34Slling return (1); 207b1b8ab34Slling 208b1b8ab34Slling if (ub1->ub_timestamp < ub2->ub_timestamp) 209b1b8ab34Slling return (-1); 210b1b8ab34Slling if (ub1->ub_timestamp > ub2->ub_timestamp) 211b1b8ab34Slling return (1); 212b1b8ab34Slling 213b1b8ab34Slling return (0); 214b1b8ab34Slling } 215b1b8ab34Slling 216b1b8ab34Slling /* 217b1b8ab34Slling * Three pieces of information are needed to verify an uberblock: the magic 218b1b8ab34Slling * number, the version number, and the checksum. 219b1b8ab34Slling * 220b1b8ab34Slling * Currently Implemented: version number, magic number 221b1b8ab34Slling * Need to Implement: checksum 222b1b8ab34Slling * 223b1b8ab34Slling * Return: 224b1b8ab34Slling * 0 - Success 225b1b8ab34Slling * -1 - Failure 226b1b8ab34Slling */ 227b1b8ab34Slling static int 228e23347b1SEric Taylor uberblock_verify(uberblock_phys_t *ub, uint64_t offset) 229b1b8ab34Slling { 230b1b8ab34Slling 231b1b8ab34Slling uberblock_t *uber = &ub->ubp_uberblock; 232b1b8ab34Slling blkptr_t bp; 233b1b8ab34Slling 234b1b8ab34Slling BP_ZERO(&bp); 235b1b8ab34Slling BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); 236b1b8ab34Slling BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); 237b1b8ab34Slling ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); 238b1b8ab34Slling 239b1b8ab34Slling if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0) 240b1b8ab34Slling return (-1); 241b1b8ab34Slling 242b1b8ab34Slling if (uber->ub_magic == UBERBLOCK_MAGIC && 243bb0ade09Sahrens uber->ub_version > 0 && uber->ub_version <= SPA_VERSION) 244b1b8ab34Slling return (0); 245b1b8ab34Slling 246b1b8ab34Slling return (-1); 247b1b8ab34Slling } 248b1b8ab34Slling 249b1b8ab34Slling /* 250b1b8ab34Slling * Find the best uberblock. 251b1b8ab34Slling * Return: 252b1b8ab34Slling * Success - Pointer to the best uberblock. 253b1b8ab34Slling * Failure - NULL 254b1b8ab34Slling */ 255b1b8ab34Slling static uberblock_phys_t * 256e23347b1SEric Taylor find_bestub(uberblock_phys_t *ub_array, uint64_t sector) 257b1b8ab34Slling { 258b1b8ab34Slling uberblock_phys_t *ubbest = NULL; 259e23347b1SEric Taylor uint64_t offset; 260e23347b1SEric Taylor int i; 261b1b8ab34Slling 262b1b8ab34Slling for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) { 263e23347b1SEric Taylor offset = (sector << SPA_MINBLOCKSHIFT) + 264e23347b1SEric Taylor VDEV_UBERBLOCK_OFFSET(i); 265b1b8ab34Slling if (uberblock_verify(&ub_array[i], offset) == 0) { 266b1b8ab34Slling if (ubbest == NULL) { 267b1b8ab34Slling ubbest = &ub_array[i]; 268e7437265Sahrens } else if (vdev_uberblock_compare( 269e7437265Sahrens &(ub_array[i].ubp_uberblock), 270e7437265Sahrens &(ubbest->ubp_uberblock)) > 0) { 271e7437265Sahrens ubbest = &ub_array[i]; 272b1b8ab34Slling } 273b1b8ab34Slling } 274b1b8ab34Slling } 275b1b8ab34Slling 276b1b8ab34Slling return (ubbest); 277b1b8ab34Slling } 278b1b8ab34Slling 279b1b8ab34Slling /* 280cd9c78d9SLin Ling * Read a block of data based on the gang block address dva, 281cd9c78d9SLin Ling * and put its data in buf. 282b1b8ab34Slling * 283b1b8ab34Slling * Return: 284b1b8ab34Slling * 0 - success 285cd9c78d9SLin Ling * 1 - failure 286b1b8ab34Slling */ 287b1b8ab34Slling static int 288cd9c78d9SLin Ling zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack) 289b1b8ab34Slling { 290cd9c78d9SLin Ling zio_gbh_phys_t *zio_gb; 291b1b8ab34Slling uint64_t offset, sector; 292cd9c78d9SLin Ling blkptr_t tmpbp; 293cd9c78d9SLin Ling int i; 294b1b8ab34Slling 295cd9c78d9SLin Ling zio_gb = (zio_gbh_phys_t *)stack; 296cd9c78d9SLin Ling stack += SPA_GANGBLOCKSIZE; 297cd9c78d9SLin Ling offset = DVA_GET_OFFSET(dva); 298cd9c78d9SLin Ling sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); 299b1b8ab34Slling 300cd9c78d9SLin Ling /* read in the gang block header */ 301cd9c78d9SLin Ling if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) { 302cd9c78d9SLin Ling grub_printf("failed to read in a gang block header\n"); 303cd9c78d9SLin Ling return (1); 304cd9c78d9SLin Ling } 305cd9c78d9SLin Ling 306cd9c78d9SLin Ling /* self checksuming the gang block header */ 307cd9c78d9SLin Ling BP_ZERO(&tmpbp); 308cd9c78d9SLin Ling BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER); 309cd9c78d9SLin Ling BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER); 310cd9c78d9SLin Ling ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva), 311cd9c78d9SLin Ling DVA_GET_OFFSET(dva), bp->blk_birth, 0); 312cd9c78d9SLin Ling if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) { 313cd9c78d9SLin Ling grub_printf("failed to checksum a gang block header\n"); 314cd9c78d9SLin Ling return (1); 315cd9c78d9SLin Ling } 316cd9c78d9SLin Ling 317cd9c78d9SLin Ling for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { 318cd9c78d9SLin Ling if (zio_gb->zg_blkptr[i].blk_birth == 0) 319cd9c78d9SLin Ling continue; 320cd9c78d9SLin Ling 321cd9c78d9SLin Ling if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack)) 322cd9c78d9SLin Ling return (1); 323cd9c78d9SLin Ling buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]); 324cd9c78d9SLin Ling } 325cd9c78d9SLin Ling 326cd9c78d9SLin Ling return (0); 327cd9c78d9SLin Ling } 328cd9c78d9SLin Ling 329cd9c78d9SLin Ling /* 330cd9c78d9SLin Ling * Read in a block of raw data to buf. 331cd9c78d9SLin Ling * 332cd9c78d9SLin Ling * Return: 333cd9c78d9SLin Ling * 0 - success 334cd9c78d9SLin Ling * 1 - failure 335cd9c78d9SLin Ling */ 336cd9c78d9SLin Ling static int 337cd9c78d9SLin Ling zio_read_data(blkptr_t *bp, void *buf, char *stack) 338cd9c78d9SLin Ling { 339cd9c78d9SLin Ling int i, psize; 340cd9c78d9SLin Ling 341cd9c78d9SLin Ling psize = BP_GET_PSIZE(bp); 342ae8180dbSlling 343b1b8ab34Slling /* pick a good dva from the block pointer */ 344b1b8ab34Slling for (i = 0; i < SPA_DVAS_PER_BP; i++) { 345cd9c78d9SLin Ling uint64_t offset, sector; 346b1b8ab34Slling 347b1b8ab34Slling if (bp->blk_dva[i].dva_word[0] == 0 && 348b1b8ab34Slling bp->blk_dva[i].dva_word[1] == 0) 349b1b8ab34Slling continue; 350b1b8ab34Slling 351cd9c78d9SLin Ling if (DVA_GET_GANG(&bp->blk_dva[i])) { 352cd9c78d9SLin Ling if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) == 0) 353cd9c78d9SLin Ling return (0); 354b1b8ab34Slling } else { 355cd9c78d9SLin Ling /* read in a data block */ 356cd9c78d9SLin Ling offset = DVA_GET_OFFSET(&bp->blk_dva[i]); 357cd9c78d9SLin Ling sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); 358cd9c78d9SLin Ling if (devread(sector, 0, psize, buf)) 359cd9c78d9SLin Ling return (0); 360b1b8ab34Slling } 361b1b8ab34Slling } 362b1b8ab34Slling 363cd9c78d9SLin Ling return (1); 364cd9c78d9SLin Ling } 365cd9c78d9SLin Ling 366cd9c78d9SLin Ling /* 367cd9c78d9SLin Ling * Read in a block of data, verify its checksum, decompress if needed, 368cd9c78d9SLin Ling * and put the uncompressed data in buf. 369cd9c78d9SLin Ling * 370cd9c78d9SLin Ling * Return: 371cd9c78d9SLin Ling * 0 - success 372cd9c78d9SLin Ling * errnum - failure 373cd9c78d9SLin Ling */ 374cd9c78d9SLin Ling static int 375cd9c78d9SLin Ling zio_read(blkptr_t *bp, void *buf, char *stack) 376cd9c78d9SLin Ling { 377cd9c78d9SLin Ling int lsize, psize, comp; 378cd9c78d9SLin Ling char *retbuf; 379cd9c78d9SLin Ling 380cd9c78d9SLin Ling comp = BP_GET_COMPRESS(bp); 381cd9c78d9SLin Ling lsize = BP_GET_LSIZE(bp); 382cd9c78d9SLin Ling psize = BP_GET_PSIZE(bp); 383cd9c78d9SLin Ling 384cd9c78d9SLin Ling if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || 385cd9c78d9SLin Ling (comp != ZIO_COMPRESS_OFF && 386cd9c78d9SLin Ling decomp_table[comp].decomp_func == NULL)) { 387cd9c78d9SLin Ling grub_printf("compression algorithm not supported\n"); 388cd9c78d9SLin Ling return (ERR_FSYS_CORRUPT); 389cd9c78d9SLin Ling } 390cd9c78d9SLin Ling 391cd9c78d9SLin Ling if ((char *)buf < stack && ((char *)buf) + lsize > stack) { 392cd9c78d9SLin Ling grub_printf("not enough memory allocated\n"); 393cd9c78d9SLin Ling return (ERR_WONT_FIT); 394cd9c78d9SLin Ling } 395cd9c78d9SLin Ling 396cd9c78d9SLin Ling retbuf = buf; 397cd9c78d9SLin Ling if (comp != ZIO_COMPRESS_OFF) { 398cd9c78d9SLin Ling buf = stack; 399cd9c78d9SLin Ling stack += psize; 400cd9c78d9SLin Ling } 401cd9c78d9SLin Ling 402cd9c78d9SLin Ling if (zio_read_data(bp, buf, stack)) { 403cd9c78d9SLin Ling grub_printf("zio_read_data failed\n"); 404cd9c78d9SLin Ling return (ERR_FSYS_CORRUPT); 405cd9c78d9SLin Ling } 406cd9c78d9SLin Ling 407cd9c78d9SLin Ling if (zio_checksum_verify(bp, buf, psize) != 0) { 408cd9c78d9SLin Ling grub_printf("checksum verification failed\n"); 409cd9c78d9SLin Ling return (ERR_FSYS_CORRUPT); 410cd9c78d9SLin Ling } 411cd9c78d9SLin Ling 412cd9c78d9SLin Ling if (comp != ZIO_COMPRESS_OFF) 413cd9c78d9SLin Ling decomp_table[comp].decomp_func(buf, retbuf, psize, lsize); 414cd9c78d9SLin Ling 415cd9c78d9SLin Ling return (0); 416b1b8ab34Slling } 417b1b8ab34Slling 418b1b8ab34Slling /* 419b1b8ab34Slling * Get the block from a block id. 420b1b8ab34Slling * push the block onto the stack. 421b1b8ab34Slling * 422b1b8ab34Slling * Return: 423b1b8ab34Slling * 0 - success 424b1b8ab34Slling * errnum - failure 425b1b8ab34Slling */ 426b1b8ab34Slling static int 427b1b8ab34Slling dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack) 428b1b8ab34Slling { 429b1b8ab34Slling int idx, level; 430b1b8ab34Slling blkptr_t *bp_array = dn->dn_blkptr; 431b1b8ab34Slling int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 432b1b8ab34Slling blkptr_t *bp, *tmpbuf; 433b1b8ab34Slling 434b1b8ab34Slling bp = (blkptr_t *)stack; 435b1b8ab34Slling stack += sizeof (blkptr_t); 436b1b8ab34Slling 437b1b8ab34Slling tmpbuf = (blkptr_t *)stack; 438b1b8ab34Slling stack += 1<<dn->dn_indblkshift; 439b1b8ab34Slling 440b1b8ab34Slling for (level = dn->dn_nlevels - 1; level >= 0; level--) { 441b1b8ab34Slling idx = (blkid >> (epbs * level)) & ((1<<epbs)-1); 442b1b8ab34Slling *bp = bp_array[idx]; 443b1b8ab34Slling if (level == 0) 444b1b8ab34Slling tmpbuf = buf; 445ae8180dbSlling if (BP_IS_HOLE(bp)) { 446ae8180dbSlling grub_memset(buf, 0, 447ae8180dbSlling dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); 448ae8180dbSlling break; 449ae8180dbSlling } else if (errnum = zio_read(bp, tmpbuf, stack)) { 450b1b8ab34Slling return (errnum); 451ae8180dbSlling } 452b1b8ab34Slling 453b1b8ab34Slling bp_array = tmpbuf; 454b1b8ab34Slling } 455b1b8ab34Slling 456b1b8ab34Slling return (0); 457b1b8ab34Slling } 458b1b8ab34Slling 459b1b8ab34Slling /* 460b1b8ab34Slling * mzap_lookup: Looks up property described by "name" and returns the value 461b1b8ab34Slling * in "value". 462b1b8ab34Slling * 463b1b8ab34Slling * Return: 464b1b8ab34Slling * 0 - success 465b1b8ab34Slling * errnum - failure 466b1b8ab34Slling */ 467b1b8ab34Slling static int 468b1b8ab34Slling mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name, 469b1b8ab34Slling uint64_t *value) 470b1b8ab34Slling { 471b1b8ab34Slling int i, chunks; 472b1b8ab34Slling mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; 473b1b8ab34Slling 474b1b8ab34Slling chunks = objsize/MZAP_ENT_LEN - 1; 475b1b8ab34Slling for (i = 0; i < chunks; i++) { 476b1b8ab34Slling if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { 477b1b8ab34Slling *value = mzap_ent[i].mze_value; 478b1b8ab34Slling return (0); 479b1b8ab34Slling } 480b1b8ab34Slling } 481b1b8ab34Slling 482b1b8ab34Slling return (ERR_FSYS_CORRUPT); 483b1b8ab34Slling } 484b1b8ab34Slling 485b1b8ab34Slling static uint64_t 486b1b8ab34Slling zap_hash(uint64_t salt, const char *name) 487b1b8ab34Slling { 488b1b8ab34Slling static uint64_t table[256]; 489b1b8ab34Slling const uint8_t *cp; 490b1b8ab34Slling uint8_t c; 491b1b8ab34Slling uint64_t crc = salt; 492b1b8ab34Slling 493b1b8ab34Slling if (table[128] == 0) { 494b1b8ab34Slling uint64_t *ct; 495b1b8ab34Slling int i, j; 496b1b8ab34Slling for (i = 0; i < 256; i++) { 497b1b8ab34Slling for (ct = table + i, *ct = i, j = 8; j > 0; j--) 498b1b8ab34Slling *ct = (*ct >> 1) ^ (-(*ct & 1) & 499b1b8ab34Slling ZFS_CRC64_POLY); 500b1b8ab34Slling } 501b1b8ab34Slling } 502b1b8ab34Slling 503b1b8ab34Slling if (crc == 0 || table[128] != ZFS_CRC64_POLY) { 504b1b8ab34Slling errnum = ERR_FSYS_CORRUPT; 505b1b8ab34Slling return (0); 506b1b8ab34Slling } 507b1b8ab34Slling 508b1b8ab34Slling for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++) 509b1b8ab34Slling crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF]; 510b1b8ab34Slling 511b1b8ab34Slling /* 512b1b8ab34Slling * Only use 28 bits, since we need 4 bits in the cookie for the 513b1b8ab34Slling * collision differentiator. We MUST use the high bits, since 514b1b8ab34Slling * those are the onces that we first pay attention to when 515b1b8ab34Slling * chosing the bucket. 516b1b8ab34Slling */ 517b24ab676SJeff Bonwick crc &= ~((1ULL << (64 - 28)) - 1); 518b1b8ab34Slling 519b1b8ab34Slling return (crc); 520b1b8ab34Slling } 521b1b8ab34Slling 522b1b8ab34Slling /* 523b1b8ab34Slling * Only to be used on 8-bit arrays. 524b1b8ab34Slling * array_len is actual len in bytes (not encoded le_value_length). 525b1b8ab34Slling * buf is null-terminated. 526b1b8ab34Slling */ 527b1b8ab34Slling static int 528b1b8ab34Slling zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk, 529b1b8ab34Slling int array_len, const char *buf) 530b1b8ab34Slling { 531b1b8ab34Slling int bseen = 0; 532b1b8ab34Slling 533b1b8ab34Slling while (bseen < array_len) { 534b1b8ab34Slling struct zap_leaf_array *la = 535b1b8ab34Slling &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array; 536b1b8ab34Slling int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); 537b1b8ab34Slling 538b1b8ab34Slling if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) 539b1b8ab34Slling return (0); 540b1b8ab34Slling 541b1b8ab34Slling if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0) 542b1b8ab34Slling break; 543b1b8ab34Slling chunk = la->la_next; 544b1b8ab34Slling bseen += toread; 545b1b8ab34Slling } 546b1b8ab34Slling return (bseen == array_len); 547b1b8ab34Slling } 548b1b8ab34Slling 549b1b8ab34Slling /* 550b1b8ab34Slling * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the 551b1b8ab34Slling * value for the property "name". 552b1b8ab34Slling * 553b1b8ab34Slling * Return: 554b1b8ab34Slling * 0 - success 555b1b8ab34Slling * errnum - failure 556b1b8ab34Slling */ 557e7cbe64fSgw static int 558b1b8ab34Slling zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h, 559b1b8ab34Slling const char *name, uint64_t *value) 560b1b8ab34Slling { 561b1b8ab34Slling uint16_t chunk; 562b1b8ab34Slling struct zap_leaf_entry *le; 563b1b8ab34Slling 564b1b8ab34Slling /* Verify if this is a valid leaf block */ 565b1b8ab34Slling if (l->l_hdr.lh_block_type != ZBT_LEAF) 566b1b8ab34Slling return (ERR_FSYS_CORRUPT); 567b1b8ab34Slling if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC) 568b1b8ab34Slling return (ERR_FSYS_CORRUPT); 569b1b8ab34Slling 570b1b8ab34Slling for (chunk = l->l_hash[LEAF_HASH(blksft, h)]; 571b1b8ab34Slling chunk != CHAIN_END; chunk = le->le_next) { 572b1b8ab34Slling 573b1b8ab34Slling if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) 574b1b8ab34Slling return (ERR_FSYS_CORRUPT); 575b1b8ab34Slling 576b1b8ab34Slling le = ZAP_LEAF_ENTRY(l, blksft, chunk); 577b1b8ab34Slling 578b1b8ab34Slling /* Verify the chunk entry */ 579b1b8ab34Slling if (le->le_type != ZAP_CHUNK_ENTRY) 580b1b8ab34Slling return (ERR_FSYS_CORRUPT); 581b1b8ab34Slling 582b1b8ab34Slling if (le->le_hash != h) 583b1b8ab34Slling continue; 584b1b8ab34Slling 585b1b8ab34Slling if (zap_leaf_array_equal(l, blksft, le->le_name_chunk, 586b1b8ab34Slling le->le_name_length, name)) { 587b1b8ab34Slling 588b1b8ab34Slling struct zap_leaf_array *la; 589b1b8ab34Slling uint8_t *ip; 590b1b8ab34Slling 591b1b8ab34Slling if (le->le_int_size != 8 || le->le_value_length != 1) 592e37b211cStaylor return (ERR_FSYS_CORRUPT); 593b1b8ab34Slling 594b1b8ab34Slling /* get the uint64_t property value */ 595b1b8ab34Slling la = &ZAP_LEAF_CHUNK(l, blksft, 596b1b8ab34Slling le->le_value_chunk).l_array; 597b1b8ab34Slling ip = la->la_array; 598b1b8ab34Slling 599b1b8ab34Slling *value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | 600b1b8ab34Slling (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | 601b1b8ab34Slling (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | 602b1b8ab34Slling (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; 603b1b8ab34Slling 604b1b8ab34Slling return (0); 605b1b8ab34Slling } 606b1b8ab34Slling } 607b1b8ab34Slling 608b1b8ab34Slling return (ERR_FSYS_CORRUPT); 609b1b8ab34Slling } 610b1b8ab34Slling 611b1b8ab34Slling /* 612b1b8ab34Slling * Fat ZAP lookup 613b1b8ab34Slling * 614b1b8ab34Slling * Return: 615b1b8ab34Slling * 0 - success 616b1b8ab34Slling * errnum - failure 617b1b8ab34Slling */ 618e7cbe64fSgw static int 619b1b8ab34Slling fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, 620b1b8ab34Slling char *name, uint64_t *value, char *stack) 621b1b8ab34Slling { 622b1b8ab34Slling zap_leaf_phys_t *l; 623b1b8ab34Slling uint64_t hash, idx, blkid; 624b1b8ab34Slling int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT); 625b1b8ab34Slling 626b1b8ab34Slling /* Verify if this is a fat zap header block */ 627b24ab676SJeff Bonwick if (zap->zap_magic != (uint64_t)ZAP_MAGIC || 628b24ab676SJeff Bonwick zap->zap_flags != 0) 629b1b8ab34Slling return (ERR_FSYS_CORRUPT); 630b1b8ab34Slling 631b1b8ab34Slling hash = zap_hash(zap->zap_salt, name); 632b1b8ab34Slling if (errnum) 633b1b8ab34Slling return (errnum); 634b1b8ab34Slling 635b1b8ab34Slling /* get block id from index */ 636b1b8ab34Slling if (zap->zap_ptrtbl.zt_numblks != 0) { 637b1b8ab34Slling /* external pointer tables not supported */ 638b1b8ab34Slling return (ERR_FSYS_CORRUPT); 639b1b8ab34Slling } 640b1b8ab34Slling idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift); 641b1b8ab34Slling blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))]; 642b1b8ab34Slling 643b1b8ab34Slling /* Get the leaf block */ 644b1b8ab34Slling l = (zap_leaf_phys_t *)stack; 645b1b8ab34Slling stack += 1<<blksft; 646051aabe6Staylor if ((1<<blksft) < sizeof (zap_leaf_phys_t)) 647e37b211cStaylor return (ERR_FSYS_CORRUPT); 648b1b8ab34Slling if (errnum = dmu_read(zap_dnode, blkid, l, stack)) 649b1b8ab34Slling return (errnum); 650b1b8ab34Slling 651b1b8ab34Slling return (zap_leaf_lookup(l, blksft, hash, name, value)); 652b1b8ab34Slling } 653b1b8ab34Slling 654b1b8ab34Slling /* 655b1b8ab34Slling * Read in the data of a zap object and find the value for a matching 656b1b8ab34Slling * property name. 657b1b8ab34Slling * 658b1b8ab34Slling * Return: 659b1b8ab34Slling * 0 - success 660b1b8ab34Slling * errnum - failure 661b1b8ab34Slling */ 662b1b8ab34Slling static int 663b1b8ab34Slling zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack) 664b1b8ab34Slling { 665b1b8ab34Slling uint64_t block_type; 666b1b8ab34Slling int size; 667b1b8ab34Slling void *zapbuf; 668b1b8ab34Slling 669b1b8ab34Slling /* Read in the first block of the zap object data. */ 670b1b8ab34Slling zapbuf = stack; 671b1b8ab34Slling size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; 672b1b8ab34Slling stack += size; 673*0a586ceaSMark Shellenbaum 674b1b8ab34Slling if (errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) 675b1b8ab34Slling return (errnum); 676b1b8ab34Slling 677b1b8ab34Slling block_type = *((uint64_t *)zapbuf); 678b1b8ab34Slling 679b1b8ab34Slling if (block_type == ZBT_MICRO) { 680b1b8ab34Slling return (mzap_lookup(zapbuf, size, name, val)); 681b1b8ab34Slling } else if (block_type == ZBT_HEADER) { 682b1b8ab34Slling /* this is a fat zap */ 683b1b8ab34Slling return (fzap_lookup(zap_dnode, zapbuf, name, 684b1b8ab34Slling val, stack)); 685b1b8ab34Slling } 686b1b8ab34Slling 687b1b8ab34Slling return (ERR_FSYS_CORRUPT); 688b1b8ab34Slling } 689b1b8ab34Slling 690b1b8ab34Slling /* 691b1b8ab34Slling * Get the dnode of an object number from the metadnode of an object set. 692b1b8ab34Slling * 693b1b8ab34Slling * Input 694b1b8ab34Slling * mdn - metadnode to get the object dnode 695b1b8ab34Slling * objnum - object number for the object dnode 696b1b8ab34Slling * buf - data buffer that holds the returning dnode 697b1b8ab34Slling * stack - scratch area 698b1b8ab34Slling * 699b1b8ab34Slling * Return: 700b1b8ab34Slling * 0 - success 701b1b8ab34Slling * errnum - failure 702b1b8ab34Slling */ 703b1b8ab34Slling static int 704b1b8ab34Slling dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf, 705b1b8ab34Slling char *stack) 706b1b8ab34Slling { 707b1b8ab34Slling uint64_t blkid, blksz; /* the block id this object dnode is in */ 708b1b8ab34Slling int epbs; /* shift of number of dnodes in a block */ 709b1b8ab34Slling int idx; /* index within a block */ 710b1b8ab34Slling dnode_phys_t *dnbuf; 711b1b8ab34Slling 712b1b8ab34Slling blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT; 713b1b8ab34Slling epbs = zfs_log2(blksz) - DNODE_SHIFT; 714b1b8ab34Slling blkid = objnum >> epbs; 715b1b8ab34Slling idx = objnum & ((1<<epbs)-1); 716b1b8ab34Slling 717b1b8ab34Slling if (dnode_buf != NULL && dnode_mdn == mdn && 718b1b8ab34Slling objnum >= dnode_start && objnum < dnode_end) { 719b1b8ab34Slling grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE); 720b1b8ab34Slling VERIFY_DN_TYPE(buf, type); 721b1b8ab34Slling return (0); 722b1b8ab34Slling } 723b1b8ab34Slling 724b1b8ab34Slling if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) { 725b1b8ab34Slling dnbuf = dnode_buf; 726b1b8ab34Slling dnode_mdn = mdn; 727b1b8ab34Slling dnode_start = blkid << epbs; 728b1b8ab34Slling dnode_end = (blkid + 1) << epbs; 729b1b8ab34Slling } else { 730b1b8ab34Slling dnbuf = (dnode_phys_t *)stack; 731b1b8ab34Slling stack += blksz; 732b1b8ab34Slling } 733b1b8ab34Slling 734b1b8ab34Slling if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)) 735b1b8ab34Slling return (errnum); 736b1b8ab34Slling 737b1b8ab34Slling grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); 738b1b8ab34Slling VERIFY_DN_TYPE(buf, type); 739b1b8ab34Slling 740b1b8ab34Slling return (0); 741b1b8ab34Slling } 742b1b8ab34Slling 743b1b8ab34Slling /* 744eb2bd662Svikram * Check if this is a special file that resides at the top 745eb2bd662Svikram * dataset of the pool. Currently this is the GRUB menu, 746eb2bd662Svikram * boot signature and boot signature backup. 747b1b8ab34Slling * str starts with '/'. 748b1b8ab34Slling */ 749b1b8ab34Slling static int 750eb2bd662Svikram is_top_dataset_file(char *str) 751b1b8ab34Slling { 752b1b8ab34Slling char *tptr; 753b1b8ab34Slling 754b1b8ab34Slling if ((tptr = grub_strstr(str, "menu.lst")) && 755b1b8ab34Slling (tptr[8] == '\0' || tptr[8] == ' ') && 756b1b8ab34Slling *(tptr-1) == '/') 757b1b8ab34Slling return (1); 758b1b8ab34Slling 759eb2bd662Svikram if (grub_strncmp(str, BOOTSIGN_DIR"/", 7601183b401Svikram grub_strlen(BOOTSIGN_DIR) + 1) == 0) 761eb2bd662Svikram return (1); 762eb2bd662Svikram 763eb2bd662Svikram if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0) 764eb2bd662Svikram return (1); 765eb2bd662Svikram 766b1b8ab34Slling return (0); 767b1b8ab34Slling } 768b1b8ab34Slling 769b1b8ab34Slling /* 770b1b8ab34Slling * Get the file dnode for a given file name where mdn is the meta dnode 771b1b8ab34Slling * for this ZFS object set. When found, place the file dnode in dn. 772b1b8ab34Slling * The 'path' argument will be mangled. 773b1b8ab34Slling * 774b1b8ab34Slling * Return: 775b1b8ab34Slling * 0 - success 776b1b8ab34Slling * errnum - failure 777b1b8ab34Slling */ 778b1b8ab34Slling static int 779b1b8ab34Slling dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn, 780b1b8ab34Slling char *stack) 781b1b8ab34Slling { 782e7437265Sahrens uint64_t objnum, version; 783b1b8ab34Slling char *cname, ch; 784b1b8ab34Slling 785b1b8ab34Slling if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, 786b1b8ab34Slling dn, stack)) 787b1b8ab34Slling return (errnum); 788b1b8ab34Slling 789e7437265Sahrens if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)) 790e7437265Sahrens return (errnum); 791e7437265Sahrens if (version > ZPL_VERSION) 792e7437265Sahrens return (-1); 793e7437265Sahrens 794b1b8ab34Slling if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)) 795b1b8ab34Slling return (errnum); 796b1b8ab34Slling 797b1b8ab34Slling if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, 798b1b8ab34Slling dn, stack)) 799b1b8ab34Slling return (errnum); 800b1b8ab34Slling 801b1b8ab34Slling /* skip leading slashes */ 802b1b8ab34Slling while (*path == '/') 803b1b8ab34Slling path++; 804b1b8ab34Slling 805b1b8ab34Slling while (*path && !isspace(*path)) { 806b1b8ab34Slling 807b1b8ab34Slling /* get the next component name */ 808b1b8ab34Slling cname = path; 809b1b8ab34Slling while (*path && !isspace(*path) && *path != '/') 810b1b8ab34Slling path++; 811b1b8ab34Slling ch = *path; 812b1b8ab34Slling *path = 0; /* ensure null termination */ 813b1b8ab34Slling 814b1b8ab34Slling if (errnum = zap_lookup(dn, cname, &objnum, stack)) 815b1b8ab34Slling return (errnum); 816b1b8ab34Slling 817e7437265Sahrens objnum = ZFS_DIRENT_OBJ(objnum); 818b1b8ab34Slling if (errnum = dnode_get(mdn, objnum, 0, dn, stack)) 819b1b8ab34Slling return (errnum); 820b1b8ab34Slling 821b1b8ab34Slling *path = ch; 822b1b8ab34Slling while (*path == '/') 823b1b8ab34Slling path++; 824b1b8ab34Slling } 825b1b8ab34Slling 826b1b8ab34Slling /* We found the dnode for this file. Verify if it is a plain file. */ 827b1b8ab34Slling VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS); 828b1b8ab34Slling 829b1b8ab34Slling return (0); 830b1b8ab34Slling } 831b1b8ab34Slling 832b1b8ab34Slling /* 833b1b8ab34Slling * Get the default 'bootfs' property value from the rootpool. 834b1b8ab34Slling * 835b1b8ab34Slling * Return: 836b1b8ab34Slling * 0 - success 837b1b8ab34Slling * errnum -failure 838b1b8ab34Slling */ 839b1b8ab34Slling static int 840b1b8ab34Slling get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack) 841b1b8ab34Slling { 842b1b8ab34Slling uint64_t objnum = 0; 843b1b8ab34Slling dnode_phys_t *dn = (dnode_phys_t *)stack; 844b1b8ab34Slling stack += DNODE_SIZE; 845b1b8ab34Slling 846ae8180dbSlling if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 847b1b8ab34Slling DMU_OT_OBJECT_DIRECTORY, dn, stack)) 848ae8180dbSlling return (errnum); 849b1b8ab34Slling 850b1b8ab34Slling /* 851b1b8ab34Slling * find the object number for 'pool_props', and get the dnode 852b1b8ab34Slling * of the 'pool_props'. 853b1b8ab34Slling */ 854b1b8ab34Slling if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack)) 855b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 856b1b8ab34Slling 857ae8180dbSlling if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)) 858ae8180dbSlling return (errnum); 859b1b8ab34Slling 860b1b8ab34Slling if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) 861b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 862b1b8ab34Slling 863b1b8ab34Slling if (!objnum) 864b1b8ab34Slling return (ERR_FILESYSTEM_NOT_FOUND); 865b1b8ab34Slling 866b1b8ab34Slling *obj = objnum; 867b1b8ab34Slling return (0); 868b1b8ab34Slling } 869b1b8ab34Slling 870b1b8ab34Slling /* 871b1b8ab34Slling * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), 872b1b8ab34Slling * e.g. pool/rootfs, or a given object number (obj), e.g. the object number 873b1b8ab34Slling * of pool/rootfs. 874b1b8ab34Slling * 875b1b8ab34Slling * If no fsname and no obj are given, return the DSL_DIR metadnode. 876b1b8ab34Slling * If fsname is given, return its metadnode and its matching object number. 877b1b8ab34Slling * If only obj is given, return the metadnode for this object number. 878b1b8ab34Slling * 879b1b8ab34Slling * Return: 880b1b8ab34Slling * 0 - success 881b1b8ab34Slling * errnum - failure 882b1b8ab34Slling */ 883b1b8ab34Slling static int 884b1b8ab34Slling get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, uint64_t *obj, 885b1b8ab34Slling dnode_phys_t *mdn, char *stack) 886b1b8ab34Slling { 887b1b8ab34Slling uint64_t objnum, headobj; 888b1b8ab34Slling char *cname, ch; 889b1b8ab34Slling blkptr_t *bp; 890b1b8ab34Slling objset_phys_t *osp; 891fe3e2633SEric Taylor int issnapshot = 0; 892fe3e2633SEric Taylor char *snapname; 893b1b8ab34Slling 894b1b8ab34Slling if (fsname == NULL && obj) { 895b1b8ab34Slling headobj = *obj; 896b1b8ab34Slling goto skip; 897b1b8ab34Slling } 898b1b8ab34Slling 899b1b8ab34Slling if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, 900b1b8ab34Slling DMU_OT_OBJECT_DIRECTORY, mdn, stack)) 901b1b8ab34Slling return (errnum); 902b1b8ab34Slling 903b1b8ab34Slling if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, 904b1b8ab34Slling stack)) 905b1b8ab34Slling return (errnum); 906b1b8ab34Slling 907b1b8ab34Slling if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack)) 908b1b8ab34Slling return (errnum); 909b1b8ab34Slling 910b1b8ab34Slling if (fsname == NULL) { 911b1b8ab34Slling headobj = 912b1b8ab34Slling ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; 913b1b8ab34Slling goto skip; 914b1b8ab34Slling } 915b1b8ab34Slling 916b1b8ab34Slling /* take out the pool name */ 917b1b8ab34Slling while (*fsname && !isspace(*fsname) && *fsname != '/') 918b1b8ab34Slling fsname++; 919b1b8ab34Slling 920b1b8ab34Slling while (*fsname && !isspace(*fsname)) { 921b1b8ab34Slling uint64_t childobj; 922b1b8ab34Slling 923b1b8ab34Slling while (*fsname == '/') 924b1b8ab34Slling fsname++; 925b1b8ab34Slling 926b1b8ab34Slling cname = fsname; 927b1b8ab34Slling while (*fsname && !isspace(*fsname) && *fsname != '/') 928b1b8ab34Slling fsname++; 929b1b8ab34Slling ch = *fsname; 930b1b8ab34Slling *fsname = 0; 931b1b8ab34Slling 932fe3e2633SEric Taylor snapname = cname; 933fe3e2633SEric Taylor while (*snapname && !isspace(*snapname) && *snapname != '@') 934fe3e2633SEric Taylor snapname++; 935fe3e2633SEric Taylor if (*snapname == '@') { 936fe3e2633SEric Taylor issnapshot = 1; 937fe3e2633SEric Taylor *snapname = 0; 938fe3e2633SEric Taylor } 939b1b8ab34Slling childobj = 940b1b8ab34Slling ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; 941b1b8ab34Slling if (errnum = dnode_get(mosmdn, childobj, 942b1b8ab34Slling DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)) 943b1b8ab34Slling return (errnum); 944b1b8ab34Slling 945ae8180dbSlling if (zap_lookup(mdn, cname, &objnum, stack)) 946ae8180dbSlling return (ERR_FILESYSTEM_NOT_FOUND); 947b1b8ab34Slling 948b1b8ab34Slling if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, 949b1b8ab34Slling mdn, stack)) 950b1b8ab34Slling return (errnum); 951b1b8ab34Slling 952b1b8ab34Slling *fsname = ch; 953fe3e2633SEric Taylor if (issnapshot) 954fe3e2633SEric Taylor *snapname = '@'; 955b1b8ab34Slling } 956b1b8ab34Slling headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; 957b1b8ab34Slling if (obj) 958b1b8ab34Slling *obj = headobj; 959b1b8ab34Slling 960b1b8ab34Slling skip: 961b1b8ab34Slling if (errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack)) 962b1b8ab34Slling return (errnum); 963fe3e2633SEric Taylor if (issnapshot) { 964fe3e2633SEric Taylor uint64_t snapobj; 965fe3e2633SEric Taylor 966fe3e2633SEric Taylor snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> 967fe3e2633SEric Taylor ds_snapnames_zapobj; 968b1b8ab34Slling 969fe3e2633SEric Taylor if (errnum = dnode_get(mosmdn, snapobj, 970fe3e2633SEric Taylor DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)) 971fe3e2633SEric Taylor return (errnum); 972fe3e2633SEric Taylor if (zap_lookup(mdn, snapname + 1, &headobj, stack)) 973fe3e2633SEric Taylor return (ERR_FILESYSTEM_NOT_FOUND); 974fe3e2633SEric Taylor if (errnum = dnode_get(mosmdn, headobj, 975fe3e2633SEric Taylor DMU_OT_DSL_DATASET, mdn, stack)) 976fe3e2633SEric Taylor return (errnum); 977fe3e2633SEric Taylor if (obj) 978fe3e2633SEric Taylor *obj = headobj; 979fe3e2633SEric Taylor } 980b1b8ab34Slling 981b1b8ab34Slling bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; 982b1b8ab34Slling osp = (objset_phys_t *)stack; 983b1b8ab34Slling stack += sizeof (objset_phys_t); 984b1b8ab34Slling if (errnum = zio_read(bp, osp, stack)) 985b1b8ab34Slling return (errnum); 986b1b8ab34Slling 987b1b8ab34Slling grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); 988b1b8ab34Slling 989b1b8ab34Slling return (0); 990b1b8ab34Slling } 991b1b8ab34Slling 992b1b8ab34Slling /* 993e7cbe64fSgw * For a given XDR packed nvlist, verify the first 4 bytes and move on. 994b1b8ab34Slling * 995e7cbe64fSgw * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : 996b1b8ab34Slling * 997b1b8ab34Slling * encoding method/host endian (4 bytes) 998b1b8ab34Slling * nvl_version (4 bytes) 999b1b8ab34Slling * nvl_nvflag (4 bytes) 1000b1b8ab34Slling * encoded nvpairs: 1001b1b8ab34Slling * encoded size of the nvpair (4 bytes) 1002b1b8ab34Slling * decoded size of the nvpair (4 bytes) 1003b1b8ab34Slling * name string size (4 bytes) 1004b1b8ab34Slling * name string data (sizeof(NV_ALIGN4(string)) 1005b1b8ab34Slling * data type (4 bytes) 1006b1b8ab34Slling * # of elements in the nvpair (4 bytes) 1007b1b8ab34Slling * data 1008b1b8ab34Slling * 2 zero's for the last nvpair 1009b1b8ab34Slling * (end of the entire list) (8 bytes) 1010b1b8ab34Slling * 1011b1b8ab34Slling * Return: 1012b1b8ab34Slling * 0 - success 1013b1b8ab34Slling * 1 - failure 1014b1b8ab34Slling */ 1015e7cbe64fSgw static int 1016e7cbe64fSgw nvlist_unpack(char *nvlist, char **out) 1017b1b8ab34Slling { 1018b1b8ab34Slling /* Verify if the 1st and 2nd byte in the nvlist are valid. */ 1019b1b8ab34Slling if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) 1020b1b8ab34Slling return (1); 1021b1b8ab34Slling 1022e7cbe64fSgw nvlist += 4; 1023e7cbe64fSgw *out = nvlist; 1024e7cbe64fSgw return (0); 1025e7cbe64fSgw } 1026e7cbe64fSgw 1027e7cbe64fSgw static char * 1028e7cbe64fSgw nvlist_array(char *nvlist, int index) 1029e7cbe64fSgw { 1030e7cbe64fSgw int i, encode_size; 1031e7cbe64fSgw 1032e7cbe64fSgw for (i = 0; i < index; i++) { 1033e7cbe64fSgw /* skip the header, nvl_version, and nvl_nvflag */ 1034e7cbe64fSgw nvlist = nvlist + 4 * 2; 1035e7cbe64fSgw 1036e7cbe64fSgw while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) 1037e7cbe64fSgw nvlist += encode_size; /* goto the next nvpair */ 1038e7cbe64fSgw 1039e7cbe64fSgw nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ 1040e7cbe64fSgw } 1041e7cbe64fSgw 1042e7cbe64fSgw return (nvlist); 1043e7cbe64fSgw } 1044e7cbe64fSgw 1045e7cbe64fSgw static int 1046e7cbe64fSgw nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, 1047e7cbe64fSgw int *nelmp) 1048e7cbe64fSgw { 1049e7cbe64fSgw int name_len, type, slen, encode_size; 1050e7cbe64fSgw char *nvpair, *nvp_name, *strval = val; 1051e7cbe64fSgw uint64_t *intval = val; 1052e7cbe64fSgw 1053b1b8ab34Slling /* skip the header, nvl_version, and nvl_nvflag */ 1054e7cbe64fSgw nvlist = nvlist + 4 * 2; 1055b1b8ab34Slling 1056b1b8ab34Slling /* 1057b1b8ab34Slling * Loop thru the nvpair list 1058b1b8ab34Slling * The XDR representation of an integer is in big-endian byte order. 1059b1b8ab34Slling */ 1060b1b8ab34Slling while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) { 1061b1b8ab34Slling 1062b1b8ab34Slling nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ 1063b1b8ab34Slling 1064b1b8ab34Slling name_len = BSWAP_32(*(uint32_t *)nvpair); 1065b1b8ab34Slling nvpair += 4; 1066b1b8ab34Slling 1067b1b8ab34Slling nvp_name = nvpair; 1068b1b8ab34Slling nvpair = nvpair + ((name_len + 3) & ~3); /* align */ 1069b1b8ab34Slling 1070b1b8ab34Slling type = BSWAP_32(*(uint32_t *)nvpair); 1071b1b8ab34Slling nvpair += 4; 1072b1b8ab34Slling 1073b1b8ab34Slling if ((grub_strncmp(nvp_name, name, name_len) == 0) && 1074b1b8ab34Slling type == valtype) { 1075e7cbe64fSgw int nelm; 1076b1b8ab34Slling 1077e7cbe64fSgw if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1) 1078b1b8ab34Slling return (1); 1079b1b8ab34Slling nvpair += 4; 1080b1b8ab34Slling 1081b1b8ab34Slling switch (valtype) { 1082b1b8ab34Slling case DATA_TYPE_STRING: 1083b1b8ab34Slling slen = BSWAP_32(*(uint32_t *)nvpair); 1084b1b8ab34Slling nvpair += 4; 1085b1b8ab34Slling grub_memmove(strval, nvpair, slen); 1086b1b8ab34Slling strval[slen] = '\0'; 1087b1b8ab34Slling return (0); 1088b1b8ab34Slling 1089b1b8ab34Slling case DATA_TYPE_UINT64: 1090b1b8ab34Slling *intval = BSWAP_64(*(uint64_t *)nvpair); 1091b1b8ab34Slling return (0); 1092e7cbe64fSgw 1093e7cbe64fSgw case DATA_TYPE_NVLIST: 1094e7cbe64fSgw *(void **)val = (void *)nvpair; 1095e7cbe64fSgw return (0); 1096e7cbe64fSgw 1097e7cbe64fSgw case DATA_TYPE_NVLIST_ARRAY: 1098e7cbe64fSgw *(void **)val = (void *)nvpair; 1099e7cbe64fSgw if (nelmp) 1100e7cbe64fSgw *nelmp = nelm; 1101e7cbe64fSgw return (0); 1102b1b8ab34Slling } 1103b1b8ab34Slling } 1104b1b8ab34Slling 1105b1b8ab34Slling nvlist += encode_size; /* goto the next nvpair */ 1106b1b8ab34Slling } 1107b1b8ab34Slling 1108b1b8ab34Slling return (1); 1109b1b8ab34Slling } 1110b1b8ab34Slling 1111b1b8ab34Slling /* 1112e7cbe64fSgw * Check if this vdev is online and is in a good state. 1113e7cbe64fSgw */ 1114e7cbe64fSgw static int 1115e7cbe64fSgw vdev_validate(char *nv) 1116e7cbe64fSgw { 1117e7cbe64fSgw uint64_t ival; 1118e7cbe64fSgw 1119e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival, 1120e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0 || 1121e7cbe64fSgw nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival, 1122e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0 || 1123e7cbe64fSgw nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival, 1124e7cbe64fSgw DATA_TYPE_UINT64, NULL) == 0) 1125e7cbe64fSgw return (ERR_DEV_VALUES); 1126e7cbe64fSgw 1127e7cbe64fSgw return (0); 1128e7cbe64fSgw } 1129e7cbe64fSgw 1130e7cbe64fSgw /* 113121ecdf64SLin Ling * Get a valid vdev pathname/devid from the boot device. 1132ffb5616eSLin Ling * The caller should already allocate MAXPATHLEN memory for bootpath and devid. 1133e7cbe64fSgw */ 113421ecdf64SLin Ling static int 113521ecdf64SLin Ling vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath, 113621ecdf64SLin Ling int is_spare) 1137e7cbe64fSgw { 1138e7cbe64fSgw char type[16]; 1139e7cbe64fSgw 1140e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING, 1141e7cbe64fSgw NULL)) 1142e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1143e7cbe64fSgw 1144e7cbe64fSgw if (strcmp(type, VDEV_TYPE_DISK) == 0) { 1145ffb5616eSLin Ling uint64_t guid; 1146ffb5616eSLin Ling 1147ffb5616eSLin Ling if (vdev_validate(nv) != 0) 1148ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1149ffb5616eSLin Ling 1150ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID, 1151ffb5616eSLin Ling &guid, DATA_TYPE_UINT64, NULL) != 0) 1152ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1153ffb5616eSLin Ling 1154ffb5616eSLin Ling if (guid != inguid) 1155e7cbe64fSgw return (ERR_NO_BOOTPATH); 1156e7cbe64fSgw 115721ecdf64SLin Ling /* for a spare vdev, pick the disk labeled with "is_spare" */ 115821ecdf64SLin Ling if (is_spare) { 115921ecdf64SLin Ling uint64_t spare = 0; 116021ecdf64SLin Ling (void) nvlist_lookup_value(nv, ZPOOL_CONFIG_IS_SPARE, 116121ecdf64SLin Ling &spare, DATA_TYPE_UINT64, NULL); 116221ecdf64SLin Ling if (!spare) 116321ecdf64SLin Ling return (ERR_NO_BOOTPATH); 116421ecdf64SLin Ling } 116521ecdf64SLin Ling 1166ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, 1167ffb5616eSLin Ling bootpath, DATA_TYPE_STRING, NULL) != 0) 1168ffb5616eSLin Ling bootpath[0] = '\0'; 1169ffb5616eSLin Ling 1170ffb5616eSLin Ling if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID, 1171ffb5616eSLin Ling devid, DATA_TYPE_STRING, NULL) != 0) 1172ffb5616eSLin Ling devid[0] = '\0'; 1173ffb5616eSLin Ling 1174ffb5616eSLin Ling if (strlen(bootpath) >= MAXPATHLEN || 1175ffb5616eSLin Ling strlen(devid) >= MAXPATHLEN) 1176ffb5616eSLin Ling return (ERR_WONT_FIT); 1177ffb5616eSLin Ling 1178ffb5616eSLin Ling return (0); 1179ffb5616eSLin Ling 118021ecdf64SLin Ling } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || 118121ecdf64SLin Ling strcmp(type, VDEV_TYPE_REPLACING) == 0 || 118221ecdf64SLin Ling (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { 1183e7cbe64fSgw int nelm, i; 1184e7cbe64fSgw char *child; 1185e7cbe64fSgw 1186e7cbe64fSgw if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child, 1187e7cbe64fSgw DATA_TYPE_NVLIST_ARRAY, &nelm)) 1188e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1189e7cbe64fSgw 1190e7cbe64fSgw for (i = 0; i < nelm; i++) { 1191e7cbe64fSgw char *child_i; 1192e7cbe64fSgw 1193e7cbe64fSgw child_i = nvlist_array(child, i); 1194ffb5616eSLin Ling if (vdev_get_bootpath(child_i, inguid, devid, 119521ecdf64SLin Ling bootpath, is_spare) == 0) 1196ffb5616eSLin Ling return (0); 1197e7cbe64fSgw } 1198e7cbe64fSgw } 1199e7cbe64fSgw 1200ffb5616eSLin Ling return (ERR_NO_BOOTPATH); 1201e7cbe64fSgw } 1202e7cbe64fSgw 1203e7cbe64fSgw /* 1204e7cbe64fSgw * Check the disk label information and retrieve needed vdev name-value pairs. 1205b1b8ab34Slling * 1206b1b8ab34Slling * Return: 1207b1b8ab34Slling * 0 - success 1208e7cbe64fSgw * ERR_* - failure 1209b1b8ab34Slling */ 1210051aabe6Staylor int 1211e23347b1SEric Taylor check_pool_label(uint64_t sector, char *stack, char *outdevid, 1212e23347b1SEric Taylor char *outpath, uint64_t *outguid) 1213b1b8ab34Slling { 1214b1b8ab34Slling vdev_phys_t *vdev; 1215e23347b1SEric Taylor uint64_t pool_state, txg = 0; 1216e7cbe64fSgw char *nvlist, *nv; 1217051aabe6Staylor uint64_t diskguid; 1218fe3e2633SEric Taylor uint64_t version; 1219b1b8ab34Slling 1220e23347b1SEric Taylor sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT); 1221b1b8ab34Slling 1222b1b8ab34Slling /* Read in the vdev name-value pair list (112K). */ 1223b1b8ab34Slling if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0) 1224b1b8ab34Slling return (ERR_READ); 1225b1b8ab34Slling 1226b1b8ab34Slling vdev = (vdev_phys_t *)stack; 1227e4c3b53dStaylor stack += sizeof (vdev_phys_t); 1228b1b8ab34Slling 1229e7cbe64fSgw if (nvlist_unpack(vdev->vp_nvlist, &nvlist)) 1230b1b8ab34Slling return (ERR_FSYS_CORRUPT); 1231e7cbe64fSgw 1232e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state, 1233e7cbe64fSgw DATA_TYPE_UINT64, NULL)) 1234e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1235e7cbe64fSgw 1236e7cbe64fSgw if (pool_state == POOL_STATE_DESTROYED) 1237e7cbe64fSgw return (ERR_FILESYSTEM_NOT_FOUND); 1238e7cbe64fSgw 1239e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME, 1240e7cbe64fSgw current_rootpool, DATA_TYPE_STRING, NULL)) 1241e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1242e7cbe64fSgw 1243e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg, 1244e7cbe64fSgw DATA_TYPE_UINT64, NULL)) 1245e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1246e7cbe64fSgw 1247e7cbe64fSgw /* not an active device */ 1248e7cbe64fSgw if (txg == 0) 1249e7cbe64fSgw return (ERR_NO_BOOTPATH); 1250e7cbe64fSgw 1251fe3e2633SEric Taylor if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version, 1252fe3e2633SEric Taylor DATA_TYPE_UINT64, NULL)) 1253fe3e2633SEric Taylor return (ERR_FSYS_CORRUPT); 1254fe3e2633SEric Taylor if (version > SPA_VERSION) 1255fe3e2633SEric Taylor return (ERR_NEWER_VERSION); 1256e7cbe64fSgw if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, 1257e7cbe64fSgw DATA_TYPE_NVLIST, NULL)) 1258e7cbe64fSgw return (ERR_FSYS_CORRUPT); 1259051aabe6Staylor if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, 1260051aabe6Staylor DATA_TYPE_UINT64, NULL)) 1261051aabe6Staylor return (ERR_FSYS_CORRUPT); 126221ecdf64SLin Ling if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0)) 1263e7cbe64fSgw return (ERR_NO_BOOTPATH); 1264e23347b1SEric Taylor if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid, 1265e23347b1SEric Taylor DATA_TYPE_UINT64, NULL)) 1266e23347b1SEric Taylor return (ERR_FSYS_CORRUPT); 1267e7cbe64fSgw return (0); 1268b1b8ab34Slling } 1269b1b8ab34Slling 1270b1b8ab34Slling /* 1271b1b8ab34Slling * zfs_mount() locates a valid uberblock of the root pool and read in its MOS 1272b1b8ab34Slling * to the memory address MOS. 1273b1b8ab34Slling * 1274b1b8ab34Slling * Return: 1275b1b8ab34Slling * 1 - success 1276b1b8ab34Slling * 0 - failure 1277b1b8ab34Slling */ 1278b1b8ab34Slling int 1279b1b8ab34Slling zfs_mount(void) 1280b1b8ab34Slling { 1281b1b8ab34Slling char *stack; 1282b1b8ab34Slling int label = 0; 1283e23347b1SEric Taylor uberblock_phys_t *ub_array, *ubbest; 1284b1b8ab34Slling objset_phys_t *osp; 1285051aabe6Staylor char tmp_bootpath[MAXNAMELEN]; 1286051aabe6Staylor char tmp_devid[MAXNAMELEN]; 1287e23347b1SEric Taylor uint64_t tmp_guid; 1288e23347b1SEric Taylor uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT; 1289bbe6aa77SJan Setje-Eilers int err = errnum; /* preserve previous errnum state */ 1290051aabe6Staylor 1291051aabe6Staylor /* if it's our first time here, zero the best uberblock out */ 1292e23347b1SEric Taylor if (best_drive == 0 && best_part == 0 && find_best_root) { 1293e37b211cStaylor grub_memset(¤t_uberblock, 0, sizeof (uberblock_t)); 1294e23347b1SEric Taylor pool_guid = 0; 1295e23347b1SEric Taylor } 1296b1b8ab34Slling 1297b1b8ab34Slling stackbase = ZFS_SCRATCH; 1298b1b8ab34Slling stack = stackbase; 1299b1b8ab34Slling ub_array = (uberblock_phys_t *)stack; 1300b1b8ab34Slling stack += VDEV_UBERBLOCK_RING; 1301b1b8ab34Slling 1302b1b8ab34Slling osp = (objset_phys_t *)stack; 1303b1b8ab34Slling stack += sizeof (objset_phys_t); 1304e23347b1SEric Taylor adjpl = P2ALIGN(adjpl, (uint64_t)sizeof (vdev_label_t)); 1305b1b8ab34Slling 1306e23347b1SEric Taylor for (label = 0; label < VDEV_LABELS; label++) { 130798c507c4SJan Setje-Eilers 130898c507c4SJan Setje-Eilers /* 130998c507c4SJan Setje-Eilers * some eltorito stacks don't give us a size and 131098c507c4SJan Setje-Eilers * we end up setting the size to MAXUINT, further 131198c507c4SJan Setje-Eilers * some of these devices stop working once a single 131298c507c4SJan Setje-Eilers * read past the end has been issued. Checking 131398c507c4SJan Setje-Eilers * for a maximum part_length and skipping the backup 131498c507c4SJan Setje-Eilers * labels at the end of the slice/partition/device 131598c507c4SJan Setje-Eilers * avoids breaking down on such devices. 131698c507c4SJan Setje-Eilers */ 131798c507c4SJan Setje-Eilers if (part_length == MAXUINT && label == 2) 131898c507c4SJan Setje-Eilers break; 131998c507c4SJan Setje-Eilers 1320e23347b1SEric Taylor uint64_t sector = vdev_label_start(adjpl, 1321e23347b1SEric Taylor label) >> SPA_MINBLOCKSHIFT; 1322b1b8ab34Slling 1323b1b8ab34Slling /* Read in the uberblock ring (128K). */ 1324e23347b1SEric Taylor if (devread(sector + 1325e23347b1SEric Taylor ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> 1326e23347b1SEric Taylor SPA_MINBLOCKSHIFT), 0, VDEV_UBERBLOCK_RING, 1327b1b8ab34Slling (char *)ub_array) == 0) 1328b1b8ab34Slling continue; 1329b1b8ab34Slling 1330e23347b1SEric Taylor if ((ubbest = find_bestub(ub_array, sector)) != NULL && 1331b1b8ab34Slling zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack) 1332b1b8ab34Slling == 0) { 1333b1b8ab34Slling 1334b1b8ab34Slling VERIFY_OS_TYPE(osp, DMU_OST_META); 1335b1b8ab34Slling 1336e23347b1SEric Taylor if (check_pool_label(sector, stack, tmp_devid, 1337e23347b1SEric Taylor tmp_bootpath, &tmp_guid)) 1338e23347b1SEric Taylor continue; 1339e23347b1SEric Taylor if (pool_guid == 0) 1340e23347b1SEric Taylor pool_guid = tmp_guid; 1341b1b8ab34Slling 1342e23347b1SEric Taylor if (find_best_root && ((pool_guid != tmp_guid) || 1343051aabe6Staylor vdev_uberblock_compare(&ubbest->ubp_uberblock, 1344e23347b1SEric Taylor &(current_uberblock)) <= 0)) 1345051aabe6Staylor continue; 1346ffb5616eSLin Ling 1347051aabe6Staylor /* Got the MOS. Save it at the memory addr MOS. */ 1348051aabe6Staylor grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); 1349051aabe6Staylor grub_memmove(¤t_uberblock, 1350ffb5616eSLin Ling &ubbest->ubp_uberblock, sizeof (uberblock_t)); 1351051aabe6Staylor grub_memmove(current_bootpath, tmp_bootpath, 1352051aabe6Staylor MAXNAMELEN); 1353051aabe6Staylor grub_memmove(current_devid, tmp_devid, 1354051aabe6Staylor grub_strlen(tmp_devid)); 1355b1b8ab34Slling is_zfs_mount = 1; 1356b1b8ab34Slling return (1); 1357b1b8ab34Slling } 1358b1b8ab34Slling } 1359b1b8ab34Slling 1360bbe6aa77SJan Setje-Eilers /* 1361bbe6aa77SJan Setje-Eilers * While some fs impls. (tftp) rely on setting and keeping 1362bbe6aa77SJan Setje-Eilers * global errnums set, others won't reset it and will break 1363bbe6aa77SJan Setje-Eilers * when issuing rawreads. The goal here is to simply not 1364bbe6aa77SJan Setje-Eilers * have zfs mount attempts impact the previous state. 1365bbe6aa77SJan Setje-Eilers */ 1366bbe6aa77SJan Setje-Eilers errnum = err; 1367b1b8ab34Slling return (0); 1368b1b8ab34Slling } 1369b1b8ab34Slling 1370b1b8ab34Slling /* 1371b1b8ab34Slling * zfs_open() locates a file in the rootpool by following the 1372b1b8ab34Slling * MOS and places the dnode of the file in the memory address DNODE. 1373b1b8ab34Slling * 1374b1b8ab34Slling * Return: 1375b1b8ab34Slling * 1 - success 1376b1b8ab34Slling * 0 - failure 1377b1b8ab34Slling */ 1378b1b8ab34Slling int 1379b1b8ab34Slling zfs_open(char *filename) 1380b1b8ab34Slling { 1381b1b8ab34Slling char *stack; 1382b1b8ab34Slling dnode_phys_t *mdn; 1383b1b8ab34Slling 1384b1b8ab34Slling file_buf = NULL; 1385b1b8ab34Slling stackbase = ZFS_SCRATCH; 1386b1b8ab34Slling stack = stackbase; 1387b1b8ab34Slling 1388b1b8ab34Slling mdn = (dnode_phys_t *)stack; 1389b1b8ab34Slling stack += sizeof (dnode_phys_t); 1390b1b8ab34Slling 1391b1b8ab34Slling dnode_mdn = NULL; 1392b1b8ab34Slling dnode_buf = (dnode_phys_t *)stack; 1393b1b8ab34Slling stack += 1<<DNODE_BLOCK_SHIFT; 1394b1b8ab34Slling 1395b1b8ab34Slling /* 1396b1b8ab34Slling * menu.lst is placed at the root pool filesystem level, 1397b1b8ab34Slling * do not goto 'current_bootfs'. 1398b1b8ab34Slling */ 1399eb2bd662Svikram if (is_top_dataset_file(filename)) { 1400b1b8ab34Slling if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)) 1401b1b8ab34Slling return (0); 1402b1b8ab34Slling 1403b1b8ab34Slling current_bootfs_obj = 0; 1404b1b8ab34Slling } else { 1405b1b8ab34Slling if (current_bootfs[0] == '\0') { 1406b1b8ab34Slling /* Get the default root filesystem object number */ 1407ae8180dbSlling if (errnum = get_default_bootfsobj(MOS, 1408ae8180dbSlling ¤t_bootfs_obj, stack)) 1409b1b8ab34Slling return (0); 1410b1b8ab34Slling 1411b1b8ab34Slling if (errnum = get_objset_mdn(MOS, NULL, 1412b1b8ab34Slling ¤t_bootfs_obj, mdn, stack)) 1413b1b8ab34Slling return (0); 1414b1b8ab34Slling } else { 1415b35c6776Staylor if (errnum = get_objset_mdn(MOS, current_bootfs, 1416b35c6776Staylor ¤t_bootfs_obj, mdn, stack)) { 1417051aabe6Staylor grub_memset(current_bootfs, 0, MAXNAMELEN); 1418b1b8ab34Slling return (0); 1419b35c6776Staylor } 1420b1b8ab34Slling } 1421b1b8ab34Slling } 1422b1b8ab34Slling 1423b1b8ab34Slling if (dnode_get_path(mdn, filename, DNODE, stack)) { 1424b1b8ab34Slling errnum = ERR_FILE_NOT_FOUND; 1425b1b8ab34Slling return (0); 1426b1b8ab34Slling } 1427b1b8ab34Slling 1428b1b8ab34Slling /* get the file size and set the file position to 0 */ 1429*0a586ceaSMark Shellenbaum 1430*0a586ceaSMark Shellenbaum /* 1431*0a586ceaSMark Shellenbaum * For DMU_OT_SA we will need to locate the SIZE attribute 1432*0a586ceaSMark Shellenbaum * attribute, which could be either in the bonus buffer 1433*0a586ceaSMark Shellenbaum * or the "spill" block. 1434*0a586ceaSMark Shellenbaum */ 1435*0a586ceaSMark Shellenbaum if (DNODE->dn_bonustype == DMU_OT_SA) { 1436*0a586ceaSMark Shellenbaum sa_hdr_phys_t *sahdrp; 1437*0a586ceaSMark Shellenbaum int hdrsize; 1438*0a586ceaSMark Shellenbaum 1439*0a586ceaSMark Shellenbaum sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE); 1440*0a586ceaSMark Shellenbaum if (DNODE->dn_bonuslen != 0) { 1441*0a586ceaSMark Shellenbaum sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE); 1442*0a586ceaSMark Shellenbaum } else { 1443*0a586ceaSMark Shellenbaum if (DNODE->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { 1444*0a586ceaSMark Shellenbaum blkptr_t *bp = &DNODE->dn_spill; 1445*0a586ceaSMark Shellenbaum void *buf; 1446*0a586ceaSMark Shellenbaum 1447*0a586ceaSMark Shellenbaum buf = (void *)stack; 1448*0a586ceaSMark Shellenbaum stack += BP_GET_LSIZE(bp); 1449*0a586ceaSMark Shellenbaum 1450*0a586ceaSMark Shellenbaum /* reset errnum to rawread() failure */ 1451*0a586ceaSMark Shellenbaum errnum = 0; 1452*0a586ceaSMark Shellenbaum if (zio_read(bp, buf, stack) != 0) { 1453*0a586ceaSMark Shellenbaum return (0); 1454*0a586ceaSMark Shellenbaum } 1455*0a586ceaSMark Shellenbaum sahdrp = buf; 1456*0a586ceaSMark Shellenbaum } else { 1457*0a586ceaSMark Shellenbaum errnum = ERR_FSYS_CORRUPT; 1458*0a586ceaSMark Shellenbaum return (0); 1459*0a586ceaSMark Shellenbaum } 1460*0a586ceaSMark Shellenbaum } 1461*0a586ceaSMark Shellenbaum hdrsize = SA_HDR_SIZE(sahdrp); 1462*0a586ceaSMark Shellenbaum filemax = *(uint64_t *)((char *)sahdrp + hdrsize + 1463*0a586ceaSMark Shellenbaum SA_SIZE_OFFSET); 1464*0a586ceaSMark Shellenbaum } else { 1465*0a586ceaSMark Shellenbaum filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size; 1466*0a586ceaSMark Shellenbaum } 1467b1b8ab34Slling filepos = 0; 1468b1b8ab34Slling 1469b1b8ab34Slling dnode_buf = NULL; 1470b1b8ab34Slling return (1); 1471b1b8ab34Slling } 1472b1b8ab34Slling 1473b1b8ab34Slling /* 1474b1b8ab34Slling * zfs_read reads in the data blocks pointed by the DNODE. 1475b1b8ab34Slling * 1476b1b8ab34Slling * Return: 1477b1b8ab34Slling * len - the length successfully read in to the buffer 1478b1b8ab34Slling * 0 - failure 1479b1b8ab34Slling */ 1480b1b8ab34Slling int 1481b1b8ab34Slling zfs_read(char *buf, int len) 1482b1b8ab34Slling { 1483b1b8ab34Slling char *stack; 1484b1b8ab34Slling char *tmpbuf; 1485b1b8ab34Slling int blksz, length, movesize; 1486b1b8ab34Slling 1487b1b8ab34Slling if (file_buf == NULL) { 1488b1b8ab34Slling file_buf = stackbase; 1489b1b8ab34Slling stackbase += SPA_MAXBLOCKSIZE; 1490b1b8ab34Slling file_start = file_end = 0; 1491b1b8ab34Slling } 1492b1b8ab34Slling stack = stackbase; 1493b1b8ab34Slling 1494b1b8ab34Slling /* 1495b1b8ab34Slling * If offset is in memory, move it into the buffer provided and return. 1496b1b8ab34Slling */ 1497b1b8ab34Slling if (filepos >= file_start && filepos+len <= file_end) { 1498b1b8ab34Slling grub_memmove(buf, file_buf + filepos - file_start, len); 1499b1b8ab34Slling filepos += len; 1500b1b8ab34Slling return (len); 1501b1b8ab34Slling } 1502b1b8ab34Slling 1503b1b8ab34Slling blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT; 1504b1b8ab34Slling 1505b1b8ab34Slling /* 1506b1b8ab34Slling * Entire Dnode is too big to fit into the space available. We 1507b1b8ab34Slling * will need to read it in chunks. This could be optimized to 1508b1b8ab34Slling * read in as large a chunk as there is space available, but for 1509b1b8ab34Slling * now, this only reads in one data block at a time. 1510b1b8ab34Slling */ 1511b1b8ab34Slling length = len; 1512b1b8ab34Slling while (length) { 1513b1b8ab34Slling /* 1514b1b8ab34Slling * Find requested blkid and the offset within that block. 1515b1b8ab34Slling */ 1516b1b8ab34Slling uint64_t blkid = filepos / blksz; 1517b1b8ab34Slling 1518b1b8ab34Slling if (errnum = dmu_read(DNODE, blkid, file_buf, stack)) 1519b1b8ab34Slling return (0); 1520b1b8ab34Slling 1521b1b8ab34Slling file_start = blkid * blksz; 1522b1b8ab34Slling file_end = file_start + blksz; 1523b1b8ab34Slling 1524b1b8ab34Slling movesize = MIN(length, file_end - filepos); 1525b1b8ab34Slling 1526b1b8ab34Slling grub_memmove(buf, file_buf + filepos - file_start, 1527b1b8ab34Slling movesize); 1528b1b8ab34Slling buf += movesize; 1529b1b8ab34Slling length -= movesize; 1530b1b8ab34Slling filepos += movesize; 1531b1b8ab34Slling } 1532b1b8ab34Slling 1533b1b8ab34Slling return (len); 1534b1b8ab34Slling } 1535b1b8ab34Slling 1536b1b8ab34Slling /* 1537b1b8ab34Slling * No-Op 1538b1b8ab34Slling */ 1539b1b8ab34Slling int 1540b1b8ab34Slling zfs_embed(int *start_sector, int needed_sectors) 1541b1b8ab34Slling { 1542b1b8ab34Slling return (1); 1543b1b8ab34Slling } 1544b1b8ab34Slling 1545b1b8ab34Slling #endif /* FSYS_ZFS */ 1546