1b1b8ab34Slling /*
2b1b8ab34Slling  *  GRUB  --  GRand Unified Bootloader
3b1b8ab34Slling  *  Copyright (C) 1999,2000,2001,2002,2003,2004  Free Software Foundation, Inc.
4b1b8ab34Slling  *
5b1b8ab34Slling  *  This program is free software; you can redistribute it and/or modify
6b1b8ab34Slling  *  it under the terms of the GNU General Public License as published by
7b1b8ab34Slling  *  the Free Software Foundation; either version 2 of the License, or
8b1b8ab34Slling  *  (at your option) any later version.
9b1b8ab34Slling  *
10b1b8ab34Slling  *  This program is distributed in the hope that it will be useful,
11b1b8ab34Slling  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12b1b8ab34Slling  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13b1b8ab34Slling  *  GNU General Public License for more details.
14b1b8ab34Slling  *
15b1b8ab34Slling  *  You should have received a copy of the GNU General Public License
16b1b8ab34Slling  *  along with this program; if not, write to the Free Software
17b1b8ab34Slling  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18b1b8ab34Slling  */
19b1b8ab34Slling /*
20e7cbe64fSgw  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
21b1b8ab34Slling  * Use is subject to license terms.
22b1b8ab34Slling  */
23b1b8ab34Slling 
24b1b8ab34Slling /*
25b1b8ab34Slling  * The zfs plug-in routines for GRUB are:
26b1b8ab34Slling  *
27b1b8ab34Slling  * zfs_mount() - locates a valid uberblock of the root pool and reads
28b1b8ab34Slling  *		in its MOS at the memory address MOS.
29b1b8ab34Slling  *
30b1b8ab34Slling  * zfs_open() - locates a plain file object by following the MOS
31b1b8ab34Slling  *		and places its dnode at the memory address DNODE.
32b1b8ab34Slling  *
33b1b8ab34Slling  * zfs_read() - read in the data blocks pointed by the DNODE.
34b1b8ab34Slling  *
35b1b8ab34Slling  * ZFS_SCRATCH is used as a working area.
36b1b8ab34Slling  *
37b1b8ab34Slling  * (memory addr)   MOS      DNODE	ZFS_SCRATCH
38b1b8ab34Slling  *		    |         |          |
39b1b8ab34Slling  *	    +-------V---------V----------V---------------+
40b1b8ab34Slling  *   memory |       | dnode   | dnode    |  scratch      |
41b1b8ab34Slling  *	    |       | 512B    | 512B     |  area         |
42b1b8ab34Slling  *	    +--------------------------------------------+
43b1b8ab34Slling  */
44b1b8ab34Slling 
45b1b8ab34Slling #ifdef	FSYS_ZFS
46b1b8ab34Slling 
47b1b8ab34Slling #include "shared.h"
48b1b8ab34Slling #include "filesys.h"
49b1b8ab34Slling #include "fsys_zfs.h"
50b1b8ab34Slling 
51b1b8ab34Slling /* cache for a file block of the currently zfs_open()-ed file */
52b1b8ab34Slling static void *file_buf = NULL;
53b1b8ab34Slling static uint64_t file_start = 0;
54b1b8ab34Slling static uint64_t file_end = 0;
55b1b8ab34Slling 
56b1b8ab34Slling /* cache for a dnode block */
57b1b8ab34Slling static dnode_phys_t *dnode_buf = NULL;
58b1b8ab34Slling static dnode_phys_t *dnode_mdn = NULL;
59b1b8ab34Slling static uint64_t dnode_start = 0;
60b1b8ab34Slling static uint64_t dnode_end = 0;
61b1b8ab34Slling 
62051aabe6Staylor static uberblock_t current_uberblock;
63b1b8ab34Slling static char *stackbase;
64b1b8ab34Slling 
65b1b8ab34Slling decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
66b1b8ab34Slling {
6715e6edf1Sgw 	{"inherit", 0},			/* ZIO_COMPRESS_INHERIT */
68b1b8ab34Slling 	{"on", lzjb_decompress}, 	/* ZIO_COMPRESS_ON */
6915e6edf1Sgw 	{"off", 0},			/* ZIO_COMPRESS_OFF */
7015e6edf1Sgw 	{"lzjb", lzjb_decompress},	/* ZIO_COMPRESS_LZJB */
7115e6edf1Sgw 	{"empty", 0}			/* ZIO_COMPRESS_EMPTY */
72b1b8ab34Slling };
73b1b8ab34Slling 
74*cd9c78d9SLin Ling static int zio_read_data(blkptr_t *bp, void *buf, char *stack);
75*cd9c78d9SLin Ling 
/*
 * Private replacement for bcmp(): compare the first n bytes of s1 and s2.
 *
 * Return:
 *	0 - the regions are byte-for-byte equal (also when n is 0 or when
 *	    s1 and s2 are the same pointer)
 *	1 - at least one byte differs
 */
static int
zfs_bcmp(const void *s1, const void *s2, size_t n)
{
	const unsigned char *lhs = s1;
	const unsigned char *rhs = s2;
	size_t i;

	/* The same address always compares equal; nothing is read. */
	if (s1 == s2)
		return (0);

	for (i = 0; i < n; i++) {
		if (lhs[i] != rhs[i])
			return (1);
	}

	return (0);
}
94b1b8ab34Slling 
/*
 * Private log2(): returns floor(log2(num)) for num >= 1, i.e. the
 * zero-based position of the highest set bit (highbit() - 1).
 * An input of 0 yields 0.
 */
static int
zfs_log2(uint64_t num)
{
	int bits;

	/* Count how many times num can be halved before reaching 1 (or 0). */
	for (bits = 0; num > 1; num >>= 1)
		bits++;

	return (bits);
}
110b1b8ab34Slling 
111b1b8ab34Slling /* Checksum Functions */
112b1b8ab34Slling static void
113b1b8ab34Slling zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
114b1b8ab34Slling {
115b1b8ab34Slling 	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
116b1b8ab34Slling }
117b1b8ab34Slling 
118b1b8ab34Slling /* Checksum Table and Values */
119b1b8ab34Slling zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
120b1b8ab34Slling 	NULL,			NULL,			0, 0,	"inherit",
121b1b8ab34Slling 	NULL,			NULL,			0, 0,	"on",
122b1b8ab34Slling 	zio_checksum_off,	zio_checksum_off,	0, 0,	"off",
123b1b8ab34Slling 	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 1,	"label",
124b1b8ab34Slling 	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 1,	"gang_header",
125b1b8ab34Slling 	fletcher_2_native,	fletcher_2_byteswap,	0, 1,	"zilog",
126b1b8ab34Slling 	fletcher_2_native,	fletcher_2_byteswap,	0, 0,	"fletcher2",
127b1b8ab34Slling 	fletcher_4_native,	fletcher_4_byteswap,	1, 0,	"fletcher4",
128b1b8ab34Slling 	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 0,	"SHA256",
129b1b8ab34Slling };
130b1b8ab34Slling 
131b1b8ab34Slling /*
132b1b8ab34Slling  * zio_checksum_verify: Provides support for checksum verification.
133b1b8ab34Slling  *
134b1b8ab34Slling  * Fletcher2, Fletcher4, and SHA256 are supported.
135b1b8ab34Slling  *
136b1b8ab34Slling  * Return:
137b1b8ab34Slling  * 	-1 = Failure
138b1b8ab34Slling  *	 0 = Success
139b1b8ab34Slling  */
140b1b8ab34Slling static int
141b1b8ab34Slling zio_checksum_verify(blkptr_t *bp, char *data, int size)
142b1b8ab34Slling {
143b1b8ab34Slling 	zio_cksum_t zc = bp->blk_cksum;
144*cd9c78d9SLin Ling 	uint32_t checksum = BP_GET_CHECKSUM(bp);
145b1b8ab34Slling 	int byteswap = BP_SHOULD_BYTESWAP(bp);
146b1b8ab34Slling 	zio_block_tail_t *zbt = (zio_block_tail_t *)(data + size) - 1;
147b1b8ab34Slling 	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
148b1b8ab34Slling 	zio_cksum_t actual_cksum, expected_cksum;
149b1b8ab34Slling 
150b1b8ab34Slling 	/* byteswap is not supported */
151b1b8ab34Slling 	if (byteswap)
152b1b8ab34Slling 		return (-1);
153b1b8ab34Slling 
154b1b8ab34Slling 	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
155b1b8ab34Slling 		return (-1);
156b1b8ab34Slling 
157b1b8ab34Slling 	if (ci->ci_zbt) {
158*cd9c78d9SLin Ling 		expected_cksum = zbt->zbt_cksum;
159*cd9c78d9SLin Ling 		zbt->zbt_cksum = zc;
160*cd9c78d9SLin Ling 		ci->ci_func[0](data, size, &actual_cksum);
161*cd9c78d9SLin Ling 		zbt->zbt_cksum = expected_cksum;
162b1b8ab34Slling 		zc = expected_cksum;
163b1b8ab34Slling 
164b1b8ab34Slling 	} else {
165b1b8ab34Slling 		ci->ci_func[byteswap](data, size, &actual_cksum);
166b1b8ab34Slling 	}
167b1b8ab34Slling 
168b1b8ab34Slling 	if ((actual_cksum.zc_word[0] - zc.zc_word[0]) |
169b1b8ab34Slling 	    (actual_cksum.zc_word[1] - zc.zc_word[1]) |
170b1b8ab34Slling 	    (actual_cksum.zc_word[2] - zc.zc_word[2]) |
171b1b8ab34Slling 	    (actual_cksum.zc_word[3] - zc.zc_word[3]))
172b1b8ab34Slling 		return (-1);
173b1b8ab34Slling 
174b1b8ab34Slling 	return (0);
175b1b8ab34Slling }
176b1b8ab34Slling 
177b1b8ab34Slling /*
178b1b8ab34Slling  * vdev_label_offset takes "offset" (the offset within a vdev_label) and
179b1b8ab34Slling  * returns its physical disk offset (starting from the beginning of the vdev).
180b1b8ab34Slling  *
181b1b8ab34Slling  * Input:
182b1b8ab34Slling  *	psize	: Physical size of this vdev
183b1b8ab34Slling  *      l	: Label Number (0-3)
184b1b8ab34Slling  *	offset	: The offset with a vdev_label in which we want the physical
185b1b8ab34Slling  *		  address
186b1b8ab34Slling  * Return:
187b1b8ab34Slling  * 	Success : physical disk offset
188b1b8ab34Slling  * 	Failure : errnum = ERR_BAD_ARGUMENT, return value is meaningless
189b1b8ab34Slling  */
190e7cbe64fSgw static uint64_t
191b1b8ab34Slling vdev_label_offset(uint64_t psize, int l, uint64_t offset)
192b1b8ab34Slling {
193b1b8ab34Slling 	/* XXX Need to add back label support! */
194b1b8ab34Slling 	if (l >= VDEV_LABELS/2 || offset > sizeof (vdev_label_t)) {
195b1b8ab34Slling 		errnum = ERR_BAD_ARGUMENT;
196b1b8ab34Slling 		return (0);
197b1b8ab34Slling 	}
198b1b8ab34Slling 
199b1b8ab34Slling 	return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
200b1b8ab34Slling 	    0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
201b1b8ab34Slling 
202b1b8ab34Slling }
203b1b8ab34Slling 
204b1b8ab34Slling /*
205b1b8ab34Slling  * vdev_uberblock_compare takes two uberblock structures and returns an integer
206b1b8ab34Slling  * indicating the more recent of the two.
207b1b8ab34Slling  * 	Return Value = 1 if ub2 is more recent
208b1b8ab34Slling  * 	Return Value = -1 if ub1 is more recent
209b1b8ab34Slling  * The most recent uberblock is determined using its transaction number and
210b1b8ab34Slling  * timestamp.  The uberblock with the highest transaction number is
211b1b8ab34Slling  * considered "newer".  If the transaction numbers of the two blocks match, the
212b1b8ab34Slling  * timestamps are compared to determine the "newer" of the two.
213b1b8ab34Slling  */
214b1b8ab34Slling static int
215b1b8ab34Slling vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
216b1b8ab34Slling {
217b1b8ab34Slling 	if (ub1->ub_txg < ub2->ub_txg)
218b1b8ab34Slling 		return (-1);
219b1b8ab34Slling 	if (ub1->ub_txg > ub2->ub_txg)
220b1b8ab34Slling 		return (1);
221b1b8ab34Slling 
222b1b8ab34Slling 	if (ub1->ub_timestamp < ub2->ub_timestamp)
223b1b8ab34Slling 		return (-1);
224b1b8ab34Slling 	if (ub1->ub_timestamp > ub2->ub_timestamp)
225b1b8ab34Slling 		return (1);
226b1b8ab34Slling 
227b1b8ab34Slling 	return (0);
228b1b8ab34Slling }
229b1b8ab34Slling 
230b1b8ab34Slling /*
231b1b8ab34Slling  * Three pieces of information are needed to verify an uberblock: the magic
232b1b8ab34Slling  * number, the version number, and the checksum.
233b1b8ab34Slling  *
234b1b8ab34Slling  * Currently Implemented: version number, magic number
235b1b8ab34Slling  * Need to Implement: checksum
236b1b8ab34Slling  *
237b1b8ab34Slling  * Return:
238b1b8ab34Slling  *     0 - Success
239b1b8ab34Slling  *    -1 - Failure
240b1b8ab34Slling  */
241b1b8ab34Slling static int
242b1b8ab34Slling uberblock_verify(uberblock_phys_t *ub, int offset)
243b1b8ab34Slling {
244b1b8ab34Slling 
245b1b8ab34Slling 	uberblock_t *uber = &ub->ubp_uberblock;
246b1b8ab34Slling 	blkptr_t bp;
247b1b8ab34Slling 
248b1b8ab34Slling 	BP_ZERO(&bp);
249b1b8ab34Slling 	BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
250b1b8ab34Slling 	BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER);
251b1b8ab34Slling 	ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0);
252b1b8ab34Slling 
253b1b8ab34Slling 	if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0)
254b1b8ab34Slling 		return (-1);
255b1b8ab34Slling 
256b1b8ab34Slling 	if (uber->ub_magic == UBERBLOCK_MAGIC &&
257bb0ade09Sahrens 	    uber->ub_version > 0 && uber->ub_version <= SPA_VERSION)
258b1b8ab34Slling 		return (0);
259b1b8ab34Slling 
260b1b8ab34Slling 	return (-1);
261b1b8ab34Slling }
262b1b8ab34Slling 
263b1b8ab34Slling /*
264b1b8ab34Slling  * Find the best uberblock.
265b1b8ab34Slling  * Return:
266b1b8ab34Slling  *    Success - Pointer to the best uberblock.
267b1b8ab34Slling  *    Failure - NULL
268b1b8ab34Slling  */
269b1b8ab34Slling static uberblock_phys_t *
270b1b8ab34Slling find_bestub(uberblock_phys_t *ub_array, int label)
271b1b8ab34Slling {
272b1b8ab34Slling 	uberblock_phys_t *ubbest = NULL;
273b1b8ab34Slling 	int i, offset;
274b1b8ab34Slling 
275b1b8ab34Slling 	for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) {
276b1b8ab34Slling 		offset = vdev_label_offset(0, label, VDEV_UBERBLOCK_OFFSET(i));
277b1b8ab34Slling 		if (errnum == ERR_BAD_ARGUMENT)
278b1b8ab34Slling 			return (NULL);
279b1b8ab34Slling 		if (uberblock_verify(&ub_array[i], offset) == 0) {
280b1b8ab34Slling 			if (ubbest == NULL) {
281b1b8ab34Slling 				ubbest = &ub_array[i];
282e7437265Sahrens 			} else if (vdev_uberblock_compare(
283e7437265Sahrens 			    &(ub_array[i].ubp_uberblock),
284e7437265Sahrens 			    &(ubbest->ubp_uberblock)) > 0) {
285e7437265Sahrens 				ubbest = &ub_array[i];
286b1b8ab34Slling 			}
287b1b8ab34Slling 		}
288b1b8ab34Slling 	}
289b1b8ab34Slling 
290b1b8ab34Slling 	return (ubbest);
291b1b8ab34Slling }
292b1b8ab34Slling 
293b1b8ab34Slling /*
294*cd9c78d9SLin Ling  * Read a block of data based on the gang block address dva,
295*cd9c78d9SLin Ling  * and put its data in buf.
296b1b8ab34Slling  *
297b1b8ab34Slling  * Return:
298b1b8ab34Slling  *	0 - success
299*cd9c78d9SLin Ling  *	1 - failure
300b1b8ab34Slling  */
301b1b8ab34Slling static int
302*cd9c78d9SLin Ling zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack)
303b1b8ab34Slling {
304*cd9c78d9SLin Ling 	zio_gbh_phys_t *zio_gb;
305b1b8ab34Slling 	uint64_t offset, sector;
306*cd9c78d9SLin Ling 	blkptr_t tmpbp;
307*cd9c78d9SLin Ling 	int i;
308b1b8ab34Slling 
309*cd9c78d9SLin Ling 	zio_gb = (zio_gbh_phys_t *)stack;
310*cd9c78d9SLin Ling 	stack += SPA_GANGBLOCKSIZE;
311*cd9c78d9SLin Ling 	offset = DVA_GET_OFFSET(dva);
312*cd9c78d9SLin Ling 	sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
313b1b8ab34Slling 
314*cd9c78d9SLin Ling 	/* read in the gang block header */
315*cd9c78d9SLin Ling 	if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
316*cd9c78d9SLin Ling 		grub_printf("failed to read in a gang block header\n");
317*cd9c78d9SLin Ling 		return (1);
318*cd9c78d9SLin Ling 	}
319*cd9c78d9SLin Ling 
320*cd9c78d9SLin Ling 	/* self checksuming the gang block header */
321*cd9c78d9SLin Ling 	BP_ZERO(&tmpbp);
322*cd9c78d9SLin Ling 	BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER);
323*cd9c78d9SLin Ling 	BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER);
324*cd9c78d9SLin Ling 	ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva),
325*cd9c78d9SLin Ling 	    DVA_GET_OFFSET(dva), bp->blk_birth, 0);
326*cd9c78d9SLin Ling 	if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) {
327*cd9c78d9SLin Ling 		grub_printf("failed to checksum a gang block header\n");
328*cd9c78d9SLin Ling 		return (1);
329*cd9c78d9SLin Ling 	}
330*cd9c78d9SLin Ling 
331*cd9c78d9SLin Ling 	for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
332*cd9c78d9SLin Ling 		if (zio_gb->zg_blkptr[i].blk_birth == 0)
333*cd9c78d9SLin Ling 			continue;
334*cd9c78d9SLin Ling 
335*cd9c78d9SLin Ling 		if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack))
336*cd9c78d9SLin Ling 			return (1);
337*cd9c78d9SLin Ling 		buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]);
338*cd9c78d9SLin Ling 	}
339*cd9c78d9SLin Ling 
340*cd9c78d9SLin Ling 	return (0);
341*cd9c78d9SLin Ling }
342*cd9c78d9SLin Ling 
343*cd9c78d9SLin Ling /*
344*cd9c78d9SLin Ling  * Read in a block of raw data to buf.
345*cd9c78d9SLin Ling  *
346*cd9c78d9SLin Ling  * Return:
347*cd9c78d9SLin Ling  *	0 - success
348*cd9c78d9SLin Ling  *	1 - failure
349*cd9c78d9SLin Ling  */
350*cd9c78d9SLin Ling static int
351*cd9c78d9SLin Ling zio_read_data(blkptr_t *bp, void *buf, char *stack)
352*cd9c78d9SLin Ling {
353*cd9c78d9SLin Ling 	int i, psize;
354*cd9c78d9SLin Ling 
355*cd9c78d9SLin Ling 	psize = BP_GET_PSIZE(bp);
356ae8180dbSlling 
357b1b8ab34Slling 	/* pick a good dva from the block pointer */
358b1b8ab34Slling 	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
359*cd9c78d9SLin Ling 		uint64_t offset, sector;
360b1b8ab34Slling 
361b1b8ab34Slling 		if (bp->blk_dva[i].dva_word[0] == 0 &&
362b1b8ab34Slling 		    bp->blk_dva[i].dva_word[1] == 0)
363b1b8ab34Slling 			continue;
364b1b8ab34Slling 
365*cd9c78d9SLin Ling 		if (DVA_GET_GANG(&bp->blk_dva[i])) {
366*cd9c78d9SLin Ling 			if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) == 0)
367*cd9c78d9SLin Ling 				return (0);
368b1b8ab34Slling 		} else {
369*cd9c78d9SLin Ling 			/* read in a data block */
370*cd9c78d9SLin Ling 			offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
371*cd9c78d9SLin Ling 			sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
372*cd9c78d9SLin Ling 			if (devread(sector, 0, psize, buf))
373*cd9c78d9SLin Ling 				return (0);
374b1b8ab34Slling 		}
375b1b8ab34Slling 	}
376b1b8ab34Slling 
377*cd9c78d9SLin Ling 	return (1);
378*cd9c78d9SLin Ling }
379*cd9c78d9SLin Ling 
380*cd9c78d9SLin Ling /*
381*cd9c78d9SLin Ling  * Read in a block of data, verify its checksum, decompress if needed,
382*cd9c78d9SLin Ling  * and put the uncompressed data in buf.
383*cd9c78d9SLin Ling  *
384*cd9c78d9SLin Ling  * Return:
385*cd9c78d9SLin Ling  *	0 - success
386*cd9c78d9SLin Ling  *	errnum - failure
387*cd9c78d9SLin Ling  */
388*cd9c78d9SLin Ling static int
389*cd9c78d9SLin Ling zio_read(blkptr_t *bp, void *buf, char *stack)
390*cd9c78d9SLin Ling {
391*cd9c78d9SLin Ling 	int lsize, psize, comp;
392*cd9c78d9SLin Ling 	char *retbuf;
393*cd9c78d9SLin Ling 
394*cd9c78d9SLin Ling 	comp = BP_GET_COMPRESS(bp);
395*cd9c78d9SLin Ling 	lsize = BP_GET_LSIZE(bp);
396*cd9c78d9SLin Ling 	psize = BP_GET_PSIZE(bp);
397*cd9c78d9SLin Ling 
398*cd9c78d9SLin Ling 	if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
399*cd9c78d9SLin Ling 	    (comp != ZIO_COMPRESS_OFF &&
400*cd9c78d9SLin Ling 	    decomp_table[comp].decomp_func == NULL)) {
401*cd9c78d9SLin Ling 		grub_printf("compression algorithm not supported\n");
402*cd9c78d9SLin Ling 		return (ERR_FSYS_CORRUPT);
403*cd9c78d9SLin Ling 	}
404*cd9c78d9SLin Ling 
405*cd9c78d9SLin Ling 	if ((char *)buf < stack && ((char *)buf) + lsize > stack) {
406*cd9c78d9SLin Ling 		grub_printf("not enough memory allocated\n");
407*cd9c78d9SLin Ling 		return (ERR_WONT_FIT);
408*cd9c78d9SLin Ling 	}
409*cd9c78d9SLin Ling 
410*cd9c78d9SLin Ling 	retbuf = buf;
411*cd9c78d9SLin Ling 	if (comp != ZIO_COMPRESS_OFF) {
412*cd9c78d9SLin Ling 		buf = stack;
413*cd9c78d9SLin Ling 		stack += psize;
414*cd9c78d9SLin Ling 	}
415*cd9c78d9SLin Ling 
416*cd9c78d9SLin Ling 	if (zio_read_data(bp, buf, stack)) {
417*cd9c78d9SLin Ling 		grub_printf("zio_read_data failed\n");
418*cd9c78d9SLin Ling 		return (ERR_FSYS_CORRUPT);
419*cd9c78d9SLin Ling 	}
420*cd9c78d9SLin Ling 
421*cd9c78d9SLin Ling 	if (zio_checksum_verify(bp, buf, psize) != 0) {
422*cd9c78d9SLin Ling 		grub_printf("checksum verification failed\n");
423*cd9c78d9SLin Ling 		return (ERR_FSYS_CORRUPT);
424*cd9c78d9SLin Ling 	}
425*cd9c78d9SLin Ling 
426*cd9c78d9SLin Ling 	if (comp != ZIO_COMPRESS_OFF)
427*cd9c78d9SLin Ling 		decomp_table[comp].decomp_func(buf, retbuf, psize, lsize);
428*cd9c78d9SLin Ling 
429*cd9c78d9SLin Ling 	return (0);
430b1b8ab34Slling }
431b1b8ab34Slling 
432b1b8ab34Slling /*
433b1b8ab34Slling  * Get the block from a block id.
434b1b8ab34Slling  * push the block onto the stack.
435b1b8ab34Slling  *
436b1b8ab34Slling  * Return:
437b1b8ab34Slling  * 	0 - success
438b1b8ab34Slling  * 	errnum - failure
439b1b8ab34Slling  */
440b1b8ab34Slling static int
441b1b8ab34Slling dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack)
442b1b8ab34Slling {
443b1b8ab34Slling 	int idx, level;
444b1b8ab34Slling 	blkptr_t *bp_array = dn->dn_blkptr;
445b1b8ab34Slling 	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
446b1b8ab34Slling 	blkptr_t *bp, *tmpbuf;
447b1b8ab34Slling 
448b1b8ab34Slling 	bp = (blkptr_t *)stack;
449b1b8ab34Slling 	stack += sizeof (blkptr_t);
450b1b8ab34Slling 
451b1b8ab34Slling 	tmpbuf = (blkptr_t *)stack;
452b1b8ab34Slling 	stack += 1<<dn->dn_indblkshift;
453b1b8ab34Slling 
454b1b8ab34Slling 	for (level = dn->dn_nlevels - 1; level >= 0; level--) {
455b1b8ab34Slling 		idx = (blkid >> (epbs * level)) & ((1<<epbs)-1);
456b1b8ab34Slling 		*bp = bp_array[idx];
457b1b8ab34Slling 		if (level == 0)
458b1b8ab34Slling 			tmpbuf = buf;
459ae8180dbSlling 		if (BP_IS_HOLE(bp)) {
460ae8180dbSlling 			grub_memset(buf, 0,
461ae8180dbSlling 			    dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
462ae8180dbSlling 			break;
463ae8180dbSlling 		} else if (errnum = zio_read(bp, tmpbuf, stack)) {
464b1b8ab34Slling 			return (errnum);
465ae8180dbSlling 		}
466b1b8ab34Slling 
467b1b8ab34Slling 		bp_array = tmpbuf;
468b1b8ab34Slling 	}
469b1b8ab34Slling 
470b1b8ab34Slling 	return (0);
471b1b8ab34Slling }
472b1b8ab34Slling 
473b1b8ab34Slling /*
474b1b8ab34Slling  * mzap_lookup: Looks up property described by "name" and returns the value
475b1b8ab34Slling  * in "value".
476b1b8ab34Slling  *
477b1b8ab34Slling  * Return:
478b1b8ab34Slling  *	0 - success
479b1b8ab34Slling  *	errnum - failure
480b1b8ab34Slling  */
481b1b8ab34Slling static int
482b1b8ab34Slling mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name,
483b1b8ab34Slling 	uint64_t *value)
484b1b8ab34Slling {
485b1b8ab34Slling 	int i, chunks;
486b1b8ab34Slling 	mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
487b1b8ab34Slling 
488b1b8ab34Slling 	chunks = objsize/MZAP_ENT_LEN - 1;
489b1b8ab34Slling 	for (i = 0; i < chunks; i++) {
490b1b8ab34Slling 		if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) {
491b1b8ab34Slling 			*value = mzap_ent[i].mze_value;
492b1b8ab34Slling 			return (0);
493b1b8ab34Slling 		}
494b1b8ab34Slling 	}
495b1b8ab34Slling 
496b1b8ab34Slling 	return (ERR_FSYS_CORRUPT);
497b1b8ab34Slling }
498b1b8ab34Slling 
499b1b8ab34Slling static uint64_t
500b1b8ab34Slling zap_hash(uint64_t salt, const char *name)
501b1b8ab34Slling {
502b1b8ab34Slling 	static uint64_t table[256];
503b1b8ab34Slling 	const uint8_t *cp;
504b1b8ab34Slling 	uint8_t c;
505b1b8ab34Slling 	uint64_t crc = salt;
506b1b8ab34Slling 
507b1b8ab34Slling 	if (table[128] == 0) {
508b1b8ab34Slling 		uint64_t *ct;
509b1b8ab34Slling 		int i, j;
510b1b8ab34Slling 		for (i = 0; i < 256; i++) {
511b1b8ab34Slling 			for (ct = table + i, *ct = i, j = 8; j > 0; j--)
512b1b8ab34Slling 				*ct = (*ct >> 1) ^ (-(*ct & 1) &
513b1b8ab34Slling 				    ZFS_CRC64_POLY);
514b1b8ab34Slling 		}
515b1b8ab34Slling 	}
516b1b8ab34Slling 
517b1b8ab34Slling 	if (crc == 0 || table[128] != ZFS_CRC64_POLY) {
518b1b8ab34Slling 		errnum = ERR_FSYS_CORRUPT;
519b1b8ab34Slling 		return (0);
520b1b8ab34Slling 	}
521b1b8ab34Slling 
522b1b8ab34Slling 	for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
523b1b8ab34Slling 		crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
524b1b8ab34Slling 
525b1b8ab34Slling 	/*
526b1b8ab34Slling 	 * Only use 28 bits, since we need 4 bits in the cookie for the
527b1b8ab34Slling 	 * collision differentiator.  We MUST use the high bits, since
528b1b8ab34Slling 	 * those are the onces that we first pay attention to when
529b1b8ab34Slling 	 * chosing the bucket.
530b1b8ab34Slling 	 */
531b1b8ab34Slling 	crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
532b1b8ab34Slling 
533b1b8ab34Slling 	return (crc);
534b1b8ab34Slling }
535b1b8ab34Slling 
536b1b8ab34Slling /*
537b1b8ab34Slling  * Only to be used on 8-bit arrays.
538b1b8ab34Slling  * array_len is actual len in bytes (not encoded le_value_length).
539b1b8ab34Slling  * buf is null-terminated.
540b1b8ab34Slling  */
541b1b8ab34Slling static int
542b1b8ab34Slling zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk,
543b1b8ab34Slling     int array_len, const char *buf)
544b1b8ab34Slling {
545b1b8ab34Slling 	int bseen = 0;
546b1b8ab34Slling 
547b1b8ab34Slling 	while (bseen < array_len) {
548b1b8ab34Slling 		struct zap_leaf_array *la =
549b1b8ab34Slling 		    &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
550b1b8ab34Slling 		int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
551b1b8ab34Slling 
552b1b8ab34Slling 		if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
553b1b8ab34Slling 			return (0);
554b1b8ab34Slling 
555b1b8ab34Slling 		if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0)
556b1b8ab34Slling 			break;
557b1b8ab34Slling 		chunk = la->la_next;
558b1b8ab34Slling 		bseen += toread;
559b1b8ab34Slling 	}
560b1b8ab34Slling 	return (bseen == array_len);
561b1b8ab34Slling }
562b1b8ab34Slling 
563b1b8ab34Slling /*
564b1b8ab34Slling  * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
565b1b8ab34Slling  * value for the property "name".
566b1b8ab34Slling  *
567b1b8ab34Slling  * Return:
568b1b8ab34Slling  *	0 - success
569b1b8ab34Slling  *	errnum - failure
570b1b8ab34Slling  */
571e7cbe64fSgw static int
572b1b8ab34Slling zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h,
573b1b8ab34Slling     const char *name, uint64_t *value)
574b1b8ab34Slling {
575b1b8ab34Slling 	uint16_t chunk;
576b1b8ab34Slling 	struct zap_leaf_entry *le;
577b1b8ab34Slling 
578b1b8ab34Slling 	/* Verify if this is a valid leaf block */
579b1b8ab34Slling 	if (l->l_hdr.lh_block_type != ZBT_LEAF)
580b1b8ab34Slling 		return (ERR_FSYS_CORRUPT);
581b1b8ab34Slling 	if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC)
582b1b8ab34Slling 		return (ERR_FSYS_CORRUPT);
583b1b8ab34Slling 
584b1b8ab34Slling 	for (chunk = l->l_hash[LEAF_HASH(blksft, h)];
585b1b8ab34Slling 	    chunk != CHAIN_END; chunk = le->le_next) {
586b1b8ab34Slling 
587b1b8ab34Slling 		if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
588b1b8ab34Slling 			return (ERR_FSYS_CORRUPT);
589b1b8ab34Slling 
590b1b8ab34Slling 		le = ZAP_LEAF_ENTRY(l, blksft, chunk);
591b1b8ab34Slling 
592b1b8ab34Slling 		/* Verify the chunk entry */
593b1b8ab34Slling 		if (le->le_type != ZAP_CHUNK_ENTRY)
594b1b8ab34Slling 			return (ERR_FSYS_CORRUPT);
595b1b8ab34Slling 
596b1b8ab34Slling 		if (le->le_hash != h)
597b1b8ab34Slling 			continue;
598b1b8ab34Slling 
599b1b8ab34Slling 		if (zap_leaf_array_equal(l, blksft, le->le_name_chunk,
600b1b8ab34Slling 		    le->le_name_length, name)) {
601b1b8ab34Slling 
602b1b8ab34Slling 			struct zap_leaf_array *la;
603b1b8ab34Slling 			uint8_t *ip;
604b1b8ab34Slling 
605b1b8ab34Slling 			if (le->le_int_size != 8 || le->le_value_length != 1)
606e37b211cStaylor 				return (ERR_FSYS_CORRUPT);
607b1b8ab34Slling 
608b1b8ab34Slling 			/* get the uint64_t property value */
609b1b8ab34Slling 			la = &ZAP_LEAF_CHUNK(l, blksft,
610b1b8ab34Slling 			    le->le_value_chunk).l_array;
611b1b8ab34Slling 			ip = la->la_array;
612b1b8ab34Slling 
613b1b8ab34Slling 			*value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
614b1b8ab34Slling 			    (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
615b1b8ab34Slling 			    (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
616b1b8ab34Slling 			    (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
617b1b8ab34Slling 
618b1b8ab34Slling 			return (0);
619b1b8ab34Slling 		}
620b1b8ab34Slling 	}
621b1b8ab34Slling 
622b1b8ab34Slling 	return (ERR_FSYS_CORRUPT);
623b1b8ab34Slling }
624b1b8ab34Slling 
625b1b8ab34Slling /*
626b1b8ab34Slling  * Fat ZAP lookup
627b1b8ab34Slling  *
628b1b8ab34Slling  * Return:
629b1b8ab34Slling  *	0 - success
630b1b8ab34Slling  *	errnum - failure
631b1b8ab34Slling  */
632e7cbe64fSgw static int
633b1b8ab34Slling fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap,
634b1b8ab34Slling     char *name, uint64_t *value, char *stack)
635b1b8ab34Slling {
636b1b8ab34Slling 	zap_leaf_phys_t *l;
637b1b8ab34Slling 	uint64_t hash, idx, blkid;
638b1b8ab34Slling 	int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT);
639b1b8ab34Slling 
640b1b8ab34Slling 	/* Verify if this is a fat zap header block */
641b1b8ab34Slling 	if (zap->zap_magic != (uint64_t)ZAP_MAGIC)
642b1b8ab34Slling 		return (ERR_FSYS_CORRUPT);
643b1b8ab34Slling 
644b1b8ab34Slling 	hash = zap_hash(zap->zap_salt, name);
645b1b8ab34Slling 	if (errnum)
646b1b8ab34Slling 		return (errnum);
647b1b8ab34Slling 
648b1b8ab34Slling 	/* get block id from index */
649b1b8ab34Slling 	if (zap->zap_ptrtbl.zt_numblks != 0) {
650b1b8ab34Slling 		/* external pointer tables not supported */
651b1b8ab34Slling 		return (ERR_FSYS_CORRUPT);
652b1b8ab34Slling 	}
653b1b8ab34Slling 	idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift);
654b1b8ab34Slling 	blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))];
655b1b8ab34Slling 
656b1b8ab34Slling 	/* Get the leaf block */
657b1b8ab34Slling 	l = (zap_leaf_phys_t *)stack;
658b1b8ab34Slling 	stack += 1<<blksft;
659051aabe6Staylor 	if ((1<<blksft) < sizeof (zap_leaf_phys_t))
660e37b211cStaylor 		return (ERR_FSYS_CORRUPT);
661b1b8ab34Slling 	if (errnum = dmu_read(zap_dnode, blkid, l, stack))
662b1b8ab34Slling 		return (errnum);
663b1b8ab34Slling 
664b1b8ab34Slling 	return (zap_leaf_lookup(l, blksft, hash, name, value));
665b1b8ab34Slling }
666b1b8ab34Slling 
667b1b8ab34Slling /*
668b1b8ab34Slling  * Read in the data of a zap object and find the value for a matching
669b1b8ab34Slling  * property name.
670b1b8ab34Slling  *
671b1b8ab34Slling  * Return:
672b1b8ab34Slling  *	0 - success
673b1b8ab34Slling  *	errnum - failure
674b1b8ab34Slling  */
675b1b8ab34Slling static int
676b1b8ab34Slling zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack)
677b1b8ab34Slling {
678b1b8ab34Slling 	uint64_t block_type;
679b1b8ab34Slling 	int size;
680b1b8ab34Slling 	void *zapbuf;
681b1b8ab34Slling 
682b1b8ab34Slling 	/* Read in the first block of the zap object data. */
683b1b8ab34Slling 	zapbuf = stack;
684b1b8ab34Slling 	size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
685b1b8ab34Slling 	stack += size;
686b1b8ab34Slling 	if (errnum = dmu_read(zap_dnode, 0, zapbuf, stack))
687b1b8ab34Slling 		return (errnum);
688b1b8ab34Slling 
689b1b8ab34Slling 	block_type = *((uint64_t *)zapbuf);
690b1b8ab34Slling 
691b1b8ab34Slling 	if (block_type == ZBT_MICRO) {
692b1b8ab34Slling 		return (mzap_lookup(zapbuf, size, name, val));
693b1b8ab34Slling 	} else if (block_type == ZBT_HEADER) {
694b1b8ab34Slling 		/* this is a fat zap */
695b1b8ab34Slling 		return (fzap_lookup(zap_dnode, zapbuf, name,
696b1b8ab34Slling 		    val, stack));
697b1b8ab34Slling 	}
698b1b8ab34Slling 
699b1b8ab34Slling 	return (ERR_FSYS_CORRUPT);
700b1b8ab34Slling }
701b1b8ab34Slling 
702b1b8ab34Slling /*
703b1b8ab34Slling  * Get the dnode of an object number from the metadnode of an object set.
704b1b8ab34Slling  *
705b1b8ab34Slling  * Input
706b1b8ab34Slling  *	mdn - metadnode to get the object dnode
707b1b8ab34Slling  *	objnum - object number for the object dnode
708b1b8ab34Slling  *	buf - data buffer that holds the returning dnode
709b1b8ab34Slling  *	stack - scratch area
710b1b8ab34Slling  *
711b1b8ab34Slling  * Return:
712b1b8ab34Slling  *	0 - success
713b1b8ab34Slling  *	errnum - failure
714b1b8ab34Slling  */
715b1b8ab34Slling static int
716b1b8ab34Slling dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf,
717b1b8ab34Slling 	char *stack)
718b1b8ab34Slling {
719b1b8ab34Slling 	uint64_t blkid, blksz; /* the block id this object dnode is in */
720b1b8ab34Slling 	int epbs; /* shift of number of dnodes in a block */
721b1b8ab34Slling 	int idx; /* index within a block */
722b1b8ab34Slling 	dnode_phys_t *dnbuf;
723b1b8ab34Slling 
724b1b8ab34Slling 	blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT;
725b1b8ab34Slling 	epbs = zfs_log2(blksz) - DNODE_SHIFT;
726b1b8ab34Slling 	blkid = objnum >> epbs;
727b1b8ab34Slling 	idx = objnum & ((1<<epbs)-1);
728b1b8ab34Slling 
729b1b8ab34Slling 	if (dnode_buf != NULL && dnode_mdn == mdn &&
730b1b8ab34Slling 	    objnum >= dnode_start && objnum < dnode_end) {
731b1b8ab34Slling 		grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE);
732b1b8ab34Slling 		VERIFY_DN_TYPE(buf, type);
733b1b8ab34Slling 		return (0);
734b1b8ab34Slling 	}
735b1b8ab34Slling 
736b1b8ab34Slling 	if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) {
737b1b8ab34Slling 		dnbuf = dnode_buf;
738b1b8ab34Slling 		dnode_mdn = mdn;
739b1b8ab34Slling 		dnode_start = blkid << epbs;
740b1b8ab34Slling 		dnode_end = (blkid + 1) << epbs;
741b1b8ab34Slling 	} else {
742b1b8ab34Slling 		dnbuf = (dnode_phys_t *)stack;
743b1b8ab34Slling 		stack += blksz;
744b1b8ab34Slling 	}
745b1b8ab34Slling 
746b1b8ab34Slling 	if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack))
747b1b8ab34Slling 		return (errnum);
748b1b8ab34Slling 
749b1b8ab34Slling 	grub_memmove(buf, &dnbuf[idx], DNODE_SIZE);
750b1b8ab34Slling 	VERIFY_DN_TYPE(buf, type);
751b1b8ab34Slling 
752b1b8ab34Slling 	return (0);
753b1b8ab34Slling }
754b1b8ab34Slling 
755b1b8ab34Slling /*
756eb2bd662Svikram  * Check if this is a special file that resides at the top
757eb2bd662Svikram  * dataset of the pool. Currently this is the GRUB menu,
758eb2bd662Svikram  * boot signature and boot signature backup.
759b1b8ab34Slling  * str starts with '/'.
760b1b8ab34Slling  */
761b1b8ab34Slling static int
762eb2bd662Svikram is_top_dataset_file(char *str)
763b1b8ab34Slling {
764b1b8ab34Slling 	char *tptr;
765b1b8ab34Slling 
766b1b8ab34Slling 	if ((tptr = grub_strstr(str, "menu.lst")) &&
767b1b8ab34Slling 	    (tptr[8] == '\0' || tptr[8] == ' ') &&
768b1b8ab34Slling 	    *(tptr-1) == '/')
769b1b8ab34Slling 		return (1);
770b1b8ab34Slling 
771eb2bd662Svikram 	if (grub_strncmp(str, BOOTSIGN_DIR"/",
7721183b401Svikram 	    grub_strlen(BOOTSIGN_DIR) + 1) == 0)
773eb2bd662Svikram 		return (1);
774eb2bd662Svikram 
775eb2bd662Svikram 	if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0)
776eb2bd662Svikram 		return (1);
777eb2bd662Svikram 
778b1b8ab34Slling 	return (0);
779b1b8ab34Slling }
780b1b8ab34Slling 
781b1b8ab34Slling /*
782b1b8ab34Slling  * Get the file dnode for a given file name where mdn is the meta dnode
783b1b8ab34Slling  * for this ZFS object set. When found, place the file dnode in dn.
784b1b8ab34Slling  * The 'path' argument will be mangled.
785b1b8ab34Slling  *
786b1b8ab34Slling  * Return:
787b1b8ab34Slling  *	0 - success
788b1b8ab34Slling  *	errnum - failure
789b1b8ab34Slling  */
790b1b8ab34Slling static int
791b1b8ab34Slling dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn,
792b1b8ab34Slling     char *stack)
793b1b8ab34Slling {
794e7437265Sahrens 	uint64_t objnum, version;
795b1b8ab34Slling 	char *cname, ch;
796b1b8ab34Slling 
797b1b8ab34Slling 	if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
798b1b8ab34Slling 	    dn, stack))
799b1b8ab34Slling 		return (errnum);
800b1b8ab34Slling 
801e7437265Sahrens 	if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack))
802e7437265Sahrens 		return (errnum);
803e7437265Sahrens 	if (version > ZPL_VERSION)
804e7437265Sahrens 		return (-1);
805e7437265Sahrens 
806b1b8ab34Slling 	if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack))
807b1b8ab34Slling 		return (errnum);
808b1b8ab34Slling 
809b1b8ab34Slling 	if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS,
810b1b8ab34Slling 	    dn, stack))
811b1b8ab34Slling 		return (errnum);
812b1b8ab34Slling 
813b1b8ab34Slling 	/* skip leading slashes */
814b1b8ab34Slling 	while (*path == '/')
815b1b8ab34Slling 		path++;
816b1b8ab34Slling 
817b1b8ab34Slling 	while (*path && !isspace(*path)) {
818b1b8ab34Slling 
819b1b8ab34Slling 		/* get the next component name */
820b1b8ab34Slling 		cname = path;
821b1b8ab34Slling 		while (*path && !isspace(*path) && *path != '/')
822b1b8ab34Slling 			path++;
823b1b8ab34Slling 		ch = *path;
824b1b8ab34Slling 		*path = 0;   /* ensure null termination */
825b1b8ab34Slling 
826b1b8ab34Slling 		if (errnum = zap_lookup(dn, cname, &objnum, stack))
827b1b8ab34Slling 			return (errnum);
828b1b8ab34Slling 
829e7437265Sahrens 		objnum = ZFS_DIRENT_OBJ(objnum);
830b1b8ab34Slling 		if (errnum = dnode_get(mdn, objnum, 0, dn, stack))
831b1b8ab34Slling 			return (errnum);
832b1b8ab34Slling 
833b1b8ab34Slling 		*path = ch;
834b1b8ab34Slling 		while (*path == '/')
835b1b8ab34Slling 			path++;
836b1b8ab34Slling 	}
837b1b8ab34Slling 
838b1b8ab34Slling 	/* We found the dnode for this file. Verify if it is a plain file. */
839b1b8ab34Slling 	VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS);
840b1b8ab34Slling 
841b1b8ab34Slling 	return (0);
842b1b8ab34Slling }
843b1b8ab34Slling 
844b1b8ab34Slling /*
845b1b8ab34Slling  * Get the default 'bootfs' property value from the rootpool.
846b1b8ab34Slling  *
847b1b8ab34Slling  * Return:
848b1b8ab34Slling  *	0 - success
849b1b8ab34Slling  *	errnum -failure
850b1b8ab34Slling  */
851b1b8ab34Slling static int
852b1b8ab34Slling get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack)
853b1b8ab34Slling {
854b1b8ab34Slling 	uint64_t objnum = 0;
855b1b8ab34Slling 	dnode_phys_t *dn = (dnode_phys_t *)stack;
856b1b8ab34Slling 	stack += DNODE_SIZE;
857b1b8ab34Slling 
858ae8180dbSlling 	if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
859b1b8ab34Slling 	    DMU_OT_OBJECT_DIRECTORY, dn, stack))
860ae8180dbSlling 		return (errnum);
861b1b8ab34Slling 
862b1b8ab34Slling 	/*
863b1b8ab34Slling 	 * find the object number for 'pool_props', and get the dnode
864b1b8ab34Slling 	 * of the 'pool_props'.
865b1b8ab34Slling 	 */
866b1b8ab34Slling 	if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack))
867b1b8ab34Slling 		return (ERR_FILESYSTEM_NOT_FOUND);
868b1b8ab34Slling 
869ae8180dbSlling 	if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack))
870ae8180dbSlling 		return (errnum);
871b1b8ab34Slling 
872b1b8ab34Slling 	if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack))
873b1b8ab34Slling 		return (ERR_FILESYSTEM_NOT_FOUND);
874b1b8ab34Slling 
875b1b8ab34Slling 	if (!objnum)
876b1b8ab34Slling 		return (ERR_FILESYSTEM_NOT_FOUND);
877b1b8ab34Slling 
878b1b8ab34Slling 	*obj = objnum;
879b1b8ab34Slling 	return (0);
880b1b8ab34Slling }
881b1b8ab34Slling 
882b1b8ab34Slling /*
883b1b8ab34Slling  * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
884b1b8ab34Slling  * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
885b1b8ab34Slling  * of pool/rootfs.
886b1b8ab34Slling  *
887b1b8ab34Slling  * If no fsname and no obj are given, return the DSL_DIR metadnode.
888b1b8ab34Slling  * If fsname is given, return its metadnode and its matching object number.
889b1b8ab34Slling  * If only obj is given, return the metadnode for this object number.
890b1b8ab34Slling  *
891b1b8ab34Slling  * Return:
892b1b8ab34Slling  *	0 - success
893b1b8ab34Slling  *	errnum - failure
894b1b8ab34Slling  */
895b1b8ab34Slling static int
896b1b8ab34Slling get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, uint64_t *obj,
897b1b8ab34Slling     dnode_phys_t *mdn, char *stack)
898b1b8ab34Slling {
899b1b8ab34Slling 	uint64_t objnum, headobj;
900b1b8ab34Slling 	char *cname, ch;
901b1b8ab34Slling 	blkptr_t *bp;
902b1b8ab34Slling 	objset_phys_t *osp;
903fe3e2633SEric Taylor 	int issnapshot = 0;
904fe3e2633SEric Taylor 	char *snapname;
905b1b8ab34Slling 
906b1b8ab34Slling 	if (fsname == NULL && obj) {
907b1b8ab34Slling 		headobj = *obj;
908b1b8ab34Slling 		goto skip;
909b1b8ab34Slling 	}
910b1b8ab34Slling 
911b1b8ab34Slling 	if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
912b1b8ab34Slling 	    DMU_OT_OBJECT_DIRECTORY, mdn, stack))
913b1b8ab34Slling 		return (errnum);
914b1b8ab34Slling 
915b1b8ab34Slling 	if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum,
916b1b8ab34Slling 	    stack))
917b1b8ab34Slling 		return (errnum);
918b1b8ab34Slling 
919b1b8ab34Slling 	if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack))
920b1b8ab34Slling 		return (errnum);
921b1b8ab34Slling 
922b1b8ab34Slling 	if (fsname == NULL) {
923b1b8ab34Slling 		headobj =
924b1b8ab34Slling 		    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
925b1b8ab34Slling 		goto skip;
926b1b8ab34Slling 	}
927b1b8ab34Slling 
928b1b8ab34Slling 	/* take out the pool name */
929b1b8ab34Slling 	while (*fsname && !isspace(*fsname) && *fsname != '/')
930b1b8ab34Slling 		fsname++;
931b1b8ab34Slling 
932b1b8ab34Slling 	while (*fsname && !isspace(*fsname)) {
933b1b8ab34Slling 		uint64_t childobj;
934b1b8ab34Slling 
935b1b8ab34Slling 		while (*fsname == '/')
936b1b8ab34Slling 			fsname++;
937b1b8ab34Slling 
938b1b8ab34Slling 		cname = fsname;
939b1b8ab34Slling 		while (*fsname && !isspace(*fsname) && *fsname != '/')
940b1b8ab34Slling 			fsname++;
941b1b8ab34Slling 		ch = *fsname;
942b1b8ab34Slling 		*fsname = 0;
943b1b8ab34Slling 
944fe3e2633SEric Taylor 		snapname = cname;
945fe3e2633SEric Taylor 		while (*snapname && !isspace(*snapname) && *snapname != '@')
946fe3e2633SEric Taylor 			snapname++;
947fe3e2633SEric Taylor 		if (*snapname == '@') {
948fe3e2633SEric Taylor 			issnapshot = 1;
949fe3e2633SEric Taylor 			*snapname = 0;
950fe3e2633SEric Taylor 		}
951b1b8ab34Slling 		childobj =
952b1b8ab34Slling 		    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj;
953b1b8ab34Slling 		if (errnum = dnode_get(mosmdn, childobj,
954b1b8ab34Slling 		    DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack))
955b1b8ab34Slling 			return (errnum);
956b1b8ab34Slling 
957ae8180dbSlling 		if (zap_lookup(mdn, cname, &objnum, stack))
958ae8180dbSlling 			return (ERR_FILESYSTEM_NOT_FOUND);
959b1b8ab34Slling 
960b1b8ab34Slling 		if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR,
961b1b8ab34Slling 		    mdn, stack))
962b1b8ab34Slling 			return (errnum);
963b1b8ab34Slling 
964b1b8ab34Slling 		*fsname = ch;
965fe3e2633SEric Taylor 		if (issnapshot)
966fe3e2633SEric Taylor 			*snapname = '@';
967b1b8ab34Slling 	}
968b1b8ab34Slling 	headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
969b1b8ab34Slling 	if (obj)
970b1b8ab34Slling 		*obj = headobj;
971b1b8ab34Slling 
972b1b8ab34Slling skip:
973b1b8ab34Slling 	if (errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack))
974b1b8ab34Slling 		return (errnum);
975fe3e2633SEric Taylor 	if (issnapshot) {
976fe3e2633SEric Taylor 		uint64_t snapobj;
977fe3e2633SEric Taylor 
978fe3e2633SEric Taylor 		snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))->
979fe3e2633SEric Taylor 		    ds_snapnames_zapobj;
980b1b8ab34Slling 
981fe3e2633SEric Taylor 		if (errnum = dnode_get(mosmdn, snapobj,
982fe3e2633SEric Taylor 		    DMU_OT_DSL_DS_SNAP_MAP, mdn, stack))
983fe3e2633SEric Taylor 			return (errnum);
984fe3e2633SEric Taylor 		if (zap_lookup(mdn, snapname + 1, &headobj, stack))
985fe3e2633SEric Taylor 			return (ERR_FILESYSTEM_NOT_FOUND);
986fe3e2633SEric Taylor 		if (errnum = dnode_get(mosmdn, headobj,
987fe3e2633SEric Taylor 		    DMU_OT_DSL_DATASET, mdn, stack))
988fe3e2633SEric Taylor 			return (errnum);
989fe3e2633SEric Taylor 		if (obj)
990fe3e2633SEric Taylor 			*obj = headobj;
991fe3e2633SEric Taylor 	}
992b1b8ab34Slling 
993b1b8ab34Slling 	bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp;
994b1b8ab34Slling 	osp = (objset_phys_t *)stack;
995b1b8ab34Slling 	stack += sizeof (objset_phys_t);
996b1b8ab34Slling 	if (errnum = zio_read(bp, osp, stack))
997b1b8ab34Slling 		return (errnum);
998b1b8ab34Slling 
999b1b8ab34Slling 	grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE);
1000b1b8ab34Slling 
1001b1b8ab34Slling 	return (0);
1002b1b8ab34Slling }
1003b1b8ab34Slling 
1004b1b8ab34Slling /*
1005e7cbe64fSgw  * For a given XDR packed nvlist, verify the first 4 bytes and move on.
1006b1b8ab34Slling  *
1007e7cbe64fSgw  * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
1008b1b8ab34Slling  *
1009b1b8ab34Slling  *      encoding method/host endian     (4 bytes)
1010b1b8ab34Slling  *      nvl_version                     (4 bytes)
1011b1b8ab34Slling  *      nvl_nvflag                      (4 bytes)
1012b1b8ab34Slling  *	encoded nvpairs:
1013b1b8ab34Slling  *		encoded size of the nvpair      (4 bytes)
1014b1b8ab34Slling  *		decoded size of the nvpair      (4 bytes)
1015b1b8ab34Slling  *		name string size                (4 bytes)
1016b1b8ab34Slling  *		name string data                (sizeof(NV_ALIGN4(string))
1017b1b8ab34Slling  *		data type                       (4 bytes)
1018b1b8ab34Slling  *		# of elements in the nvpair     (4 bytes)
1019b1b8ab34Slling  *		data
1020b1b8ab34Slling  *      2 zero's for the last nvpair
1021b1b8ab34Slling  *		(end of the entire list)	(8 bytes)
1022b1b8ab34Slling  *
1023b1b8ab34Slling  * Return:
1024b1b8ab34Slling  *	0 - success
1025b1b8ab34Slling  *	1 - failure
1026b1b8ab34Slling  */
1027e7cbe64fSgw static int
1028e7cbe64fSgw nvlist_unpack(char *nvlist, char **out)
1029b1b8ab34Slling {
1030b1b8ab34Slling 	/* Verify if the 1st and 2nd byte in the nvlist are valid. */
1031b1b8ab34Slling 	if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
1032b1b8ab34Slling 		return (1);
1033b1b8ab34Slling 
1034e7cbe64fSgw 	nvlist += 4;
1035e7cbe64fSgw 	*out = nvlist;
1036e7cbe64fSgw 	return (0);
1037e7cbe64fSgw }
1038e7cbe64fSgw 
/*
 * Return the address of element 'index' of an array of packed nvlists
 * that starts at 'nvlist'.
 *
 * The elements are stored back to back, so each of the 'index' elements
 * in front of the wanted one is stepped over: 8 bytes of nvl_version and
 * nvl_nvflag, then every nvpair by its big-endian encoded size until a
 * size of 0 is read, then the 8 bytes that close the list.
 *
 * No bounds are checked; the data is trusted to be well formed.
 */
static char *
nvlist_array(char *nvlist, int index)
{
	char *cur = nvlist;
	int remaining;

	for (remaining = index; remaining > 0; remaining--) {
		int pairsz;

		/* nvl_version and nvl_nvflag */
		cur += 2 * 4;

		/* hop from nvpair to nvpair until the zero encoded size */
		for (;;) {
			pairsz = BSWAP_32(*(uint32_t *)cur);
			if (pairsz == 0)
				break;
			cur += pairsz;
		}

		/* the ending 2 zeros - 8 bytes */
		cur += 2 * 4;
	}

	return (cur);
}
1056e7cbe64fSgw 
1057e7cbe64fSgw static int
1058e7cbe64fSgw nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
1059e7cbe64fSgw     int *nelmp)
1060e7cbe64fSgw {
1061e7cbe64fSgw 	int name_len, type, slen, encode_size;
1062e7cbe64fSgw 	char *nvpair, *nvp_name, *strval = val;
1063e7cbe64fSgw 	uint64_t *intval = val;
1064e7cbe64fSgw 
1065b1b8ab34Slling 	/* skip the header, nvl_version, and nvl_nvflag */
1066e7cbe64fSgw 	nvlist = nvlist + 4 * 2;
1067b1b8ab34Slling 
1068b1b8ab34Slling 	/*
1069b1b8ab34Slling 	 * Loop thru the nvpair list
1070b1b8ab34Slling 	 * The XDR representation of an integer is in big-endian byte order.
1071b1b8ab34Slling 	 */
1072b1b8ab34Slling 	while (encode_size = BSWAP_32(*(uint32_t *)nvlist))  {
1073b1b8ab34Slling 
1074b1b8ab34Slling 		nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
1075b1b8ab34Slling 
1076b1b8ab34Slling 		name_len = BSWAP_32(*(uint32_t *)nvpair);
1077b1b8ab34Slling 		nvpair += 4;
1078b1b8ab34Slling 
1079b1b8ab34Slling 		nvp_name = nvpair;
1080b1b8ab34Slling 		nvpair = nvpair + ((name_len + 3) & ~3); /* align */
1081b1b8ab34Slling 
1082b1b8ab34Slling 		type = BSWAP_32(*(uint32_t *)nvpair);
1083b1b8ab34Slling 		nvpair += 4;
1084b1b8ab34Slling 
1085b1b8ab34Slling 		if ((grub_strncmp(nvp_name, name, name_len) == 0) &&
1086b1b8ab34Slling 		    type == valtype) {
1087e7cbe64fSgw 			int nelm;
1088b1b8ab34Slling 
1089e7cbe64fSgw 			if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1)
1090b1b8ab34Slling 				return (1);
1091b1b8ab34Slling 			nvpair += 4;
1092b1b8ab34Slling 
1093b1b8ab34Slling 			switch (valtype) {
1094b1b8ab34Slling 			case DATA_TYPE_STRING:
1095b1b8ab34Slling 				slen = BSWAP_32(*(uint32_t *)nvpair);
1096b1b8ab34Slling 				nvpair += 4;
1097b1b8ab34Slling 				grub_memmove(strval, nvpair, slen);
1098b1b8ab34Slling 				strval[slen] = '\0';
1099b1b8ab34Slling 				return (0);
1100b1b8ab34Slling 
1101b1b8ab34Slling 			case DATA_TYPE_UINT64:
1102b1b8ab34Slling 				*intval = BSWAP_64(*(uint64_t *)nvpair);
1103b1b8ab34Slling 				return (0);
1104e7cbe64fSgw 
1105e7cbe64fSgw 			case DATA_TYPE_NVLIST:
1106e7cbe64fSgw 				*(void **)val = (void *)nvpair;
1107e7cbe64fSgw 				return (0);
1108e7cbe64fSgw 
1109e7cbe64fSgw 			case DATA_TYPE_NVLIST_ARRAY:
1110e7cbe64fSgw 				*(void **)val = (void *)nvpair;
1111e7cbe64fSgw 				if (nelmp)
1112e7cbe64fSgw 					*nelmp = nelm;
1113e7cbe64fSgw 				return (0);
1114b1b8ab34Slling 			}
1115b1b8ab34Slling 		}
1116b1b8ab34Slling 
1117b1b8ab34Slling 		nvlist += encode_size; /* goto the next nvpair */
1118b1b8ab34Slling 	}
1119b1b8ab34Slling 
1120b1b8ab34Slling 	return (1);
1121b1b8ab34Slling }
1122b1b8ab34Slling 
1123b1b8ab34Slling /*
1124e7cbe64fSgw  * Check if this vdev is online and is in a good state.
1125e7cbe64fSgw  */
1126e7cbe64fSgw static int
1127e7cbe64fSgw vdev_validate(char *nv)
1128e7cbe64fSgw {
1129e7cbe64fSgw 	uint64_t ival;
1130e7cbe64fSgw 
1131e7cbe64fSgw 	if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival,
1132e7cbe64fSgw 	    DATA_TYPE_UINT64, NULL) == 0 ||
1133e7cbe64fSgw 	    nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
1134e7cbe64fSgw 	    DATA_TYPE_UINT64, NULL) == 0 ||
1135e7cbe64fSgw 	    nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
1136e7cbe64fSgw 	    DATA_TYPE_UINT64, NULL) == 0)
1137e7cbe64fSgw 		return (ERR_DEV_VALUES);
1138e7cbe64fSgw 
1139e7cbe64fSgw 	return (0);
1140e7cbe64fSgw }
1141e7cbe64fSgw 
1142e7cbe64fSgw /*
1143e7cbe64fSgw  * Get a list of valid vdev pathname from the boot device.
1144ffb5616eSLin Ling  * The caller should already allocate MAXPATHLEN memory for bootpath and devid.
1145e7cbe64fSgw  */
1146051aabe6Staylor int
1147051aabe6Staylor vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath)
1148e7cbe64fSgw {
1149e7cbe64fSgw 	char type[16];
1150e7cbe64fSgw 
1151e7cbe64fSgw 	if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
1152e7cbe64fSgw 	    NULL))
1153e7cbe64fSgw 		return (ERR_FSYS_CORRUPT);
1154e7cbe64fSgw 
1155e7cbe64fSgw 	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1156ffb5616eSLin Ling 		uint64_t guid;
1157ffb5616eSLin Ling 
1158ffb5616eSLin Ling 		if (vdev_validate(nv) != 0)
1159ffb5616eSLin Ling 			return (ERR_NO_BOOTPATH);
1160ffb5616eSLin Ling 
1161ffb5616eSLin Ling 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID,
1162ffb5616eSLin Ling 		    &guid, DATA_TYPE_UINT64, NULL) != 0)
1163ffb5616eSLin Ling 			return (ERR_NO_BOOTPATH);
1164ffb5616eSLin Ling 
1165ffb5616eSLin Ling 		if (guid != inguid)
1166e7cbe64fSgw 			return (ERR_NO_BOOTPATH);
1167e7cbe64fSgw 
1168ffb5616eSLin Ling 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH,
1169ffb5616eSLin Ling 		    bootpath, DATA_TYPE_STRING, NULL) != 0)
1170ffb5616eSLin Ling 			bootpath[0] = '\0';
1171ffb5616eSLin Ling 
1172ffb5616eSLin Ling 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID,
1173ffb5616eSLin Ling 		    devid, DATA_TYPE_STRING, NULL) != 0)
1174ffb5616eSLin Ling 			devid[0] = '\0';
1175ffb5616eSLin Ling 
1176ffb5616eSLin Ling 		if (strlen(bootpath) >= MAXPATHLEN ||
1177ffb5616eSLin Ling 		    strlen(devid) >= MAXPATHLEN)
1178ffb5616eSLin Ling 			return (ERR_WONT_FIT);
1179ffb5616eSLin Ling 
1180ffb5616eSLin Ling 		return (0);
1181ffb5616eSLin Ling 
1182e7cbe64fSgw 	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
1183e7cbe64fSgw 		int nelm, i;
1184e7cbe64fSgw 		char *child;
1185e7cbe64fSgw 
1186e7cbe64fSgw 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child,
1187e7cbe64fSgw 		    DATA_TYPE_NVLIST_ARRAY, &nelm))
1188e7cbe64fSgw 			return (ERR_FSYS_CORRUPT);
1189e7cbe64fSgw 
1190e7cbe64fSgw 		for (i = 0; i < nelm; i++) {
1191e7cbe64fSgw 			char *child_i;
1192e7cbe64fSgw 
1193e7cbe64fSgw 			child_i = nvlist_array(child, i);
1194ffb5616eSLin Ling 			if (vdev_get_bootpath(child_i, inguid, devid,
1195ffb5616eSLin Ling 			    bootpath) == 0)
1196ffb5616eSLin Ling 				return (0);
1197e7cbe64fSgw 		}
1198e7cbe64fSgw 	}
1199e7cbe64fSgw 
1200ffb5616eSLin Ling 	return (ERR_NO_BOOTPATH);
1201e7cbe64fSgw }
1202e7cbe64fSgw 
1203e7cbe64fSgw /*
1204e7cbe64fSgw  * Check the disk label information and retrieve needed vdev name-value pairs.
1205b1b8ab34Slling  *
1206b1b8ab34Slling  * Return:
1207b1b8ab34Slling  *	0 - success
1208e7cbe64fSgw  *	ERR_* - failure
1209b1b8ab34Slling  */
1210051aabe6Staylor int
1211051aabe6Staylor check_pool_label(int label, char *stack, char *outdevid, char *outpath)
1212b1b8ab34Slling {
1213b1b8ab34Slling 	vdev_phys_t *vdev;
1214e7cbe64fSgw 	uint64_t sector, pool_state, txg = 0;
1215e7cbe64fSgw 	char *nvlist, *nv;
1216051aabe6Staylor 	uint64_t diskguid;
1217fe3e2633SEric Taylor 	uint64_t version;
1218b1b8ab34Slling 
1219b1b8ab34Slling 	sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE +
1220b1b8ab34Slling 	    VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT;
1221b1b8ab34Slling 
1222b1b8ab34Slling 	/* Read in the vdev name-value pair list (112K). */
1223b1b8ab34Slling 	if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0)
1224b1b8ab34Slling 		return (ERR_READ);
1225b1b8ab34Slling 
1226b1b8ab34Slling 	vdev = (vdev_phys_t *)stack;
1227e4c3b53dStaylor 	stack += sizeof (vdev_phys_t);
1228b1b8ab34Slling 
1229e7cbe64fSgw 	if (nvlist_unpack(vdev->vp_nvlist, &nvlist))
1230b1b8ab34Slling 		return (ERR_FSYS_CORRUPT);
1231e7cbe64fSgw 
1232e7cbe64fSgw 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state,
1233e7cbe64fSgw 	    DATA_TYPE_UINT64, NULL))
1234e7cbe64fSgw 		return (ERR_FSYS_CORRUPT);
1235e7cbe64fSgw 
1236e7cbe64fSgw 	if (pool_state == POOL_STATE_DESTROYED)
1237e7cbe64fSgw 		return (ERR_FILESYSTEM_NOT_FOUND);
1238e7cbe64fSgw 
1239e7cbe64fSgw 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME,
1240e7cbe64fSgw 	    current_rootpool, DATA_TYPE_STRING, NULL))
1241e7cbe64fSgw 		return (ERR_FSYS_CORRUPT);
1242e7cbe64fSgw 
1243e7cbe64fSgw 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg,
1244e7cbe64fSgw 	    DATA_TYPE_UINT64, NULL))
1245e7cbe64fSgw 		return (ERR_FSYS_CORRUPT);
1246e7cbe64fSgw 
1247e7cbe64fSgw 	/* not an active device */
1248e7cbe64fSgw 	if (txg == 0)
1249e7cbe64fSgw 		return (ERR_NO_BOOTPATH);
1250e7cbe64fSgw 
1251fe3e2633SEric Taylor 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version,
1252fe3e2633SEric Taylor 	    DATA_TYPE_UINT64, NULL))
1253fe3e2633SEric Taylor 		return (ERR_FSYS_CORRUPT);
1254fe3e2633SEric Taylor 	if (version > SPA_VERSION)
1255fe3e2633SEric Taylor 		return (ERR_NEWER_VERSION);
1256e7cbe64fSgw 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
1257e7cbe64fSgw 	    DATA_TYPE_NVLIST, NULL))
1258e7cbe64fSgw 		return (ERR_FSYS_CORRUPT);
1259051aabe6Staylor 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
1260051aabe6Staylor 	    DATA_TYPE_UINT64, NULL))
1261051aabe6Staylor 		return (ERR_FSYS_CORRUPT);
1262051aabe6Staylor 	if (vdev_get_bootpath(nv, diskguid, outdevid, outpath))
1263e7cbe64fSgw 		return (ERR_NO_BOOTPATH);
1264e7cbe64fSgw 	return (0);
1265b1b8ab34Slling }
1266b1b8ab34Slling 
1267b1b8ab34Slling /*
1268b1b8ab34Slling  * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
1269b1b8ab34Slling  * to the memory address MOS.
1270b1b8ab34Slling  *
1271b1b8ab34Slling  * Return:
1272b1b8ab34Slling  *	1 - success
1273b1b8ab34Slling  *	0 - failure
1274b1b8ab34Slling  */
1275b1b8ab34Slling int
1276b1b8ab34Slling zfs_mount(void)
1277b1b8ab34Slling {
1278b1b8ab34Slling 	char *stack;
1279b1b8ab34Slling 	int label = 0;
1280b1b8ab34Slling 	uberblock_phys_t *ub_array, *ubbest = NULL;
1281b35c6776Staylor 	vdev_boot_header_t *bh;
1282b1b8ab34Slling 	objset_phys_t *osp;
1283051aabe6Staylor 	char tmp_bootpath[MAXNAMELEN];
1284051aabe6Staylor 	char tmp_devid[MAXNAMELEN];
1285051aabe6Staylor 
1286051aabe6Staylor 	/* if it's our first time here, zero the best uberblock out */
1287051aabe6Staylor 	if (best_drive == 0 && best_part == 0 && find_best_root)
1288e37b211cStaylor 		grub_memset(&current_uberblock, 0, sizeof (uberblock_t));
1289b1b8ab34Slling 
1290b1b8ab34Slling 	stackbase = ZFS_SCRATCH;
1291b1b8ab34Slling 	stack = stackbase;
1292b1b8ab34Slling 	ub_array = (uberblock_phys_t *)stack;
1293b1b8ab34Slling 	stack += VDEV_UBERBLOCK_RING;
1294b1b8ab34Slling 
1295b35c6776Staylor 	bh = (vdev_boot_header_t *)stack;
1296b35c6776Staylor 	stack += VDEV_BOOT_HEADER_SIZE;
1297b35c6776Staylor 
1298b1b8ab34Slling 	osp = (objset_phys_t *)stack;
1299b1b8ab34Slling 	stack += sizeof (objset_phys_t);
1300b1b8ab34Slling 
1301b1b8ab34Slling 	/* XXX add back labels support? */
1302b1b8ab34Slling 	for (label = 0; ubbest == NULL && label < (VDEV_LABELS/2); label++) {
1303b1b8ab34Slling 		uint64_t sector = (label * sizeof (vdev_label_t) +
1304b35c6776Staylor 		    VDEV_SKIP_SIZE) >> SPA_MINBLOCKSHIFT;
1305b35c6776Staylor 		if (devread(sector, 0, VDEV_BOOT_HEADER_SIZE,
1306b35c6776Staylor 		    (char *)bh) == 0)
1307b35c6776Staylor 			continue;
1308b35c6776Staylor 		if ((bh->vb_magic != VDEV_BOOT_MAGIC) ||
1309b35c6776Staylor 		    (bh->vb_version != VDEV_BOOT_VERSION)) {
1310b35c6776Staylor 			continue;
1311b35c6776Staylor 		}
1312b35c6776Staylor 		sector += (VDEV_BOOT_HEADER_SIZE +
1313b1b8ab34Slling 		    VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT;
1314b1b8ab34Slling 
1315b1b8ab34Slling 		/* Read in the uberblock ring (128K). */
1316b1b8ab34Slling 		if (devread(sector, 0, VDEV_UBERBLOCK_RING,
1317b1b8ab34Slling 		    (char *)ub_array) == 0)
1318b1b8ab34Slling 			continue;
1319b1b8ab34Slling 
1320b1b8ab34Slling 		if ((ubbest = find_bestub(ub_array, label)) != NULL &&
1321b1b8ab34Slling 		    zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack)
1322b1b8ab34Slling 		    == 0) {
1323b1b8ab34Slling 
1324b1b8ab34Slling 			VERIFY_OS_TYPE(osp, DMU_OST_META);
1325b1b8ab34Slling 
1326e4c3b53dStaylor 			if (check_pool_label(label, stack, tmp_devid,
1327e4c3b53dStaylor 			    tmp_bootpath))
1328b1b8ab34Slling 				return (0);
1329b1b8ab34Slling 
1330051aabe6Staylor 			if (find_best_root &&
1331051aabe6Staylor 			    vdev_uberblock_compare(&ubbest->ubp_uberblock,
1332051aabe6Staylor 			    &(current_uberblock)) <= 0)
1333051aabe6Staylor 				continue;
1334ffb5616eSLin Ling 
1335051aabe6Staylor 			/* Got the MOS. Save it at the memory addr MOS. */
1336051aabe6Staylor 			grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE);
1337051aabe6Staylor 			grub_memmove(&current_uberblock,
1338ffb5616eSLin Ling 			    &ubbest->ubp_uberblock, sizeof (uberblock_t));
1339051aabe6Staylor 			grub_memmove(current_bootpath, tmp_bootpath,
1340051aabe6Staylor 			    MAXNAMELEN);
1341051aabe6Staylor 			grub_memmove(current_devid, tmp_devid,
1342051aabe6Staylor 			    grub_strlen(tmp_devid));
1343b1b8ab34Slling 			is_zfs_mount = 1;
1344b1b8ab34Slling 			return (1);
1345b1b8ab34Slling 		}
1346b1b8ab34Slling 	}
1347b1b8ab34Slling 
1348b1b8ab34Slling 	return (0);
1349b1b8ab34Slling }
1350b1b8ab34Slling 
1351b1b8ab34Slling /*
1352b1b8ab34Slling  * zfs_open() locates a file in the rootpool by following the
1353b1b8ab34Slling  * MOS and places the dnode of the file in the memory address DNODE.
1354b1b8ab34Slling  *
1355b1b8ab34Slling  * Return:
1356b1b8ab34Slling  *	1 - success
1357b1b8ab34Slling  *	0 - failure
1358b1b8ab34Slling  */
1359b1b8ab34Slling int
1360b1b8ab34Slling zfs_open(char *filename)
1361b1b8ab34Slling {
1362b1b8ab34Slling 	char *stack;
1363b1b8ab34Slling 	dnode_phys_t *mdn;
1364b1b8ab34Slling 
1365b1b8ab34Slling 	file_buf = NULL;
1366b1b8ab34Slling 	stackbase = ZFS_SCRATCH;
1367b1b8ab34Slling 	stack = stackbase;
1368b1b8ab34Slling 
1369b1b8ab34Slling 	mdn = (dnode_phys_t *)stack;
1370b1b8ab34Slling 	stack += sizeof (dnode_phys_t);
1371b1b8ab34Slling 
1372b1b8ab34Slling 	dnode_mdn = NULL;
1373b1b8ab34Slling 	dnode_buf = (dnode_phys_t *)stack;
1374b1b8ab34Slling 	stack += 1<<DNODE_BLOCK_SHIFT;
1375b1b8ab34Slling 
1376b1b8ab34Slling 	/*
1377b1b8ab34Slling 	 * menu.lst is placed at the root pool filesystem level,
1378b1b8ab34Slling 	 * do not goto 'current_bootfs'.
1379b1b8ab34Slling 	 */
1380eb2bd662Svikram 	if (is_top_dataset_file(filename)) {
1381b1b8ab34Slling 		if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack))
1382b1b8ab34Slling 			return (0);
1383b1b8ab34Slling 
1384b1b8ab34Slling 		current_bootfs_obj = 0;
1385b1b8ab34Slling 	} else {
1386b1b8ab34Slling 		if (current_bootfs[0] == '\0') {
1387b1b8ab34Slling 			/* Get the default root filesystem object number */
1388ae8180dbSlling 			if (errnum = get_default_bootfsobj(MOS,
1389ae8180dbSlling 			    &current_bootfs_obj, stack))
1390b1b8ab34Slling 				return (0);
1391b1b8ab34Slling 
1392b1b8ab34Slling 			if (errnum = get_objset_mdn(MOS, NULL,
1393b1b8ab34Slling 			    &current_bootfs_obj, mdn, stack))
1394b1b8ab34Slling 				return (0);
1395b1b8ab34Slling 		} else {
1396b35c6776Staylor 			if (errnum = get_objset_mdn(MOS, current_bootfs,
1397b35c6776Staylor 			    &current_bootfs_obj, mdn, stack)) {
1398051aabe6Staylor 				grub_memset(current_bootfs, 0, MAXNAMELEN);
1399b1b8ab34Slling 				return (0);
1400b35c6776Staylor 			}
1401b1b8ab34Slling 		}
1402b1b8ab34Slling 	}
1403b1b8ab34Slling 
1404b1b8ab34Slling 	if (dnode_get_path(mdn, filename, DNODE, stack)) {
1405b1b8ab34Slling 		errnum = ERR_FILE_NOT_FOUND;
1406b1b8ab34Slling 		return (0);
1407b1b8ab34Slling 	}
1408b1b8ab34Slling 
1409b1b8ab34Slling 	/* get the file size and set the file position to 0 */
1410b1b8ab34Slling 	filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size;
1411b1b8ab34Slling 	filepos = 0;
1412b1b8ab34Slling 
1413b1b8ab34Slling 	dnode_buf = NULL;
1414b1b8ab34Slling 	return (1);
1415b1b8ab34Slling }
1416b1b8ab34Slling 
1417b1b8ab34Slling /*
1418b1b8ab34Slling  * zfs_read reads in the data blocks pointed by the DNODE.
1419b1b8ab34Slling  *
1420b1b8ab34Slling  * Return:
1421b1b8ab34Slling  *	len - the length successfully read in to the buffer
1422b1b8ab34Slling  *	0   - failure
1423b1b8ab34Slling  */
1424b1b8ab34Slling int
1425b1b8ab34Slling zfs_read(char *buf, int len)
1426b1b8ab34Slling {
1427b1b8ab34Slling 	char *stack;
1428b1b8ab34Slling 	char *tmpbuf;
1429b1b8ab34Slling 	int blksz, length, movesize;
1430b1b8ab34Slling 
1431b1b8ab34Slling 	if (file_buf == NULL) {
1432b1b8ab34Slling 		file_buf = stackbase;
1433b1b8ab34Slling 		stackbase += SPA_MAXBLOCKSIZE;
1434b1b8ab34Slling 		file_start = file_end = 0;
1435b1b8ab34Slling 	}
1436b1b8ab34Slling 	stack = stackbase;
1437b1b8ab34Slling 
1438b1b8ab34Slling 	/*
1439b1b8ab34Slling 	 * If offset is in memory, move it into the buffer provided and return.
1440b1b8ab34Slling 	 */
1441b1b8ab34Slling 	if (filepos >= file_start && filepos+len <= file_end) {
1442b1b8ab34Slling 		grub_memmove(buf, file_buf + filepos - file_start, len);
1443b1b8ab34Slling 		filepos += len;
1444b1b8ab34Slling 		return (len);
1445b1b8ab34Slling 	}
1446b1b8ab34Slling 
1447b1b8ab34Slling 	blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT;
1448b1b8ab34Slling 
1449b1b8ab34Slling 	/*
1450b1b8ab34Slling 	 * Entire Dnode is too big to fit into the space available.  We
1451b1b8ab34Slling 	 * will need to read it in chunks.  This could be optimized to
1452b1b8ab34Slling 	 * read in as large a chunk as there is space available, but for
1453b1b8ab34Slling 	 * now, this only reads in one data block at a time.
1454b1b8ab34Slling 	 */
1455b1b8ab34Slling 	length = len;
1456b1b8ab34Slling 	while (length) {
1457b1b8ab34Slling 		/*
1458b1b8ab34Slling 		 * Find requested blkid and the offset within that block.
1459b1b8ab34Slling 		 */
1460b1b8ab34Slling 		uint64_t blkid = filepos / blksz;
1461b1b8ab34Slling 
1462b1b8ab34Slling 		if (errnum = dmu_read(DNODE, blkid, file_buf, stack))
1463b1b8ab34Slling 			return (0);
1464b1b8ab34Slling 
1465b1b8ab34Slling 		file_start = blkid * blksz;
1466b1b8ab34Slling 		file_end = file_start + blksz;
1467b1b8ab34Slling 
1468b1b8ab34Slling 		movesize = MIN(length, file_end - filepos);
1469b1b8ab34Slling 
1470b1b8ab34Slling 		grub_memmove(buf, file_buf + filepos - file_start,
1471b1b8ab34Slling 		    movesize);
1472b1b8ab34Slling 		buf += movesize;
1473b1b8ab34Slling 		length -= movesize;
1474b1b8ab34Slling 		filepos += movesize;
1475b1b8ab34Slling 	}
1476b1b8ab34Slling 
1477b1b8ab34Slling 	return (len);
1478b1b8ab34Slling }
1479b1b8ab34Slling 
/*
 * zfs_embed() does no work: neither argument is read or written and the
 * return value is always 1.
 */
int
zfs_embed(int *start_sector, int needed_sectors)
{
	(void) start_sector;
	(void) needed_sectors;

	return (1);
}
1488b1b8ab34Slling 
1489b1b8ab34Slling #endif /* FSYS_ZFS */
1490