1c023f65Toomas Soome/*
2199767fToomas Soome * Copyright (c) 2007 Doug Rabson
3199767fToomas Soome * All rights reserved.
4199767fToomas Soome *
5199767fToomas Soome * Redistribution and use in source and binary forms, with or without
6199767fToomas Soome * modification, are permitted provided that the following conditions
7199767fToomas Soome * are met:
8199767fToomas Soome * 1. Redistributions of source code must retain the above copyright
9199767fToomas Soome *    notice, this list of conditions and the following disclaimer.
10199767fToomas Soome * 2. Redistributions in binary form must reproduce the above copyright
11199767fToomas Soome *    notice, this list of conditions and the following disclaimer in the
12199767fToomas Soome *    documentation and/or other materials provided with the distribution.
13199767fToomas Soome *
14199767fToomas Soome * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15199767fToomas Soome * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16199767fToomas Soome * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17199767fToomas Soome * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18199767fToomas Soome * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19199767fToomas Soome * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20199767fToomas Soome * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21199767fToomas Soome * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22199767fToomas Soome * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23199767fToomas Soome * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24199767fToomas Soome * SUCH DAMAGE.
25199767fToomas Soome */
26199767fToomas Soome
27199767fToomas Soome#include <sys/cdefs.h>
28199767fToomas Soome
29199767fToomas Soome/*
30199767fToomas Soome *	Stand-alone ZFS file reader.
31199767fToomas Soome */
32199767fToomas Soome
3313a6e30Toomas Soome#include <sys/endian.h>
34199767fToomas Soome#include <sys/stat.h>
35199767fToomas Soome#include <sys/stdint.h>
36c023f65Toomas Soome#include <sys/list.h>
37c023f65Toomas Soome#include <inttypes.h>
38199767fToomas Soome
39199767fToomas Soome#include "zfsimpl.h"
40199767fToomas Soome#include "zfssubr.c"
41199767fToomas Soome
42199767fToomas Soome
/*
 * State kept for a mounted dataset: the pool it belongs to, a by-value
 * copy of an objset header and an object number.
 */
struct zfsmount {
	const spa_t	*spa;		/* pool */
	objset_phys_t	objset;		/* objset header (embedded copy) */
	uint64_t	rootobj;	/* presumably the root directory object */
};
48199767fToomas Soome
/*
 * The indirect_child_t represents the vdev that we will read from, when we
 * need to read all copies of the data (e.g. for scrub or reconstruction).
 * For plain (non-mirror) top-level vdevs (i.e. is_vdev is not a mirror),
 * ic_vdev is the same as is_vdev.  However, for mirror top-level vdevs,
 * ic_vdev is a child of the mirror.
 */
typedef struct indirect_child {
	void *ic_data;		/* presumably the data read from ic_vdev */
	vdev_t *ic_vdev;	/* vdev to read this copy from */
} indirect_child_t;
60c023f65Toomas Soome
/*
 * The indirect_split_t represents one mapped segment of an i/o to the
 * indirect vdev. For non-split (contiguously-mapped) blocks, there will be
 * only one indirect_split_t, with is_split_offset==0 and is_size==io_size.
 * For split blocks, there will be several of these.
 */
typedef struct indirect_split {
	list_node_t is_node; /* link on iv_splits */

	/*
	 * is_split_offset is the offset into the i/o.
	 * This is the sum of the previous splits' is_size's.
	 */
	uint64_t is_split_offset;

	vdev_t *is_vdev; /* top-level vdev */
	uint64_t is_target_offset; /* offset on is_vdev */
	uint64_t is_size; /* length of this segment */
	int is_children; /* number of entries in is_child[] */

	/*
	 * is_good_child is the child that we are currently using to
	 * attempt reconstruction.
	 */
	int is_good_child;

	/*
	 * Declared with one element but used as a variable-length trailing
	 * array; is_children holds the real element count.
	 */
	indirect_child_t is_child[1]; /* variable-length */
} indirect_split_t;
89c023f65Toomas Soome
/*
 * The indirect_vsd_t is associated with each i/o to the indirect vdev.
 * It is the "Vdev-Specific Data" in the zio_t's io_vsd.
 */
typedef struct indirect_vsd {
	boolean_t iv_split_block;	/* presumably: i/o maps to >1 split */
	boolean_t iv_reconstruct;	/* presumably: reconstruction wanted */

	list_t iv_splits; /* list of indirect_split_t's */
} indirect_vsd_t;
100c023f65Toomas Soome
/*
 * List of all vdevs, chained through v_alllink.
 * The list head is initialised by zfs_init().
 */
static vdev_list_t zfs_vdevs;
105199767fToomas Soome
/*
 * List of ZFS features supported for read.
 *
 * The table is terminated by a NULL entry.
 * nvlist_check_features_for_read() compares every pair name of a feature
 * nvlist against these strings and rejects the list if any name is not
 * present here.
 */
static const char *features_for_read[] = {
	"org.illumos:lz4_compress",
	"com.delphix:hole_birth",
	"com.delphix:extensible_dataset",
	"com.delphix:embedded_data",
	"org.open-zfs:large_blocks",
	"org.illumos:sha512",
	"org.illumos:skein",
	"org.illumos:edonr",
	"org.zfsonlinux:large_dnode",
	"com.joyent:multi_vdev_crash_dump",
	"com.delphix:spacemap_histogram",
	"com.delphix:zpool_checkpoint",
	"com.delphix:spacemap_v2",
	"com.datto:encryption",
	"com.datto:bookmark_v2",
	"org.zfsonlinux:allocation_classes",
	"com.datto:resilver_defer",
	"com.delphix:device_removal",
	"com.delphix:obsolete_counts",
	NULL	/* end of table */
};
131199767fToomas Soome
/*
 * List of all pools, chained through spa_link.
 * The list head is initialised by zfs_init().
 */
static spa_list_t zfs_pools;

/*
 * Single-entry dnode block cache.  dnode_cache_buf is allocated in
 * zfs_init() with room for SPA_MAXBLOCKSIZE bytes; dnode_cache_obj and
 * dnode_cache_bn presumably identify which dnode and block number the
 * buffer currently holds (their users are outside this part of the file).
 */
static const dnode_phys_t *dnode_cache_obj;
static uint64_t dnode_cache_bn;
static char *dnode_cache_buf;
140199767fToomas Soome
/*
 * Prototypes for static routines that are referenced before their
 * definitions (e.g. objset_get_dnode() and dnode_read() are used by the
 * indirect mapping code below).
 */
static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf);
static int zfs_get_root(const spa_t *spa, uint64_t *objid);
static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result);
static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode,
    const char *name, uint64_t integer_size, uint64_t num_integers,
    void *value);
static int objset_get_dnode(const spa_t *, const objset_phys_t *, uint64_t,
    dnode_phys_t *);
static int dnode_read(const spa_t *, const dnode_phys_t *, off_t, void *,
    size_t);
static int vdev_indirect_read(vdev_t *, const blkptr_t *, void *, off_t,
    size_t);
static int vdev_mirror_read(vdev_t *, const blkptr_t *, void *, off_t,
    size_t);
155199767fToomas Soome
156199767fToomas Soomestatic void
157199767fToomas Soomezfs_init(void)
158199767fToomas Soome{
159199767fToomas Soome	STAILQ_INIT(&zfs_vdevs);
160199767fToomas Soome	STAILQ_INIT(&zfs_pools);
161199767fToomas Soome
162199767fToomas Soome	dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE);
163199767fToomas Soome
164199767fToomas Soome	zfs_init_crc();
165199767fToomas Soome}
166199767fToomas Soome
/*
 * Decode one big-endian 32-bit value from the XDR stream into *ip and
 * advance the stream cursor *xdr past the four bytes consumed.
 * Always returns 0.
 */
static int
xdr_int(const unsigned char **xdr, int *ip)
{
	const unsigned char *cursor = *xdr;

	*ip = be32dec(cursor);
	*xdr = cursor + 4;

	return (0);
}
174199767fToomas Soome
175199767fToomas Soomestatic int
1766fd7fa3Toomas Soomexdr_u_int(const unsigned char **xdr, uint_t *ip)
177199767fToomas Soome{
17813a6e30Toomas Soome	*ip = be32dec(*xdr);
179199767fToomas Soome	(*xdr) += 4;
180199767fToomas Soome	return (0);
181199767fToomas Soome}
182199767fToomas Soome
183199767fToomas Soomestatic int
184199767fToomas Soomexdr_uint64_t(const unsigned char **xdr, uint64_t *lp)
185199767fToomas Soome{
1866fd7fa3Toomas Soome	uint_t hi, lo;
187199767fToomas Soome
188199767fToomas Soome	xdr_u_int(xdr, &hi);
189199767fToomas Soome	xdr_u_int(xdr, &lo);
1906fd7fa3Toomas Soome	*lp = (((uint64_t)hi) << 32) | lo;
191199767fToomas Soome	return (0);
192199767fToomas Soome}
193199767fToomas Soome
194199767fToomas Soomestatic int
195199767fToomas Soomenvlist_find(const unsigned char *nvlist, const char *name, int type,
196ce5f7fbToomas Soome    int *elementsp, void *valuep, int *sizep)
197199767fToomas Soome{
198199767fToomas Soome	const unsigned char *p, *pair;
199199767fToomas Soome	int junk;
200199767fToomas Soome	int encoded_size, decoded_size;
201199767fToomas Soome
202199767fToomas Soome	p = nvlist;
203199767fToomas Soome	xdr_int(&p, &junk);
204199767fToomas Soome	xdr_int(&p, &junk);
205199767fToomas Soome
206199767fToomas Soome	pair = p;
207199767fToomas Soome	xdr_int(&p, &encoded_size);
208199767fToomas Soome	xdr_int(&p, &decoded_size);
209199767fToomas Soome	while (encoded_size && decoded_size) {
210199767fToomas Soome		int namelen, pairtype, elements;
211199767fToomas Soome		const char *pairname;
212199767fToomas Soome
213199767fToomas Soome		xdr_int(&p, &namelen);
2146fd7fa3Toomas Soome		pairname = (const char *)p;
215199767fToomas Soome		p += roundup(namelen, 4);
216199767fToomas Soome		xdr_int(&p, &pairtype);
217199767fToomas Soome
2186fd7fa3Toomas Soome		if (memcmp(name, pairname, namelen) == 0 && type == pairtype) {
219199767fToomas Soome			xdr_int(&p, &elements);
220199767fToomas Soome			if (elementsp)
221199767fToomas Soome				*elementsp = elements;
222199767fToomas Soome			if (type == DATA_TYPE_UINT64) {
2236fd7fa3Toomas Soome				xdr_uint64_t(&p, (uint64_t *)valuep);
224199767fToomas Soome				return (0);
225199767fToomas Soome			} else if (type == DATA_TYPE_STRING) {
226199767fToomas Soome				int len;
227199767fToomas Soome				xdr_int(&p, &len);
228ce5f7fbToomas Soome				if (sizep != NULL)
229ce5f7fbToomas Soome					*sizep = len;
2306fd7fa3Toomas Soome				(*(const char **)valuep) = (const char *)p;
231199767fToomas Soome				return (0);
2326fd7fa3Toomas Soome			} else if (type == DATA_TYPE_NVLIST ||
2336fd7fa3Toomas Soome			    type == DATA_TYPE_NVLIST_ARRAY) {
2346fd7fa3Toomas Soome				(*(const unsigned char **)valuep) =
2356fd7fa3Toomas Soome				    (const unsigned char *)p;
236199767fToomas Soome				return (0);
237199767fToomas Soome			} else {
238199767fToomas Soome				return (EIO);
239199767fToomas Soome			}
240199767fToomas Soome		} else {
241199767fToomas Soome			/*
2426fd7fa3Toomas Soome			 * Not the pair we are looking for, skip to the
2436fd7fa3Toomas Soome			 * next one.
244199767fToomas Soome			 */
245199767fToomas Soome			p = pair + encoded_size;
246199767fToomas Soome		}
247199767fToomas Soome
248199767fToomas Soome		pair = p;
249199767fToomas Soome		xdr_int(&p, &encoded_size);
250199767fToomas Soome		xdr_int(&p, &decoded_size);
251199767fToomas Soome	}
252199767fToomas Soome
253199767fToomas Soome	return (EIO);
254199767fToomas Soome}
255199767fToomas Soome
256199767fToomas Soomestatic int
257199767fToomas Soomenvlist_check_features_for_read(const unsigned char *nvlist)
258199767fToomas Soome{
259199767fToomas Soome	const unsigned char *p, *pair;
260199767fToomas Soome	int junk;
261199767fToomas Soome	int encoded_size, decoded_size;
262199767fToomas Soome	int rc;
263199767fToomas Soome
264199767fToomas Soome	rc = 0;
265199767fToomas Soome
266199767fToomas Soome	p = nvlist;
267199767fToomas Soome	xdr_int(&p, &junk);
268199767fToomas Soome	xdr_int(&p, &junk);
269199767fToomas Soome
270199767fToomas Soome	pair = p;
271199767fToomas Soome	xdr_int(&p, &encoded_size);
272199767fToomas Soome	xdr_int(&p, &decoded_size);
273199767fToomas Soome	while (encoded_size && decoded_size) {
274199767fToomas Soome		int namelen, pairtype;
275199767fToomas Soome		const char *pairname;
276199767fToomas Soome		int i, found;
277199767fToomas Soome
278199767fToomas Soome		found = 0;
279199767fToomas Soome
280199767fToomas Soome		xdr_int(&p, &namelen);
2816fd7fa3Toomas Soome		pairname = (const char *)p;
282199767fToomas Soome		p += roundup(namelen, 4);
283199767fToomas Soome		xdr_int(&p, &pairtype);
284199767fToomas Soome
285199767fToomas Soome		for (i = 0; features_for_read[i] != NULL; i++) {
2866fd7fa3Toomas Soome			if (memcmp(pairname, features_for_read[i],
2876fd7fa3Toomas Soome			    namelen) == 0) {
288199767fToomas Soome				found = 1;
289199767fToomas Soome				break;
290199767fToomas Soome			}
291199767fToomas Soome		}
292199767fToomas Soome
293199767fToomas Soome		if (!found) {
294199767fToomas Soome			printf("ZFS: unsupported feature: %s\n", pairname);
295199767fToomas Soome			rc = EIO;
296199767fToomas Soome		}
297199767fToomas Soome
298199767fToomas Soome		p = pair + encoded_size;
299199767fToomas Soome
300199767fToomas Soome		pair = p;
301199767fToomas Soome		xdr_int(&p, &encoded_size);
302199767fToomas Soome		xdr_int(&p, &decoded_size);
303199767fToomas Soome	}
304199767fToomas Soome
305199767fToomas Soome	return (rc);
306199767fToomas Soome}
307199767fToomas Soome
/*
 * Step over one complete nvlist and return the position just after it,
 * i.e. the next nvlist when the lists are packed in an array.
 *
 * Two header words are skipped, then each pair is jumped over using its
 * encoded size until a pair header with a zero encoded or decoded size is
 * read.  The returned pointer is positioned after that final header.
 */
static const unsigned char *
nvlist_next(const unsigned char *nvlist)
{
	const unsigned char *cursor, *pair_start;
	int header;
	int enc_len, dec_len;

	cursor = nvlist;
	xdr_int(&cursor, &header);
	xdr_int(&cursor, &header);

	for (;;) {
		pair_start = cursor;
		xdr_int(&cursor, &enc_len);
		xdr_int(&cursor, &dec_len);
		if (enc_len == 0 || dec_len == 0)
			break;

		cursor = pair_start + enc_len;
	}

	return (cursor);
}
335199767fToomas Soome
336199767fToomas Soome#ifdef TEST
337199767fToomas Soome
338199767fToomas Soomestatic const unsigned char *
339199767fToomas Soomenvlist_print(const unsigned char *nvlist, unsigned int indent)
340199767fToomas Soome{
3416fd7fa3Toomas Soome	static const char *typenames[] = {
342199767fToomas Soome		"DATA_TYPE_UNKNOWN",
343199767fToomas Soome		"DATA_TYPE_BOOLEAN",
344199767fToomas Soome		"DATA_TYPE_BYTE",
345199767fToomas Soome		"DATA_TYPE_INT16",
346199767fToomas Soome		"DATA_TYPE_UINT16",
347199767fToomas Soome		"DATA_TYPE_INT32",
348199767fToomas Soome		"DATA_TYPE_UINT32",
349199767fToomas Soome		"DATA_TYPE_INT64",
350199767fToomas Soome		"DATA_TYPE_UINT64",
351199767fToomas Soome		"DATA_TYPE_STRING",
352199767fToomas Soome		"DATA_TYPE_BYTE_ARRAY",
353199767fToomas Soome		"DATA_TYPE_INT16_ARRAY",
354199767fToomas Soome		"DATA_TYPE_UINT16_ARRAY",
355199767fToomas Soome		"DATA_TYPE_INT32_ARRAY",
356199767fToomas Soome		"DATA_TYPE_UINT32_ARRAY",
357199767fToomas Soome		"DATA_TYPE_INT64_ARRAY",
358199767fToomas Soome		"DATA_TYPE_UINT64_ARRAY",
359199767fToomas Soome		"DATA_TYPE_STRING_ARRAY",
360199767fToomas Soome		"DATA_TYPE_HRTIME",
361199767fToomas Soome		"DATA_TYPE_NVLIST",
362199767fToomas Soome		"DATA_TYPE_NVLIST_ARRAY",
363199767fToomas Soome		"DATA_TYPE_BOOLEAN_VALUE",
364199767fToomas Soome		"DATA_TYPE_INT8",
365199767fToomas Soome		"DATA_TYPE_UINT8",
366199767fToomas Soome		"DATA_TYPE_BOOLEAN_ARRAY",
367199767fToomas Soome		"DATA_TYPE_INT8_ARRAY",
368199767fToomas Soome		"DATA_TYPE_UINT8_ARRAY"
369199767fToomas Soome	};
370199767fToomas Soome
371199767fToomas Soome	unsigned int i, j;
372199767fToomas Soome	const unsigned char *p, *pair;
373199767fToomas Soome	int junk;
374199767fToomas Soome	int encoded_size, decoded_size;
375199767fToomas Soome
376199767fToomas Soome	p = nvlist;
377199767fToomas Soome	xdr_int(&p, &junk);
378199767fToomas Soome	xdr_int(&p, &junk);
379199767fToomas Soome
380199767fToomas Soome	pair = p;
381199767fToomas Soome	xdr_int(&p, &encoded_size);
382199767fToomas Soome	xdr_int(&p, &decoded_size);
383199767fToomas Soome	while (encoded_size && decoded_size) {
384199767fToomas Soome		int namelen, pairtype, elements;
385199767fToomas Soome		const char *pairname;
386199767fToomas Soome
387199767fToomas Soome		xdr_int(&p, &namelen);
3886fd7fa3Toomas Soome		pairname = (const char *)p;
389199767fToomas Soome		p += roundup(namelen, 4);
390199767fToomas Soome		xdr_int(&p, &pairtype);
391199767fToomas Soome
392199767fToomas Soome		for (i = 0; i < indent; i++)
393199767fToomas Soome			printf(" ");
394ce5f7fbToomas Soome		printf("%s %.*s", typenames[pairtype], namelen, pairname);
395199767fToomas Soome
396199767fToomas Soome		xdr_int(&p, &elements);
397199767fToomas Soome		switch (pairtype) {
398199767fToomas Soome		case DATA_TYPE_UINT64: {
399199767fToomas Soome			uint64_t val;
400199767fToomas Soome			xdr_uint64_t(&p, &val);
401199767fToomas Soome			printf(" = 0x%jx\n", (uintmax_t)val);
402199767fToomas Soome			break;
403199767fToomas Soome		}
404199767fToomas Soome
405199767fToomas Soome		case DATA_TYPE_STRING: {
406199767fToomas Soome			int len;
407199767fToomas Soome			xdr_int(&p, &len);
408ce5f7fbToomas Soome			printf(" = \"%.*s\"\n", len, p);
409199767fToomas Soome			break;
410199767fToomas Soome		}
411199767fToomas Soome
412199767fToomas Soome		case DATA_TYPE_NVLIST:
413199767fToomas Soome			printf("\n");
414199767fToomas Soome			nvlist_print(p, indent + 1);
415199767fToomas Soome			break;
416199767fToomas Soome
417199767fToomas Soome		case DATA_TYPE_NVLIST_ARRAY:
418199767fToomas Soome			for (j = 0; j < elements; j++) {
419199767fToomas Soome				printf("[%d]\n", j);
420199767fToomas Soome				p = nvlist_print(p, indent + 1);
421199767fToomas Soome				if (j != elements - 1) {
422199767fToomas Soome					for (i = 0; i < indent; i++)
423199767fToomas Soome						printf(" ");
424ce5f7fbToomas Soome					printf("%s %.*s", typenames[pairtype],
425ce5f7fbToomas Soome					    namelen, pairname);
426199767fToomas Soome				}
427199767fToomas Soome			}
428199767fToomas Soome			break;
429199767fToomas Soome
430199767fToomas Soome		default:
431199767fToomas Soome			printf("\n");
432199767fToomas Soome		}
433199767fToomas Soome
434199767fToomas Soome		p = pair + encoded_size;
435199767fToomas Soome
436199767fToomas Soome		pair = p;
437199767fToomas Soome		xdr_int(&p, &encoded_size);
438199767fToomas Soome		xdr_int(&p, &decoded_size);
439199767fToomas Soome	}
440199767fToomas Soome
4416fd7fa3Toomas Soome	return (p);
442199767fToomas Soome}
443199767fToomas Soome
444199767fToomas Soome#endif
445199767fToomas Soome
446199767fToomas Soomestatic int
447199767fToomas Soomevdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf,
448199767fToomas Soome    off_t offset, size_t size)
449199767fToomas Soome{
450199767fToomas Soome	size_t psize;
451199767fToomas Soome	int rc;
452199767fToomas Soome
453199767fToomas Soome	if (!vdev->v_phys_read)
454199767fToomas Soome		return (EIO);
455199767fToomas Soome
456199767fToomas Soome	if (bp) {
457199767fToomas Soome		psize = BP_GET_PSIZE(bp);
458199767fToomas Soome	} else {
459199767fToomas Soome		psize = size;
460199767fToomas Soome	}
461199767fToomas Soome
462199767fToomas Soome	rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize);
463da9bf00Toomas Soome	if (rc == 0) {
464da9bf00Toomas Soome		if (bp != NULL)
465da9bf00Toomas Soome			rc = zio_checksum_verify(vdev->v_spa, bp, buf);
466da9bf00Toomas Soome	}
467199767fToomas Soome
468da9bf00Toomas Soome	return (rc);
469199767fToomas Soome}
470199767fToomas Soome
/*
 * One segment of a remap operation.  rs_alloc() fills in every field
 * except rs_node.
 */
typedef struct remap_segment {
	vdev_t *rs_vd;			/* vdev the segment refers to */
	uint64_t rs_offset;		/* offset on rs_vd */
	uint64_t rs_asize;		/* size of the segment */
	uint64_t rs_split_offset;	/* presumably offset within the i/o */
	list_node_t rs_node;		/* list linkage */
} remap_segment_t;
478c023f65Toomas Soome
479c023f65Toomas Soomestatic remap_segment_t *
480c023f65Toomas Soomers_alloc(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t split_offset)
481c023f65Toomas Soome{
482c023f65Toomas Soome	remap_segment_t *rs = malloc(sizeof (remap_segment_t));
483c023f65Toomas Soome
484c023f65Toomas Soome	if (rs != NULL) {
485c023f65Toomas Soome		rs->rs_vd = vd;
486c023f65Toomas Soome		rs->rs_offset = offset;
487c023f65Toomas Soome		rs->rs_asize = asize;
488c023f65Toomas Soome		rs->rs_split_offset = split_offset;
489c023f65Toomas Soome	}
490c023f65Toomas Soome
491c023f65Toomas Soome	return (rs);
492c023f65Toomas Soome}
493c023f65Toomas Soome
494c023f65Toomas Soomevdev_indirect_mapping_t *
495c023f65Toomas Soomevdev_indirect_mapping_open(spa_t *spa, objset_phys_t *os,
496c023f65Toomas Soome    uint64_t mapping_object)
497c023f65Toomas Soome{
498c023f65Toomas Soome	vdev_indirect_mapping_t *vim;
499c023f65Toomas Soome	vdev_indirect_mapping_phys_t *vim_phys;
500c023f65Toomas Soome	int rc;
501c023f65Toomas Soome
502c023f65Toomas Soome	vim = calloc(1, sizeof (*vim));
503c023f65Toomas Soome	if (vim == NULL)
504c023f65Toomas Soome		return (NULL);
505c023f65Toomas Soome
506c023f65Toomas Soome	vim->vim_dn = calloc(1, sizeof (*vim->vim_dn));
507c023f65Toomas Soome	if (vim->vim_dn == NULL) {
508c023f65Toomas Soome		free(vim);
509c023f65Toomas Soome		return (NULL);
510c023f65Toomas Soome	}
511c023f65Toomas Soome
512c023f65Toomas Soome	rc = objset_get_dnode(spa, os, mapping_object, vim->vim_dn);
513c023f65Toomas Soome	if (rc != 0) {
514c023f65Toomas Soome		free(vim->vim_dn);
515c023f65Toomas Soome		free(vim);
516c023f65Toomas Soome		return (NULL);
517c023f65Toomas Soome	}
518c023f65Toomas Soome
519c023f65Toomas Soome	vim->vim_spa = spa;
520c023f65Toomas Soome	vim->vim_phys = malloc(sizeof (*vim->vim_phys));
521c023f65Toomas Soome	if (vim->vim_phys == NULL) {
522c023f65Toomas Soome		free(vim->vim_dn);
523c023f65Toomas Soome		free(vim);
524c023f65Toomas Soome		return (NULL);
525c023f65Toomas Soome	}
526c023f65Toomas Soome
527c023f65Toomas Soome	vim_phys = (vdev_indirect_mapping_phys_t *)DN_BONUS(vim->vim_dn);
528c023f65Toomas Soome	*vim->vim_phys = *vim_phys;
529c023f65Toomas Soome
530c023f65Toomas Soome	vim->vim_objset = os;
531c023f65Toomas Soome	vim->vim_object = mapping_object;
532c023f65Toomas Soome	vim->vim_entries = NULL;
533c023f65Toomas Soome
534c023f65Toomas Soome	vim->vim_havecounts =
535c023f65Toomas Soome	    (vim->vim_dn->dn_bonuslen > VDEV_INDIRECT_MAPPING_SIZE_V0);
536c023f65Toomas Soome
537c023f65Toomas Soome	return (vim);
538c023f65Toomas Soome}
539c023f65Toomas Soome
540c023f65Toomas Soome/*
541c023f65Toomas Soome * Compare an offset with an indirect mapping entry; there are three
542c023f65Toomas Soome * possible scenarios:
543c023f65Toomas Soome *
544c023f65Toomas Soome *     1. The offset is "less than" the mapping entry; meaning the
545c023f65Toomas Soome *        offset is less than the source offset of the mapping entry. In
546c023f65Toomas Soome *        this case, there is no overlap between the offset and the
547c023f65Toomas Soome *        mapping entry and -1 will be returned.
548c023f65Toomas Soome *
549c023f65Toomas Soome *     2. The offset is "greater than" the mapping entry; meaning the
550c023f65Toomas Soome *        offset is greater than the mapping entry's source offset plus
551c023f65Toomas Soome *        the entry's size. In this case, there is no overlap between
552c023f65Toomas Soome *        the offset and the mapping entry and 1 will be returned.
553c023f65Toomas Soome *
554c023f65Toomas Soome *        NOTE: If the offset is actually equal to the entry's offset
555c023f65Toomas Soome *        plus size, this is considered to be "greater" than the entry,
556c023f65Toomas Soome *        and this case applies (i.e. 1 will be returned). Thus, the
557c023f65Toomas Soome *        entry's "range" can be considered to be inclusive at its
558c023f65Toomas Soome *        start, but exclusive at its end: e.g. [src, src + size).
559c023f65Toomas Soome *
560c023f65Toomas Soome *     3. The last case to consider is if the offset actually falls
561c023f65Toomas Soome *        within the mapping entry's range. If this is the case, the
562c023f65Toomas Soome *        offset is considered to be "equal to" the mapping entry and
563c023f65Toomas Soome *        0 will be returned.
564c023f65Toomas Soome *
565c023f65Toomas Soome *        NOTE: If the offset is equal to the entry's source offset,
566c023f65Toomas Soome *        this case applies and 0 will be returned. If the offset is
567c023f65Toomas Soome *        equal to the entry's source plus its size, this case does
568c023f65Toomas Soome *        *not* apply (see "NOTE" above for scenario 2), and 1 will be
569c023f65Toomas Soome *        returned.
570c023f65Toomas Soome */
571c023f65Toomas Soomestatic int
572c023f65Toomas Soomedva_mapping_overlap_compare(const void *v_key, const void *v_array_elem)
573c023f65Toomas Soome{
574c023f65Toomas Soome	const uint64_t *key = v_key;
575c023f65Toomas Soome	const vdev_indirect_mapping_entry_phys_t *array_elem =
576c023f65Toomas Soome	    v_array_elem;
577c023f65Toomas Soome	uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem);
578c023f65Toomas Soome
579c023f65Toomas Soome	if (*key < src_offset) {
580c023f65Toomas Soome		return (-1);
581c023f65Toomas Soome	} else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) {
582c023f65Toomas Soome		return (0);
583c023f65Toomas Soome	} else {
584c023f65Toomas Soome		return (1);
585c023f65Toomas Soome	}
586c023f65Toomas Soome}
587c023f65Toomas Soome
588c023f65Toomas Soome/*
589c023f65Toomas Soome * Return array entry.
590c023f65Toomas Soome */
591c023f65Toomas Soomestatic vdev_indirect_mapping_entry_phys_t *
592c023f65Toomas Soomevdev_indirect_mapping_entry(vdev_indirect_mapping_t *vim, uint64_t index)
593c023f65Toomas Soome{
594c023f65Toomas Soome	uint64_t size;
595c023f65Toomas Soome	off_t offset = 0;
596c023f65Toomas Soome	int rc;
597c023f65Toomas Soome
598c023f65Toomas Soome	if (vim->vim_phys->vimp_num_entries == 0)
599c023f65Toomas Soome		return (NULL);
600c023f65Toomas Soome
601c023f65Toomas Soome	if (vim->vim_entries == NULL) {
602c023f65Toomas Soome		uint64_t bsize;
603c023f65Toomas Soome
604c023f65Toomas Soome		bsize = vim->vim_dn->dn_datablkszsec << SPA_MINBLOCKSHIFT;
605c023f65Toomas Soome		size = vim->vim_phys->vimp_num_entries *
606c023f65Toomas Soome		    sizeof (*vim->vim_entries);
607c023f65Toomas Soome		if (size > bsize) {
608c023f65Toomas Soome			size = bsize / sizeof (*vim->vim_entries);
609c023f65Toomas Soome			size *= sizeof (*vim->vim_entries);
610c023f65Toomas Soome		}
611c023f65Toomas Soome		vim->vim_entries = malloc(size);
612c023f65Toomas Soome		if (vim->vim_entries == NULL)
613c023f65Toomas Soome			return (NULL);
614c023f65Toomas Soome		vim->vim_num_entries = size / sizeof (*vim->vim_entries);
615c023f65Toomas Soome		offset = index * sizeof (*vim->vim_entries);
616c023f65Toomas Soome	}
617c023f65Toomas Soome
618c023f65Toomas Soome	/* We have data in vim_entries */
619c023f65Toomas Soome	if (offset == 0) {
620c023f65Toomas Soome		if (index >= vim->vim_entry_offset &&
621c023f65Toomas Soome		    index <= vim->vim_entry_offset + vim->vim_num_entries) {
622c023f65Toomas Soome			index -= vim->vim_entry_offset;
623c023f65Toomas Soome			return (&vim->vim_entries[index]);
624c023f65Toomas Soome		}
625c023f65Toomas Soome		offset = index * sizeof (*vim->vim_entries);
626c023f65Toomas Soome	}
627c023f65Toomas Soome
628c023f65Toomas Soome	vim->vim_entry_offset = index;
629c023f65Toomas Soome	size = vim->vim_num_entries * sizeof (*vim->vim_entries);
630c023f65Toomas Soome	rc = dnode_read(vim->vim_spa, vim->vim_dn, offset, vim->vim_entries,
631c023f65Toomas Soome	    size);
632c023f65Toomas Soome	if (rc != 0) {
633c023f65Toomas Soome		/* Read error, invalidate vim_entries. */
634c023f65Toomas Soome		free(vim->vim_entries);
635c023f65Toomas Soome		vim->vim_entries = NULL;
636c023f65Toomas Soome		return (NULL);
637c023f65Toomas Soome	}
638c023f65Toomas Soome	index -= vim->vim_entry_offset;
639c023f65Toomas Soome	return (&vim->vim_entries[index]);
640c023f65Toomas Soome}
641c023f65Toomas Soome
642c023f65Toomas Soome/*
643c023f65Toomas Soome * Returns the mapping entry for the given offset.
644c023f65Toomas Soome *
645c023f65Toomas Soome * It's possible that the given offset will not be in the mapping table
646c023f65Toomas Soome * (i.e. no mapping entries contain this offset), in which case, the
647c023f65Toomas Soome * return value value depends on the "next_if_missing" parameter.
648c023f65Toomas Soome *
649c023f65Toomas Soome * If the offset is not found in the table and "next_if_missing" is
650c023f65Toomas Soome * B_FALSE, then NULL will always be returned. The behavior is intended
651c023f65Toomas Soome * to allow consumers to get the entry corresponding to the offset
652c023f65Toomas Soome * parameter, iff the offset overlaps with an entry in the table.
653c023f65Toomas Soome *
654c023f65Toomas Soome * If the offset is not found in the table and "next_if_missing" is
655c023f65Toomas Soome * B_TRUE, then the entry nearest to the given offset will be returned,
656c023f65Toomas Soome * such that the entry's source offset is greater than the offset
657c023f65Toomas Soome * passed in (i.e. the "next" mapping entry in the table is returned, if
658c023f65Toomas Soome * the offset is missing from the table). If there are no entries whose
659c023f65Toomas Soome * source offset is greater than the passed in offset, NULL is returned.
660c023f65Toomas Soome */
661c023f65Toomas Soomestatic vdev_indirect_mapping_entry_phys_t *
662c023f65Toomas Soomevdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim,
663c023f65Toomas Soome    uint64_t offset)
664c023f65Toomas Soome{
665c023f65Toomas Soome	ASSERT(vim->vim_phys->vimp_num_entries > 0);
666c023f65Toomas Soome
667c023f65Toomas Soome	vdev_indirect_mapping_entry_phys_t *entry;
668c023f65Toomas Soome
669c023f65Toomas Soome	uint64_t last = vim->vim_phys->vimp_num_entries - 1;
670c023f65Toomas Soome	uint64_t base = 0;
671c023f65Toomas Soome
672c023f65Toomas Soome	/*
673c023f65Toomas Soome	 * We don't define these inside of the while loop because we use
674c023f65Toomas Soome	 * their value in the case that offset isn't in the mapping.
675c023f65Toomas Soome	 */
676c023f65Toomas Soome	uint64_t mid;
677c023f65Toomas Soome	int result;
678c023f65Toomas Soome
679c023f65Toomas Soome	while (last >= base) {
680c023f65Toomas Soome		mid = base + ((last - base) >> 1);
681c023f65Toomas Soome
682c023f65Toomas Soome		entry = vdev_indirect_mapping_entry(vim, mid);
683c023f65Toomas Soome		if (entry == NULL)
684c023f65Toomas Soome			break;
685c023f65Toomas Soome		result = dva_mapping_overlap_compare(&offset, entry);
686c023f65Toomas Soome
687c023f65Toomas Soome		if (result == 0) {
688c023f65Toomas Soome			break;
689c023f65Toomas Soome		} else if (result < 0) {
690c023f65Toomas Soome			last = mid - 1;
691c023f65Toomas Soome		} else {
692c023f65Toomas Soome			base = mid + 1;
693c023f65Toomas Soome		}
694c023f65Toomas Soome	}
695c023f65Toomas Soome	return (entry);
696c023f65Toomas Soome}
697c023f65Toomas Soome
698c023f65Toomas Soome/*
699c023f65Toomas Soome * Given an indirect vdev and an extent on that vdev, it duplicates the
700c023f65Toomas Soome * physical entries of the indirect mapping that correspond to the extent
701c023f65Toomas Soome * to a new array and returns a pointer to it. In addition, copied_entries
702c023f65Toomas Soome * is populated with the number of mapping entries that were duplicated.
703c023f65Toomas Soome *
704c023f65Toomas Soome * Finally, since we are doing an allocation, it is up to the caller to
705c023f65Toomas Soome * free the array allocated in this function.
706c023f65Toomas Soome */
707c023f65Toomas Soomevdev_indirect_mapping_entry_phys_t *
708c023f65Toomas Soomevdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *vd, uint64_t offset,
709c023f65Toomas Soome    uint64_t asize, uint64_t *copied_entries)
710c023f65Toomas Soome{
711c023f65Toomas Soome	vdev_indirect_mapping_entry_phys_t *duplicate_mappings = NULL;
712c023f65Toomas Soome	vdev_indirect_mapping_t *vim = vd->v_mapping;
713c023f65Toomas Soome	uint64_t entries = 0;
714c023f65Toomas Soome
715c023f65Toomas Soome	vdev_indirect_mapping_entry_phys_t *first_mapping =
716c023f65Toomas Soome	    vdev_indirect_mapping_entry_for_offset(vim, offset);
717c023f65Toomas Soome	ASSERT3P(first_mapping, !=, NULL);
718c023f65Toomas Soome
719c023f65Toomas Soome	vdev_indirect_mapping_entry_phys_t *m = first_mapping;
720c023f65Toomas Soome	while (asize > 0) {
721c023f65Toomas Soome		uint64_t size = DVA_GET_ASIZE(&m->vimep_dst);
722c023f65Toomas Soome		uint64_t inner_offset = offset - DVA_MAPPING_GET_SRC_OFFSET(m);
723c023f65Toomas Soome		uint64_t inner_size = MIN(asize, size - inner_offset);
724c023f65Toomas Soome
725c023f65Toomas Soome		offset += inner_size;
726c023f65Toomas Soome		asize -= inner_size;
727c023f65Toomas Soome		entries++;
728c023f65Toomas Soome		m++;
729c023f65Toomas Soome	}
730c023f65Toomas Soome
731c023f65Toomas Soome	size_t copy_length = entries * sizeof (*first_mapping);
732c023f65Toomas Soome	duplicate_mappings = malloc(copy_length);
733c023f65Toomas Soome	if (duplicate_mappings != NULL)
734c023f65Toomas Soome		bcopy(first_mapping, duplicate_mappings, copy_length);
735c023f65Toomas Soome	else
736c023f65Toomas Soome		entries = 0;
737c023f65Toomas Soome
738c023f65Toomas Soome	*copied_entries = entries;
739c023f65Toomas Soome
740c023f65Toomas Soome	return (duplicate_mappings);
741c023f65Toomas Soome}
742c023f65Toomas Soome
743c023f65Toomas Soomestatic vdev_t *
744c023f65Toomas Soomevdev_lookup_top(spa_t *spa, uint64_t vdev)
745c023f65Toomas Soome{
746c023f65Toomas Soome	vdev_t *rvd;
747da9bf00Toomas Soome	vdev_list_t *vlist;
748c023f65Toomas Soome
749da9bf00Toomas Soome	vlist = &spa->spa_root_vdev->v_children;
750da9bf00Toomas Soome	STAILQ_FOREACH(rvd, vlist, v_childlink)
751c023f65Toomas Soome		if (rvd->v_id == vdev)
752c023f65Toomas Soome			break;
753c023f65Toomas Soome
754c023f65Toomas Soome	return (rvd);
755c023f65Toomas Soome}
756c023f65Toomas Soome
757c023f65Toomas Soome/*
758c023f65Toomas Soome * This is a callback for vdev_indirect_remap() which allocates an
759c023f65Toomas Soome * indirect_split_t for each split segment and adds it to iv_splits.
760c023f65Toomas Soome */
761c023f65Toomas Soomestatic void
762c023f65Toomas Soomevdev_indirect_gather_splits(uint64_t split_offset, vdev_t *vd, uint64_t offset,
763c023f65Toomas Soome    uint64_t size, void *arg)
764c023f65Toomas Soome{
765c023f65Toomas Soome	int n = 1;
766c023f65Toomas Soome	zio_t *zio = arg;
767c023f65Toomas Soome	indirect_vsd_t *iv = zio->io_vsd;
768c023f65Toomas Soome
769c023f65Toomas Soome	if (vd->v_read == vdev_indirect_read)
770c023f65Toomas Soome		return;
771c023f65Toomas Soome
772c023f65Toomas Soome	if (vd->v_read == vdev_mirror_read)
773c023f65Toomas Soome		n = vd->v_nchildren;
774c023f65Toomas Soome
775c023f65Toomas Soome	indirect_split_t *is =
776c023f65Toomas Soome	    malloc(offsetof(indirect_split_t, is_child[n]));
777c023f65Toomas Soome	if (is == NULL) {
778c023f65Toomas Soome		zio->io_error = ENOMEM;
779c023f65Toomas Soome		return;
780c023f65Toomas Soome	}
781c023f65Toomas Soome	bzero(is, offsetof(indirect_split_t, is_child[n]));
782c023f65Toomas Soome
783c023f65Toomas Soome	is->is_children = n;
784c023f65Toomas Soome	is->is_size = size;
785c023f65Toomas Soome	is->is_split_offset = split_offset;
786c023f65Toomas Soome	is->is_target_offset = offset;
787c023f65Toomas Soome	is->is_vdev = vd;
788c023f65Toomas Soome
789c023f65Toomas Soome	/*
790c023f65Toomas Soome	 * Note that we only consider multiple copies of the data for
791c023f65Toomas Soome	 * *mirror* vdevs.  We don't for "replacing" or "spare" vdevs, even
792c023f65Toomas Soome	 * though they use the same ops as mirror, because there's only one
793c023f65Toomas Soome	 * "good" copy under the replacing/spare.
794c023f65Toomas Soome	 */
795c023f65Toomas Soome	if (vd->v_read == vdev_mirror_read) {
796c023f65Toomas Soome		int i = 0;
797c023f65Toomas Soome		vdev_t *kid;
798c023f65Toomas Soome
799c023f65Toomas Soome		STAILQ_FOREACH(kid, &vd->v_children, v_childlink) {
800c023f65Toomas Soome			is->is_child[i++].ic_vdev = kid;
801c023f65Toomas Soome		}
802c023f65Toomas Soome	} else {
803c023f65Toomas Soome		is->is_child[0].ic_vdev = vd;
804c023f65Toomas Soome	}
805c023f65Toomas Soome
806c023f65Toomas Soome	list_insert_tail(&iv->iv_splits, is);
807c023f65Toomas Soome}
808c023f65Toomas Soome
809c023f65Toomas Soomestatic void
810c023f65Toomas Soomevdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize, void *arg)
811c023f65Toomas Soome{
812c023f65Toomas Soome	list_t stack;
813da9bf00Toomas Soome	spa_t *spa = vd->v_spa;
814c023f65Toomas Soome	zio_t *zio = arg;
815042b560Toomas Soome	remap_segment_t *rs;
816c023f65Toomas Soome
817c023f65Toomas Soome	list_create(&stack, sizeof (remap_segment_t),
818c023f65Toomas Soome	    offsetof(remap_segment_t, rs_node));
819c023f65Toomas Soome
820042b560Toomas Soome	rs = rs_alloc(vd, offset, asize, 0);
821042b560Toomas Soome	if (rs == NULL) {
822042b560Toomas Soome		printf("vdev_indirect_remap: out of memory.\n");
823042b560Toomas Soome		zio->io_error = ENOMEM;
824042b560Toomas Soome	}
8256fd7fa3Toomas Soome	for (; rs != NULL; rs = list_remove_head(&stack)) {
826c023f65Toomas Soome		vdev_t *v = rs->rs_vd;
827c023f65Toomas Soome		uint64_t num_entries = 0;
828c023f65Toomas Soome		/* vdev_indirect_mapping_t *vim = v->v_mapping; */
829c023f65Toomas Soome		vdev_indirect_mapping_entry_phys_t *mapping =
830c023f65Toomas Soome		    vdev_indirect_mapping_duplicate_adjacent_entries(v,
831c023f65Toomas Soome		    rs->rs_offset, rs->rs_asize, &num_entries);
832c023f65Toomas Soome
833042b560Toomas Soome		if (num_entries == 0)
834042b560Toomas Soome			zio->io_error = ENOMEM;
835042b560Toomas Soome
836c023f65Toomas Soome		for (uint64_t i = 0; i < num_entries; i++) {
837c023f65Toomas Soome			vdev_indirect_mapping_entry_phys_t *m = &mapping[i];
838c023f65Toomas Soome			uint64_t size = DVA_GET_ASIZE(&m->vimep_dst);
839c023f65Toomas Soome			uint64_t dst_offset = DVA_GET_OFFSET(&m->vimep_dst);
840c023f65Toomas Soome			uint64_t dst_vdev = DVA_GET_VDEV(&m->vimep_dst);
841c023f65Toomas Soome			uint64_t inner_offset = rs->rs_offset -
842c023f65Toomas Soome			    DVA_MAPPING_GET_SRC_OFFSET(m);
843c023f65Toomas Soome			uint64_t inner_size =
844c023f65Toomas Soome			    MIN(rs->rs_asize, size - inner_offset);
845c023f65Toomas Soome			vdev_t *dst_v = vdev_lookup_top(spa, dst_vdev);
846c023f65Toomas Soome
847c023f65Toomas Soome			if (dst_v->v_read == vdev_indirect_read) {
848042b560Toomas Soome				remap_segment_t *o;
849042b560Toomas Soome
850042b560Toomas Soome				o = rs_alloc(dst_v, dst_offset + inner_offset,
851042b560Toomas Soome				    inner_size, rs->rs_split_offset);
852042b560Toomas Soome				if (o == NULL) {
853042b560Toomas Soome					printf("vdev_indirect_remap: "
854042b560Toomas Soome					    "out of memory.\n");
855042b560Toomas Soome					zio->io_error = ENOMEM;
856042b560Toomas Soome					break;
857042b560Toomas Soome				}
858042b560Toomas Soome
859042b560Toomas Soome				list_insert_head(&stack, o);
860c023f65Toomas Soome			}
861c023f65Toomas Soome			vdev_indirect_gather_splits(rs->rs_split_offset, dst_v,
862c023f65Toomas Soome			    dst_offset + inner_offset,
863c023f65Toomas Soome			    inner_size, arg);
864c023f65Toomas Soome
865c023f65Toomas Soome			/*
866c023f65Toomas Soome			 * vdev_indirect_gather_splits can have memory
867c023f65Toomas Soome			 * allocation error, we can not recover from it.
868c023f65Toomas Soome			 */
869c023f65Toomas Soome			if (zio->io_error != 0)
870c023f65Toomas Soome				break;
871c023f65Toomas Soome			rs->rs_offset += inner_size;
872c023f65Toomas Soome			rs->rs_asize -= inner_size;
873c023f65Toomas Soome			rs->rs_split_offset += inner_size;
874c023f65Toomas Soome		}
875c023f65Toomas Soome
876c023f65Toomas Soome		free(mapping);
877c023f65Toomas Soome		free(rs);
878c023f65Toomas Soome		if (zio->io_error != 0)
879c023f65Toomas Soome			break;
880c023f65Toomas Soome	}
881c023f65Toomas Soome
882c023f65Toomas Soome	list_destroy(&stack);
883c023f65Toomas Soome}
884c023f65Toomas Soome
885c023f65Toomas Soomestatic void
886c023f65Toomas Soomevdev_indirect_map_free(zio_t *zio)
887c023f65Toomas Soome{
888c023f65Toomas Soome	indirect_vsd_t *iv = zio->io_vsd;
889c023f65Toomas Soome	indirect_split_t *is;
890c023f65Toomas Soome
891c023f65Toomas Soome	while ((is = list_head(&iv->iv_splits)) != NULL) {
892c023f65Toomas Soome		for (int c = 0; c < is->is_children; c++) {
893c023f65Toomas Soome			indirect_child_t *ic = &is->is_child[c];
894c023f65Toomas Soome			free(ic->ic_data);
895c023f65Toomas Soome		}
896c023f65Toomas Soome		list_remove(&iv->iv_splits, is);
897c023f65Toomas Soome		free(is);
898c023f65Toomas Soome	}
899c023f65Toomas Soome	free(iv);
900c023f65Toomas Soome}
901c023f65Toomas Soome
902c023f65Toomas Soomestatic int
903c023f65Toomas Soomevdev_indirect_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
904c023f65Toomas Soome    off_t offset, size_t bytes)
905c023f65Toomas Soome{
906da9bf00Toomas Soome	zio_t zio;
907da9bf00Toomas Soome	spa_t *spa = vdev->v_spa;
908da9bf00Toomas Soome	indirect_vsd_t *iv;
909c023f65Toomas Soome	indirect_split_t *first;
910c023f65Toomas Soome	int rc = EIO;
911c023f65Toomas Soome
912da9bf00Toomas Soome	iv = calloc(1, sizeof (*iv));
913c023f65Toomas Soome	if (iv == NULL)
914c023f65Toomas Soome		return (ENOMEM);
915c023f65Toomas Soome
916c023f65Toomas Soome	list_create(&iv->iv_splits,
917c023f65Toomas Soome	    sizeof (indirect_split_t), offsetof(indirect_split_t, is_node));
918c023f65Toomas Soome
919da9bf00Toomas Soome	bzero(&zio, sizeof (zio));
920c023f65Toomas Soome	zio.io_spa = spa;
921c023f65Toomas Soome	zio.io_bp = (blkptr_t *)bp;
922c023f65Toomas Soome	zio.io_data = buf;
923c023f65Toomas Soome	zio.io_size = bytes;
924c023f65Toomas Soome	zio.io_offset = offset;
925c023f65Toomas Soome	zio.io_vd = vdev;
926c023f65Toomas Soome	zio.io_vsd = iv;
927c023f65Toomas Soome
928c023f65Toomas Soome	if (vdev->v_mapping == NULL) {
929c023f65Toomas Soome		vdev_indirect_config_t *vic;
930c023f65Toomas Soome
931c023f65Toomas Soome		vic = &vdev->vdev_indirect_config;
932c023f65Toomas Soome		vdev->v_mapping = vdev_indirect_mapping_open(spa,
933c023f65Toomas Soome		    &spa->spa_mos, vic->vic_mapping_object);
934c023f65Toomas Soome	}
935c023f65Toomas Soome
936c023f65Toomas Soome	vdev_indirect_remap(vdev, offset, bytes, &zio);
937c023f65Toomas Soome	if (zio.io_error != 0)
938c023f65Toomas Soome		return (zio.io_error);
939c023f65Toomas Soome
940c023f65Toomas Soome	first = list_head(&iv->iv_splits);
941c023f65Toomas Soome	if (first->is_size == zio.io_size) {
942c023f65Toomas Soome		/*
943c023f65Toomas Soome		 * This is not a split block; we are pointing to the entire
944c023f65Toomas Soome		 * data, which will checksum the same as the original data.
945c023f65Toomas Soome		 * Pass the BP down so that the child i/o can verify the
946c023f65Toomas Soome		 * checksum, and try a different location if available
947c023f65Toomas Soome		 * (e.g. on a mirror).
948c023f65Toomas Soome		 *
949c023f65Toomas Soome		 * While this special case could be handled the same as the
950c023f65Toomas Soome		 * general (split block) case, doing it this way ensures
951c023f65Toomas Soome		 * that the vast majority of blocks on indirect vdevs
952c023f65Toomas Soome		 * (which are not split) are handled identically to blocks
953c023f65Toomas Soome		 * on non-indirect vdevs.  This allows us to be less strict
954c023f65Toomas Soome		 * about performance in the general (but rare) case.
955c023f65Toomas Soome		 */
956c023f65Toomas Soome		rc = first->is_vdev->v_read(first->is_vdev, zio.io_bp,
957c023f65Toomas Soome		    zio.io_data, first->is_target_offset, bytes);
958c023f65Toomas Soome	} else {
959c023f65Toomas Soome		iv->iv_split_block = B_TRUE;
960c023f65Toomas Soome		/*
961c023f65Toomas Soome		 * Read one copy of each split segment, from the
962c023f65Toomas Soome		 * top-level vdev.  Since we don't know the
963c023f65Toomas Soome		 * checksum of each split individually, the child
964c023f65Toomas Soome		 * zio can't ensure that we get the right data.
965c023f65Toomas Soome		 * E.g. if it's a mirror, it will just read from a
966c023f65Toomas Soome		 * random (healthy) leaf vdev.  We have to verify
967c023f65Toomas Soome		 * the checksum in vdev_indirect_io_done().
968c023f65Toomas Soome		 */
969c023f65Toomas Soome		for (indirect_split_t *is = list_head(&iv->iv_splits);
970c023f65Toomas Soome		    is != NULL; is = list_next(&iv->iv_splits, is)) {
971c023f65Toomas Soome			char *ptr = zio.io_data;
972c023f65Toomas Soome
973c023f65Toomas Soome			rc = is->is_vdev->v_read(is->is_vdev, zio.io_bp,
974c023f65Toomas Soome			    ptr + is->is_split_offset, is->is_target_offset,
975c023f65Toomas Soome			    is->is_size);
976c023f65Toomas Soome		}
977c023f65Toomas Soome		if (zio_checksum_verify(spa, zio.io_bp, zio.io_data))
978c023f65Toomas Soome			rc = ECKSUM;
979c023f65Toomas Soome		else
980c023f65Toomas Soome			rc = 0;
981c023f65Toomas Soome	}
982c023f65Toomas Soome
983c023f65Toomas Soome	vdev_indirect_map_free(&zio);
984c023f65Toomas Soome	if (rc == 0)
985c023f65Toomas Soome		rc = zio.io_error;
986c023f65Toomas Soome
987c023f65Toomas Soome	return (rc);
988c023f65Toomas Soome}
989c023f65Toomas Soome
990199767fToomas Soomestatic int
991199767fToomas Soomevdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
992199767fToomas Soome    off_t offset, size_t bytes)
993199767fToomas Soome{
994199767fToomas Soome
995199767fToomas Soome	return (vdev_read_phys(vdev, bp, buf,
9966fd7fa3Toomas Soome	    offset + VDEV_LABEL_START_SIZE, bytes));
997199767fToomas Soome}
998199767fToomas Soome
9999117d17Toomas Soomestatic int
10009117d17Toomas Soomevdev_missing_read(vdev_t *vdev __unused, const blkptr_t *bp __unused,
10019117d17Toomas Soome    void *buf __unused, off_t offset __unused, size_t bytes __unused)
10029117d17Toomas Soome{
10039117d17Toomas Soome
10049117d17Toomas Soome	return (ENOTSUP);
10059117d17Toomas Soome}
1006199767fToomas Soome
1007199767fToomas Soomestatic int
1008199767fToomas Soomevdev_mirror_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
1009199767fToomas Soome    off_t offset, size_t bytes)
1010199767fToomas Soome{
1011199767fToomas Soome	vdev_t *kid;
1012199767fToomas Soome	int rc;
1013199767fToomas Soome
1014199767fToomas Soome	rc = EIO;
1015199767fToomas Soome	STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
1016199767fToomas Soome		if (kid->v_state != VDEV_STATE_HEALTHY)
1017199767fToomas Soome			continue;
1018199767fToomas Soome		rc = kid->v_read(kid, bp, buf, offset, bytes);
1019199767fToomas Soome		if (!rc)
1020199767fToomas Soome			return (0);
1021199767fToomas Soome	}
1022199767fToomas Soome
1023199767fToomas Soome	return (rc);
1024199767fToomas Soome}
1025199767fToomas Soome
1026199767fToomas Soomestatic int
1027199767fToomas Soomevdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
1028199767fToomas Soome    off_t offset, size_t bytes)
1029199767fToomas Soome{
1030199767fToomas Soome	vdev_t *kid;
1031199767fToomas Soome
1032199767fToomas Soome	/*
1033199767fToomas Soome	 * Here we should have two kids:
1034199767fToomas Soome	 * First one which is the one we are replacing and we can trust
1035199767fToomas Soome	 * only this one to have valid data, but it might not be present.
1036199767fToomas Soome	 * Second one is that one we are replacing with. It is most likely
1037199767fToomas Soome	 * healthy, but we can't trust it has needed data, so we won't use it.
1038199767fToomas Soome	 */
1039199767fToomas Soome	kid = STAILQ_FIRST(&vdev->v_children);
1040199767fToomas Soome	if (kid == NULL)
1041199767fToomas Soome		return (EIO);
1042199767fToomas Soome	if (kid->v_state != VDEV_STATE_HEALTHY)
1043199767fToomas Soome		return (EIO);
1044199767fToomas Soome	return (kid->v_read(kid, bp, buf, offset, bytes));
1045199767fToomas Soome}
1046199767fToomas Soome
1047199767fToomas Soomestatic vdev_t *
1048199767fToomas Soomevdev_find(uint64_t guid)
1049199767fToomas Soome{
1050199767fToomas Soome	vdev_t *vdev;
1051199767fToomas Soome
1052199767fToomas Soome	STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink)
1053199767fToomas Soome		if (vdev->v_guid == guid)
1054199767fToomas Soome			return (vdev);
1055199767fToomas Soome
1056199767fToomas Soome	return (0);
1057199767fToomas Soome}
1058199767fToomas Soome
1059199767fToomas Soomestatic vdev_t *
1060199767fToomas Soomevdev_create(uint64_t guid, vdev_read_t *vdev_read)
1061199767fToomas Soome{
1062199767fToomas Soome	vdev_t *vdev;
1063c023f65Toomas Soome	vdev_indirect_config_t *vic;
1064199767fToomas Soome
1065da9bf00Toomas Soome	vdev = calloc(1, sizeof (vdev_t));
1066da9bf00Toomas Soome	if (vdev != NULL) {
1067da9bf00Toomas Soome		STAILQ_INIT(&vdev->v_children);
1068da9bf00Toomas Soome		vdev->v_guid = guid;
1069da9bf00Toomas Soome		vdev->v_read = vdev_read;
1070c023f65Toomas Soome
1071da9bf00Toomas Soome		/*
1072da9bf00Toomas Soome		 * root vdev has no read function, we use this fact to
1073da9bf00Toomas Soome		 * skip setting up data we do not need for root vdev.
1074da9bf00Toomas Soome		 * We only point root vdev from spa.
1075da9bf00Toomas Soome		 */
1076da9bf00Toomas Soome		if (vdev_read != NULL) {
1077da9bf00Toomas Soome			vic = &vdev->vdev_indirect_config;
1078da9bf00Toomas Soome			vic->vic_prev_indirect_vdev = UINT64_MAX;
1079da9bf00Toomas Soome			STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink);
1080da9bf00Toomas Soome		}
1081da9bf00Toomas Soome	}
1082199767fToomas Soome
1083199767fToomas Soome	return (vdev);
1084199767fToomas Soome}
1085199767fToomas Soome
1086da9bf00Toomas Soomestatic void
1087da9bf00Toomas Soomevdev_set_initial_state(vdev_t *vdev, const unsigned char *nvlist)
1088199767fToomas Soome{
1089199767fToomas Soome	uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present;
109067806cdToomas Soome	uint64_t is_log;
1091199767fToomas Soome
1092da9bf00Toomas Soome	is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0;
1093da9bf00Toomas Soome	is_log = 0;
1094da9bf00Toomas Soome	(void) nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, NULL,
1095ce5f7fbToomas Soome	    &is_offline, NULL);
1096da9bf00Toomas Soome	(void) nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, NULL,
1097ce5f7fbToomas Soome	    &is_removed, NULL);
1098da9bf00Toomas Soome	(void) nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, NULL,
1099ce5f7fbToomas Soome	    &is_faulted, NULL);
1100da9bf00Toomas Soome	(void) nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64,
1101ce5f7fbToomas Soome	    NULL, &is_degraded, NULL);
1102da9bf00Toomas Soome	(void) nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64,
1103ce5f7fbToomas Soome	    NULL, &isnt_present, NULL);
1104da9bf00Toomas Soome	(void) nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, NULL,
1105ce5f7fbToomas Soome	    &is_log, NULL);
1106da9bf00Toomas Soome
1107da9bf00Toomas Soome	if (is_offline != 0)
1108da9bf00Toomas Soome		vdev->v_state = VDEV_STATE_OFFLINE;
1109da9bf00Toomas Soome	else if (is_removed != 0)
1110da9bf00Toomas Soome		vdev->v_state = VDEV_STATE_REMOVED;
1111da9bf00Toomas Soome	else if (is_faulted != 0)
1112da9bf00Toomas Soome		vdev->v_state = VDEV_STATE_FAULTED;
1113da9bf00Toomas Soome	else if (is_degraded != 0)
1114da9bf00Toomas Soome		vdev->v_state = VDEV_STATE_DEGRADED;
1115da9bf00Toomas Soome	else if (isnt_present != 0)
1116da9bf00Toomas Soome		vdev->v_state = VDEV_STATE_CANT_OPEN;
1117da9bf00Toomas Soome
1118da9bf00Toomas Soome	vdev->v_islog = is_log != 0;
1119da9bf00Toomas Soome}
1120da9bf00Toomas Soome
1121da9bf00Toomas Soomestatic int
1122da9bf00Toomas Soomevdev_init(uint64_t guid, const unsigned char *nvlist, vdev_t **vdevp)
1123da9bf00Toomas Soome{
1124da9bf00Toomas Soome	uint64_t id, ashift, asize, nparity;
1125da9bf00Toomas Soome	const char *path;
1126da9bf00Toomas Soome	const char *type;
1127ce5f7fbToomas Soome	int len, pathlen;
1128ce5f7fbToomas Soome	char *name;
1129da9bf00Toomas Soome	vdev_t *vdev;
1130da9bf00Toomas Soome
1131ce5f7fbToomas Soome	if (nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, NULL, &id,
1132ce5f7fbToomas Soome	    NULL) ||
1133edb3504Toomas Soome	    nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING,
1134ce5f7fbToomas Soome	    NULL, &type, &len)) {
1135199767fToomas Soome		return (ENOENT);
1136199767fToomas Soome	}
1137199767fToomas Soome
1138ce5f7fbToomas Soome	if (memcmp(type, VDEV_TYPE_MIRROR, len) != 0 &&
1139ce5f7fbToomas Soome	    memcmp(type, VDEV_TYPE_DISK, len) != 0 &&
1140199767fToomas Soome#ifdef ZFS_TEST
1141ce5f7fbToomas Soome	    memcmp(type, VDEV_TYPE_FILE, len) != 0 &&
1142199767fToomas Soome#endif
1143ce5f7fbToomas Soome	    memcmp(type, VDEV_TYPE_RAIDZ, len) != 0 &&
1144ce5f7fbToomas Soome	    memcmp(type, VDEV_TYPE_INDIRECT, len) != 0 &&
11459117d17Toomas Soome	    memcmp(type, VDEV_TYPE_REPLACING, len) != 0 &&
11469117d17Toomas Soome	    memcmp(type, VDEV_TYPE_HOLE, len) != 0) {
11476fd7fa3Toomas Soome		printf("ZFS: can only boot from disk, mirror, raidz1, "
11489117d17Toomas Soome		    "raidz2 and raidz3 vdevs, got: %.*s\n", len, type);
1149199767fToomas Soome		return (EIO);
1150199767fToomas Soome	}
1151199767fToomas Soome
1152ce5f7fbToomas Soome	if (memcmp(type, VDEV_TYPE_MIRROR, len) == 0)
1153da9bf00Toomas Soome		vdev = vdev_create(guid, vdev_mirror_read);
1154ce5f7fbToomas Soome	else if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0)
1155da9bf00Toomas Soome		vdev = vdev_create(guid, vdev_raidz_read);
1156ce5f7fbToomas Soome	else if (memcmp(type, VDEV_TYPE_REPLACING, len) == 0)
1157da9bf00Toomas Soome		vdev = vdev_create(guid, vdev_replacing_read);
1158ce5f7fbToomas Soome	else if (memcmp(type, VDEV_TYPE_INDIRECT, len) == 0) {
1159da9bf00Toomas Soome		vdev_indirect_config_t *vic;
1160199767fToomas Soome
1161da9bf00Toomas Soome		vdev = vdev_create(guid, vdev_indirect_read);
1162da9bf00Toomas Soome		if (vdev != NULL) {
1163c023f65Toomas Soome			vdev->v_state = VDEV_STATE_HEALTHY;
1164c023f65Toomas Soome			vic = &vdev->vdev_indirect_config;
1165c023f65Toomas Soome
1166c023f65Toomas Soome			nvlist_find(nvlist,
1167da9bf00Toomas Soome			    ZPOOL_CONFIG_INDIRECT_OBJECT,
1168da9bf00Toomas Soome			    DATA_TYPE_UINT64,
1169ce5f7fbToomas Soome			    NULL, &vic->vic_mapping_object, NULL);
1170c023f65Toomas Soome			nvlist_find(nvlist,
1171da9bf00Toomas Soome			    ZPOOL_CONFIG_INDIRECT_BIRTHS,
1172da9bf00Toomas Soome			    DATA_TYPE_UINT64,
1173ce5f7fbToomas Soome			    NULL, &vic->vic_births_object, NULL);
1174c023f65Toomas Soome			nvlist_find(nvlist,
1175da9bf00Toomas Soome			    ZPOOL_CONFIG_PREV_INDIRECT_VDEV,
1176da9bf00Toomas Soome			    DATA_TYPE_UINT64,
1177ce5f7fbToomas Soome			    NULL, &vic->vic_prev_indirect_vdev, NULL);
11784c2b14fToomas Soome		}
11799117d17Toomas Soome	} else if (memcmp(type, VDEV_TYPE_HOLE, len) == 0) {
11809117d17Toomas Soome		vdev = vdev_create(guid, vdev_missing_read);
1181da9bf00Toomas Soome	} else {
1182da9bf00Toomas Soome		vdev = vdev_create(guid, vdev_disk_read);
1183da9bf00Toomas Soome	}
1184da9bf00Toomas Soome
1185da9bf00Toomas Soome	if (vdev == NULL)
1186da9bf00Toomas Soome		return (ENOMEM);
1187da9bf00Toomas Soome
1188da9bf00Toomas Soome	vdev_set_initial_state(vdev, nvlist);
1189da9bf00Toomas Soome	vdev->v_id = id;
1190da9bf00Toomas Soome	if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT,
1191ce5f7fbToomas Soome	    DATA_TYPE_UINT64, NULL, &ashift, NULL) == 0)
1192da9bf00Toomas Soome		vdev->v_ashift = ashift;
1193da9bf00Toomas Soome
1194da9bf00Toomas Soome	if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
1195ce5f7fbToomas Soome	    DATA_TYPE_UINT64, NULL, &asize, NULL) == 0) {
1196da9bf00Toomas Soome		vdev->v_psize = asize +
1197da9bf00Toomas Soome		    VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
1198da9bf00Toomas Soome	}
1199da9bf00Toomas Soome
1200da9bf00Toomas Soome	if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY,
1201ce5f7fbToomas Soome	    DATA_TYPE_UINT64, NULL, &nparity, NULL) == 0)
1202da9bf00Toomas Soome		vdev->v_nparity = nparity;
1203da9bf00Toomas Soome
1204da9bf00Toomas Soome	if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH,
1205ce5f7fbToomas Soome	    DATA_TYPE_STRING, NULL, &path, &pathlen) == 0) {
1206ce5f7fbToomas Soome		char prefix[] = "/dev/dsk/";
1207ce5f7fbToomas Soome
1208ce5f7fbToomas Soome		len = strlen(prefix);
1209ce5f7fbToomas Soome		if (len < pathlen && memcmp(path, prefix, len) == 0) {
1210ce5f7fbToomas Soome			path += len;
1211ce5f7fbToomas Soome			pathlen -= len;
1212ce5f7fbToomas Soome		}
1213ce5f7fbToomas Soome		name = malloc(pathlen + 1);
1214ce5f7fbToomas Soome		if (name != NULL) {
1215ce5f7fbToomas Soome			bcopy(path, name, pathlen);
1216ce5f7fbToomas Soome			name[pathlen] = '\0';
1217ce5f7fbToomas Soome		}
1218ce5f7fbToomas Soome		vdev->v_name = name;
1219ce5f7fbToomas Soome		vdev->v_phys_path = NULL;
1220ce5f7fbToomas Soome		vdev->v_devid = NULL;
1221da9bf00Toomas Soome		if (nvlist_find(nvlist, ZPOOL_CONFIG_PHYS_PATH,
1222ce5f7fbToomas Soome		    DATA_TYPE_STRING, NULL, &path, &pathlen) == 0) {
1223ce5f7fbToomas Soome			name = malloc(pathlen + 1);
1224ce5f7fbToomas Soome			if (name != NULL) {
1225ce5f7fbToomas Soome				bcopy(path, name, pathlen);
1226ce5f7fbToomas Soome				name[pathlen] = '\0';
1227ce5f7fbToomas Soome				vdev->v_phys_path = name;
1228ce5f7fbToomas Soome			}
1229edb3504Toomas Soome		}
1230da9bf00Toomas Soome		if (nvlist_find(nvlist, ZPOOL_CONFIG_DEVID,
1231ce5f7fbToomas Soome		    DATA_TYPE_STRING, NULL, &path, &pathlen) == 0) {
1232ce5f7fbToomas Soome			name = malloc(pathlen + 1);
1233ce5f7fbToomas Soome			if (name != NULL) {
1234ce5f7fbToomas Soome				bcopy(path, name, pathlen);
1235ce5f7fbToomas Soome				name[pathlen] = '\0';
1236ce5f7fbToomas Soome				vdev->v_devid = name;
1237ce5f7fbToomas Soome			}
1238199767fToomas Soome		}
1239199767fToomas Soome	} else {
1240da9bf00Toomas Soome		name = NULL;
1241ce5f7fbToomas Soome		if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) {
1242da9bf00Toomas Soome			if (vdev->v_nparity < 1 ||
1243da9bf00Toomas Soome			    vdev->v_nparity > 3) {
1244da9bf00Toomas Soome				printf("ZFS: invalid raidz parity: %d\n",
1245da9bf00Toomas Soome				    vdev->v_nparity);
1246da9bf00Toomas Soome				return (EIO);
1247da9bf00Toomas Soome			}
1248ce5f7fbToomas Soome			(void) asprintf(&name, "%.*s%d-%" PRIu64, len, type,
1249da9bf00Toomas Soome			    vdev->v_nparity, id);
1250da9bf00Toomas Soome		} else {
1251ce5f7fbToomas Soome			(void) asprintf(&name, "%.*s-%" PRIu64, len, type, id);
1252da9bf00Toomas Soome		}
1253da9bf00Toomas Soome		vdev->v_name = name;
1254da9bf00Toomas Soome	}
1255da9bf00Toomas Soome	*vdevp = vdev;
1256da9bf00Toomas Soome	return (0);
1257da9bf00Toomas Soome}
1258da9bf00Toomas Soome
1259da9bf00Toomas Soome/*
1260da9bf00Toomas Soome * Find slot for vdev. We return either NULL to signal to use
1261da9bf00Toomas Soome * STAILQ_INSERT_HEAD, or we return link element to be used with
1262da9bf00Toomas Soome * STAILQ_INSERT_AFTER.
1263da9bf00Toomas Soome */
1264da9bf00Toomas Soomestatic vdev_t *
1265da9bf00Toomas Soomevdev_find_previous(vdev_t *top_vdev, vdev_t *vdev)
1266da9bf00Toomas Soome{
1267da9bf00Toomas Soome	vdev_t *v, *previous;
1268da9bf00Toomas Soome
1269da9bf00Toomas Soome	if (STAILQ_EMPTY(&top_vdev->v_children))
1270da9bf00Toomas Soome		return (NULL);
1271da9bf00Toomas Soome
1272da9bf00Toomas Soome	previous = NULL;
1273da9bf00Toomas Soome	STAILQ_FOREACH(v, &top_vdev->v_children, v_childlink) {
1274da9bf00Toomas Soome		if (v->v_id > vdev->v_id)
1275da9bf00Toomas Soome			return (previous);
1276da9bf00Toomas Soome
1277da9bf00Toomas Soome		if (v->v_id == vdev->v_id)
1278da9bf00Toomas Soome			return (v);
1279da9bf00Toomas Soome
1280da9bf00Toomas Soome		if (v->v_id < vdev->v_id)
1281da9bf00Toomas Soome			previous = v;
1282199767fToomas Soome	}
1283da9bf00Toomas Soome	return (previous);
1284da9bf00Toomas Soome}
1285da9bf00Toomas Soome
1286da9bf00Toomas Soomestatic size_t
1287da9bf00Toomas Soomevdev_child_count(vdev_t *vdev)
1288da9bf00Toomas Soome{
1289da9bf00Toomas Soome	vdev_t *v;
1290da9bf00Toomas Soome	size_t count;
1291da9bf00Toomas Soome
1292da9bf00Toomas Soome	count = 0;
1293da9bf00Toomas Soome	STAILQ_FOREACH(v, &vdev->v_children, v_childlink) {
1294da9bf00Toomas Soome		count++;
1295da9bf00Toomas Soome	}
1296da9bf00Toomas Soome	return (count);
1297da9bf00Toomas Soome}
1298da9bf00Toomas Soome
1299da9bf00Toomas Soome/*
1300da9bf00Toomas Soome * Insert vdev into top_vdev children list. List is ordered by v_id.
1301da9bf00Toomas Soome */
1302da9bf00Toomas Soomestatic void
1303da9bf00Toomas Soomevdev_insert(vdev_t *top_vdev, vdev_t *vdev)
1304da9bf00Toomas Soome{
1305da9bf00Toomas Soome	vdev_t *previous;
1306da9bf00Toomas Soome	size_t count;
1307da9bf00Toomas Soome
1308da9bf00Toomas Soome	/*
1309da9bf00Toomas Soome	 * The top level vdev can appear in random order, depending how
1310da9bf00Toomas Soome	 * the firmware is presenting the disk devices.
1311da9bf00Toomas Soome	 * However, we will insert vdev to create list ordered by v_id,
1312da9bf00Toomas Soome	 * so we can use either STAILQ_INSERT_HEAD or STAILQ_INSERT_AFTER
1313da9bf00Toomas Soome	 * as STAILQ does not have insert before.
1314da9bf00Toomas Soome	 */
1315da9bf00Toomas Soome	previous = vdev_find_previous(top_vdev, vdev);
1316199767fToomas Soome
1317da9bf00Toomas Soome	if (previous == NULL) {
1318da9bf00Toomas Soome		STAILQ_INSERT_HEAD(&top_vdev->v_children, vdev, v_childlink);
1319da9bf00Toomas Soome	} else if (previous->v_id == vdev->v_id) {
1320199767fToomas Soome		/*
1321da9bf00Toomas Soome		 * This vdev was configured from label config,
1322da9bf00Toomas Soome		 * do not insert duplicate.
1323199767fToomas Soome		 */
1324da9bf00Toomas Soome		return;
1325da9bf00Toomas Soome	} else {
1326da9bf00Toomas Soome		STAILQ_INSERT_AFTER(&top_vdev->v_children, previous, vdev,
1327da9bf00Toomas Soome		    v_childlink);
1328da9bf00Toomas Soome	}
1329da9bf00Toomas Soome
1330da9bf00Toomas Soome	count = vdev_child_count(top_vdev);
1331da9bf00Toomas Soome	if (top_vdev->v_nchildren < count)
1332da9bf00Toomas Soome		top_vdev->v_nchildren = count;
1333da9bf00Toomas Soome}
1334da9bf00Toomas Soome
1335da9bf00Toomas Soomestatic int
1336da9bf00Toomas Soomevdev_from_nvlist(spa_t *spa, uint64_t top_guid, const unsigned char *nvlist)
1337da9bf00Toomas Soome{
1338da9bf00Toomas Soome	vdev_t *top_vdev, *vdev;
1339da9bf00Toomas Soome	const unsigned char *kids;
1340da9bf00Toomas Soome	int rc, nkids;
1341da9bf00Toomas Soome
1342da9bf00Toomas Soome	/* Get top vdev. */
1343da9bf00Toomas Soome	top_vdev = vdev_find(top_guid);
1344da9bf00Toomas Soome	if (top_vdev == NULL) {
1345da9bf00Toomas Soome		rc = vdev_init(top_guid, nvlist, &top_vdev);
1346da9bf00Toomas Soome		if (rc != 0)
1347da9bf00Toomas Soome			return (rc);
1348da9bf00Toomas Soome		top_vdev->v_spa = spa;
1349da9bf00Toomas Soome		top_vdev->v_top = top_vdev;
1350da9bf00Toomas Soome		vdev_insert(spa->spa_root_vdev, top_vdev);
1351199767fToomas Soome	}
1352199767fToomas Soome
1353da9bf00Toomas Soome	/* Add children if there are any. */
1354edb3504Toomas Soome	rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
1355ce5f7fbToomas Soome	    &nkids, &kids, NULL);
1356199767fToomas Soome	if (rc == 0) {
1357da9bf00Toomas Soome		for (int i = 0; i < nkids; i++) {
1358da9bf00Toomas Soome			uint64_t guid;
1359da9bf00Toomas Soome
1360da9bf00Toomas Soome			rc = nvlist_find(kids, ZPOOL_CONFIG_GUID,
1361ce5f7fbToomas Soome			    DATA_TYPE_UINT64, NULL, &guid, NULL);
1362da9bf00Toomas Soome			if (rc != 0)
1363da9bf00Toomas Soome				return (rc);
1364da9bf00Toomas Soome			rc = vdev_init(guid, kids, &vdev);
1365da9bf00Toomas Soome			if (rc != 0)
1366199767fToomas Soome				return (rc);
1367da9bf00Toomas Soome
1368da9bf00Toomas Soome			vdev->v_spa = spa;
1369da9bf00Toomas Soome			vdev->v_top = top_vdev;
1370da9bf00Toomas Soome			vdev_insert(top_vdev, vdev);
1371da9bf00Toomas Soome
1372199767fToomas Soome			kids = nvlist_next(kids);
1373199767fToomas Soome		}
1374199767fToomas Soome	} else {
1375da9bf00Toomas Soome		/*
1376da9bf00Toomas Soome		 * When there are no children, nvlist_find() does return
1377da9bf00Toomas Soome		 * error, reset it because leaf devices have no children.
1378da9bf00Toomas Soome		 */
1379da9bf00Toomas Soome		rc = 0;
1380199767fToomas Soome	}
1381199767fToomas Soome
1382da9bf00Toomas Soome	return (rc);
1383da9bf00Toomas Soome}
1384da9bf00Toomas Soome
1385da9bf00Toomas Soomestatic int
1386da9bf00Toomas Soomevdev_init_from_label(spa_t *spa, const unsigned char *nvlist)
1387da9bf00Toomas Soome{
1388da9bf00Toomas Soome	uint64_t pool_guid, top_guid;
1389da9bf00Toomas Soome	const unsigned char *vdevs;
1390da9bf00Toomas Soome
1391da9bf00Toomas Soome	if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
1392ce5f7fbToomas Soome	    NULL, &pool_guid, NULL) ||
1393da9bf00Toomas Soome	    nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64,
1394ce5f7fbToomas Soome	    NULL, &top_guid, NULL) ||
1395da9bf00Toomas Soome	    nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
1396