/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * Pool import support functions.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device.  If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded.  Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed.  Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <ctype.h>
#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
#include <thread_pool.h>

#include <sys/vdev_impl.h>

#include "libzfs.h"
#include "libzfs_impl.h"

67fa9e406ahrens * Intermediate structures used to gather configuration information.
68fa9e406ahrens */
69fa9e406ahrenstypedef struct config_entry {
70fa9e406ahrens	uint64_t		ce_txg;
71fa9e406ahrens	nvlist_t		*ce_config;
72fa9e406ahrens	struct config_entry	*ce_next;
73fa9e406ahrens} config_entry_t;
75fa9e406ahrenstypedef struct vdev_entry {
76fa9e406ahrens	uint64_t		ve_guid;
77fa9e406ahrens	config_entry_t		*ve_configs;
78fa9e406ahrens	struct vdev_entry	*ve_next;
79fa9e406ahrens} vdev_entry_t;
81fa9e406ahrenstypedef struct pool_entry {
82fa9e406ahrens	uint64_t		pe_guid;
83fa9e406ahrens	vdev_entry_t		*pe_vdevs;
84fa9e406ahrens	struct pool_entry	*pe_next;
85fa9e406ahrens} pool_entry_t;
87fa9e406ahrenstypedef struct name_entry {
8899653d4eschrock	char			*ne_name;
89fa9e406ahrens	uint64_t		ne_guid;
90fa9e406ahrens	struct name_entry	*ne_next;
91fa9e406ahrens} name_entry_t;
93fa9e406ahrenstypedef struct pool_list {
94fa9e406ahrens	pool_entry_t		*pools;
95fa9e406ahrens	name_entry_t		*names;
96fa9e406ahrens} pool_list_t;
99fa9e406ahrens * Go through and fix up any path and/or devid information for the given vdev
100fa9e406ahrens * configuration.
101fa9e406ahrens */
10299653d4eschrockstatic int
103fa9e406ahrensfix_paths(nvlist_t *nv, name_entry_t *names)
105fa9e406ahrens	nvlist_t **child;
106fa9e406ahrens	uint_t c, children;
107fa9e406ahrens	uint64_t guid;
108c67d967eschrock	name_entry_t *ne, *best;
109c67d967eschrock	char *path, *devid;
110c67d967eschrock	int matched;
112fa9e406ahrens	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
113fa9e406ahrens	    &child, &children) == 0) {
114fa9e406ahrens		for (c = 0; c < children; c++)
11599653d4eschrock			if (fix_paths(child[c], names) != 0)
11699653d4eschrock				return (-1);
11799653d4eschrock		return (0);
118fa9e406ahrens	}
120fa9e406ahrens	/*
121fa9e406ahrens	 * This is a leaf (file or disk) vdev.  In either case, go through
122fa9e406ahrens	 * the name list and see if we find a matching guid.  If so, replace
123fa9e406ahrens	 * the path and see if we can calculate a new devid.
124c67d967eschrock	 *
125c67d967eschrock	 * There may be multiple names associated with a particular guid, in
126c67d967eschrock	 * which case we have overlapping slices or multiple paths to the same
127c67d967eschrock	 * disk.  If this is the case, then we want to pick the path that is
128c67d967eschrock	 * the most similar to the original, where "most similar" is the number
129c67d967eschrock	 * of matching characters starting from the end of the path.  This will
130c67d967eschrock	 * preserve slice numbers even if the disks have been reorganized, and
131c67d967eschrock	 * will also catch preferred disk names if multiple paths exist.
132fa9e406ahrens	 */
133fa9e406ahrens	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
134c67d967eschrock	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
135c67d967eschrock		path = NULL;
137c67d967eschrock	matched = 0;
138c67d967eschrock	best = NULL;
139c67d967eschrock	for (ne = names; ne != NULL; ne = ne->ne_next) {
140c67d967eschrock		if (ne->ne_guid == guid) {
141c67d967eschrock			const char *src, *dst;
142c67d967eschrock			int count;
144c67d967eschrock			if (path == NULL) {
145c67d967eschrock				best = ne;
146c67d967eschrock				break;
147c67d967eschrock			}
149c67d967eschrock			src = ne->ne_name + strlen(ne->ne_name) - 1;
150c67d967eschrock			dst = path + strlen(path) - 1;
151c67d967eschrock			for (count = 0; src >= ne->ne_name && dst >= path;
152c67d967eschrock			    src--, dst--, count++)
153c67d967eschrock				if (*src != *dst)
154c67d967eschrock					break;
156c67d967eschrock			/*
157c67d967eschrock			 * At this point, 'count' is the number of characters
158c67d967eschrock			 * matched from the end.
159c67d967eschrock			 */
160c67d967eschrock			if (count > matched || best == NULL) {
161c67d967eschrock				best = ne;
162c67d967eschrock				matched = count;
163c67d967eschrock			}
164c67d967eschrock		}
165c67d967eschrock	}
167c67d967eschrock	if (best == NULL)
16899653d4eschrock		return (0);
17099653d4eschrock	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
17199653d4eschrock		return (-1);
17346d46cdYuri Pankov	if ((devid = devid_str_from_path(best->ne_name)) == NULL) {
174fa9e406ahrens		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
175fa9e406ahrens	} else {
176078266aMarcel Telka		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
177078266aMarcel Telka			devid_str_free(devid);
17899653d4eschrock			return (-1);
179078266aMarcel Telka		}
180fa9e406ahrens		devid_str_free(devid);
181fa9e406ahrens	}
18399653d4eschrock	return (0);
187fa9e406ahrens * Add the given configuration to the list of known devices.
188fa9e406ahrens */
18999653d4eschrockstatic int
19099653d4eschrockadd_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
19199653d4eschrock    nvlist_t *config)
19399653d4eschrock	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
194fa9e406ahrens	pool_entry_t *pe;
195fa9e406ahrens	vdev_entry_t *ve;
196fa9e406ahrens	config_entry_t *ce;
197fa9e406ahrens	name_entry_t *ne;
199fa9e406ahrens	/*
200fa94a07brendan	 * If this is a hot spare not currently in use or level 2 cache
201fa94a07brendan	 * device, add it to the list of names to translate, but don't do
202fa94a07brendan	 * anything else.
20399653d4eschrock	 */
20499653d4eschrock	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
205fa94a07brendan	    &state) == 0 &&
206fa94a07brendan	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
20799653d4eschrock	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
20899653d4eschrock		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
209ccae0b5eschrock			return (-1);
21199653d4eschrock		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
21299653d4eschrock			free(ne);
21399653d4eschrock			return (-1);
21499653d4eschrock		}
2151056865Yuri Pankov
21699653d4eschrock		ne->ne_guid = vdev_guid;
21799653d4eschrock		ne->ne_next = pl->names;
21899653d4eschrock		pl->names = ne;
2191056865Yuri Pankov
22099653d4eschrock		return (0);
22199653d4eschrock	}
22399653d4eschrock	/*
224fa9e406ahrens	 * If we have a valid config but cannot read any of these fields, then
225fa9e406ahrens	 * it means we have a half-initialized label.  In vdev_label_init()
226fa9e406ahrens	 * we write a label with txg == 0 so that we can identify the device
227fa9e406ahrens	 * in case the user refers to the same disk later on.  If we fail to
228fa9e406ahrens	 * create the pool, we'll be left with a label in this state
229fa9e406ahrens	 * which should not be considered part of a valid pool.
230fa9e406ahrens	 */
231fa9e406ahrens	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
232fa9e406ahrens	    &pool_guid) != 0 ||
233fa9e406ahrens	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
234fa9e406ahrens	    &vdev_guid) != 0 ||
235fa9e406ahrens	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
236fa9e406ahrens	    &top_guid) != 0 ||
237fa9e406ahrens	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
238fa9e406ahrens	    &txg) != 0 || txg == 0) {
23999653d4eschrock		return (0);
240fa9e406ahrens	}
242fa9e406ahrens	/*
243fa9e406ahrens	 * First, see if we know about this pool.  If not, then add it to the
244fa9e406ahrens	 * list of known pools.
245fa9e406ahrens	 */
246fa9e406ahrens	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
247fa9e406ahrens		if (pe->pe_guid == pool_guid)
248fa9e406ahrens			break;
249fa9e406ahrens	}
251fa9e406ahrens	if (pe == NULL) {
25299653d4eschrock		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
25399653d4eschrock			return (-1);
25499653d4eschrock		}
255fa9e406ahrens		pe->pe_guid = pool_guid;
256fa9e406ahrens		pe->pe_next = pl->pools;
257fa9e406ahrens		pl->pools = pe;
258fa9e406ahrens	}
260fa9e406ahrens	/*
261fa9e406ahrens	 * Second, see if we know about this toplevel vdev.  Add it if its
262fa9e406ahrens	 * missing.
263fa9e406ahrens	 */
264fa9e406ahrens	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
265fa9e406ahrens		if (ve->ve_guid == top_guid)
266fa9e406ahrens			break;
267fa9e406ahrens	}
269fa9e406ahrens	if (ve == NULL) {
27099653d4eschrock		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
27199653d4eschrock			return (-1);
27299653d4eschrock		}
273fa9e406ahrens		ve->ve_guid = top_guid;
274fa9e406ahrens		ve->ve_next = pe->pe_vdevs;
275fa9e406ahrens		pe->pe_vdevs = ve;
276fa9e406ahrens	}
278fa9e406ahrens	/*
279fa9e406ahrens	 * Third, see if we have a config with a matching transaction group.  If
280fa9e406ahrens	 * so, then we do nothing.  Otherwise, add it to the list of known
281fa9e406ahrens	 * configs.
282fa9e406ahrens	 */
283fa9e406ahrens	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
284fa9e406ahrens		if (ce->ce_txg == txg)
285fa9e406ahrens			break;
286fa9e406ahrens	}
288fa9e406ahrens	if (ce == NULL) {
28999653d4eschrock		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
29099653d4eschrock			return (-1);
29199653d4eschrock		}
292fa9e406ahrens		ce->ce_txg = txg;
293ddfe901sara hartse		ce->ce_config = fnvlist_dup(config);
294fa9e406ahrens		ce->ce_next = ve->ve_configs;
295fa9e406ahrens		ve->ve_configs = ce;
296fa9e406ahrens	}
298fa9e406ahrens	/*
299fa9e406ahrens	 * At this point we've successfully added our config to the list of
300fa9e406ahrens	 * known configs.  The last thing to do is add the vdev guid -> path
301fa9e406ahrens	 * mappings so that we can fix up the configuration as necessary before
302fa9e406ahrens	 * doing the import.
303fa9e406ahrens	 */
30499653d4eschrock	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
30599653d4eschrock		return (-1);
30799653d4eschrock	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
30899653d4eschrock		free(ne);
30999653d4eschrock		return (-1);
31099653d4eschrock	}
312fa9e406ahrens	ne->ne_guid = vdev_guid;
313fa9e406ahrens	ne->ne_next = pl->names;
314fa9e406ahrens	pl->names = ne;
31699653d4eschrock	return (0);
320eaca9bbeschrock * Returns true if the named pool matches the given GUID.
321eaca9bbeschrock */
32294de1d4eschrockstatic int
32394de1d4eschrockpool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
32494de1d4eschrock    boolean_t *isactive)
326eaca9bbeschrock	zpool_handle_t *zhp;
327eaca9bbeschrock	uint64_t theguid;
32994de1d4eschrock	if (zpool_open_silent(hdl, name, &zhp) != 0)
33094de1d4eschrock		return (-1);
33294de1d4eschrock	if (zhp == NULL) {
33394de1d4eschrock		*isactive = B_FALSE;
33494de1d4eschrock		return (0);
33594de1d4eschrock	}
337eaca9bbeschrock	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
338eaca9bbeschrock	    &theguid) == 0);
340eaca9bbeschrock	zpool_close(zhp);
34294de1d4eschrock	*isactive = (theguid == guid);
34394de1d4eschrock	return (0);
3462f8aaabeschrockstatic nvlist_t *
3472f8aaabeschrockrefresh_config(libzfs_handle_t *hdl, nvlist_t *config)
3492f8aaabeschrock	nvlist_t *nvl;
3502f8aaabeschrock	zfs_cmd_t zc = { 0 };
3518b65a70Pavel Zakharov	int err, dstbuf_size;
3532f8aaabeschrock	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
3542f8aaabeschrock		return (NULL);
3568b65a70Pavel Zakharov	dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 4);
3578b65a70Pavel Zakharov
3588b65a70Pavel Zakharov	if (zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size) != 0) {
3592f8aaabeschrock		zcmd_free_nvlists(&zc);
3602f8aaabeschrock		return (NULL);
3612f8aaabeschrock	}
3632f8aaabeschrock	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
3642f8aaabeschrock	    &zc)) != 0 && errno == ENOMEM) {
3652f8aaabeschrock		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3662f8aaabeschrock			zcmd_free_nvlists(&zc);
3672f8aaabeschrock			return (NULL);
3682f8aaabeschrock		}
3692f8aaabeschrock	}
3712f8aaabeschrock	if (err) {
3722f8aaabeschrock		zcmd_free_nvlists(&zc);
3732f8aaabeschrock		return (NULL);
3742f8aaabeschrock	}
3762f8aaabeschrock	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
3772f8aaabeschrock		zcmd_free_nvlists(&zc);
3782f8aaabeschrock		return (NULL);
3792f8aaabeschrock	}
3812f8aaabeschrock	zcmd_free_nvlists(&zc);
3822f8aaabeschrock	return (nvl);
38688ecc94George Wilson * Determine if the vdev id is a hole in the namespace.
38788ecc94George Wilson */
38888ecc94George Wilsonboolean_t
38988ecc94George Wilsonvdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
39088ecc94George Wilson{
39188ecc94George Wilson	for (int c = 0; c < holes; c++) {
39288ecc94George Wilson
39388ecc94George Wilson		/* Top-level is a hole */
39488ecc94George Wilson		if (hole_array[c] == id)
39588ecc94George Wilson			return (B_TRUE);
39688ecc94George Wilson	}
39788ecc94George Wilson	return (B_FALSE);
39888ecc94George Wilson}
39988ecc94George Wilson
40088ecc94George Wilson/*
401fa9e406ahrens * Convert our list of pools into the definitive set of configurations.  We
402fa9e406ahrens * start by picking the best config for each toplevel vdev.  Once that's done,
403fa9e406ahrens * we assemble the toplevel vdevs into a full config for the pool.  We make a
404fa9e406ahrens * pass to fix up any incorrect paths, and then add it to the main list to
405fa9e406ahrens * return to the user.
406fa9e406ahrens */
407fa9e406ahrensstatic nvlist_t *
4086f79381Pavel Zakharovget_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
4096f79381Pavel Zakharov    nvlist_t *policy)
41199653d4eschrock	pool_entry_t *pe;
41299653d4eschrock	vdev_entry_t *ve;
41399653d4eschrock	config_entry_t *ce;
414f83b46bPaul Dagnelie	nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
415fa94a07brendan	nvlist_t **spares, **l2cache;
416fa94a07brendan	uint_t i, nspares, nl2cache;
41799653d4eschrock	boolean_t config_seen;
418fa9e406ahrens	uint64_t best_txg;
419f83b46bPaul Dagnelie	char *name, *hostname = NULL;
420dfbb943George Wilson	uint64_t guid;
42199653d4eschrock	uint_t children = 0;
42299653d4eschrock	nvlist_t **child = NULL;
42388ecc94George Wilson	uint_t holes;
42488ecc94George Wilson	uint64_t *hole_array, max_id;
42599653d4eschrock	uint_t c;
42694de1d4eschrock	boolean_t isactive;
4279517395ek	uint64_t hostid;
4282f8aaabeschrock	nvlist_t *nvl;
42924e697dck	boolean_t found_one = B_FALSE;
43088ecc94George Wilson	boolean_t valid_top_config = B_FALSE;
43299653d4eschrock	if (nvlist_alloc(&ret, 0, 0) != 0)
43399653d4eschrock		goto nomem;
43599653d4eschrock	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
43688ecc94George Wilson		uint64_t id, max_txg = 0;
43899653d4eschrock		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
43999653d4eschrock			goto nomem;
44099653d4eschrock		config_seen = B_FALSE;
442fa9e406ahrens		/*
443fa9e406ahrens		 * Iterate over all toplevel vdevs.  Grab the pool configuration
444fa9e406ahrens		 * from the first one we find, and then go through the rest and
445fa9e406ahrens		 * add them as necessary to the 'vdevs' member of the config.
446fa9e406ahrens		 */
44799653d4eschrock		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
449fa9e406ahrens			/*
450fa9e406ahrens			 * Determine the best configuration for this vdev by
451fa9e406ahrens			 * selecting the config with the latest transaction
452fa9e406ahrens			 * group.
453fa9e406ahrens			 */
454fa9e406ahrens			best_txg = 0;
455fa9e406ahrens			for (ce = ve->ve_configs; ce != NULL;
456fa9e406ahrens			    ce = ce->ce_next) {
45899653d4eschrock				if (ce->ce_txg > best_txg) {
459fa9e406ahrens					tmp = ce->ce_config;
46099653d4eschrock					best_txg = ce->ce_txg;
46199653d4eschrock				}
462fa9e406ahrens			}
46488ecc94George Wilson			/*
46588ecc94George Wilson			 * We rely on the fact that the max txg for the
46688ecc94George Wilson			 * pool will contain the most up-to-date information
46788ecc94George Wilson			 * about the valid top-levels in the vdev namespace.
46888ecc94George Wilson			 */
46988ecc94George Wilson			if (best_txg > max_txg) {
47088ecc94George Wilson				(void) nvlist_remove(config,
47188ecc94George Wilson				    ZPOOL_CONFIG_VDEV_CHILDREN,
47288ecc94George Wilson				    DATA_TYPE_UINT64);
47388ecc94George Wilson				(void) nvlist_remove(config,
47488ecc94George Wilson				    ZPOOL_CONFIG_HOLE_ARRAY,
47588ecc94George Wilson				    DATA_TYPE_UINT64_ARRAY);
47688ecc94George Wilson
47788ecc94George Wilson				max_txg = best_txg;
47888ecc94George Wilson				hole_array = NULL;
47988ecc94George Wilson				holes = 0;
48088ecc94George Wilson				max_id = 0;
48188ecc94George Wilson				valid_top_config = B_FALSE;
48288ecc94George Wilson
48388ecc94George Wilson				if (nvlist_lookup_uint64(tmp,
48488ecc94George Wilson				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
48588ecc94George Wilson					verify(nvlist_add_uint64(config,
48688ecc94George Wilson					    ZPOOL_CONFIG_VDEV_CHILDREN,
48788ecc94George Wilson					    max_id) == 0);
48888ecc94George Wilson					valid_top_config = B_TRUE;
48988ecc94George Wilson				}
49088ecc94George Wilson
49188ecc94George Wilson				if (nvlist_lookup_uint64_array(tmp,
49288ecc94George Wilson				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
49388ecc94George Wilson				    &holes) == 0) {
49488ecc94George Wilson					verify(nvlist_add_uint64_array(config,
49588ecc94George Wilson					    ZPOOL_CONFIG_HOLE_ARRAY,
49688ecc94George Wilson					    hole_array, holes) == 0);
49788ecc94George Wilson				}
49888ecc94George Wilson			}
49988ecc94George Wilson
500fa9e406ahrens			if (!config_seen) {
501fa9e406ahrens				/*
502fa9e406ahrens				 * Copy the relevant pieces of data to the pool
503fa9e406ahrens				 * configuration:
504fa9e406ahrens				 *
50599653d4eschrock				 *	version
506dfbb943George Wilson				 *	pool guid
507dfbb943George Wilson				 *	name
5088704186Dan McDonald				 *	comment (if available)
509dfbb943George Wilson				 *	pool state
5109517395ek				 *	hostid (if available)
5119517395ek				 *	hostname (if available)
512fa9e406ahrens				 */
513bda8819George Wilson				uint64_t state, version;
514dfbb943George Wilson				char *comment = NULL;
515dfbb943George Wilson
516dfbb943George Wilson				version = fnvlist_lookup_uint64(tmp,
517dfbb943George Wilson				    ZPOOL_CONFIG_VERSION);
518dfbb943George Wilson				fnvlist_add_uint64(config,
519dfbb943George Wilson				    ZPOOL_CONFIG_VERSION, version);
520dfbb943George Wilson				guid = fnvlist_lookup_uint64(tmp,
521dfbb943George Wilson				    ZPOOL_CONFIG_POOL_GUID);
522dfbb943George Wilson				fnvlist_add_uint64(config,
523dfbb943George Wilson				    ZPOOL_CONFIG_POOL_GUID, guid);
524dfbb943George Wilson				name = fnvlist_lookup_string(tmp,
525dfbb943George Wilson				    ZPOOL_CONFIG_POOL_NAME);
526dfbb943George Wilson				fnvlist_add_string(config,
527dfbb943George Wilson				    ZPOOL_CONFIG_POOL_NAME, name);
528dfbb943George Wilson
5298704186Dan McDonald				if (nvlist_lookup_string(tmp,
530dfbb943George Wilson				    ZPOOL_CONFIG_COMMENT, &comment) == 0)
531dfbb943George Wilson					fnvlist_add_string(config,
532dfbb943George Wilson					    ZPOOL_CONFIG_COMMENT, comment);
533dfbb943George Wilson
534dfbb943George Wilson				state = fnvlist_lookup_uint64(tmp,
535dfbb943George Wilson				    ZPOOL_CONFIG_POOL_STATE);
536dfbb943George Wilson				fnvlist_add_uint64(config,
537dfbb943George Wilson				    ZPOOL_CONFIG_POOL_STATE, state);
5388704186Dan McDonald
5399517395ek				hostid = 0;
5409517395ek				if (nvlist_lookup_uint64(tmp,
5419517395ek				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
542dfbb943George Wilson					fnvlist_add_uint64(config,
543dfbb943George Wilson					    ZPOOL_CONFIG_HOSTID, hostid);
544dfbb943George Wilson					hostname = fnvlist_lookup_string(tmp,
545dfbb943George Wilson					    ZPOOL_CONFIG_HOSTNAME);
546dfbb943George Wilson					fnvlist_add_string(config,
547dfbb943George Wilson					    ZPOOL_CONFIG_HOSTNAME, hostname);
5489517395ek				}
55099653d4eschrock				config_seen = B_TRUE;
551fa9e406ahrens			}
553fa9e406ahrens			/*
554fa9e406ahrens			 * Add this top-level vdev to the child array.
555fa9e406ahrens			 */
556fa9e406ahrens			verify(nvlist_lookup_nvlist(tmp,
557fa9e406ahrens			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
558fa9e406ahrens			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
559fa9e406ahrens			    &id) == 0);
56088ecc94George Wilson
561fa9e406ahrens			if (id >= children) {
562fa9e406ahrens				nvlist_t **newchild;
56499653d4eschrock				newchild = zfs_alloc(hdl, (id + 1) *
565fa9e406ahrens				    sizeof (nvlist_t *));
56699653d4eschrock				if (newchild == NULL)
56799653d4eschrock					goto nomem;
569fa9e406ahrens				for (c = 0; c < children; c++)
570fa9e406ahrens					newchild[c] = child[c];
572fa9e406ahrens				free(child);
573fa9e406ahrens				child = newchild;
574fa9e406ahrens				children = id + 1;
575fa9e406ahrens			}
57699653d4eschrock			if (nvlist_dup(nvtop, &child[id], 0) != 0)
57799653d4eschrock				goto nomem;
579fa9e406ahrens		}
58188ecc94George Wilson		/*
58288ecc94George Wilson		 * If we have information about all the top-levels then
58388ecc94George Wilson		 * clean up the nvlist which we've constructed. This
58488ecc94George Wilson		 * means removing any extraneous devices that are
58588ecc94George Wilson		 * beyond the valid range or adding devices to the end
58688ecc94George Wilson		 * of our array which appear to be missing.
58788ecc94George Wilson		 */
58888ecc94George Wilson		if (valid_top_config) {
58988ecc94George Wilson			if (max_id < children) {
59088ecc94George Wilson				for (c = max_id; c < children; c++)
59188ecc94George Wilson					nvlist_free(child[c]);
59288ecc94George Wilson				children = max_id;
59388ecc94George Wilson			} else if (max_id > children) {
59488ecc94George Wilson				nvlist_t **newchild;
59588ecc94George Wilson
59688ecc94George Wilson				newchild = zfs_alloc(hdl, (max_id) *
59788ecc94George Wilson				    sizeof (nvlist_t *));
59888ecc94George Wilson				if (newchild == NULL)
59988ecc94George Wilson					goto nomem;
60088ecc94George Wilson
60188ecc94George Wilson				for (c = 0; c < children; c++)
60288ecc94George Wilson					newchild[c] = child[c];
60388ecc94George Wilson
60488ecc94George Wilson				free(child);
60588ecc94George Wilson				child = newchild;
60688ecc94George Wilson				children = max_id;
60788ecc94George Wilson			}
60888ecc94George Wilson		}
60988ecc94George Wilson
610fa9e406ahrens		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
611fa9e406ahrens		    &guid) == 0);
613fa9e406ahrens		/*
61488ecc94George Wilson		 * The vdev namespace may contain holes as a result of
61588ecc94George Wilson		 * device removal. We must add them back into the vdev
61688ecc94George Wilson		 * tree before we process any missing devices.
61788ecc94George Wilson		 */
61888ecc94George Wilson		if (holes > 0) {
61988ecc94George Wilson			ASSERT(valid_top_config);
62088ecc94George Wilson
62188ecc94George Wilson			for (c = 0; c < children; c++) {
62288ecc94George Wilson				nvlist_t *holey;
62388ecc94George Wilson
62488ecc94George Wilson				if (child[c] != NULL ||
62588ecc94George Wilson				    !vdev_is_hole(hole_array, holes, c))
62688ecc94George Wilson					continue;
62788ecc94George Wilson
62888ecc94George Wilson				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
62988ecc94George Wilson				    0) != 0)
63088ecc94George Wilson					goto nomem;
63188ecc94George Wilson
63288ecc94George Wilson				/*
63388ecc94George Wilson				 * Holes in the namespace are treated as
63488ecc94George Wilson				 * "hole" top-level vdevs and have a
63588ecc94George Wilson				 * special flag set on them.
63688ecc94George Wilson				 */
63788ecc94George Wilson				if (nvlist_add_string(holey,
63888ecc94George Wilson				    ZPOOL_CONFIG_TYPE,
63988ecc94George Wilson				    VDEV_TYPE_HOLE) != 0 ||
64088ecc94George Wilson				    nvlist_add_uint64(holey,
64188ecc94George Wilson				    ZPOOL_CONFIG_ID, c) != 0 ||
64288ecc94George Wilson				    nvlist_add_uint64(holey,
643078266aMarcel Telka				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
644078266aMarcel Telka					nvlist_free(holey);
64588ecc94George Wilson					goto nomem;
646078266aMarcel Telka				}
64788ecc94George Wilson				child[c] = holey;
64888ecc94George Wilson			}
64988ecc94George Wilson		}
65088ecc94George Wilson
65188ecc94George Wilson		/*
652fa9e406ahrens		 * Look for any missing top-level vdevs.  If this is the case,
653fa9e406ahrens		 * create a faked up 'missing' vdev as a placeholder.  We cannot
654fa9e406ahrens		 * simply compress the child array, because the kernel performs
655fa9e406ahrens		 * certain checks to make sure the vdev IDs match their location
656fa9e406ahrens		 * in the configuration.
657fa9e406ahrens		 */
65888ecc94George Wilson		for (c = 0; c < children; c++) {
659fa9e406ahrens			if (child[c] == NULL) {
660fa9e406ahrens				nvlist_t *missing;
66199653d4eschrock				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
66299653d4eschrock				    0) != 0)
66399653d4eschrock					goto nomem;
66499653d4eschrock				if (nvlist_add_string(missing,
66599653d4eschrock				    ZPOOL_CONFIG_TYPE,
66699653d4eschrock				    VDEV_TYPE_MISSING) != 0 ||
66799653d4eschrock				    nvlist_add_uint64(missing,
66899653d4eschrock				    ZPOOL_CONFIG_ID, c) != 0 ||
66999653d4eschrock				    nvlist_add_uint64(missing,
67099653d4eschrock				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
67199653d4eschrock					nvlist_free(missing);
67299653d4eschrock					goto nomem;
67399653d4eschrock				}
674fa9e406ahrens				child[c] = missing;
675fa9e406ahrens			}
67688ecc94George Wilson		}
678fa9e406ahrens		/*
679fa9e406ahrens		 * Put all of this pool's top-level vdevs into a root vdev.
680fa9e406ahrens		 */
68199653d4eschrock		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
68299653d4eschrock			goto nomem;
68399653d4eschrock		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
68499653d4eschrock		    VDEV_TYPE_ROOT) != 0 ||
68599653d4eschrock		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
68699653d4eschrock		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
68799653d4eschrock		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
68899653d4eschrock		    child, children) != 0) {
68999653d4eschrock			nvlist_free(nvroot);
69099653d4eschrock			goto nomem;
69199653d4eschrock		}
693fa9e406ahrens		for (c = 0; c < children; c++)
694fa9e406ahrens			nvlist_free(child[c]);
695fa9e406ahrens		free(child);
69699653d4eschrock		children = 0;
69799653d4eschrock		child = NULL;
699fa9e406ahrens		/*
700fa9e406ahrens		 * Go through and fix up any paths and/or devids based on our
701fa9e406ahrens		 * known list of vdev GUID -> path mappings.
702fa9e406ahrens		 */
70399653d4eschrock		if (fix_paths(nvroot, pl->names) != 0) {
70499653d4eschrock			nvlist_free(nvroot);
70599653d4eschrock			goto nomem;
70699653d4eschrock		}
708fa9e406ahrens		/*
709fa9e406ahrens		 * Add the root vdev to this pool's configuration.
710fa9e406ahrens		 */
71199653d4eschrock		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
71299653d4eschrock		    nvroot) != 0) {
71399653d4eschrock			nvlist_free(nvroot);
71499653d4eschrock			goto nomem;
71599653d4eschrock		}
716fa9e406ahrens		nvlist_free(nvroot);
718fa9e406ahrens		/*
7193a57275ck		 * zdb uses this path to report on active pools that were
7203a57275ck		 * imported or created using -R.
7213a57275ck		 */
7223a57275ck		if (active_ok)
7233a57275ck			goto add_pool;
7253a57275ck		/*
726fa9e406ahrens		 * Determine if this pool is currently active, in which case we
727fa9e406ahrens		 * can't actually import it.
728fa9e406ahrens		 */
729fa9e406ahrens		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
730fa9e406ahrens		    &name) == 0);
731fa9e406ahrens		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
732fa9e406ahrens		    &guid) == 0);
73494de1d4eschrock		if (pool_active(hdl, name, guid, &isactive) != 0)
73594de1d4eschrock			goto error;
7370192a27eschrock		if (isactive) {
738fa9e406ahrens			nvlist_free(config);
73999653d4eschrock			config = NULL;
740fa9e406ahrens			continue;
741fa9e406ahrens		}
7436f79381Pavel Zakharov		if (policy != NULL) {
7445dafeeaPavel Zakharov			if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
7456f79381Pavel Zakharov			    policy) != 0)
7466f79381Pavel Zakharov				goto nomem;
7476f79381Pavel Zakharov		}
7486f79381Pavel Zakharov
74988ecc94George Wilson		if ((nvl = refresh_config(hdl, config)) == NULL) {
75088ecc94George Wilson			nvlist_free(config);
75188ecc94George Wilson			config = NULL;
75288ecc94George Wilson			continue;
75388ecc94George Wilson		}
755fa9e406ahrens		nvlist_free(config);
7562f8aaabeschrock		config = nvl;
75899653d4eschrock		/*
75999653d4eschrock		 * Go through and update the paths for spares, now that we have
76099653d4eschrock		 * them.
76199653d4eschrock		 */
76299653d4eschrock		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
76399653d4eschrock		    &nvroot) == 0);
76499653d4eschrock		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
76599653d4eschrock		    &spares, &nspares) == 0) {
76699653d4eschrock			for (i = 0; i < nspares; i++) {
76799653d4eschrock				if (fix_paths(spares[i], pl->names) != 0)
76899653d4eschrock					goto nomem;
76999653d4eschrock			}
77099653d4eschrock		}
772fa9e406ahrens		/*
773fa94a07brendan		 * Update the paths for l2cache devices.
774fa94a07brendan		 */
775fa94a07brendan		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
776fa94a07brendan		    &l2cache, &nl2cache) == 0) {
777fa94a07brendan			for (i = 0; i < nl2cache; i++) {
778fa94a07brendan				if (fix_paths(l2cache[i], pl->names) != 0)
779fa94a07brendan					goto nomem;
780fa94a07brendan			}
781fa94a07brendan		}
783fa94a07brendan		/*
7849517395ek		 * Restore the original information read from the actual label.
7859517395ek		 */
7869517395ek		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
7879517395ek		    DATA_TYPE_UINT64);
7889517395ek		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
7899517395ek		    DATA_TYPE_STRING);
7909517395ek		if (hostid != 0) {
7919517395ek			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
7929517395ek			    hostid) == 0);
7939517395ek			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
7949517395ek			    hostname) == 0);
7959517395ek		}
7989517395ek		/*
799fa9e406ahrens		 * Add this pool to the list of configs.
800fa9e406ahrens		 */
801e9dbad6eschrock		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
802e9dbad6eschrock		    &name) == 0);
80399653d4eschrock		if (nvlist_add_nvlist(ret, name, config) != 0)
80499653d4eschrock			goto nomem;
80624e697dck		found_one = B_TRUE;
807fa9e406ahrens		nvlist_free(config);
80899653d4eschrock		config = NULL;
809fa9e406ahrens	}
81124e697dck	if (!found_one) {
81224e697dck		nvlist_free(ret);
81324e697dck		ret = NULL;
81424e697dck	}
816fa9e406ahrens	return (ret);
81999653d4eschrock	(void) no_memory(hdl);
82194de1d4eschrock	nvlist_free(config);
82294de1d4eschrock	nvlist_free(ret);
82399653d4eschrock	for (c = 0; c < children; c++)
82499653d4eschrock		nvlist_free(child[c]);
82594de1d4eschrock	free(child);
82799653d4eschrock	return (NULL);
831fa9e406ahrens * Return the offset of the given label.
832fa9e406ahrens */
833fa9e406ahrensstatic uint64_t
834e743726ahrenslabel_offset(uint64_t size, int l)
836e743726ahrens	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
837fa9e406ahrens	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
838fa9e406ahrens	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
842fa9e406ahrens * Given a file descriptor, read the label information and return an nvlist
843fa9e406ahrens * describing the configuration, if there is one.
844c861bfbAlan Somers * Return 0 on success, or -1 on failure
845fa9e406ahrens */
84799653d4eschrockzpool_read_label(int fd, nvlist_t **config)
849fa9e406ahrens	struct stat64 statbuf;
850fa9e406ahrens	int l;
851fa9e406ahrens	vdev_label_t *label;
852e743726ahrens	uint64_t state, txg, size;
85499653d4eschrock	*config = NULL;
856fa9e406ahrens	if (fstat64(fd, &statbuf) == -1)
857c861bfbAlan Somers		return (-1);
858e743726ahrens	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
86099653d4eschrock	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
86199653d4eschrock		return (-1);
863fa9e406ahrens	for (l = 0; l < VDEV_LABELS; l++) {
864c5904d1eschrock		if (pread64(fd, label, sizeof (vdev_label_t),
865e743726ahrens		    label_offset(size, l)) != sizeof (vdev_label_t))
866fa9e406ahrens			continue;
868fa9e406ahrens		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
86999653d4eschrock		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
870fa9e406ahrens			continue;
87299653d4eschrock		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
873fa94a07brendan		    &state) != 0 || state > POOL_STATE_L2CACHE) {
87499653d4eschrock			nvlist_free(*config);
875fa9e406ahrens			continue;
876fa9e406ahrens		}
878fa94a07brendan		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
87999653d4eschrock		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
88099653d4eschrock		    &txg) != 0 || txg == 0)) {
88199653d4eschrock			nvlist_free(*config);
882fa9e406ahrens			continue;
883fa9e406ahrens		}
885fa9e406ahrens		free(label);
88699653d4eschrock		return (0);
887fa9e406ahrens	}
889fa9e406ahrens	free(label);
89099653d4eschrock	*config = NULL;
891ae5ee1bAlan Somers	errno = ENOENT;
892c861bfbAlan Somers	return (-1);
8954f67d75Eric Taylortypedef struct rdsk_node {
8964f67d75Eric Taylor	char *rn_name;
8974f67d75Eric Taylor	int rn_dfd;
8984f67d75Eric Taylor	libzfs_handle_t *rn_hdl;
8994f67d75Eric Taylor	nvlist_t *rn_config;
9004f67d75Eric Taylor	avl_tree_t *rn_avl;
9014f67d75Eric Taylor	avl_node_t rn_node;
9024f67d75Eric Taylor	boolean_t rn_nozpool;