1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
25 * Copyright 2019 Joyent, Inc.
26 * Copyright 2016 Nexenta Systems, Inc.
27 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
28 * Copyright (c) 2017 Datto Inc.
29 * Copyright (c) 2017, Intel Corporation.
30 */
31
32#include <ctype.h>
33#include <errno.h>
34#include <devid.h>
35#include <fcntl.h>
36#include <libintl.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <strings.h>
40#include <unistd.h>
41#include <libgen.h>
42#include <sys/efi_partition.h>
43#include <sys/vtoc.h>
44#include <sys/zfs_ioctl.h>
45#include <dlfcn.h>
46#include <libzutil.h>
47
48#include "zfs_namecheck.h"
49#include "zfs_prop.h"
50#include "libzfs_impl.h"
51#include "zfs_comutil.h"
52#include "zfeature_common.h"
53
54static int read_efi_label(nvlist_t *, diskaddr_t *, boolean_t *);
55static boolean_t zpool_vdev_is_interior(const char *name);
56
57#define	BACKUP_SLICE	"s2"
58
/*
 * Context flags handed to zpool_valid_proplist() describing the operation
 * the properties are being validated for.
 *
 * The fields are explicitly unsigned: a plain 'int' one-bit bit-field may be
 * signed, in which case it can only represent 0 and -1 and a stored B_TRUE
 * would not read back as 1.
 */
typedef struct prop_flags {
	unsigned int create:1;	/* Validate property on creation */
	unsigned int import:1;	/* Validate property on import */
} prop_flags_t;
63
64/*
65 * ====================================================================
66 *   zpool property functions
67 * ====================================================================
68 */
69
70static int
71zpool_get_all_props(zpool_handle_t *zhp)
72{
73	zfs_cmd_t zc = { 0 };
74	libzfs_handle_t *hdl = zhp->zpool_hdl;
75
76	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
77
78	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
79		return (-1);
80
81	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
82		if (errno == ENOMEM) {
83			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
84				zcmd_free_nvlists(&zc);
85				return (-1);
86			}
87		} else {
88			zcmd_free_nvlists(&zc);
89			return (-1);
90		}
91	}
92
93	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
94		zcmd_free_nvlists(&zc);
95		return (-1);
96	}
97
98	zcmd_free_nvlists(&zc);
99
100	return (0);
101}
102
103static int
104zpool_props_refresh(zpool_handle_t *zhp)
105{
106	nvlist_t *old_props;
107
108	old_props = zhp->zpool_props;
109
110	if (zpool_get_all_props(zhp) != 0)
111		return (-1);
112
113	nvlist_free(old_props);
114	return (0);
115}
116
117static char *
118zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
119    zprop_source_t *src)
120{
121	nvlist_t *nv, *nvl;
122	uint64_t ival;
123	char *value;
124	zprop_source_t source;
125
126	nvl = zhp->zpool_props;
127	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
128		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
129		source = ival;
130		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
131	} else {
132		source = ZPROP_SRC_DEFAULT;
133		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
134			value = "-";
135	}
136
137	if (src)
138		*src = source;
139
140	return (value);
141}
142
143uint64_t
144zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
145{
146	nvlist_t *nv, *nvl;
147	uint64_t value;
148	zprop_source_t source;
149
150	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
151		/*
152		 * zpool_get_all_props() has most likely failed because
153		 * the pool is faulted, but if all we need is the top level
154		 * vdev's guid then get it from the zhp config nvlist.
155		 */
156		if ((prop == ZPOOL_PROP_GUID) &&
157		    (nvlist_lookup_nvlist(zhp->zpool_config,
158		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
159		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
160		    == 0)) {
161			return (value);
162		}
163		return (zpool_prop_default_numeric(prop));
164	}
165
166	nvl = zhp->zpool_props;
167	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
168		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
169		source = value;
170		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
171	} else {
172		source = ZPROP_SRC_DEFAULT;
173		value = zpool_prop_default_numeric(prop);
174	}
175
176	if (src)
177		*src = source;
178
179	return (value);
180}
181
182/*
183 * Map VDEV STATE to printed strings.
184 */
185const char *
186zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
187{
188	switch (state) {
189	case VDEV_STATE_CLOSED:
190	case VDEV_STATE_OFFLINE:
191		return (gettext("OFFLINE"));
192	case VDEV_STATE_REMOVED:
193		return (gettext("REMOVED"));
194	case VDEV_STATE_CANT_OPEN:
195		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
196			return (gettext("FAULTED"));
197		else if (aux == VDEV_AUX_SPLIT_POOL)
198			return (gettext("SPLIT"));
199		else
200			return (gettext("UNAVAIL"));
201	case VDEV_STATE_FAULTED:
202		return (gettext("FAULTED"));
203	case VDEV_STATE_DEGRADED:
204		return (gettext("DEGRADED"));
205	case VDEV_STATE_HEALTHY:
206		return (gettext("ONLINE"));
207
208	default:
209		break;
210	}
211
212	return (gettext("UNKNOWN"));
213}
214
215/*
216 * Map POOL STATE to printed strings.
217 */
218const char *
219zpool_pool_state_to_name(pool_state_t state)
220{
221	switch (state) {
222	case POOL_STATE_ACTIVE:
223		return (gettext("ACTIVE"));
224	case POOL_STATE_EXPORTED:
225		return (gettext("EXPORTED"));
226	case POOL_STATE_DESTROYED:
227		return (gettext("DESTROYED"));
228	case POOL_STATE_SPARE:
229		return (gettext("SPARE"));
230	case POOL_STATE_L2CACHE:
231		return (gettext("L2CACHE"));
232	case POOL_STATE_UNINITIALIZED:
233		return (gettext("UNINITIALIZED"));
234	case POOL_STATE_UNAVAIL:
235		return (gettext("UNAVAIL"));
236	case POOL_STATE_POTENTIALLY_ACTIVE:
237		return (gettext("POTENTIALLY_ACTIVE"));
238	}
239
240	return (gettext("UNKNOWN"));
241}
242
243/*
244 * Get a zpool property value for 'prop' and return the value in
245 * a pre-allocated buffer.
246 */
247int
248zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
249    zprop_source_t *srctype, boolean_t literal)
250{
251	uint64_t intval;
252	const char *strval;
253	zprop_source_t src = ZPROP_SRC_NONE;
254	nvlist_t *nvroot;
255	vdev_stat_t *vs;
256	uint_t vsc;
257
258	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
259		switch (prop) {
260		case ZPOOL_PROP_NAME:
261			(void) strlcpy(buf, zpool_get_name(zhp), len);
262			break;
263
264		case ZPOOL_PROP_HEALTH:
265			(void) strlcpy(buf, "FAULTED", len);
266			break;
267
268		case ZPOOL_PROP_GUID:
269			intval = zpool_get_prop_int(zhp, prop, &src);
270			(void) snprintf(buf, len, "%llu", intval);
271			break;
272
273		case ZPOOL_PROP_ALTROOT:
274		case ZPOOL_PROP_CACHEFILE:
275		case ZPOOL_PROP_COMMENT:
276			if (zhp->zpool_props != NULL ||
277			    zpool_get_all_props(zhp) == 0) {
278				(void) strlcpy(buf,
279				    zpool_get_prop_string(zhp, prop, &src),
280				    len);
281				break;
282			}
283			/* FALLTHROUGH */
284		default:
285			(void) strlcpy(buf, "-", len);
286			break;
287		}
288
289		if (srctype != NULL)
290			*srctype = src;
291		return (0);
292	}
293
294	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
295	    prop != ZPOOL_PROP_NAME)
296		return (-1);
297
298	switch (zpool_prop_get_type(prop)) {
299	case PROP_TYPE_STRING:
300		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
301		    len);
302		break;
303
304	case PROP_TYPE_NUMBER:
305		intval = zpool_get_prop_int(zhp, prop, &src);
306
307		switch (prop) {
308		case ZPOOL_PROP_SIZE:
309		case ZPOOL_PROP_ALLOCATED:
310		case ZPOOL_PROP_FREE:
311		case ZPOOL_PROP_FREEING:
312		case ZPOOL_PROP_LEAKED:
313		case ZPOOL_PROP_ASHIFT:
314			if (literal) {
315				(void) snprintf(buf, len, "%llu",
316				    (u_longlong_t)intval);
317			} else {
318				(void) zfs_nicenum(intval, buf, len);
319			}
320			break;
321		case ZPOOL_PROP_BOOTSIZE:
322		case ZPOOL_PROP_EXPANDSZ:
323		case ZPOOL_PROP_CHECKPOINT:
324			if (intval == 0) {
325				(void) strlcpy(buf, "-", len);
326			} else if (literal) {
327				(void) snprintf(buf, len, "%llu",
328				    (u_longlong_t)intval);
329			} else {
330				(void) zfs_nicenum(intval, buf, len);
331			}
332			break;
333		case ZPOOL_PROP_CAPACITY:
334			if (literal) {
335				(void) snprintf(buf, len, "%llu",
336				    (u_longlong_t)intval);
337			} else {
338				(void) snprintf(buf, len, "%llu%%",
339				    (u_longlong_t)intval);
340			}
341			break;
342		case ZPOOL_PROP_FRAGMENTATION:
343			if (intval == UINT64_MAX) {
344				(void) strlcpy(buf, "-", len);
345			} else {
346				(void) snprintf(buf, len, "%llu%%",
347				    (u_longlong_t)intval);
348			}
349			break;
350		case ZPOOL_PROP_DEDUPRATIO:
351			(void) snprintf(buf, len, "%llu.%02llux",
352			    (u_longlong_t)(intval / 100),
353			    (u_longlong_t)(intval % 100));
354			break;
355		case ZPOOL_PROP_HEALTH:
356			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
357			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
358			verify(nvlist_lookup_uint64_array(nvroot,
359			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
360			    == 0);
361
362			(void) strlcpy(buf, zpool_state_to_name(intval,
363			    vs->vs_aux), len);
364			break;
365		case ZPOOL_PROP_VERSION:
366			if (intval >= SPA_VERSION_FEATURES) {
367				(void) snprintf(buf, len, "-");
368				break;
369			}
370			/* FALLTHROUGH */
371		default:
372			(void) snprintf(buf, len, "%llu", intval);
373		}
374		break;
375
376	case PROP_TYPE_INDEX:
377		intval = zpool_get_prop_int(zhp, prop, &src);
378		if (zpool_prop_index_to_string(prop, intval, &strval)
379		    != 0)
380			return (-1);
381		(void) strlcpy(buf, strval, len);
382		break;
383
384	default:
385		abort();
386	}
387
388	if (srctype)
389		*srctype = src;
390
391	return (0);
392}
393
394/*
395 * Check if the bootfs name has the same pool name as it is set to.
396 * Assuming bootfs is a valid dataset name.
397 */
398static boolean_t
399bootfs_name_valid(const char *pool, char *bootfs)
400{
401	int len = strlen(pool);
402	if (bootfs[0] == '\0')
403		return (B_TRUE);
404
405	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
406		return (B_FALSE);
407
408	if (strncmp(pool, bootfs, len) == 0 &&
409	    (bootfs[len] == '/' || bootfs[len] == '\0'))
410		return (B_TRUE);
411
412	return (B_FALSE);
413}
414
415boolean_t
416zpool_is_bootable(zpool_handle_t *zhp)
417{
418	char bootfs[ZFS_MAX_DATASET_NAME_LEN];
419
420	return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
421	    sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
422	    sizeof (bootfs)) != 0);
423}
424
425
426/*
427 * Given an nvlist of zpool properties to be set, validate that they are
428 * correct, and parse any numeric properties (index, boolean, etc) if they are
429 * specified as strings.
430 */
431static nvlist_t *
432zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
433    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
434{
435	nvpair_t *elem;
436	nvlist_t *retprops;
437	zpool_prop_t prop;
438	char *strval;
439	uint64_t intval;
440	char *slash, *check;
441	struct stat64 statbuf;
442	zpool_handle_t *zhp;
443
444	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
445		(void) no_memory(hdl);
446		return (NULL);
447	}
448
449	elem = NULL;
450	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
451		const char *propname = nvpair_name(elem);
452
453		prop = zpool_name_to_prop(propname);
454		if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) {
455			int err;
456			char *fname = strchr(propname, '@') + 1;
457
458			err = zfeature_lookup_name(fname, NULL);
459			if (err != 0) {
460				ASSERT3U(err, ==, ENOENT);
461				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
462				    "invalid feature '%s', '%s'"), fname,
463				    propname);
464				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
465				goto error;
466			}
467
468			if (nvpair_type(elem) != DATA_TYPE_STRING) {
469				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
470				    "'%s' must be a string"), propname);
471				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
472				goto error;
473			}
474
475			(void) nvpair_value_string(elem, &strval);
476			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
477			    strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
478				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
479				    "property '%s' can only be set to "
480				    "'enabled' or 'disabled'"), propname);
481				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
482				goto error;
483			}
484
485			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
486				(void) no_memory(hdl);
487				goto error;
488			}
489			continue;
490		}
491
492		/*
493		 * Make sure this property is valid and applies to this type.
494		 */
495		if (prop == ZPOOL_PROP_INVAL) {
496			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
497			    "invalid property '%s'"), propname);
498			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
499			goto error;
500		}
501
502		if (zpool_prop_readonly(prop)) {
503			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
504			    "is readonly"), propname);
505			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
506			goto error;
507		}
508
509		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
510		    &strval, &intval, errbuf) != 0)
511			goto error;
512
513		/*
514		 * Perform additional checking for specific properties.
515		 */
516		switch (prop) {
517		case ZPOOL_PROP_VERSION:
518			if (intval < version ||
519			    !SPA_VERSION_IS_SUPPORTED(intval)) {
520				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
521				    "property '%s' number %d is invalid."),
522				    propname, intval);
523				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
524				goto error;
525			}
526			break;
527
528		case ZPOOL_PROP_BOOTSIZE:
529			if (!flags.create) {
530				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
531				    "property '%s' can only be set during pool "
532				    "creation"), propname);
533				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
534				goto error;
535			}
536			break;
537
538		case ZPOOL_PROP_ASHIFT:
539			if (intval != 0 &&
540			    (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
541				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
542				    "invalid '%s=%d' property: only values "
543				    "between %" PRId32 " and %" PRId32 " "
544				    "are allowed.\n"),
545				    propname, intval, ASHIFT_MIN, ASHIFT_MAX);
546				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
547				goto error;
548			}
549			break;
550
551		case ZPOOL_PROP_BOOTFS:
552			if (flags.create || flags.import) {
553				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
554				    "property '%s' cannot be set at creation "
555				    "or import time"), propname);
556				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
557				goto error;
558			}
559
560			if (version < SPA_VERSION_BOOTFS) {
561				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
562				    "pool must be upgraded to support "
563				    "'%s' property"), propname);
564				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
565				goto error;
566			}
567
568			/*
569			 * bootfs property value has to be a dataset name and
570			 * the dataset has to be in the same pool as it sets to.
571			 */
572			if (!bootfs_name_valid(poolname, strval)) {
573				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
574				    "is an invalid name"), strval);
575				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
576				goto error;
577			}
578
579			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
580				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
581				    "could not open pool '%s'"), poolname);
582				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
583				goto error;
584			}
585			zpool_close(zhp);
586			break;
587
588		case ZPOOL_PROP_ALTROOT:
589			if (!flags.create && !flags.import) {
590				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
591				    "property '%s' can only be set during pool "
592				    "creation or import"), propname);
593				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
594				goto error;
595			}
596
597			if (strval[0] != '/') {
598				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
599				    "bad alternate root '%s'"), strval);
600				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
601				goto error;
602			}
603			break;
604
605		case ZPOOL_PROP_CACHEFILE:
606			if (strval[0] == '\0')
607				break;
608
609			if (strcmp(strval, "none") == 0)
610				break;
611
612			if (strval[0] != '/') {
613				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
614				    "property '%s' must be empty, an "
615				    "absolute path, or 'none'"), propname);
616				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
617				goto error;
618			}
619
620			slash = strrchr(strval, '/');
621
622			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
623			    strcmp(slash, "/..") == 0) {
624				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
625				    "'%s' is not a valid file"), strval);
626				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
627				goto error;
628			}
629
630			*slash = '\0';
631
632			if (strval[0] != '\0' &&
633			    (stat64(strval, &statbuf) != 0 ||
634			    !S_ISDIR(statbuf.st_mode))) {
635				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
636				    "'%s' is not a valid directory"),
637				    strval);
638				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
639				goto error;
640			}
641
642			*slash = '/';
643			break;
644
645		case ZPOOL_PROP_COMMENT:
646			for (check = strval; *check != '\0'; check++) {
647				if (!isprint(*check)) {
648					zfs_error_aux(hdl,
649					    dgettext(TEXT_DOMAIN,
650					    "comment may only have printable "
651					    "characters"));
652					(void) zfs_error(hdl, EZFS_BADPROP,
653					    errbuf);
654					goto error;
655				}
656			}
657			if (strlen(strval) > ZPROP_MAX_COMMENT) {
658				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
659				    "comment must not exceed %d characters"),
660				    ZPROP_MAX_COMMENT);
661				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
662				goto error;
663			}
664			break;
665
666		case ZPOOL_PROP_READONLY:
667			if (!flags.import) {
668				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
669				    "property '%s' can only be set at "
670				    "import time"), propname);
671				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
672				goto error;
673			}
674			break;
675
676		case ZPOOL_PROP_TNAME:
677			if (!flags.create) {
678				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
679				    "property '%s' can only be set at "
680				    "creation time"), propname);
681				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
682				goto error;
683			}
684			break;
685
686		case ZPOOL_PROP_MULTIHOST:
687			if (get_system_hostid() == 0) {
688				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
689				    "requires a non-zero system hostid"));
690				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
691				goto error;
692			}
693			break;
694
695		default:
696			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
697			    "property '%s'(%d) not defined"), propname, prop);
698			break;
699		}
700	}
701
702	return (retprops);
703error:
704	nvlist_free(retprops);
705	return (NULL);
706}
707
708/*
709 * Set zpool property : propname=propval.
710 */
711int
712zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
713{
714	zfs_cmd_t zc = { 0 };
715	int ret = -1;
716	char errbuf[1024];
717	nvlist_t *nvl = NULL;
718	nvlist_t *realprops;
719	uint64_t version;
720	prop_flags_t flags = { 0 };
721
722	(void) snprintf(errbuf, sizeof (errbuf),
723	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
724	    zhp->zpool_name);
725
726	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
727		return (no_memory(zhp->zpool_hdl));
728
729	if (nvlist_add_string(nvl, propname, propval) != 0) {
730		nvlist_free(nvl);
731		return (no_memory(zhp->zpool_hdl));
732	}
733
734	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
735	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
736	    zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
737		nvlist_free(nvl);
738		return (-1);
739	}
740
741	nvlist_free(nvl);
742	nvl = realprops;
743
744	/*
745	 * Execute the corresponding ioctl() to set this property.
746	 */
747	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
748
749	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
750		nvlist_free(nvl);
751		return (-1);
752	}
753
754	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
755
756	zcmd_free_nvlists(&zc);
757	nvlist_free(nvl);
758
759	if (ret)
760		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
761	else
762		(void) zpool_props_refresh(zhp);
763
764	return (ret);
765}
766
767int
768zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
769{
770	libzfs_handle_t *hdl = zhp->zpool_hdl;
771	zprop_list_t *entry;
772	char buf[ZFS_MAXPROPLEN];
773	nvlist_t *features = NULL;
774	zprop_list_t **last;
775	boolean_t firstexpand = (NULL == *plp);
776
777	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
778		return (-1);
779
780	last = plp;
781	while (*last != NULL)
782		last = &(*last)->pl_next;
783
784	if ((*plp)->pl_all)
785		features = zpool_get_features(zhp);
786
787	if ((*plp)->pl_all && firstexpand) {
788		for (int i = 0; i < SPA_FEATURES; i++) {
789			zprop_list_t *entry = zfs_alloc(hdl,
790			    sizeof (zprop_list_t));
791			entry->pl_prop = ZPROP_INVAL;
792			entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
793			    spa_feature_table[i].fi_uname);
794			entry->pl_width = strlen(entry->pl_user_prop);
795			entry->pl_all = B_TRUE;
796
797			*last = entry;
798			last = &entry->pl_next;
799		}
800	}
801
802	/* add any unsupported features */
803	for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL);
804	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
805		char *propname;
806		boolean_t found;
807		zprop_list_t *entry;
808
809		if (zfeature_is_supported(nvpair_name(nvp)))
810			continue;
811
812		propname = zfs_asprintf(hdl, "unsupported@%s",
813		    nvpair_name(nvp));
814
815		/*
816		 * Before adding the property to the list make sure that no
817		 * other pool already added the same property.
818		 */
819		found = B_FALSE;
820		entry = *plp;
821		while (entry != NULL) {
822			if (entry->pl_user_prop != NULL &&
823			    strcmp(propname, entry->pl_user_prop) == 0) {
824				found = B_TRUE;
825				break;
826			}
827			entry = entry->pl_next;
828		}
829		if (found) {
830			free(propname);
831			continue;
832		}
833
834		entry = zfs_alloc(hdl, sizeof (zprop_list_t));
835		entry->pl_prop = ZPROP_INVAL;
836		entry->pl_user_prop = propname;
837		entry->pl_width = strlen(entry->pl_user_prop);
838		entry->pl_all = B_TRUE;
839
840		*last = entry;
841		last = &entry->pl_next;
842	}
843
844	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
845
846		if (entry->pl_fixed)
847			continue;
848
849		if (entry->pl_prop != ZPROP_INVAL &&
850		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
851		    NULL, B_FALSE) == 0) {
852			if (strlen(buf) > entry->pl_width)
853				entry->pl_width = strlen(buf);
854		}
855	}
856
857	return (0);
858}
859
860/*
861 * Get the state for the given feature on the given ZFS pool.
862 */
863int
864zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
865    size_t len)
866{
867	uint64_t refcount;
868	boolean_t found = B_FALSE;
869	nvlist_t *features = zpool_get_features(zhp);
870	boolean_t supported;
871	const char *feature = strchr(propname, '@') + 1;
872
873	supported = zpool_prop_feature(propname);
874	ASSERT(supported || zpool_prop_unsupported(propname));
875
876	/*
877	 * Convert from feature name to feature guid. This conversion is
878	 * unecessary for unsupported@... properties because they already
879	 * use guids.
880	 */
881	if (supported) {
882		int ret;
883		spa_feature_t fid;
884
885		ret = zfeature_lookup_name(feature, &fid);
886		if (ret != 0) {
887			(void) strlcpy(buf, "-", len);
888			return (ENOTSUP);
889		}
890		feature = spa_feature_table[fid].fi_guid;
891	}
892
893	if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
894		found = B_TRUE;
895
896	if (supported) {
897		if (!found) {
898			(void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
899		} else  {
900			if (refcount == 0)
901				(void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
902			else
903				(void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
904		}
905	} else {
906		if (found) {
907			if (refcount == 0) {
908				(void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
909			} else {
910				(void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
911			}
912		} else {
913			(void) strlcpy(buf, "-", len);
914			return (ENOTSUP);
915		}
916	}
917
918	return (0);
919}
920
921/*
922 * Don't start the slice at the default block of 34; many storage
923 * devices will use a stripe width of 128k, so start there instead.
924 */
925#define	NEW_START_BLOCK	256
926
927/*
928 * Validate the given pool name, optionally putting an extended error message in
929 * 'buf'.
930 */
931boolean_t
932zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
933{
934	namecheck_err_t why;
935	char what;
936	int ret;
937
938	ret = pool_namecheck(pool, &why, &what);
939
940	/*
941	 * The rules for reserved pool names were extended at a later point.
942	 * But we need to support users with existing pools that may now be
943	 * invalid.  So we only check for this expanded set of names during a
944	 * create (or import), and only in userland.
945	 */
946	if (ret == 0 && !isopen &&
947	    (strncmp(pool, "mirror", 6) == 0 ||
948	    strncmp(pool, "raidz", 5) == 0 ||
949	    strncmp(pool, "spare", 5) == 0 ||
950	    strcmp(pool, "log") == 0)) {
951		if (hdl != NULL)
952			zfs_error_aux(hdl,
953			    dgettext(TEXT_DOMAIN, "name is reserved"));
954		return (B_FALSE);
955	}
956
957
958	if (ret != 0) {
959		if (hdl != NULL) {
960			switch (why) {
961			case NAME_ERR_TOOLONG:
962				zfs_error_aux(hdl,
963				    dgettext(TEXT_DOMAIN, "name is too long"));
964				break;
965
966			case NAME_ERR_INVALCHAR:
967				zfs_error_aux(hdl,
968				    dgettext(TEXT_DOMAIN, "invalid character "
969				    "'%c' in pool name"), what);
970				break;
971
972			case NAME_ERR_NOLETTER:
973				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
974				    "name must begin with a letter"));
975				break;
976
977			case NAME_ERR_RESERVED:
978				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
979				    "name is reserved"));
980				break;
981
982			case NAME_ERR_DISKLIKE:
983				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
984				    "pool name is reserved"));
985				break;
986
987			case NAME_ERR_LEADING_SLASH:
988				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
989				    "leading slash in name"));
990				break;
991
992			case NAME_ERR_EMPTY_COMPONENT:
993				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
994				    "empty component in name"));
995				break;
996
997			case NAME_ERR_TRAILING_SLASH:
998				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
999				    "trailing slash in name"));
1000				break;
1001
1002			case NAME_ERR_MULTIPLE_DELIMITERS:
1003				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1004				    "multiple '@' and/or '#' delimiters in "
1005				    "name"));
1006				break;
1007
1008			default:
1009				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1010				    "(%d) not defined"), why);
1011				break;
1012			}
1013		}
1014		return (B_FALSE);
1015	}
1016
1017	return (B_TRUE);
1018}
1019
1020/*
1021 * Open a handle to the given pool, even if the pool is currently in the FAULTED
1022 * state.
1023 */
1024zpool_handle_t *
1025zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1026{
1027	zpool_handle_t *zhp;
1028	boolean_t missing;
1029
1030	/*
1031	 * Make sure the pool name is valid.
1032	 */
1033	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1034		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1035		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1036		    pool);
1037		return (NULL);
1038	}
1039
1040	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1041		return (NULL);
1042
1043	zhp->zpool_hdl = hdl;
1044	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1045
1046	if (zpool_refresh_stats(zhp, &missing) != 0) {
1047		zpool_close(zhp);
1048		return (NULL);
1049	}
1050
1051	if (missing) {
1052		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1053		(void) zfs_error_fmt(hdl, EZFS_NOENT,
1054		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1055		zpool_close(zhp);
1056		return (NULL);
1057	}
1058
1059	return (zhp);
1060}
1061
1062/*
1063 * Like the above, but silent on error.  Used when iterating over pools (because
1064 * the configuration cache may be out of date).
1065 */
1066int
1067zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1068{
1069	zpool_handle_t *zhp;
1070	boolean_t missing;
1071
1072	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1073		return (-1);
1074
1075	zhp->zpool_hdl = hdl;
1076	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1077
1078	if (zpool_refresh_stats(zhp, &missing) != 0) {
1079		zpool_close(zhp);
1080		return (-1);
1081	}
1082
1083	if (missing) {
1084		zpool_close(zhp);
1085		*ret = NULL;
1086		return (0);
1087	}
1088
1089	*ret = zhp;
1090	return (0);
1091}
1092
1093/*
1094 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1095 * state.
1096 */
1097zpool_handle_t *
1098zpool_open(libzfs_handle_t *hdl, const char *pool)
1099{
1100	zpool_handle_t *zhp;
1101
1102	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1103		return (NULL);
1104
1105	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1106		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1107		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1108		zpool_close(zhp);
1109		return (NULL);
1110	}
1111
1112	return (zhp);
1113}
1114
1115/*
1116 * Close the handle.  Simply frees the memory associated with the handle.
1117 */
1118void
1119zpool_close(zpool_handle_t *zhp)
1120{
1121	nvlist_free(zhp->zpool_config);
1122	nvlist_free(zhp->zpool_old_config);
1123	nvlist_free(zhp->zpool_props);
1124	free(zhp);
1125}
1126
1127/*
1128 * Return the name of the pool.
1129 */
1130const char *
1131zpool_get_name(zpool_handle_t *zhp)
1132{
1133	return (zhp->zpool_name);
1134}
1135
1136
1137/*
1138 * Return the state of the pool (ACTIVE or UNAVAILABLE)
1139 */
1140int
1141zpool_get_state(zpool_handle_t *zhp)
1142{
1143	return (zhp->zpool_state);
1144}
1145
1146/*
1147 * Check if vdev list contains a special vdev
1148 */
1149static boolean_t
1150zpool_has_special_vdev(nvlist_t *nvroot)
1151{
1152	nvlist_t **child;
1153	uint_t children;
1154
1155	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child,
1156	    &children) == 0) {
1157		for (uint_t c = 0; c < children; c++) {
1158			char *bias;
1159
1160			if (nvlist_lookup_string(child[c],
1161			    ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0 &&
1162			    strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
1163				return (B_TRUE);
1164			}
1165		}
1166	}
1167	return (B_FALSE);
1168}
1169
1170/*
1171 * Create the named pool, using the provided vdev list.  It is assumed
1172 * that the consumer has already validated the contents of the nvlist, so we
1173 * don't have to worry about error semantics.
1174 */
1175int
1176zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
1177    nvlist_t *props, nvlist_t *fsprops)
1178{
1179	zfs_cmd_t zc = { 0 };
1180	nvlist_t *zc_fsprops = NULL;
1181	nvlist_t *zc_props = NULL;
1182	nvlist_t *hidden_args = NULL;
1183	uint8_t *wkeydata = NULL;
1184	uint_t wkeylen = 0;
1185	char msg[1024];
1186	int ret = -1;
1187
1188	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1189	    "cannot create '%s'"), pool);
1190
1191	if (!zpool_name_valid(hdl, B_FALSE, pool))
1192		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
1193
1194	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1195		return (-1);
1196
1197	if (props) {
1198		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
1199
1200		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
1201		    SPA_VERSION_1, flags, msg)) == NULL) {
1202			goto create_failed;
1203		}
1204	}
1205
1206	if (fsprops) {
1207		uint64_t zoned;
1208		char *zonestr;
1209
1210		zoned = ((nvlist_lookup_string(fsprops,
1211		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
1212		    strcmp(zonestr, "on") == 0);
1213
1214		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
1215		    fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
1216			goto create_failed;
1217		}
1218
1219		if (nvlist_exists(zc_fsprops,
1220		    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS)) &&
1221		    !zpool_has_special_vdev(nvroot)) {
1222			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1223			    "%s property requires a special vdev"),
1224			    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
1225			(void) zfs_error(hdl, EZFS_BADPROP, msg);
1226			goto create_failed;
1227		}
1228
1229		if (!zc_props &&
1230		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
1231			goto create_failed;
1232		}
1233		if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, B_TRUE,
1234		    &wkeydata, &wkeylen) != 0) {
1235			(void) zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
1236			goto create_failed;
1237		}
1238		if (nvlist_add_nvlist(zc_props,
1239		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
1240			goto create_failed;
1241		}
1242		if (wkeydata != NULL) {
1243			if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
1244				goto create_failed;
1245
1246			if (nvlist_add_uint8_array(hidden_args, "wkeydata",
1247			    wkeydata, wkeylen) != 0)
1248				goto create_failed;
1249
1250			if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
1251			    hidden_args) != 0)
1252				goto create_failed;
1253		}
1254	}
1255
1256	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
1257		goto create_failed;
1258
1259	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
1260
1261	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
1262
1263		zcmd_free_nvlists(&zc);
1264		nvlist_free(zc_props);
1265		nvlist_free(zc_fsprops);
1266		nvlist_free(hidden_args);
1267		if (wkeydata != NULL)
1268			free(wkeydata);
1269
1270		switch (errno) {
1271		case EBUSY:
1272			/*
1273			 * This can happen if the user has specified the same
1274			 * device multiple times.  We can't reliably detect this
1275			 * until we try to add it and see we already have a
1276			 * label.
1277			 */
1278			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1279			    "one or more vdevs refer to the same device"));
1280			return (zfs_error(hdl, EZFS_BADDEV, msg));
1281
1282		case ERANGE:
1283			/*
1284			 * This happens if the record size is smaller or larger
1285			 * than the allowed size range, or not a power of 2.
1286			 *
1287			 * NOTE: although zfs_valid_proplist is called earlier,
1288			 * this case may have slipped through since the
1289			 * pool does not exist yet and it is therefore
1290			 * impossible to read properties e.g. max blocksize
1291			 * from the pool.
1292			 */
1293			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1294			    "record size invalid"));
1295			return (zfs_error(hdl, EZFS_BADPROP, msg));
1296
1297		case EOVERFLOW:
1298			/*
1299			 * This occurs when one of the devices is below
1300			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1301			 * device was the problem device since there's no
1302			 * reliable way to determine device size from userland.
1303			 */
1304			{
1305				char buf[64];
1306
1307				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1308
1309				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1310				    "one or more devices is less than the "
1311				    "minimum size (%s)"), buf);
1312			}
1313			return (zfs_error(hdl, EZFS_BADDEV, msg));
1314
1315		case ENOSPC:
1316			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1317			    "one or more devices is out of space"));
1318			return (zfs_error(hdl, EZFS_BADDEV, msg));
1319
1320		default:
1321			return (zpool_standard_error(hdl, errno, msg));
1322		}
1323	}
1324
1325create_failed:
1326	zcmd_free_nvlists(&zc);
1327	nvlist_free(zc_props);
1328	nvlist_free(zc_fsprops);
1329	nvlist_free(hidden_args);
1330	if (wkeydata != NULL)
1331		free(wkeydata);
1332	return (ret);
1333}
1334
1335/*
1336 * Destroy the given pool.  It is up to the caller to ensure that there are no
1337 * datasets left in the pool.
1338 */
1339int
1340zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1341{
1342	zfs_cmd_t zc = { 0 };
1343	zfs_handle_t *zfp = NULL;
1344	libzfs_handle_t *hdl = zhp->zpool_hdl;
1345	char msg[1024];
1346
1347	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1348	    (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1349		return (-1);
1350
1351	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1352	zc.zc_history = (uint64_t)(uintptr_t)log_str;
1353
1354	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1355		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1356		    "cannot destroy '%s'"), zhp->zpool_name);
1357
1358		if (errno == EROFS) {
1359			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1360			    "one or more devices is read only"));
1361			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1362		} else {
1363			(void) zpool_standard_error(hdl, errno, msg);
1364		}
1365
1366		if (zfp)
1367			zfs_close(zfp);
1368		return (-1);
1369	}
1370
1371	if (zfp) {
1372		remove_mountpoint(zfp);
1373		zfs_close(zfp);
1374	}
1375
1376	return (0);
1377}
1378
1379/*
1380 * Create a checkpoint in the given pool.
1381 */
1382int
1383zpool_checkpoint(zpool_handle_t *zhp)
1384{
1385	libzfs_handle_t *hdl = zhp->zpool_hdl;
1386	char msg[1024];
1387	int error;
1388
1389	error = lzc_pool_checkpoint(zhp->zpool_name);
1390	if (error != 0) {
1391		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1392		    "cannot checkpoint '%s'"), zhp->zpool_name);
1393		(void) zpool_standard_error(hdl, error, msg);
1394		return (-1);
1395	}
1396
1397	return (0);
1398}
1399
1400/*
1401 * Discard the checkpoint from the given pool.
1402 */
1403int
1404zpool_discard_checkpoint(zpool_handle_t *zhp)
1405{
1406	libzfs_handle_t *hdl = zhp->zpool_hdl;
1407	char msg[1024];
1408	int error;
1409
1410	error = lzc_pool_checkpoint_discard(zhp->zpool_name);
1411	if (error != 0) {
1412		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1413		    "cannot discard checkpoint in '%s'"), zhp->zpool_name);
1414		(void) zpool_standard_error(hdl, error, msg);
1415		return (-1);
1416	}
1417
1418	return (0);
1419}
1420
1421/*
1422 * Add the given vdevs to the pool.  The caller must have already performed the
1423 * necessary verification to ensure that the vdev specification is well-formed.
1424 */
1425int
1426zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1427{
1428	zfs_cmd_t zc = { 0 };
1429	int ret;
1430	libzfs_handle_t *hdl = zhp->zpool_hdl;
1431	char msg[1024];
1432	nvlist_t **spares, **l2cache;
1433	uint_t nspares, nl2cache;
1434
1435	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1436	    "cannot add to '%s'"), zhp->zpool_name);
1437
1438	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1439	    SPA_VERSION_SPARES &&
1440	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1441	    &spares, &nspares) == 0) {
1442		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1443		    "upgraded to add hot spares"));
1444		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1445	}
1446
1447	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1448	    SPA_VERSION_L2CACHE &&
1449	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1450	    &l2cache, &nl2cache) == 0) {
1451		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1452		    "upgraded to add cache devices"));
1453		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1454	}
1455
1456	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1457		return (-1);
1458	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1459
1460	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1461		switch (errno) {
1462		case EBUSY:
1463			/*
1464			 * This can happen if the user has specified the same
1465			 * device multiple times.  We can't reliably detect this
1466			 * until we try to add it and see we already have a
1467			 * label.
1468			 */
1469			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1470			    "one or more vdevs refer to the same device"));
1471			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1472			break;
1473
1474		case EINVAL:
1475			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1476			    "invalid config; a pool with removing/removed "
1477			    "vdevs does not support adding raidz vdevs"));
1478			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1479			break;
1480
1481		case EOVERFLOW:
1482			/*
1483			 * This occurrs when one of the devices is below
1484			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1485			 * device was the problem device since there's no
1486			 * reliable way to determine device size from userland.
1487			 */
1488			{
1489				char buf[64];
1490
1491				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1492
1493				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1494				    "device is less than the minimum "
1495				    "size (%s)"), buf);
1496			}
1497			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1498			break;
1499
1500		case ENOTSUP:
1501			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1502			    "pool must be upgraded to add these vdevs"));
1503			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
1504			break;
1505
1506		case EDOM:
1507			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1508			    "root pool can not have multiple vdevs"
1509			    " or separate logs"));
1510			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1511			break;
1512
1513		default:
1514			(void) zpool_standard_error(hdl, errno, msg);
1515		}
1516
1517		ret = -1;
1518	} else {
1519		ret = 0;
1520	}
1521
1522	zcmd_free_nvlists(&zc);
1523
1524	return (ret);
1525}
1526
1527/*
1528 * Exports the pool from the system.  The caller must ensure that there are no
1529 * mounted datasets in the pool.
1530 */
1531static int
1532zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1533    const char *log_str)
1534{
1535	zfs_cmd_t zc = { 0 };
1536	char msg[1024];
1537
1538	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1539	    "cannot export '%s'"), zhp->zpool_name);
1540
1541	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1542	zc.zc_cookie = force;
1543	zc.zc_guid = hardforce;
1544	zc.zc_history = (uint64_t)(uintptr_t)log_str;
1545
1546	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1547		switch (errno) {
1548		case EXDEV:
1549			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1550			    "use '-f' to override the following errors:\n"
1551			    "'%s' has an active shared spare which could be"
1552			    " used by other pools once '%s' is exported."),
1553			    zhp->zpool_name, zhp->zpool_name);
1554			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1555			    msg));
1556		default:
1557			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1558			    msg));
1559		}
1560	}
1561
1562	return (0);
1563}
1564
/*
 * Export the pool, optionally forcing it ('force').  This is
 * zpool_export_common() with the hard-force flag always off; 'log_str' is
 * passed through unchanged.
 */
int
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
	return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
1570
/*
 * Export the pool with both the force and hard-force flags set.  This is
 * zpool_export_common() with both booleans B_TRUE; 'log_str' is passed
 * through unchanged.
 */
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1576
1577static void
1578zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1579    nvlist_t *config)
1580{
1581	nvlist_t *nv = NULL;
1582	uint64_t rewindto;
1583	int64_t loss = -1;
1584	struct tm t;
1585	char timestr[128];
1586
1587	if (!hdl->libzfs_printerr || config == NULL)
1588		return;
1589
1590	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1591	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1592		return;
1593	}
1594
1595	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1596		return;
1597	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1598
1599	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1600	    strftime(timestr, 128, 0, &t) != 0) {
1601		if (dryrun) {
1602			(void) printf(dgettext(TEXT_DOMAIN,
1603			    "Would be able to return %s "
1604			    "to its state as of %s.\n"),
1605			    name, timestr);
1606		} else {
1607			(void) printf(dgettext(TEXT_DOMAIN,
1608			    "Pool %s returned to its state as of %s.\n"),
1609			    name, timestr);
1610		}
1611		if (loss > 120) {
1612			(void) printf(dgettext(TEXT_DOMAIN,
1613			    "%s approximately %lld "),
1614			    dryrun ? "Would discard" : "Discarded",
1615			    (loss + 30) / 60);
1616			(void) printf(dgettext(TEXT_DOMAIN,
1617			    "minutes of transactions.\n"));
1618		} else if (loss > 0) {
1619			(void) printf(dgettext(TEXT_DOMAIN,
1620			    "%s approximately %lld "),
1621			    dryrun ? "Would discard" : "Discarded", loss);
1622			(void) printf(dgettext(TEXT_DOMAIN,
1623			    "seconds of transactions.\n"));
1624		}
1625	}
1626}
1627
1628void
1629zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1630    nvlist_t *config)
1631{
1632	nvlist_t *nv = NULL;
1633	int64_t loss = -1;
1634	uint64_t edata = UINT64_MAX;
1635	uint64_t rewindto;
1636	struct tm t;
1637	char timestr[128];
1638
1639	if (!hdl->libzfs_printerr)
1640		return;
1641
1642	if (reason >= 0)
1643		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
1644	else
1645		(void) printf(dgettext(TEXT_DOMAIN, "\t"));
1646
1647	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1648	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1649	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1650	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1651		goto no_info;
1652
1653	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1654	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1655	    &edata);
1656
1657	(void) printf(dgettext(TEXT_DOMAIN,
1658	    "Recovery is possible, but will result in some data loss.\n"));
1659
1660	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1661	    strftime(timestr, 128, 0, &t) != 0) {
1662		(void) printf(dgettext(TEXT_DOMAIN,
1663		    "\tReturning the pool to its state as of %s\n"
1664		    "\tshould correct the problem.  "),
1665		    timestr);
1666	} else {
1667		(void) printf(dgettext(TEXT_DOMAIN,
1668		    "\tReverting the pool to an earlier state "
1669		    "should correct the problem.\n\t"));
1670	}
1671
1672	if (loss > 120) {
1673		(void) printf(dgettext(TEXT_DOMAIN,
1674		    "Approximately %lld minutes of data\n"
1675		    "\tmust be discarded, irreversibly.  "), (loss + 30) / 60);
1676	} else if (loss > 0) {
1677		(void) printf(dgettext(TEXT_DOMAIN,
1678		    "Approximately %lld seconds of data\n"
1679		    "\tmust be discarded, irreversibly.  "), loss);
1680	}
1681	if (edata != 0 && edata != UINT64_MAX) {
1682		if (edata == 1) {
1683			(void) printf(dgettext(TEXT_DOMAIN,
1684			    "After rewind, at least\n"
1685			    "\tone persistent user-data error will remain.  "));
1686		} else {
1687			(void) printf(dgettext(TEXT_DOMAIN,
1688			    "After rewind, several\n"
1689			    "\tpersistent user-data errors will remain.  "));
1690		}
1691	}
1692	(void) printf(dgettext(TEXT_DOMAIN,
1693	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1694	    reason >= 0 ? "clear" : "import", name);
1695
1696	(void) printf(dgettext(TEXT_DOMAIN,
1697	    "A scrub of the pool\n"
1698	    "\tis strongly recommended after recovery.\n"));
1699	return;
1700
1701no_info:
1702	(void) printf(dgettext(TEXT_DOMAIN,
1703	    "Destroy and re-create the pool from\n\ta backup source.\n"));
1704}
1705
1706/*
1707 * zpool_import() is a contracted interface. Should be kept the same
1708 * if possible.
1709 *
1710 * Applications should use zpool_import_props() to import a pool with
1711 * new properties value to be set.
1712 */
1713int
1714zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1715    char *altroot)
1716{
1717	nvlist_t *props = NULL;
1718	int ret;
1719
1720	if (altroot != NULL) {
1721		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1722			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1723			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1724			    newname));
1725		}
1726
1727		if (nvlist_add_string(props,
1728		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1729		    nvlist_add_string(props,
1730		    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1731			nvlist_free(props);
1732			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1733			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1734			    newname));
1735		}
1736	}
1737
1738	ret = zpool_import_props(hdl, config, newname, props,
1739	    ZFS_IMPORT_NORMAL);
1740	nvlist_free(props);
1741	return (ret);
1742}
1743
1744static void
1745print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1746    int indent)
1747{
1748	nvlist_t **child;
1749	uint_t c, children;
1750	char *vname;
1751	uint64_t is_log = 0;
1752
1753	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1754	    &is_log);
1755
1756	if (name != NULL)
1757		(void) printf("\t%*s%s%s\n", indent, "", name,
1758		    is_log ? " [log]" : "");
1759
1760	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1761	    &child, &children) != 0)
1762		return;
1763
1764	for (c = 0; c < children; c++) {
1765		vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
1766		print_vdev_tree(hdl, vname, child[c], indent + 2);
1767		free(vname);
1768	}
1769}
1770
1771void
1772zpool_print_unsup_feat(nvlist_t *config)
1773{
1774	nvlist_t *nvinfo, *unsup_feat;
1775
1776	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1777	    0);
1778	verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1779	    &unsup_feat) == 0);
1780
1781	for (nvpair_t *nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1782	    nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1783		char *desc;
1784
1785		verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1786		verify(nvpair_value_string(nvp, &desc) == 0);
1787
1788		if (strlen(desc) > 0)
1789			(void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1790		else
1791			(void) printf("\t%s\n", nvpair_name(nvp));
1792	}
1793}
1794
1795/*
1796 * Import the given pool using the known configuration and a list of
1797 * properties to be set. The configuration should have come from
1798 * zpool_find_import(). The 'newname' parameters control whether the pool
1799 * is imported with a different name.
1800 */
1801int
1802zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1803    nvlist_t *props, int flags)
1804{
1805	zfs_cmd_t zc = { 0 };
1806	zpool_load_policy_t policy;
1807	nvlist_t *nv = NULL;
1808	nvlist_t *nvinfo = NULL;
1809	nvlist_t *missing = NULL;
1810	char *thename;
1811	char *origname;
1812	int ret;
1813	int error = 0;
1814	char errbuf[1024];
1815
1816	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1817	    &origname) == 0);
1818
1819	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1820	    "cannot import pool '%s'"), origname);
1821
1822	if (newname != NULL) {
1823		if (!zpool_name_valid(hdl, B_FALSE, newname))
1824			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1825			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1826			    newname));
1827		thename = (char *)newname;
1828	} else {
1829		thename = origname;
1830	}
1831
1832	if (props != NULL) {
1833		uint64_t version;
1834		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1835
1836		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1837		    &version) == 0);
1838
1839		if ((props = zpool_valid_proplist(hdl, origname,
1840		    props, version, flags, errbuf)) == NULL)
1841			return (-1);
1842		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1843			nvlist_free(props);
1844			return (-1);
1845		}
1846		nvlist_free(props);
1847	}
1848
1849	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1850
1851	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1852	    &zc.zc_guid) == 0);
1853
1854	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1855		zcmd_free_nvlists(&zc);
1856		return (-1);
1857	}
1858	if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1859		zcmd_free_nvlists(&zc);
1860		return (-1);
1861	}
1862
1863	zc.zc_cookie = flags;
1864	while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1865	    errno == ENOMEM) {
1866		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1867			zcmd_free_nvlists(&zc);
1868			return (-1);
1869		}
1870	}
1871	if (ret != 0)
1872		error = errno;
1873
1874	(void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1875
1876	zcmd_free_nvlists(&zc);
1877
1878	zpool_get_load_policy(config, &policy);
1879
1880	if (error) {
1881		char desc[1024];
1882		char aux[256];
1883
1884		/*
1885		 * Dry-run failed, but we print out what success
1886		 * looks like if we found a best txg
1887		 */
1888		if (policy.zlp_rewind & ZPOOL_TRY_REWIND) {
1889			zpool_rewind_exclaim(hdl, newname ? origname : thename,
1890			    B_TRUE, nv);
1891			nvlist_free(nv);
1892			return (-1);
1893		}
1894
1895		if (newname == NULL)
1896			(void) snprintf(desc, sizeof (desc),
1897			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1898			    thename);
1899		else
1900			(void) snprintf(desc, sizeof (desc),
1901			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1902			    origname, thename);
1903
1904		switch (error) {
1905		case ENOTSUP:
1906			if (nv != NULL && nvlist_lookup_nvlist(nv,
1907			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1908			    nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1909				(void) printf(dgettext(TEXT_DOMAIN, "This "
1910				    "pool uses the following feature(s) not "
1911				    "supported by this system:\n"));
1912				zpool_print_unsup_feat(nv);
1913				if (nvlist_exists(nvinfo,
1914				    ZPOOL_CONFIG_CAN_RDONLY)) {
1915					(void) printf(dgettext(TEXT_DOMAIN,
1916					    "All unsupported features are only "
1917					    "required for writing to the pool."
1918					    "\nThe pool can be imported using "
1919					    "'-o readonly=on'.\n"));
1920				}
1921			}
1922			/*
1923			 * Unsupported version.
1924			 */
1925			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1926			break;
1927
1928		case EREMOTEIO:
1929			if (nv != NULL && nvlist_lookup_nvlist(nv,
1930			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
1931				char *hostname = "<unknown>";
1932				uint64_t hostid = 0;
1933				mmp_state_t mmp_state;
1934
1935				mmp_state = fnvlist_lookup_uint64(nvinfo,
1936				    ZPOOL_CONFIG_MMP_STATE);
1937
1938				if (nvlist_exists(nvinfo,
1939				    ZPOOL_CONFIG_MMP_HOSTNAME))
1940					hostname = fnvlist_lookup_string(nvinfo,
1941					    ZPOOL_CONFIG_MMP_HOSTNAME);
1942
1943				if (nvlist_exists(nvinfo,
1944				    ZPOOL_CONFIG_MMP_HOSTID))
1945					hostid = fnvlist_lookup_uint64(nvinfo,
1946					    ZPOOL_CONFIG_MMP_HOSTID);
1947
1948				if (mmp_state == MMP_STATE_ACTIVE) {
1949					(void) snprintf(aux, sizeof (aux),
1950					    dgettext(TEXT_DOMAIN, "pool is imp"
1951					    "orted on host '%s' (hostid=%lx).\n"
1952					    "Export the pool on the other "
1953					    "system, then run 'zpool import'."),
1954					    hostname, (unsigned long) hostid);
1955				} else if (mmp_state == MMP_STATE_NO_HOSTID) {
1956					(void) snprintf(aux, sizeof (aux),
1957					    dgettext(TEXT_DOMAIN, "pool has "
1958					    "the multihost property on and "
1959					    "the\nsystem's hostid is not "
1960					    "set.\n"));
1961				}
1962
1963				(void) zfs_error_aux(hdl, aux);
1964			}
1965			(void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
1966			break;
1967
1968		case EINVAL:
1969			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1970			break;
1971
1972		case EROFS:
1973			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1974			    "one or more devices is read only"));
1975			(void) zfs_error(hdl, EZFS_BADDEV, desc);
1976			break;
1977
1978		case ENXIO:
1979			if (nv && nvlist_lookup_nvlist(nv,
1980			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1981			    nvlist_lookup_nvlist(nvinfo,
1982			    ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1983				(void) printf(dgettext(TEXT_DOMAIN,
1984				    "The devices below are missing or "
1985				    "corrupted, use '-m' to import the pool "
1986				    "anyway:\n"));
1987				print_vdev_tree(hdl, NULL, missing, 2);
1988				(void) printf("\n");
1989			}
1990			(void) zpool_standard_error(hdl, error, desc);
1991			break;
1992
1993		case EEXIST:
1994			(void) zpool_standard_error(hdl, error, desc);
1995			break;
1996		case ENAMETOOLONG:
1997			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1998			    "new name of at least one dataset is longer than "
1999			    "the maximum allowable length"));
2000			(void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
2001			break;
2002		default:
2003			(void) zpool_standard_error(hdl, error, desc);
2004			zpool_explain_recover(hdl,
2005			    newname ? origname : thename, -error, nv);
2006			break;
2007		}
2008
2009		nvlist_free(nv);
2010		ret = -1;
2011	} else {
2012		zpool_handle_t *zhp;
2013
2014		/*
2015		 * This should never fail, but play it safe anyway.
2016		 */
2017		if (zpool_open_silent(hdl, thename, &zhp) != 0)
2018			ret = -1;
2019		else if (zhp != NULL)
2020			zpool_close(zhp);
2021		if (policy.zlp_rewind &
2022		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
2023			zpool_rewind_exclaim(hdl, newname ? origname : thename,
2024			    ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv);
2025		}
2026		nvlist_free(nv);
2027		return (0);
2028	}
2029
2030	return (ret);
2031}
2032
2033/*
2034 * Translate vdev names to guids.  If a vdev_path is determined to be
2035 * unsuitable then a vd_errlist is allocated and the vdev path and errno
2036 * are added to it.
2037 */
2038static int
2039zpool_translate_vdev_guids(zpool_handle_t *zhp, nvlist_t *vds,
2040    nvlist_t *vdev_guids, nvlist_t *guids_to_paths, nvlist_t **vd_errlist)
2041{
2042	nvlist_t *errlist = NULL;
2043	int error = 0;
2044
2045	for (nvpair_t *elem = nvlist_next_nvpair(vds, NULL); elem != NULL;
2046	    elem = nvlist_next_nvpair(vds, elem)) {
2047		boolean_t spare, cache;
2048
2049		char *vd_path = nvpair_name(elem);
2050		nvlist_t *tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache,
2051		    NULL);
2052
2053		if ((tgt == NULL) || cache || spare) {
2054			if (errlist == NULL) {
2055				errlist = fnvlist_alloc();
2056				error = EINVAL;
2057			}
2058
2059			uint64_t err = (tgt == NULL) ? EZFS_NODEVICE :
2060			    (spare ? EZFS_ISSPARE : EZFS_ISL2CACHE);
2061			fnvlist_add_int64(errlist, vd_path, err);
2062			continue;
2063		}
2064
2065		uint64_t guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
2066		fnvlist_add_uint64(vdev_guids, vd_path, guid);
2067
2068		char msg[MAXNAMELEN];
2069		(void) snprintf(msg, sizeof (msg), "%llu", (u_longlong_t)guid);
2070		fnvlist_add_string(guids_to_paths, msg, vd_path);
2071	}
2072
2073	if (error != 0) {
2074		verify(errlist != NULL);
2075		if (vd_errlist != NULL)
2076			*vd_errlist = errlist;
2077		else
2078			fnvlist_free(errlist);
2079	}
2080
2081	return (error);
2082}
2083
/*
 * Scan the pool.
 *
 * 'func' (passed in zc_cookie) selects the kind of scan and 'cmd' (passed
 * in zc_flags) the scrub command.  Returns 0 on success, including the two
 * errno cases below that are deliberately treated as success; otherwise the
 * error is reported on the library handle and the value returned by the
 * error routine is passed back.
 */
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	int err;
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = func;
	zc.zc_flags = cmd;

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
		return (0);

	/* Snapshot errno before any other call can change it. */
	err = errno;

	/* ECANCELED on a scrub means we resumed a paused scrub */
	if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
	    cmd == POOL_SCRUB_NORMAL)
		return (0);

	/* ENOENT for a normal, non-cancel request is also not a failure. */
	if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
		return (0);

	/* Pick the message that matches the operation that was attempted. */
	if (func == POOL_SCAN_SCRUB) {
		if (cmd == POOL_SCRUB_PAUSE) {
			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
			    "cannot pause scrubbing %s"), zc.zc_name);
		} else {
			assert(cmd == POOL_SCRUB_NORMAL);
			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
			    "cannot scrub %s"), zc.zc_name);
		}
	} else if (func == POOL_SCAN_RESILVER) {
		assert(cmd == POOL_SCRUB_NORMAL);
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot restart resilver on %s"), zc.zc_name);
	} else if (func == POOL_SCAN_NONE) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
		    zc.zc_name);
	} else {
		/*
		 * NOTE(review): if assertions are compiled out, 'msg' is
		 * left unset on this path and is still used below.
		 */
		assert(!"unexpected result");
	}

	if (err == EBUSY) {
		nvlist_t *nvroot;
		pool_scan_stat_t *ps = NULL;
		uint_t psc;

		/*
		 * Consult the scan stats in the cached config to tell a
		 * running scrub apart from anything else; 'ps' stays NULL
		 * if no stats are present.
		 */
		verify(nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
		(void) nvlist_lookup_uint64_array(nvroot,
		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
		if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
			if (cmd == POOL_SCRUB_PAUSE)
				return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
			else
				return (zfs_error(hdl, EZFS_SCRUBBING, msg));
		} else {
			return (zfs_error(hdl, EZFS_RESILVERING, msg));
		}
	} else if (err == ENOENT) {
		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
	} else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) {
		return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg));
	} else {
		return (zpool_standard_error(hdl, err, msg));
	}
}
2158
2159static int
2160xlate_init_err(int err)
2161{
2162	switch (err) {
2163	case ENODEV:
2164		return (EZFS_NODEVICE);
2165	case EINVAL:
2166	case EROFS:
2167		return (EZFS_BADDEV);
2168	case EBUSY:
2169		return (EZFS_INITIALIZING);
2170	case ESRCH:
2171		return (EZFS_NO_INITIALIZE);
2172	}
2173	return (err);
2174}
2175
2176/*
2177 * Begin, suspend, or cancel the initialization (initializing of all free
2178 * blocks) for the given vdevs in the given pool.
2179 */
2180int
2181zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
2182    nvlist_t *vds)
2183{
2184	char msg[1024];
2185	int err;
2186
2187	nvlist_t *vdev_guids = fnvlist_alloc();
2188	nvlist_t *guids_to_paths = fnvlist_alloc();
2189	nvlist_t *vd_errlist = NULL;
2190	nvlist_t *errlist;
2191	nvpair_t *elem;
2192
2193	err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
2194	    guids_to_paths, &vd_errlist);
2195
2196	if (err == 0) {
2197		err = lzc_initialize(zhp->zpool_name, cmd_type,
2198		    vdev_guids, &errlist);
2199		if (err == 0) {
2200			fnvlist_free(vdev_guids);
2201			fnvlist_free(guids_to_paths);
2202			return (0);
2203		}
2204
2205		if (errlist != NULL) {
2206			vd_errlist = fnvlist_lookup_nvlist(errlist,
2207			    ZPOOL_INITIALIZE_VDEVS);
2208		}
2209
2210		(void) snprintf(msg, sizeof (msg),
2211		    dgettext(TEXT_DOMAIN, "operation failed"));
2212	} else {
2213		verify(vd_errlist != NULL);
2214	}
2215
2216	for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
2217	    elem = nvlist_next_nvpair(vd_errlist, elem)) {
2218		int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem));
2219		char *path;
2220
2221		if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem),
2222		    &path) != 0)
2223			path = nvpair_name(elem);
2224
2225		(void) zfs_error_fmt(zhp->zpool_hdl, vd_error,
2226		    "cannot initialize '%s'", path);
2227	}
2228
2229	fnvlist_free(vdev_guids);
2230	fnvlist_free(guids_to_paths);
2231
2232	if (vd_errlist != NULL) {
2233		fnvlist_free(vd_errlist);
2234		return (-1);
2235	}
2236
2237	return (zpool_standard_error(zhp->zpool_hdl, err, msg));
2238}
2239
2240static int
2241xlate_trim_err(int err)
2242{
2243	switch (err) {
2244	case ENODEV:
2245		return (EZFS_NODEVICE);
2246	case EINVAL:
2247	case EROFS:
2248		return (EZFS_BADDEV);
2249	case EBUSY:
2250		return (EZFS_TRIMMING);
2251	case ESRCH:
2252		return (EZFS_NO_TRIM);
2253	case EOPNOTSUPP:
2254		return (EZFS_TRIM_NOTSUP);
2255	}
2256	return (err);
2257}
2258
2259/*
2260 * Begin, suspend, or cancel the TRIM (discarding of all free blocks) for
2261 * the given vdevs in the given pool.
2262 */
2263int
2264zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
2265    trimflags_t *trim_flags)
2266{
2267	char msg[1024];
2268	int err;
2269
2270	nvlist_t *vdev_guids = fnvlist_alloc();
2271	nvlist_t *guids_to_paths = fnvlist_alloc();
2272	nvlist_t *vd_errlist = NULL;
2273	nvlist_t *errlist;
2274	nvpair_t *elem;
2275
2276	err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
2277	    guids_to_paths, &vd_errlist);
2278	if (err == 0) {
2279		err = lzc_trim(zhp->zpool_name, cmd_type, trim_flags->rate,
2280		    trim_flags->secure, vdev_guids, &errlist);
2281		if (err == 0) {
2282			fnvlist_free(vdev_guids);
2283			fnvlist_free(guids_to_paths);
2284			return (0);
2285		}
2286
2287		if (errlist != NULL) {
2288			vd_errlist = fnvlist_lookup_nvlist(errlist,
2289			    ZPOOL_TRIM_VDEVS);
2290		}
2291
2292		(void) snprintf(msg, sizeof (msg),
2293		    dgettext(TEXT_DOMAIN, "operation failed"));
2294	} else {
2295		verify(vd_errlist != NULL);
2296	}
2297
2298	for (elem = nvlist_next_nvpair(vd_errlist, NULL);
2299	    elem != NULL; elem = nvlist_next_nvpair(vd_errlist, elem)) {
2300		int64_t vd_error = xlate_trim_err(fnvpair_value_int64(elem));
2301		char *path;
2302		/*
2303		 * If only the pool was specified, and it was not a secure
2304		 * trim then suppress warnings for individual vdevs which
2305		 * do not support trimming.
2306		 */
2307		if (vd_error == EZFS_TRIM_NOTSUP &&
2308		    trim_flags->fullpool &&
2309		    !trim_flags->secure) {
2310			continue;
2311		}
2312
2313		if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem),
2314		    &path) != 0)
2315			path = nvpair_name(elem);
2316
2317		(void) zfs_error_fmt(zhp->zpool_hdl, vd_error,
2318		    "cannot trim '%s'", path);
2319	}
2320
2321	fnvlist_free(vdev_guids);
2322	fnvlist_free(guids_to_paths);
2323
2324	if (vd_errlist != NULL) {
2325		fnvlist_free(vd_errlist);
2326		return (-1);
2327	}
2328
2329	return (zpool_standard_error(zhp->zpool_hdl, err, msg));
2330}
2331
/*
 * This provides a very minimal check whether a given string is likely a
 * c#t#d# style string.  Users of this are expected to do their own
 * verification of the s# part.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects.  The character handed to isdigit() is converted to unsigned
 * char first, since passing a negative char value is undefined.
 */
#define	CTD_CHECK(str)	((str) != NULL && (str)[0] == 'c' && \
	isdigit((unsigned char)(str)[1]))

/*
 * More elaborate version for ones which may start with "/dev/dsk/"
 * and the like.
 *
 * For a string starting with '/', only the last path component is checked,
 * or the second-to-last one when the string ends in "/old".  Any other
 * string (including NULL) is passed to CTD_CHECK() as-is.  Returns non-zero
 * when the examined component looks like a c#t#d# name.
 */
static int
ctd_check_path(char *str)
{
	/*
	 * If it starts with a slash, check the last component.
	 */
	if (str != NULL && str[0] == '/') {
		char *tmp = strrchr(str, '/');

		/*
		 * If it ends in "/old", check the second-to-last
		 * component of the string instead.  The backwards scan
		 * always terminates because str[0] is known to be '/'.
		 */
		if (tmp != str && strcmp(tmp, "/old") == 0) {
			for (tmp--; *tmp != '/'; tmp--)
				;
		}
		str = tmp + 1;
	}
	return (CTD_CHECK(str));
}
2364
2365/*
2366 * Find a vdev that matches the search criteria specified. We use the
2367 * the nvpair name to determine how we should look for the device.
2368 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
2369 * spare; but FALSE if its an INUSE spare.
2370 */
2371static nvlist_t *
2372vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
2373    boolean_t *l2cache, boolean_t *log)
2374{
2375	uint_t c, children;
2376	nvlist_t **child;
2377	nvlist_t *ret;
2378	uint64_t is_log;
2379	char *srchkey;
2380	nvpair_t *pair = nvlist_next_nvpair(search, NULL);
2381
2382	/* Nothing to look for */
2383	if (search == NULL || pair == NULL)
2384		return (NULL);
2385
2386	/* Obtain the key we will use to search */
2387	srchkey = nvpair_name(pair);
2388
2389	switch (nvpair_type(pair)) {
2390	case DATA_TYPE_UINT64:
2391		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
2392			uint64_t srchval, theguid;
2393
2394			verify(nvpair_value_uint64(pair, &srchval) == 0);
2395			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2396			    &theguid) == 0);
2397			if (theguid == srchval)
2398				return (nv);
2399		}
2400		break;
2401
2402	case DATA_TYPE_STRING: {
2403		char *srchval, *val;
2404
2405		verify(nvpair_value_string(pair, &srchval) == 0);
2406		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
2407			break;
2408
2409		/*
2410		 * Search for the requested value. Special cases:
2411		 *
2412		 * - ZPOOL_CONFIG_PATH for whole disk entries. To support
2413		 *   UEFI boot, these end in "s0" or "s0/old" or "s1" or
2414		 *   "s1/old".   The "s0" or "s1" part is hidden from the user,
2415		 *   but included in the string, so this matches around it.
2416		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
2417		 *
2418		 * Otherwise, all other searches are simple string compares.
2419		 */
2420		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 &&
2421		    ctd_check_path(val)) {
2422			uint64_t wholedisk = 0;
2423
2424			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2425			    &wholedisk);
2426			if (wholedisk) {
2427				int slen = strlen(srchval);
2428				int vlen = strlen(val);
2429
2430				if (slen != vlen - 2)
2431					break;
2432
2433				/*
2434				 * make_leaf_vdev() should only set
2435				 * wholedisk for ZPOOL_CONFIG_PATHs which
2436				 * will include "/dev/dsk/", giving plenty of
2437				 * room for the indices used next.
2438				 */
2439				ASSERT(vlen >= 6);
2440
2441				/*
2442				 * strings identical except trailing "s0"
2443				 */
2444				if ((strcmp(&val[vlen - 2], "s0") == 0 ||
2445				    strcmp(&val[vlen - 2], "s1") == 0) &&
2446				    strncmp(srchval, val, slen) == 0)
2447					return (nv);
2448
2449				/*
2450				 * strings identical except trailing "s0/old"
2451				 */
2452				if ((strcmp(&val[vlen - 6], "s0/old") == 0 ||
2453				    strcmp(&val[vlen - 6], "s1/old") == 0) &&
2454				    strcmp(&srchval[slen - 4], "/old") == 0 &&
2455				    strncmp(srchval, val, slen - 4) == 0)
2456					return (nv);
2457
2458				break;
2459			}
2460		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
2461			char *type, *idx, *end, *p;
2462			uint64_t id, vdev_id;
2463
2464			/*
2465			 * Determine our vdev type, keeping in mind
2466			 * that the srchval is composed of a type and
2467			 * vdev id pair (i.e. mirror-4).
2468			 */
2469			if ((type = strdup(srchval)) == NULL)
2470				return (NULL);
2471
2472			if ((p = strrchr(type, '-')) == NULL) {
2473				free(type);
2474				break;
2475			}
2476			idx = p + 1;
2477			*p = '\0';
2478
2479			/*
2480			 * If the types don't match then keep looking.
2481			 */
2482			if (strncmp(val, type, strlen(val)) != 0) {
2483				free(type);
2484				break;
2485			}
2486
2487			verify(zpool_vdev_is_interior(type));
2488			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
2489			    &id) == 0);
2490
2491			errno = 0;
2492			vdev_id = strtoull(idx, &end, 10);
2493
2494			free(type);
2495			if (errno != 0)
2496				return (NULL);
2497
2498			/*
2499			 * Now verify that we have the correct vdev id.
2500			 */
2501			if (vdev_id == id)
2502				return (nv);
2503		}
2504
2505		/*
2506		 * Common case
2507		 */
2508		if (strcmp(srchval, val) == 0)
2509			return (nv);
2510		break;
2511	}
2512
2513	default:
2514		break;
2515	}
2516
2517	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
2518	    &child, &children) != 0)
2519		return (NULL);
2520
2521	for (c = 0; c < children; c++) {
2522		if ((ret = vdev_to_nvlist_iter(child[c], search,
2523		    avail_spare, l2cache, NULL)) != NULL) {
2524			/*
2525			 * The 'is_log' value is only set for the toplevel
2526			 * vdev, not the leaf vdevs.  So we always lookup the
2527			 * log device from the root of the vdev tree (where
2528			 * 'log' is non-NULL).
2529			 */
2530			if (log != NULL &&
2531			    nvlist_lookup_uint64(child[c],
2532			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
2533			    is_log) {
2534				*log = B_TRUE;
2535			}
2536			return (ret);
2537		}
2538	}
2539
2540	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
2541	    &child, &children) == 0) {
2542		for (c = 0; c < children; c++) {
2543			if ((ret = vdev_to_nvlist_iter(child[c], search,
2544			    avail_spare, l2cache, NULL)) != NULL) {
2545				*avail_spare = B_TRUE;
2546				return (ret);
2547			}
2548		}
2549	}
2550
2551	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
2552	    &child, &children) == 0) {
2553		for (c = 0; c < children; c++) {
2554			if ((ret = vdev_to_nvlist_iter(child[c], search,
2555			    avail_spare, l2cache, NULL)) != NULL) {
2556				*l2cache = B_TRUE;
2557				return (ret);
2558			}
2559		}
2560	}
2561
2562	return (NULL);
2563}
2564
2565/*
2566 * Given a physical path (minus the "/devices" prefix), find the
2567 * associated vdev.
2568 */
2569nvlist_t *
2570zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2571    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2572{
2573	nvlist_t *search, *nvroot, *ret;
2574
2575	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2576	verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2577
2578	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2579	    &nvroot) == 0);
2580
2581	*avail_spare = B_FALSE;
2582	*l2cache = B_FALSE;
2583	if (log != NULL)
2584		*log = B_FALSE;
2585	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2586	nvlist_free(search);
2587
2588	return (ret);
2589}
2590
2591/*
2592 * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2593 */
2594static boolean_t
2595zpool_vdev_is_interior(const char *name)
2596{
2597	if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2598	    strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 ||
2599	    strncmp(name,
2600	    VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
2601	    strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2602		return (B_TRUE);
2603	return (B_FALSE);
2604}
2605
2606nvlist_t *
2607zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2608    boolean_t *l2cache, boolean_t *log)
2609{
2610	char buf[MAXPATHLEN];
2611	char *end;
2612	nvlist_t *nvroot, *search, *ret;
2613	uint64_t guid;
2614
2615	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2616
2617	guid = strtoull(path, &end, 10);
2618	if (guid != 0 && *end == '\0') {
2619		verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2620	} else if (zpool_vdev_is_interior(path)) {
2621		verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2622	} else if (path[0] != '/') {
2623		(void) snprintf(buf, sizeof (buf), "%s/%s", ZFS_DISK_ROOT,
2624		    path);
2625		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
2626	} else {
2627		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2628	}
2629
2630	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2631	    &nvroot) == 0);
2632
2633	*avail_spare = B_FALSE;
2634	*l2cache = B_FALSE;
2635	if (log != NULL)
2636		*log = B_FALSE;
2637	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2638	nvlist_free(search);
2639
2640	return (ret);
2641}
2642
2643static int
2644vdev_is_online(nvlist_t *nv)
2645{
2646	uint64_t ival;
2647
2648	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2649	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2650	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2651		return (0);
2652
2653	return (1);
2654}
2655
2656/*
2657 * Helper function for zpool_get_physpaths().
2658 */
2659static int
2660vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2661    size_t *bytes_written)
2662{
2663	size_t bytes_left, pos, rsz;
2664	char *tmppath;
2665	const char *format;
2666
2667	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2668	    &tmppath) != 0)
2669		return (EZFS_NODEVICE);
2670
2671	pos = *bytes_written;
2672	bytes_left = physpath_size - pos;
2673	format = (pos == 0) ? "%s" : " %s";
2674
2675	rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2676	*bytes_written += rsz;
2677
2678	if (rsz >= bytes_left) {
2679		/* if physpath was not copied properly, clear it */
2680		if (bytes_left != 0) {
2681			physpath[pos] = 0;
2682		}
2683		return (EZFS_NOSPC);
2684	}
2685	return (0);
2686}
2687
2688static int
2689vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
2690    size_t *rsz, boolean_t is_spare)
2691{
2692	char *type;
2693	int ret;
2694
2695	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
2696		return (EZFS_INVALCONFIG);
2697
2698	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
2699		/*
2700		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
2701		 * For a spare vdev, we only want to boot from the active
2702		 * spare device.
2703		 */
2704		if (is_spare) {
2705			uint64_t spare = 0;
2706			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
2707			    &spare);
2708			if (!spare)
2709				return (EZFS_INVALCONFIG);
2710		}
2711
2712		if (vdev_is_online(nv)) {
2713			if ((ret = vdev_get_one_physpath(nv, physpath,
2714			    phypath_size, rsz)) != 0)
2715				return (ret);
2716		}
2717	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
2718	    strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2719	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
2720	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
2721		nvlist_t **child;
2722		uint_t count;
2723		int i, ret;
2724
2725		if (nvlist_lookup_nvlist_array(nv,
2726		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2727			return (EZFS_INVALCONFIG);
2728
2729		for (i = 0; i < count; i++) {
2730			ret = vdev_get_physpaths(child[i], physpath,
2731			    phypath_size, rsz, is_spare);
2732			if (ret == EZFS_NOSPC)
2733				return (ret);
2734		}
2735	}
2736
2737	return (EZFS_POOL_INVALARG);
2738}
2739
2740/*
2741 * Get phys_path for a root pool config.
2742 * Return 0 on success; non-zero on failure.
2743 */
2744static int
2745zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2746{
2747	size_t rsz;
2748	nvlist_t *vdev_root;
2749	nvlist_t **child;
2750	uint_t count;
2751	char *type;
2752
2753	rsz = 0;
2754
2755	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2756	    &vdev_root) != 0)
2757		return (EZFS_INVALCONFIG);
2758
2759	if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2760	    nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2761	    &child, &count) != 0)
2762		return (EZFS_INVALCONFIG);
2763
2764	/*
2765	 * root pool can only have a single top-level vdev.
2766	 */
2767	if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1)
2768		return (EZFS_POOL_INVALARG);
2769
2770	(void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2771	    B_FALSE);
2772
2773	/* No online devices */
2774	if (rsz == 0)
2775		return (EZFS_NODEVICE);
2776
2777	return (0);
2778}
2779
2780/*
2781 * Get phys_path for a root pool
2782 * Return 0 on success; non-zero on failure.
2783 */
2784int
2785zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2786{
2787	return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2788	    phypath_size));
2789}
2790
2791/*
2792 * If the device has being dynamically expanded then we need to relabel
2793 * the disk to use the new unallocated space.
2794 */
2795static int
2796zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
2797{
2798	char path[MAXPATHLEN];
2799	char errbuf[1024];
2800	int fd, error;
2801	int (*_efi_use_whole_disk)(int);
2802
2803	if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
2804	    "efi_use_whole_disk")) == NULL)
2805		return (-1);
2806
2807	(void) snprintf(path, sizeof (path), "%s/%s", ZFS_RDISK_ROOT, name);
2808
2809	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2810		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2811		    "relabel '%s': unable to open device"), name);
2812		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2813	}
2814
2815	/*
2816	 * It's possible that we might encounter an error if the device
2817	 * does not have any unallocated space left. If so, we simply
2818	 * ignore that error and continue on.
2819	 */
2820	error = _efi_use_whole_disk(fd);
2821	(void) close(fd);
2822	if (error && error != VT_ENOSPC) {
2823		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2824		    "relabel '%s': unable to read disk capacity"), name);
2825		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2826	}
2827	return (0);
2828}
2829
2830/*
2831 * Bring the specified vdev online.   The 'flags' parameter is a set of the
2832 * ZFS_ONLINE_* flags.
2833 */
2834int
2835zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2836    vdev_state_t *newstate)
2837{
2838	zfs_cmd_t zc = { 0 };
2839	char msg[1024];
2840	char *pathname;
2841	nvlist_t *tgt;
2842	boolean_t avail_spare, l2cache, islog;
2843	libzfs_handle_t *hdl = zhp->zpool_hdl;
2844
2845	if (flags & ZFS_ONLINE_EXPAND) {
2846		(void) snprintf(msg, sizeof (msg),
2847		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2848	} else {
2849		(void) snprintf(msg, sizeof (msg),
2850		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2851	}
2852
2853	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2854	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2855	    &islog)) == NULL)
2856		return (zfs_error(hdl, EZFS_NODEVICE, msg));
2857
2858	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2859
2860	if (avail_spare)
2861		return (zfs_error(hdl, EZFS_ISSPARE, msg));
2862
2863	if ((flags & ZFS_ONLINE_EXPAND ||
2864	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) &&
2865	    nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) {
2866		uint64_t wholedisk = 0;
2867
2868		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2869		    &wholedisk);
2870
2871		/*
2872		 * XXX - L2ARC 1.0 devices can't support expansion.
2873		 */
2874		if (l2cache) {
2875			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2876			    "cannot expand cache devices"));
2877			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2878		}
2879
2880		if (wholedisk) {
2881			pathname += strlen(ZFS_DISK_ROOT) + 1;
2882			(void) zpool_relabel_disk(hdl, pathname);
2883		}
2884	}
2885
2886	zc.zc_cookie = VDEV_STATE_ONLINE;
2887	zc.zc_obj = flags;
2888
2889	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2890		if (errno == EINVAL) {
2891			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2892			    "from this pool into a new one.  Use '%s' "
2893			    "instead"), "zpool detach");
2894			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2895		}
2896		return (zpool_standard_error(hdl, errno, msg));
2897	}
2898
2899	*newstate = zc.zc_cookie;
2900	return (0);
2901}
2902
2903/*
2904 * Take the specified vdev offline
2905 */
2906int
2907zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2908{
2909	zfs_cmd_t zc = { 0 };
2910	char msg[1024];
2911	nvlist_t *tgt;
2912	boolean_t avail_spare, l2cache;
2913	libzfs_handle_t *hdl = zhp->zpool_hdl;
2914
2915	(void) snprintf(msg, sizeof (msg),
2916	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2917
2918	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2919	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2920	    NULL)) == NULL)
2921		return (zfs_error(hdl, EZFS_NODEVICE, msg));
2922
2923	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2924
2925	if (avail_spare)
2926		return (zfs_error(hdl, EZFS_ISSPARE, msg));
2927
2928	zc.zc_cookie = VDEV_STATE_OFFLINE;
2929	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2930
2931	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2932		return (0);
2933
2934	switch (errno) {
2935	case EBUSY:
2936
2937		/*
2938		 * There are no other replicas of this device.
2939		 */
2940		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2941
2942	case EEXIST:
2943		/*
2944		 * The log device has unplayed logs
2945		 */
2946		return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2947
2948	default:
2949		return (zpool_standard_error(hdl, errno, msg));
2950	}
2951}
2952
2953/*
2954 * Mark the given vdev faulted.
2955 */
2956int
2957zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2958{
2959	zfs_cmd_t zc = { 0 };
2960	char msg[1024];
2961	libzfs_handle_t *hdl = zhp->zpool_hdl;
2962
2963	(void) snprintf(msg, sizeof (msg),
2964	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
2965
2966	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2967	zc.zc_guid = guid;
2968	zc.zc_cookie = VDEV_STATE_FAULTED;
2969	zc.zc_obj = aux;
2970
2971	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2972		return (0);
2973
2974	switch (errno) {
2975	case EBUSY:
2976
2977		/*
2978		 * There are no other replicas of this device.
2979		 */
2980		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2981
2982	default:
2983		return (zpool_standard_error(hdl, errno, msg));
2984	}
2985
2986}
2987
2988/*
2989 * Mark the given vdev degraded.
2990 */
2991int
2992zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2993{
2994	zfs_cmd_t zc = { 0 };
2995	char msg[1024];
2996	libzfs_handle_t *hdl = zhp->zpool_hdl;
2997
2998	(void) snprintf(msg, sizeof (msg),
2999	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
3000
3001	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3002	zc.zc_guid = guid;
3003	zc.zc_cookie = VDEV_STATE_DEGRADED;
3004	zc.zc_obj = aux;
3005
3006	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
3007		return (0);
3008
3009	return (zpool_standard_error(hdl, errno, msg));
3010}
3011
3012/*
3013 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
3014 * a hot spare.
3015 */
3016static boolean_t
3017is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
3018{
3019	nvlist_t **child;
3020	uint_t c, children;
3021	char *type;
3022
3023	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
3024	    &children) == 0) {
3025		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
3026		    &type) == 0);
3027
3028		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
3029		    children == 2 && child[which] == tgt)
3030			return (B_TRUE);
3031
3032		for (c = 0; c < children; c++)
3033			if (is_replacing_spare(child[c], tgt, which))
3034				return (B_TRUE);
3035	}
3036
3037	return (B_FALSE);
3038}
3039
3040/*
3041 * Attach new_disk (fully described by nvroot) to old_disk.
3042 * If 'replacing' is specified, the new disk will replace the old one.
3043 */
3044int
3045zpool_vdev_attach(zpool_handle_t *zhp,
3046    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
3047{
3048	zfs_cmd_t zc = { 0 };
3049	char msg[1024];
3050	int ret;
3051	nvlist_t *tgt, *newvd;
3052	boolean_t avail_spare, l2cache, islog;
3053	uint64_t val;
3054	char *newname;
3055	nvlist_t **child;
3056	uint_t children;
3057	nvlist_t *config_root;
3058	libzfs_handle_t *hdl = zhp->zpool_hdl;
3059	boolean_t rootpool = zpool_is_bootable(zhp);
3060
3061	if (replacing)
3062		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3063		    "cannot replace %s with %s"), old_disk, new_disk);
3064	else
3065		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
3066		    "cannot attach %s to %s"), new_disk, old_disk);
3067
3068	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3069	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
3070	    &islog)) == NULL)
3071		return (zfs_error(hdl, EZFS_NODEVICE, msg));
3072
3073	if (avail_spare)
3074		return (zfs_error(hdl, EZFS_ISSPARE, msg));
3075
3076	if (l2cache)
3077		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
3078
3079	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3080	zc.zc_cookie = replacing;
3081
3082	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
3083	    &child, &children) != 0 || children != 1) {
3084		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3085		    "new device must be a single disk"));
3086		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
3087	}
3088
3089	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
3090	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
3091
3092	if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
3093		return (-1);
3094
3095	newvd = zpool_find_vdev(zhp, newname, &avail_spare, &l2cache, NULL);
3096	/*
3097	 * If the target is a hot spare that has been swapped in, we can only
3098	 * replace it with another hot spare.
3099	 */
3100	if (replacing &&
3101	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
3102	    (newvd == NULL || !avail_spare) &&
3103	    is_replacing_spare(config_root, tgt, 1)) {
3104		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3105		    "can only be replaced by another hot spare"));
3106		free(newname);
3107		return (zfs_error(hdl, EZFS_BADTARGET, msg));
3108	}
3109
3110	free(newname);
3111
3112	if (replacing && avail_spare && !vdev_is_online(newvd)) {
3113		(void) zpool_standard_error(hdl, ENXIO, msg);
3114		return (-1);
3115	}
3116
3117	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
3118		return (-1);
3119
3120	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
3121
3122	zcmd_free_nvlists(&zc);
3123
3124	if (ret == 0) {
3125		if (rootpool) {
3126			/*
3127			 * XXX need a better way to prevent user from
3128			 * booting up a half-baked vdev.
3129			 */
3130			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
3131			    "sure to wait until resilver is done "
3132			    "before rebooting.\n"));
3133		}
3134		return (0);
3135	}
3136
3137	switch (errno) {
3138	case ENOTSUP:
3139		/*
3140		 * Can't attach to or replace this type of vdev.
3141		 */
3142		if (replacing) {
3143			uint64_t version = zpool_get_prop_int(zhp,
3144			    ZPOOL_PROP_VERSION, NULL);
3145
3146			if (islog)
3147				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3148				    "cannot replace a log with a spare"));
3149			else if (version >= SPA_VERSION_MULTI_REPLACE)
3150				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3151				    "already in replacing/spare config; wait "
3152				    "for completion or use 'zpool detach'"));
3153			else
3154				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3155				    "cannot replace a replacing device"));
3156		} else {
3157			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3158			    "can only attach to mirrors and top-level "
3159			    "disks"));
3160		}
3161		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
3162		break;
3163
3164	case EINVAL:
3165		/*
3166		 * The new device must be a single disk.
3167		 */
3168		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3169		    "new device must be a single disk"));
3170		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3171		break;
3172
3173	case EBUSY:
3174		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
3175		    "or device removal is in progress"),
3176		    new_disk);
3177		(void) zfs_error(hdl, EZFS_BADDEV, msg);
3178		break;
3179
3180	case EOVERFLOW:
3181		/*
3182		 * The new device is too small.
3183		 */
3184		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3185		    "device is too small"));
3186		(void) zfs_error(hdl, EZFS_BADDEV, msg);
3187		break;
3188
3189	case EDOM:
3190		/*
3191		 * The new device has a different optimal sector size.
3192		 */
3193		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3194		    "new device has a different optimal sector size; use the "
3195		    "option '-o ashift=N' to override the optimal size"));
3196		(void) zfs_error(hdl, EZFS_BADDEV, msg);
3197		break;
3198
3199	case ENAMETOOLONG:
3200		/*
3201		 * The resulting top-level vdev spec won't fit in the label.
3202		 */
3203		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
3204		break;
3205
3206	default:
3207		(void) zpool_standard_error(hdl, errno, msg);
3208	}
3209
3210	return (-1);
3211}
3212
3213/*
3214 * Detach the specified device.
3215 */
3216int
3217zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
3218{
3219	zfs_cmd_t zc = { 0 };
3220	char msg[1024];
3221	nvlist_t *tgt;
3222	boolean_t avail_spare, l2cache;
3223	libzfs_handle_t *hdl = zhp->zpool_hdl;
3224
3225	(void) snprintf(msg, sizeof (msg),
3226	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
3227
3228	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3229	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3230	    NULL)) == NULL)
3231		return (zfs_error(hdl, EZFS_NODEVICE, msg));
3232
3233	if (avail_spare)
3234		return (zfs_error(hdl, EZFS_ISSPARE, msg));
3235
3236	if (l2cache)
3237		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
3238
3239	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3240
3241	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
3242		return (0);
3243
3244	switch (errno) {
3245
3246	case ENOTSUP:
3247		/*
3248		 * Can't detach from this type of vdev.
3249		 */
3250		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
3251		    "applicable to mirror and replacing vdevs"));
3252		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
3253		break;
3254
3255	case EBUSY:
3256		/*
3257		 * There are no other replicas of this device.
3258		 */
3259		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
3260		break;
3261
3262	default:
3263		(void) zpool_standard_error(hdl, errno, msg);
3264	}
3265
3266	return (-1);
3267}
3268
3269/*
3270 * Find a mirror vdev in the source nvlist.
3271 *
3272 * The mchild array contains a list of disks in one of the top-level mirrors
3273 * of the source pool.  The schild array contains a list of disks that the
3274 * user specified on the command line.  We loop over the mchild array to
3275 * see if any entry in the schild array matches.
3276 *
3277 * If a disk in the mchild array is found in the schild array, we return
3278 * the index of that entry.  Otherwise we return -1.
3279 */
3280static int
3281find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
3282    nvlist_t **schild, uint_t schildren)
3283{
3284	uint_t mc;
3285
3286	for (mc = 0; mc < mchildren; mc++) {
3287		uint_t sc;
3288		char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3289		    mchild[mc], 0);
3290
3291		for (sc = 0; sc < schildren; sc++) {
3292			char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3293			    schild[sc], 0);
3294			boolean_t result = (strcmp(mpath, spath) == 0);
3295
3296			free(spath);
3297			if (result) {
3298				free(mpath);
3299				return (mc);
3300			}
3301		}
3302
3303		free(mpath);
3304	}
3305
3306	return (-1);
3307}
3308
3309/*
3310 * Split a mirror pool.  If newroot points to null, then a new nvlist
3311 * is generated and it is the responsibility of the caller to free it.
3312 */
3313int
3314zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
3315    nvlist_t *props, splitflags_t flags)
3316{
3317	zfs_cmd_t zc = { 0 };
3318	char msg[1024];
3319	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
3320	nvlist_t **varray = NULL, *zc_props = NULL;
3321	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
3322	libzfs_handle_t *hdl = zhp->zpool_hdl;
3323	uint64_t vers;
3324	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
3325	int retval = 0;
3326
3327	(void) snprintf(msg, sizeof (msg),
3328	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
3329
3330	if (!zpool_name_valid(hdl, B_FALSE, newname))
3331		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
3332
3333	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
3334		(void) fprintf(stderr, gettext("Internal error: unable to "
3335		    "retrieve pool configuration\n"));
3336		return (-1);
3337	}
3338
3339	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
3340	    == 0);
3341	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
3342
3343	if (props) {
3344		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
3345		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
3346		    props, vers, flags, msg)) == NULL)
3347			return (-1);
3348	}
3349
3350	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
3351	    &children) != 0) {
3352		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3353		    "Source pool is missing vdev tree"));
3354		nvlist_free(zc_props);
3355		return (-1);
3356	}
3357
3358	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
3359	vcount = 0;
3360
3361	if (*newroot == NULL ||
3362	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
3363	    &newchild, &newchildren) != 0)
3364		newchildren = 0;
3365
3366	for (c = 0; c < children; c++) {
3367		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
3368		char *type;
3369		nvlist_t **mchild, *vdev;
3370		uint_t mchildren;
3371		int entry;
3372
3373		/*
3374		 * Unlike cache & spares, slogs are stored in the
3375		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
3376		 */
3377		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
3378		    &is_log);
3379		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
3380		    &is_hole);
3381		if (is_log || is_hole) {
3382			/*
3383			 * Create a hole vdev and put it in the config.
3384			 */
3385			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
3386				goto out;
3387			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
3388			    VDEV_TYPE_HOLE) != 0)
3389				goto out;
3390			if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
3391			    1) != 0)
3392				goto out;
3393			if (lastlog == 0)
3394				lastlog = vcount;
3395			varray[vcount++] = vdev;
3396			continue;
3397		}
3398		lastlog = 0;
3399		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
3400		    == 0);
3401		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
3402			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3403			    "Source pool must be composed only of mirrors\n"));
3404			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3405			goto out;
3406		}
3407
3408		verify(nvlist_lookup_nvlist_array(child[c],
3409		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
3410
3411		/* find or add an entry for this top-level vdev */
3412		if (newchildren > 0 &&
3413		    (entry = find_vdev_entry(zhp, mchild, mchildren,
3414		    newchild, newchildren)) >= 0) {
3415			/* We found a disk that the user specified. */
3416			vdev = mchild[entry];
3417			++found;
3418		} else {
3419			/* User didn't specify a disk for this vdev. */
3420			vdev = mchild[mchildren - 1];
3421		}
3422
3423		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
3424			goto out;
3425	}
3426
3427	/* did we find every disk the user specified? */
3428	if (found != newchildren) {
3429		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
3430		    "include at most one disk from each mirror"));
3431		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3432		goto out;
3433	}
3434
3435	/* Prepare the nvlist for populating. */
3436	if (*newroot == NULL) {
3437		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3438			goto out;
3439		freelist = B_TRUE;
3440		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3441		    VDEV_TYPE_ROOT) != 0)
3442			goto out;
3443	} else {
3444		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3445	}
3446
3447	/* Add all the children we found */
3448	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3449	    lastlog == 0 ? vcount : lastlog) != 0)
3450		goto out;
3451
3452	/*
3453	 * If we're just doing a dry run, exit now with success.
3454	 */
3455	if (flags.dryrun) {
3456		memory_err = B_FALSE;
3457		freelist = B_FALSE;
3458		goto out;
3459	}
3460
3461	/* now build up the config list & call the ioctl */
3462	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3463		goto out;
3464
3465	if (nvlist_add_nvlist(newconfig,
3466	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3467	    nvlist_add_string(newconfig,
3468	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3469	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3470		goto out;
3471
3472	/*
3473	 * The new pool is automatically part of the namespace unless we
3474	 * explicitly export it.
3475	 */
3476	if (!flags.import)
3477		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3478	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3479	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3480	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3481		goto out;
3482	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3483		goto out;
3484
3485	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3486		retval = zpool_standard_error(hdl, errno, msg);
3487		goto out;
3488	}
3489
3490	freelist = B_FALSE;
3491	memory_err = B_FALSE;
3492
3493out:
3494	if (varray != NULL) {
3495		int v;
3496
3497		for (v = 0; v < vcount; v++)
3498			nvlist_free(varray[v]);
3499		free(varray);
3500	}
3501	zcmd_free_nvlists(&zc);
3502	nvlist_free(zc_props);
3503	nvlist_free(newconfig);
3504	if (freelist) {
3505		nvlist_free(*newroot);
3506		*newroot = NULL;
3507	}
3508
3509	if (retval != 0)
3510		return (retval);
3511
3512	if (memory_err)
3513		return (no_memory(hdl));
3514
3515	return (0);
3516}
3517
3518/*
3519 * Remove the given device.
3520 */
3521int
3522zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3523{
3524	zfs_cmd_t zc = { 0 };
3525	char msg[1024];
3526	nvlist_t *tgt;
3527	boolean_t avail_spare, l2cache, islog;
3528	libzfs_handle_t *hdl = zhp->zpool_hdl;
3529	uint64_t version;
3530
3531	(void) snprintf(msg, sizeof (msg),
3532	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3533
3534	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3535	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3536	    &islog)) == NULL)
3537		return (zfs_error(hdl, EZFS_NODEVICE, msg));
3538
3539	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3540	if (islog && version < SPA_VERSION_HOLES) {
3541		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3542		    "pool must be upgraded to support log removal"));
3543		return (zfs_error(hdl, EZFS_BADVERSION, msg));
3544	}
3545
3546	zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
3547
3548	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3549		return (0);
3550
3551	switch (errno) {
3552
3553	case EINVAL:
3554		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3555		    "invalid config; all top-level vdevs must "
3556		    "have the same sector size and not be raidz."));
3557		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3558		break;
3559
3560	case EBUSY:
3561		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3562		    "Pool busy; removal may already be in progress"));
3563		(void) zfs_error(hdl, EZFS_BUSY, msg);
3564		break;
3565
3566	default:
3567		(void) zpool_standard_error(hdl, errno, msg);
3568	}
3569	return (-1);
3570}
3571
3572int
3573zpool_vdev_remove_cancel(zpool_handle_t *zhp)
3574{
3575	zfs_cmd_t zc = { 0 };
3576	char msg[1024];
3577	libzfs_handle_t *hdl = zhp->zpool_hdl;
3578
3579	(void) snprintf(msg, sizeof (msg),
3580	    dgettext(TEXT_DOMAIN, "cannot cancel removal"));
3581
3582	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3583	zc.zc_cookie = 1;
3584
3585	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3586		return (0);
3587
3588	return (zpool_standard_error(hdl, errno, msg));
3589}
3590
3591int
3592zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
3593    uint64_t *sizep)
3594{
3595	char msg[1024];
3596	nvlist_t *tgt;
3597	boolean_t avail_spare, l2cache, islog;
3598	libzfs_handle_t *hdl = zhp->zpool_hdl;
3599
3600	(void) snprintf(msg, sizeof (msg),
3601	    dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
3602	    path);
3603
3604	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3605	    &islog)) == NULL)
3606		return (zfs_error(hdl, EZFS_NODEVICE, msg));
3607
3608	if (avail_spare || l2cache || islog) {
3609		*sizep = 0;
3610		return (0);
3611	}
3612
3613	if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
3614		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3615		    "indirect size not available"));
3616		return (zfs_error(hdl, EINVAL, msg));
3617	}
3618	return (0);
3619}
3620
3621/*
3622 * Clear the errors for the pool, or the particular device if specified.
3623 */
3624int
3625zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3626{
3627	zfs_cmd_t zc = { 0 };
3628	char msg[1024];
3629	nvlist_t *tgt;
3630	zpool_load_policy_t policy;
3631	boolean_t avail_spare, l2cache;
3632	libzfs_handle_t *hdl = zhp->zpool_hdl;
3633	nvlist_t *nvi = NULL;
3634	int error;
3635
3636	if (path)
3637		(void) snprintf(msg, sizeof (msg),
3638		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3639		    path);
3640	else
3641		(void) snprintf(msg, sizeof (msg),
3642		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3643		    zhp->zpool_name);
3644
3645	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3646	if (path) {
3647		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3648		    &l2cache, NULL)) == NULL)
3649			return (zfs_error(hdl, EZFS_NODEVICE, msg));
3650
3651		/*
3652		 * Don't allow error clearing for hot spares.  Do allow
3653		 * error clearing for l2cache devices.
3654		 */
3655		if (avail_spare)
3656			return (zfs_error(hdl, EZFS_ISSPARE, msg));
3657
3658		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3659		    &zc.zc_guid) == 0);
3660	}
3661
3662	zpool_get_load_policy(rewindnvl, &policy);
3663	zc.zc_cookie = policy.zlp_rewind;
3664
3665	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3666		return (-1);
3667
3668	if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3669		return (-1);
3670
3671	while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3672	    errno == ENOMEM) {
3673		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3674			zcmd_free_nvlists(&zc);
3675			return (-1);
3676		}
3677	}
3678
3679	if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
3680	    errno != EPERM && errno != EACCES)) {
3681		if (policy.zlp_rewind &
3682		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3683			(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3684			zpool_rewind_exclaim(hdl, zc.zc_name,
3685			    ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
3686			    nvi);
3687			nvlist_free(nvi);
3688		}
3689		zcmd_free_nvlists(&zc);
3690		return (0);
3691	}
3692
3693	zcmd_free_nvlists(&zc);
3694	return (zpool_standard_error(hdl, errno, msg));
3695}
3696
3697/*
3698 * Similar to zpool_clear(), but takes a GUID (used by fmd).
3699 */
3700int
3701zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3702{
3703	zfs_cmd_t zc = { 0 };
3704	char msg[1024];
3705	libzfs_handle_t *hdl = zhp->zpool_hdl;
3706
3707	(void) snprintf(msg, sizeof (msg),
3708	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3709	    guid);
3710
3711	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3712	zc.zc_guid = guid;
3713	zc.zc_cookie = ZPOOL_NO_REWIND;
3714
3715	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3716		return (0);
3717
3718	return (zpool_standard_error(hdl, errno, msg));
3719}
3720
3721/*
3722 * Change the GUID for a pool.
3723 */
3724int
3725zpool_reguid(zpool_handle_t *zhp)
3726{
3727	char msg[1024];
3728	libzfs_handle_t *hdl = zhp->zpool_hdl;
3729	zfs_cmd_t zc = { 0 };
3730
3731	(void) snprintf(msg, sizeof (msg),
3732	    dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3733
3734	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3735	if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3736		return (0);
3737
3738	return (zpool_standard_error(hdl, errno, msg));
3739}
3740
3741/*
3742 * Reopen the pool.
3743 */
3744int
3745zpool_reopen(zpool_handle_t *zhp)
3746{
3747	zfs_cmd_t zc = { 0 };
3748	char msg[1024];
3749	libzfs_handle_t *hdl = zhp->zpool_hdl;
3750
3751	(void) snprintf(msg, sizeof (msg),
3752	    dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3753	    zhp->zpool_name);
3754
3755	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3756	if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3757		return (0);
3758	return (zpool_standard_error(hdl, errno, msg));
3759}
3760
3761/* call into libzfs_core to execute the sync IOCTL per pool */
3762int
3763zpool_sync_one(zpool_handle_t *zhp, void *data)
3764{
3765	int ret;
3766	libzfs_handle_t *hdl = zpool_get_handle(zhp);
3767	const char *pool_name = zpool_get_name(zhp);
3768	boolean_t *force = data;
3769	nvlist_t *innvl = fnvlist_alloc();
3770
3771	fnvlist_add_boolean_value(innvl, "force", *force);
3772	if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) {
3773		nvlist_free(innvl);
3774		return (zpool_standard_error_fmt(hdl, ret,
3775		    dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
3776	}
3777	nvlist_free(innvl);
3778
3779	return (0);
3780}
3781
3782/*
3783 * Convert from a devid string to a path.
3784 */
3785static char *
3786devid_to_path(char *devid_str)
3787{
3788	ddi_devid_t devid;
3789	char *minor;
3790	char *path;
3791	devid_nmlist_t *list = NULL;
3792	int ret;
3793
3794	if (devid_str_decode(devid_str, &devid, &minor) != 0)
3795		return (NULL);
3796
3797	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3798
3799	devid_str_free(minor);
3800	devid_free(devid);
3801
3802	if (ret != 0)
3803		return (NULL);
3804
3805	/*
3806	 * In a case the strdup() fails, we will just return NULL below.
3807	 */
3808	path = strdup(list[0].devname);
3809
3810	devid_free_nmlist(list);
3811
3812	return (path);
3813}
3814
3815/*
3816 * Convert from a path to a devid string.
3817 */
3818static char *
3819path_to_devid(const char *path)
3820{
3821	int fd;
3822	ddi_devid_t devid;
3823	char *minor, *ret;
3824
3825	if ((fd = open(path, O_RDONLY)) < 0)
3826		return (NULL);
3827
3828	minor = NULL;
3829	ret = NULL;
3830	if (devid_get(fd, &devid) == 0) {
3831		if (devid_get_minor_name(fd, &minor) == 0)
3832			ret = devid_str_encode(devid, minor);
3833		if (minor != NULL)
3834			devid_str_free(minor);
3835		devid_free(devid);
3836	}
3837	(void) close(fd);
3838
3839	return (ret);
3840}
3841
/*
 * Argument block passed to path_from_physpath_walker() by way of
 * di_devlink_walk().
 */
struct path_from_physpath_walker_args {
	char *pfpwa_path;	/* strdup()ed devlink path, or NULL */
};
3845
3846/*
3847 * Walker for use with di_devlink_walk().  Stores the "/dev" path of the first
3848 * primary devlink (i.e., the first devlink which refers to our "/devices"
3849 * node) and stops walking.
3850 */
3851static int
3852path_from_physpath_walker(di_devlink_t devlink, void *arg)
3853{
3854	struct path_from_physpath_walker_args *pfpwa = arg;
3855
3856	if (di_devlink_type(devlink) != DI_PRIMARY_LINK) {
3857		return (DI_WALK_CONTINUE);
3858	}
3859
3860	verify(pfpwa->pfpwa_path == NULL);
3861	if ((pfpwa->pfpwa_path = strdup(di_devlink_path(devlink))) != NULL) {
3862		return (DI_WALK_TERMINATE);
3863	}
3864
3865	return (DI_WALK_CONTINUE);
3866}
3867
3868/*
3869 * Search for a "/dev" path that refers to our physical path.  Returns the new
3870 * path if one is found and it does not match the existing "path" value.  If
3871 * the value is unchanged, or one could not be found, returns NULL.
3872 */
3873static char *
3874path_from_physpath(libzfs_handle_t *hdl, const char *path,
3875    const char *physpath)
3876{
3877	struct path_from_physpath_walker_args pfpwa;
3878
3879	if (physpath == NULL) {
3880		return (NULL);
3881	}
3882
3883	if (hdl->libzfs_devlink == NULL) {
3884		if ((hdl->libzfs_devlink = di_devlink_init(NULL, 0)) ==
3885		    DI_LINK_NIL) {
3886			/*
3887			 * We may not be able to open a handle if this process
3888			 * is insufficiently privileged, or we are too early in
3889			 * boot for devfsadm to be ready.  Ignore this error
3890			 * and defer the path check to a subsequent run.
3891			 */
3892			return (NULL);
3893		}
3894	}
3895
3896	pfpwa.pfpwa_path = NULL;
3897	(void) di_devlink_walk(hdl->libzfs_devlink, NULL, physpath,
3898	    DI_PRIMARY_LINK, &pfpwa, path_from_physpath_walker);
3899
3900	if (path != NULL && pfpwa.pfpwa_path != NULL &&
3901	    strcmp(path, pfpwa.pfpwa_path) == 0) {
3902		/*
3903		 * If the path is already correct, no change is required.
3904		 */
3905		free(pfpwa.pfpwa_path);
3906		return (NULL);
3907	}
3908
3909	return (pfpwa.pfpwa_path);
3910}
3911
3912/*
3913 * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3914 * ignore any failure here, since a common case is for an unprivileged user to
3915 * type 'zpool status', and we'll display the correct information anyway.
3916 */
3917static void
3918set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3919{
3920	zfs_cmd_t zc = { 0 };
3921
3922	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3923	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3924	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3925	    &zc.zc_guid) == 0);
3926
3927	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3928}
3929
3930/*
3931 * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3932 * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3933 * We also check if this is a whole disk, in which case we strip off the
3934 * trailing 's0' slice name.
3935 *
3936 * This routine is also responsible for identifying when disks have been
3937 * reconfigured in a new location.  The kernel will have opened the device by
3938 * devid, but the path will still refer to the old location.  To catch this, we
3939 * first do a path -> devid translation (which is fast for the common case).  If
3940 * the devid matches, we're done.  If not, we do a reverse devid -> path
3941 * translation and issue the appropriate ioctl() to update the path of the vdev.
3942 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3943 * of these checks.
3944 */
3945char *
3946zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3947    int name_flags)
3948{
3949	char *path, *type, *env;
3950	uint64_t value;
3951	char buf[64];
3952
3953	/*
3954	 * vdev_name will be "root"/"root-0" for the root vdev, but it is the
3955	 * zpool name that will be displayed to the user.
3956	 */
3957	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3958	if (zhp != NULL && strcmp(type, "root") == 0)
3959		return (zfs_strdup(hdl, zpool_get_name(zhp)));
3960
3961	env = getenv("ZPOOL_VDEV_NAME_PATH");
3962	if (env && (strtoul(env, NULL, 0) > 0 ||
3963	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3964		name_flags |= VDEV_NAME_PATH;
3965
3966	env = getenv("ZPOOL_VDEV_NAME_GUID");
3967	if (env && (strtoul(env, NULL, 0) > 0 ||
3968	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3969		name_flags |= VDEV_NAME_GUID;
3970
3971	env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
3972	if (env && (strtoul(env, NULL, 0) > 0 ||
3973	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3974		name_flags |= VDEV_NAME_FOLLOW_LINKS;
3975
3976	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
3977	    name_flags & VDEV_NAME_GUID) {
3978		nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
3979		(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
3980		path = buf;
3981	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3982		vdev_stat_t *vs;
3983		uint_t vsc;
3984		char *newpath = NULL;
3985		char *physpath = NULL;
3986		char *devid = NULL;
3987
3988		/*
3989		 * If the device is dead (faulted, offline, etc) then don't
3990		 * bother opening it.  Otherwise we may be forcing the user to
3991		 * open a misbehaving device, which can have undesirable
3992		 * effects.
3993		 */
3994		if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3995		    (uint64_t **)&vs, &vsc) != 0 ||
3996		    vs->vs_state < VDEV_STATE_DEGRADED ||
3997		    zhp == NULL) {
3998			goto after_open;
3999		}
4000
4001		if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
4002			/*
4003			 * This vdev has a devid.  We can use it to check the
4004			 * current path.
4005			 */
4006			char *newdevid = path_to_devid(path);
4007
4008			if (newdevid == NULL || strcmp(devid, newdevid) != 0) {
4009				newpath = devid_to_path(devid);
4010			}
4011
4012			if (newdevid != NULL)
4013				devid_str_free(newdevid);
4014
4015		} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
4016		    &physpath) == 0) {
4017			/*
4018			 * This vdev does not have a devid, but it does have a
4019			 * physical path.  Attempt to translate this to a /dev
4020			 * path.
4021			 */
4022			newpath = path_from_physpath(hdl, path, physpath);
4023		}
4024
4025		if (newpath != NULL) {
4026			/*
4027			 * Update the path appropriately.
4028			 */
4029			set_path(zhp, nv, newpath);
4030			if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH,
4031			    newpath) == 0) {
4032				verify(nvlist_lookup_string(nv,
4033				    ZPOOL_CONFIG_PATH, &path) == 0);
4034			}
4035			free(newpath);
4036		}
4037
4038		if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
4039			char *rp = realpath(path, NULL);
4040			if (rp) {
4041				strlcpy(buf, rp, sizeof (buf));
4042				path = buf;
4043				free(rp);
4044			}
4045		}
4046
4047after_open:
4048		if (strncmp(path, ZFS_DISK_ROOTD, strlen(ZFS_DISK_ROOTD)) == 0)
4049			path += strlen(ZFS_DISK_ROOTD);
4050
4051		/*
4052		 * Remove the partition from the path it this is a whole disk.
4053		 */
4054		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
4055		    == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
4056			int pathlen = strlen(path);
4057			char *tmp = zfs_strdup(hdl, path);
4058
4059			/*
4060			 * If it starts with c#, and ends with "s0" or "s1",
4061			 * chop the slice off, or if it ends with "s0/old" or
4062			 * "s1/old", remove the slice from the middle.
4063			 */
4064			if (CTD_CHECK(tmp)) {
4065				if (strcmp(&tmp[pathlen - 2], "s0") == 0 ||
4066				    strcmp(&tmp[pathlen - 2], "s1") == 0) {
4067					tmp[pathlen - 2] = '\0';
4068				} else if (pathlen > 6 &&
4069				    (strcmp(&tmp[pathlen - 6], "s0/old") == 0 ||
4070				    strcmp(&tmp[pathlen - 6], "s1/old") == 0)) {
4071					(void) strcpy(&tmp[pathlen - 6],
4072					    "/old");
4073				}
4074			}
4075			return (tmp);
4076		}
4077	} else {
4078		path = type;
4079
4080		/*
4081		 * If it's a raidz device, we need to stick in the parity level.
4082		 */
4083		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
4084			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
4085			    &value) == 0);
4086			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
4087			    (u_longlong_t)value);
4088			path = buf;
4089		}
4090
4091		/*
4092		 * We identify each top-level vdev by using a <type-id>
4093		 * naming convention.
4094		 */
4095		if (name_flags & VDEV_NAME_TYPE_ID) {
4096			uint64_t id;
4097
4098			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
4099			    &id) == 0);
4100			(void) snprintf(buf, sizeof (buf), "%s-%llu", path,
4101			    (u_longlong_t)id);
4102			path = buf;
4103		}
4104	}
4105
4106	return (zfs_strdup(hdl, path));
4107}
4108
4109static int
4110zbookmark_mem_compare(const void *a, const void *b)
4111{
4112	return (memcmp(a, b, sizeof (zbookmark_phys_t)));
4113}
4114
4115/*
4116 * Retrieve the persistent error log, uniquify the members, and return to the
4117 * caller.
4118 */
4119int
4120zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
4121{
4122	zfs_cmd_t zc = { 0 };
4123	uint64_t count;
4124	zbookmark_phys_t *zb = NULL;
4125	int i;
4126
4127	/*
4128	 * Retrieve the raw error list from the kernel.  If the number of errors
4129	 * has increased, allocate more space and continue until we get the
4130	 * entire list.
4131	 */
4132	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
4133	    &count) == 0);
4134	if (count == 0)
4135		return (0);
4136	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
4137	    count * sizeof (zbookmark_phys_t))) == (uintptr_t)NULL)
4138		return (-1);
4139	zc.zc_nvlist_dst_size = count;
4140	(void) strcpy(zc.zc_name, zhp->zpool_name);
4141	for (;;) {
4142		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
4143		    &zc) != 0) {
4144			free((void *)(uintptr_t)zc.zc_nvlist_dst);
4145			if (errno == ENOMEM) {
4146				void *dst;
4147
4148				count = zc.zc_nvlist_dst_size;
4149				dst = zfs_alloc(zhp->zpool_hdl, count *
4150				    sizeof (zbookmark_phys_t));
4151				if (dst == NULL)
4152					return (-1);
4153				zc.zc_nvlist_dst = (uintptr_t)dst;
4154			} else {
4155				return (-1);
4156			}
4157		} else {
4158			break;
4159		}
4160	}
4161
4162	/*
4163	 * Sort the resulting bookmarks.  This is a little confusing due to the
4164	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
4165	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
4166	 * _not_ copied as part of the process.  So we point the start of our
4167	 * array appropriate and decrement the total number of elements.
4168	 */
4169	zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
4170	    zc.zc_nvlist_dst_size;
4171	count -= zc.zc_nvlist_dst_size;
4172
4173	qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
4174
4175	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
4176
4177	/*
4178	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
4179	 */
4180	for (i = 0; i < count; i++) {
4181		nvlist_t *nv;
4182
4183		/* ignoring zb_blkid and zb_level for now */
4184		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
4185		    zb[i-1].zb_object == zb[i].zb_object)
4186			continue;
4187
4188		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
4189			goto nomem;
4190		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
4191		    zb[i].zb_objset) != 0) {
4192			nvlist_free(nv);
4193			goto nomem;
4194		}
4195		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
4196		    zb[i].zb_object) != 0) {
4197			nvlist_free(nv);
4198			goto nomem;
4199		}
4200		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
4201			nvlist_free(nv);
4202			goto nomem;
4203		}
4204		nvlist_free(nv);
4205	}
4206
4207	free((void *)(uintptr_t)zc.zc_nvlist_dst);
4208	return (0);
4209
4210nomem:
4211	free((void *)(uintptr_t)zc.zc_nvlist_dst);
4212	return (no_memory(zhp->zpool_hdl));
4213}
4214
4215/*
4216 * Upgrade a ZFS pool to the latest on-disk version.
4217 */
4218int
4219zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
4220{
4221	zfs_cmd_t zc = { 0 };
4222	libzfs_handle_t *hdl = zhp->zpool_hdl;
4223
4224	(void) strcpy(zc.zc_name, zhp->zpool_name);
4225	zc.zc_cookie = new_version;
4226
4227	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
4228		return (zpool_standard_error_fmt(hdl, errno,
4229		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
4230		    zhp->zpool_name));
4231	return (0);
4232}
4233
/*
 * Rebuild the command line in 'string' (at most 'len' bytes, including the
 * terminating NUL): the basename of argv[0] followed by the remaining
 * arguments, separated by single spaces.
 */
void
zfs_save_arguments(int argc, char **argv, char *string, int len)
{
	int arg = 1;

	(void) strlcpy(string, basename(argv[0]), len);

	while (arg < argc) {
		(void) strlcat(string, " ", len);
		(void) strlcat(string, argv[arg], len);
		arg++;
	}
}
4243
4244int
4245zpool_log_history(libzfs_handle_t *hdl, const char *message)
4246{
4247	zfs_cmd_t zc = { 0 };
4248	nvlist_t *args;
4249	int err;
4250
4251	args = fnvlist_alloc();
4252	fnvlist_add_string(args, "message", message);
4253	err = zcmd_write_src_nvlist(hdl, &zc, args);
4254	if (err == 0)
4255		err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
4256	nvlist_free(args);
4257	zcmd_free_nvlists(&zc);
4258	return (err);
4259}
4260
4261/*
4262 * Perform ioctl to get some command history of a pool.
4263 *
4264 * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
4265 * logical offset of the history buffer to start reading from.
4266 *
4267 * Upon return, 'off' is the next logical offset to read from and
4268 * 'len' is the actual amount of bytes read into 'buf'.
4269 */
4270static int
4271get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
4272{
4273	zfs_cmd_t zc = { 0 };
4274	libzfs_handle_t *hdl = zhp->zpool_hdl;
4275
4276	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4277
4278	zc.zc_history = (uint64_t)(uintptr_t)buf;
4279	zc.zc_history_len = *len;
4280	zc.zc_history_offset = *off;
4281
4282	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
4283		switch (errno) {
4284		case EPERM:
4285			return (zfs_error_fmt(hdl, EZFS_PERM,
4286			    dgettext(TEXT_DOMAIN,
4287			    "cannot show history for pool '%s'"),
4288			    zhp->zpool_name));
4289		case ENOENT:
4290			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
4291			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
4292			    "'%s'"), zhp->zpool_name));
4293		case ENOTSUP:
4294			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
4295			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
4296			    "'%s', pool must be upgraded"), zhp->zpool_name));
4297		default:
4298			return (zpool_standard_error_fmt(hdl, errno,
4299			    dgettext(TEXT_DOMAIN,
4300			    "cannot get history for '%s'"), zhp->zpool_name));
4301		}
4302	}
4303
4304	*len = zc.zc_history_len;
4305	*off = zc.zc_history_offset;
4306
4307	return (0);
4308}
4309
4310/*
4311 * Retrieve the command history of a pool.
4312 */
4313int
4314zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp, uint64_t *off,
4315    boolean_t *eof)
4316{
4317	char *buf;
4318	int buflen = 128 * 1024;
4319	nvlist_t **records = NULL;
4320	uint_t numrecords = 0;
4321	int err = 0, i;
4322	uint64_t start = *off;
4323
4324	buf = malloc(buflen);
4325	if (buf == NULL)
4326		return (ENOMEM);
4327	/* process about 1MB a time */
4328	while (*off - start < 1024 * 1024) {
4329		uint64_t bytes_read = buflen;
4330		uint64_t leftover;
4331
4332		if ((err = get_history(zhp, buf, off, &bytes_read)) != 0)
4333			break;
4334
4335		/* if nothing else was read in, we're at EOF, just return */
4336		if (!bytes_read) {
4337			*eof = B_TRUE;
4338			break;
4339		}
4340
4341		if ((err = zpool_history_unpack(buf, bytes_read,
4342		    &leftover, &records, &numrecords)) != 0)
4343			break;
4344		*off -= leftover;
4345		if (leftover == bytes_read) {
4346			/*
4347			 * no progress made, because buffer is not big enough
4348			 * to hold this record; resize and retry.
4349			 */
4350			buflen *= 2;
4351			free(buf);
4352			buf = malloc(buflen);
4353			if (buf == NULL)
4354				return (ENOMEM);
4355		}
4356	}
4357
4358	free(buf);
4359
4360	if (!err) {
4361		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
4362		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
4363		    records, numrecords) == 0);
4364	}
4365	for (i = 0; i < numrecords; i++)
4366		nvlist_free(records[i]);
4367	free(records);
4368
4369	return (err);
4370}
4371
4372void
4373zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
4374    char *pathname, size_t len)
4375{
4376	zfs_cmd_t zc = { 0 };
4377	boolean_t mounted = B_FALSE;
4378	char *mntpnt = NULL;
4379	char dsname[ZFS_MAX_DATASET_NAME_LEN];
4380
4381	if (dsobj == 0) {
4382		/* special case for the MOS */
4383		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
4384		return;
4385	}
4386
4387	/* get the dataset's name */
4388	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4389	zc.zc_obj = dsobj;
4390	if (ioctl(zhp->zpool_hdl->libzfs_fd,
4391	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
4392		/* just write out a path of two object numbers */
4393		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
4394		    dsobj, obj);
4395		return;
4396	}
4397	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
4398
4399	/* find out if the dataset is mounted */
4400	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
4401
4402	/* get the corrupted object's path */
4403	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
4404	zc.zc_obj = obj;
4405	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
4406	    &zc) == 0) {
4407		if (mounted) {
4408			(void) snprintf(pathname, len, "%s%s", mntpnt,
4409			    zc.zc_value);
4410		} else {
4411			(void) snprintf(pathname, len, "%s:%s",
4412			    dsname, zc.zc_value);
4413		}
4414	} else {
4415		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
4416	}
4417	free(mntpnt);
4418}
4419
4420/*
4421 * Read the EFI label from the config, if a label does not exist then
4422 * pass back the error to the caller. If the caller has passed a non-NULL
4423 * diskaddr argument then we set it to the starting address of the EFI
4424 * partition. If the caller has passed a non-NULL boolean argument, then
4425 * we set it to indicate if the disk does have efi system partition.
4426 */
4427static int
4428read_efi_label(nvlist_t *config, diskaddr_t *sb, boolean_t *system)
4429{
4430	char *path;
4431	int fd;
4432	char diskname[MAXPATHLEN];
4433	boolean_t boot = B_FALSE;
4434	int err = -1;
4435	int slice;
4436
4437	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4438		return (err);
4439
4440	(void) snprintf(diskname, sizeof (diskname), "%s%s", ZFS_RDISK_ROOT,
4441	    strrchr(path, '/'));
4442	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
4443		struct dk_gpt *vtoc;
4444
4445		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4446			for (slice = 0; slice < vtoc->efi_nparts; slice++) {
4447				if (vtoc->efi_parts[slice].p_tag == V_SYSTEM)
4448					boot = B_TRUE;
4449				if (vtoc->efi_parts[slice].p_tag == V_USR)
4450					break;
4451			}
4452			if (sb != NULL && vtoc->efi_parts[slice].p_tag == V_USR)
4453				*sb = vtoc->efi_parts[slice].p_start;
4454			if (system != NULL)
4455				*system = boot;
4456			efi_free(vtoc);
4457		}
4458		(void) close(fd);
4459	}
4460	return (err);
4461}
4462
4463/*
4464 * determine where a partition starts on a disk in the current
4465 * configuration
4466 */
4467static diskaddr_t
4468find_start_block(nvlist_t *config)
4469{
4470	nvlist_t **child;
4471	uint_t c, children;
4472	diskaddr_t sb = MAXOFFSET_T;
4473	uint64_t wholedisk;
4474
4475	if (nvlist_lookup_nvlist_array(config,
4476	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4477		if (nvlist_lookup_uint64(config,
4478		    ZPOOL_CONFIG_WHOLE_DISK,
4479		    &wholedisk) != 0 || !wholedisk) {
4480			return (MAXOFFSET_T);
4481		}
4482		if (read_efi_label(config, &sb, NULL) < 0)
4483			sb = MAXOFFSET_T;
4484		return (sb);
4485	}
4486
4487	for (c = 0; c < children; c++) {
4488		sb = find_start_block(child[c]);
4489		if (sb != MAXOFFSET_T) {
4490			return (sb);
4491		}
4492	}
4493	return (MAXOFFSET_T);
4494}
4495
4496/*
4497 * Label an individual disk.  The name provided is the short name,
4498 * stripped of any leading /dev path.
4499 */
4500int
4501zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name,
4502    zpool_boot_label_t boot_type, uint64_t boot_size, int *slice)
4503{
4504	char path[MAXPATHLEN];
4505	struct dk_gpt *vtoc;
4506	int fd;
4507	size_t resv;
4508	uint64_t slice_size;
4509	diskaddr_t start_block;
4510	char errbuf[1024];
4511
4512	/* prepare an error message just in case */
4513	(void) snprintf(errbuf, sizeof (errbuf),
4514	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4515
4516	if (zhp) {
4517		nvlist_t *nvroot;
4518
4519		verify(nvlist_lookup_nvlist(zhp->zpool_config,
4520		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4521
4522		if (zhp->zpool_start_block == 0)
4523			start_block = find_start_block(nvroot);
4524		else
4525			start_block = zhp->zpool_start_block;
4526		zhp->zpool_start_block = start_block;
4527	} else {
4528		/* new pool */
4529		start_block = NEW_START_BLOCK;
4530	}
4531
4532	(void) snprintf(path, sizeof (path), "%s/%s%s", ZFS_RDISK_ROOT, name,
4533	    BACKUP_SLICE);
4534
4535	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
4536		/*
4537		 * This shouldn't happen.  We've long since verified that this
4538		 * is a valid device.
4539		 */
4540		zfs_error_aux(hdl,
4541		    dgettext(TEXT_DOMAIN, "unable to open device"));
4542		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4543	}
4544
4545	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4546		/*
4547		 * The only way this can fail is if we run out of memory, or we
4548		 * were unable to read the disk's capacity
4549		 */
4550		if (errno == ENOMEM)
4551			(void) no_memory(hdl);
4552
4553		(void) close(fd);
4554		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4555		    "unable to read disk capacity"), name);
4556
4557		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4558	}
4559	resv = efi_reserved_sectors(vtoc);
4560
4561	/*
4562	 * Why we use V_USR: V_BACKUP confuses users, and is considered
4563	 * disposable by some EFI utilities (since EFI doesn't have a backup
4564	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
4565	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4566	 * etc. were all pretty specific.  V_USR is as close to reality as we
4567	 * can get, in the absence of V_OTHER.
4568	 */
4569	/* first fix the partition start block */
4570	if (start_block == MAXOFFSET_T)
4571		start_block = NEW_START_BLOCK;
4572
4573	/*
4574	 * EFI System partition is using slice 0.
4575	 * ZFS is on slice 1 and slice 8 is reserved.
4576	 * We assume the GPT partition table without system
4577	 * partition has zfs p_start == NEW_START_BLOCK.
4578	 * If start_block != NEW_START_BLOCK, it means we have
4579	 * system partition. Correct solution would be to query/cache vtoc
4580	 * from existing vdev member.
4581	 */
4582	if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
4583		if (boot_size % vtoc->efi_lbasize != 0) {
4584			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4585			    "boot partition size must be a multiple of %d"),
4586			    vtoc->efi_lbasize);
4587			(void) close(fd);
4588			efi_free(vtoc);
4589			return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4590		}
4591		/*
4592		 * System partition size checks.
4593		 * Note the 1MB is quite arbitrary value, since we
4594		 * are creating dedicated pool, it should be enough
4595		 * to hold fat + efi bootloader. May need to be
4596		 * adjusted if the bootloader size will grow.
4597		 */
4598		if (boot_size < 1024 * 1024) {
4599			char buf[64];
4600			zfs_nicenum(boot_size, buf, sizeof (buf));
4601			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4602			    "Specified size %s for EFI System partition is too "
4603			    "small, the minimum size is 1MB."), buf);
4604			(void) close(fd);
4605			efi_free(vtoc);
4606			return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4607		}
4608		/* 33MB is tested with mkfs -F pcfs */
4609		if (hdl->libzfs_printerr &&
4610		    ((vtoc->efi_lbasize == 512 &&
4611		    boot_size < 33 * 1024 * 1024) ||
4612		    (vtoc->efi_lbasize == 4096 &&
4613		    boot_size < 256 * 1024 * 1024)))  {
4614			char buf[64];
4615			zfs_nicenum(boot_size, buf, sizeof (buf));
4616			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4617			    "Warning: EFI System partition size %s is "
4618			    "not allowing to create FAT32 file\nsystem, which "
4619			    "may result in unbootable system.\n"), buf);
4620		}
4621		/* Adjust zfs partition start by size of system partition. */
4622		start_block += boot_size / vtoc->efi_lbasize;
4623	}
4624
4625	if (start_block == NEW_START_BLOCK) {
4626		/*
4627		 * Use default layout.
4628		 * ZFS is on slice 0 and slice 8 is reserved.
4629		 */
4630		slice_size = vtoc->efi_last_u_lba + 1;
4631		slice_size -= resv;
4632		slice_size -= start_block;
4633		if (slice != NULL)
4634			*slice = 0;
4635
4636		vtoc->efi_parts[0].p_start = start_block;
4637		vtoc->efi_parts[0].p_size = slice_size;
4638
4639		vtoc->efi_parts[0].p_tag = V_USR;
4640		(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
4641
4642		vtoc->efi_parts[8].p_start = slice_size + start_block;
4643		vtoc->efi_parts[8].p_size = resv;
4644		vtoc->efi_parts[8].p_tag = V_RESERVED;
4645	} else {
4646		slice_size = start_block - NEW_START_BLOCK;
4647		vtoc->efi_parts[0].p_start = NEW_START_BLOCK;
4648		vtoc->efi_parts[0].p_size = slice_size;
4649		vtoc->efi_parts[0].p_tag = V_SYSTEM;
4650		(void) strcpy(vtoc->efi_parts[0].p_name, "loader");
4651		if (slice != NULL)
4652			*slice = 1;
4653		/* prepare slice 1 */
4654		slice_size = vtoc->efi_last_u_lba + 1 - slice_size;
4655		slice_size -= resv;
4656		slice_size -= NEW_START_BLOCK;
4657		vtoc->efi_parts[1].p_start = start_block;
4658		vtoc->efi_parts[1].p_size = slice_size;
4659		vtoc->efi_parts[1].p_tag = V_USR;
4660		(void) strcpy(vtoc->efi_parts[1].p_name, "zfs");
4661
4662		vtoc->efi_parts[8].p_start = slice_size + start_block;
4663		vtoc->efi_parts[8].p_size = resv;
4664		vtoc->efi_parts[8].p_tag = V_RESERVED;
4665	}
4666
4667	if (efi_write(fd, vtoc) != 0) {
4668		/*
4669		 * Some block drivers (like pcata) may not support EFI
4670		 * GPT labels.  Print out a helpful error message dir-
4671		 * ecting the user to manually label the disk and give
4672		 * a specific slice.
4673		 */
4674		(void) close(fd);
4675		efi_free(vtoc);
4676
4677		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4678		    "try using fdisk(1M) and then provide a specific slice"));
4679		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4680	}
4681
4682	(void) close(fd);
4683	efi_free(vtoc);
4684	return (0);
4685}
4686
4687static boolean_t
4688supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
4689{
4690	char *type;
4691	nvlist_t **child;
4692	uint_t children, c;
4693
4694	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
4695	if (strcmp(type, VDEV_TYPE_FILE) == 0 ||
4696	    strcmp(type, VDEV_TYPE_HOLE) == 0 ||
4697	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
4698		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4699		    "vdev type '%s' is not supported"), type);
4700		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
4701		return (B_FALSE);
4702	}
4703	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
4704	    &child, &children) == 0) {
4705		for (c = 0; c < children; c++) {
4706			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
4707				return (B_FALSE);
4708		}
4709	}
4710	return (B_TRUE);
4711}
4712
4713/*
4714 * Check if this zvol is allowable for use as a dump device; zero if
4715 * it is, > 0 if it isn't, < 0 if it isn't a zvol.
4716 *
4717 * Allowable storage configurations include mirrors, all raidz variants, and
4718 * pools with log, cache, and spare devices.  Pools which are backed by files or
4719 * have missing/hole vdevs are not suitable.
4720 */
4721int
4722zvol_check_dump_config(char *arg)
4723{
4724	zpool_handle_t *zhp = NULL;
4725	nvlist_t *config, *nvroot;
4726	char *p, *volname;
4727	nvlist_t **top;
4728	uint_t toplevels;
4729	libzfs_handle_t *hdl;
4730	char errbuf[1024];
4731	char poolname[ZFS_MAX_DATASET_NAME_LEN];
4732	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
4733	int ret = 1;
4734
4735	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
4736		return (-1);
4737	}
4738
4739	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4740	    "dump is not supported on device '%s'"), arg);
4741
4742	if ((hdl = libzfs_init()) == NULL)
4743		return (1);
4744	libzfs_print_on_error(hdl, B_TRUE);
4745
4746	volname = arg + pathlen;
4747
4748	/* check the configuration of the pool */
4749	if ((p = strchr(volname, '/')) == NULL) {
4750		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4751		    "malformed dataset name"));
4752		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4753		return (1);
4754	} else if (p - volname >= ZFS_MAX_DATASET_NAME_LEN) {
4755		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4756		    "dataset name is too long"));
4757		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
4758		return (1);
4759	} else {
4760		(void) strncpy(poolname, volname, p - volname);
4761		poolname[p - volname] = '\0';
4762	}
4763
4764	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
4765		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4766		    "could not open pool '%s'"), poolname);
4767		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
4768		goto out;
4769	}
4770	config = zpool_get_config(zhp, NULL);
4771	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
4772	    &nvroot) != 0) {
4773		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4774		    "could not obtain vdev configuration for  '%s'"), poolname);
4775		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
4776		goto out;
4777	}
4778
4779	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
4780	    &top, &toplevels) == 0);
4781
4782	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
4783		goto out;
4784	}
4785	ret = 0;
4786
4787out:
4788	if (zhp)
4789		zpool_close(zhp);
4790	libzfs_fini(hdl);
4791	return (ret);
4792}
4793