xref: /illumos-gate/usr/src/cmd/zpool/zpool_vdev.c (revision ea8dc4b6d2251b437950c0056bc626b311c73c27)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5*ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6*ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
2246a2abf2Seschrock  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27fa9e4066Sahrens 
28fa9e4066Sahrens /*
29fa9e4066Sahrens  * Functions to convert between a list of vdevs and an nvlist representing the
30fa9e4066Sahrens  * configuration.  Each entry in the list can be one of:
31fa9e4066Sahrens  *
32fa9e4066Sahrens  * 	Device vdevs
33fa9e4066Sahrens  * 		disk=(path=..., devid=...)
34fa9e4066Sahrens  * 		file=(path=...)
35fa9e4066Sahrens  *
36fa9e4066Sahrens  * 	Group vdevs
37fa9e4066Sahrens  * 		raidz=(...)
38fa9e4066Sahrens  * 		mirror=(...)
39fa9e4066Sahrens  *
40fa9e4066Sahrens  * While the underlying implementation supports it, group vdevs cannot contain
41fa9e4066Sahrens  * other group vdevs.  All userland verification of devices is contained within
42fa9e4066Sahrens  * this file.  If successful, the nvlist returned can be passed directly to the
43fa9e4066Sahrens  * kernel; we've done as much verification as possible in userland.
44fa9e4066Sahrens  *
45fa9e4066Sahrens  * The only function exported by this file is 'get_vdev_spec'.  The function
46fa9e4066Sahrens  * performs several passes:
47fa9e4066Sahrens  *
48fa9e4066Sahrens  * 	1. Construct the vdev specification.  Performs syntax validation and
49fa9e4066Sahrens  *         makes sure each device is valid.
50fa9e4066Sahrens  * 	2. Check for devices in use.  Using libdiskmgt, makes sure that no
51fa9e4066Sahrens  *         devices are also in use.  Some can be overridden using the 'force'
52fa9e4066Sahrens  *         flag, others cannot.
53fa9e4066Sahrens  * 	3. Check for replication errors if the 'force' flag is not specified.
54fa9e4066Sahrens  *         Validates that the replication level is consistent across the
55fa9e4066Sahrens  *         entire pool.
56fa9e4066Sahrens  * 	4. Label any whole disks with an EFI label.
57fa9e4066Sahrens  */
58fa9e4066Sahrens 
59fa9e4066Sahrens #include <assert.h>
60fa9e4066Sahrens #include <devid.h>
61fa9e4066Sahrens #include <errno.h>
62fa9e4066Sahrens #include <fcntl.h>
63fa9e4066Sahrens #include <libdiskmgt.h>
64fa9e4066Sahrens #include <libintl.h>
65fa9e4066Sahrens #include <libnvpair.h>
66fa9e4066Sahrens #include <stdio.h>
67fa9e4066Sahrens #include <string.h>
68fa9e4066Sahrens #include <unistd.h>
69fa9e4066Sahrens #include <sys/efi_partition.h>
70fa9e4066Sahrens #include <sys/stat.h>
71fa9e4066Sahrens #include <sys/vtoc.h>
72fa9e4066Sahrens #include <sys/mntent.h>
73fa9e4066Sahrens 
74fa9e4066Sahrens #include <libzfs.h>
75fa9e4066Sahrens 
76fa9e4066Sahrens #include "zpool_util.h"
77fa9e4066Sahrens 
78fa9e4066Sahrens #define	DISK_ROOT	"/dev/dsk"
79fa9e4066Sahrens #define	RDISK_ROOT	"/dev/rdsk"
80fa9e4066Sahrens #define	BACKUP_SLICE	"s2"
81fa9e4066Sahrens 
82fa9e4066Sahrens /*
83fa9e4066Sahrens  * For any given vdev specification, we can have multiple errors.  The
84fa9e4066Sahrens  * vdev_error() function keeps track of whether we have seen an error yet, and
85fa9e4066Sahrens  * prints out a header if its the first error we've seen.
86fa9e4066Sahrens  */
87fa9e4066Sahrens int error_seen;
88fa9e4066Sahrens int is_force;
89fa9e4066Sahrens 
90fa9e4066Sahrens void
91fa9e4066Sahrens vdev_error(const char *fmt, ...)
92fa9e4066Sahrens {
93fa9e4066Sahrens 	va_list ap;
94fa9e4066Sahrens 
95fa9e4066Sahrens 	if (!error_seen) {
96fa9e4066Sahrens 		(void) fprintf(stderr, gettext("invalid vdev specification\n"));
97fa9e4066Sahrens 		if (!is_force)
98fa9e4066Sahrens 			(void) fprintf(stderr, gettext("use '-f' to override "
99fa9e4066Sahrens 			    "the following errors:\n"));
100fa9e4066Sahrens 		else
101fa9e4066Sahrens 			(void) fprintf(stderr, gettext("the following errors "
102fa9e4066Sahrens 			    "must be manually repaired:\n"));
103fa9e4066Sahrens 		error_seen = TRUE;
104fa9e4066Sahrens 	}
105fa9e4066Sahrens 
106fa9e4066Sahrens 	va_start(ap, fmt);
107fa9e4066Sahrens 	(void) vfprintf(stderr, fmt, ap);
108fa9e4066Sahrens 	va_end(ap);
109fa9e4066Sahrens }
110fa9e4066Sahrens 
/*
 * Print a warning that the libdiskmgt in-use check itself failed.
 *
 * ENXIO is a valid result if the device doesn't live in /dev/dsk, so nothing
 * is printed for it.
 */
static void
libdiskmgt_error(int error)
{
	if (error != ENXIO) {
		(void) fprintf(stderr, gettext("warning: device in use "
		    "checking failed: %s\n"), strerror(error));
	}
}
124fa9e4066Sahrens 
125fa9e4066Sahrens /*
12646a2abf2Seschrock  * Validate a device, passing the bulk of the work off to libdiskmgt.
127fa9e4066Sahrens  */
128fa9e4066Sahrens int
12946a2abf2Seschrock check_slice(const char *path, int force, int wholedisk)
130fa9e4066Sahrens {
13146a2abf2Seschrock 	char *msg;
13246a2abf2Seschrock 	int error = 0;
133fa9e4066Sahrens 	int ret = 0;
134fa9e4066Sahrens 
13546a2abf2Seschrock 	if (dm_inuse((char *)path, &msg,
13646a2abf2Seschrock 	    force ? DM_WHO_ZPOOL_FORCE : DM_WHO_ZPOOL, &error) || error) {
13746a2abf2Seschrock 		if (error != 0) {
13846a2abf2Seschrock 			libdiskmgt_error(error);
13946a2abf2Seschrock 			return (0);
14046a2abf2Seschrock 		} else {
14146a2abf2Seschrock 			vdev_error("%s", msg);
14246a2abf2Seschrock 			free(msg);
143fa9e4066Sahrens 		}
144fa9e4066Sahrens 
14546a2abf2Seschrock 		ret = -1;
146fa9e4066Sahrens 	}
147fa9e4066Sahrens 
148fa9e4066Sahrens 	/*
14946a2abf2Seschrock 	 * If we're given a whole disk, ignore overlapping slices since we're
15046a2abf2Seschrock 	 * about to label it anyway.
151fa9e4066Sahrens 	 */
15246a2abf2Seschrock 	error = 0;
15346a2abf2Seschrock 	if (!wholedisk && !force &&
15446a2abf2Seschrock 	    (dm_isoverlapping((char *)path, &msg, &error) || error)) {
15546a2abf2Seschrock 		if (error != 0) {
15646a2abf2Seschrock 			libdiskmgt_error(error);
15746a2abf2Seschrock 			return (0);
158fa9e4066Sahrens 		} else {
15946a2abf2Seschrock 			vdev_error("%s overlaps with %s\n", path, msg);
16046a2abf2Seschrock 			free(msg);
161fa9e4066Sahrens 		}
162fa9e4066Sahrens 
16346a2abf2Seschrock 		ret = -1;
16446a2abf2Seschrock 	}
165fa9e4066Sahrens 
16646a2abf2Seschrock 	return (ret);
167fa9e4066Sahrens }
168fa9e4066Sahrens 
169fa9e4066Sahrens /*
170fa9e4066Sahrens  * Validate a whole disk.  Iterate over all slices on the disk and make sure
171fa9e4066Sahrens  * that none is in use by calling check_slice().
172fa9e4066Sahrens  */
173fa9e4066Sahrens /* ARGSUSED */
174fa9e4066Sahrens int
175fa9e4066Sahrens check_disk(const char *name, dm_descriptor_t disk, int force)
176fa9e4066Sahrens {
177fa9e4066Sahrens 	dm_descriptor_t *drive, *media, *slice;
178fa9e4066Sahrens 	int err = 0;
179fa9e4066Sahrens 	int i;
180fa9e4066Sahrens 	int ret;
181fa9e4066Sahrens 
182fa9e4066Sahrens 	/*
183fa9e4066Sahrens 	 * Get the drive associated with this disk.  This should never fail,
184fa9e4066Sahrens 	 * because we already have an alias handle open for the device.
185fa9e4066Sahrens 	 */
186fa9e4066Sahrens 	if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
18746a2abf2Seschrock 	    &err)) == NULL || *drive == NULL) {
18846a2abf2Seschrock 		if (err)
18946a2abf2Seschrock 			libdiskmgt_error(err);
19046a2abf2Seschrock 		return (0);
19146a2abf2Seschrock 	}
192fa9e4066Sahrens 
193fa9e4066Sahrens 	if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
19446a2abf2Seschrock 	    &err)) == NULL) {
19546a2abf2Seschrock 		dm_free_descriptors(drive);
19646a2abf2Seschrock 		if (err)
19746a2abf2Seschrock 			libdiskmgt_error(err);
19846a2abf2Seschrock 		return (0);
19946a2abf2Seschrock 	}
200fa9e4066Sahrens 
201fa9e4066Sahrens 	dm_free_descriptors(drive);
202fa9e4066Sahrens 
203fa9e4066Sahrens 	/*
204fa9e4066Sahrens 	 * It is possible that the user has specified a removable media drive,
205fa9e4066Sahrens 	 * and the media is not present.
206fa9e4066Sahrens 	 */
207fa9e4066Sahrens 	if (*media == NULL) {
208fa9e4066Sahrens 		dm_free_descriptors(media);
20946a2abf2Seschrock 		vdev_error(gettext("'%s' has no media in drive\n"), name);
210fa9e4066Sahrens 		return (-1);
211fa9e4066Sahrens 	}
212fa9e4066Sahrens 
213fa9e4066Sahrens 	if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
21446a2abf2Seschrock 	    &err)) == NULL) {
21546a2abf2Seschrock 		dm_free_descriptors(media);
21646a2abf2Seschrock 		if (err)
21746a2abf2Seschrock 			libdiskmgt_error(err);
21846a2abf2Seschrock 		return (0);
21946a2abf2Seschrock 	}
220fa9e4066Sahrens 
221fa9e4066Sahrens 	dm_free_descriptors(media);
222fa9e4066Sahrens 
223fa9e4066Sahrens 	ret = 0;
224fa9e4066Sahrens 
225fa9e4066Sahrens 	/*
226fa9e4066Sahrens 	 * Iterate over all slices and report any errors.  We don't care about
227fa9e4066Sahrens 	 * overlapping slices because we are using the whole disk.
228fa9e4066Sahrens 	 */
229fa9e4066Sahrens 	for (i = 0; slice[i] != NULL; i++) {
23046a2abf2Seschrock 		if (check_slice(dm_get_name(slice[i], &err), force, TRUE) != 0)
231fa9e4066Sahrens 			ret = -1;
232fa9e4066Sahrens 	}
233fa9e4066Sahrens 
234fa9e4066Sahrens 	dm_free_descriptors(slice);
235fa9e4066Sahrens 	return (ret);
236fa9e4066Sahrens }
237fa9e4066Sahrens 
238fa9e4066Sahrens /*
23946a2abf2Seschrock  * Validate a device.
240fa9e4066Sahrens  */
241fa9e4066Sahrens int
242fa9e4066Sahrens check_device(const char *path, int force)
243fa9e4066Sahrens {
244fa9e4066Sahrens 	dm_descriptor_t desc;
245fa9e4066Sahrens 	int err;
24646a2abf2Seschrock 	char *dev;
247fa9e4066Sahrens 
248fa9e4066Sahrens 	/*
249fa9e4066Sahrens 	 * For whole disks, libdiskmgt does not include the leading dev path.
250fa9e4066Sahrens 	 */
251fa9e4066Sahrens 	dev = strrchr(path, '/');
252fa9e4066Sahrens 	assert(dev != NULL);
253fa9e4066Sahrens 	dev++;
25446a2abf2Seschrock 	if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
25546a2abf2Seschrock 		err = check_disk(path, desc, force);
25646a2abf2Seschrock 		dm_free_descriptor(desc);
25746a2abf2Seschrock 		return (err);
258fa9e4066Sahrens 	}
259fa9e4066Sahrens 
26046a2abf2Seschrock 	return (check_slice(path, force, FALSE));
261fa9e4066Sahrens }
262fa9e4066Sahrens 
263fa9e4066Sahrens /*
264fa9e4066Sahrens  * Check that a file is valid.  All we can do in this case is check that it's
265fa9e4066Sahrens  * not in use by another pool.
266fa9e4066Sahrens  */
267fa9e4066Sahrens int
268fa9e4066Sahrens check_file(const char *file, int force)
269fa9e4066Sahrens {
27046a2abf2Seschrock 	char  *name;
271fa9e4066Sahrens 	int fd;
272fa9e4066Sahrens 	int ret = 0;
27346a2abf2Seschrock 	pool_state_t state;
274fa9e4066Sahrens 
275fa9e4066Sahrens 	if ((fd = open(file, O_RDONLY)) < 0)
276fa9e4066Sahrens 		return (0);
277fa9e4066Sahrens 
27846a2abf2Seschrock 	if (zpool_in_use(fd, &state, &name)) {
27946a2abf2Seschrock 		const char *desc;
28046a2abf2Seschrock 
28146a2abf2Seschrock 		switch (state) {
28246a2abf2Seschrock 		case POOL_STATE_ACTIVE:
28346a2abf2Seschrock 			desc = gettext("active");
28446a2abf2Seschrock 			break;
28546a2abf2Seschrock 
28646a2abf2Seschrock 		case POOL_STATE_EXPORTED:
28746a2abf2Seschrock 			desc = gettext("exported");
28846a2abf2Seschrock 			break;
28946a2abf2Seschrock 
29046a2abf2Seschrock 		case POOL_STATE_POTENTIALLY_ACTIVE:
29146a2abf2Seschrock 			desc = gettext("potentially active");
29246a2abf2Seschrock 			break;
29346a2abf2Seschrock 
29446a2abf2Seschrock 		default:
29546a2abf2Seschrock 			desc = gettext("unknown");
29646a2abf2Seschrock 			break;
29746a2abf2Seschrock 		}
29846a2abf2Seschrock 
29946a2abf2Seschrock 		if (state == POOL_STATE_ACTIVE || !force) {
300fa9e4066Sahrens 			vdev_error(gettext("%s is part of %s pool '%s'\n"),
301fa9e4066Sahrens 			    file, desc, name);
302fa9e4066Sahrens 			ret = -1;
303fa9e4066Sahrens 		}
304fa9e4066Sahrens 
305fa9e4066Sahrens 		free(name);
306fa9e4066Sahrens 	}
307fa9e4066Sahrens 
308fa9e4066Sahrens 	(void) close(fd);
309fa9e4066Sahrens 	return (ret);
310fa9e4066Sahrens }
311fa9e4066Sahrens 
312fa9e4066Sahrens static int
313fa9e4066Sahrens is_whole_disk(const char *arg, struct stat64 *statbuf)
314fa9e4066Sahrens {
315fa9e4066Sahrens 	char path[MAXPATHLEN];
316fa9e4066Sahrens 
317fa9e4066Sahrens 	(void) snprintf(path, sizeof (path), "%s%s", arg, BACKUP_SLICE);
318fa9e4066Sahrens 	if (stat64(path, statbuf) == 0)
319fa9e4066Sahrens 		return (TRUE);
320fa9e4066Sahrens 
321fa9e4066Sahrens 	return (FALSE);
322fa9e4066Sahrens }
323fa9e4066Sahrens 
324fa9e4066Sahrens /*
325fa9e4066Sahrens  * Create a leaf vdev.  Determine if this is a file or a device.  If it's a
326fa9e4066Sahrens  * device, fill in the device id to make a complete nvlist.  Valid forms for a
327fa9e4066Sahrens  * leaf vdev are:
328fa9e4066Sahrens  *
329fa9e4066Sahrens  * 	/dev/dsk/xxx	Complete disk path
330fa9e4066Sahrens  * 	/xxx		Full path to file
331fa9e4066Sahrens  * 	xxx		Shorthand for /dev/dsk/xxx
332fa9e4066Sahrens  */
333fa9e4066Sahrens nvlist_t *
334fa9e4066Sahrens make_leaf_vdev(const char *arg)
335fa9e4066Sahrens {
336fa9e4066Sahrens 	char path[MAXPATHLEN];
337fa9e4066Sahrens 	struct stat64 statbuf;
338fa9e4066Sahrens 	nvlist_t *vdev = NULL;
339fa9e4066Sahrens 	char *type = NULL;
340fa9e4066Sahrens 	int wholedisk = FALSE;
341fa9e4066Sahrens 
342fa9e4066Sahrens 	/*
343fa9e4066Sahrens 	 * Determine what type of vdev this is, and put the full path into
344fa9e4066Sahrens 	 * 'path'.  We detect whether this is a device of file afterwards by
345fa9e4066Sahrens 	 * checking the st_mode of the file.
346fa9e4066Sahrens 	 */
347fa9e4066Sahrens 	if (arg[0] == '/') {
348fa9e4066Sahrens 		/*
349fa9e4066Sahrens 		 * Complete device or file path.  Exact type is determined by
350fa9e4066Sahrens 		 * examining the file descriptor afterwards.
351fa9e4066Sahrens 		 */
352fa9e4066Sahrens 		if (is_whole_disk(arg, &statbuf)) {
353fa9e4066Sahrens 			wholedisk = TRUE;
354fa9e4066Sahrens 		} else if (stat64(arg, &statbuf) != 0) {
355fa9e4066Sahrens 			(void) fprintf(stderr,
356fa9e4066Sahrens 			    gettext("cannot open '%s': %s\n"),
357fa9e4066Sahrens 			    arg, strerror(errno));
358fa9e4066Sahrens 			return (NULL);
359fa9e4066Sahrens 		}
360fa9e4066Sahrens 
361fa9e4066Sahrens 		(void) strlcpy(path, arg, sizeof (path));
362fa9e4066Sahrens 	} else {
363fa9e4066Sahrens 		/*
364fa9e4066Sahrens 		 * This may be a short path for a device, or it could be total
365fa9e4066Sahrens 		 * gibberish.  Check to see if it's a known device in
366fa9e4066Sahrens 		 * /dev/dsk/.  As part of this check, see if we've been given a
367fa9e4066Sahrens 		 * an entire disk (minus the slice number).
368fa9e4066Sahrens 		 */
369fa9e4066Sahrens 		(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT,
370fa9e4066Sahrens 		    arg);
371fa9e4066Sahrens 		if (is_whole_disk(path, &statbuf)) {
372fa9e4066Sahrens 			wholedisk = TRUE;
373fa9e4066Sahrens 		} else if (stat64(path, &statbuf) != 0) {
374fa9e4066Sahrens 			/*
375fa9e4066Sahrens 			 * If we got ENOENT, then the user gave us
376fa9e4066Sahrens 			 * gibberish, so try to direct them with a
377fa9e4066Sahrens 			 * reasonable error message.  Otherwise,
378fa9e4066Sahrens 			 * regurgitate strerror() since it's the best we
379fa9e4066Sahrens 			 * can do.
380fa9e4066Sahrens 			 */
381fa9e4066Sahrens 			if (errno == ENOENT) {
382fa9e4066Sahrens 				(void) fprintf(stderr,
383fa9e4066Sahrens 				    gettext("cannot open '%s': no such "
384fa9e4066Sahrens 				    "device in %s\n"), arg, DISK_ROOT);
385fa9e4066Sahrens 				(void) fprintf(stderr,
386fa9e4066Sahrens 				    gettext("must be a full path or "
387fa9e4066Sahrens 				    "shorthand device name\n"));
388fa9e4066Sahrens 				return (NULL);
389fa9e4066Sahrens 			} else {
390fa9e4066Sahrens 				(void) fprintf(stderr,
391fa9e4066Sahrens 				    gettext("cannot open '%s': %s\n"),
392fa9e4066Sahrens 				    path, strerror(errno));
393fa9e4066Sahrens 				return (NULL);
394fa9e4066Sahrens 			}
395fa9e4066Sahrens 		}
396fa9e4066Sahrens 	}
397fa9e4066Sahrens 
398fa9e4066Sahrens 	/*
399fa9e4066Sahrens 	 * Determine whether this is a device or a file.
400fa9e4066Sahrens 	 */
401fa9e4066Sahrens 	if (S_ISBLK(statbuf.st_mode)) {
402fa9e4066Sahrens 		type = VDEV_TYPE_DISK;
403fa9e4066Sahrens 	} else if (S_ISREG(statbuf.st_mode)) {
404fa9e4066Sahrens 		type = VDEV_TYPE_FILE;
405fa9e4066Sahrens 	} else {
406fa9e4066Sahrens 		(void) fprintf(stderr, gettext("cannot use '%s': must be a "
407fa9e4066Sahrens 		    "block device or regular file\n"), path);
408fa9e4066Sahrens 		return (NULL);
409fa9e4066Sahrens 	}
410fa9e4066Sahrens 
411fa9e4066Sahrens 	/*
412fa9e4066Sahrens 	 * Finally, we have the complete device or file, and we know that it is
413fa9e4066Sahrens 	 * acceptable to use.  Construct the nvlist to describe this vdev.  All
414fa9e4066Sahrens 	 * vdevs have a 'path' element, and devices also have a 'devid' element.
415fa9e4066Sahrens 	 */
416fa9e4066Sahrens 	verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
417fa9e4066Sahrens 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
418fa9e4066Sahrens 	verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
419afefbcddSeschrock 	if (strcmp(type, VDEV_TYPE_DISK) == 0)
420afefbcddSeschrock 		verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
421afefbcddSeschrock 		    (uint64_t)wholedisk) == 0);
422fa9e4066Sahrens 
423fa9e4066Sahrens 	/*
424fa9e4066Sahrens 	 * For a whole disk, defer getting its devid until after labeling it.
425fa9e4066Sahrens 	 */
426fa9e4066Sahrens 	if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
427fa9e4066Sahrens 		/*
428fa9e4066Sahrens 		 * Get the devid for the device.
429fa9e4066Sahrens 		 */
430fa9e4066Sahrens 		int fd;
431fa9e4066Sahrens 		ddi_devid_t devid;
432fa9e4066Sahrens 		char *minor = NULL, *devid_str = NULL;
433fa9e4066Sahrens 
434fa9e4066Sahrens 		if ((fd = open(path, O_RDONLY)) < 0) {
435fa9e4066Sahrens 			(void) fprintf(stderr, gettext("cannot open '%s': "
436fa9e4066Sahrens 			    "%s\n"), path, strerror(errno));
437fa9e4066Sahrens 			nvlist_free(vdev);
438fa9e4066Sahrens 			return (NULL);
439fa9e4066Sahrens 		}
440fa9e4066Sahrens 
441fa9e4066Sahrens 		if (devid_get(fd, &devid) == 0) {
442fa9e4066Sahrens 			if (devid_get_minor_name(fd, &minor) == 0 &&
443fa9e4066Sahrens 			    (devid_str = devid_str_encode(devid, minor)) !=
444fa9e4066Sahrens 			    NULL) {
445fa9e4066Sahrens 				verify(nvlist_add_string(vdev,
446fa9e4066Sahrens 				    ZPOOL_CONFIG_DEVID, devid_str) == 0);
447fa9e4066Sahrens 			}
448fa9e4066Sahrens 			if (devid_str != NULL)
449fa9e4066Sahrens 				devid_str_free(devid_str);
450fa9e4066Sahrens 			if (minor != NULL)
451fa9e4066Sahrens 				devid_str_free(minor);
452fa9e4066Sahrens 			devid_free(devid);
453fa9e4066Sahrens 		}
454fa9e4066Sahrens 
455fa9e4066Sahrens 		(void) close(fd);
456fa9e4066Sahrens 	}
457fa9e4066Sahrens 
458fa9e4066Sahrens 	return (vdev);
459fa9e4066Sahrens }
460fa9e4066Sahrens 
461fa9e4066Sahrens /*
462fa9e4066Sahrens  * Go through and verify the replication level of the pool is consistent.
463fa9e4066Sahrens  * Performs the following checks:
464fa9e4066Sahrens  *
465fa9e4066Sahrens  * 	For the new spec, verifies that devices in mirrors and raidz are the
466fa9e4066Sahrens  * 	same size.
467fa9e4066Sahrens  *
468fa9e4066Sahrens  * 	If the current configuration already has inconsistent replication
469fa9e4066Sahrens  * 	levels, ignore any other potential problems in the new spec.
470fa9e4066Sahrens  *
471fa9e4066Sahrens  * 	Otherwise, make sure that the current spec (if there is one) and the new
472fa9e4066Sahrens  * 	spec have consistent replication levels.
473fa9e4066Sahrens  */
474fa9e4066Sahrens typedef struct replication_level {
475fa9e4066Sahrens 	char	*type;
476fa9e4066Sahrens 	int	level;
477fa9e4066Sahrens } replication_level_t;
478fa9e4066Sahrens 
479fa9e4066Sahrens /*
480fa9e4066Sahrens  * Given a list of toplevel vdevs, return the current replication level.  If
481fa9e4066Sahrens  * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
482fa9e4066Sahrens  * an error message will be displayed for each self-inconsistent vdev.
483fa9e4066Sahrens  */
484fa9e4066Sahrens replication_level_t *
485fa9e4066Sahrens get_replication(nvlist_t *nvroot, int fatal)
486fa9e4066Sahrens {
487fa9e4066Sahrens 	nvlist_t **top;
488fa9e4066Sahrens 	uint_t t, toplevels;
489fa9e4066Sahrens 	nvlist_t **child;
490fa9e4066Sahrens 	uint_t c, children;
491fa9e4066Sahrens 	nvlist_t *nv;
492fa9e4066Sahrens 	char *type;
493fa9e4066Sahrens 	replication_level_t lastrep, rep, *ret;
494fa9e4066Sahrens 	int dontreport;
495fa9e4066Sahrens 
496fa9e4066Sahrens 	ret = safe_malloc(sizeof (replication_level_t));
497fa9e4066Sahrens 
498fa9e4066Sahrens 	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
499fa9e4066Sahrens 	    &top, &toplevels) == 0);
500fa9e4066Sahrens 
501fa9e4066Sahrens 	lastrep.type = NULL;
502fa9e4066Sahrens 	for (t = 0; t < toplevels; t++) {
503fa9e4066Sahrens 		nv = top[t];
504fa9e4066Sahrens 
505fa9e4066Sahrens 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
506fa9e4066Sahrens 
507fa9e4066Sahrens 		if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
508fa9e4066Sahrens 		    &child, &children) != 0) {
509fa9e4066Sahrens 			/*
510fa9e4066Sahrens 			 * This is a 'file' or 'disk' vdev.
511fa9e4066Sahrens 			 */
512fa9e4066Sahrens 			rep.type = type;
513fa9e4066Sahrens 			rep.level = 1;
514fa9e4066Sahrens 		} else {
515fa9e4066Sahrens 			uint64_t vdev_size;
516fa9e4066Sahrens 
517fa9e4066Sahrens 			/*
518fa9e4066Sahrens 			 * This is a mirror or RAID-Z vdev.  Go through and make
519fa9e4066Sahrens 			 * sure the contents are all the same (files vs. disks),
520fa9e4066Sahrens 			 * keeping track of the number of elements in the
521fa9e4066Sahrens 			 * process.
522fa9e4066Sahrens 			 *
523fa9e4066Sahrens 			 * We also check that the size of each vdev (if it can
524fa9e4066Sahrens 			 * be determined) is the same.
525fa9e4066Sahrens 			 */
526fa9e4066Sahrens 			rep.type = type;
527fa9e4066Sahrens 			rep.level = 0;
528fa9e4066Sahrens 
529fa9e4066Sahrens 			/*
530fa9e4066Sahrens 			 * The 'dontreport' variable indicatest that we've
531fa9e4066Sahrens 			 * already reported an error for this spec, so don't
532fa9e4066Sahrens 			 * bother doing it again.
533fa9e4066Sahrens 			 */
534fa9e4066Sahrens 			type = NULL;
535fa9e4066Sahrens 			dontreport = 0;
536fa9e4066Sahrens 			vdev_size = -1ULL;
537fa9e4066Sahrens 			for (c = 0; c < children; c++) {
538fa9e4066Sahrens 				nvlist_t *cnv = child[c];
539fa9e4066Sahrens 				char *path;
540fa9e4066Sahrens 				struct stat64 statbuf;
541fa9e4066Sahrens 				uint64_t size = -1ULL;
542fa9e4066Sahrens 				char *childtype;
543fa9e4066Sahrens 				int fd, err;
544fa9e4066Sahrens 
545fa9e4066Sahrens 				rep.level++;
546fa9e4066Sahrens 
547fa9e4066Sahrens 				verify(nvlist_lookup_string(cnv,
548fa9e4066Sahrens 				    ZPOOL_CONFIG_TYPE, &childtype) == 0);
549fa9e4066Sahrens 				verify(nvlist_lookup_string(cnv,
550fa9e4066Sahrens 				    ZPOOL_CONFIG_PATH, &path) == 0);
551fa9e4066Sahrens 
552fa9e4066Sahrens 				/*
553fa9e4066Sahrens 				 * If we have a raidz/mirror that combines disks
554fa9e4066Sahrens 				 * with files, report it as an error.
555fa9e4066Sahrens 				 */
556fa9e4066Sahrens 				if (!dontreport && type != NULL &&
557fa9e4066Sahrens 				    strcmp(type, childtype) != 0) {
558fa9e4066Sahrens 					if (ret != NULL)
559fa9e4066Sahrens 						free(ret);
560fa9e4066Sahrens 					ret = NULL;
561fa9e4066Sahrens 					if (fatal)
562fa9e4066Sahrens 						vdev_error(gettext(
563fa9e4066Sahrens 						    "mismatched replication "
564fa9e4066Sahrens 						    "level: %s contains both "
565fa9e4066Sahrens 						    "files and devices\n"),
566fa9e4066Sahrens 						    rep.type);
567fa9e4066Sahrens 					else
568fa9e4066Sahrens 						return (NULL);
569fa9e4066Sahrens 					dontreport = TRUE;
570fa9e4066Sahrens 				}
571fa9e4066Sahrens 
572fa9e4066Sahrens 				/*
573fa9e4066Sahrens 				 * According to stat(2), the value of 'st_size'
574fa9e4066Sahrens 				 * is undefined for block devices and character
575fa9e4066Sahrens 				 * devices.  But there is no effective way to
576fa9e4066Sahrens 				 * determine the real size in userland.
577fa9e4066Sahrens 				 *
578fa9e4066Sahrens 				 * Instead, we'll take advantage of an
579fa9e4066Sahrens 				 * implementation detail of spec_size().  If the
580fa9e4066Sahrens 				 * device is currently open, then we (should)
581fa9e4066Sahrens 				 * return a valid size.
582fa9e4066Sahrens 				 *
583fa9e4066Sahrens 				 * If we still don't get a valid size (indicated
584fa9e4066Sahrens 				 * by a size of 0 or MAXOFFSET_T), then ignore
585fa9e4066Sahrens 				 * this device altogether.
586fa9e4066Sahrens 				 */
587fa9e4066Sahrens 				if ((fd = open(path, O_RDONLY)) >= 0) {
588fa9e4066Sahrens 					err = fstat64(fd, &statbuf);
589fa9e4066Sahrens 					(void) close(fd);
590fa9e4066Sahrens 				} else {
591fa9e4066Sahrens 					err = stat64(path, &statbuf);
592fa9e4066Sahrens 				}
593fa9e4066Sahrens 
594fa9e4066Sahrens 				if (err != 0 ||
595fa9e4066Sahrens 				    statbuf.st_size == 0 ||
596fa9e4066Sahrens 				    statbuf.st_size == MAXOFFSET_T)
597fa9e4066Sahrens 					continue;
598fa9e4066Sahrens 
599fa9e4066Sahrens 				size = statbuf.st_size;
600fa9e4066Sahrens 
601fa9e4066Sahrens 				/*
602fa9e4066Sahrens 				 * Also check the size of each device.  If they
603fa9e4066Sahrens 				 * differ, then report an error.
604fa9e4066Sahrens 				 */
605fa9e4066Sahrens 				if (!dontreport && vdev_size != -1ULL &&
606fa9e4066Sahrens 				    size != vdev_size) {
607fa9e4066Sahrens 					if (ret != NULL)
608fa9e4066Sahrens 						free(ret);
609fa9e4066Sahrens 					ret = NULL;
610fa9e4066Sahrens 					if (fatal)
611fa9e4066Sahrens 						vdev_error(gettext(
612fa9e4066Sahrens 						    "%s contains devices of "
613fa9e4066Sahrens 						    "different sizes\n"),
614fa9e4066Sahrens 						    rep.type);
615fa9e4066Sahrens 					else
616fa9e4066Sahrens 						return (NULL);
617fa9e4066Sahrens 					dontreport = TRUE;
618fa9e4066Sahrens 				}
619fa9e4066Sahrens 
620fa9e4066Sahrens 				type = childtype;
621fa9e4066Sahrens 				vdev_size = size;
622fa9e4066Sahrens 			}
623fa9e4066Sahrens 		}
624fa9e4066Sahrens 
625fa9e4066Sahrens 		/*
626fa9e4066Sahrens 		 * At this point, we have the replication of the last toplevel
627fa9e4066Sahrens 		 * vdev in 'rep'.  Compare it to 'lastrep' to see if its
628fa9e4066Sahrens 		 * different.
629fa9e4066Sahrens 		 */
630fa9e4066Sahrens 		if (lastrep.type != NULL) {
631fa9e4066Sahrens 			if (strcmp(lastrep.type, rep.type) != 0) {
632fa9e4066Sahrens 				if (ret != NULL)
633fa9e4066Sahrens 					free(ret);
634fa9e4066Sahrens 				ret = NULL;
635fa9e4066Sahrens 				if (fatal)
636fa9e4066Sahrens 					vdev_error(gettext(
637fa9e4066Sahrens 					    "mismatched replication "
638fa9e4066Sahrens 					    "level: both %s and %s vdevs are "
639fa9e4066Sahrens 					    "present\n"),
640fa9e4066Sahrens 					    lastrep.type, rep.type);
641fa9e4066Sahrens 				else
642fa9e4066Sahrens 					return (NULL);
643fa9e4066Sahrens 			} else if (lastrep.level != rep.level) {
644fa9e4066Sahrens 				if (ret)
645fa9e4066Sahrens 					free(ret);
646fa9e4066Sahrens 				ret = NULL;
647fa9e4066Sahrens 				if (fatal)
648fa9e4066Sahrens 					vdev_error(gettext(
649fa9e4066Sahrens 					    "mismatched replication "
650fa9e4066Sahrens 					    "level: %d-way %s and %d-way %s "
651fa9e4066Sahrens 					    "vdevs are present\n"),
652fa9e4066Sahrens 					    lastrep.level, lastrep.type,
653fa9e4066Sahrens 					    rep.level, rep.type);
654fa9e4066Sahrens 				else
655fa9e4066Sahrens 					return (NULL);
656fa9e4066Sahrens 			}
657fa9e4066Sahrens 		}
658fa9e4066Sahrens 		lastrep = rep;
659fa9e4066Sahrens 	}
660fa9e4066Sahrens 
661fa9e4066Sahrens 	if (ret != NULL) {
662fa9e4066Sahrens 		ret->type = rep.type;
663fa9e4066Sahrens 		ret->level = rep.level;
664fa9e4066Sahrens 	}
665fa9e4066Sahrens 
666fa9e4066Sahrens 	return (ret);
667fa9e4066Sahrens }
668fa9e4066Sahrens 
669fa9e4066Sahrens /*
670fa9e4066Sahrens  * Check the replication level of the vdev spec against the current pool.  Calls
671fa9e4066Sahrens  * get_replication() to make sure the new spec is self-consistent.  If the pool
672fa9e4066Sahrens  * has a consistent replication level, then we ignore any errors.  Otherwise,
673fa9e4066Sahrens  * report any difference between the two.
674fa9e4066Sahrens  */
675fa9e4066Sahrens int
676fa9e4066Sahrens check_replication(nvlist_t *config, nvlist_t *newroot)
677fa9e4066Sahrens {
678fa9e4066Sahrens 	replication_level_t *current = NULL, *new;
679fa9e4066Sahrens 	int ret;
680fa9e4066Sahrens 
681fa9e4066Sahrens 	/*
682fa9e4066Sahrens 	 * If we have a current pool configuration, check to see if it's
683fa9e4066Sahrens 	 * self-consistent.  If not, simply return success.
684fa9e4066Sahrens 	 */
685fa9e4066Sahrens 	if (config != NULL) {
686fa9e4066Sahrens 		nvlist_t *nvroot;
687fa9e4066Sahrens 
688fa9e4066Sahrens 		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
689fa9e4066Sahrens 		    &nvroot) == 0);
690fa9e4066Sahrens 		if ((current = get_replication(nvroot, FALSE)) == NULL)
691fa9e4066Sahrens 			return (0);
692fa9e4066Sahrens 	}
693fa9e4066Sahrens 
694fa9e4066Sahrens 	/*
695fa9e4066Sahrens 	 * Get the replication level of the new vdev spec, reporting any
696fa9e4066Sahrens 	 * inconsistencies found.
697fa9e4066Sahrens 	 */
698fa9e4066Sahrens 	if ((new = get_replication(newroot, TRUE)) == NULL) {
699fa9e4066Sahrens 		free(current);
700fa9e4066Sahrens 		return (-1);
701fa9e4066Sahrens 	}
702fa9e4066Sahrens 
703fa9e4066Sahrens 	/*
704fa9e4066Sahrens 	 * Check to see if the new vdev spec matches the replication level of
705fa9e4066Sahrens 	 * the current pool.
706fa9e4066Sahrens 	 */
707fa9e4066Sahrens 	ret = 0;
708fa9e4066Sahrens 	if (current != NULL) {
709fa9e4066Sahrens 		if (strcmp(current->type, new->type) != 0 ||
710fa9e4066Sahrens 		    current->level != new->level) {
711fa9e4066Sahrens 			vdev_error(gettext(
712fa9e4066Sahrens 			    "mismatched replication level: pool uses %d-way %s "
713fa9e4066Sahrens 			    "and new vdev uses %d-way %s\n"),
714fa9e4066Sahrens 			    current->level, current->type, new->level,
715fa9e4066Sahrens 			    new->type);
716fa9e4066Sahrens 			ret = -1;
717fa9e4066Sahrens 		}
718fa9e4066Sahrens 	}
719fa9e4066Sahrens 
720fa9e4066Sahrens 	free(new);
721fa9e4066Sahrens 	if (current != NULL)
722fa9e4066Sahrens 		free(current);
723fa9e4066Sahrens 
724fa9e4066Sahrens 	return (ret);
725fa9e4066Sahrens }
726fa9e4066Sahrens 
727fa9e4066Sahrens /*
728fa9e4066Sahrens  * Label an individual disk.  The name provided is the short name, stripped of
729fa9e4066Sahrens  * any leading /dev path.
730fa9e4066Sahrens  */
731fa9e4066Sahrens int
732fa9e4066Sahrens label_disk(char *name)
733fa9e4066Sahrens {
734fa9e4066Sahrens 	char path[MAXPATHLEN];
735fa9e4066Sahrens 	struct dk_gpt *vtoc;
736fa9e4066Sahrens 	int fd;
737fa9e4066Sahrens 	size_t resv = 16384;
738fa9e4066Sahrens 
739fa9e4066Sahrens 	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
740fa9e4066Sahrens 	    BACKUP_SLICE);
741fa9e4066Sahrens 
742fa9e4066Sahrens 	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
743fa9e4066Sahrens 		/*
744fa9e4066Sahrens 		 * This shouldn't happen.  We've long since verified that this
745fa9e4066Sahrens 		 * is a valid device.
746fa9e4066Sahrens 		 */
747fa9e4066Sahrens 		(void) fprintf(stderr, gettext("cannot open '%s': %s\n"),
748fa9e4066Sahrens 		    path, strerror(errno));
749fa9e4066Sahrens 		return (-1);
750fa9e4066Sahrens 	}
751fa9e4066Sahrens 
752fa9e4066Sahrens 
753fa9e4066Sahrens 	if (efi_alloc_and_init(fd, 9, &vtoc) != 0) {
754fa9e4066Sahrens 		/*
755fa9e4066Sahrens 		 * The only way this can fail is if we run out of memory, or we
756fa9e4066Sahrens 		 * were unable to read the disk geometry.
757fa9e4066Sahrens 		 */
758fa9e4066Sahrens 		if (errno == ENOMEM)
759fa9e4066Sahrens 			no_memory();
760fa9e4066Sahrens 
761fa9e4066Sahrens 		(void) fprintf(stderr, gettext("cannot label '%s': unable to "
762fa9e4066Sahrens 		    "read disk geometry\n"), name);
763fa9e4066Sahrens 		(void) close(fd);
764fa9e4066Sahrens 		return (-1);
765fa9e4066Sahrens 	}
766fa9e4066Sahrens 
767fa9e4066Sahrens 	vtoc->efi_parts[0].p_start = vtoc->efi_first_u_lba;
768fa9e4066Sahrens 	vtoc->efi_parts[0].p_size = vtoc->efi_last_u_lba + 1 -
769fa9e4066Sahrens 	    vtoc->efi_first_u_lba - resv;
770fa9e4066Sahrens 
771fa9e4066Sahrens 	/*
772fa9e4066Sahrens 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
773fa9e4066Sahrens 	 * disposable by some EFI utilities (since EFI doesn't have a backup
774fa9e4066Sahrens 	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
775fa9e4066Sahrens 	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
776fa9e4066Sahrens 	 * etc. were all pretty specific.  V_USR is as close to reality as we
777fa9e4066Sahrens 	 * can get, in the absence of V_OTHER.
778fa9e4066Sahrens 	 */
779fa9e4066Sahrens 	vtoc->efi_parts[0].p_tag = V_USR;
780fa9e4066Sahrens 	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
781fa9e4066Sahrens 
782fa9e4066Sahrens 	vtoc->efi_parts[8].p_start = vtoc->efi_last_u_lba + 1 - resv;
783fa9e4066Sahrens 	vtoc->efi_parts[8].p_size = resv;
784fa9e4066Sahrens 	vtoc->efi_parts[8].p_tag = V_RESERVED;
785fa9e4066Sahrens 
786fa9e4066Sahrens 	if (efi_write(fd, vtoc) != 0) {
787fa9e4066Sahrens 		/*
788fa9e4066Sahrens 		 * Currently, EFI labels are not supported for IDE disks, and it
789fa9e4066Sahrens 		 * is likely that they will not be supported on other drives for
790fa9e4066Sahrens 		 * some time.  Print out a helpful error message directing the
791fa9e4066Sahrens 		 * user to manually label the disk and give a specific slice.
792fa9e4066Sahrens 		 */
793fa9e4066Sahrens 		(void) fprintf(stderr, gettext("cannot label '%s': failed to "
794fa9e4066Sahrens 		    "write EFI label\n"), name);
795fa9e4066Sahrens 		(void) fprintf(stderr, gettext("use fdisk(1M) to partition "
796fa9e4066Sahrens 		    "the disk, and provide a specific slice\n"));
797fa9e4066Sahrens 		(void) close(fd);
798fa9e4066Sahrens 		return (-1);
799fa9e4066Sahrens 	}
800fa9e4066Sahrens 
801fa9e4066Sahrens 	(void) close(fd);
802fa9e4066Sahrens 	return (0);
803fa9e4066Sahrens }
804fa9e4066Sahrens 
805fa9e4066Sahrens /*
806fa9e4066Sahrens  * Go through and find any whole disks in the vdev specification, labelling them
807fa9e4066Sahrens  * as appropriate.  When constructing the vdev spec, we were unable to open this
808fa9e4066Sahrens  * device in order to provide a devid.  Now that we have labelled the disk and
809fa9e4066Sahrens  * know that slice 0 is valid, we can construct the devid now.
810fa9e4066Sahrens  *
811fa9e4066Sahrens  * If the disk was already labelled with an EFI label, we will have gotten the
812fa9e4066Sahrens  * devid already (because we were able to open the whole disk).  Otherwise, we
813fa9e4066Sahrens  * need to get the devid after we label the disk.
814fa9e4066Sahrens  */
815fa9e4066Sahrens int
816fa9e4066Sahrens make_disks(nvlist_t *nv)
817fa9e4066Sahrens {
818fa9e4066Sahrens 	nvlist_t **child;
819fa9e4066Sahrens 	uint_t c, children;
820fa9e4066Sahrens 	char *type, *path, *diskname;
821fa9e4066Sahrens 	char buf[MAXPATHLEN];
822afefbcddSeschrock 	uint64_t wholedisk;
823fa9e4066Sahrens 	int fd;
824fa9e4066Sahrens 	int ret;
825fa9e4066Sahrens 	ddi_devid_t devid;
826fa9e4066Sahrens 	char *minor = NULL, *devid_str = NULL;
827fa9e4066Sahrens 
828fa9e4066Sahrens 	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
829fa9e4066Sahrens 
830fa9e4066Sahrens 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
831fa9e4066Sahrens 	    &child, &children) != 0) {
832fa9e4066Sahrens 
833fa9e4066Sahrens 		if (strcmp(type, VDEV_TYPE_DISK) != 0)
834fa9e4066Sahrens 			return (0);
835fa9e4066Sahrens 
836fa9e4066Sahrens 		/*
837fa9e4066Sahrens 		 * We have a disk device.  Get the path to the device
838fa9e4066Sahrens 		 * and see if its a whole disk by appending the backup
839fa9e4066Sahrens 		 * slice and stat()ing the device.
840fa9e4066Sahrens 		 */
841fa9e4066Sahrens 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
842fa9e4066Sahrens 
843afefbcddSeschrock 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
844afefbcddSeschrock 		    &wholedisk) != 0 || !wholedisk)
845fa9e4066Sahrens 			return (0);
846fa9e4066Sahrens 
847fa9e4066Sahrens 		diskname = strrchr(path, '/');
848fa9e4066Sahrens 		assert(diskname != NULL);
849fa9e4066Sahrens 		diskname++;
850fa9e4066Sahrens 		if (label_disk(diskname) != 0)
851fa9e4066Sahrens 			return (-1);
852fa9e4066Sahrens 
853fa9e4066Sahrens 		/*
854fa9e4066Sahrens 		 * Fill in the devid, now that we've labeled the disk.
855fa9e4066Sahrens 		 */
856fa9e4066Sahrens 		(void) snprintf(buf, sizeof (buf), "%ss0", path);
857fa9e4066Sahrens 		if ((fd = open(buf, O_RDONLY)) < 0) {
858fa9e4066Sahrens 			(void) fprintf(stderr,
859fa9e4066Sahrens 			    gettext("cannot open '%s': %s\n"),
860fa9e4066Sahrens 			    buf, strerror(errno));
861fa9e4066Sahrens 			return (-1);
862fa9e4066Sahrens 		}
863fa9e4066Sahrens 
864fa9e4066Sahrens 		if (devid_get(fd, &devid) == 0) {
865fa9e4066Sahrens 			if (devid_get_minor_name(fd, &minor) == 0 &&
866fa9e4066Sahrens 			    (devid_str = devid_str_encode(devid, minor)) !=
867fa9e4066Sahrens 			    NULL) {
868fa9e4066Sahrens 				verify(nvlist_add_string(nv,
869fa9e4066Sahrens 				    ZPOOL_CONFIG_DEVID, devid_str) == 0);
870fa9e4066Sahrens 			}
871fa9e4066Sahrens 			if (devid_str != NULL)
872fa9e4066Sahrens 				devid_str_free(devid_str);
873fa9e4066Sahrens 			if (minor != NULL)
874fa9e4066Sahrens 				devid_str_free(minor);
875fa9e4066Sahrens 			devid_free(devid);
876fa9e4066Sahrens 		}
877fa9e4066Sahrens 
878afefbcddSeschrock 		/*
879afefbcddSeschrock 		 * Update the path to refer to the 's0' slice.  The presence of
880afefbcddSeschrock 		 * the 'whole_disk' field indicates to the CLI that we should
881afefbcddSeschrock 		 * chop off the slice number when displaying the device in
882afefbcddSeschrock 		 * future output.
883afefbcddSeschrock 		 */
884afefbcddSeschrock 		verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
885afefbcddSeschrock 
886fa9e4066Sahrens 		(void) close(fd);
887fa9e4066Sahrens 
888fa9e4066Sahrens 		return (0);
889fa9e4066Sahrens 	}
890fa9e4066Sahrens 
891fa9e4066Sahrens 	for (c = 0; c < children; c++)
892fa9e4066Sahrens 		if ((ret = make_disks(child[c])) != 0)
893fa9e4066Sahrens 			return (ret);
894fa9e4066Sahrens 
895fa9e4066Sahrens 	return (0);
896fa9e4066Sahrens }
897fa9e4066Sahrens 
898fa9e4066Sahrens /*
899fa9e4066Sahrens  * Go through and find any devices that are in use.  We rely on libdiskmgt for
900fa9e4066Sahrens  * the majority of this task.
901fa9e4066Sahrens  */
902fa9e4066Sahrens int
903fa9e4066Sahrens check_in_use(nvlist_t *nv, int force)
904fa9e4066Sahrens {
905fa9e4066Sahrens 	nvlist_t **child;
906fa9e4066Sahrens 	uint_t c, children;
907fa9e4066Sahrens 	char *type, *path;
908fa9e4066Sahrens 	int ret;
909fa9e4066Sahrens 
910fa9e4066Sahrens 	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
911fa9e4066Sahrens 
912fa9e4066Sahrens 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
913fa9e4066Sahrens 	    &child, &children) != 0) {
914fa9e4066Sahrens 
915fa9e4066Sahrens 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
916fa9e4066Sahrens 
917fa9e4066Sahrens 		if (strcmp(type, VDEV_TYPE_DISK) == 0)
918fa9e4066Sahrens 			ret = check_device(path, force);
919fa9e4066Sahrens 
920fa9e4066Sahrens 		if (strcmp(type, VDEV_TYPE_FILE) == 0)
921fa9e4066Sahrens 			ret = check_file(path, force);
922fa9e4066Sahrens 
923fa9e4066Sahrens 		return (ret);
924fa9e4066Sahrens 	}
925fa9e4066Sahrens 
926fa9e4066Sahrens 	for (c = 0; c < children; c++)
927fa9e4066Sahrens 		if ((ret = check_in_use(child[c], force)) != 0)
928fa9e4066Sahrens 			return (ret);
929fa9e4066Sahrens 
930fa9e4066Sahrens 	return (0);
931fa9e4066Sahrens }
932fa9e4066Sahrens 
933fa9e4066Sahrens /*
934fa9e4066Sahrens  * Construct a syntactically valid vdev specification,
935fa9e4066Sahrens  * and ensure that all devices and files exist and can be opened.
936fa9e4066Sahrens  * Note: we don't bother freeing anything in the error paths
937fa9e4066Sahrens  * because the program is just going to exit anyway.
938fa9e4066Sahrens  */
939fa9e4066Sahrens nvlist_t *
940fa9e4066Sahrens construct_spec(int argc, char **argv)
941fa9e4066Sahrens {
942fa9e4066Sahrens 	nvlist_t *nvroot, *nv, **top;
943fa9e4066Sahrens 	int t, toplevels;
944fa9e4066Sahrens 
945fa9e4066Sahrens 	top = NULL;
946fa9e4066Sahrens 	toplevels = 0;
947fa9e4066Sahrens 
948fa9e4066Sahrens 	while (argc > 0) {
949fa9e4066Sahrens 		nv = NULL;
950fa9e4066Sahrens 
951fa9e4066Sahrens 		/*
952fa9e4066Sahrens 		 * If it's a mirror or raidz, the subsequent arguments are
953fa9e4066Sahrens 		 * its leaves -- until we encounter the next mirror or raidz.
954fa9e4066Sahrens 		 */
955fa9e4066Sahrens 		if (strcmp(argv[0], VDEV_TYPE_MIRROR) == 0 ||
956fa9e4066Sahrens 		    strcmp(argv[0], VDEV_TYPE_RAIDZ) == 0) {
957fa9e4066Sahrens 
958fa9e4066Sahrens 			char *type = argv[0];
959fa9e4066Sahrens 			nvlist_t **child = NULL;
960fa9e4066Sahrens 			int children = 0;
961fa9e4066Sahrens 			int c;
962fa9e4066Sahrens 
963fa9e4066Sahrens 			for (c = 1; c < argc; c++) {
964fa9e4066Sahrens 				if (strcmp(argv[c], VDEV_TYPE_MIRROR) == 0 ||
965fa9e4066Sahrens 				    strcmp(argv[c], VDEV_TYPE_RAIDZ) == 0)
966fa9e4066Sahrens 					break;
967fa9e4066Sahrens 				children++;
968fa9e4066Sahrens 				child = realloc(child,
969fa9e4066Sahrens 				    children * sizeof (nvlist_t *));
970fa9e4066Sahrens 				if (child == NULL)
971fa9e4066Sahrens 					no_memory();
972fa9e4066Sahrens 				if ((nv = make_leaf_vdev(argv[c])) == NULL)
973fa9e4066Sahrens 					return (NULL);
974fa9e4066Sahrens 				child[children - 1] = nv;
975fa9e4066Sahrens 			}
976fa9e4066Sahrens 
977fa9e4066Sahrens 			argc -= c;
978fa9e4066Sahrens 			argv += c;
979fa9e4066Sahrens 
980fa9e4066Sahrens 			/*
981fa9e4066Sahrens 			 * Mirrors and RAID-Z devices require at least
982fa9e4066Sahrens 			 * two components.
983fa9e4066Sahrens 			 */
984fa9e4066Sahrens 			if (children < 2) {
985fa9e4066Sahrens 				(void) fprintf(stderr,
986fa9e4066Sahrens 				    gettext("invalid vdev specification: "
987fa9e4066Sahrens 				    "%s requires at least 2 devices\n"), type);
988fa9e4066Sahrens 				return (NULL);
989fa9e4066Sahrens 			}
990fa9e4066Sahrens 
991fa9e4066Sahrens 			verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0);
992fa9e4066Sahrens 			verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
993fa9e4066Sahrens 			    type) == 0);
994fa9e4066Sahrens 			verify(nvlist_add_nvlist_array(nv,
995fa9e4066Sahrens 			    ZPOOL_CONFIG_CHILDREN, child, children) == 0);
996fa9e4066Sahrens 
997fa9e4066Sahrens 			for (c = 0; c < children; c++)
998fa9e4066Sahrens 				nvlist_free(child[c]);
999fa9e4066Sahrens 			free(child);
1000fa9e4066Sahrens 		} else {
1001fa9e4066Sahrens 			/*
1002fa9e4066Sahrens 			 * We have a device.  Pass off to make_leaf_vdev() to
1003fa9e4066Sahrens 			 * construct the appropriate nvlist describing the vdev.
1004fa9e4066Sahrens 			 */
1005fa9e4066Sahrens 			if ((nv = make_leaf_vdev(argv[0])) == NULL)
1006fa9e4066Sahrens 				return (NULL);
1007fa9e4066Sahrens 			argc--;
1008fa9e4066Sahrens 			argv++;
1009fa9e4066Sahrens 		}
1010fa9e4066Sahrens 
1011fa9e4066Sahrens 		toplevels++;
1012fa9e4066Sahrens 		top = realloc(top, toplevels * sizeof (nvlist_t *));
1013fa9e4066Sahrens 		if (top == NULL)
1014fa9e4066Sahrens 			no_memory();
1015fa9e4066Sahrens 		top[toplevels - 1] = nv;
1016fa9e4066Sahrens 	}
1017fa9e4066Sahrens 
1018fa9e4066Sahrens 	/*
1019fa9e4066Sahrens 	 * Finally, create nvroot and add all top-level vdevs to it.
1020fa9e4066Sahrens 	 */
1021fa9e4066Sahrens 	verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
1022fa9e4066Sahrens 	verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
1023fa9e4066Sahrens 	    VDEV_TYPE_ROOT) == 0);
1024fa9e4066Sahrens 	verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1025fa9e4066Sahrens 	    top, toplevels) == 0);
1026fa9e4066Sahrens 
1027fa9e4066Sahrens 	for (t = 0; t < toplevels; t++)
1028fa9e4066Sahrens 		nvlist_free(top[t]);
1029fa9e4066Sahrens 	free(top);
1030fa9e4066Sahrens 
1031fa9e4066Sahrens 	return (nvroot);
1032fa9e4066Sahrens }
1033fa9e4066Sahrens 
1034fa9e4066Sahrens /*
1035fa9e4066Sahrens  * Get and validate the contents of the given vdev specification.  This ensures
1036fa9e4066Sahrens  * that the nvlist returned is well-formed, that all the devices exist, and that
1037fa9e4066Sahrens  * they are not currently in use by any other known consumer.  The 'poolconfig'
1038fa9e4066Sahrens  * parameter is the current configuration of the pool when adding devices
1039fa9e4066Sahrens  * existing pool, and is used to perform additional checks, such as changing the
1040fa9e4066Sahrens  * replication level of the pool.  It can be 'NULL' to indicate that this is a
1041fa9e4066Sahrens  * new pool.  The 'force' flag controls whether devices should be forcefully
1042fa9e4066Sahrens  * added, even if they appear in use.
1043fa9e4066Sahrens  */
1044fa9e4066Sahrens nvlist_t *
1045fa9e4066Sahrens make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
1046fa9e4066Sahrens     int argc, char **argv)
1047fa9e4066Sahrens {
1048fa9e4066Sahrens 	nvlist_t *newroot;
1049fa9e4066Sahrens 
1050fa9e4066Sahrens 	is_force = force;
1051fa9e4066Sahrens 
1052fa9e4066Sahrens 	/*
1053fa9e4066Sahrens 	 * Construct the vdev specification.  If this is successful, we know
1054fa9e4066Sahrens 	 * that we have a valid specification, and that all devices can be
1055fa9e4066Sahrens 	 * opened.
1056fa9e4066Sahrens 	 */
1057fa9e4066Sahrens 	if ((newroot = construct_spec(argc, argv)) == NULL)
1058fa9e4066Sahrens 		return (NULL);
1059fa9e4066Sahrens 
1060fa9e4066Sahrens 	/*
1061fa9e4066Sahrens 	 * Validate each device to make sure that its not shared with another
1062fa9e4066Sahrens 	 * subsystem.  We do this even if 'force' is set, because there are some
1063fa9e4066Sahrens 	 * uses (such as a dedicated dump device) that even '-f' cannot
1064fa9e4066Sahrens 	 * override.
1065fa9e4066Sahrens 	 */
1066fa9e4066Sahrens 	if (check_in_use(newroot, force) != 0) {
1067fa9e4066Sahrens 		nvlist_free(newroot);
1068fa9e4066Sahrens 		return (NULL);
1069fa9e4066Sahrens 	}
1070fa9e4066Sahrens 
1071fa9e4066Sahrens 	/*
1072fa9e4066Sahrens 	 * Check the replication level of the given vdevs and report any errors
1073fa9e4066Sahrens 	 * found.  We include the existing pool spec, if any, as we need to
1074fa9e4066Sahrens 	 * catch changes against the existing replication level.
1075fa9e4066Sahrens 	 */
1076fa9e4066Sahrens 	if (check_rep && check_replication(poolconfig, newroot) != 0) {
1077fa9e4066Sahrens 		nvlist_free(newroot);
1078fa9e4066Sahrens 		return (NULL);
1079fa9e4066Sahrens 	}
1080fa9e4066Sahrens 
1081fa9e4066Sahrens 	/*
1082fa9e4066Sahrens 	 * Run through the vdev specification and label any whole disks found.
1083fa9e4066Sahrens 	 */
1084fa9e4066Sahrens 	if (make_disks(newroot) != 0) {
1085fa9e4066Sahrens 		nvlist_free(newroot);
1086fa9e4066Sahrens 		return (NULL);
1087fa9e4066Sahrens 	}
1088fa9e4066Sahrens 
1089fa9e4066Sahrens 	return (newroot);
1090fa9e4066Sahrens }
1091