1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6*ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 2246a2abf2Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens /* 29fa9e4066Sahrens * Functions to convert between a list of vdevs and an nvlist representing the 30fa9e4066Sahrens * configuration. Each entry in the list can be one of: 31fa9e4066Sahrens * 32fa9e4066Sahrens * Device vdevs 33fa9e4066Sahrens * disk=(path=..., devid=...) 34fa9e4066Sahrens * file=(path=...) 35fa9e4066Sahrens * 36fa9e4066Sahrens * Group vdevs 37fa9e4066Sahrens * raidz=(...) 38fa9e4066Sahrens * mirror=(...) 39fa9e4066Sahrens * 40fa9e4066Sahrens * While the underlying implementation supports it, group vdevs cannot contain 41fa9e4066Sahrens * other group vdevs. All userland verification of devices is contained within 42fa9e4066Sahrens * this file. If successful, the nvlist returned can be passed directly to the 43fa9e4066Sahrens * kernel; we've done as much verification as possible in userland. 44fa9e4066Sahrens * 45fa9e4066Sahrens * The only function exported by this file is 'get_vdev_spec'. The function 46fa9e4066Sahrens * performs several passes: 47fa9e4066Sahrens * 48fa9e4066Sahrens * 1. Construct the vdev specification. Performs syntax validation and 49fa9e4066Sahrens * makes sure each device is valid. 50fa9e4066Sahrens * 2. Check for devices in use. Using libdiskmgt, makes sure that no 51fa9e4066Sahrens * devices are also in use. Some can be overridden using the 'force' 52fa9e4066Sahrens * flag, others cannot. 53fa9e4066Sahrens * 3. Check for replication errors if the 'force' flag is not specified. 54fa9e4066Sahrens * validates that the replication level is consistent across the 55fa9e4066Sahrens * entire pool. 56fa9e4066Sahrens * 4. Label any whole disks with an EFI label. 57fa9e4066Sahrens */ 58fa9e4066Sahrens 59fa9e4066Sahrens #include <assert.h> 60fa9e4066Sahrens #include <devid.h> 61fa9e4066Sahrens #include <errno.h> 62fa9e4066Sahrens #include <fcntl.h> 63fa9e4066Sahrens #include <libdiskmgt.h> 64fa9e4066Sahrens #include <libintl.h> 65fa9e4066Sahrens #include <libnvpair.h> 66fa9e4066Sahrens #include <stdio.h> 67fa9e4066Sahrens #include <string.h> 68fa9e4066Sahrens #include <unistd.h> 69fa9e4066Sahrens #include <sys/efi_partition.h> 70fa9e4066Sahrens #include <sys/stat.h> 71fa9e4066Sahrens #include <sys/vtoc.h> 72fa9e4066Sahrens #include <sys/mntent.h> 73fa9e4066Sahrens 74fa9e4066Sahrens #include <libzfs.h> 75fa9e4066Sahrens 76fa9e4066Sahrens #include "zpool_util.h" 77fa9e4066Sahrens 78fa9e4066Sahrens #define DISK_ROOT "/dev/dsk" 79fa9e4066Sahrens #define RDISK_ROOT "/dev/rdsk" 80fa9e4066Sahrens #define BACKUP_SLICE "s2" 81fa9e4066Sahrens 82fa9e4066Sahrens /* 83fa9e4066Sahrens * For any given vdev specification, we can have multiple errors. The 84fa9e4066Sahrens * vdev_error() function keeps track of whether we have seen an error yet, and 85fa9e4066Sahrens * prints out a header if its the first error we've seen. 86fa9e4066Sahrens */ 87fa9e4066Sahrens int error_seen; 88fa9e4066Sahrens int is_force; 89fa9e4066Sahrens 90fa9e4066Sahrens void 91fa9e4066Sahrens vdev_error(const char *fmt, ...) 92fa9e4066Sahrens { 93fa9e4066Sahrens va_list ap; 94fa9e4066Sahrens 95fa9e4066Sahrens if (!error_seen) { 96fa9e4066Sahrens (void) fprintf(stderr, gettext("invalid vdev specification\n")); 97fa9e4066Sahrens if (!is_force) 98fa9e4066Sahrens (void) fprintf(stderr, gettext("use '-f' to override " 99fa9e4066Sahrens "the following errors:\n")); 100fa9e4066Sahrens else 101fa9e4066Sahrens (void) fprintf(stderr, gettext("the following errors " 102fa9e4066Sahrens "must be manually repaired:\n")); 103fa9e4066Sahrens error_seen = TRUE; 104fa9e4066Sahrens } 105fa9e4066Sahrens 106fa9e4066Sahrens va_start(ap, fmt); 107fa9e4066Sahrens (void) vfprintf(stderr, fmt, ap); 108fa9e4066Sahrens va_end(ap); 109fa9e4066Sahrens } 110fa9e4066Sahrens 11146a2abf2Seschrock static void 11246a2abf2Seschrock libdiskmgt_error(int error) 113fa9e4066Sahrens { 114*ea8dc4b6Seschrock /* 115*ea8dc4b6Seschrock * ENXIO is a valid error message if the device doesn't live in 116*ea8dc4b6Seschrock * /dev/dsk. Don't bother printing an error message in this case. 117*ea8dc4b6Seschrock */ 118*ea8dc4b6Seschrock if (error == ENXIO) 119*ea8dc4b6Seschrock return; 120*ea8dc4b6Seschrock 12146a2abf2Seschrock (void) fprintf(stderr, gettext("warning: device in use checking " 12246a2abf2Seschrock "failed: %s\n"), strerror(error)); 123fa9e4066Sahrens } 124fa9e4066Sahrens 125fa9e4066Sahrens /* 12646a2abf2Seschrock * Validate a device, passing the bulk of the work off to libdiskmgt. 127fa9e4066Sahrens */ 128fa9e4066Sahrens int 12946a2abf2Seschrock check_slice(const char *path, int force, int wholedisk) 130fa9e4066Sahrens { 13146a2abf2Seschrock char *msg; 13246a2abf2Seschrock int error = 0; 133fa9e4066Sahrens int ret = 0; 134fa9e4066Sahrens 13546a2abf2Seschrock if (dm_inuse((char *)path, &msg, 13646a2abf2Seschrock force ? DM_WHO_ZPOOL_FORCE : DM_WHO_ZPOOL, &error) || error) { 13746a2abf2Seschrock if (error != 0) { 13846a2abf2Seschrock libdiskmgt_error(error); 13946a2abf2Seschrock return (0); 14046a2abf2Seschrock } else { 14146a2abf2Seschrock vdev_error("%s", msg); 14246a2abf2Seschrock free(msg); 143fa9e4066Sahrens } 144fa9e4066Sahrens 14546a2abf2Seschrock ret = -1; 146fa9e4066Sahrens } 147fa9e4066Sahrens 148fa9e4066Sahrens /* 14946a2abf2Seschrock * If we're given a whole disk, ignore overlapping slices since we're 15046a2abf2Seschrock * about to label it anyway. 151fa9e4066Sahrens */ 15246a2abf2Seschrock error = 0; 15346a2abf2Seschrock if (!wholedisk && !force && 15446a2abf2Seschrock (dm_isoverlapping((char *)path, &msg, &error) || error)) { 15546a2abf2Seschrock if (error != 0) { 15646a2abf2Seschrock libdiskmgt_error(error); 15746a2abf2Seschrock return (0); 158fa9e4066Sahrens } else { 15946a2abf2Seschrock vdev_error("%s overlaps with %s\n", path, msg); 16046a2abf2Seschrock free(msg); 161fa9e4066Sahrens } 162fa9e4066Sahrens 16346a2abf2Seschrock ret = -1; 16446a2abf2Seschrock } 165fa9e4066Sahrens 16646a2abf2Seschrock return (ret); 167fa9e4066Sahrens } 168fa9e4066Sahrens 169fa9e4066Sahrens /* 170fa9e4066Sahrens * Validate a whole disk. Iterate over all slices on the disk and make sure 171fa9e4066Sahrens * that none is in use by calling check_slice(). 172fa9e4066Sahrens */ 173fa9e4066Sahrens /* ARGSUSED */ 174fa9e4066Sahrens int 175fa9e4066Sahrens check_disk(const char *name, dm_descriptor_t disk, int force) 176fa9e4066Sahrens { 177fa9e4066Sahrens dm_descriptor_t *drive, *media, *slice; 178fa9e4066Sahrens int err = 0; 179fa9e4066Sahrens int i; 180fa9e4066Sahrens int ret; 181fa9e4066Sahrens 182fa9e4066Sahrens /* 183fa9e4066Sahrens * Get the drive associated with this disk. This should never fail, 184fa9e4066Sahrens * because we already have an alias handle open for the device. 185fa9e4066Sahrens */ 186fa9e4066Sahrens if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, 18746a2abf2Seschrock &err)) == NULL || *drive == NULL) { 18846a2abf2Seschrock if (err) 18946a2abf2Seschrock libdiskmgt_error(err); 19046a2abf2Seschrock return (0); 19146a2abf2Seschrock } 192fa9e4066Sahrens 193fa9e4066Sahrens if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, 19446a2abf2Seschrock &err)) == NULL) { 19546a2abf2Seschrock dm_free_descriptors(drive); 19646a2abf2Seschrock if (err) 19746a2abf2Seschrock libdiskmgt_error(err); 19846a2abf2Seschrock return (0); 19946a2abf2Seschrock } 200fa9e4066Sahrens 201fa9e4066Sahrens dm_free_descriptors(drive); 202fa9e4066Sahrens 203fa9e4066Sahrens /* 204fa9e4066Sahrens * It is possible that the user has specified a removable media drive, 205fa9e4066Sahrens * and the media is not present. 206fa9e4066Sahrens */ 207fa9e4066Sahrens if (*media == NULL) { 208fa9e4066Sahrens dm_free_descriptors(media); 20946a2abf2Seschrock vdev_error(gettext("'%s' has no media in drive\n"), name); 210fa9e4066Sahrens return (-1); 211fa9e4066Sahrens } 212fa9e4066Sahrens 213fa9e4066Sahrens if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, 21446a2abf2Seschrock &err)) == NULL) { 21546a2abf2Seschrock dm_free_descriptors(media); 21646a2abf2Seschrock if (err) 21746a2abf2Seschrock libdiskmgt_error(err); 21846a2abf2Seschrock return (0); 21946a2abf2Seschrock } 220fa9e4066Sahrens 221fa9e4066Sahrens dm_free_descriptors(media); 222fa9e4066Sahrens 223fa9e4066Sahrens ret = 0; 224fa9e4066Sahrens 225fa9e4066Sahrens /* 226fa9e4066Sahrens * Iterate over all slices and report any errors. We don't care about 227fa9e4066Sahrens * overlapping slices because we are using the whole disk. 228fa9e4066Sahrens */ 229fa9e4066Sahrens for (i = 0; slice[i] != NULL; i++) { 23046a2abf2Seschrock if (check_slice(dm_get_name(slice[i], &err), force, TRUE) != 0) 231fa9e4066Sahrens ret = -1; 232fa9e4066Sahrens } 233fa9e4066Sahrens 234fa9e4066Sahrens dm_free_descriptors(slice); 235fa9e4066Sahrens return (ret); 236fa9e4066Sahrens } 237fa9e4066Sahrens 238fa9e4066Sahrens /* 23946a2abf2Seschrock * Validate a device. 240fa9e4066Sahrens */ 241fa9e4066Sahrens int 242fa9e4066Sahrens check_device(const char *path, int force) 243fa9e4066Sahrens { 244fa9e4066Sahrens dm_descriptor_t desc; 245fa9e4066Sahrens int err; 24646a2abf2Seschrock char *dev; 247fa9e4066Sahrens 248fa9e4066Sahrens /* 249fa9e4066Sahrens * For whole disks, libdiskmgt does not include the leading dev path. 250fa9e4066Sahrens */ 251fa9e4066Sahrens dev = strrchr(path, '/'); 252fa9e4066Sahrens assert(dev != NULL); 253fa9e4066Sahrens dev++; 25446a2abf2Seschrock if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) { 25546a2abf2Seschrock err = check_disk(path, desc, force); 25646a2abf2Seschrock dm_free_descriptor(desc); 25746a2abf2Seschrock return (err); 258fa9e4066Sahrens } 259fa9e4066Sahrens 26046a2abf2Seschrock return (check_slice(path, force, FALSE)); 261fa9e4066Sahrens } 262fa9e4066Sahrens 263fa9e4066Sahrens /* 264fa9e4066Sahrens * Check that a file is valid. All we can do in this case is check that it's 265fa9e4066Sahrens * not in use by another pool. 266fa9e4066Sahrens */ 267fa9e4066Sahrens int 268fa9e4066Sahrens check_file(const char *file, int force) 269fa9e4066Sahrens { 27046a2abf2Seschrock char *name; 271fa9e4066Sahrens int fd; 272fa9e4066Sahrens int ret = 0; 27346a2abf2Seschrock pool_state_t state; 274fa9e4066Sahrens 275fa9e4066Sahrens if ((fd = open(file, O_RDONLY)) < 0) 276fa9e4066Sahrens return (0); 277fa9e4066Sahrens 27846a2abf2Seschrock if (zpool_in_use(fd, &state, &name)) { 27946a2abf2Seschrock const char *desc; 28046a2abf2Seschrock 28146a2abf2Seschrock switch (state) { 28246a2abf2Seschrock case POOL_STATE_ACTIVE: 28346a2abf2Seschrock desc = gettext("active"); 28446a2abf2Seschrock break; 28546a2abf2Seschrock 28646a2abf2Seschrock case POOL_STATE_EXPORTED: 28746a2abf2Seschrock desc = gettext("exported"); 28846a2abf2Seschrock break; 28946a2abf2Seschrock 29046a2abf2Seschrock case POOL_STATE_POTENTIALLY_ACTIVE: 29146a2abf2Seschrock desc = gettext("potentially active"); 29246a2abf2Seschrock break; 29346a2abf2Seschrock 29446a2abf2Seschrock default: 29546a2abf2Seschrock desc = gettext("unknown"); 29646a2abf2Seschrock break; 29746a2abf2Seschrock } 29846a2abf2Seschrock 29946a2abf2Seschrock if (state == POOL_STATE_ACTIVE || !force) { 300fa9e4066Sahrens vdev_error(gettext("%s is part of %s pool '%s'\n"), 301fa9e4066Sahrens file, desc, name); 302fa9e4066Sahrens ret = -1; 303fa9e4066Sahrens } 304fa9e4066Sahrens 305fa9e4066Sahrens free(name); 306fa9e4066Sahrens } 307fa9e4066Sahrens 308fa9e4066Sahrens (void) close(fd); 309fa9e4066Sahrens return (ret); 310fa9e4066Sahrens } 311fa9e4066Sahrens 312fa9e4066Sahrens static int 313fa9e4066Sahrens is_whole_disk(const char *arg, struct stat64 *statbuf) 314fa9e4066Sahrens { 315fa9e4066Sahrens char path[MAXPATHLEN]; 316fa9e4066Sahrens 317fa9e4066Sahrens (void) snprintf(path, sizeof (path), "%s%s", arg, BACKUP_SLICE); 318fa9e4066Sahrens if (stat64(path, statbuf) == 0) 319fa9e4066Sahrens return (TRUE); 320fa9e4066Sahrens 321fa9e4066Sahrens return (FALSE); 322fa9e4066Sahrens } 323fa9e4066Sahrens 324fa9e4066Sahrens /* 325fa9e4066Sahrens * Create a leaf vdev. Determine if this is a file or a device. If it's a 326fa9e4066Sahrens * device, fill in the device id to make a complete nvlist. Valid forms for a 327fa9e4066Sahrens * leaf vdev are: 328fa9e4066Sahrens * 329fa9e4066Sahrens * /dev/dsk/xxx Complete disk path 330fa9e4066Sahrens * /xxx Full path to file 331fa9e4066Sahrens * xxx Shorthand for /dev/dsk/xxx 332fa9e4066Sahrens */ 333fa9e4066Sahrens nvlist_t * 334fa9e4066Sahrens make_leaf_vdev(const char *arg) 335fa9e4066Sahrens { 336fa9e4066Sahrens char path[MAXPATHLEN]; 337fa9e4066Sahrens struct stat64 statbuf; 338fa9e4066Sahrens nvlist_t *vdev = NULL; 339fa9e4066Sahrens char *type = NULL; 340fa9e4066Sahrens int wholedisk = FALSE; 341fa9e4066Sahrens 342fa9e4066Sahrens /* 343fa9e4066Sahrens * Determine what type of vdev this is, and put the full path into 344fa9e4066Sahrens * 'path'. We detect whether this is a device of file afterwards by 345fa9e4066Sahrens * checking the st_mode of the file. 346fa9e4066Sahrens */ 347fa9e4066Sahrens if (arg[0] == '/') { 348fa9e4066Sahrens /* 349fa9e4066Sahrens * Complete device or file path. Exact type is determined by 350fa9e4066Sahrens * examining the file descriptor afterwards. 351fa9e4066Sahrens */ 352fa9e4066Sahrens if (is_whole_disk(arg, &statbuf)) { 353fa9e4066Sahrens wholedisk = TRUE; 354fa9e4066Sahrens } else if (stat64(arg, &statbuf) != 0) { 355fa9e4066Sahrens (void) fprintf(stderr, 356fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 357fa9e4066Sahrens arg, strerror(errno)); 358fa9e4066Sahrens return (NULL); 359fa9e4066Sahrens } 360fa9e4066Sahrens 361fa9e4066Sahrens (void) strlcpy(path, arg, sizeof (path)); 362fa9e4066Sahrens } else { 363fa9e4066Sahrens /* 364fa9e4066Sahrens * This may be a short path for a device, or it could be total 365fa9e4066Sahrens * gibberish. Check to see if it's a known device in 366fa9e4066Sahrens * /dev/dsk/. As part of this check, see if we've been given a 367fa9e4066Sahrens * an entire disk (minus the slice number). 368fa9e4066Sahrens */ 369fa9e4066Sahrens (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, 370fa9e4066Sahrens arg); 371fa9e4066Sahrens if (is_whole_disk(path, &statbuf)) { 372fa9e4066Sahrens wholedisk = TRUE; 373fa9e4066Sahrens } else if (stat64(path, &statbuf) != 0) { 374fa9e4066Sahrens /* 375fa9e4066Sahrens * If we got ENOENT, then the user gave us 376fa9e4066Sahrens * gibberish, so try to direct them with a 377fa9e4066Sahrens * reasonable error message. Otherwise, 378fa9e4066Sahrens * regurgitate strerror() since it's the best we 379fa9e4066Sahrens * can do. 380fa9e4066Sahrens */ 381fa9e4066Sahrens if (errno == ENOENT) { 382fa9e4066Sahrens (void) fprintf(stderr, 383fa9e4066Sahrens gettext("cannot open '%s': no such " 384fa9e4066Sahrens "device in %s\n"), arg, DISK_ROOT); 385fa9e4066Sahrens (void) fprintf(stderr, 386fa9e4066Sahrens gettext("must be a full path or " 387fa9e4066Sahrens "shorthand device name\n")); 388fa9e4066Sahrens return (NULL); 389fa9e4066Sahrens } else { 390fa9e4066Sahrens (void) fprintf(stderr, 391fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 392fa9e4066Sahrens path, strerror(errno)); 393fa9e4066Sahrens return (NULL); 394fa9e4066Sahrens } 395fa9e4066Sahrens } 396fa9e4066Sahrens } 397fa9e4066Sahrens 398fa9e4066Sahrens /* 399fa9e4066Sahrens * Determine whether this is a device or a file. 400fa9e4066Sahrens */ 401fa9e4066Sahrens if (S_ISBLK(statbuf.st_mode)) { 402fa9e4066Sahrens type = VDEV_TYPE_DISK; 403fa9e4066Sahrens } else if (S_ISREG(statbuf.st_mode)) { 404fa9e4066Sahrens type = VDEV_TYPE_FILE; 405fa9e4066Sahrens } else { 406fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot use '%s': must be a " 407fa9e4066Sahrens "block device or regular file\n"), path); 408fa9e4066Sahrens return (NULL); 409fa9e4066Sahrens } 410fa9e4066Sahrens 411fa9e4066Sahrens /* 412fa9e4066Sahrens * Finally, we have the complete device or file, and we know that it is 413fa9e4066Sahrens * acceptable to use. Construct the nvlist to describe this vdev. All 414fa9e4066Sahrens * vdevs have a 'path' element, and devices also have a 'devid' element. 415fa9e4066Sahrens */ 416fa9e4066Sahrens verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); 417fa9e4066Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); 418fa9e4066Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); 419afefbcddSeschrock if (strcmp(type, VDEV_TYPE_DISK) == 0) 420afefbcddSeschrock verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, 421afefbcddSeschrock (uint64_t)wholedisk) == 0); 422fa9e4066Sahrens 423fa9e4066Sahrens /* 424fa9e4066Sahrens * For a whole disk, defer getting its devid until after labeling it. 425fa9e4066Sahrens */ 426fa9e4066Sahrens if (S_ISBLK(statbuf.st_mode) && !wholedisk) { 427fa9e4066Sahrens /* 428fa9e4066Sahrens * Get the devid for the device. 429fa9e4066Sahrens */ 430fa9e4066Sahrens int fd; 431fa9e4066Sahrens ddi_devid_t devid; 432fa9e4066Sahrens char *minor = NULL, *devid_str = NULL; 433fa9e4066Sahrens 434fa9e4066Sahrens if ((fd = open(path, O_RDONLY)) < 0) { 435fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot open '%s': " 436fa9e4066Sahrens "%s\n"), path, strerror(errno)); 437fa9e4066Sahrens nvlist_free(vdev); 438fa9e4066Sahrens return (NULL); 439fa9e4066Sahrens } 440fa9e4066Sahrens 441fa9e4066Sahrens if (devid_get(fd, &devid) == 0) { 442fa9e4066Sahrens if (devid_get_minor_name(fd, &minor) == 0 && 443fa9e4066Sahrens (devid_str = devid_str_encode(devid, minor)) != 444fa9e4066Sahrens NULL) { 445fa9e4066Sahrens verify(nvlist_add_string(vdev, 446fa9e4066Sahrens ZPOOL_CONFIG_DEVID, devid_str) == 0); 447fa9e4066Sahrens } 448fa9e4066Sahrens if (devid_str != NULL) 449fa9e4066Sahrens devid_str_free(devid_str); 450fa9e4066Sahrens if (minor != NULL) 451fa9e4066Sahrens devid_str_free(minor); 452fa9e4066Sahrens devid_free(devid); 453fa9e4066Sahrens } 454fa9e4066Sahrens 455fa9e4066Sahrens (void) close(fd); 456fa9e4066Sahrens } 457fa9e4066Sahrens 458fa9e4066Sahrens return (vdev); 459fa9e4066Sahrens } 460fa9e4066Sahrens 461fa9e4066Sahrens /* 462fa9e4066Sahrens * Go through and verify the replication level of the pool is consistent. 463fa9e4066Sahrens * Performs the following checks: 464fa9e4066Sahrens * 465fa9e4066Sahrens * For the new spec, verifies that devices in mirrors and raidz are the 466fa9e4066Sahrens * same size. 467fa9e4066Sahrens * 468fa9e4066Sahrens * If the current configuration already has inconsistent replication 469fa9e4066Sahrens * levels, ignore any other potential problems in the new spec. 470fa9e4066Sahrens * 471fa9e4066Sahrens * Otherwise, make sure that the current spec (if there is one) and the new 472fa9e4066Sahrens * spec have consistent replication levels. 473fa9e4066Sahrens */ 474fa9e4066Sahrens typedef struct replication_level { 475fa9e4066Sahrens char *type; 476fa9e4066Sahrens int level; 477fa9e4066Sahrens } replication_level_t; 478fa9e4066Sahrens 479fa9e4066Sahrens /* 480fa9e4066Sahrens * Given a list of toplevel vdevs, return the current replication level. If 481fa9e4066Sahrens * the config is inconsistent, then NULL is returned. If 'fatal' is set, then 482fa9e4066Sahrens * an error message will be displayed for each self-inconsistent vdev. 483fa9e4066Sahrens */ 484fa9e4066Sahrens replication_level_t * 485fa9e4066Sahrens get_replication(nvlist_t *nvroot, int fatal) 486fa9e4066Sahrens { 487fa9e4066Sahrens nvlist_t **top; 488fa9e4066Sahrens uint_t t, toplevels; 489fa9e4066Sahrens nvlist_t **child; 490fa9e4066Sahrens uint_t c, children; 491fa9e4066Sahrens nvlist_t *nv; 492fa9e4066Sahrens char *type; 493fa9e4066Sahrens replication_level_t lastrep, rep, *ret; 494fa9e4066Sahrens int dontreport; 495fa9e4066Sahrens 496fa9e4066Sahrens ret = safe_malloc(sizeof (replication_level_t)); 497fa9e4066Sahrens 498fa9e4066Sahrens verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 499fa9e4066Sahrens &top, &toplevels) == 0); 500fa9e4066Sahrens 501fa9e4066Sahrens lastrep.type = NULL; 502fa9e4066Sahrens for (t = 0; t < toplevels; t++) { 503fa9e4066Sahrens nv = top[t]; 504fa9e4066Sahrens 505fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 506fa9e4066Sahrens 507fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 508fa9e4066Sahrens &child, &children) != 0) { 509fa9e4066Sahrens /* 510fa9e4066Sahrens * This is a 'file' or 'disk' vdev. 511fa9e4066Sahrens */ 512fa9e4066Sahrens rep.type = type; 513fa9e4066Sahrens rep.level = 1; 514fa9e4066Sahrens } else { 515fa9e4066Sahrens uint64_t vdev_size; 516fa9e4066Sahrens 517fa9e4066Sahrens /* 518fa9e4066Sahrens * This is a mirror or RAID-Z vdev. Go through and make 519fa9e4066Sahrens * sure the contents are all the same (files vs. disks), 520fa9e4066Sahrens * keeping track of the number of elements in the 521fa9e4066Sahrens * process. 522fa9e4066Sahrens * 523fa9e4066Sahrens * We also check that the size of each vdev (if it can 524fa9e4066Sahrens * be determined) is the same. 525fa9e4066Sahrens */ 526fa9e4066Sahrens rep.type = type; 527fa9e4066Sahrens rep.level = 0; 528fa9e4066Sahrens 529fa9e4066Sahrens /* 530fa9e4066Sahrens * The 'dontreport' variable indicatest that we've 531fa9e4066Sahrens * already reported an error for this spec, so don't 532fa9e4066Sahrens * bother doing it again. 533fa9e4066Sahrens */ 534fa9e4066Sahrens type = NULL; 535fa9e4066Sahrens dontreport = 0; 536fa9e4066Sahrens vdev_size = -1ULL; 537fa9e4066Sahrens for (c = 0; c < children; c++) { 538fa9e4066Sahrens nvlist_t *cnv = child[c]; 539fa9e4066Sahrens char *path; 540fa9e4066Sahrens struct stat64 statbuf; 541fa9e4066Sahrens uint64_t size = -1ULL; 542fa9e4066Sahrens char *childtype; 543fa9e4066Sahrens int fd, err; 544fa9e4066Sahrens 545fa9e4066Sahrens rep.level++; 546fa9e4066Sahrens 547fa9e4066Sahrens verify(nvlist_lookup_string(cnv, 548fa9e4066Sahrens ZPOOL_CONFIG_TYPE, &childtype) == 0); 549fa9e4066Sahrens verify(nvlist_lookup_string(cnv, 550fa9e4066Sahrens ZPOOL_CONFIG_PATH, &path) == 0); 551fa9e4066Sahrens 552fa9e4066Sahrens /* 553fa9e4066Sahrens * If we have a raidz/mirror that combines disks 554fa9e4066Sahrens * with files, report it as an error. 555fa9e4066Sahrens */ 556fa9e4066Sahrens if (!dontreport && type != NULL && 557fa9e4066Sahrens strcmp(type, childtype) != 0) { 558fa9e4066Sahrens if (ret != NULL) 559fa9e4066Sahrens free(ret); 560fa9e4066Sahrens ret = NULL; 561fa9e4066Sahrens if (fatal) 562fa9e4066Sahrens vdev_error(gettext( 563fa9e4066Sahrens "mismatched replication " 564fa9e4066Sahrens "level: %s contains both " 565fa9e4066Sahrens "files and devices\n"), 566fa9e4066Sahrens rep.type); 567fa9e4066Sahrens else 568fa9e4066Sahrens return (NULL); 569fa9e4066Sahrens dontreport = TRUE; 570fa9e4066Sahrens } 571fa9e4066Sahrens 572fa9e4066Sahrens /* 573fa9e4066Sahrens * According to stat(2), the value of 'st_size' 574fa9e4066Sahrens * is undefined for block devices and character 575fa9e4066Sahrens * devices. But there is no effective way to 576fa9e4066Sahrens * determine the real size in userland. 577fa9e4066Sahrens * 578fa9e4066Sahrens * Instead, we'll take advantage of an 579fa9e4066Sahrens * implementation detail of spec_size(). If the 580fa9e4066Sahrens * device is currently open, then we (should) 581fa9e4066Sahrens * return a valid size. 582fa9e4066Sahrens * 583fa9e4066Sahrens * If we still don't get a valid size (indicated 584fa9e4066Sahrens * by a size of 0 or MAXOFFSET_T), then ignore 585fa9e4066Sahrens * this device altogether. 586fa9e4066Sahrens */ 587fa9e4066Sahrens if ((fd = open(path, O_RDONLY)) >= 0) { 588fa9e4066Sahrens err = fstat64(fd, &statbuf); 589fa9e4066Sahrens (void) close(fd); 590fa9e4066Sahrens } else { 591fa9e4066Sahrens err = stat64(path, &statbuf); 592fa9e4066Sahrens } 593fa9e4066Sahrens 594fa9e4066Sahrens if (err != 0 || 595fa9e4066Sahrens statbuf.st_size == 0 || 596fa9e4066Sahrens statbuf.st_size == MAXOFFSET_T) 597fa9e4066Sahrens continue; 598fa9e4066Sahrens 599fa9e4066Sahrens size = statbuf.st_size; 600fa9e4066Sahrens 601fa9e4066Sahrens /* 602fa9e4066Sahrens * Also check the size of each device. If they 603fa9e4066Sahrens * differ, then report an error. 604fa9e4066Sahrens */ 605fa9e4066Sahrens if (!dontreport && vdev_size != -1ULL && 606fa9e4066Sahrens size != vdev_size) { 607fa9e4066Sahrens if (ret != NULL) 608fa9e4066Sahrens free(ret); 609fa9e4066Sahrens ret = NULL; 610fa9e4066Sahrens if (fatal) 611fa9e4066Sahrens vdev_error(gettext( 612fa9e4066Sahrens "%s contains devices of " 613fa9e4066Sahrens "different sizes\n"), 614fa9e4066Sahrens rep.type); 615fa9e4066Sahrens else 616fa9e4066Sahrens return (NULL); 617fa9e4066Sahrens dontreport = TRUE; 618fa9e4066Sahrens } 619fa9e4066Sahrens 620fa9e4066Sahrens type = childtype; 621fa9e4066Sahrens vdev_size = size; 622fa9e4066Sahrens } 623fa9e4066Sahrens } 624fa9e4066Sahrens 625fa9e4066Sahrens /* 626fa9e4066Sahrens * At this point, we have the replication of the last toplevel 627fa9e4066Sahrens * vdev in 'rep'. Compare it to 'lastrep' to see if its 628fa9e4066Sahrens * different. 629fa9e4066Sahrens */ 630fa9e4066Sahrens if (lastrep.type != NULL) { 631fa9e4066Sahrens if (strcmp(lastrep.type, rep.type) != 0) { 632fa9e4066Sahrens if (ret != NULL) 633fa9e4066Sahrens free(ret); 634fa9e4066Sahrens ret = NULL; 635fa9e4066Sahrens if (fatal) 636fa9e4066Sahrens vdev_error(gettext( 637fa9e4066Sahrens "mismatched replication " 638fa9e4066Sahrens "level: both %s and %s vdevs are " 639fa9e4066Sahrens "present\n"), 640fa9e4066Sahrens lastrep.type, rep.type); 641fa9e4066Sahrens else 642fa9e4066Sahrens return (NULL); 643fa9e4066Sahrens } else if (lastrep.level != rep.level) { 644fa9e4066Sahrens if (ret) 645fa9e4066Sahrens free(ret); 646fa9e4066Sahrens ret = NULL; 647fa9e4066Sahrens if (fatal) 648fa9e4066Sahrens vdev_error(gettext( 649fa9e4066Sahrens "mismatched replication " 650fa9e4066Sahrens "level: %d-way %s and %d-way %s " 651fa9e4066Sahrens "vdevs are present\n"), 652fa9e4066Sahrens lastrep.level, lastrep.type, 653fa9e4066Sahrens rep.level, rep.type); 654fa9e4066Sahrens else 655fa9e4066Sahrens return (NULL); 656fa9e4066Sahrens } 657fa9e4066Sahrens } 658fa9e4066Sahrens lastrep = rep; 659fa9e4066Sahrens } 660fa9e4066Sahrens 661fa9e4066Sahrens if (ret != NULL) { 662fa9e4066Sahrens ret->type = rep.type; 663fa9e4066Sahrens ret->level = rep.level; 664fa9e4066Sahrens } 665fa9e4066Sahrens 666fa9e4066Sahrens return (ret); 667fa9e4066Sahrens } 668fa9e4066Sahrens 669fa9e4066Sahrens /* 670fa9e4066Sahrens * Check the replication level of the vdev spec against the current pool. Calls 671fa9e4066Sahrens * get_replication() to make sure the new spec is self-consistent. If the pool 672fa9e4066Sahrens * has a consistent replication level, then we ignore any errors. Otherwise, 673fa9e4066Sahrens * report any difference between the two. 674fa9e4066Sahrens */ 675fa9e4066Sahrens int 676fa9e4066Sahrens check_replication(nvlist_t *config, nvlist_t *newroot) 677fa9e4066Sahrens { 678fa9e4066Sahrens replication_level_t *current = NULL, *new; 679fa9e4066Sahrens int ret; 680fa9e4066Sahrens 681fa9e4066Sahrens /* 682fa9e4066Sahrens * If we have a current pool configuration, check to see if it's 683fa9e4066Sahrens * self-consistent. If not, simply return success. 684fa9e4066Sahrens */ 685fa9e4066Sahrens if (config != NULL) { 686fa9e4066Sahrens nvlist_t *nvroot; 687fa9e4066Sahrens 688fa9e4066Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 689fa9e4066Sahrens &nvroot) == 0); 690fa9e4066Sahrens if ((current = get_replication(nvroot, FALSE)) == NULL) 691fa9e4066Sahrens return (0); 692fa9e4066Sahrens } 693fa9e4066Sahrens 694fa9e4066Sahrens /* 695fa9e4066Sahrens * Get the replication level of the new vdev spec, reporting any 696fa9e4066Sahrens * inconsistencies found. 697fa9e4066Sahrens */ 698fa9e4066Sahrens if ((new = get_replication(newroot, TRUE)) == NULL) { 699fa9e4066Sahrens free(current); 700fa9e4066Sahrens return (-1); 701fa9e4066Sahrens } 702fa9e4066Sahrens 703fa9e4066Sahrens /* 704fa9e4066Sahrens * Check to see if the new vdev spec matches the replication level of 705fa9e4066Sahrens * the current pool. 706fa9e4066Sahrens */ 707fa9e4066Sahrens ret = 0; 708fa9e4066Sahrens if (current != NULL) { 709fa9e4066Sahrens if (strcmp(current->type, new->type) != 0 || 710fa9e4066Sahrens current->level != new->level) { 711fa9e4066Sahrens vdev_error(gettext( 712fa9e4066Sahrens "mismatched replication level: pool uses %d-way %s " 713fa9e4066Sahrens "and new vdev uses %d-way %s\n"), 714fa9e4066Sahrens current->level, current->type, new->level, 715fa9e4066Sahrens new->type); 716fa9e4066Sahrens ret = -1; 717fa9e4066Sahrens } 718fa9e4066Sahrens } 719fa9e4066Sahrens 720fa9e4066Sahrens free(new); 721fa9e4066Sahrens if (current != NULL) 722fa9e4066Sahrens free(current); 723fa9e4066Sahrens 724fa9e4066Sahrens return (ret); 725fa9e4066Sahrens } 726fa9e4066Sahrens 727fa9e4066Sahrens /* 728fa9e4066Sahrens * Label an individual disk. The name provided is the short name, stripped of 729fa9e4066Sahrens * any leading /dev path. 730fa9e4066Sahrens */ 731fa9e4066Sahrens int 732fa9e4066Sahrens label_disk(char *name) 733fa9e4066Sahrens { 734fa9e4066Sahrens char path[MAXPATHLEN]; 735fa9e4066Sahrens struct dk_gpt *vtoc; 736fa9e4066Sahrens int fd; 737fa9e4066Sahrens size_t resv = 16384; 738fa9e4066Sahrens 739fa9e4066Sahrens (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name, 740fa9e4066Sahrens BACKUP_SLICE); 741fa9e4066Sahrens 742fa9e4066Sahrens if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { 743fa9e4066Sahrens /* 744fa9e4066Sahrens * This shouldn't happen. We've long since verified that this 745fa9e4066Sahrens * is a valid device. 746fa9e4066Sahrens */ 747fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), 748fa9e4066Sahrens path, strerror(errno)); 749fa9e4066Sahrens return (-1); 750fa9e4066Sahrens } 751fa9e4066Sahrens 752fa9e4066Sahrens 753fa9e4066Sahrens if (efi_alloc_and_init(fd, 9, &vtoc) != 0) { 754fa9e4066Sahrens /* 755fa9e4066Sahrens * The only way this can fail is if we run out of memory, or we 756fa9e4066Sahrens * were unable to read the disk geometry. 757fa9e4066Sahrens */ 758fa9e4066Sahrens if (errno == ENOMEM) 759fa9e4066Sahrens no_memory(); 760fa9e4066Sahrens 761fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot label '%s': unable to " 762fa9e4066Sahrens "read disk geometry\n"), name); 763fa9e4066Sahrens (void) close(fd); 764fa9e4066Sahrens return (-1); 765fa9e4066Sahrens } 766fa9e4066Sahrens 767fa9e4066Sahrens vtoc->efi_parts[0].p_start = vtoc->efi_first_u_lba; 768fa9e4066Sahrens vtoc->efi_parts[0].p_size = vtoc->efi_last_u_lba + 1 - 769fa9e4066Sahrens vtoc->efi_first_u_lba - resv; 770fa9e4066Sahrens 771fa9e4066Sahrens /* 772fa9e4066Sahrens * Why we use V_USR: V_BACKUP confuses users, and is considered 773fa9e4066Sahrens * disposable by some EFI utilities (since EFI doesn't have a backup 774fa9e4066Sahrens * slice). V_UNASSIGNED is supposed to be used only for zero size 775fa9e4066Sahrens * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, 776fa9e4066Sahrens * etc. were all pretty specific. V_USR is as close to reality as we 777fa9e4066Sahrens * can get, in the absence of V_OTHER. 778fa9e4066Sahrens */ 779fa9e4066Sahrens vtoc->efi_parts[0].p_tag = V_USR; 780fa9e4066Sahrens (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); 781fa9e4066Sahrens 782fa9e4066Sahrens vtoc->efi_parts[8].p_start = vtoc->efi_last_u_lba + 1 - resv; 783fa9e4066Sahrens vtoc->efi_parts[8].p_size = resv; 784fa9e4066Sahrens vtoc->efi_parts[8].p_tag = V_RESERVED; 785fa9e4066Sahrens 786fa9e4066Sahrens if (efi_write(fd, vtoc) != 0) { 787fa9e4066Sahrens /* 788fa9e4066Sahrens * Currently, EFI labels are not supported for IDE disks, and it 789fa9e4066Sahrens * is likely that they will not be supported on other drives for 790fa9e4066Sahrens * some time. Print out a helpful error message directing the 791fa9e4066Sahrens * user to manually label the disk and give a specific slice. 792fa9e4066Sahrens */ 793fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot label '%s': failed to " 794fa9e4066Sahrens "write EFI label\n"), name); 795fa9e4066Sahrens (void) fprintf(stderr, gettext("use fdisk(1M) to partition " 796fa9e4066Sahrens "the disk, and provide a specific slice\n")); 797fa9e4066Sahrens (void) close(fd); 798fa9e4066Sahrens return (-1); 799fa9e4066Sahrens } 800fa9e4066Sahrens 801fa9e4066Sahrens (void) close(fd); 802fa9e4066Sahrens return (0); 803fa9e4066Sahrens } 804fa9e4066Sahrens 805fa9e4066Sahrens /* 806fa9e4066Sahrens * Go through and find any whole disks in the vdev specification, labelling them 807fa9e4066Sahrens * as appropriate. When constructing the vdev spec, we were unable to open this 808fa9e4066Sahrens * device in order to provide a devid. Now that we have labelled the disk and 809fa9e4066Sahrens * know that slice 0 is valid, we can construct the devid now. 810fa9e4066Sahrens * 811fa9e4066Sahrens * If the disk was already labelled with an EFI label, we will have gotten the 812fa9e4066Sahrens * devid already (because we were able to open the whole disk). Otherwise, we 813fa9e4066Sahrens * need to get the devid after we label the disk. 814fa9e4066Sahrens */ 815fa9e4066Sahrens int 816fa9e4066Sahrens make_disks(nvlist_t *nv) 817fa9e4066Sahrens { 818fa9e4066Sahrens nvlist_t **child; 819fa9e4066Sahrens uint_t c, children; 820fa9e4066Sahrens char *type, *path, *diskname; 821fa9e4066Sahrens char buf[MAXPATHLEN]; 822afefbcddSeschrock uint64_t wholedisk; 823fa9e4066Sahrens int fd; 824fa9e4066Sahrens int ret; 825fa9e4066Sahrens ddi_devid_t devid; 826fa9e4066Sahrens char *minor = NULL, *devid_str = NULL; 827fa9e4066Sahrens 828fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 829fa9e4066Sahrens 830fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 831fa9e4066Sahrens &child, &children) != 0) { 832fa9e4066Sahrens 833fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_DISK) != 0) 834fa9e4066Sahrens return (0); 835fa9e4066Sahrens 836fa9e4066Sahrens /* 837fa9e4066Sahrens * We have a disk device. Get the path to the device 838fa9e4066Sahrens * and see if its a whole disk by appending the backup 839fa9e4066Sahrens * slice and stat()ing the device. 840fa9e4066Sahrens */ 841fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); 842fa9e4066Sahrens 843afefbcddSeschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 844afefbcddSeschrock &wholedisk) != 0 || !wholedisk) 845fa9e4066Sahrens return (0); 846fa9e4066Sahrens 847fa9e4066Sahrens diskname = strrchr(path, '/'); 848fa9e4066Sahrens assert(diskname != NULL); 849fa9e4066Sahrens diskname++; 850fa9e4066Sahrens if (label_disk(diskname) != 0) 851fa9e4066Sahrens return (-1); 852fa9e4066Sahrens 853fa9e4066Sahrens /* 854fa9e4066Sahrens * Fill in the devid, now that we've labeled the disk. 855fa9e4066Sahrens */ 856fa9e4066Sahrens (void) snprintf(buf, sizeof (buf), "%ss0", path); 857fa9e4066Sahrens if ((fd = open(buf, O_RDONLY)) < 0) { 858fa9e4066Sahrens (void) fprintf(stderr, 859fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 860fa9e4066Sahrens buf, strerror(errno)); 861fa9e4066Sahrens return (-1); 862fa9e4066Sahrens } 863fa9e4066Sahrens 864fa9e4066Sahrens if (devid_get(fd, &devid) == 0) { 865fa9e4066Sahrens if (devid_get_minor_name(fd, &minor) == 0 && 866fa9e4066Sahrens (devid_str = devid_str_encode(devid, minor)) != 867fa9e4066Sahrens NULL) { 868fa9e4066Sahrens verify(nvlist_add_string(nv, 869fa9e4066Sahrens ZPOOL_CONFIG_DEVID, devid_str) == 0); 870fa9e4066Sahrens } 871fa9e4066Sahrens if (devid_str != NULL) 872fa9e4066Sahrens devid_str_free(devid_str); 873fa9e4066Sahrens if (minor != NULL) 874fa9e4066Sahrens devid_str_free(minor); 875fa9e4066Sahrens devid_free(devid); 876fa9e4066Sahrens } 877fa9e4066Sahrens 878afefbcddSeschrock /* 879afefbcddSeschrock * Update the path to refer to the 's0' slice. The presence of 880afefbcddSeschrock * the 'whole_disk' field indicates to the CLI that we should 881afefbcddSeschrock * chop off the slice number when displaying the device in 882afefbcddSeschrock * future output. 883afefbcddSeschrock */ 884afefbcddSeschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); 885afefbcddSeschrock 886fa9e4066Sahrens (void) close(fd); 887fa9e4066Sahrens 888fa9e4066Sahrens return (0); 889fa9e4066Sahrens } 890fa9e4066Sahrens 891fa9e4066Sahrens for (c = 0; c < children; c++) 892fa9e4066Sahrens if ((ret = make_disks(child[c])) != 0) 893fa9e4066Sahrens return (ret); 894fa9e4066Sahrens 895fa9e4066Sahrens return (0); 896fa9e4066Sahrens } 897fa9e4066Sahrens 898fa9e4066Sahrens /* 899fa9e4066Sahrens * Go through and find any devices that are in use. We rely on libdiskmgt for 900fa9e4066Sahrens * the majority of this task. 901fa9e4066Sahrens */ 902fa9e4066Sahrens int 903fa9e4066Sahrens check_in_use(nvlist_t *nv, int force) 904fa9e4066Sahrens { 905fa9e4066Sahrens nvlist_t **child; 906fa9e4066Sahrens uint_t c, children; 907fa9e4066Sahrens char *type, *path; 908fa9e4066Sahrens int ret; 909fa9e4066Sahrens 910fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 911fa9e4066Sahrens 912fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 913fa9e4066Sahrens &child, &children) != 0) { 914fa9e4066Sahrens 915fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); 916fa9e4066Sahrens 917fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_DISK) == 0) 918fa9e4066Sahrens ret = check_device(path, force); 919fa9e4066Sahrens 920fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_FILE) == 0) 921fa9e4066Sahrens ret = check_file(path, force); 922fa9e4066Sahrens 923fa9e4066Sahrens return (ret); 924fa9e4066Sahrens } 925fa9e4066Sahrens 926fa9e4066Sahrens for (c = 0; c < children; c++) 927fa9e4066Sahrens if ((ret = check_in_use(child[c], force)) != 0) 928fa9e4066Sahrens return (ret); 929fa9e4066Sahrens 930fa9e4066Sahrens return (0); 931fa9e4066Sahrens } 932fa9e4066Sahrens 933fa9e4066Sahrens /* 934fa9e4066Sahrens * Construct a syntactically valid vdev specification, 935fa9e4066Sahrens * and ensure that all devices and files exist and can be opened. 936fa9e4066Sahrens * Note: we don't bother freeing anything in the error paths 937fa9e4066Sahrens * because the program is just going to exit anyway. 938fa9e4066Sahrens */ 939fa9e4066Sahrens nvlist_t * 940fa9e4066Sahrens construct_spec(int argc, char **argv) 941fa9e4066Sahrens { 942fa9e4066Sahrens nvlist_t *nvroot, *nv, **top; 943fa9e4066Sahrens int t, toplevels; 944fa9e4066Sahrens 945fa9e4066Sahrens top = NULL; 946fa9e4066Sahrens toplevels = 0; 947fa9e4066Sahrens 948fa9e4066Sahrens while (argc > 0) { 949fa9e4066Sahrens nv = NULL; 950fa9e4066Sahrens 951fa9e4066Sahrens /* 952fa9e4066Sahrens * If it's a mirror or raidz, the subsequent arguments are 953fa9e4066Sahrens * its leaves -- until we encounter the next mirror or raidz. 954fa9e4066Sahrens */ 955fa9e4066Sahrens if (strcmp(argv[0], VDEV_TYPE_MIRROR) == 0 || 956fa9e4066Sahrens strcmp(argv[0], VDEV_TYPE_RAIDZ) == 0) { 957fa9e4066Sahrens 958fa9e4066Sahrens char *type = argv[0]; 959fa9e4066Sahrens nvlist_t **child = NULL; 960fa9e4066Sahrens int children = 0; 961fa9e4066Sahrens int c; 962fa9e4066Sahrens 963fa9e4066Sahrens for (c = 1; c < argc; c++) { 964fa9e4066Sahrens if (strcmp(argv[c], VDEV_TYPE_MIRROR) == 0 || 965fa9e4066Sahrens strcmp(argv[c], VDEV_TYPE_RAIDZ) == 0) 966fa9e4066Sahrens break; 967fa9e4066Sahrens children++; 968fa9e4066Sahrens child = realloc(child, 969fa9e4066Sahrens children * sizeof (nvlist_t *)); 970fa9e4066Sahrens if (child == NULL) 971fa9e4066Sahrens no_memory(); 972fa9e4066Sahrens if ((nv = make_leaf_vdev(argv[c])) == NULL) 973fa9e4066Sahrens return (NULL); 974fa9e4066Sahrens child[children - 1] = nv; 975fa9e4066Sahrens } 976fa9e4066Sahrens 977fa9e4066Sahrens argc -= c; 978fa9e4066Sahrens argv += c; 979fa9e4066Sahrens 980fa9e4066Sahrens /* 981fa9e4066Sahrens * Mirrors and RAID-Z devices require at least 982fa9e4066Sahrens * two components. 983fa9e4066Sahrens */ 984fa9e4066Sahrens if (children < 2) { 985fa9e4066Sahrens (void) fprintf(stderr, 986fa9e4066Sahrens gettext("invalid vdev specification: " 987fa9e4066Sahrens "%s requires at least 2 devices\n"), type); 988fa9e4066Sahrens return (NULL); 989fa9e4066Sahrens } 990fa9e4066Sahrens 991fa9e4066Sahrens verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0); 992fa9e4066Sahrens verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, 993fa9e4066Sahrens type) == 0); 994fa9e4066Sahrens verify(nvlist_add_nvlist_array(nv, 995fa9e4066Sahrens ZPOOL_CONFIG_CHILDREN, child, children) == 0); 996fa9e4066Sahrens 997fa9e4066Sahrens for (c = 0; c < children; c++) 998fa9e4066Sahrens nvlist_free(child[c]); 999fa9e4066Sahrens free(child); 1000fa9e4066Sahrens } else { 1001fa9e4066Sahrens /* 1002fa9e4066Sahrens * We have a device. Pass off to make_leaf_vdev() to 1003fa9e4066Sahrens * construct the appropriate nvlist describing the vdev. 1004fa9e4066Sahrens */ 1005fa9e4066Sahrens if ((nv = make_leaf_vdev(argv[0])) == NULL) 1006fa9e4066Sahrens return (NULL); 1007fa9e4066Sahrens argc--; 1008fa9e4066Sahrens argv++; 1009fa9e4066Sahrens } 1010fa9e4066Sahrens 1011fa9e4066Sahrens toplevels++; 1012fa9e4066Sahrens top = realloc(top, toplevels * sizeof (nvlist_t *)); 1013fa9e4066Sahrens if (top == NULL) 1014fa9e4066Sahrens no_memory(); 1015fa9e4066Sahrens top[toplevels - 1] = nv; 1016fa9e4066Sahrens } 1017fa9e4066Sahrens 1018fa9e4066Sahrens /* 1019fa9e4066Sahrens * Finally, create nvroot and add all top-level vdevs to it. 1020fa9e4066Sahrens */ 1021fa9e4066Sahrens verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); 1022fa9e4066Sahrens verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 1023fa9e4066Sahrens VDEV_TYPE_ROOT) == 0); 1024fa9e4066Sahrens verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1025fa9e4066Sahrens top, toplevels) == 0); 1026fa9e4066Sahrens 1027fa9e4066Sahrens for (t = 0; t < toplevels; t++) 1028fa9e4066Sahrens nvlist_free(top[t]); 1029fa9e4066Sahrens free(top); 1030fa9e4066Sahrens 1031fa9e4066Sahrens return (nvroot); 1032fa9e4066Sahrens } 1033fa9e4066Sahrens 1034fa9e4066Sahrens /* 1035fa9e4066Sahrens * Get and validate the contents of the given vdev specification. This ensures 1036fa9e4066Sahrens * that the nvlist returned is well-formed, that all the devices exist, and that 1037fa9e4066Sahrens * they are not currently in use by any other known consumer. The 'poolconfig' 1038fa9e4066Sahrens * parameter is the current configuration of the pool when adding devices 1039fa9e4066Sahrens * existing pool, and is used to perform additional checks, such as changing the 1040fa9e4066Sahrens * replication level of the pool. It can be 'NULL' to indicate that this is a 1041fa9e4066Sahrens * new pool. The 'force' flag controls whether devices should be forcefully 1042fa9e4066Sahrens * added, even if they appear in use. 1043fa9e4066Sahrens */ 1044fa9e4066Sahrens nvlist_t * 1045fa9e4066Sahrens make_root_vdev(nvlist_t *poolconfig, int force, int check_rep, 1046fa9e4066Sahrens int argc, char **argv) 1047fa9e4066Sahrens { 1048fa9e4066Sahrens nvlist_t *newroot; 1049fa9e4066Sahrens 1050fa9e4066Sahrens is_force = force; 1051fa9e4066Sahrens 1052fa9e4066Sahrens /* 1053fa9e4066Sahrens * Construct the vdev specification. If this is successful, we know 1054fa9e4066Sahrens * that we have a valid specification, and that all devices can be 1055fa9e4066Sahrens * opened. 1056fa9e4066Sahrens */ 1057fa9e4066Sahrens if ((newroot = construct_spec(argc, argv)) == NULL) 1058fa9e4066Sahrens return (NULL); 1059fa9e4066Sahrens 1060fa9e4066Sahrens /* 1061fa9e4066Sahrens * Validate each device to make sure that its not shared with another 1062fa9e4066Sahrens * subsystem. We do this even if 'force' is set, because there are some 1063fa9e4066Sahrens * uses (such as a dedicated dump device) that even '-f' cannot 1064fa9e4066Sahrens * override. 1065fa9e4066Sahrens */ 1066fa9e4066Sahrens if (check_in_use(newroot, force) != 0) { 1067fa9e4066Sahrens nvlist_free(newroot); 1068fa9e4066Sahrens return (NULL); 1069fa9e4066Sahrens } 1070fa9e4066Sahrens 1071fa9e4066Sahrens /* 1072fa9e4066Sahrens * Check the replication level of the given vdevs and report any errors 1073fa9e4066Sahrens * found. We include the existing pool spec, if any, as we need to 1074fa9e4066Sahrens * catch changes against the existing replication level. 1075fa9e4066Sahrens */ 1076fa9e4066Sahrens if (check_rep && check_replication(poolconfig, newroot) != 0) { 1077fa9e4066Sahrens nvlist_free(newroot); 1078fa9e4066Sahrens return (NULL); 1079fa9e4066Sahrens } 1080fa9e4066Sahrens 1081fa9e4066Sahrens /* 1082fa9e4066Sahrens * Run through the vdev specification and label any whole disks found. 1083fa9e4066Sahrens */ 1084fa9e4066Sahrens if (make_disks(newroot) != 0) { 1085fa9e4066Sahrens nvlist_free(newroot); 1086fa9e4066Sahrens return (NULL); 1087fa9e4066Sahrens } 1088fa9e4066Sahrens 1089fa9e4066Sahrens return (newroot); 1090fa9e4066Sahrens } 1091