1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 233f9d6ad7SLin Ling * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24663207adSDon Brady * Copyright (c) 2013, 2018 by Delphix. All rights reserved. 25663207adSDon Brady * Copyright (c) 2016, 2017 Intel Corporation. 26b327cd3fSIgor Kozhukhov * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. 27fa9e4066Sahrens */ 28fa9e4066Sahrens 29fa9e4066Sahrens /* 30fa9e4066Sahrens * Functions to convert between a list of vdevs and an nvlist representing the 31fa9e4066Sahrens * configuration. Each entry in the list can be one of: 32fa9e4066Sahrens * 33*8a5bcf73SToomas Soome * Device vdevs 34*8a5bcf73SToomas Soome * disk=(path=..., devid=...) 35*8a5bcf73SToomas Soome * file=(path=...) 36fa9e4066Sahrens * 37*8a5bcf73SToomas Soome * Group vdevs 38*8a5bcf73SToomas Soome * raidz[1|2]=(...) 39*8a5bcf73SToomas Soome * mirror=(...) 40fa9e4066Sahrens * 41*8a5bcf73SToomas Soome * Hot spares 4299653d4eSeschrock * 43fa9e4066Sahrens * While the underlying implementation supports it, group vdevs cannot contain 44fa9e4066Sahrens * other group vdevs. All userland verification of devices is contained within 45fa9e4066Sahrens * this file. If successful, the nvlist returned can be passed directly to the 46fa9e4066Sahrens * kernel; we've done as much verification as possible in userland. 47fa9e4066Sahrens * 4899653d4eSeschrock * Hot spares are a special case, and passed down as an array of disk vdevs, at 4999653d4eSeschrock * the same level as the root of the vdev tree. 5099653d4eSeschrock * 518488aeb5Staylor * The only function exported by this file is 'make_root_vdev'. The 528488aeb5Staylor * function performs several passes: 53fa9e4066Sahrens * 54*8a5bcf73SToomas Soome * 1. Construct the vdev specification. Performs syntax validation and 55fa9e4066Sahrens * makes sure each device is valid. 56*8a5bcf73SToomas Soome * 2. Check for devices in use. Using libdiskmgt, makes sure that no 57fa9e4066Sahrens * devices are also in use. Some can be overridden using the 'force' 58fa9e4066Sahrens * flag, others cannot. 59*8a5bcf73SToomas Soome * 3. Check for replication errors if the 'force' flag is not specified. 60fa9e4066Sahrens * validates that the replication level is consistent across the 61fa9e4066Sahrens * entire pool. 62*8a5bcf73SToomas Soome * 4. Call libzfs to label any whole disks with an EFI label. 63fa9e4066Sahrens */ 64fa9e4066Sahrens 65fa9e4066Sahrens #include <assert.h> 66fa9e4066Sahrens #include <devid.h> 67fa9e4066Sahrens #include <errno.h> 68fa9e4066Sahrens #include <fcntl.h> 69fa9e4066Sahrens #include <libdiskmgt.h> 70fa9e4066Sahrens #include <libintl.h> 71fa9e4066Sahrens #include <libnvpair.h> 72f94275ceSAdam Leventhal #include <limits.h> 73fa9e4066Sahrens #include <stdio.h> 74fa9e4066Sahrens #include <string.h> 75fa9e4066Sahrens #include <unistd.h> 76fa9e4066Sahrens #include <sys/efi_partition.h> 77fa9e4066Sahrens #include <sys/stat.h> 78fa9e4066Sahrens #include <sys/vtoc.h> 79fa9e4066Sahrens #include <sys/mntent.h> 80fa9e4066Sahrens 81fa9e4066Sahrens #include "zpool_util.h" 82fa9e4066Sahrens 83fa9e4066Sahrens #define BACKUP_SLICE "s2" 84fa9e4066Sahrens 85fa9e4066Sahrens /* 86fa9e4066Sahrens * For any given vdev specification, we can have multiple errors. The 87fa9e4066Sahrens * vdev_error() function keeps track of whether we have seen an error yet, and 88fa9e4066Sahrens * prints out a header if its the first error we've seen. 89fa9e4066Sahrens */ 9099653d4eSeschrock boolean_t error_seen; 9199653d4eSeschrock boolean_t is_force; 92fa9e4066Sahrens 9399653d4eSeschrock /*PRINTFLIKE1*/ 9499653d4eSeschrock static void 95fa9e4066Sahrens vdev_error(const char *fmt, ...) 96fa9e4066Sahrens { 97fa9e4066Sahrens va_list ap; 98fa9e4066Sahrens 99fa9e4066Sahrens if (!error_seen) { 100fa9e4066Sahrens (void) fprintf(stderr, gettext("invalid vdev specification\n")); 101fa9e4066Sahrens if (!is_force) 102fa9e4066Sahrens (void) fprintf(stderr, gettext("use '-f' to override " 103fa9e4066Sahrens "the following errors:\n")); 104fa9e4066Sahrens else 105fa9e4066Sahrens (void) fprintf(stderr, gettext("the following errors " 106fa9e4066Sahrens "must be manually repaired:\n")); 10799653d4eSeschrock error_seen = B_TRUE; 108fa9e4066Sahrens } 109fa9e4066Sahrens 110fa9e4066Sahrens va_start(ap, fmt); 111fa9e4066Sahrens (void) vfprintf(stderr, fmt, ap); 112fa9e4066Sahrens va_end(ap); 113fa9e4066Sahrens } 114fa9e4066Sahrens 11546a2abf2Seschrock static void 11646a2abf2Seschrock libdiskmgt_error(int error) 117fa9e4066Sahrens { 118ea8dc4b6Seschrock /* 11999653d4eSeschrock * ENXIO/ENODEV is a valid error message if the device doesn't live in 120ea8dc4b6Seschrock * /dev/dsk. Don't bother printing an error message in this case. 121ea8dc4b6Seschrock */ 12299653d4eSeschrock if (error == ENXIO || error == ENODEV) 123ea8dc4b6Seschrock return; 124ea8dc4b6Seschrock 12546a2abf2Seschrock (void) fprintf(stderr, gettext("warning: device in use checking " 12646a2abf2Seschrock "failed: %s\n"), strerror(error)); 127fa9e4066Sahrens } 128fa9e4066Sahrens 129fa9e4066Sahrens /* 13046a2abf2Seschrock * Validate a device, passing the bulk of the work off to libdiskmgt. 131fa9e4066Sahrens */ 1328488aeb5Staylor static int 13399653d4eSeschrock check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) 134fa9e4066Sahrens { 13546a2abf2Seschrock char *msg; 13646a2abf2Seschrock int error = 0; 13703a818bcSmmusante dm_who_type_t who; 138fa9e4066Sahrens 13903a818bcSmmusante if (force) 14003a818bcSmmusante who = DM_WHO_ZPOOL_FORCE; 14103a818bcSmmusante else if (isspare) 14203a818bcSmmusante who = DM_WHO_ZPOOL_SPARE; 14303a818bcSmmusante else 14403a818bcSmmusante who = DM_WHO_ZPOOL; 14503a818bcSmmusante 14603a818bcSmmusante if (dm_inuse((char *)path, &msg, who, &error) || error) { 14746a2abf2Seschrock if (error != 0) { 14846a2abf2Seschrock libdiskmgt_error(error); 14946a2abf2Seschrock return (0); 15046657f8dSmmusante } else { 15146a2abf2Seschrock vdev_error("%s", msg); 15246a2abf2Seschrock free(msg); 153181c2f42Smmusante return (-1); 154fa9e4066Sahrens } 155fa9e4066Sahrens } 156fa9e4066Sahrens 157fa9e4066Sahrens /* 15846a2abf2Seschrock * If we're given a whole disk, ignore overlapping slices since we're 15946a2abf2Seschrock * about to label it anyway. 160fa9e4066Sahrens */ 16146a2abf2Seschrock error = 0; 16246a2abf2Seschrock if (!wholedisk && !force && 16346a2abf2Seschrock (dm_isoverlapping((char *)path, &msg, &error) || error)) { 164181c2f42Smmusante if (error == 0) { 165181c2f42Smmusante /* dm_isoverlapping returned -1 */ 166181c2f42Smmusante vdev_error(gettext("%s overlaps with %s\n"), path, msg); 167181c2f42Smmusante free(msg); 168181c2f42Smmusante return (-1); 169181c2f42Smmusante } else if (error != ENODEV) { 170181c2f42Smmusante /* libdiskmgt's devcache only handles physical drives */ 17146a2abf2Seschrock libdiskmgt_error(error); 17246a2abf2Seschrock return (0); 173fa9e4066Sahrens } 17446a2abf2Seschrock } 175fa9e4066Sahrens 176181c2f42Smmusante return (0); 177fa9e4066Sahrens } 178fa9e4066Sahrens 1798488aeb5Staylor 180fa9e4066Sahrens /* 181fa9e4066Sahrens * Validate a whole disk. Iterate over all slices on the disk and make sure 182fa9e4066Sahrens * that none is in use by calling check_slice(). 183fa9e4066Sahrens */ 1848488aeb5Staylor static int 18599653d4eSeschrock check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) 186fa9e4066Sahrens { 187fa9e4066Sahrens dm_descriptor_t *drive, *media, *slice; 188fa9e4066Sahrens int err = 0; 189fa9e4066Sahrens int i; 190fa9e4066Sahrens int ret; 191fa9e4066Sahrens 192fa9e4066Sahrens /* 193fa9e4066Sahrens * Get the drive associated with this disk. This should never fail, 194fa9e4066Sahrens * because we already have an alias handle open for the device. 195fa9e4066Sahrens */ 196fa9e4066Sahrens if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, 197*8a5bcf73SToomas Soome &err)) == NULL || *drive == 0) { 19846a2abf2Seschrock if (err) 19946a2abf2Seschrock libdiskmgt_error(err); 20046a2abf2Seschrock return (0); 20146a2abf2Seschrock } 202fa9e4066Sahrens 203fa9e4066Sahrens if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, 20446a2abf2Seschrock &err)) == NULL) { 20546a2abf2Seschrock dm_free_descriptors(drive); 20646a2abf2Seschrock if (err) 20746a2abf2Seschrock libdiskmgt_error(err); 20846a2abf2Seschrock return (0); 20946a2abf2Seschrock } 210fa9e4066Sahrens 211fa9e4066Sahrens dm_free_descriptors(drive); 212fa9e4066Sahrens 213fa9e4066Sahrens /* 214fa9e4066Sahrens * It is possible that the user has specified a removable media drive, 215fa9e4066Sahrens * and the media is not present. 216fa9e4066Sahrens */ 217*8a5bcf73SToomas Soome if (*media == 0) { 218fa9e4066Sahrens dm_free_descriptors(media); 21946a2abf2Seschrock vdev_error(gettext("'%s' has no media in drive\n"), name); 220fa9e4066Sahrens return (-1); 221fa9e4066Sahrens } 222fa9e4066Sahrens 223fa9e4066Sahrens if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, 22446a2abf2Seschrock &err)) == NULL) { 22546a2abf2Seschrock dm_free_descriptors(media); 22646a2abf2Seschrock if (err) 22746a2abf2Seschrock libdiskmgt_error(err); 22846a2abf2Seschrock return (0); 22946a2abf2Seschrock } 230fa9e4066Sahrens 231fa9e4066Sahrens dm_free_descriptors(media); 232fa9e4066Sahrens 233fa9e4066Sahrens ret = 0; 234fa9e4066Sahrens 235fa9e4066Sahrens /* 236fa9e4066Sahrens * Iterate over all slices and report any errors. We don't care about 237fa9e4066Sahrens * overlapping slices because we are using the whole disk. 238fa9e4066Sahrens */ 239*8a5bcf73SToomas Soome for (i = 0; slice[i] != 0; i++) { 24099653d4eSeschrock char *name = dm_get_name(slice[i], &err); 24199653d4eSeschrock 24299653d4eSeschrock if (check_slice(name, force, B_TRUE, isspare) != 0) 243fa9e4066Sahrens ret = -1; 24499653d4eSeschrock 24599653d4eSeschrock dm_free_name(name); 246fa9e4066Sahrens } 247fa9e4066Sahrens 248fa9e4066Sahrens dm_free_descriptors(slice); 249fa9e4066Sahrens return (ret); 250fa9e4066Sahrens } 251fa9e4066Sahrens 252fa9e4066Sahrens /* 25346a2abf2Seschrock * Validate a device. 254fa9e4066Sahrens */ 2558488aeb5Staylor static int 25699653d4eSeschrock check_device(const char *path, boolean_t force, boolean_t isspare) 257fa9e4066Sahrens { 258fa9e4066Sahrens dm_descriptor_t desc; 259fa9e4066Sahrens int err; 26046a2abf2Seschrock char *dev; 261fa9e4066Sahrens 262fa9e4066Sahrens /* 263fa9e4066Sahrens * For whole disks, libdiskmgt does not include the leading dev path. 264fa9e4066Sahrens */ 265fa9e4066Sahrens dev = strrchr(path, '/'); 266fa9e4066Sahrens assert(dev != NULL); 267fa9e4066Sahrens dev++; 268*8a5bcf73SToomas Soome if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != 0) { 26999653d4eSeschrock err = check_disk(path, desc, force, isspare); 27046a2abf2Seschrock dm_free_descriptor(desc); 27146a2abf2Seschrock return (err); 272fa9e4066Sahrens } 273fa9e4066Sahrens 27499653d4eSeschrock return (check_slice(path, force, B_FALSE, isspare)); 275fa9e4066Sahrens } 276fa9e4066Sahrens 277fa9e4066Sahrens /* 278fa9e4066Sahrens * Check that a file is valid. All we can do in this case is check that it's 279181c2f42Smmusante * not in use by another pool, and not in use by swap. 280fa9e4066Sahrens */ 2818488aeb5Staylor static int 28299653d4eSeschrock check_file(const char *file, boolean_t force, boolean_t isspare) 283fa9e4066Sahrens { 28446a2abf2Seschrock char *name; 285fa9e4066Sahrens int fd; 286fa9e4066Sahrens int ret = 0; 287181c2f42Smmusante int err; 28846a2abf2Seschrock pool_state_t state; 28999653d4eSeschrock boolean_t inuse; 290fa9e4066Sahrens 291181c2f42Smmusante if (dm_inuse_swap(file, &err)) { 292181c2f42Smmusante if (err) 293181c2f42Smmusante libdiskmgt_error(err); 294181c2f42Smmusante else 295181c2f42Smmusante vdev_error(gettext("%s is currently used by swap. " 296181c2f42Smmusante "Please see swap(1M).\n"), file); 297181c2f42Smmusante return (-1); 298181c2f42Smmusante } 299181c2f42Smmusante 300fa9e4066Sahrens if ((fd = open(file, O_RDONLY)) < 0) 301fa9e4066Sahrens return (0); 302fa9e4066Sahrens 30399653d4eSeschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { 30446a2abf2Seschrock const char *desc; 30546a2abf2Seschrock 30646a2abf2Seschrock switch (state) { 30746a2abf2Seschrock case POOL_STATE_ACTIVE: 30846a2abf2Seschrock desc = gettext("active"); 30946a2abf2Seschrock break; 31046a2abf2Seschrock 31146a2abf2Seschrock case POOL_STATE_EXPORTED: 31246a2abf2Seschrock desc = gettext("exported"); 31346a2abf2Seschrock break; 31446a2abf2Seschrock 31546a2abf2Seschrock case POOL_STATE_POTENTIALLY_ACTIVE: 31646a2abf2Seschrock desc = gettext("potentially active"); 31746a2abf2Seschrock break; 31846a2abf2Seschrock 31946a2abf2Seschrock default: 32046a2abf2Seschrock desc = gettext("unknown"); 32146a2abf2Seschrock break; 32246a2abf2Seschrock } 32346a2abf2Seschrock 32499653d4eSeschrock /* 32599653d4eSeschrock * Allow hot spares to be shared between pools. 32699653d4eSeschrock */ 32799653d4eSeschrock if (state == POOL_STATE_SPARE && isspare) 32899653d4eSeschrock return (0); 32999653d4eSeschrock 33099653d4eSeschrock if (state == POOL_STATE_ACTIVE || 33199653d4eSeschrock state == POOL_STATE_SPARE || !force) { 33299653d4eSeschrock switch (state) { 33399653d4eSeschrock case POOL_STATE_SPARE: 33499653d4eSeschrock vdev_error(gettext("%s is reserved as a hot " 33599653d4eSeschrock "spare for pool %s\n"), file, name); 33699653d4eSeschrock break; 33799653d4eSeschrock default: 33899653d4eSeschrock vdev_error(gettext("%s is part of %s pool " 33999653d4eSeschrock "'%s'\n"), file, desc, name); 34099653d4eSeschrock break; 34199653d4eSeschrock } 342fa9e4066Sahrens ret = -1; 343fa9e4066Sahrens } 344fa9e4066Sahrens 345fa9e4066Sahrens free(name); 346fa9e4066Sahrens } 347fa9e4066Sahrens 348fa9e4066Sahrens (void) close(fd); 349fa9e4066Sahrens return (ret); 350fa9e4066Sahrens } 351fa9e4066Sahrens 3528488aeb5Staylor 3538488aeb5Staylor /* 3548488aeb5Staylor * By "whole disk" we mean an entire physical disk (something we can 3558488aeb5Staylor * label, toggle the write cache on, etc.) as opposed to the full 3568488aeb5Staylor * capacity of a pseudo-device such as lofi or did. We act as if we 3578488aeb5Staylor * are labeling the disk, which should be a pretty good test of whether 3588488aeb5Staylor * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if 3598488aeb5Staylor * it isn't. 3608488aeb5Staylor */ 36199653d4eSeschrock static boolean_t 3628488aeb5Staylor is_whole_disk(const char *arg) 363fa9e4066Sahrens { 3648488aeb5Staylor struct dk_gpt *label; 3658488aeb5Staylor int fd; 3668488aeb5Staylor char path[MAXPATHLEN]; 367fa9e4066Sahrens 3688488aeb5Staylor (void) snprintf(path, sizeof (path), "%s%s%s", 3696401734dSWill Andrews ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); 3708488aeb5Staylor if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) 3718488aeb5Staylor return (B_FALSE); 3728488aeb5Staylor if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { 3738488aeb5Staylor (void) close(fd); 3748488aeb5Staylor return (B_FALSE); 3758488aeb5Staylor } 3768488aeb5Staylor efi_free(label); 3778488aeb5Staylor (void) close(fd); 3788488aeb5Staylor return (B_TRUE); 379fa9e4066Sahrens } 380fa9e4066Sahrens 381fa9e4066Sahrens /* 382fa9e4066Sahrens * Create a leaf vdev. Determine if this is a file or a device. If it's a 383fa9e4066Sahrens * device, fill in the device id to make a complete nvlist. Valid forms for a 384fa9e4066Sahrens * leaf vdev are: 385fa9e4066Sahrens * 386*8a5bcf73SToomas Soome * /dev/dsk/xxx Complete disk path 387*8a5bcf73SToomas Soome * /xxx Full path to file 388*8a5bcf73SToomas Soome * xxx Shorthand for /dev/dsk/xxx 389fa9e4066Sahrens */ 3908488aeb5Staylor static nvlist_t * 3918654d025Sperrin make_leaf_vdev(const char *arg, uint64_t is_log) 392fa9e4066Sahrens { 393fa9e4066Sahrens char path[MAXPATHLEN]; 394fa9e4066Sahrens struct stat64 statbuf; 395fa9e4066Sahrens nvlist_t *vdev = NULL; 396fa9e4066Sahrens char *type = NULL; 39799653d4eSeschrock boolean_t wholedisk = B_FALSE; 398fa9e4066Sahrens 399fa9e4066Sahrens /* 400fa9e4066Sahrens * Determine what type of vdev this is, and put the full path into 401fa9e4066Sahrens * 'path'. We detect whether this is a device of file afterwards by 402fa9e4066Sahrens * checking the st_mode of the file. 403fa9e4066Sahrens */ 404fa9e4066Sahrens if (arg[0] == '/') { 405fa9e4066Sahrens /* 406fa9e4066Sahrens * Complete device or file path. Exact type is determined by 407fa9e4066Sahrens * examining the file descriptor afterwards. 408fa9e4066Sahrens */ 4098488aeb5Staylor wholedisk = is_whole_disk(arg); 4108488aeb5Staylor if (!wholedisk && (stat64(arg, &statbuf) != 0)) { 411fa9e4066Sahrens (void) fprintf(stderr, 412fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 413fa9e4066Sahrens arg, strerror(errno)); 414fa9e4066Sahrens return (NULL); 415fa9e4066Sahrens } 416fa9e4066Sahrens 417fa9e4066Sahrens (void) strlcpy(path, arg, sizeof (path)); 418fa9e4066Sahrens } else { 419fa9e4066Sahrens /* 420fa9e4066Sahrens * This may be a short path for a device, or it could be total 421fa9e4066Sahrens * gibberish. Check to see if it's a known device in 422fa9e4066Sahrens * /dev/dsk/. As part of this check, see if we've been given a 423fa9e4066Sahrens * an entire disk (minus the slice number). 424fa9e4066Sahrens */ 4256401734dSWill Andrews (void) snprintf(path, sizeof (path), "%s/%s", ZFS_DISK_ROOT, 426fa9e4066Sahrens arg); 4278488aeb5Staylor wholedisk = is_whole_disk(path); 4288488aeb5Staylor if (!wholedisk && (stat64(path, &statbuf) != 0)) { 429fa9e4066Sahrens /* 430fa9e4066Sahrens * If we got ENOENT, then the user gave us 431fa9e4066Sahrens * gibberish, so try to direct them with a 432fa9e4066Sahrens * reasonable error message. Otherwise, 433fa9e4066Sahrens * regurgitate strerror() since it's the best we 434fa9e4066Sahrens * can do. 435fa9e4066Sahrens */ 436fa9e4066Sahrens if (errno == ENOENT) { 437fa9e4066Sahrens (void) fprintf(stderr, 438fa9e4066Sahrens gettext("cannot open '%s': no such " 4396401734dSWill Andrews "device in %s\n"), arg, ZFS_DISK_ROOT); 440fa9e4066Sahrens (void) fprintf(stderr, 441fa9e4066Sahrens gettext("must be a full path or " 442fa9e4066Sahrens "shorthand device name\n")); 443fa9e4066Sahrens return (NULL); 444fa9e4066Sahrens } else { 445fa9e4066Sahrens (void) fprintf(stderr, 446fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 447fa9e4066Sahrens path, strerror(errno)); 448fa9e4066Sahrens return (NULL); 449fa9e4066Sahrens } 450fa9e4066Sahrens } 451fa9e4066Sahrens } 452fa9e4066Sahrens 453fa9e4066Sahrens /* 454fa9e4066Sahrens * Determine whether this is a device or a file. 455fa9e4066Sahrens */ 4568488aeb5Staylor if (wholedisk || S_ISBLK(statbuf.st_mode)) { 457fa9e4066Sahrens type = VDEV_TYPE_DISK; 458fa9e4066Sahrens } else if (S_ISREG(statbuf.st_mode)) { 459fa9e4066Sahrens type = VDEV_TYPE_FILE; 460fa9e4066Sahrens } else { 461fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot use '%s': must be a " 462fa9e4066Sahrens "block device or regular file\n"), path); 463fa9e4066Sahrens return (NULL); 464fa9e4066Sahrens } 465fa9e4066Sahrens 466fa9e4066Sahrens /* 467fa9e4066Sahrens * Finally, we have the complete device or file, and we know that it is 468fa9e4066Sahrens * acceptable to use. Construct the nvlist to describe this vdev. All 469fa9e4066Sahrens * vdevs have a 'path' element, and devices also have a 'devid' element. 470fa9e4066Sahrens */ 471fa9e4066Sahrens verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); 472fa9e4066Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); 473fa9e4066Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); 4748654d025Sperrin verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0); 475663207adSDon Brady if (is_log) 476663207adSDon Brady verify(nvlist_add_string(vdev, ZPOOL_CONFIG_ALLOCATION_BIAS, 477663207adSDon Brady VDEV_ALLOC_BIAS_LOG) == 0); 478afefbcddSeschrock if (strcmp(type, VDEV_TYPE_DISK) == 0) 479afefbcddSeschrock verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, 480afefbcddSeschrock (uint64_t)wholedisk) == 0); 481fa9e4066Sahrens 482fa9e4066Sahrens /* 483fa9e4066Sahrens * For a whole disk, defer getting its devid until after labeling it. 484fa9e4066Sahrens */ 485fa9e4066Sahrens if (S_ISBLK(statbuf.st_mode) && !wholedisk) { 486fa9e4066Sahrens /* 487fa9e4066Sahrens * Get the devid for the device. 488fa9e4066Sahrens */ 489fa9e4066Sahrens int fd; 490fa9e4066Sahrens ddi_devid_t devid; 491fa9e4066Sahrens char *minor = NULL, *devid_str = NULL; 492fa9e4066Sahrens 493fa9e4066Sahrens if ((fd = open(path, O_RDONLY)) < 0) { 494fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot open '%s': " 495fa9e4066Sahrens "%s\n"), path, strerror(errno)); 496fa9e4066Sahrens nvlist_free(vdev); 497fa9e4066Sahrens return (NULL); 498fa9e4066Sahrens } 499fa9e4066Sahrens 500fa9e4066Sahrens if (devid_get(fd, &devid) == 0) { 501fa9e4066Sahrens if (devid_get_minor_name(fd, &minor) == 0 && 502fa9e4066Sahrens (devid_str = devid_str_encode(devid, minor)) != 503fa9e4066Sahrens NULL) { 504fa9e4066Sahrens verify(nvlist_add_string(vdev, 505fa9e4066Sahrens ZPOOL_CONFIG_DEVID, devid_str) == 0); 506fa9e4066Sahrens } 507fa9e4066Sahrens if (devid_str != NULL) 508fa9e4066Sahrens devid_str_free(devid_str); 509fa9e4066Sahrens if (minor != NULL) 510fa9e4066Sahrens devid_str_free(minor); 511fa9e4066Sahrens devid_free(devid); 512fa9e4066Sahrens } 513fa9e4066Sahrens 514fa9e4066Sahrens (void) close(fd); 515fa9e4066Sahrens } 516fa9e4066Sahrens 517fa9e4066Sahrens return (vdev); 518fa9e4066Sahrens } 519fa9e4066Sahrens 520fa9e4066Sahrens /* 521fa9e4066Sahrens * Go through and verify the replication level of the pool is consistent. 522fa9e4066Sahrens * Performs the following checks: 523fa9e4066Sahrens * 524*8a5bcf73SToomas Soome * For the new spec, verifies that devices in mirrors and raidz are the 525*8a5bcf73SToomas Soome * same size. 526fa9e4066Sahrens * 527*8a5bcf73SToomas Soome * If the current configuration already has inconsistent replication 528*8a5bcf73SToomas Soome * levels, ignore any other potential problems in the new spec. 529fa9e4066Sahrens * 530*8a5bcf73SToomas Soome * Otherwise, make sure that the current spec (if there is one) and the new 531*8a5bcf73SToomas Soome * spec have consistent replication levels. 532663207adSDon Brady * 533663207adSDon Brady * If there is no current spec (create), make sure new spec has at least 534663207adSDon Brady * one general purpose vdev. 535fa9e4066Sahrens */ 536fa9e4066Sahrens typedef struct replication_level { 53799653d4eSeschrock char *zprl_type; 53899653d4eSeschrock uint64_t zprl_children; 53999653d4eSeschrock uint64_t zprl_parity; 540fa9e4066Sahrens } replication_level_t; 541fa9e4066Sahrens 5428488aeb5Staylor #define ZPOOL_FUZZ (16 * 1024 * 1024) 5438488aeb5Staylor 544663207adSDon Brady static boolean_t 545663207adSDon Brady is_raidz_mirror(replication_level_t *a, replication_level_t *b, 546663207adSDon Brady replication_level_t **raidz, replication_level_t **mirror) 547663207adSDon Brady { 548663207adSDon Brady if (strcmp(a->zprl_type, "raidz") == 0 && 549663207adSDon Brady strcmp(b->zprl_type, "mirror") == 0) { 550663207adSDon Brady *raidz = a; 551663207adSDon Brady *mirror = b; 552663207adSDon Brady return (B_TRUE); 553663207adSDon Brady } 554663207adSDon Brady return (B_FALSE); 555663207adSDon Brady } 556663207adSDon Brady 557fa9e4066Sahrens /* 558fa9e4066Sahrens * Given a list of toplevel vdevs, return the current replication level. If 559fa9e4066Sahrens * the config is inconsistent, then NULL is returned. If 'fatal' is set, then 560fa9e4066Sahrens * an error message will be displayed for each self-inconsistent vdev. 561fa9e4066Sahrens */ 5628488aeb5Staylor static replication_level_t * 56399653d4eSeschrock get_replication(nvlist_t *nvroot, boolean_t fatal) 564fa9e4066Sahrens { 565fa9e4066Sahrens nvlist_t **top; 566fa9e4066Sahrens uint_t t, toplevels; 567fa9e4066Sahrens nvlist_t **child; 568fa9e4066Sahrens uint_t c, children; 569fa9e4066Sahrens nvlist_t *nv; 570fa9e4066Sahrens char *type; 571b327cd3fSIgor Kozhukhov replication_level_t lastrep = {0}; 572b327cd3fSIgor Kozhukhov replication_level_t rep; 573b327cd3fSIgor Kozhukhov replication_level_t *ret; 574663207adSDon Brady replication_level_t *raidz, *mirror; 57599653d4eSeschrock boolean_t dontreport; 576fa9e4066Sahrens 577fa9e4066Sahrens ret = safe_malloc(sizeof (replication_level_t)); 578fa9e4066Sahrens 579fa9e4066Sahrens verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 580fa9e4066Sahrens &top, &toplevels) == 0); 581fa9e4066Sahrens 582fa9e4066Sahrens for (t = 0; t < toplevels; t++) { 5838654d025Sperrin uint64_t is_log = B_FALSE; 5848654d025Sperrin 585fa9e4066Sahrens nv = top[t]; 586fa9e4066Sahrens 5878654d025Sperrin /* 5888654d025Sperrin * For separate logs we ignore the top level vdev replication 5898654d025Sperrin * constraints. 5908654d025Sperrin */ 5918654d025Sperrin (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); 5928654d025Sperrin if (is_log) 5938654d025Sperrin continue; 5948654d025Sperrin 5958654d025Sperrin verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, 5968654d025Sperrin &type) == 0); 597fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 598fa9e4066Sahrens &child, &children) != 0) { 599fa9e4066Sahrens /* 600fa9e4066Sahrens * This is a 'file' or 'disk' vdev. 601fa9e4066Sahrens */ 60299653d4eSeschrock rep.zprl_type = type; 60399653d4eSeschrock rep.zprl_children = 1; 60499653d4eSeschrock rep.zprl_parity = 0; 605fa9e4066Sahrens } else { 606fa9e4066Sahrens uint64_t vdev_size; 607fa9e4066Sahrens 608fa9e4066Sahrens /* 609fa9e4066Sahrens * This is a mirror or RAID-Z vdev. Go through and make 610fa9e4066Sahrens * sure the contents are all the same (files vs. disks), 611fa9e4066Sahrens * keeping track of the number of elements in the 612fa9e4066Sahrens * process. 613fa9e4066Sahrens * 614fa9e4066Sahrens * We also check that the size of each vdev (if it can 615fa9e4066Sahrens * be determined) is the same. 616fa9e4066Sahrens */ 61799653d4eSeschrock rep.zprl_type = type; 61899653d4eSeschrock rep.zprl_children = 0; 61999653d4eSeschrock 62099653d4eSeschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { 62199653d4eSeschrock verify(nvlist_lookup_uint64(nv, 62299653d4eSeschrock ZPOOL_CONFIG_NPARITY, 62399653d4eSeschrock &rep.zprl_parity) == 0); 62499653d4eSeschrock assert(rep.zprl_parity != 0); 62599653d4eSeschrock } else { 62699653d4eSeschrock rep.zprl_parity = 0; 62799653d4eSeschrock } 628fa9e4066Sahrens 629fa9e4066Sahrens /* 6308654d025Sperrin * The 'dontreport' variable indicates that we've 631fa9e4066Sahrens * already reported an error for this spec, so don't 632fa9e4066Sahrens * bother doing it again. 633fa9e4066Sahrens */ 634fa9e4066Sahrens type = NULL; 635fa9e4066Sahrens dontreport = 0; 636fa9e4066Sahrens vdev_size = -1ULL; 637fa9e4066Sahrens for (c = 0; c < children; c++) { 638fa9e4066Sahrens nvlist_t *cnv = child[c]; 639fa9e4066Sahrens char *path; 640fa9e4066Sahrens struct stat64 statbuf; 641fa9e4066Sahrens uint64_t size = -1ULL; 642fa9e4066Sahrens char *childtype; 643fa9e4066Sahrens int fd, err; 644fa9e4066Sahrens 64599653d4eSeschrock rep.zprl_children++; 646fa9e4066Sahrens 647fa9e4066Sahrens verify(nvlist_lookup_string(cnv, 648fa9e4066Sahrens ZPOOL_CONFIG_TYPE, &childtype) == 0); 64994de1d4cSeschrock 65094de1d4cSeschrock /* 6518654d025Sperrin * If this is a replacing or spare vdev, then 652ac0215f4Sloli * get the real first child of the vdev: do this 653ac0215f4Sloli * in a loop because replacing and spare vdevs 654ac0215f4Sloli * can be nested. 65594de1d4cSeschrock */ 656ac0215f4Sloli while (strcmp(childtype, 65794de1d4cSeschrock VDEV_TYPE_REPLACING) == 0 || 65894de1d4cSeschrock strcmp(childtype, VDEV_TYPE_SPARE) == 0) { 65994de1d4cSeschrock nvlist_t **rchild; 66094de1d4cSeschrock uint_t rchildren; 66194de1d4cSeschrock 66294de1d4cSeschrock verify(nvlist_lookup_nvlist_array(cnv, 66394de1d4cSeschrock ZPOOL_CONFIG_CHILDREN, &rchild, 66494de1d4cSeschrock &rchildren) == 0); 66594de1d4cSeschrock assert(rchildren == 2); 66694de1d4cSeschrock cnv = rchild[0]; 66794de1d4cSeschrock 66894de1d4cSeschrock verify(nvlist_lookup_string(cnv, 66994de1d4cSeschrock ZPOOL_CONFIG_TYPE, 67094de1d4cSeschrock &childtype) == 0); 67194de1d4cSeschrock } 67294de1d4cSeschrock 673fa9e4066Sahrens verify(nvlist_lookup_string(cnv, 674fa9e4066Sahrens ZPOOL_CONFIG_PATH, &path) == 0); 675fa9e4066Sahrens 676fa9e4066Sahrens /* 677fa9e4066Sahrens * If we have a raidz/mirror that combines disks 678fa9e4066Sahrens * with files, report it as an error. 679fa9e4066Sahrens */ 680fa9e4066Sahrens if (!dontreport && type != NULL && 681fa9e4066Sahrens strcmp(type, childtype) != 0) { 682fa9e4066Sahrens if (ret != NULL) 683fa9e4066Sahrens free(ret); 684fa9e4066Sahrens ret = NULL; 685fa9e4066Sahrens if (fatal) 686fa9e4066Sahrens vdev_error(gettext( 687fa9e4066Sahrens "mismatched replication " 688fa9e4066Sahrens "level: %s contains both " 689fa9e4066Sahrens "files and devices\n"), 69099653d4eSeschrock rep.zprl_type); 691fa9e4066Sahrens else 692fa9e4066Sahrens return (NULL); 69399653d4eSeschrock dontreport = B_TRUE; 694fa9e4066Sahrens } 695fa9e4066Sahrens 696fa9e4066Sahrens /* 697fa9e4066Sahrens * According to stat(2), the value of 'st_size' 698fa9e4066Sahrens * is undefined for block devices and character 699fa9e4066Sahrens * devices. But there is no effective way to 700fa9e4066Sahrens * determine the real size in userland. 701fa9e4066Sahrens * 702fa9e4066Sahrens * Instead, we'll take advantage of an 703fa9e4066Sahrens * implementation detail of spec_size(). If the 704fa9e4066Sahrens * device is currently open, then we (should) 705fa9e4066Sahrens * return a valid size. 706fa9e4066Sahrens * 707fa9e4066Sahrens * If we still don't get a valid size (indicated 708fa9e4066Sahrens * by a size of 0 or MAXOFFSET_T), then ignore 709fa9e4066Sahrens * this device altogether. 710fa9e4066Sahrens */ 711fa9e4066Sahrens if ((fd = open(path, O_RDONLY)) >= 0) { 712fa9e4066Sahrens err = fstat64(fd, &statbuf); 713fa9e4066Sahrens (void) close(fd); 714fa9e4066Sahrens } else { 715fa9e4066Sahrens err = stat64(path, &statbuf); 716fa9e4066Sahrens } 717fa9e4066Sahrens 718fa9e4066Sahrens if (err != 0 || 719fa9e4066Sahrens statbuf.st_size == 0 || 720fa9e4066Sahrens statbuf.st_size == MAXOFFSET_T) 721fa9e4066Sahrens continue; 722fa9e4066Sahrens 723fa9e4066Sahrens size = statbuf.st_size; 724fa9e4066Sahrens 725fa9e4066Sahrens /* 7268488aeb5Staylor * Also make sure that devices and 7278488aeb5Staylor * slices have a consistent size. If 7288488aeb5Staylor * they differ by a significant amount 7298488aeb5Staylor * (~16MB) then report an error. 730fa9e4066Sahrens */ 7318488aeb5Staylor if (!dontreport && 7328488aeb5Staylor (vdev_size != -1ULL && 7338488aeb5Staylor (labs(size - vdev_size) > 7348488aeb5Staylor ZPOOL_FUZZ))) { 735fa9e4066Sahrens if (ret != NULL) 736fa9e4066Sahrens free(ret); 737fa9e4066Sahrens ret = NULL; 738fa9e4066Sahrens if (fatal) 739fa9e4066Sahrens vdev_error(gettext( 740fa9e4066Sahrens "%s contains devices of " 741fa9e4066Sahrens "different sizes\n"), 74299653d4eSeschrock rep.zprl_type); 743fa9e4066Sahrens else 744fa9e4066Sahrens return (NULL); 74599653d4eSeschrock dontreport = B_TRUE; 746fa9e4066Sahrens } 747fa9e4066Sahrens 748fa9e4066Sahrens type = childtype; 749fa9e4066Sahrens vdev_size = size; 750fa9e4066Sahrens } 751fa9e4066Sahrens } 752fa9e4066Sahrens 753fa9e4066Sahrens /* 754fa9e4066Sahrens * At this point, we have the replication of the last toplevel 755663207adSDon Brady * vdev in 'rep'. Compare it to 'lastrep' to see if it is 756fa9e4066Sahrens * different. 757fa9e4066Sahrens */ 75899653d4eSeschrock if (lastrep.zprl_type != NULL) { 759663207adSDon Brady if (is_raidz_mirror(&lastrep, &rep, &raidz, &mirror) || 760663207adSDon Brady is_raidz_mirror(&rep, &lastrep, &raidz, &mirror)) { 761663207adSDon Brady /* 762663207adSDon Brady * Accepted raidz and mirror when they can 763663207adSDon Brady * handle the same number of disk failures. 764663207adSDon Brady */ 765663207adSDon Brady if (raidz->zprl_parity != 766663207adSDon Brady mirror->zprl_children - 1) { 767663207adSDon Brady if (ret != NULL) 768663207adSDon Brady free(ret); 769663207adSDon Brady ret = NULL; 770663207adSDon Brady if (fatal) 771663207adSDon Brady vdev_error(gettext( 772663207adSDon Brady "mismatched replication " 773663207adSDon Brady "level: " 774663207adSDon Brady "%s and %s vdevs with " 775663207adSDon Brady "different redundancy, " 776663207adSDon Brady "%llu vs. %llu (%llu-way) " 777663207adSDon Brady "are present\n"), 778663207adSDon Brady raidz->zprl_type, 779663207adSDon Brady mirror->zprl_type, 780663207adSDon Brady raidz->zprl_parity, 781663207adSDon Brady mirror->zprl_children - 1, 782663207adSDon Brady mirror->zprl_children); 783663207adSDon Brady else 784663207adSDon Brady return (NULL); 785663207adSDon Brady } 786663207adSDon Brady } else if (strcmp(lastrep.zprl_type, rep.zprl_type) != 787663207adSDon Brady 0) { 788fa9e4066Sahrens if (ret != NULL) 789fa9e4066Sahrens free(ret); 790fa9e4066Sahrens ret = NULL; 791fa9e4066Sahrens if (fatal) 792fa9e4066Sahrens vdev_error(gettext( 79399653d4eSeschrock "mismatched replication level: " 79499653d4eSeschrock "both %s and %s vdevs are " 795fa9e4066Sahrens "present\n"), 79699653d4eSeschrock lastrep.zprl_type, rep.zprl_type); 797fa9e4066Sahrens else 798fa9e4066Sahrens return (NULL); 79999653d4eSeschrock } else if (lastrep.zprl_parity != rep.zprl_parity) { 800fa9e4066Sahrens if (ret) 801fa9e4066Sahrens free(ret); 802fa9e4066Sahrens ret = NULL; 803fa9e4066Sahrens if (fatal) 804fa9e4066Sahrens vdev_error(gettext( 80599653d4eSeschrock "mismatched replication level: " 80699653d4eSeschrock "both %llu and %llu device parity " 80799653d4eSeschrock "%s vdevs are present\n"), 80899653d4eSeschrock lastrep.zprl_parity, 80999653d4eSeschrock rep.zprl_parity, 81099653d4eSeschrock rep.zprl_type); 81199653d4eSeschrock else 81299653d4eSeschrock return (NULL); 81399653d4eSeschrock } else if (lastrep.zprl_children != rep.zprl_children) { 81499653d4eSeschrock if (ret) 81599653d4eSeschrock free(ret); 81699653d4eSeschrock ret = NULL; 81799653d4eSeschrock if (fatal) 81899653d4eSeschrock vdev_error(gettext( 81999653d4eSeschrock "mismatched replication level: " 82099653d4eSeschrock "both %llu-way and %llu-way %s " 821fa9e4066Sahrens "vdevs are present\n"), 82299653d4eSeschrock lastrep.zprl_children, 82399653d4eSeschrock rep.zprl_children, 82499653d4eSeschrock rep.zprl_type); 825fa9e4066Sahrens else 826fa9e4066Sahrens return (NULL); 827fa9e4066Sahrens } 828fa9e4066Sahrens } 829fa9e4066Sahrens lastrep = rep; 830fa9e4066Sahrens } 831fa9e4066Sahrens 83299653d4eSeschrock if (ret != NULL) 83399653d4eSeschrock *ret = rep; 834fa9e4066Sahrens 835fa9e4066Sahrens return (ret); 836fa9e4066Sahrens } 837fa9e4066Sahrens 838fa9e4066Sahrens /* 839fa9e4066Sahrens * Check the replication level of the vdev spec against the current pool. Calls 840fa9e4066Sahrens * get_replication() to make sure the new spec is self-consistent. If the pool 841fa9e4066Sahrens * has a consistent replication level, then we ignore any errors. Otherwise, 842fa9e4066Sahrens * report any difference between the two. 843fa9e4066Sahrens */ 8448488aeb5Staylor static int 845fa9e4066Sahrens check_replication(nvlist_t *config, nvlist_t *newroot) 846fa9e4066Sahrens { 8478488aeb5Staylor nvlist_t **child; 8488488aeb5Staylor uint_t children; 849fa9e4066Sahrens replication_level_t *current = NULL, *new; 850663207adSDon Brady replication_level_t *raidz, *mirror; 851fa9e4066Sahrens int ret; 852fa9e4066Sahrens 853fa9e4066Sahrens /* 854fa9e4066Sahrens * If we have a current pool configuration, check to see if it's 855fa9e4066Sahrens * self-consistent. If not, simply return success. 856fa9e4066Sahrens */ 857fa9e4066Sahrens if (config != NULL) { 858fa9e4066Sahrens nvlist_t *nvroot; 859fa9e4066Sahrens 860fa9e4066Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 861fa9e4066Sahrens &nvroot) == 0); 86299653d4eSeschrock if ((current = get_replication(nvroot, B_FALSE)) == NULL) 863fa9e4066Sahrens return (0); 864fa9e4066Sahrens } 8658488aeb5Staylor /* 8668488aeb5Staylor * for spares there may be no children, and therefore no 8678488aeb5Staylor * replication level to check 8688488aeb5Staylor */ 8698488aeb5Staylor if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN, 8708488aeb5Staylor &child, &children) != 0) || (children == 0)) { 8718488aeb5Staylor free(current); 8728488aeb5Staylor return (0); 8738488aeb5Staylor } 874fa9e4066Sahrens 8758654d025Sperrin /* 8768654d025Sperrin * If all we have is logs then there's no replication level to check. 8778654d025Sperrin */ 8788654d025Sperrin if (num_logs(newroot) == children) { 8798654d025Sperrin free(current); 8808654d025Sperrin return (0); 8818654d025Sperrin } 8828654d025Sperrin 883fa9e4066Sahrens /* 884fa9e4066Sahrens * Get the replication level of the new vdev spec, reporting any 885fa9e4066Sahrens * inconsistencies found. 886fa9e4066Sahrens */ 88799653d4eSeschrock if ((new = get_replication(newroot, B_TRUE)) == NULL) { 888fa9e4066Sahrens free(current); 889fa9e4066Sahrens return (-1); 890fa9e4066Sahrens } 891fa9e4066Sahrens 892fa9e4066Sahrens /* 893fa9e4066Sahrens * Check to see if the new vdev spec matches the replication level of 894fa9e4066Sahrens * the current pool. 895fa9e4066Sahrens */ 896fa9e4066Sahrens ret = 0; 897fa9e4066Sahrens if (current != NULL) { 898663207adSDon Brady if (is_raidz_mirror(current, new, &raidz, &mirror) || 899663207adSDon Brady is_raidz_mirror(new, current, &raidz, &mirror)) { 900663207adSDon Brady if (raidz->zprl_parity != mirror->zprl_children - 1) { 901663207adSDon Brady vdev_error(gettext( 902663207adSDon Brady "mismatched replication level: pool and " 903663207adSDon Brady "new vdev with different redundancy, %s " 904663207adSDon Brady "and %s vdevs, %llu vs. %llu (%llu-way)\n"), 905663207adSDon Brady raidz->zprl_type, 906663207adSDon Brady mirror->zprl_type, 907663207adSDon Brady raidz->zprl_parity, 908663207adSDon Brady mirror->zprl_children - 1, 909663207adSDon Brady mirror->zprl_children); 910663207adSDon Brady ret = -1; 911663207adSDon Brady } 912663207adSDon Brady } else if (strcmp(current->zprl_type, new->zprl_type) != 0) { 913fa9e4066Sahrens vdev_error(gettext( 91499653d4eSeschrock "mismatched replication level: pool uses %s " 91599653d4eSeschrock "and new vdev is %s\n"), 91699653d4eSeschrock current->zprl_type, new->zprl_type); 91799653d4eSeschrock ret = -1; 91899653d4eSeschrock } else if (current->zprl_parity != new->zprl_parity) { 91999653d4eSeschrock vdev_error(gettext( 92099653d4eSeschrock "mismatched replication level: pool uses %llu " 92199653d4eSeschrock "device parity and new vdev uses %llu\n"), 92299653d4eSeschrock current->zprl_parity, new->zprl_parity); 92399653d4eSeschrock ret = -1; 92499653d4eSeschrock } else if (current->zprl_children != new->zprl_children) { 92599653d4eSeschrock vdev_error(gettext( 92699653d4eSeschrock "mismatched replication level: pool uses %llu-way " 92799653d4eSeschrock "%s and new vdev uses %llu-way %s\n"), 92899653d4eSeschrock current->zprl_children, current->zprl_type, 92999653d4eSeschrock new->zprl_children, new->zprl_type); 930fa9e4066Sahrens ret = -1; 931fa9e4066Sahrens } 932fa9e4066Sahrens } 933fa9e4066Sahrens 934fa9e4066Sahrens free(new); 935fa9e4066Sahrens if (current != NULL) 936fa9e4066Sahrens free(current); 937fa9e4066Sahrens 938fa9e4066Sahrens return (ret); 939fa9e4066Sahrens } 940fa9e4066Sahrens 941fa9e4066Sahrens /* 942fa9e4066Sahrens * Go through and find any whole disks in the vdev specification, labelling them 943fa9e4066Sahrens * as appropriate. When constructing the vdev spec, we were unable to open this 944fa9e4066Sahrens * device in order to provide a devid. Now that we have labelled the disk and 9457855d95bSToomas Soome * know the pool slice is valid, we can construct the devid now. 946fa9e4066Sahrens * 9478488aeb5Staylor * If the disk was already labeled with an EFI label, we will have gotten the 948fa9e4066Sahrens * devid already (because we were able to open the whole disk). Otherwise, we 949fa9e4066Sahrens * need to get the devid after we label the disk. 950fa9e4066Sahrens */ 9518488aeb5Staylor static int 9527855d95bSToomas Soome make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type, 9537855d95bSToomas Soome uint64_t boot_size) 954fa9e4066Sahrens { 955fa9e4066Sahrens nvlist_t **child; 956fa9e4066Sahrens uint_t c, children; 957fa9e4066Sahrens char *type, *path, *diskname; 958fa9e4066Sahrens char buf[MAXPATHLEN]; 959afefbcddSeschrock uint64_t wholedisk; 960fa9e4066Sahrens int fd; 961fa9e4066Sahrens int ret; 9627855d95bSToomas Soome int slice; 963fa9e4066Sahrens ddi_devid_t devid; 964fa9e4066Sahrens char *minor = NULL, *devid_str = NULL; 965fa9e4066Sahrens 966fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 967fa9e4066Sahrens 968fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 969fa9e4066Sahrens &child, &children) != 0) { 970fa9e4066Sahrens 971fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_DISK) != 0) 972fa9e4066Sahrens return (0); 973fa9e4066Sahrens 974fa9e4066Sahrens /* 975fa9e4066Sahrens * We have a disk device. Get the path to the device 9768488aeb5Staylor * and see if it's a whole disk by appending the backup 977fa9e4066Sahrens * slice and stat()ing the device. 978fa9e4066Sahrens */ 979fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); 980fa9e4066Sahrens 981fa9e4066Sahrens diskname = strrchr(path, '/'); 982fa9e4066Sahrens assert(diskname != NULL); 983fa9e4066Sahrens diskname++; 9847855d95bSToomas Soome 9857855d95bSToomas Soome if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 9867855d95bSToomas Soome &wholedisk) != 0 || !wholedisk) { 9877855d95bSToomas Soome /* 9887855d95bSToomas Soome * This is not whole disk, return error if 9897855d95bSToomas Soome * boot partition creation was requested 9907855d95bSToomas Soome */ 9917855d95bSToomas Soome if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { 9927855d95bSToomas Soome (void) fprintf(stderr, 9937855d95bSToomas Soome gettext("creating boot partition is only " 9947855d95bSToomas Soome "supported on whole disk vdevs: %s\n"), 9957855d95bSToomas Soome diskname); 9967855d95bSToomas Soome return (-1); 9977855d95bSToomas Soome } 9987855d95bSToomas Soome return (0); 9997855d95bSToomas Soome } 10007855d95bSToomas Soome 10017855d95bSToomas Soome ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type, 10027855d95bSToomas Soome boot_size, &slice); 10037855d95bSToomas Soome if (ret == -1) 10047855d95bSToomas Soome return (ret); 1005fa9e4066Sahrens 1006fa9e4066Sahrens /* 1007fa9e4066Sahrens * Fill in the devid, now that we've labeled the disk. 1008fa9e4066Sahrens */ 10097855d95bSToomas Soome (void) snprintf(buf, sizeof (buf), "%ss%d", path, slice); 1010fa9e4066Sahrens if ((fd = open(buf, O_RDONLY)) < 0) { 1011fa9e4066Sahrens (void) fprintf(stderr, 1012fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 1013fa9e4066Sahrens buf, strerror(errno)); 1014fa9e4066Sahrens return (-1); 1015fa9e4066Sahrens } 1016fa9e4066Sahrens 1017fa9e4066Sahrens if (devid_get(fd, &devid) == 0) { 1018fa9e4066Sahrens if (devid_get_minor_name(fd, &minor) == 0 && 1019fa9e4066Sahrens (devid_str = devid_str_encode(devid, minor)) != 1020fa9e4066Sahrens NULL) { 1021fa9e4066Sahrens verify(nvlist_add_string(nv, 1022fa9e4066Sahrens ZPOOL_CONFIG_DEVID, devid_str) == 0); 1023fa9e4066Sahrens } 1024fa9e4066Sahrens if (devid_str != NULL) 1025fa9e4066Sahrens devid_str_free(devid_str); 1026fa9e4066Sahrens if (minor != NULL) 1027fa9e4066Sahrens devid_str_free(minor); 1028fa9e4066Sahrens devid_free(devid); 1029fa9e4066Sahrens } 1030fa9e4066Sahrens 1031afefbcddSeschrock /* 10327855d95bSToomas Soome * Update the path to refer to the pool slice. The presence of 1033afefbcddSeschrock * the 'whole_disk' field indicates to the CLI that we should 1034afefbcddSeschrock * chop off the slice number when displaying the device in 1035afefbcddSeschrock * future output. 1036afefbcddSeschrock */ 1037afefbcddSeschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); 1038afefbcddSeschrock 1039fa9e4066Sahrens (void) close(fd); 1040fa9e4066Sahrens 1041fa9e4066Sahrens return (0); 1042fa9e4066Sahrens } 1043fa9e4066Sahrens 10447855d95bSToomas Soome /* illumos kernel does not support booting from multi-vdev pools. */ 10457855d95bSToomas Soome if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) { 10467855d95bSToomas Soome if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) { 10477855d95bSToomas Soome (void) fprintf(stderr, gettext("boot pool " 10487855d95bSToomas Soome "can not have more than one vdev\n")); 10497855d95bSToomas Soome return (-1); 10507855d95bSToomas Soome } 10517855d95bSToomas Soome } 10527855d95bSToomas Soome 10537855d95bSToomas Soome for (c = 0; c < children; c++) { 10547855d95bSToomas Soome ret = make_disks(zhp, child[c], boot_type, boot_size); 10557855d95bSToomas Soome if (ret != 0) 1056fa9e4066Sahrens return (ret); 10577855d95bSToomas Soome } 1058fa9e4066Sahrens 105999653d4eSeschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, 106099653d4eSeschrock &child, &children) == 0) 10617855d95bSToomas Soome for (c = 0; c < children; c++) { 10627855d95bSToomas Soome ret = make_disks(zhp, child[c], boot_type, boot_size); 10637855d95bSToomas Soome if (ret != 0) 106499653d4eSeschrock return (ret); 10657855d95bSToomas Soome } 106699653d4eSeschrock 1067fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 1068fa94a07fSbrendan &child, &children) == 0) 10697855d95bSToomas Soome for (c = 0; c < children; c++) { 10707855d95bSToomas Soome ret = make_disks(zhp, child[c], boot_type, boot_size); 10717855d95bSToomas Soome if (ret != 0) 1072fa94a07fSbrendan return (ret); 10737855d95bSToomas Soome } 1074fa94a07fSbrendan 1075fa9e4066Sahrens return (0); 1076fa9e4066Sahrens } 1077fa9e4066Sahrens 107899653d4eSeschrock /* 107999653d4eSeschrock * Determine if the given path is a hot spare within the given configuration. 108099653d4eSeschrock */ 108199653d4eSeschrock static boolean_t 108299653d4eSeschrock is_spare(nvlist_t *config, const char *path) 108399653d4eSeschrock { 108499653d4eSeschrock int fd; 108599653d4eSeschrock pool_state_t state; 10863ccfa83cSahrens char *name = NULL; 108799653d4eSeschrock nvlist_t *label; 108899653d4eSeschrock uint64_t guid, spareguid; 108999653d4eSeschrock nvlist_t *nvroot; 109099653d4eSeschrock nvlist_t **spares; 109199653d4eSeschrock uint_t i, nspares; 109299653d4eSeschrock boolean_t inuse; 109399653d4eSeschrock 109499653d4eSeschrock if ((fd = open(path, O_RDONLY)) < 0) 109599653d4eSeschrock return (B_FALSE); 109699653d4eSeschrock 109799653d4eSeschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || 109899653d4eSeschrock !inuse || 109999653d4eSeschrock state != POOL_STATE_SPARE || 110099653d4eSeschrock zpool_read_label(fd, &label) != 0) { 11013ccfa83cSahrens free(name); 110299653d4eSeschrock (void) close(fd); 110399653d4eSeschrock return (B_FALSE); 110499653d4eSeschrock } 11053ccfa83cSahrens free(name); 110699653d4eSeschrock (void) close(fd); 11073f9d6ad7SLin Ling 110899653d4eSeschrock verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); 110999653d4eSeschrock nvlist_free(label); 111099653d4eSeschrock 111199653d4eSeschrock verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 111299653d4eSeschrock &nvroot) == 0); 111399653d4eSeschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 111499653d4eSeschrock &spares, &nspares) == 0) { 111599653d4eSeschrock for (i = 0; i < nspares; i++) { 111699653d4eSeschrock verify(nvlist_lookup_uint64(spares[i], 111799653d4eSeschrock ZPOOL_CONFIG_GUID, &spareguid) == 0); 111899653d4eSeschrock if (spareguid == guid) 111999653d4eSeschrock return (B_TRUE); 112099653d4eSeschrock } 112199653d4eSeschrock } 112299653d4eSeschrock 112399653d4eSeschrock return (B_FALSE); 112499653d4eSeschrock } 112599653d4eSeschrock 1126fa9e4066Sahrens /* 1127fa9e4066Sahrens * Go through and find any devices that are in use. We rely on libdiskmgt for 1128fa9e4066Sahrens * the majority of this task. 1129fa9e4066Sahrens */ 113075fbdf9bSBasil Crow static boolean_t 113175fbdf9bSBasil Crow is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, 11323f9d6ad7SLin Ling boolean_t replacing, boolean_t isspare) 1133fa9e4066Sahrens { 1134fa9e4066Sahrens nvlist_t **child; 1135fa9e4066Sahrens uint_t c, children; 1136fa9e4066Sahrens char *type, *path; 1137b327cd3fSIgor Kozhukhov int ret = 0; 113899653d4eSeschrock char buf[MAXPATHLEN]; 113999653d4eSeschrock uint64_t wholedisk; 114075fbdf9bSBasil Crow boolean_t anyinuse = B_FALSE; 1141fa9e4066Sahrens 1142fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 1143fa9e4066Sahrens 1144fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 1145fa9e4066Sahrens &child, &children) != 0) { 1146fa9e4066Sahrens 1147fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); 1148fa9e4066Sahrens 114999653d4eSeschrock /* 115099653d4eSeschrock * As a generic check, we look to see if this is a replace of a 115199653d4eSeschrock * hot spare within the same pool. If so, we allow it 115299653d4eSeschrock * regardless of what libdiskmgt or zpool_in_use() says. 115399653d4eSeschrock */ 11543f9d6ad7SLin Ling if (replacing) { 115599653d4eSeschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 115699653d4eSeschrock &wholedisk) == 0 && wholedisk) 115799653d4eSeschrock (void) snprintf(buf, sizeof (buf), "%ss0", 115899653d4eSeschrock path); 115999653d4eSeschrock else 116099653d4eSeschrock (void) strlcpy(buf, path, sizeof (buf)); 11613f9d6ad7SLin Ling 116299653d4eSeschrock if (is_spare(config, buf)) 116375fbdf9bSBasil Crow return (B_FALSE); 116499653d4eSeschrock } 116599653d4eSeschrock 1166fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_DISK) == 0) 116799653d4eSeschrock ret = check_device(path, force, isspare); 116875fbdf9bSBasil Crow else if (strcmp(type, VDEV_TYPE_FILE) == 0) 116999653d4eSeschrock ret = check_file(path, force, isspare); 1170fa9e4066Sahrens 117175fbdf9bSBasil Crow return (ret != 0); 1172fa9e4066Sahrens } 1173fa9e4066Sahrens 1174fa9e4066Sahrens for (c = 0; c < children; c++) 117575fbdf9bSBasil Crow if (is_device_in_use(config, child[c], force, replacing, 117675fbdf9bSBasil Crow B_FALSE)) 117775fbdf9bSBasil Crow anyinuse = B_TRUE; 1178fa9e4066Sahrens 117999653d4eSeschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, 118099653d4eSeschrock &child, &children) == 0) 118199653d4eSeschrock for (c = 0; c < children; c++) 118275fbdf9bSBasil Crow if (is_device_in_use(config, child[c], force, replacing, 118375fbdf9bSBasil Crow B_TRUE)) 118475fbdf9bSBasil Crow anyinuse = B_TRUE; 1185fa94a07fSbrendan 1186fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 1187fa94a07fSbrendan &child, &children) == 0) 1188fa94a07fSbrendan for (c = 0; c < children; c++) 118975fbdf9bSBasil Crow if (is_device_in_use(config, child[c], force, replacing, 119075fbdf9bSBasil Crow B_FALSE)) 119175fbdf9bSBasil Crow anyinuse = B_TRUE; 1192fa94a07fSbrendan 119375fbdf9bSBasil Crow return (anyinuse); 1194fa9e4066Sahrens } 1195fa9e4066Sahrens 11968488aeb5Staylor static const char * 1197f94275ceSAdam Leventhal is_grouping(const char *type, int *mindev, int *maxdev) 119899653d4eSeschrock { 1199f94275ceSAdam Leventhal if (strncmp(type, "raidz", 5) == 0) { 1200f94275ceSAdam Leventhal const char *p = type + 5; 1201f94275ceSAdam Leventhal char *end; 1202f94275ceSAdam Leventhal long nparity; 1203f94275ceSAdam Leventhal 1204f94275ceSAdam Leventhal if (*p == '\0') { 1205f94275ceSAdam Leventhal nparity = 1; 1206f94275ceSAdam Leventhal } else if (*p == '0') { 1207f94275ceSAdam Leventhal return (NULL); /* no zero prefixes allowed */ 1208f94275ceSAdam Leventhal } else { 1209f94275ceSAdam Leventhal errno = 0; 1210f94275ceSAdam Leventhal nparity = strtol(p, &end, 10); 1211f94275ceSAdam Leventhal if (errno != 0 || nparity < 1 || nparity >= 255 || 1212f94275ceSAdam Leventhal *end != '\0') 1213f94275ceSAdam Leventhal return (NULL); 1214f94275ceSAdam Leventhal } 121599653d4eSeschrock 121699653d4eSeschrock if (mindev != NULL) 1217f94275ceSAdam Leventhal *mindev = nparity + 1; 1218f94275ceSAdam Leventhal if (maxdev != NULL) 1219f94275ceSAdam Leventhal *maxdev = 255; 122099653d4eSeschrock return (VDEV_TYPE_RAIDZ); 122199653d4eSeschrock } 122299653d4eSeschrock 1223f94275ceSAdam Leventhal if (maxdev != NULL) 1224f94275ceSAdam Leventhal *maxdev = INT_MAX; 1225f94275ceSAdam Leventhal 122699653d4eSeschrock if (strcmp(type, "mirror") == 0) { 122799653d4eSeschrock if (mindev != NULL) 122899653d4eSeschrock *mindev = 2; 122999653d4eSeschrock return (VDEV_TYPE_MIRROR); 123099653d4eSeschrock } 123199653d4eSeschrock 123299653d4eSeschrock if (strcmp(type, "spare") == 0) { 123399653d4eSeschrock if (mindev != NULL) 123499653d4eSeschrock *mindev = 1; 123599653d4eSeschrock return (VDEV_TYPE_SPARE); 123699653d4eSeschrock } 123799653d4eSeschrock 12388654d025Sperrin if (strcmp(type, "log") == 0) { 12398654d025Sperrin if (mindev != NULL) 12408654d025Sperrin *mindev = 1; 12418654d025Sperrin return (VDEV_TYPE_LOG); 12428654d025Sperrin } 12438654d025Sperrin 1244663207adSDon Brady if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0 || 1245663207adSDon Brady strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) { 1246663207adSDon Brady if (mindev != NULL) 1247663207adSDon Brady *mindev = 1; 1248663207adSDon Brady return (type); 1249663207adSDon Brady } 1250663207adSDon Brady 1251fa94a07fSbrendan if (strcmp(type, "cache") == 0) { 1252fa94a07fSbrendan if (mindev != NULL) 1253fa94a07fSbrendan *mindev = 1; 1254fa94a07fSbrendan return (VDEV_TYPE_L2CACHE); 1255fa94a07fSbrendan } 1256fa94a07fSbrendan 125799653d4eSeschrock return (NULL); 125899653d4eSeschrock } 125999653d4eSeschrock 1260fa9e4066Sahrens /* 1261fa9e4066Sahrens * Construct a syntactically valid vdev specification, 1262fa9e4066Sahrens * and ensure that all devices and files exist and can be opened. 1263fa9e4066Sahrens * Note: we don't bother freeing anything in the error paths 1264fa9e4066Sahrens * because the program is just going to exit anyway. 1265fa9e4066Sahrens */ 1266fa9e4066Sahrens nvlist_t * 1267fa9e4066Sahrens construct_spec(int argc, char **argv) 1268fa9e4066Sahrens { 1269fa94a07fSbrendan nvlist_t *nvroot, *nv, **top, **spares, **l2cache; 1270f94275ceSAdam Leventhal int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache; 127199653d4eSeschrock const char *type; 1272663207adSDon Brady uint64_t is_log, is_special, is_dedup; 12738654d025Sperrin boolean_t seen_logs; 1274fa9e4066Sahrens 1275fa9e4066Sahrens top = NULL; 1276fa9e4066Sahrens toplevels = 0; 127799653d4eSeschrock spares = NULL; 1278fa94a07fSbrendan l2cache = NULL; 127999653d4eSeschrock nspares = 0; 12808654d025Sperrin nlogs = 0; 1281fa94a07fSbrendan nl2cache = 0; 1282663207adSDon Brady is_log = is_special = is_dedup = B_FALSE; 12838654d025Sperrin seen_logs = B_FALSE; 1284fa9e4066Sahrens 1285fa9e4066Sahrens while (argc > 0) { 1286fa9e4066Sahrens nv = NULL; 1287fa9e4066Sahrens 1288fa9e4066Sahrens /* 1289fa9e4066Sahrens * If it's a mirror or raidz, the subsequent arguments are 1290fa9e4066Sahrens * its leaves -- until we encounter the next mirror or raidz. 1291fa9e4066Sahrens */ 1292f94275ceSAdam Leventhal if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) { 1293fa9e4066Sahrens nvlist_t **child = NULL; 129499653d4eSeschrock int c, children = 0; 129599653d4eSeschrock 12968654d025Sperrin if (strcmp(type, VDEV_TYPE_SPARE) == 0) { 12978654d025Sperrin if (spares != NULL) { 12988654d025Sperrin (void) fprintf(stderr, 12998654d025Sperrin gettext("invalid vdev " 13008654d025Sperrin "specification: 'spare' can be " 13018654d025Sperrin "specified only once\n")); 13028654d025Sperrin return (NULL); 13038654d025Sperrin } 1304663207adSDon Brady is_log = is_special = is_dedup = B_FALSE; 13058654d025Sperrin } 13068654d025Sperrin 13078654d025Sperrin if (strcmp(type, VDEV_TYPE_LOG) == 0) { 13088654d025Sperrin if (seen_logs) { 13098654d025Sperrin (void) fprintf(stderr, 13108654d025Sperrin gettext("invalid vdev " 13118654d025Sperrin "specification: 'log' can be " 13128654d025Sperrin "specified only once\n")); 13138654d025Sperrin return (NULL); 13148654d025Sperrin } 13158654d025Sperrin seen_logs = B_TRUE; 13168654d025Sperrin is_log = B_TRUE; 1317663207adSDon Brady is_special = B_FALSE; 1318663207adSDon Brady is_dedup = B_FALSE; 13198654d025Sperrin argc--; 13208654d025Sperrin argv++; 13218654d025Sperrin /* 13228654d025Sperrin * A log is not a real grouping device. 13238654d025Sperrin * We just set is_log and continue. 13248654d025Sperrin */ 13258654d025Sperrin continue; 13268654d025Sperrin } 13278654d025Sperrin 1328663207adSDon Brady if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) { 1329663207adSDon Brady is_special = B_TRUE; 1330663207adSDon Brady is_log = B_FALSE; 1331663207adSDon Brady is_dedup = B_FALSE; 1332663207adSDon Brady argc--; 1333663207adSDon Brady argv++; 1334663207adSDon Brady continue; 1335663207adSDon Brady } 1336663207adSDon Brady 1337663207adSDon Brady if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) { 1338663207adSDon Brady is_dedup = B_TRUE; 1339663207adSDon Brady is_log = B_FALSE; 1340663207adSDon Brady is_special = B_FALSE; 1341663207adSDon Brady argc--; 1342663207adSDon Brady argv++; 1343663207adSDon Brady continue; 1344663207adSDon Brady } 1345663207adSDon Brady 1346fa94a07fSbrendan if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { 1347fa94a07fSbrendan if (l2cache != NULL) { 1348fa94a07fSbrendan (void) fprintf(stderr, 1349fa94a07fSbrendan gettext("invalid vdev " 1350fa94a07fSbrendan "specification: 'cache' can be " 1351fa94a07fSbrendan "specified only once\n")); 1352fa94a07fSbrendan return (NULL); 1353fa94a07fSbrendan } 1354663207adSDon Brady is_log = is_special = is_dedup = B_FALSE; 1355fa94a07fSbrendan } 1356fa94a07fSbrendan 1357663207adSDon Brady if (is_log || is_special || is_dedup) { 13588654d025Sperrin if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { 13598654d025Sperrin (void) fprintf(stderr, 13608654d025Sperrin gettext("invalid vdev " 1361663207adSDon Brady "specification: unsupported '%s' " 1362663207adSDon Brady "device: %s\n"), is_log ? "log" : 1363663207adSDon Brady "special", type); 13648654d025Sperrin return (NULL); 13658654d025Sperrin } 13668654d025Sperrin nlogs++; 136799653d4eSeschrock } 1368fa9e4066Sahrens 1369fa9e4066Sahrens for (c = 1; c < argc; c++) { 1370f94275ceSAdam Leventhal if (is_grouping(argv[c], NULL, NULL) != NULL) 1371fa9e4066Sahrens break; 1372fa9e4066Sahrens children++; 1373fa9e4066Sahrens child = realloc(child, 1374fa9e4066Sahrens children * sizeof (nvlist_t *)); 1375fa9e4066Sahrens if (child == NULL) 13765ad82045Snd zpool_no_memory(); 13778654d025Sperrin if ((nv = make_leaf_vdev(argv[c], B_FALSE)) 13788654d025Sperrin == NULL) 1379fa9e4066Sahrens return (NULL); 1380fa9e4066Sahrens child[children - 1] = nv; 1381fa9e4066Sahrens } 1382fa9e4066Sahrens 138399653d4eSeschrock if (children < mindev) { 138499653d4eSeschrock (void) fprintf(stderr, gettext("invalid vdev " 138599653d4eSeschrock "specification: %s requires at least %d " 138699653d4eSeschrock "devices\n"), argv[0], mindev); 1387fa9e4066Sahrens return (NULL); 1388fa9e4066Sahrens } 1389fa9e4066Sahrens 1390f94275ceSAdam Leventhal if (children > maxdev) { 1391f94275ceSAdam Leventhal (void) fprintf(stderr, gettext("invalid vdev " 1392f94275ceSAdam Leventhal "specification: %s supports no more than " 1393f94275ceSAdam Leventhal "%d devices\n"), argv[0], maxdev); 1394f94275ceSAdam Leventhal return (NULL); 1395f94275ceSAdam Leventhal } 1396f94275ceSAdam Leventhal 139799653d4eSeschrock argc -= c; 139899653d4eSeschrock argv += c; 139999653d4eSeschrock 140099653d4eSeschrock if (strcmp(type, VDEV_TYPE_SPARE) == 0) { 140199653d4eSeschrock spares = child; 140299653d4eSeschrock nspares = children; 140399653d4eSeschrock continue; 1404fa94a07fSbrendan } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { 1405fa94a07fSbrendan l2cache = child; 1406fa94a07fSbrendan nl2cache = children; 1407fa94a07fSbrendan continue; 140899653d4eSeschrock } else { 1409663207adSDon Brady /* create a top-level vdev with children */ 141099653d4eSeschrock verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 141199653d4eSeschrock 0) == 0); 141299653d4eSeschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, 141399653d4eSeschrock type) == 0); 14148654d025Sperrin verify(nvlist_add_uint64(nv, 14158654d025Sperrin ZPOOL_CONFIG_IS_LOG, is_log) == 0); 1416663207adSDon Brady if (is_log) 1417663207adSDon Brady verify(nvlist_add_string(nv, 1418663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1419663207adSDon Brady VDEV_ALLOC_BIAS_LOG) == 0); 1420663207adSDon Brady if (is_special) { 1421663207adSDon Brady verify(nvlist_add_string(nv, 1422663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1423663207adSDon Brady VDEV_ALLOC_BIAS_SPECIAL) == 0); 1424663207adSDon Brady } 1425663207adSDon Brady if (is_dedup) { 1426663207adSDon Brady verify(nvlist_add_string(nv, 1427663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1428663207adSDon Brady VDEV_ALLOC_BIAS_DEDUP) == 0); 1429663207adSDon Brady } 143099653d4eSeschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { 143199653d4eSeschrock verify(nvlist_add_uint64(nv, 143299653d4eSeschrock ZPOOL_CONFIG_NPARITY, 143399653d4eSeschrock mindev - 1) == 0); 143499653d4eSeschrock } 143599653d4eSeschrock verify(nvlist_add_nvlist_array(nv, 143699653d4eSeschrock ZPOOL_CONFIG_CHILDREN, child, 143799653d4eSeschrock children) == 0); 1438fa9e4066Sahrens 143999653d4eSeschrock for (c = 0; c < children; c++) 144099653d4eSeschrock nvlist_free(child[c]); 144199653d4eSeschrock free(child); 144299653d4eSeschrock } 1443fa9e4066Sahrens } else { 1444fa9e4066Sahrens /* 1445fa9e4066Sahrens * We have a device. Pass off to make_leaf_vdev() to 1446fa9e4066Sahrens * construct the appropriate nvlist describing the vdev. 1447fa9e4066Sahrens */ 14488654d025Sperrin if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL) 1449fa9e4066Sahrens return (NULL); 14508654d025Sperrin if (is_log) 14518654d025Sperrin nlogs++; 1452663207adSDon Brady if (is_special) { 1453663207adSDon Brady verify(nvlist_add_string(nv, 1454663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1455663207adSDon Brady VDEV_ALLOC_BIAS_SPECIAL) == 0); 1456663207adSDon Brady } 1457663207adSDon Brady if (is_dedup) { 1458663207adSDon Brady verify(nvlist_add_string(nv, 1459663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1460663207adSDon Brady VDEV_ALLOC_BIAS_DEDUP) == 0); 1461663207adSDon Brady } 1462fa9e4066Sahrens argc--; 1463fa9e4066Sahrens argv++; 1464fa9e4066Sahrens } 1465fa9e4066Sahrens 1466fa9e4066Sahrens toplevels++; 1467fa9e4066Sahrens top = realloc(top, toplevels * sizeof (nvlist_t *)); 1468fa9e4066Sahrens if (top == NULL) 14695ad82045Snd zpool_no_memory(); 1470fa9e4066Sahrens top[toplevels - 1] = nv; 1471fa9e4066Sahrens } 1472fa9e4066Sahrens 1473fa94a07fSbrendan if (toplevels == 0 && nspares == 0 && nl2cache == 0) { 147499653d4eSeschrock (void) fprintf(stderr, gettext("invalid vdev " 147599653d4eSeschrock "specification: at least one toplevel vdev must be " 147699653d4eSeschrock "specified\n")); 147799653d4eSeschrock return (NULL); 147899653d4eSeschrock } 147999653d4eSeschrock 14808654d025Sperrin if (seen_logs && nlogs == 0) { 14818654d025Sperrin (void) fprintf(stderr, gettext("invalid vdev specification: " 14828654d025Sperrin "log requires at least 1 device\n")); 14838654d025Sperrin return (NULL); 14848654d025Sperrin } 14858654d025Sperrin 1486fa9e4066Sahrens /* 1487fa9e4066Sahrens * Finally, create nvroot and add all top-level vdevs to it. 1488fa9e4066Sahrens */ 1489fa9e4066Sahrens verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); 1490fa9e4066Sahrens verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 1491fa9e4066Sahrens VDEV_TYPE_ROOT) == 0); 1492fa9e4066Sahrens verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1493fa9e4066Sahrens top, toplevels) == 0); 149499653d4eSeschrock if (nspares != 0) 149599653d4eSeschrock verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 149699653d4eSeschrock spares, nspares) == 0); 1497fa94a07fSbrendan if (nl2cache != 0) 1498fa94a07fSbrendan verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 1499fa94a07fSbrendan l2cache, nl2cache) == 0); 1500fa9e4066Sahrens 1501fa9e4066Sahrens for (t = 0; t < toplevels; t++) 1502fa9e4066Sahrens nvlist_free(top[t]); 150399653d4eSeschrock for (t = 0; t < nspares; t++) 150499653d4eSeschrock nvlist_free(spares[t]); 1505fa94a07fSbrendan for (t = 0; t < nl2cache; t++) 1506fa94a07fSbrendan nvlist_free(l2cache[t]); 150799653d4eSeschrock if (spares) 150899653d4eSeschrock free(spares); 1509fa94a07fSbrendan if (l2cache) 1510fa94a07fSbrendan free(l2cache); 1511fa9e4066Sahrens free(top); 1512fa9e4066Sahrens 1513fa9e4066Sahrens return (nvroot); 1514fa9e4066Sahrens } 1515fa9e4066Sahrens 15161195e687SMark J Musante nvlist_t * 15171195e687SMark J Musante split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, 15181195e687SMark J Musante splitflags_t flags, int argc, char **argv) 15191195e687SMark J Musante { 15201195e687SMark J Musante nvlist_t *newroot = NULL, **child; 15211195e687SMark J Musante uint_t c, children; 15227855d95bSToomas Soome zpool_boot_label_t boot_type; 15231195e687SMark J Musante 15241195e687SMark J Musante if (argc > 0) { 15251195e687SMark J Musante if ((newroot = construct_spec(argc, argv)) == NULL) { 15261195e687SMark J Musante (void) fprintf(stderr, gettext("Unable to build a " 15271195e687SMark J Musante "pool from the specified devices\n")); 15281195e687SMark J Musante return (NULL); 15291195e687SMark J Musante } 15301195e687SMark J Musante 15317855d95bSToomas Soome if (zpool_is_bootable(zhp)) 15327855d95bSToomas Soome boot_type = ZPOOL_COPY_BOOT_LABEL; 15337855d95bSToomas Soome else 15347855d95bSToomas Soome boot_type = ZPOOL_NO_BOOT_LABEL; 15357855d95bSToomas Soome 15367855d95bSToomas Soome if (!flags.dryrun && 15377855d95bSToomas Soome make_disks(zhp, newroot, boot_type, 0) != 0) { 15381195e687SMark J Musante nvlist_free(newroot); 15391195e687SMark J Musante return (NULL); 15401195e687SMark J Musante } 15411195e687SMark J Musante 15421195e687SMark J Musante /* avoid any tricks in the spec */ 15431195e687SMark J Musante verify(nvlist_lookup_nvlist_array(newroot, 15441195e687SMark J Musante ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); 15451195e687SMark J Musante for (c = 0; c < children; c++) { 15461195e687SMark J Musante char *path; 15471195e687SMark J Musante const char *type; 15481195e687SMark J Musante int min, max; 15491195e687SMark J Musante 15501195e687SMark J Musante verify(nvlist_lookup_string(child[c], 15511195e687SMark J Musante ZPOOL_CONFIG_PATH, &path) == 0); 15521195e687SMark J Musante if ((type = is_grouping(path, &min, &max)) != NULL) { 15531195e687SMark J Musante (void) fprintf(stderr, gettext("Cannot use " 15541195e687SMark J Musante "'%s' as a device for splitting\n"), type); 15551195e687SMark J Musante nvlist_free(newroot); 15561195e687SMark J Musante return (NULL); 15571195e687SMark J Musante } 15581195e687SMark J Musante } 15591195e687SMark J Musante } 15601195e687SMark J Musante 15611195e687SMark J Musante if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) { 1562aab83bb8SJosef 'Jeff' Sipek nvlist_free(newroot); 15631195e687SMark J Musante return (NULL); 15641195e687SMark J Musante } 15651195e687SMark J Musante 15661195e687SMark J Musante return (newroot); 15671195e687SMark J Musante } 15688488aeb5Staylor 1569663207adSDon Brady static int 1570663207adSDon Brady num_normal_vdevs(nvlist_t *nvroot) 1571663207adSDon Brady { 1572663207adSDon Brady nvlist_t **top; 1573663207adSDon Brady uint_t t, toplevels, normal = 0; 1574663207adSDon Brady 1575663207adSDon Brady verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1576663207adSDon Brady &top, &toplevels) == 0); 1577663207adSDon Brady 1578663207adSDon Brady for (t = 0; t < toplevels; t++) { 1579663207adSDon Brady uint64_t log = B_FALSE; 1580663207adSDon Brady 1581663207adSDon Brady (void) nvlist_lookup_uint64(top[t], ZPOOL_CONFIG_IS_LOG, &log); 1582663207adSDon Brady if (log) 1583663207adSDon Brady continue; 1584663207adSDon Brady if (nvlist_exists(top[t], ZPOOL_CONFIG_ALLOCATION_BIAS)) 1585663207adSDon Brady continue; 1586663207adSDon Brady 1587663207adSDon Brady normal++; 1588663207adSDon Brady } 1589663207adSDon Brady 1590663207adSDon Brady return (normal); 1591663207adSDon Brady } 1592663207adSDon Brady 1593fa9e4066Sahrens /* 1594fa9e4066Sahrens * Get and validate the contents of the given vdev specification. This ensures 1595fa9e4066Sahrens * that the nvlist returned is well-formed, that all the devices exist, and that 1596fa9e4066Sahrens * they are not currently in use by any other known consumer. The 'poolconfig' 1597fa9e4066Sahrens * parameter is the current configuration of the pool when adding devices 1598fa9e4066Sahrens * existing pool, and is used to perform additional checks, such as changing the 1599fa9e4066Sahrens * replication level of the pool. It can be 'NULL' to indicate that this is a 1600fa9e4066Sahrens * new pool. The 'force' flag controls whether devices should be forcefully 1601fa9e4066Sahrens * added, even if they appear in use. 1602fa9e4066Sahrens */ 1603fa9e4066Sahrens nvlist_t * 16048488aeb5Staylor make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, 16057855d95bSToomas Soome boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, 16067855d95bSToomas Soome uint64_t boot_size, int argc, char **argv) 1607fa9e4066Sahrens { 1608fa9e4066Sahrens nvlist_t *newroot; 16098488aeb5Staylor nvlist_t *poolconfig = NULL; 1610fa9e4066Sahrens is_force = force; 1611fa9e4066Sahrens 1612fa9e4066Sahrens /* 1613fa9e4066Sahrens * Construct the vdev specification. If this is successful, we know 1614fa9e4066Sahrens * that we have a valid specification, and that all devices can be 1615fa9e4066Sahrens * opened. 1616fa9e4066Sahrens */ 1617fa9e4066Sahrens if ((newroot = construct_spec(argc, argv)) == NULL) 1618fa9e4066Sahrens return (NULL); 1619fa9e4066Sahrens 16208488aeb5Staylor if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) 16218488aeb5Staylor return (NULL); 16228488aeb5Staylor 1623fa9e4066Sahrens /* 1624fa9e4066Sahrens * Validate each device to make sure that its not shared with another 1625fa9e4066Sahrens * subsystem. We do this even if 'force' is set, because there are some 1626fa9e4066Sahrens * uses (such as a dedicated dump device) that even '-f' cannot 1627fa9e4066Sahrens * override. 1628fa9e4066Sahrens */ 162975fbdf9bSBasil Crow if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) { 1630fa9e4066Sahrens nvlist_free(newroot); 1631fa9e4066Sahrens return (NULL); 1632fa9e4066Sahrens } 1633fa9e4066Sahrens 1634fa9e4066Sahrens /* 1635fa9e4066Sahrens * Check the replication level of the given vdevs and report any errors 1636fa9e4066Sahrens * found. We include the existing pool spec, if any, as we need to 1637fa9e4066Sahrens * catch changes against the existing replication level. 1638fa9e4066Sahrens */ 1639fa9e4066Sahrens if (check_rep && check_replication(poolconfig, newroot) != 0) { 1640fa9e4066Sahrens nvlist_free(newroot); 1641fa9e4066Sahrens return (NULL); 1642fa9e4066Sahrens } 1643fa9e4066Sahrens 1644663207adSDon Brady /* 1645663207adSDon Brady * On pool create the new vdev spec must have one normal vdev. 1646663207adSDon Brady */ 1647663207adSDon Brady if (poolconfig == NULL && num_normal_vdevs(newroot) == 0) { 1648663207adSDon Brady vdev_error(gettext("at least one general top-level vdev must " 1649663207adSDon Brady "be specified\n")); 1650663207adSDon Brady nvlist_free(newroot); 1651663207adSDon Brady return (NULL); 1652663207adSDon Brady } 1653663207adSDon Brady 1654fa9e4066Sahrens /* 1655fa9e4066Sahrens * Run through the vdev specification and label any whole disks found. 1656fa9e4066Sahrens */ 16577855d95bSToomas Soome if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) { 1658fa9e4066Sahrens nvlist_free(newroot); 1659fa9e4066Sahrens return (NULL); 1660fa9e4066Sahrens } 1661fa9e4066Sahrens 1662fa9e4066Sahrens return (newroot); 1663fa9e4066Sahrens } 1664