1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 2199653d4eSeschrock 22fa9e4066Sahrens /* 233f9d6ad7SLin Ling * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24663207adSDon Brady * Copyright (c) 2013, 2018 by Delphix. All rights reserved. 25663207adSDon Brady * Copyright (c) 2016, 2017 Intel Corporation. 26b327cd3fSIgor Kozhukhov * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. 27fa9e4066Sahrens */ 28fa9e4066Sahrens 29fa9e4066Sahrens /* 30fa9e4066Sahrens * Functions to convert between a list of vdevs and an nvlist representing the 31fa9e4066Sahrens * configuration. Each entry in the list can be one of: 32fa9e4066Sahrens * 338a5bcf73SToomas Soome * Device vdevs 348a5bcf73SToomas Soome * disk=(path=..., devid=...) 358a5bcf73SToomas Soome * file=(path=...) 
36fa9e4066Sahrens * 378a5bcf73SToomas Soome * Group vdevs 388a5bcf73SToomas Soome * raidz[1|2]=(...) 398a5bcf73SToomas Soome * mirror=(...) 40fa9e4066Sahrens * 418a5bcf73SToomas Soome * Hot spares 4299653d4eSeschrock * 43fa9e4066Sahrens * While the underlying implementation supports it, group vdevs cannot contain 44fa9e4066Sahrens * other group vdevs. All userland verification of devices is contained within 45fa9e4066Sahrens * this file. If successful, the nvlist returned can be passed directly to the 46fa9e4066Sahrens * kernel; we've done as much verification as possible in userland. 47fa9e4066Sahrens * 4899653d4eSeschrock * Hot spares are a special case, and passed down as an array of disk vdevs, at 4999653d4eSeschrock * the same level as the root of the vdev tree. 5099653d4eSeschrock * 518488aeb5Staylor * The only function exported by this file is 'make_root_vdev'. The 528488aeb5Staylor * function performs several passes: 53fa9e4066Sahrens * 548a5bcf73SToomas Soome * 1. Construct the vdev specification. Performs syntax validation and 55fa9e4066Sahrens * makes sure each device is valid. 568a5bcf73SToomas Soome * 2. Check for devices in use. Using libdiskmgt, makes sure that no 57fa9e4066Sahrens * devices are also in use. Some can be overridden using the 'force' 58fa9e4066Sahrens * flag, others cannot. 598a5bcf73SToomas Soome * 3. Check for replication errors if the 'force' flag is not specified. 60fa9e4066Sahrens * validates that the replication level is consistent across the 61fa9e4066Sahrens * entire pool. 628a5bcf73SToomas Soome * 4. Call libzfs to label any whole disks with an EFI label. 
63fa9e4066Sahrens */ 64fa9e4066Sahrens 65fa9e4066Sahrens #include <assert.h> 66fa9e4066Sahrens #include <devid.h> 67fa9e4066Sahrens #include <errno.h> 68fa9e4066Sahrens #include <fcntl.h> 69fa9e4066Sahrens #include <libdiskmgt.h> 70fa9e4066Sahrens #include <libintl.h> 71fa9e4066Sahrens #include <libnvpair.h> 72*d8ab6e12SDon Brady #include <libzutil.h> 73f94275ceSAdam Leventhal #include <limits.h> 745711d393Sloli #include <sys/spa.h> 75fa9e4066Sahrens #include <stdio.h> 76fa9e4066Sahrens #include <string.h> 77fa9e4066Sahrens #include <unistd.h> 78fa9e4066Sahrens #include <sys/efi_partition.h> 79fa9e4066Sahrens #include <sys/stat.h> 80fa9e4066Sahrens #include <sys/vtoc.h> 81fa9e4066Sahrens #include <sys/mntent.h> 82fa9e4066Sahrens 83fa9e4066Sahrens #include "zpool_util.h" 84fa9e4066Sahrens 85fa9e4066Sahrens #define BACKUP_SLICE "s2" 86fa9e4066Sahrens 87fa9e4066Sahrens /* 88fa9e4066Sahrens * For any given vdev specification, we can have multiple errors. The 89fa9e4066Sahrens * vdev_error() function keeps track of whether we have seen an error yet, and 90fa9e4066Sahrens * prints out a header if its the first error we've seen. 91fa9e4066Sahrens */ 9299653d4eSeschrock boolean_t error_seen; 9399653d4eSeschrock boolean_t is_force; 94fa9e4066Sahrens 9599653d4eSeschrock /*PRINTFLIKE1*/ 9699653d4eSeschrock static void 97fa9e4066Sahrens vdev_error(const char *fmt, ...) 
98fa9e4066Sahrens { 99fa9e4066Sahrens va_list ap; 100fa9e4066Sahrens 101fa9e4066Sahrens if (!error_seen) { 102fa9e4066Sahrens (void) fprintf(stderr, gettext("invalid vdev specification\n")); 103fa9e4066Sahrens if (!is_force) 104fa9e4066Sahrens (void) fprintf(stderr, gettext("use '-f' to override " 105fa9e4066Sahrens "the following errors:\n")); 106fa9e4066Sahrens else 107fa9e4066Sahrens (void) fprintf(stderr, gettext("the following errors " 108fa9e4066Sahrens "must be manually repaired:\n")); 10999653d4eSeschrock error_seen = B_TRUE; 110fa9e4066Sahrens } 111fa9e4066Sahrens 112fa9e4066Sahrens va_start(ap, fmt); 113fa9e4066Sahrens (void) vfprintf(stderr, fmt, ap); 114fa9e4066Sahrens va_end(ap); 115fa9e4066Sahrens } 116fa9e4066Sahrens 11746a2abf2Seschrock static void 11846a2abf2Seschrock libdiskmgt_error(int error) 119fa9e4066Sahrens { 120ea8dc4b6Seschrock /* 12199653d4eSeschrock * ENXIO/ENODEV is a valid error message if the device doesn't live in 122ea8dc4b6Seschrock * /dev/dsk. Don't bother printing an error message in this case. 123ea8dc4b6Seschrock */ 12499653d4eSeschrock if (error == ENXIO || error == ENODEV) 125ea8dc4b6Seschrock return; 126ea8dc4b6Seschrock 12746a2abf2Seschrock (void) fprintf(stderr, gettext("warning: device in use checking " 12846a2abf2Seschrock "failed: %s\n"), strerror(error)); 129fa9e4066Sahrens } 130fa9e4066Sahrens 131fa9e4066Sahrens /* 13246a2abf2Seschrock * Validate a device, passing the bulk of the work off to libdiskmgt. 
133fa9e4066Sahrens */ 1348488aeb5Staylor static int 13599653d4eSeschrock check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) 136fa9e4066Sahrens { 13746a2abf2Seschrock char *msg; 13846a2abf2Seschrock int error = 0; 13903a818bcSmmusante dm_who_type_t who; 140fa9e4066Sahrens 14103a818bcSmmusante if (force) 14203a818bcSmmusante who = DM_WHO_ZPOOL_FORCE; 14303a818bcSmmusante else if (isspare) 14403a818bcSmmusante who = DM_WHO_ZPOOL_SPARE; 14503a818bcSmmusante else 14603a818bcSmmusante who = DM_WHO_ZPOOL; 14703a818bcSmmusante 14803a818bcSmmusante if (dm_inuse((char *)path, &msg, who, &error) || error) { 14946a2abf2Seschrock if (error != 0) { 15046a2abf2Seschrock libdiskmgt_error(error); 15146a2abf2Seschrock return (0); 15246657f8dSmmusante } else { 15346a2abf2Seschrock vdev_error("%s", msg); 15446a2abf2Seschrock free(msg); 155181c2f42Smmusante return (-1); 156fa9e4066Sahrens } 157fa9e4066Sahrens } 158fa9e4066Sahrens 159fa9e4066Sahrens /* 16046a2abf2Seschrock * If we're given a whole disk, ignore overlapping slices since we're 16146a2abf2Seschrock * about to label it anyway. 162fa9e4066Sahrens */ 16346a2abf2Seschrock error = 0; 16446a2abf2Seschrock if (!wholedisk && !force && 16546a2abf2Seschrock (dm_isoverlapping((char *)path, &msg, &error) || error)) { 166181c2f42Smmusante if (error == 0) { 167181c2f42Smmusante /* dm_isoverlapping returned -1 */ 168181c2f42Smmusante vdev_error(gettext("%s overlaps with %s\n"), path, msg); 169181c2f42Smmusante free(msg); 170181c2f42Smmusante return (-1); 171181c2f42Smmusante } else if (error != ENODEV) { 172181c2f42Smmusante /* libdiskmgt's devcache only handles physical drives */ 17346a2abf2Seschrock libdiskmgt_error(error); 17446a2abf2Seschrock return (0); 175fa9e4066Sahrens } 17646a2abf2Seschrock } 177fa9e4066Sahrens 178181c2f42Smmusante return (0); 179fa9e4066Sahrens } 180fa9e4066Sahrens 1818488aeb5Staylor 182fa9e4066Sahrens /* 183fa9e4066Sahrens * Validate a whole disk. 
Iterate over all slices on the disk and make sure 184fa9e4066Sahrens * that none is in use by calling check_slice(). 185fa9e4066Sahrens */ 1868488aeb5Staylor static int 18799653d4eSeschrock check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) 188fa9e4066Sahrens { 189fa9e4066Sahrens dm_descriptor_t *drive, *media, *slice; 190fa9e4066Sahrens int err = 0; 191fa9e4066Sahrens int i; 192fa9e4066Sahrens int ret; 193fa9e4066Sahrens 194fa9e4066Sahrens /* 195fa9e4066Sahrens * Get the drive associated with this disk. This should never fail, 196fa9e4066Sahrens * because we already have an alias handle open for the device. 197fa9e4066Sahrens */ 198fa9e4066Sahrens if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, 1998a5bcf73SToomas Soome &err)) == NULL || *drive == 0) { 20046a2abf2Seschrock if (err) 20146a2abf2Seschrock libdiskmgt_error(err); 20246a2abf2Seschrock return (0); 20346a2abf2Seschrock } 204fa9e4066Sahrens 205fa9e4066Sahrens if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, 20646a2abf2Seschrock &err)) == NULL) { 20746a2abf2Seschrock dm_free_descriptors(drive); 20846a2abf2Seschrock if (err) 20946a2abf2Seschrock libdiskmgt_error(err); 21046a2abf2Seschrock return (0); 21146a2abf2Seschrock } 212fa9e4066Sahrens 213fa9e4066Sahrens dm_free_descriptors(drive); 214fa9e4066Sahrens 215fa9e4066Sahrens /* 216fa9e4066Sahrens * It is possible that the user has specified a removable media drive, 217fa9e4066Sahrens * and the media is not present. 
218fa9e4066Sahrens */ 2198a5bcf73SToomas Soome if (*media == 0) { 220fa9e4066Sahrens dm_free_descriptors(media); 22146a2abf2Seschrock vdev_error(gettext("'%s' has no media in drive\n"), name); 222fa9e4066Sahrens return (-1); 223fa9e4066Sahrens } 224fa9e4066Sahrens 225fa9e4066Sahrens if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, 22646a2abf2Seschrock &err)) == NULL) { 22746a2abf2Seschrock dm_free_descriptors(media); 22846a2abf2Seschrock if (err) 22946a2abf2Seschrock libdiskmgt_error(err); 23046a2abf2Seschrock return (0); 23146a2abf2Seschrock } 232fa9e4066Sahrens 233fa9e4066Sahrens dm_free_descriptors(media); 234fa9e4066Sahrens 235fa9e4066Sahrens ret = 0; 236fa9e4066Sahrens 237fa9e4066Sahrens /* 238fa9e4066Sahrens * Iterate over all slices and report any errors. We don't care about 239fa9e4066Sahrens * overlapping slices because we are using the whole disk. 240fa9e4066Sahrens */ 2418a5bcf73SToomas Soome for (i = 0; slice[i] != 0; i++) { 24299653d4eSeschrock char *name = dm_get_name(slice[i], &err); 24399653d4eSeschrock 24499653d4eSeschrock if (check_slice(name, force, B_TRUE, isspare) != 0) 245fa9e4066Sahrens ret = -1; 24699653d4eSeschrock 24799653d4eSeschrock dm_free_name(name); 248fa9e4066Sahrens } 249fa9e4066Sahrens 250fa9e4066Sahrens dm_free_descriptors(slice); 251fa9e4066Sahrens return (ret); 252fa9e4066Sahrens } 253fa9e4066Sahrens 254fa9e4066Sahrens /* 25546a2abf2Seschrock * Validate a device. 256fa9e4066Sahrens */ 2578488aeb5Staylor static int 25899653d4eSeschrock check_device(const char *path, boolean_t force, boolean_t isspare) 259fa9e4066Sahrens { 260fa9e4066Sahrens dm_descriptor_t desc; 261fa9e4066Sahrens int err; 26246a2abf2Seschrock char *dev; 263fa9e4066Sahrens 264fa9e4066Sahrens /* 265fa9e4066Sahrens * For whole disks, libdiskmgt does not include the leading dev path. 
266fa9e4066Sahrens */ 267fa9e4066Sahrens dev = strrchr(path, '/'); 268fa9e4066Sahrens assert(dev != NULL); 269fa9e4066Sahrens dev++; 2708a5bcf73SToomas Soome if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != 0) { 27199653d4eSeschrock err = check_disk(path, desc, force, isspare); 27246a2abf2Seschrock dm_free_descriptor(desc); 27346a2abf2Seschrock return (err); 274fa9e4066Sahrens } 275fa9e4066Sahrens 27699653d4eSeschrock return (check_slice(path, force, B_FALSE, isspare)); 277fa9e4066Sahrens } 278fa9e4066Sahrens 279fa9e4066Sahrens /* 280fa9e4066Sahrens * Check that a file is valid. All we can do in this case is check that it's 281181c2f42Smmusante * not in use by another pool, and not in use by swap. 282fa9e4066Sahrens */ 2838488aeb5Staylor static int 28499653d4eSeschrock check_file(const char *file, boolean_t force, boolean_t isspare) 285fa9e4066Sahrens { 28646a2abf2Seschrock char *name; 287fa9e4066Sahrens int fd; 288fa9e4066Sahrens int ret = 0; 289181c2f42Smmusante int err; 29046a2abf2Seschrock pool_state_t state; 29199653d4eSeschrock boolean_t inuse; 292fa9e4066Sahrens 293181c2f42Smmusante if (dm_inuse_swap(file, &err)) { 294181c2f42Smmusante if (err) 295181c2f42Smmusante libdiskmgt_error(err); 296181c2f42Smmusante else 297181c2f42Smmusante vdev_error(gettext("%s is currently used by swap. 
" 298181c2f42Smmusante "Please see swap(1M).\n"), file); 299181c2f42Smmusante return (-1); 300181c2f42Smmusante } 301181c2f42Smmusante 302fa9e4066Sahrens if ((fd = open(file, O_RDONLY)) < 0) 303fa9e4066Sahrens return (0); 304fa9e4066Sahrens 30599653d4eSeschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { 30646a2abf2Seschrock const char *desc; 30746a2abf2Seschrock 30846a2abf2Seschrock switch (state) { 30946a2abf2Seschrock case POOL_STATE_ACTIVE: 31046a2abf2Seschrock desc = gettext("active"); 31146a2abf2Seschrock break; 31246a2abf2Seschrock 31346a2abf2Seschrock case POOL_STATE_EXPORTED: 31446a2abf2Seschrock desc = gettext("exported"); 31546a2abf2Seschrock break; 31646a2abf2Seschrock 31746a2abf2Seschrock case POOL_STATE_POTENTIALLY_ACTIVE: 31846a2abf2Seschrock desc = gettext("potentially active"); 31946a2abf2Seschrock break; 32046a2abf2Seschrock 32146a2abf2Seschrock default: 32246a2abf2Seschrock desc = gettext("unknown"); 32346a2abf2Seschrock break; 32446a2abf2Seschrock } 32546a2abf2Seschrock 32699653d4eSeschrock /* 32799653d4eSeschrock * Allow hot spares to be shared between pools. 
32899653d4eSeschrock */ 32999653d4eSeschrock if (state == POOL_STATE_SPARE && isspare) 33099653d4eSeschrock return (0); 33199653d4eSeschrock 33299653d4eSeschrock if (state == POOL_STATE_ACTIVE || 33399653d4eSeschrock state == POOL_STATE_SPARE || !force) { 33499653d4eSeschrock switch (state) { 33599653d4eSeschrock case POOL_STATE_SPARE: 33699653d4eSeschrock vdev_error(gettext("%s is reserved as a hot " 33799653d4eSeschrock "spare for pool %s\n"), file, name); 33899653d4eSeschrock break; 33999653d4eSeschrock default: 34099653d4eSeschrock vdev_error(gettext("%s is part of %s pool " 34199653d4eSeschrock "'%s'\n"), file, desc, name); 34299653d4eSeschrock break; 34399653d4eSeschrock } 344fa9e4066Sahrens ret = -1; 345fa9e4066Sahrens } 346fa9e4066Sahrens 347fa9e4066Sahrens free(name); 348fa9e4066Sahrens } 349fa9e4066Sahrens 350fa9e4066Sahrens (void) close(fd); 351fa9e4066Sahrens return (ret); 352fa9e4066Sahrens } 353fa9e4066Sahrens 3548488aeb5Staylor 3558488aeb5Staylor /* 3568488aeb5Staylor * By "whole disk" we mean an entire physical disk (something we can 3578488aeb5Staylor * label, toggle the write cache on, etc.) as opposed to the full 3588488aeb5Staylor * capacity of a pseudo-device such as lofi or did. We act as if we 3598488aeb5Staylor * are labeling the disk, which should be a pretty good test of whether 3608488aeb5Staylor * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if 3618488aeb5Staylor * it isn't. 
3628488aeb5Staylor */ 36399653d4eSeschrock static boolean_t 3648488aeb5Staylor is_whole_disk(const char *arg) 365fa9e4066Sahrens { 3668488aeb5Staylor struct dk_gpt *label; 3678488aeb5Staylor int fd; 3688488aeb5Staylor char path[MAXPATHLEN]; 369fa9e4066Sahrens 3708488aeb5Staylor (void) snprintf(path, sizeof (path), "%s%s%s", 3716401734dSWill Andrews ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); 3728488aeb5Staylor if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) 3738488aeb5Staylor return (B_FALSE); 3748488aeb5Staylor if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { 3758488aeb5Staylor (void) close(fd); 3768488aeb5Staylor return (B_FALSE); 3778488aeb5Staylor } 3788488aeb5Staylor efi_free(label); 3798488aeb5Staylor (void) close(fd); 3808488aeb5Staylor return (B_TRUE); 381fa9e4066Sahrens } 382fa9e4066Sahrens 383fa9e4066Sahrens /* 384fa9e4066Sahrens * Create a leaf vdev. Determine if this is a file or a device. If it's a 385fa9e4066Sahrens * device, fill in the device id to make a complete nvlist. Valid forms for a 386fa9e4066Sahrens * leaf vdev are: 387fa9e4066Sahrens * 3888a5bcf73SToomas Soome * /dev/dsk/xxx Complete disk path 3898a5bcf73SToomas Soome * /xxx Full path to file 3908a5bcf73SToomas Soome * xxx Shorthand for /dev/dsk/xxx 391fa9e4066Sahrens */ 3928488aeb5Staylor static nvlist_t * 3935711d393Sloli make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log) 394fa9e4066Sahrens { 395fa9e4066Sahrens char path[MAXPATHLEN]; 396fa9e4066Sahrens struct stat64 statbuf; 397fa9e4066Sahrens nvlist_t *vdev = NULL; 398fa9e4066Sahrens char *type = NULL; 39999653d4eSeschrock boolean_t wholedisk = B_FALSE; 4005711d393Sloli uint64_t ashift = 0; 401fa9e4066Sahrens 402fa9e4066Sahrens /* 403fa9e4066Sahrens * Determine what type of vdev this is, and put the full path into 404fa9e4066Sahrens * 'path'. We detect whether this is a device of file afterwards by 405fa9e4066Sahrens * checking the st_mode of the file. 
406fa9e4066Sahrens */ 407fa9e4066Sahrens if (arg[0] == '/') { 408fa9e4066Sahrens /* 409fa9e4066Sahrens * Complete device or file path. Exact type is determined by 410fa9e4066Sahrens * examining the file descriptor afterwards. 411fa9e4066Sahrens */ 4128488aeb5Staylor wholedisk = is_whole_disk(arg); 4138488aeb5Staylor if (!wholedisk && (stat64(arg, &statbuf) != 0)) { 414fa9e4066Sahrens (void) fprintf(stderr, 415fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 416fa9e4066Sahrens arg, strerror(errno)); 417fa9e4066Sahrens return (NULL); 418fa9e4066Sahrens } 419fa9e4066Sahrens 420fa9e4066Sahrens (void) strlcpy(path, arg, sizeof (path)); 421fa9e4066Sahrens } else { 422fa9e4066Sahrens /* 423fa9e4066Sahrens * This may be a short path for a device, or it could be total 424fa9e4066Sahrens * gibberish. Check to see if it's a known device in 425fa9e4066Sahrens * /dev/dsk/. As part of this check, see if we've been given a 426fa9e4066Sahrens * an entire disk (minus the slice number). 427fa9e4066Sahrens */ 4286401734dSWill Andrews (void) snprintf(path, sizeof (path), "%s/%s", ZFS_DISK_ROOT, 429fa9e4066Sahrens arg); 4308488aeb5Staylor wholedisk = is_whole_disk(path); 4318488aeb5Staylor if (!wholedisk && (stat64(path, &statbuf) != 0)) { 432fa9e4066Sahrens /* 433fa9e4066Sahrens * If we got ENOENT, then the user gave us 434fa9e4066Sahrens * gibberish, so try to direct them with a 435fa9e4066Sahrens * reasonable error message. Otherwise, 436fa9e4066Sahrens * regurgitate strerror() since it's the best we 437fa9e4066Sahrens * can do. 
438fa9e4066Sahrens */ 439fa9e4066Sahrens if (errno == ENOENT) { 440fa9e4066Sahrens (void) fprintf(stderr, 441fa9e4066Sahrens gettext("cannot open '%s': no such " 4426401734dSWill Andrews "device in %s\n"), arg, ZFS_DISK_ROOT); 443fa9e4066Sahrens (void) fprintf(stderr, 444fa9e4066Sahrens gettext("must be a full path or " 445fa9e4066Sahrens "shorthand device name\n")); 446fa9e4066Sahrens return (NULL); 447fa9e4066Sahrens } else { 448fa9e4066Sahrens (void) fprintf(stderr, 449fa9e4066Sahrens gettext("cannot open '%s': %s\n"), 450fa9e4066Sahrens path, strerror(errno)); 451fa9e4066Sahrens return (NULL); 452fa9e4066Sahrens } 453fa9e4066Sahrens } 454fa9e4066Sahrens } 455fa9e4066Sahrens 456fa9e4066Sahrens /* 457fa9e4066Sahrens * Determine whether this is a device or a file. 458fa9e4066Sahrens */ 4598488aeb5Staylor if (wholedisk || S_ISBLK(statbuf.st_mode)) { 460fa9e4066Sahrens type = VDEV_TYPE_DISK; 461fa9e4066Sahrens } else if (S_ISREG(statbuf.st_mode)) { 462fa9e4066Sahrens type = VDEV_TYPE_FILE; 463fa9e4066Sahrens } else { 464fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot use '%s': must be a " 465fa9e4066Sahrens "block device or regular file\n"), path); 466fa9e4066Sahrens return (NULL); 467fa9e4066Sahrens } 468fa9e4066Sahrens 469fa9e4066Sahrens /* 470fa9e4066Sahrens * Finally, we have the complete device or file, and we know that it is 471fa9e4066Sahrens * acceptable to use. Construct the nvlist to describe this vdev. All 472fa9e4066Sahrens * vdevs have a 'path' element, and devices also have a 'devid' element. 
473fa9e4066Sahrens */ 474fa9e4066Sahrens verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); 475fa9e4066Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); 476fa9e4066Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); 4778654d025Sperrin verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0); 478663207adSDon Brady if (is_log) 479663207adSDon Brady verify(nvlist_add_string(vdev, ZPOOL_CONFIG_ALLOCATION_BIAS, 480663207adSDon Brady VDEV_ALLOC_BIAS_LOG) == 0); 481afefbcddSeschrock if (strcmp(type, VDEV_TYPE_DISK) == 0) 482afefbcddSeschrock verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, 483afefbcddSeschrock (uint64_t)wholedisk) == 0); 484fa9e4066Sahrens 4855711d393Sloli if (props != NULL) { 4865711d393Sloli char *value = NULL; 4875711d393Sloli 4885711d393Sloli if (nvlist_lookup_string(props, 4895711d393Sloli zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { 4905711d393Sloli if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { 4915711d393Sloli (void) fprintf(stderr, 4925711d393Sloli gettext("ashift must be a number.\n")); 4935711d393Sloli return (NULL); 4945711d393Sloli } 4955711d393Sloli if (ashift != 0 && 4965711d393Sloli (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { 4975711d393Sloli (void) fprintf(stderr, 4985711d393Sloli gettext("invalid 'ashift=%" PRIu64 "' " 4995711d393Sloli "property: only values between %" PRId32 " " 5005711d393Sloli "and %" PRId32 " are allowed.\n"), 5015711d393Sloli ashift, ASHIFT_MIN, ASHIFT_MAX); 5025711d393Sloli return (NULL); 5035711d393Sloli } 5045711d393Sloli } 5055711d393Sloli } 5065711d393Sloli 507fa9e4066Sahrens /* 508fa9e4066Sahrens * For a whole disk, defer getting its devid until after labeling it. 509fa9e4066Sahrens */ 510fa9e4066Sahrens if (S_ISBLK(statbuf.st_mode) && !wholedisk) { 511fa9e4066Sahrens /* 512fa9e4066Sahrens * Get the devid for the device. 
513fa9e4066Sahrens */ 514fa9e4066Sahrens int fd; 515fa9e4066Sahrens ddi_devid_t devid; 516fa9e4066Sahrens char *minor = NULL, *devid_str = NULL; 517fa9e4066Sahrens 518fa9e4066Sahrens if ((fd = open(path, O_RDONLY)) < 0) { 519fa9e4066Sahrens (void) fprintf(stderr, gettext("cannot open '%s': " 520fa9e4066Sahrens "%s\n"), path, strerror(errno)); 521fa9e4066Sahrens nvlist_free(vdev); 522fa9e4066Sahrens return (NULL); 523fa9e4066Sahrens } 524fa9e4066Sahrens 525fa9e4066Sahrens if (devid_get(fd, &devid) == 0) { 526fa9e4066Sahrens if (devid_get_minor_name(fd, &minor) == 0 && 527fa9e4066Sahrens (devid_str = devid_str_encode(devid, minor)) != 528fa9e4066Sahrens NULL) { 529fa9e4066Sahrens verify(nvlist_add_string(vdev, 530fa9e4066Sahrens ZPOOL_CONFIG_DEVID, devid_str) == 0); 531fa9e4066Sahrens } 532fa9e4066Sahrens if (devid_str != NULL) 533fa9e4066Sahrens devid_str_free(devid_str); 534fa9e4066Sahrens if (minor != NULL) 535fa9e4066Sahrens devid_str_free(minor); 536fa9e4066Sahrens devid_free(devid); 537fa9e4066Sahrens } 538fa9e4066Sahrens 539fa9e4066Sahrens (void) close(fd); 540fa9e4066Sahrens } 541fa9e4066Sahrens 5425711d393Sloli if (ashift > 0) 5435711d393Sloli (void) nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT, ashift); 5445711d393Sloli 545fa9e4066Sahrens return (vdev); 546fa9e4066Sahrens } 547fa9e4066Sahrens 548fa9e4066Sahrens /* 549fa9e4066Sahrens * Go through and verify the replication level of the pool is consistent. 550fa9e4066Sahrens * Performs the following checks: 551fa9e4066Sahrens * 5528a5bcf73SToomas Soome * For the new spec, verifies that devices in mirrors and raidz are the 5538a5bcf73SToomas Soome * same size. 554fa9e4066Sahrens * 5558a5bcf73SToomas Soome * If the current configuration already has inconsistent replication 5568a5bcf73SToomas Soome * levels, ignore any other potential problems in the new spec. 
557fa9e4066Sahrens * 5588a5bcf73SToomas Soome * Otherwise, make sure that the current spec (if there is one) and the new 5598a5bcf73SToomas Soome * spec have consistent replication levels. 560663207adSDon Brady * 561663207adSDon Brady * If there is no current spec (create), make sure new spec has at least 562663207adSDon Brady * one general purpose vdev. 563fa9e4066Sahrens */ 564fa9e4066Sahrens typedef struct replication_level { 56599653d4eSeschrock char *zprl_type; 56699653d4eSeschrock uint64_t zprl_children; 56799653d4eSeschrock uint64_t zprl_parity; 568fa9e4066Sahrens } replication_level_t; 569fa9e4066Sahrens 5708488aeb5Staylor #define ZPOOL_FUZZ (16 * 1024 * 1024) 5718488aeb5Staylor 572663207adSDon Brady static boolean_t 573663207adSDon Brady is_raidz_mirror(replication_level_t *a, replication_level_t *b, 574663207adSDon Brady replication_level_t **raidz, replication_level_t **mirror) 575663207adSDon Brady { 576663207adSDon Brady if (strcmp(a->zprl_type, "raidz") == 0 && 577663207adSDon Brady strcmp(b->zprl_type, "mirror") == 0) { 578663207adSDon Brady *raidz = a; 579663207adSDon Brady *mirror = b; 580663207adSDon Brady return (B_TRUE); 581663207adSDon Brady } 582663207adSDon Brady return (B_FALSE); 583663207adSDon Brady } 584663207adSDon Brady 585fa9e4066Sahrens /* 586fa9e4066Sahrens * Given a list of toplevel vdevs, return the current replication level. If 587fa9e4066Sahrens * the config is inconsistent, then NULL is returned. If 'fatal' is set, then 588fa9e4066Sahrens * an error message will be displayed for each self-inconsistent vdev. 
589fa9e4066Sahrens */ 5908488aeb5Staylor static replication_level_t * 59199653d4eSeschrock get_replication(nvlist_t *nvroot, boolean_t fatal) 592fa9e4066Sahrens { 593fa9e4066Sahrens nvlist_t **top; 594fa9e4066Sahrens uint_t t, toplevels; 595fa9e4066Sahrens nvlist_t **child; 596fa9e4066Sahrens uint_t c, children; 597fa9e4066Sahrens nvlist_t *nv; 598fa9e4066Sahrens char *type; 599b327cd3fSIgor Kozhukhov replication_level_t lastrep = {0}; 600b327cd3fSIgor Kozhukhov replication_level_t rep; 601b327cd3fSIgor Kozhukhov replication_level_t *ret; 602663207adSDon Brady replication_level_t *raidz, *mirror; 60399653d4eSeschrock boolean_t dontreport; 604fa9e4066Sahrens 605fa9e4066Sahrens ret = safe_malloc(sizeof (replication_level_t)); 606fa9e4066Sahrens 607fa9e4066Sahrens verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 608fa9e4066Sahrens &top, &toplevels) == 0); 609fa9e4066Sahrens 610fa9e4066Sahrens for (t = 0; t < toplevels; t++) { 6118654d025Sperrin uint64_t is_log = B_FALSE; 6128654d025Sperrin 613fa9e4066Sahrens nv = top[t]; 614fa9e4066Sahrens 6158654d025Sperrin /* 6168654d025Sperrin * For separate logs we ignore the top level vdev replication 6178654d025Sperrin * constraints. 6188654d025Sperrin */ 6198654d025Sperrin (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); 6208654d025Sperrin if (is_log) 6218654d025Sperrin continue; 6228654d025Sperrin 6238654d025Sperrin verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, 6248654d025Sperrin &type) == 0); 625fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 626fa9e4066Sahrens &child, &children) != 0) { 627fa9e4066Sahrens /* 628fa9e4066Sahrens * This is a 'file' or 'disk' vdev. 
629fa9e4066Sahrens */ 63099653d4eSeschrock rep.zprl_type = type; 63199653d4eSeschrock rep.zprl_children = 1; 63299653d4eSeschrock rep.zprl_parity = 0; 633fa9e4066Sahrens } else { 634fa9e4066Sahrens uint64_t vdev_size; 635fa9e4066Sahrens 636fa9e4066Sahrens /* 637fa9e4066Sahrens * This is a mirror or RAID-Z vdev. Go through and make 638fa9e4066Sahrens * sure the contents are all the same (files vs. disks), 639fa9e4066Sahrens * keeping track of the number of elements in the 640fa9e4066Sahrens * process. 641fa9e4066Sahrens * 642fa9e4066Sahrens * We also check that the size of each vdev (if it can 643fa9e4066Sahrens * be determined) is the same. 644fa9e4066Sahrens */ 64599653d4eSeschrock rep.zprl_type = type; 64699653d4eSeschrock rep.zprl_children = 0; 64799653d4eSeschrock 64899653d4eSeschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { 64999653d4eSeschrock verify(nvlist_lookup_uint64(nv, 65099653d4eSeschrock ZPOOL_CONFIG_NPARITY, 65199653d4eSeschrock &rep.zprl_parity) == 0); 65299653d4eSeschrock assert(rep.zprl_parity != 0); 65399653d4eSeschrock } else { 65499653d4eSeschrock rep.zprl_parity = 0; 65599653d4eSeschrock } 656fa9e4066Sahrens 657fa9e4066Sahrens /* 6588654d025Sperrin * The 'dontreport' variable indicates that we've 659fa9e4066Sahrens * already reported an error for this spec, so don't 660fa9e4066Sahrens * bother doing it again. 
661fa9e4066Sahrens */ 662fa9e4066Sahrens type = NULL; 663fa9e4066Sahrens dontreport = 0; 664fa9e4066Sahrens vdev_size = -1ULL; 665fa9e4066Sahrens for (c = 0; c < children; c++) { 666fa9e4066Sahrens nvlist_t *cnv = child[c]; 667fa9e4066Sahrens char *path; 668fa9e4066Sahrens struct stat64 statbuf; 669fa9e4066Sahrens uint64_t size = -1ULL; 670fa9e4066Sahrens char *childtype; 671fa9e4066Sahrens int fd, err; 672fa9e4066Sahrens 67399653d4eSeschrock rep.zprl_children++; 674fa9e4066Sahrens 675fa9e4066Sahrens verify(nvlist_lookup_string(cnv, 676fa9e4066Sahrens ZPOOL_CONFIG_TYPE, &childtype) == 0); 67794de1d4cSeschrock 67894de1d4cSeschrock /* 6798654d025Sperrin * If this is a replacing or spare vdev, then 680ac0215f4Sloli * get the real first child of the vdev: do this 681ac0215f4Sloli * in a loop because replacing and spare vdevs 682ac0215f4Sloli * can be nested. 68394de1d4cSeschrock */ 684ac0215f4Sloli while (strcmp(childtype, 68594de1d4cSeschrock VDEV_TYPE_REPLACING) == 0 || 68694de1d4cSeschrock strcmp(childtype, VDEV_TYPE_SPARE) == 0) { 68794de1d4cSeschrock nvlist_t **rchild; 68894de1d4cSeschrock uint_t rchildren; 68994de1d4cSeschrock 69094de1d4cSeschrock verify(nvlist_lookup_nvlist_array(cnv, 69194de1d4cSeschrock ZPOOL_CONFIG_CHILDREN, &rchild, 69294de1d4cSeschrock &rchildren) == 0); 69394de1d4cSeschrock assert(rchildren == 2); 69494de1d4cSeschrock cnv = rchild[0]; 69594de1d4cSeschrock 69694de1d4cSeschrock verify(nvlist_lookup_string(cnv, 69794de1d4cSeschrock ZPOOL_CONFIG_TYPE, 69894de1d4cSeschrock &childtype) == 0); 69994de1d4cSeschrock } 70094de1d4cSeschrock 701fa9e4066Sahrens verify(nvlist_lookup_string(cnv, 702fa9e4066Sahrens ZPOOL_CONFIG_PATH, &path) == 0); 703fa9e4066Sahrens 704fa9e4066Sahrens /* 705fa9e4066Sahrens * If we have a raidz/mirror that combines disks 706fa9e4066Sahrens * with files, report it as an error. 
707fa9e4066Sahrens */ 708fa9e4066Sahrens if (!dontreport && type != NULL && 709fa9e4066Sahrens strcmp(type, childtype) != 0) { 710fa9e4066Sahrens if (ret != NULL) 711fa9e4066Sahrens free(ret); 712fa9e4066Sahrens ret = NULL; 713fa9e4066Sahrens if (fatal) 714fa9e4066Sahrens vdev_error(gettext( 715fa9e4066Sahrens "mismatched replication " 716fa9e4066Sahrens "level: %s contains both " 717fa9e4066Sahrens "files and devices\n"), 71899653d4eSeschrock rep.zprl_type); 719fa9e4066Sahrens else 720fa9e4066Sahrens return (NULL); 72199653d4eSeschrock dontreport = B_TRUE; 722fa9e4066Sahrens } 723fa9e4066Sahrens 724fa9e4066Sahrens /* 725fa9e4066Sahrens * According to stat(2), the value of 'st_size' 726fa9e4066Sahrens * is undefined for block devices and character 727fa9e4066Sahrens * devices. But there is no effective way to 728fa9e4066Sahrens * determine the real size in userland. 729fa9e4066Sahrens * 730fa9e4066Sahrens * Instead, we'll take advantage of an 731fa9e4066Sahrens * implementation detail of spec_size(). If the 732fa9e4066Sahrens * device is currently open, then we (should) 733fa9e4066Sahrens * return a valid size. 734fa9e4066Sahrens * 735fa9e4066Sahrens * If we still don't get a valid size (indicated 736fa9e4066Sahrens * by a size of 0 or MAXOFFSET_T), then ignore 737fa9e4066Sahrens * this device altogether. 738fa9e4066Sahrens */ 739fa9e4066Sahrens if ((fd = open(path, O_RDONLY)) >= 0) { 740fa9e4066Sahrens err = fstat64(fd, &statbuf); 741fa9e4066Sahrens (void) close(fd); 742fa9e4066Sahrens } else { 743fa9e4066Sahrens err = stat64(path, &statbuf); 744fa9e4066Sahrens } 745fa9e4066Sahrens 746fa9e4066Sahrens if (err != 0 || 747fa9e4066Sahrens statbuf.st_size == 0 || 748fa9e4066Sahrens statbuf.st_size == MAXOFFSET_T) 749fa9e4066Sahrens continue; 750fa9e4066Sahrens 751fa9e4066Sahrens size = statbuf.st_size; 752fa9e4066Sahrens 753fa9e4066Sahrens /* 7548488aeb5Staylor * Also make sure that devices and 7558488aeb5Staylor * slices have a consistent size. 
If 7568488aeb5Staylor * they differ by a significant amount 7578488aeb5Staylor * (~16MB) then report an error. 758fa9e4066Sahrens */ 7598488aeb5Staylor if (!dontreport && 7608488aeb5Staylor (vdev_size != -1ULL && 7618488aeb5Staylor (labs(size - vdev_size) > 7628488aeb5Staylor ZPOOL_FUZZ))) { 763fa9e4066Sahrens if (ret != NULL) 764fa9e4066Sahrens free(ret); 765fa9e4066Sahrens ret = NULL; 766fa9e4066Sahrens if (fatal) 767fa9e4066Sahrens vdev_error(gettext( 768fa9e4066Sahrens "%s contains devices of " 769fa9e4066Sahrens "different sizes\n"), 77099653d4eSeschrock rep.zprl_type); 771fa9e4066Sahrens else 772fa9e4066Sahrens return (NULL); 77399653d4eSeschrock dontreport = B_TRUE; 774fa9e4066Sahrens } 775fa9e4066Sahrens 776fa9e4066Sahrens type = childtype; 777fa9e4066Sahrens vdev_size = size; 778fa9e4066Sahrens } 779fa9e4066Sahrens } 780fa9e4066Sahrens 781fa9e4066Sahrens /* 782fa9e4066Sahrens * At this point, we have the replication of the last toplevel 783663207adSDon Brady * vdev in 'rep'. Compare it to 'lastrep' to see if it is 784fa9e4066Sahrens * different. 785fa9e4066Sahrens */ 78699653d4eSeschrock if (lastrep.zprl_type != NULL) { 787663207adSDon Brady if (is_raidz_mirror(&lastrep, &rep, &raidz, &mirror) || 788663207adSDon Brady is_raidz_mirror(&rep, &lastrep, &raidz, &mirror)) { 789663207adSDon Brady /* 790663207adSDon Brady * Accepted raidz and mirror when they can 791663207adSDon Brady * handle the same number of disk failures. 792663207adSDon Brady */ 793663207adSDon Brady if (raidz->zprl_parity != 794663207adSDon Brady mirror->zprl_children - 1) { 795663207adSDon Brady if (ret != NULL) 796663207adSDon Brady free(ret); 797663207adSDon Brady ret = NULL; 798663207adSDon Brady if (fatal) 799663207adSDon Brady vdev_error(gettext( 800663207adSDon Brady "mismatched replication " 801663207adSDon Brady "level: " 802663207adSDon Brady "%s and %s vdevs with " 803663207adSDon Brady "different redundancy, " 804663207adSDon Brady "%llu vs. 
%llu (%llu-way) " 805663207adSDon Brady "are present\n"), 806663207adSDon Brady raidz->zprl_type, 807663207adSDon Brady mirror->zprl_type, 808663207adSDon Brady raidz->zprl_parity, 809663207adSDon Brady mirror->zprl_children - 1, 810663207adSDon Brady mirror->zprl_children); 811663207adSDon Brady else 812663207adSDon Brady return (NULL); 813663207adSDon Brady } 814663207adSDon Brady } else if (strcmp(lastrep.zprl_type, rep.zprl_type) != 815663207adSDon Brady 0) { 816fa9e4066Sahrens if (ret != NULL) 817fa9e4066Sahrens free(ret); 818fa9e4066Sahrens ret = NULL; 819fa9e4066Sahrens if (fatal) 820fa9e4066Sahrens vdev_error(gettext( 82199653d4eSeschrock "mismatched replication level: " 82299653d4eSeschrock "both %s and %s vdevs are " 823fa9e4066Sahrens "present\n"), 82499653d4eSeschrock lastrep.zprl_type, rep.zprl_type); 825fa9e4066Sahrens else 826fa9e4066Sahrens return (NULL); 82799653d4eSeschrock } else if (lastrep.zprl_parity != rep.zprl_parity) { 828fa9e4066Sahrens if (ret) 829fa9e4066Sahrens free(ret); 830fa9e4066Sahrens ret = NULL; 831fa9e4066Sahrens if (fatal) 832fa9e4066Sahrens vdev_error(gettext( 83399653d4eSeschrock "mismatched replication level: " 83499653d4eSeschrock "both %llu and %llu device parity " 83599653d4eSeschrock "%s vdevs are present\n"), 83699653d4eSeschrock lastrep.zprl_parity, 83799653d4eSeschrock rep.zprl_parity, 83899653d4eSeschrock rep.zprl_type); 83999653d4eSeschrock else 84099653d4eSeschrock return (NULL); 84199653d4eSeschrock } else if (lastrep.zprl_children != rep.zprl_children) { 84299653d4eSeschrock if (ret) 84399653d4eSeschrock free(ret); 84499653d4eSeschrock ret = NULL; 84599653d4eSeschrock if (fatal) 84699653d4eSeschrock vdev_error(gettext( 84799653d4eSeschrock "mismatched replication level: " 84899653d4eSeschrock "both %llu-way and %llu-way %s " 849fa9e4066Sahrens "vdevs are present\n"), 85099653d4eSeschrock lastrep.zprl_children, 85199653d4eSeschrock rep.zprl_children, 85299653d4eSeschrock rep.zprl_type); 853fa9e4066Sahrens else 
					return (NULL);
			}
		}
		lastrep = rep;
	}

	if (ret != NULL)
		*ret = rep;

	return (ret);
}

/*
 * Check the replication level of the vdev spec against the current pool.  Calls
 * get_replication() to make sure the new spec is self-consistent.  If the pool
 * has a consistent replication level, then we ignore any errors.  Otherwise,
 * report any difference between the two.
 *
 * 'config' is the configuration of the existing pool, or NULL when there is
 * none to compare against; 'newroot' is the root of the new vdev spec.
 *
 * Returns 0 when there is nothing to complain about (including the cases
 * where the existing pool yields no replication level, the new spec has no
 * children, or the new spec consists only of logs), and -1 when the new spec
 * is inconsistent with itself or with the existing pool.  Mismatches are
 * reported through vdev_error().
 */
static int
check_replication(nvlist_t *config, nvlist_t *newroot)
{
	nvlist_t **child;
	uint_t children;
	replication_level_t *current = NULL, *new;
	replication_level_t *raidz, *mirror;
	int ret;

	/*
	 * If we have a current pool configuration, check to see if it's
	 * self-consistent.  If not, simply return success.
	 */
	if (config != NULL) {
		nvlist_t *nvroot;

		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		/* B_FALSE: do not report problems found in the existing pool */
		if ((current = get_replication(nvroot, B_FALSE)) == NULL)
			return (0);
	}
	/*
	 * for spares there may be no children, and therefore no
	 * replication level to check
	 */
	if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) || (children == 0)) {
		free(current);
		return (0);
	}

	/*
	 * If all we have is logs then there's no replication level to check.
	 */
	if (num_logs(newroot) == children) {
		free(current);
		return (0);
	}

	/*
	 * Get the replication level of the new vdev spec, reporting any
	 * inconsistencies found.
	 */
	if ((new = get_replication(newroot, B_TRUE)) == NULL) {
		free(current);
		return (-1);
	}

	/*
	 * Check to see if the new vdev spec matches the replication level of
	 * the current pool.
	 */
	ret = 0;
	if (current != NULL) {
		/*
		 * is_raidz_mirror() is tried in both argument orders so that
		 * 'raidz' and 'mirror' get filled in whichever side is which.
		 */
		if (is_raidz_mirror(current, new, &raidz, &mirror) ||
		    is_raidz_mirror(new, current, &raidz, &mirror)) {
			/*
			 * A raidz/mirror mix is only accepted when the raidz
			 * parity equals the mirror width minus one.
			 */
			if (raidz->zprl_parity != mirror->zprl_children - 1) {
				vdev_error(gettext(
				    "mismatched replication level: pool and "
				    "new vdev with different redundancy, %s "
				    "and %s vdevs, %llu vs. %llu (%llu-way)\n"),
				    raidz->zprl_type,
				    mirror->zprl_type,
				    raidz->zprl_parity,
				    mirror->zprl_children - 1,
				    mirror->zprl_children);
				ret = -1;
			}
		} else if (strcmp(current->zprl_type, new->zprl_type) != 0) {
			vdev_error(gettext(
			    "mismatched replication level: pool uses %s "
			    "and new vdev is %s\n"),
			    current->zprl_type, new->zprl_type);
			ret = -1;
		} else if (current->zprl_parity != new->zprl_parity) {
			vdev_error(gettext(
			    "mismatched replication level: pool uses %llu "
			    "device parity and new vdev uses %llu\n"),
			    current->zprl_parity, new->zprl_parity);
			ret = -1;
		} else if (current->zprl_children != new->zprl_children) {
			vdev_error(gettext(
			    "mismatched replication level: pool uses %llu-way "
			    "%s and new vdev uses %llu-way %s\n"),
			    current->zprl_children, current->zprl_type,
			    new->zprl_children, new->zprl_type);
			ret = -1;
		}
	}

	free(new);
	if (current != NULL)
		free(current);

	return (ret);
}

/*
 * Go through and find any whole disks in the vdev specification, labelling them
 * as appropriate.  When constructing the vdev spec, we were unable to open this
 * device in order to provide a devid.  Now that we have labelled the disk and
 * know the pool slice is valid, we can construct the devid now.
 *
 * If the disk was already labeled with an EFI label, we will have gotten the
 * devid already (because we were able to open the whole disk).  Otherwise, we
 * need to get the devid after we label the disk.
 *
 * The function recurses over the ZPOOL_CONFIG_CHILDREN, ZPOOL_CONFIG_SPARES
 * and ZPOOL_CONFIG_L2CACHE arrays of 'nv'.  'boot_type' and 'boot_size' are
 * handed through unchanged to zpool_label_disk().
 *
 * Returns 0 on success.  Returns -1 when a boot label is requested for a
 * vdev that is not a whole disk, when a boot label is requested for a root
 * with more than one child, when labelling fails, or when the labelled slice
 * cannot be opened.  A non-zero result from a recursive call is returned
 * as is and stops the walk.
 */
static int
make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type,
    uint64_t boot_size)
{
	nvlist_t **child;
	uint_t c, children;
	char *type, *path, *diskname;
	char buf[MAXPATHLEN];
	uint64_t wholedisk;
	int fd;
	int ret;
	int slice;
	ddi_devid_t devid;
	char *minor = NULL, *devid_str = NULL;

	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);

	/* No children array: this nvlist describes a leaf vdev. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {

		if (strcmp(type, VDEV_TYPE_DISK) != 0)
			return (0);

		/*
		 * We have a disk device.  Get the path to the device and
		 * derive the bare disk name from its last path component;
		 * whether it is a whole disk is decided below from the
		 * ZPOOL_CONFIG_WHOLE_DISK property.
		 */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);

		diskname = strrchr(path, '/');
		assert(diskname != NULL);
		diskname++;

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk) != 0 || !wholedisk) {
			/*
			 * This is not whole disk, return error if
			 * boot partition creation was requested
			 */
			if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
				(void) fprintf(stderr,
				    gettext("creating boot partition is only "
				    "supported on whole disk vdevs: %s\n"),
				    diskname);
				return (-1);
			}
			return (0);
		}

		/* 'slice' receives the slice number appended to the path below */
		ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type,
		    boot_size, &slice);
		if (ret == -1)
			return (ret);

		/*
		 * Fill in the devid, now that we've labeled the disk.
		 */
		(void) snprintf(buf, sizeof (buf), "%ss%d", path, slice);
		if ((fd = open(buf, O_RDONLY)) < 0) {
			(void) fprintf(stderr,
			    gettext("cannot open '%s': %s\n"),
			    buf, strerror(errno));
			return (-1);
		}

		/*
		 * Failing to obtain a devid is not treated as an error; the
		 * vdev is simply left without ZPOOL_CONFIG_DEVID.
		 */
		if (devid_get(fd, &devid) == 0) {
			if (devid_get_minor_name(fd, &minor) == 0 &&
			    (devid_str = devid_str_encode(devid, minor)) !=
			    NULL) {
				verify(nvlist_add_string(nv,
				    ZPOOL_CONFIG_DEVID, devid_str) == 0);
			}
			if (devid_str != NULL)
				devid_str_free(devid_str);
			if (minor != NULL)
				devid_str_free(minor);
			devid_free(devid);
		}

		/*
		 * Update the path to refer to the pool slice.  The presence of
		 * the 'whole_disk' field indicates to the CLI that we should
		 * chop off the slice number when displaying the device in
		 * future output.
		 */
		verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);

		(void) close(fd);

		return (0);
	}

	/* illumos kernel does not support booting from multi-vdev pools. */
	if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) {
		if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) {
			(void) fprintf(stderr, gettext("boot pool "
			    "can not have more than one vdev\n"));
			return (-1);
		}
	}

	/* Interior vdev: label the children, then any spares and caches. */
	for (c = 0; c < children; c++) {
		ret = make_disks(zhp, child[c], boot_type, boot_size);
		if (ret != 0)
			return (ret);
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0)
		for (c = 0; c < children; c++) {
			ret = make_disks(zhp, child[c], boot_type, boot_size);
			if (ret != 0)
				return (ret);
		}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0)
		for (c = 0; c < children; c++) {
			ret = make_disks(zhp, child[c], boot_type, boot_size);
			if (ret != 0)
				return (ret);
		}

	return (0);
}

/*
 * Determine if the given path is a hot spare within the given configuration.
110899653d4eSeschrock */ 110999653d4eSeschrock static boolean_t 111099653d4eSeschrock is_spare(nvlist_t *config, const char *path) 111199653d4eSeschrock { 111299653d4eSeschrock int fd; 111399653d4eSeschrock pool_state_t state; 11143ccfa83cSahrens char *name = NULL; 111599653d4eSeschrock nvlist_t *label; 111699653d4eSeschrock uint64_t guid, spareguid; 111799653d4eSeschrock nvlist_t *nvroot; 111899653d4eSeschrock nvlist_t **spares; 111999653d4eSeschrock uint_t i, nspares; 112099653d4eSeschrock boolean_t inuse; 112199653d4eSeschrock 112299653d4eSeschrock if ((fd = open(path, O_RDONLY)) < 0) 112399653d4eSeschrock return (B_FALSE); 112499653d4eSeschrock 112599653d4eSeschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || 112699653d4eSeschrock !inuse || 112799653d4eSeschrock state != POOL_STATE_SPARE || 1128*d8ab6e12SDon Brady zpool_read_label(fd, &label, NULL) != 0) { 11293ccfa83cSahrens free(name); 113099653d4eSeschrock (void) close(fd); 113199653d4eSeschrock return (B_FALSE); 113299653d4eSeschrock } 11333ccfa83cSahrens free(name); 113499653d4eSeschrock (void) close(fd); 11353f9d6ad7SLin Ling 113699653d4eSeschrock verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); 113799653d4eSeschrock nvlist_free(label); 113899653d4eSeschrock 113999653d4eSeschrock verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 114099653d4eSeschrock &nvroot) == 0); 114199653d4eSeschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 114299653d4eSeschrock &spares, &nspares) == 0) { 114399653d4eSeschrock for (i = 0; i < nspares; i++) { 114499653d4eSeschrock verify(nvlist_lookup_uint64(spares[i], 114599653d4eSeschrock ZPOOL_CONFIG_GUID, &spareguid) == 0); 114699653d4eSeschrock if (spareguid == guid) 114799653d4eSeschrock return (B_TRUE); 114899653d4eSeschrock } 114999653d4eSeschrock } 115099653d4eSeschrock 115199653d4eSeschrock return (B_FALSE); 115299653d4eSeschrock } 115399653d4eSeschrock 1154fa9e4066Sahrens /* 1155fa9e4066Sahrens * Go through and 
find any devices that are in use. We rely on libdiskmgt for 1156fa9e4066Sahrens * the majority of this task. 1157fa9e4066Sahrens */ 115875fbdf9bSBasil Crow static boolean_t 115975fbdf9bSBasil Crow is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, 11603f9d6ad7SLin Ling boolean_t replacing, boolean_t isspare) 1161fa9e4066Sahrens { 1162fa9e4066Sahrens nvlist_t **child; 1163fa9e4066Sahrens uint_t c, children; 1164fa9e4066Sahrens char *type, *path; 1165b327cd3fSIgor Kozhukhov int ret = 0; 116699653d4eSeschrock char buf[MAXPATHLEN]; 116799653d4eSeschrock uint64_t wholedisk; 116875fbdf9bSBasil Crow boolean_t anyinuse = B_FALSE; 1169fa9e4066Sahrens 1170fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 1171fa9e4066Sahrens 1172fa9e4066Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 1173fa9e4066Sahrens &child, &children) != 0) { 1174fa9e4066Sahrens 1175fa9e4066Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); 1176fa9e4066Sahrens 117799653d4eSeschrock /* 117899653d4eSeschrock * As a generic check, we look to see if this is a replace of a 117999653d4eSeschrock * hot spare within the same pool. If so, we allow it 118099653d4eSeschrock * regardless of what libdiskmgt or zpool_in_use() says. 
118199653d4eSeschrock */ 11823f9d6ad7SLin Ling if (replacing) { 118399653d4eSeschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 118499653d4eSeschrock &wholedisk) == 0 && wholedisk) 118599653d4eSeschrock (void) snprintf(buf, sizeof (buf), "%ss0", 118699653d4eSeschrock path); 118799653d4eSeschrock else 118899653d4eSeschrock (void) strlcpy(buf, path, sizeof (buf)); 11893f9d6ad7SLin Ling 119099653d4eSeschrock if (is_spare(config, buf)) 119175fbdf9bSBasil Crow return (B_FALSE); 119299653d4eSeschrock } 119399653d4eSeschrock 1194fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_DISK) == 0) 119599653d4eSeschrock ret = check_device(path, force, isspare); 119675fbdf9bSBasil Crow else if (strcmp(type, VDEV_TYPE_FILE) == 0) 119799653d4eSeschrock ret = check_file(path, force, isspare); 1198fa9e4066Sahrens 119975fbdf9bSBasil Crow return (ret != 0); 1200fa9e4066Sahrens } 1201fa9e4066Sahrens 1202fa9e4066Sahrens for (c = 0; c < children; c++) 120375fbdf9bSBasil Crow if (is_device_in_use(config, child[c], force, replacing, 120475fbdf9bSBasil Crow B_FALSE)) 120575fbdf9bSBasil Crow anyinuse = B_TRUE; 1206fa9e4066Sahrens 120799653d4eSeschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, 120899653d4eSeschrock &child, &children) == 0) 120999653d4eSeschrock for (c = 0; c < children; c++) 121075fbdf9bSBasil Crow if (is_device_in_use(config, child[c], force, replacing, 121175fbdf9bSBasil Crow B_TRUE)) 121275fbdf9bSBasil Crow anyinuse = B_TRUE; 1213fa94a07fSbrendan 1214fa94a07fSbrendan if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 1215fa94a07fSbrendan &child, &children) == 0) 1216fa94a07fSbrendan for (c = 0; c < children; c++) 121775fbdf9bSBasil Crow if (is_device_in_use(config, child[c], force, replacing, 121875fbdf9bSBasil Crow B_FALSE)) 121975fbdf9bSBasil Crow anyinuse = B_TRUE; 1220fa94a07fSbrendan 122175fbdf9bSBasil Crow return (anyinuse); 1222fa9e4066Sahrens } 1223fa9e4066Sahrens 12248488aeb5Staylor static const char * 1225f94275ceSAdam Leventhal 
is_grouping(const char *type, int *mindev, int *maxdev) 122699653d4eSeschrock { 1227f94275ceSAdam Leventhal if (strncmp(type, "raidz", 5) == 0) { 1228f94275ceSAdam Leventhal const char *p = type + 5; 1229f94275ceSAdam Leventhal char *end; 1230f94275ceSAdam Leventhal long nparity; 1231f94275ceSAdam Leventhal 1232f94275ceSAdam Leventhal if (*p == '\0') { 1233f94275ceSAdam Leventhal nparity = 1; 1234f94275ceSAdam Leventhal } else if (*p == '0') { 1235f94275ceSAdam Leventhal return (NULL); /* no zero prefixes allowed */ 1236f94275ceSAdam Leventhal } else { 1237f94275ceSAdam Leventhal errno = 0; 1238f94275ceSAdam Leventhal nparity = strtol(p, &end, 10); 1239f94275ceSAdam Leventhal if (errno != 0 || nparity < 1 || nparity >= 255 || 1240f94275ceSAdam Leventhal *end != '\0') 1241f94275ceSAdam Leventhal return (NULL); 1242f94275ceSAdam Leventhal } 124399653d4eSeschrock 124499653d4eSeschrock if (mindev != NULL) 1245f94275ceSAdam Leventhal *mindev = nparity + 1; 1246f94275ceSAdam Leventhal if (maxdev != NULL) 1247f94275ceSAdam Leventhal *maxdev = 255; 124899653d4eSeschrock return (VDEV_TYPE_RAIDZ); 124999653d4eSeschrock } 125099653d4eSeschrock 1251f94275ceSAdam Leventhal if (maxdev != NULL) 1252f94275ceSAdam Leventhal *maxdev = INT_MAX; 1253f94275ceSAdam Leventhal 125499653d4eSeschrock if (strcmp(type, "mirror") == 0) { 125599653d4eSeschrock if (mindev != NULL) 125699653d4eSeschrock *mindev = 2; 125799653d4eSeschrock return (VDEV_TYPE_MIRROR); 125899653d4eSeschrock } 125999653d4eSeschrock 126099653d4eSeschrock if (strcmp(type, "spare") == 0) { 126199653d4eSeschrock if (mindev != NULL) 126299653d4eSeschrock *mindev = 1; 126399653d4eSeschrock return (VDEV_TYPE_SPARE); 126499653d4eSeschrock } 126599653d4eSeschrock 12668654d025Sperrin if (strcmp(type, "log") == 0) { 12678654d025Sperrin if (mindev != NULL) 12688654d025Sperrin *mindev = 1; 12698654d025Sperrin return (VDEV_TYPE_LOG); 12708654d025Sperrin } 12718654d025Sperrin 1272663207adSDon Brady if (strcmp(type, 
VDEV_ALLOC_BIAS_SPECIAL) == 0 || 1273663207adSDon Brady strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) { 1274663207adSDon Brady if (mindev != NULL) 1275663207adSDon Brady *mindev = 1; 1276663207adSDon Brady return (type); 1277663207adSDon Brady } 1278663207adSDon Brady 1279fa94a07fSbrendan if (strcmp(type, "cache") == 0) { 1280fa94a07fSbrendan if (mindev != NULL) 1281fa94a07fSbrendan *mindev = 1; 1282fa94a07fSbrendan return (VDEV_TYPE_L2CACHE); 1283fa94a07fSbrendan } 1284fa94a07fSbrendan 128599653d4eSeschrock return (NULL); 128699653d4eSeschrock } 128799653d4eSeschrock 1288fa9e4066Sahrens /* 1289fa9e4066Sahrens * Construct a syntactically valid vdev specification, 1290fa9e4066Sahrens * and ensure that all devices and files exist and can be opened. 1291fa9e4066Sahrens * Note: we don't bother freeing anything in the error paths 1292fa9e4066Sahrens * because the program is just going to exit anyway. 1293fa9e4066Sahrens */ 1294fa9e4066Sahrens nvlist_t * 12955711d393Sloli construct_spec(nvlist_t *props, int argc, char **argv) 1296fa9e4066Sahrens { 1297fa94a07fSbrendan nvlist_t *nvroot, *nv, **top, **spares, **l2cache; 1298f94275ceSAdam Leventhal int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache; 129999653d4eSeschrock const char *type; 1300663207adSDon Brady uint64_t is_log, is_special, is_dedup; 13018654d025Sperrin boolean_t seen_logs; 1302fa9e4066Sahrens 1303fa9e4066Sahrens top = NULL; 1304fa9e4066Sahrens toplevels = 0; 130599653d4eSeschrock spares = NULL; 1306fa94a07fSbrendan l2cache = NULL; 130799653d4eSeschrock nspares = 0; 13088654d025Sperrin nlogs = 0; 1309fa94a07fSbrendan nl2cache = 0; 1310663207adSDon Brady is_log = is_special = is_dedup = B_FALSE; 13118654d025Sperrin seen_logs = B_FALSE; 1312fa9e4066Sahrens 1313fa9e4066Sahrens while (argc > 0) { 1314fa9e4066Sahrens nv = NULL; 1315fa9e4066Sahrens 1316fa9e4066Sahrens /* 1317fa9e4066Sahrens * If it's a mirror or raidz, the subsequent arguments are 1318fa9e4066Sahrens * its leaves -- until we encounter the next 
mirror or raidz. 1319fa9e4066Sahrens */ 1320f94275ceSAdam Leventhal if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) { 1321fa9e4066Sahrens nvlist_t **child = NULL; 132299653d4eSeschrock int c, children = 0; 132399653d4eSeschrock 13248654d025Sperrin if (strcmp(type, VDEV_TYPE_SPARE) == 0) { 13258654d025Sperrin if (spares != NULL) { 13268654d025Sperrin (void) fprintf(stderr, 13278654d025Sperrin gettext("invalid vdev " 13288654d025Sperrin "specification: 'spare' can be " 13298654d025Sperrin "specified only once\n")); 13308654d025Sperrin return (NULL); 13318654d025Sperrin } 1332663207adSDon Brady is_log = is_special = is_dedup = B_FALSE; 13338654d025Sperrin } 13348654d025Sperrin 13358654d025Sperrin if (strcmp(type, VDEV_TYPE_LOG) == 0) { 13368654d025Sperrin if (seen_logs) { 13378654d025Sperrin (void) fprintf(stderr, 13388654d025Sperrin gettext("invalid vdev " 13398654d025Sperrin "specification: 'log' can be " 13408654d025Sperrin "specified only once\n")); 13418654d025Sperrin return (NULL); 13428654d025Sperrin } 13438654d025Sperrin seen_logs = B_TRUE; 13448654d025Sperrin is_log = B_TRUE; 1345663207adSDon Brady is_special = B_FALSE; 1346663207adSDon Brady is_dedup = B_FALSE; 13478654d025Sperrin argc--; 13488654d025Sperrin argv++; 13498654d025Sperrin /* 13508654d025Sperrin * A log is not a real grouping device. 13518654d025Sperrin * We just set is_log and continue. 
13528654d025Sperrin */ 13538654d025Sperrin continue; 13548654d025Sperrin } 13558654d025Sperrin 1356663207adSDon Brady if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) { 1357663207adSDon Brady is_special = B_TRUE; 1358663207adSDon Brady is_log = B_FALSE; 1359663207adSDon Brady is_dedup = B_FALSE; 1360663207adSDon Brady argc--; 1361663207adSDon Brady argv++; 1362663207adSDon Brady continue; 1363663207adSDon Brady } 1364663207adSDon Brady 1365663207adSDon Brady if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) { 1366663207adSDon Brady is_dedup = B_TRUE; 1367663207adSDon Brady is_log = B_FALSE; 1368663207adSDon Brady is_special = B_FALSE; 1369663207adSDon Brady argc--; 1370663207adSDon Brady argv++; 1371663207adSDon Brady continue; 1372663207adSDon Brady } 1373663207adSDon Brady 1374fa94a07fSbrendan if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { 1375fa94a07fSbrendan if (l2cache != NULL) { 1376fa94a07fSbrendan (void) fprintf(stderr, 1377fa94a07fSbrendan gettext("invalid vdev " 1378fa94a07fSbrendan "specification: 'cache' can be " 1379fa94a07fSbrendan "specified only once\n")); 1380fa94a07fSbrendan return (NULL); 1381fa94a07fSbrendan } 1382663207adSDon Brady is_log = is_special = is_dedup = B_FALSE; 1383fa94a07fSbrendan } 1384fa94a07fSbrendan 1385663207adSDon Brady if (is_log || is_special || is_dedup) { 13868654d025Sperrin if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { 13878654d025Sperrin (void) fprintf(stderr, 13888654d025Sperrin gettext("invalid vdev " 1389663207adSDon Brady "specification: unsupported '%s' " 1390663207adSDon Brady "device: %s\n"), is_log ? 
"log" : 1391663207adSDon Brady "special", type); 13928654d025Sperrin return (NULL); 13938654d025Sperrin } 13948654d025Sperrin nlogs++; 139599653d4eSeschrock } 1396fa9e4066Sahrens 1397fa9e4066Sahrens for (c = 1; c < argc; c++) { 1398f94275ceSAdam Leventhal if (is_grouping(argv[c], NULL, NULL) != NULL) 1399fa9e4066Sahrens break; 1400fa9e4066Sahrens children++; 1401fa9e4066Sahrens child = realloc(child, 1402fa9e4066Sahrens children * sizeof (nvlist_t *)); 1403fa9e4066Sahrens if (child == NULL) 14045ad82045Snd zpool_no_memory(); 14055711d393Sloli if ((nv = make_leaf_vdev(props, argv[c], 14065711d393Sloli B_FALSE)) == NULL) 1407fa9e4066Sahrens return (NULL); 1408fa9e4066Sahrens child[children - 1] = nv; 1409fa9e4066Sahrens } 1410fa9e4066Sahrens 141199653d4eSeschrock if (children < mindev) { 141299653d4eSeschrock (void) fprintf(stderr, gettext("invalid vdev " 141399653d4eSeschrock "specification: %s requires at least %d " 141499653d4eSeschrock "devices\n"), argv[0], mindev); 1415fa9e4066Sahrens return (NULL); 1416fa9e4066Sahrens } 1417fa9e4066Sahrens 1418f94275ceSAdam Leventhal if (children > maxdev) { 1419f94275ceSAdam Leventhal (void) fprintf(stderr, gettext("invalid vdev " 1420f94275ceSAdam Leventhal "specification: %s supports no more than " 1421f94275ceSAdam Leventhal "%d devices\n"), argv[0], maxdev); 1422f94275ceSAdam Leventhal return (NULL); 1423f94275ceSAdam Leventhal } 1424f94275ceSAdam Leventhal 142599653d4eSeschrock argc -= c; 142699653d4eSeschrock argv += c; 142799653d4eSeschrock 142899653d4eSeschrock if (strcmp(type, VDEV_TYPE_SPARE) == 0) { 142999653d4eSeschrock spares = child; 143099653d4eSeschrock nspares = children; 143199653d4eSeschrock continue; 1432fa94a07fSbrendan } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { 1433fa94a07fSbrendan l2cache = child; 1434fa94a07fSbrendan nl2cache = children; 1435fa94a07fSbrendan continue; 143699653d4eSeschrock } else { 1437663207adSDon Brady /* create a top-level vdev with children */ 143899653d4eSeschrock 
verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 143999653d4eSeschrock 0) == 0); 144099653d4eSeschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, 144199653d4eSeschrock type) == 0); 14428654d025Sperrin verify(nvlist_add_uint64(nv, 14438654d025Sperrin ZPOOL_CONFIG_IS_LOG, is_log) == 0); 1444663207adSDon Brady if (is_log) 1445663207adSDon Brady verify(nvlist_add_string(nv, 1446663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1447663207adSDon Brady VDEV_ALLOC_BIAS_LOG) == 0); 1448663207adSDon Brady if (is_special) { 1449663207adSDon Brady verify(nvlist_add_string(nv, 1450663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1451663207adSDon Brady VDEV_ALLOC_BIAS_SPECIAL) == 0); 1452663207adSDon Brady } 1453663207adSDon Brady if (is_dedup) { 1454663207adSDon Brady verify(nvlist_add_string(nv, 1455663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1456663207adSDon Brady VDEV_ALLOC_BIAS_DEDUP) == 0); 1457663207adSDon Brady } 145899653d4eSeschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { 145999653d4eSeschrock verify(nvlist_add_uint64(nv, 146099653d4eSeschrock ZPOOL_CONFIG_NPARITY, 146199653d4eSeschrock mindev - 1) == 0); 146299653d4eSeschrock } 146399653d4eSeschrock verify(nvlist_add_nvlist_array(nv, 146499653d4eSeschrock ZPOOL_CONFIG_CHILDREN, child, 146599653d4eSeschrock children) == 0); 1466fa9e4066Sahrens 146799653d4eSeschrock for (c = 0; c < children; c++) 146899653d4eSeschrock nvlist_free(child[c]); 146999653d4eSeschrock free(child); 147099653d4eSeschrock } 1471fa9e4066Sahrens } else { 1472fa9e4066Sahrens /* 1473fa9e4066Sahrens * We have a device. Pass off to make_leaf_vdev() to 1474fa9e4066Sahrens * construct the appropriate nvlist describing the vdev. 
1475fa9e4066Sahrens */ 14765711d393Sloli if ((nv = make_leaf_vdev(props, argv[0], is_log)) 14775711d393Sloli == NULL) 1478fa9e4066Sahrens return (NULL); 14798654d025Sperrin if (is_log) 14808654d025Sperrin nlogs++; 1481663207adSDon Brady if (is_special) { 1482663207adSDon Brady verify(nvlist_add_string(nv, 1483663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1484663207adSDon Brady VDEV_ALLOC_BIAS_SPECIAL) == 0); 1485663207adSDon Brady } 1486663207adSDon Brady if (is_dedup) { 1487663207adSDon Brady verify(nvlist_add_string(nv, 1488663207adSDon Brady ZPOOL_CONFIG_ALLOCATION_BIAS, 1489663207adSDon Brady VDEV_ALLOC_BIAS_DEDUP) == 0); 1490663207adSDon Brady } 1491fa9e4066Sahrens argc--; 1492fa9e4066Sahrens argv++; 1493fa9e4066Sahrens } 1494fa9e4066Sahrens 1495fa9e4066Sahrens toplevels++; 1496fa9e4066Sahrens top = realloc(top, toplevels * sizeof (nvlist_t *)); 1497fa9e4066Sahrens if (top == NULL) 14985ad82045Snd zpool_no_memory(); 1499fa9e4066Sahrens top[toplevels - 1] = nv; 1500fa9e4066Sahrens } 1501fa9e4066Sahrens 1502fa94a07fSbrendan if (toplevels == 0 && nspares == 0 && nl2cache == 0) { 150399653d4eSeschrock (void) fprintf(stderr, gettext("invalid vdev " 150499653d4eSeschrock "specification: at least one toplevel vdev must be " 150599653d4eSeschrock "specified\n")); 150699653d4eSeschrock return (NULL); 150799653d4eSeschrock } 150899653d4eSeschrock 15098654d025Sperrin if (seen_logs && nlogs == 0) { 15108654d025Sperrin (void) fprintf(stderr, gettext("invalid vdev specification: " 15118654d025Sperrin "log requires at least 1 device\n")); 15128654d025Sperrin return (NULL); 15138654d025Sperrin } 15148654d025Sperrin 1515fa9e4066Sahrens /* 1516fa9e4066Sahrens * Finally, create nvroot and add all top-level vdevs to it. 
1517fa9e4066Sahrens */ 1518fa9e4066Sahrens verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); 1519fa9e4066Sahrens verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 1520fa9e4066Sahrens VDEV_TYPE_ROOT) == 0); 1521fa9e4066Sahrens verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1522fa9e4066Sahrens top, toplevels) == 0); 152399653d4eSeschrock if (nspares != 0) 152499653d4eSeschrock verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 152599653d4eSeschrock spares, nspares) == 0); 1526fa94a07fSbrendan if (nl2cache != 0) 1527fa94a07fSbrendan verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 1528fa94a07fSbrendan l2cache, nl2cache) == 0); 1529fa9e4066Sahrens 1530fa9e4066Sahrens for (t = 0; t < toplevels; t++) 1531fa9e4066Sahrens nvlist_free(top[t]); 153299653d4eSeschrock for (t = 0; t < nspares; t++) 153399653d4eSeschrock nvlist_free(spares[t]); 1534fa94a07fSbrendan for (t = 0; t < nl2cache; t++) 1535fa94a07fSbrendan nvlist_free(l2cache[t]); 153699653d4eSeschrock if (spares) 153799653d4eSeschrock free(spares); 1538fa94a07fSbrendan if (l2cache) 1539fa94a07fSbrendan free(l2cache); 1540fa9e4066Sahrens free(top); 1541fa9e4066Sahrens 1542fa9e4066Sahrens return (nvroot); 1543fa9e4066Sahrens } 1544fa9e4066Sahrens 15451195e687SMark J Musante nvlist_t * 15461195e687SMark J Musante split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, 15471195e687SMark J Musante splitflags_t flags, int argc, char **argv) 15481195e687SMark J Musante { 15491195e687SMark J Musante nvlist_t *newroot = NULL, **child; 15501195e687SMark J Musante uint_t c, children; 15517855d95bSToomas Soome zpool_boot_label_t boot_type; 15521195e687SMark J Musante 15531195e687SMark J Musante if (argc > 0) { 15545711d393Sloli if ((newroot = construct_spec(props, argc, argv)) == NULL) { 15551195e687SMark J Musante (void) fprintf(stderr, gettext("Unable to build a " 15561195e687SMark J Musante "pool from the specified devices\n")); 15571195e687SMark J Musante return 
(NULL); 15581195e687SMark J Musante } 15591195e687SMark J Musante 15607855d95bSToomas Soome if (zpool_is_bootable(zhp)) 15617855d95bSToomas Soome boot_type = ZPOOL_COPY_BOOT_LABEL; 15627855d95bSToomas Soome else 15637855d95bSToomas Soome boot_type = ZPOOL_NO_BOOT_LABEL; 15647855d95bSToomas Soome 15657855d95bSToomas Soome if (!flags.dryrun && 15667855d95bSToomas Soome make_disks(zhp, newroot, boot_type, 0) != 0) { 15671195e687SMark J Musante nvlist_free(newroot); 15681195e687SMark J Musante return (NULL); 15691195e687SMark J Musante } 15701195e687SMark J Musante 15711195e687SMark J Musante /* avoid any tricks in the spec */ 15721195e687SMark J Musante verify(nvlist_lookup_nvlist_array(newroot, 15731195e687SMark J Musante ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); 15741195e687SMark J Musante for (c = 0; c < children; c++) { 15751195e687SMark J Musante char *path; 15761195e687SMark J Musante const char *type; 15771195e687SMark J Musante int min, max; 15781195e687SMark J Musante 15791195e687SMark J Musante verify(nvlist_lookup_string(child[c], 15801195e687SMark J Musante ZPOOL_CONFIG_PATH, &path) == 0); 15811195e687SMark J Musante if ((type = is_grouping(path, &min, &max)) != NULL) { 15821195e687SMark J Musante (void) fprintf(stderr, gettext("Cannot use " 15831195e687SMark J Musante "'%s' as a device for splitting\n"), type); 15841195e687SMark J Musante nvlist_free(newroot); 15851195e687SMark J Musante return (NULL); 15861195e687SMark J Musante } 15871195e687SMark J Musante } 15881195e687SMark J Musante } 15891195e687SMark J Musante 15901195e687SMark J Musante if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) { 1591aab83bb8SJosef 'Jeff' Sipek nvlist_free(newroot); 15921195e687SMark J Musante return (NULL); 15931195e687SMark J Musante } 15941195e687SMark J Musante 15951195e687SMark J Musante return (newroot); 15961195e687SMark J Musante } 15978488aeb5Staylor 1598663207adSDon Brady static int 1599663207adSDon Brady num_normal_vdevs(nvlist_t *nvroot) 
1600663207adSDon Brady { 1601663207adSDon Brady nvlist_t **top; 1602663207adSDon Brady uint_t t, toplevels, normal = 0; 1603663207adSDon Brady 1604663207adSDon Brady verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1605663207adSDon Brady &top, &toplevels) == 0); 1606663207adSDon Brady 1607663207adSDon Brady for (t = 0; t < toplevels; t++) { 1608663207adSDon Brady uint64_t log = B_FALSE; 1609663207adSDon Brady 1610663207adSDon Brady (void) nvlist_lookup_uint64(top[t], ZPOOL_CONFIG_IS_LOG, &log); 1611663207adSDon Brady if (log) 1612663207adSDon Brady continue; 1613663207adSDon Brady if (nvlist_exists(top[t], ZPOOL_CONFIG_ALLOCATION_BIAS)) 1614663207adSDon Brady continue; 1615663207adSDon Brady 1616663207adSDon Brady normal++; 1617663207adSDon Brady } 1618663207adSDon Brady 1619663207adSDon Brady return (normal); 1620663207adSDon Brady } 1621663207adSDon Brady 1622fa9e4066Sahrens /* 1623fa9e4066Sahrens * Get and validate the contents of the given vdev specification. This ensures 1624fa9e4066Sahrens * that the nvlist returned is well-formed, that all the devices exist, and that 1625fa9e4066Sahrens * they are not currently in use by any other known consumer. The 'poolconfig' 1626fa9e4066Sahrens * parameter is the current configuration of the pool when adding devices 1627fa9e4066Sahrens * existing pool, and is used to perform additional checks, such as changing the 1628fa9e4066Sahrens * replication level of the pool. It can be 'NULL' to indicate that this is a 1629fa9e4066Sahrens * new pool. The 'force' flag controls whether devices should be forcefully 1630fa9e4066Sahrens * added, even if they appear in use. 
1631fa9e4066Sahrens */ 1632fa9e4066Sahrens nvlist_t * 16335711d393Sloli make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, 16347855d95bSToomas Soome boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, 16357855d95bSToomas Soome uint64_t boot_size, int argc, char **argv) 1636fa9e4066Sahrens { 1637fa9e4066Sahrens nvlist_t *newroot; 16388488aeb5Staylor nvlist_t *poolconfig = NULL; 1639fa9e4066Sahrens is_force = force; 1640fa9e4066Sahrens 1641fa9e4066Sahrens /* 1642fa9e4066Sahrens * Construct the vdev specification. If this is successful, we know 1643fa9e4066Sahrens * that we have a valid specification, and that all devices can be 1644fa9e4066Sahrens * opened. 1645fa9e4066Sahrens */ 16465711d393Sloli if ((newroot = construct_spec(props, argc, argv)) == NULL) 1647fa9e4066Sahrens return (NULL); 1648fa9e4066Sahrens 16498488aeb5Staylor if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) 16508488aeb5Staylor return (NULL); 16518488aeb5Staylor 1652fa9e4066Sahrens /* 1653fa9e4066Sahrens * Validate each device to make sure that its not shared with another 1654fa9e4066Sahrens * subsystem. We do this even if 'force' is set, because there are some 1655fa9e4066Sahrens * uses (such as a dedicated dump device) that even '-f' cannot 1656fa9e4066Sahrens * override. 1657fa9e4066Sahrens */ 165875fbdf9bSBasil Crow if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) { 1659fa9e4066Sahrens nvlist_free(newroot); 1660fa9e4066Sahrens return (NULL); 1661fa9e4066Sahrens } 1662fa9e4066Sahrens 1663fa9e4066Sahrens /* 1664fa9e4066Sahrens * Check the replication level of the given vdevs and report any errors 1665fa9e4066Sahrens * found. We include the existing pool spec, if any, as we need to 1666fa9e4066Sahrens * catch changes against the existing replication level. 
1667fa9e4066Sahrens */ 1668fa9e4066Sahrens if (check_rep && check_replication(poolconfig, newroot) != 0) { 1669fa9e4066Sahrens nvlist_free(newroot); 1670fa9e4066Sahrens return (NULL); 1671fa9e4066Sahrens } 1672fa9e4066Sahrens 1673663207adSDon Brady /* 1674663207adSDon Brady * On pool create the new vdev spec must have one normal vdev. 1675663207adSDon Brady */ 1676663207adSDon Brady if (poolconfig == NULL && num_normal_vdevs(newroot) == 0) { 1677663207adSDon Brady vdev_error(gettext("at least one general top-level vdev must " 1678663207adSDon Brady "be specified\n")); 1679663207adSDon Brady nvlist_free(newroot); 1680663207adSDon Brady return (NULL); 1681663207adSDon Brady } 1682663207adSDon Brady 1683fa9e4066Sahrens /* 1684fa9e4066Sahrens * Run through the vdev specification and label any whole disks found. 1685fa9e4066Sahrens */ 16867855d95bSToomas Soome if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) { 1687fa9e4066Sahrens nvlist_free(newroot); 1688fa9e4066Sahrens return (NULL); 1689fa9e4066Sahrens } 1690fa9e4066Sahrens 1691fa9e4066Sahrens return (newroot); 1692fa9e4066Sahrens } 1693