1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5bef6b7d2Swebaker * Common Development and Distribution License (the "License"). 6bef6b7d2Swebaker * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22f13665b7Sbo zhou - Sun Microsystems - Beijing China * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 236fe4f300SPavel Zakharov * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 24295438baSHans Rosenfeld * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 251b500975SMike Gerdts * Copyright 2020 Joyent, Inc. 26*30c304d9SJoshua M. Clulow * Copyright 2020 Joshua M. 
Clulow <josh@sysmgr.org> 27fa9e4066Sahrens */ 28fa9e4066Sahrens 29fa9e4066Sahrens #include <sys/zfs_context.h> 30dcba9f3fSGeorge Wilson #include <sys/spa_impl.h> 31e7cbe64fSgw #include <sys/refcount.h> 32fa9e4066Sahrens #include <sys/vdev_impl.h> 33084fd14fSBrian Behlendorf #include <sys/vdev_trim.h> 34770499e1SDan Kimmel #include <sys/abd.h> 35fa9e4066Sahrens #include <sys/fs/zfs.h> 36fa9e4066Sahrens #include <sys/zio.h> 37afefbcddSeschrock #include <sys/sunldi.h> 384263d13fSGeorge Wilson #include <sys/efi_partition.h> 3951ece835Seschrock #include <sys/fm/fs/zfs.h> 40ac04831dSMike Gerdts #include <sys/ddi.h> 41fa9e4066Sahrens 42fb05b94aSJerry Jelinek /* 43fc5c75cfSJerry Jelinek * Tunable to disable TRIM in case we're using a problematic SSD. 44fb05b94aSJerry Jelinek */ 45fc5c75cfSJerry Jelinek uint_t zfs_no_trim = 0; 46fb05b94aSJerry Jelinek 47f8fdf681SPrakash Surya /* 48f8fdf681SPrakash Surya * Tunable parameter for debugging or performance analysis. Setting this 49f8fdf681SPrakash Surya * will cause pool corruption on power loss if a volatile out-of-order 50f8fdf681SPrakash Surya * write cache is enabled. 51f8fdf681SPrakash Surya */ 52f8fdf681SPrakash Surya boolean_t zfs_nocacheflush = B_FALSE; 53f8fdf681SPrakash Surya 54fa9e4066Sahrens /* 55fa9e4066Sahrens * Virtual device vector for disks. 56fa9e4066Sahrens */ 57fa9e4066Sahrens 58fa9e4066Sahrens extern ldi_ident_t zfs_li; 59fa9e4066Sahrens 6039cddb10SJoshua M. Clulow static void vdev_disk_close(vdev_t *); 6139cddb10SJoshua M. 
Clulow 62ac04831dSMike Gerdts typedef struct vdev_disk { 63ac04831dSMike Gerdts ddi_devid_t vd_devid; 64ac04831dSMike Gerdts char *vd_minor; 65ac04831dSMike Gerdts ldi_handle_t vd_lh; 66ac04831dSMike Gerdts list_t vd_ldi_cbs; 67ac04831dSMike Gerdts boolean_t vd_ldi_offline; 68ac04831dSMike Gerdts } vdev_disk_t; 69ac04831dSMike Gerdts 70ac04831dSMike Gerdts typedef struct vdev_disk_buf { 71ac04831dSMike Gerdts buf_t vdb_buf; 72ac04831dSMike Gerdts zio_t *vdb_io; 73ac04831dSMike Gerdts } vdev_disk_buf_t; 74ac04831dSMike Gerdts 7539cddb10SJoshua M. Clulow typedef struct vdev_disk_ldi_cb { 7639cddb10SJoshua M. Clulow list_node_t lcb_next; 7739cddb10SJoshua M. Clulow ldi_callback_id_t lcb_id; 7839cddb10SJoshua M. Clulow } vdev_disk_ldi_cb_t; 7939cddb10SJoshua M. Clulow 806fe4f300SPavel Zakharov /* 816fe4f300SPavel Zakharov * Bypass the devid when opening a disk vdev. 826fe4f300SPavel Zakharov * There have been issues where the devids of several devices were shuffled, 836fe4f300SPavel Zakharov * causing pool open failures. Note, that this flag is intended to be used 846fe4f300SPavel Zakharov * for pool recovery only. 856fe4f300SPavel Zakharov * 866fe4f300SPavel Zakharov * Note that if a pool is imported with the devids bypassed, all its vdevs will 876fe4f300SPavel Zakharov * cease storing devid information permanently. In practice, the devid is rarely 886fe4f300SPavel Zakharov * useful as vdev paths do not tend to change unless the hardware is 896fe4f300SPavel Zakharov * reconfigured. That said, if the paths do change and a pool fails to open 906fe4f300SPavel Zakharov * automatically at boot, a simple zpool import should re-scan the paths and fix 916fe4f300SPavel Zakharov * the issue. 926fe4f300SPavel Zakharov */ 936fe4f300SPavel Zakharov boolean_t vdev_disk_bypass_devid = B_FALSE; 946fe4f300SPavel Zakharov 9539cddb10SJoshua M. Clulow static void 9639cddb10SJoshua M. Clulow vdev_disk_alloc(vdev_t *vd) 9739cddb10SJoshua M. Clulow { 9839cddb10SJoshua M. 
Clulow vdev_disk_t *dvd; 9939cddb10SJoshua M. Clulow 10039cddb10SJoshua M. Clulow dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); 10139cddb10SJoshua M. Clulow /* 10239cddb10SJoshua M. Clulow * Create the LDI event callback list. 10339cddb10SJoshua M. Clulow */ 10439cddb10SJoshua M. Clulow list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t), 10539cddb10SJoshua M. Clulow offsetof(vdev_disk_ldi_cb_t, lcb_next)); 10639cddb10SJoshua M. Clulow } 10739cddb10SJoshua M. Clulow 10839cddb10SJoshua M. Clulow static void 10939cddb10SJoshua M. Clulow vdev_disk_free(vdev_t *vd) 11039cddb10SJoshua M. Clulow { 11139cddb10SJoshua M. Clulow vdev_disk_t *dvd = vd->vdev_tsd; 11239cddb10SJoshua M. Clulow vdev_disk_ldi_cb_t *lcb; 11339cddb10SJoshua M. Clulow 11439cddb10SJoshua M. Clulow if (dvd == NULL) 11539cddb10SJoshua M. Clulow return; 11639cddb10SJoshua M. Clulow 11739cddb10SJoshua M. Clulow /* 11839cddb10SJoshua M. Clulow * We have already closed the LDI handle. Clean up the LDI event 11939cddb10SJoshua M. Clulow * callbacks and free vd->vdev_tsd. 12039cddb10SJoshua M. Clulow */ 12139cddb10SJoshua M. Clulow while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) { 12239cddb10SJoshua M. Clulow list_remove(&dvd->vd_ldi_cbs, lcb); 12339cddb10SJoshua M. Clulow (void) ldi_ev_remove_callbacks(lcb->lcb_id); 12439cddb10SJoshua M. Clulow kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t)); 12539cddb10SJoshua M. Clulow } 12639cddb10SJoshua M. Clulow list_destroy(&dvd->vd_ldi_cbs); 12739cddb10SJoshua M. Clulow kmem_free(dvd, sizeof (vdev_disk_t)); 12839cddb10SJoshua M. Clulow vd->vdev_tsd = NULL; 12939cddb10SJoshua M. Clulow } 13039cddb10SJoshua M. Clulow 13139cddb10SJoshua M. Clulow static int 1321b500975SMike Gerdts vdev_disk_off_notify(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie, 1331b500975SMike Gerdts void *arg, void *ev_data __unused) 13439cddb10SJoshua M. Clulow { 13539cddb10SJoshua M. Clulow vdev_t *vd = (vdev_t *)arg; 13639cddb10SJoshua M. 
Clulow vdev_disk_t *dvd = vd->vdev_tsd; 13739cddb10SJoshua M. Clulow 13839cddb10SJoshua M. Clulow /* 13939cddb10SJoshua M. Clulow * Ignore events other than offline. 14039cddb10SJoshua M. Clulow */ 14139cddb10SJoshua M. Clulow if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) 14239cddb10SJoshua M. Clulow return (LDI_EV_SUCCESS); 14339cddb10SJoshua M. Clulow 14439cddb10SJoshua M. Clulow /* 1451b500975SMike Gerdts * Tell any new threads that stumble upon this vdev that they should not 1461b500975SMike Gerdts * try to do I/O. 14739cddb10SJoshua M. Clulow */ 14839cddb10SJoshua M. Clulow dvd->vd_ldi_offline = B_TRUE; 14939cddb10SJoshua M. Clulow 15039cddb10SJoshua M. Clulow /* 1511b500975SMike Gerdts * Request that the spa_async_thread mark the device as REMOVED and 1521b500975SMike Gerdts * notify FMA of the removal. This should also trigger a vdev_close() 1531b500975SMike Gerdts * in the async thread. 15439cddb10SJoshua M. Clulow */ 15539cddb10SJoshua M. Clulow zfs_post_remove(vd->vdev_spa, vd); 15639cddb10SJoshua M. Clulow vd->vdev_remove_wanted = B_TRUE; 15739cddb10SJoshua M. Clulow spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); 15839cddb10SJoshua M. Clulow 15939cddb10SJoshua M. Clulow return (LDI_EV_SUCCESS); 16039cddb10SJoshua M. Clulow } 16139cddb10SJoshua M. Clulow 16239cddb10SJoshua M. Clulow static void 1631b500975SMike Gerdts vdev_disk_off_finalize(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie, 1641b500975SMike Gerdts int ldi_result, void *arg, void *ev_data __unused) 16539cddb10SJoshua M. Clulow { 16639cddb10SJoshua M. Clulow vdev_t *vd = (vdev_t *)arg; 16739cddb10SJoshua M. Clulow 16839cddb10SJoshua M. Clulow /* 16939cddb10SJoshua M. Clulow * Ignore events other than offline. 17039cddb10SJoshua M. Clulow */ 17139cddb10SJoshua M. Clulow if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) 17239cddb10SJoshua M. Clulow return; 17339cddb10SJoshua M. Clulow 17439cddb10SJoshua M. Clulow /* 17539cddb10SJoshua M. 
Clulow * Request that the vdev be reopened if the offline state change was 17639cddb10SJoshua M. Clulow * unsuccessful. 17739cddb10SJoshua M. Clulow */ 17839cddb10SJoshua M. Clulow if (ldi_result != LDI_EV_SUCCESS) { 17939cddb10SJoshua M. Clulow vd->vdev_probe_wanted = B_TRUE; 18039cddb10SJoshua M. Clulow spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE); 18139cddb10SJoshua M. Clulow } 18239cddb10SJoshua M. Clulow } 18339cddb10SJoshua M. Clulow 18439cddb10SJoshua M. Clulow static ldi_ev_callback_t vdev_disk_off_callb = { 18539cddb10SJoshua M. Clulow .cb_vers = LDI_EV_CB_VERS, 18639cddb10SJoshua M. Clulow .cb_notify = vdev_disk_off_notify, 18739cddb10SJoshua M. Clulow .cb_finalize = vdev_disk_off_finalize 18839cddb10SJoshua M. Clulow }; 18939cddb10SJoshua M. Clulow 19039cddb10SJoshua M. Clulow static void 1911b500975SMike Gerdts vdev_disk_dgrd_finalize(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie, 1921b500975SMike Gerdts int ldi_result, void *arg, void *ev_data __unused) 19339cddb10SJoshua M. Clulow { 19439cddb10SJoshua M. Clulow vdev_t *vd = (vdev_t *)arg; 19539cddb10SJoshua M. Clulow 19639cddb10SJoshua M. Clulow /* 19739cddb10SJoshua M. Clulow * Ignore events other than degrade. 19839cddb10SJoshua M. Clulow */ 19939cddb10SJoshua M. Clulow if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0) 20039cddb10SJoshua M. Clulow return; 20139cddb10SJoshua M. Clulow 20239cddb10SJoshua M. Clulow /* 20339cddb10SJoshua M. Clulow * Degrade events always succeed. Mark the vdev as degraded. 20439cddb10SJoshua M. Clulow * This status is purely informative for the user. 20539cddb10SJoshua M. Clulow */ 20639cddb10SJoshua M. Clulow (void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0); 20739cddb10SJoshua M. Clulow } 20839cddb10SJoshua M. Clulow 20939cddb10SJoshua M. Clulow static ldi_ev_callback_t vdev_disk_dgrd_callb = { 21039cddb10SJoshua M. Clulow .cb_vers = LDI_EV_CB_VERS, 21139cddb10SJoshua M. Clulow .cb_notify = NULL, 21239cddb10SJoshua M. 
Clulow .cb_finalize = vdev_disk_dgrd_finalize 21339cddb10SJoshua M. Clulow }; 21439cddb10SJoshua M. Clulow 215dcba9f3fSGeorge Wilson static void 216dcba9f3fSGeorge Wilson vdev_disk_hold(vdev_t *vd) 217dcba9f3fSGeorge Wilson { 218dcba9f3fSGeorge Wilson ddi_devid_t devid; 219dcba9f3fSGeorge Wilson char *minor; 220dcba9f3fSGeorge Wilson 221dcba9f3fSGeorge Wilson ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); 222dcba9f3fSGeorge Wilson 223dcba9f3fSGeorge Wilson /* 224dcba9f3fSGeorge Wilson * We must have a pathname, and it must be absolute. 225dcba9f3fSGeorge Wilson */ 226dcba9f3fSGeorge Wilson if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') 227dcba9f3fSGeorge Wilson return; 228dcba9f3fSGeorge Wilson 229dcba9f3fSGeorge Wilson /* 230dcba9f3fSGeorge Wilson * Only prefetch path and devid info if the device has 231dcba9f3fSGeorge Wilson * never been opened. 232dcba9f3fSGeorge Wilson */ 233dcba9f3fSGeorge Wilson if (vd->vdev_tsd != NULL) 234dcba9f3fSGeorge Wilson return; 235dcba9f3fSGeorge Wilson 236dcba9f3fSGeorge Wilson if (vd->vdev_wholedisk == -1ULL) { 237dcba9f3fSGeorge Wilson size_t len = strlen(vd->vdev_path) + 3; 238dcba9f3fSGeorge Wilson char *buf = kmem_alloc(len, KM_SLEEP); 239dcba9f3fSGeorge Wilson 240dcba9f3fSGeorge Wilson (void) snprintf(buf, len, "%ss0", vd->vdev_path); 241dcba9f3fSGeorge Wilson 242dcba9f3fSGeorge Wilson (void) ldi_vp_from_name(buf, &vd->vdev_name_vp); 243dcba9f3fSGeorge Wilson kmem_free(buf, len); 244dcba9f3fSGeorge Wilson } 245dcba9f3fSGeorge Wilson 246dcba9f3fSGeorge Wilson if (vd->vdev_name_vp == NULL) 247dcba9f3fSGeorge Wilson (void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp); 248dcba9f3fSGeorge Wilson 249dcba9f3fSGeorge Wilson if (vd->vdev_devid != NULL && 250dcba9f3fSGeorge Wilson ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) { 251dcba9f3fSGeorge Wilson (void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp); 252dcba9f3fSGeorge Wilson ddi_devid_str_free(minor); 253dcba9f3fSGeorge Wilson 
ddi_devid_free(devid); 254dcba9f3fSGeorge Wilson } 255dcba9f3fSGeorge Wilson } 256dcba9f3fSGeorge Wilson 257dcba9f3fSGeorge Wilson static void 258dcba9f3fSGeorge Wilson vdev_disk_rele(vdev_t *vd) 259dcba9f3fSGeorge Wilson { 260dcba9f3fSGeorge Wilson ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); 261dcba9f3fSGeorge Wilson 262dcba9f3fSGeorge Wilson if (vd->vdev_name_vp) { 263dcba9f3fSGeorge Wilson VN_RELE_ASYNC(vd->vdev_name_vp, 264dcba9f3fSGeorge Wilson dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool)); 265dcba9f3fSGeorge Wilson vd->vdev_name_vp = NULL; 266dcba9f3fSGeorge Wilson } 267dcba9f3fSGeorge Wilson if (vd->vdev_devid_vp) { 268dcba9f3fSGeorge Wilson VN_RELE_ASYNC(vd->vdev_devid_vp, 269dcba9f3fSGeorge Wilson dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool)); 270dcba9f3fSGeorge Wilson vd->vdev_devid_vp = NULL; 271dcba9f3fSGeorge Wilson } 272dcba9f3fSGeorge Wilson } 273dcba9f3fSGeorge Wilson 274a5b57771SDan McDonald /* 275a5b57771SDan McDonald * We want to be loud in DEBUG kernels when DKIOCGMEDIAINFOEXT fails, or when 276a5b57771SDan McDonald * even a fallback to DKIOCGMEDIAINFO fails. 277a5b57771SDan McDonald */ 278a5b57771SDan McDonald #ifdef DEBUG 279a5b57771SDan McDonald #define VDEV_DEBUG(...) cmn_err(CE_NOTE, __VA_ARGS__) 280a5b57771SDan McDonald #else 281a5b57771SDan McDonald #define VDEV_DEBUG(...) /* Nothing... */ 282a5b57771SDan McDonald #endif 283a5b57771SDan McDonald 284fa9e4066Sahrens static int 2854263d13fSGeorge Wilson vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 2864263d13fSGeorge Wilson uint64_t *ashift) 287fa9e4066Sahrens { 2888ad4d6ddSJeff Bonwick spa_t *spa = vd->vdev_spa; 28939cddb10SJoshua M. Clulow vdev_disk_t *dvd = vd->vdev_tsd; 29039cddb10SJoshua M. Clulow ldi_ev_cookie_t ecookie; 29139cddb10SJoshua M. 
Clulow vdev_disk_ldi_cb_t *lcb; 292a5b57771SDan McDonald union { 293a5b57771SDan McDonald struct dk_minfo_ext ude; 294a5b57771SDan McDonald struct dk_minfo ud; 295a5b57771SDan McDonald } dks; 296a5b57771SDan McDonald struct dk_minfo_ext *dkmext = &dks.ude; 297a5b57771SDan McDonald struct dk_minfo *dkm = &dks.ud; 298084fd14fSBrian Behlendorf int error, can_free; 299e14bb325SJeff Bonwick dev_t dev; 300e14bb325SJeff Bonwick int otyp; 301fb02ae02SGeorge Wilson boolean_t validate_devid = B_FALSE; 302a5b57771SDan McDonald uint64_t capacity = 0, blksz = 0, pbsize; 303fa9e4066Sahrens 304fa9e4066Sahrens /* 305fa9e4066Sahrens * We must have a pathname, and it must be absolute. 306fa9e4066Sahrens */ 307fa9e4066Sahrens if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 308fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 309be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 310fa9e4066Sahrens } 311fa9e4066Sahrens 312095bcd66SGeorge Wilson /* 313095bcd66SGeorge Wilson * Reopen the device if it's not currently open. Otherwise, 314095bcd66SGeorge Wilson * just update the physical size of the device. 315095bcd66SGeorge Wilson */ 31639cddb10SJoshua M. Clulow if (dvd != NULL) { 3171b500975SMike Gerdts ASSERT(vd->vdev_reopening); 3181b500975SMike Gerdts goto skip_open; 319095bcd66SGeorge Wilson } 320095bcd66SGeorge Wilson 32139cddb10SJoshua M. Clulow /* 32239cddb10SJoshua M. Clulow * Create vd->vdev_tsd. 32339cddb10SJoshua M. Clulow */ 32439cddb10SJoshua M. Clulow vdev_disk_alloc(vd); 32539cddb10SJoshua M. Clulow dvd = vd->vdev_tsd; 326fa9e4066Sahrens 3276fe4f300SPavel Zakharov /* 3286fe4f300SPavel Zakharov * Allow bypassing the devid. 
3296fe4f300SPavel Zakharov */ 3306fe4f300SPavel Zakharov if (vd->vdev_devid != NULL && vdev_disk_bypass_devid) { 3316fe4f300SPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open, devid %s bypassed", 3326fe4f300SPavel Zakharov vd->vdev_devid); 3336fe4f300SPavel Zakharov spa_strfree(vd->vdev_devid); 3346fe4f300SPavel Zakharov vd->vdev_devid = NULL; 3356fe4f300SPavel Zakharov } 3366fe4f300SPavel Zakharov 337fa9e4066Sahrens /* 338fa9e4066Sahrens * When opening a disk device, we want to preserve the user's original 339fa9e4066Sahrens * intent. We always want to open the device by the path the user gave 3401724dc7bSJoshua M. Clulow * us, even if it is one of multiple paths to the same device. But we 341fa9e4066Sahrens * also want to be able to survive disks being removed/recabled. 342fa9e4066Sahrens * Therefore the sequence of opening devices is: 343fa9e4066Sahrens * 344afefbcddSeschrock * 1. Try opening the device by path. For legacy pools without the 345afefbcddSeschrock * 'whole_disk' property, attempt to fix the path by appending 's0'. 346fa9e4066Sahrens * 347fa9e4066Sahrens * 2. If the devid of the device matches the stored value, return 348fa9e4066Sahrens * success. 349fa9e4066Sahrens * 350fa9e4066Sahrens * 3. Otherwise, the device may have moved. Try opening the device 351fa9e4066Sahrens * by the devid instead. 
352fa9e4066Sahrens */ 353fa9e4066Sahrens if (vd->vdev_devid != NULL) { 354fa9e4066Sahrens if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, 355fa9e4066Sahrens &dvd->vd_minor) != 0) { 356fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 3573ee8c80cSPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open: invalid " 3583ee8c80cSPavel Zakharov "vdev_devid '%s'", vd->vdev_devid); 359be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 360fa9e4066Sahrens } 361fa9e4066Sahrens } 362fa9e4066Sahrens 363fa9e4066Sahrens error = EINVAL; /* presume failure */ 364fa9e4066Sahrens 365095bcd66SGeorge Wilson if (vd->vdev_path != NULL) { 366afefbcddSeschrock if (vd->vdev_wholedisk == -1ULL) { 367afefbcddSeschrock size_t len = strlen(vd->vdev_path) + 3; 368afefbcddSeschrock char *buf = kmem_alloc(len, KM_SLEEP); 369afefbcddSeschrock 370afefbcddSeschrock (void) snprintf(buf, len, "%ss0", vd->vdev_path); 371afefbcddSeschrock 37239cddb10SJoshua M. Clulow error = ldi_open_by_name(buf, spa_mode(spa), kcred, 37339cddb10SJoshua M. Clulow &dvd->vd_lh, zfs_li); 37439cddb10SJoshua M. Clulow if (error == 0) { 375afefbcddSeschrock spa_strfree(vd->vdev_path); 376afefbcddSeschrock vd->vdev_path = buf; 377afefbcddSeschrock vd->vdev_wholedisk = 1ULL; 378afefbcddSeschrock } else { 379afefbcddSeschrock kmem_free(buf, len); 380afefbcddSeschrock } 381afefbcddSeschrock } 382fa9e4066Sahrens 38339cddb10SJoshua M. Clulow /* 38439cddb10SJoshua M. Clulow * If we have not yet opened the device, try to open it by the 38539cddb10SJoshua M. Clulow * specified path. 38639cddb10SJoshua M. Clulow */ 38739cddb10SJoshua M. Clulow if (error != 0) { 38839cddb10SJoshua M. Clulow error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), 38939cddb10SJoshua M. Clulow kcred, &dvd->vd_lh, zfs_li); 39039cddb10SJoshua M. Clulow } 391fa9e4066Sahrens 392fa9e4066Sahrens /* 393fa9e4066Sahrens * Compare the devid to the stored value. 394fa9e4066Sahrens */ 3956af23589SJoshua M. 
Clulow if (error == 0 && vd->vdev_devid != NULL) { 3966af23589SJoshua M. Clulow ddi_devid_t devid = NULL; 3976af23589SJoshua M. Clulow 3986af23589SJoshua M. Clulow if (ldi_get_devid(dvd->vd_lh, &devid) != 0) { 3996af23589SJoshua M. Clulow /* 4006af23589SJoshua M. Clulow * We expected a devid on this device but it no 4016af23589SJoshua M. Clulow * longer appears to have one. The validation 4026af23589SJoshua M. Clulow * step may need to remove it from the 4036af23589SJoshua M. Clulow * configuration. 4046af23589SJoshua M. Clulow */ 4056af23589SJoshua M. Clulow validate_devid = B_TRUE; 4066af23589SJoshua M. Clulow 4076af23589SJoshua M. Clulow } else if (ddi_devid_compare(devid, dvd->vd_devid) != 4086af23589SJoshua M. Clulow 0) { 4096fe4f300SPavel Zakharov /* 4106fe4f300SPavel Zakharov * A mismatch here is unexpected, log it. 4116fe4f300SPavel Zakharov */ 4126fe4f300SPavel Zakharov char *devid_str = ddi_devid_str_encode(devid, 4136fe4f300SPavel Zakharov dvd->vd_minor); 4146fe4f300SPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open: devid " 4156fe4f300SPavel Zakharov "mismatch: %s != %s", vd->vdev_devid, 4166fe4f300SPavel Zakharov devid_str); 4176fe4f300SPavel Zakharov cmn_err(CE_NOTE, "vdev_disk_open %s: devid " 4186fe4f300SPavel Zakharov "mismatch: %s != %s", vd->vdev_path, 4196fe4f300SPavel Zakharov vd->vdev_devid, devid_str); 4206fe4f300SPavel Zakharov ddi_devid_str_free(devid_str); 4216fe4f300SPavel Zakharov 422be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 4238ad4d6ddSJeff Bonwick (void) ldi_close(dvd->vd_lh, spa_mode(spa), 4248ad4d6ddSJeff Bonwick kcred); 425fa9e4066Sahrens dvd->vd_lh = NULL; 426fa9e4066Sahrens } 4276af23589SJoshua M. Clulow 4286af23589SJoshua M. Clulow if (devid != NULL) { 4296af23589SJoshua M. Clulow ddi_devid_free(devid); 4306af23589SJoshua M. 
Clulow } 431fa9e4066Sahrens } 432afefbcddSeschrock 433afefbcddSeschrock /* 434afefbcddSeschrock * If we succeeded in opening the device, but 'vdev_wholedisk' 435afefbcddSeschrock * is not yet set, then this must be a slice. 436afefbcddSeschrock */ 437afefbcddSeschrock if (error == 0 && vd->vdev_wholedisk == -1ULL) 438afefbcddSeschrock vd->vdev_wholedisk = 0; 439fa9e4066Sahrens } 440fa9e4066Sahrens 441fa9e4066Sahrens /* 442fa9e4066Sahrens * If we were unable to open by path, or the devid check fails, open by 443fa9e4066Sahrens * devid instead. 444fa9e4066Sahrens */ 445fb02ae02SGeorge Wilson if (error != 0 && vd->vdev_devid != NULL) { 446fa9e4066Sahrens error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, 4478ad4d6ddSJeff Bonwick spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); 4486fe4f300SPavel Zakharov if (error != 0) { 4496fe4f300SPavel Zakharov vdev_dbgmsg(vd, "Failed to open by devid (%s)", 4506fe4f300SPavel Zakharov vd->vdev_devid); 4516fe4f300SPavel Zakharov } 452fb02ae02SGeorge Wilson } 453fa9e4066Sahrens 4543d7072f8Seschrock /* 4553d7072f8Seschrock * If all else fails, then try opening by physical path (if available) 4563d7072f8Seschrock * or the logical path (if we failed due to the devid check). While not 4573d7072f8Seschrock * as reliable as the devid, this will give us something, and the higher 4583d7072f8Seschrock * level vdev validation will prevent us from opening the wrong device. 4593d7072f8Seschrock */ 4606af23589SJoshua M. Clulow if (error != 0) { 4616af23589SJoshua M. Clulow validate_devid = B_TRUE; 462fb02ae02SGeorge Wilson 4633d7072f8Seschrock if (vd->vdev_physpath != NULL && 4646af23589SJoshua M. Clulow (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) { 4658ad4d6ddSJeff Bonwick error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), 4663d7072f8Seschrock kcred, &dvd->vd_lh, zfs_li); 4676af23589SJoshua M. 
Clulow } 4683d7072f8Seschrock 4693d7072f8Seschrock /* 4703d7072f8Seschrock * Note that we don't support the legacy auto-wholedisk support 4713d7072f8Seschrock * as above. This hasn't been used in a very long time and we 4723d7072f8Seschrock * don't need to propagate its oddities to this edge condition. 4733d7072f8Seschrock */ 4746af23589SJoshua M. Clulow if (error != 0 && vd->vdev_path != NULL) { 4758ad4d6ddSJeff Bonwick error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), 4768ad4d6ddSJeff Bonwick kcred, &dvd->vd_lh, zfs_li); 4776af23589SJoshua M. Clulow } 4783d7072f8Seschrock } 4793d7072f8Seschrock 480*30c304d9SJoshua M. Clulow /* 481*30c304d9SJoshua M. Clulow * If this is early in boot, a sweep of available block devices may 482*30c304d9SJoshua M. Clulow * locate an alternative path that we can try. 483*30c304d9SJoshua M. Clulow */ 484*30c304d9SJoshua M. Clulow if (error != 0) { 485*30c304d9SJoshua M. Clulow const char *altdevpath = vdev_disk_preroot_lookup( 486*30c304d9SJoshua M. Clulow spa_guid(spa), vd->vdev_guid); 487*30c304d9SJoshua M. Clulow 488*30c304d9SJoshua M. Clulow if (altdevpath != NULL) { 489*30c304d9SJoshua M. Clulow vdev_dbgmsg(vd, "Trying alternate preroot path (%s)", 490*30c304d9SJoshua M. Clulow altdevpath); 491*30c304d9SJoshua M. Clulow 492*30c304d9SJoshua M. Clulow validate_devid = B_TRUE; 493*30c304d9SJoshua M. Clulow 494*30c304d9SJoshua M. Clulow if ((error = ldi_open_by_name((char *)altdevpath, 495*30c304d9SJoshua M. Clulow spa_mode(spa), kcred, &dvd->vd_lh, zfs_li)) != 0) { 496*30c304d9SJoshua M. Clulow vdev_dbgmsg(vd, "Failed to open by preroot " 497*30c304d9SJoshua M. Clulow "path (%s)", altdevpath); 498*30c304d9SJoshua M. Clulow } 499*30c304d9SJoshua M. Clulow } 500*30c304d9SJoshua M. Clulow } 501*30c304d9SJoshua M. Clulow 5026af23589SJoshua M. 
Clulow if (error != 0) { 503fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 5043ee8c80cSPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open: failed to open [error=%d]", 5053ee8c80cSPavel Zakharov error); 506fa9e4066Sahrens return (error); 507e14bb325SJeff Bonwick } 508fa9e4066Sahrens 509fb02ae02SGeorge Wilson /* 510fb02ae02SGeorge Wilson * Now that the device has been successfully opened, update the devid 511fb02ae02SGeorge Wilson * if necessary. 512fb02ae02SGeorge Wilson */ 5136af23589SJoshua M. Clulow if (validate_devid) { 5146af23589SJoshua M. Clulow ddi_devid_t devid = NULL; 5156af23589SJoshua M. Clulow char *minorname = NULL; 5166af23589SJoshua M. Clulow char *vd_devid = NULL; 5176af23589SJoshua M. Clulow boolean_t remove = B_FALSE, update = B_FALSE; 5186af23589SJoshua M. Clulow 5196af23589SJoshua M. Clulow /* 5206af23589SJoshua M. Clulow * Get the current devid and minor name for the device we 5216af23589SJoshua M. Clulow * opened. 5226af23589SJoshua M. Clulow */ 5236af23589SJoshua M. Clulow if (ldi_get_devid(dvd->vd_lh, &devid) != 0 || 5246af23589SJoshua M. Clulow ldi_get_minor_name(dvd->vd_lh, &minorname) != 0) { 5256af23589SJoshua M. Clulow /* 5266af23589SJoshua M. Clulow * If we are unable to get the devid or the minor name 5276af23589SJoshua M. Clulow * for the device, we need to remove them from the 5286af23589SJoshua M. Clulow * configuration to prevent potential inconsistencies. 5296af23589SJoshua M. Clulow */ 5306af23589SJoshua M. Clulow if (dvd->vd_minor != NULL || dvd->vd_devid != NULL || 5316af23589SJoshua M. Clulow vd->vdev_devid != NULL) { 5326af23589SJoshua M. Clulow /* 5336af23589SJoshua M. Clulow * We only need to remove the devid if one 5346af23589SJoshua M. Clulow * exists. 5356af23589SJoshua M. Clulow */ 5366af23589SJoshua M. Clulow remove = B_TRUE; 5376af23589SJoshua M. Clulow } 538fb02ae02SGeorge Wilson 5396af23589SJoshua M. Clulow } else if (dvd->vd_devid == NULL || dvd->vd_minor == NULL) { 5406af23589SJoshua M. 
Clulow /* 5416af23589SJoshua M. Clulow * There was previously no devid at all so we need to 5426af23589SJoshua M. Clulow * add one. 5436af23589SJoshua M. Clulow */ 5446af23589SJoshua M. Clulow update = B_TRUE; 5456af23589SJoshua M. Clulow 5466af23589SJoshua M. Clulow } else if (ddi_devid_compare(devid, dvd->vd_devid) != 0 || 5476af23589SJoshua M. Clulow strcmp(minorname, dvd->vd_minor) != 0) { 5486af23589SJoshua M. Clulow /* 5496af23589SJoshua M. Clulow * The devid or minor name on file does not match the 5506af23589SJoshua M. Clulow * one from the opened device. 5516af23589SJoshua M. Clulow */ 5526af23589SJoshua M. Clulow update = B_TRUE; 5536af23589SJoshua M. Clulow } 5546af23589SJoshua M. Clulow 5556af23589SJoshua M. Clulow if (update) { 5566af23589SJoshua M. Clulow /* 5576af23589SJoshua M. Clulow * Render the new devid and minor name as a string for 5586af23589SJoshua M. Clulow * logging and to store in the vdev configuration. 5596af23589SJoshua M. Clulow */ 5606af23589SJoshua M. Clulow vd_devid = ddi_devid_str_encode(devid, minorname); 5616af23589SJoshua M. Clulow } 5626af23589SJoshua M. Clulow 5636af23589SJoshua M. Clulow if (update || remove) { 5643ee8c80cSPavel Zakharov vdev_dbgmsg(vd, "vdev_disk_open: update devid from " 5656af23589SJoshua M. Clulow "'%s' to '%s'", 5666af23589SJoshua M. Clulow vd->vdev_devid != NULL ? vd->vdev_devid : "<none>", 5676af23589SJoshua M. Clulow vd_devid != NULL ? vd_devid : "<none>"); 5686fe4f300SPavel Zakharov cmn_err(CE_NOTE, "vdev_disk_open %s: update devid " 5696af23589SJoshua M. Clulow "from '%s' to '%s'", 5706af23589SJoshua M. Clulow vd->vdev_path != NULL ? vd->vdev_path : "?", 5716af23589SJoshua M. Clulow vd->vdev_devid != NULL ? vd->vdev_devid : "<none>", 5726af23589SJoshua M. Clulow vd_devid != NULL ? vd_devid : "<none>"); 5736af23589SJoshua M. Clulow 5746af23589SJoshua M. Clulow /* 5756af23589SJoshua M. Clulow * Remove and free any existing values. 5766af23589SJoshua M. Clulow */ 5776af23589SJoshua M. 
/*
 * NOTE(review): this chunk begins in the middle of two routines whose
 * openings lie outside this view — the tail of a devid-update block, and
 * the bulk of vdev_disk_open().  Code below is preserved unchanged; only
 * comments have been added or repaired.
 */
		if (dvd->vd_minor != NULL) {
			ddi_devid_str_free(dvd->vd_minor);
			dvd->vd_minor = NULL;
		}
		if (dvd->vd_devid != NULL) {
			ddi_devid_free(dvd->vd_devid);
			dvd->vd_devid = NULL;
		}
		if (vd->vdev_devid != NULL) {
			spa_strfree(vd->vdev_devid);
			vd->vdev_devid = NULL;
		}
	}

	if (update) {
		/*
		 * Install the new values.
		 */
		vd->vdev_devid = vd_devid;
		dvd->vd_minor = minorname;
		dvd->vd_devid = devid;
	} else {
		/*
		 * Not updating: discard the newly obtained devid and minor
		 * name rather than installing them.
		 */
		if (devid != NULL) {
			ddi_devid_free(devid);
		}
		if (minorname != NULL) {
			kmem_free(minorname, strlen(minorname) + 1);
		}
	}
}

	/*
	 * Once a device is opened, verify that the physical device path (if
	 * available) is up to date.
	 */
	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
		char *physpath, *minorname;

		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		minorname = NULL;
		/*
		 * Rebuild vdev_physpath as "<devpath>:<minor>" whenever the
		 * stored value is missing or stale.
		 */
		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
		    (vd->vdev_physpath == NULL ||
		    strcmp(vd->vdev_physpath, physpath) != 0)) {
			if (vd->vdev_physpath)
				spa_strfree(vd->vdev_physpath);
			(void) strlcat(physpath, ":", MAXPATHLEN);
			(void) strlcat(physpath, minorname, MAXPATHLEN);
			vd->vdev_physpath = spa_strdup(physpath);
		}
		if (minorname)
			kmem_free(minorname, strlen(minorname) + 1);
		kmem_free(physpath, MAXPATHLEN);
	}

	/*
	 * Register callbacks for the LDI offline event.
	 */
	if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) ==
	    LDI_EV_SUCCESS) {
		lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
		list_insert_tail(&dvd->vd_ldi_cbs, lcb);
		(void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
		    &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id);
	}

	/*
	 * Register callbacks for the LDI degrade event.
	 */
	if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
	    LDI_EV_SUCCESS) {
		lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
		list_insert_tail(&dvd->vd_ldi_cbs, lcb);
		(void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
		    &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
	}

skip_open:
	/*
	 * Determine the actual size of the device.
	 */
	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		vdev_dbgmsg(vd, "vdev_disk_open: failed to get size");
		return (SET_ERROR(EINVAL));
	}

	*max_psize = *psize;

	/*
	 * Determine the device's minimum transfer size.
	 * If the ioctl isn't supported, assume DEV_BSIZE.
	 */
	if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT,
	    (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) {
		capacity = dkmext->dki_capacity - 1;
		blksz = dkmext->dki_lbsize;
		pbsize = dkmext->dki_pbsize;
	} else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO,
	    (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) {
		VDEV_DEBUG(
		    "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n",
		    vd->vdev_path);
		capacity = dkm->dki_capacity - 1;
		blksz = dkm->dki_lbsize;
		/* No physical block size available; assume it equals lbsize */
		pbsize = blksz;
	} else {
		VDEV_DEBUG("vdev_disk_open(\"%s\"): "
		    "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
		    vd->vdev_path, error);
		pbsize = DEV_BSIZE;
	}

	/* Derive ashift (log2 of the allocation block size) from pbsize */
	*ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;

	if (vd->vdev_wholedisk == 1) {
		int wce = 1;

		if (error == 0) {
			/*
			 * If we have the capability to expand, we'd have
			 * found out via success from DKIOCGMEDIAINFO{,EXT}.
			 * Adjust max_psize upward accordingly since we know
			 * we own the whole disk now.
			 */
			*max_psize = capacity * blksz;
		}

		/*
		 * Since we own the whole disk, try to enable disk write
		 * caching.  We ignore errors because it's OK if we can't do it.
		 */
		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
		    FKIOCTL, kcred, NULL);
	}

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/* Probe TRIM support via the DKIOC_CANFREE ioctl */
	if (ldi_ioctl(dvd->vd_lh, DKIOC_CANFREE, (intptr_t)&can_free, FKIOCTL,
	    kcred, NULL) == 0 && can_free == 1) {
		vd->vdev_has_trim = B_TRUE;
	} else {
		vd->vdev_has_trim = B_FALSE;
	}

	/* The zfs_no_trim tunable overrides whatever the device reported */
	if (zfs_no_trim == 1)
		vd->vdev_has_trim = B_FALSE;

	/* Currently only supported for ZoL. */
	vd->vdev_has_securetrim = B_FALSE;

	/* Inform the ZIO pipeline that we are non-rotational */
	vd->vdev_nonrot = B_FALSE;
	if (ldi_prop_exists(dvd->vd_lh, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "device-solid-state")) {
		if (ldi_prop_get_int(dvd->vd_lh,
		    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
		    "device-solid-state", B_FALSE) != 0)
			vd->vdev_nonrot = B_TRUE;
	}

	return (0);
}

/*
 * Close the vdev's LDI handle and release its devid state.  A no-op while
 * the vdev is being reopened or if it was never opened (dvd == NULL).
 */
static void
vdev_disk_close(vdev_t *vd)
{
	vdev_disk_t *dvd = vd->vdev_tsd;

	if (vd->vdev_reopening || dvd == NULL)
		return;

	if (dvd->vd_minor != NULL) {
		ddi_devid_str_free(dvd->vd_minor);
		dvd->vd_minor = NULL;
	}

	if (dvd->vd_devid != NULL) {
		ddi_devid_free(dvd->vd_devid);
		dvd->vd_devid = NULL;
	}

	if (dvd->vd_lh != NULL) {
		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
		dvd->vd_lh = NULL;
	}

	vd->vdev_delayed_close = B_FALSE;
	vdev_disk_free(vd);
}

/*
 * Perform a single synchronous read or write against an LDI handle,
 * blocking in biowait() until the transfer completes.  Returns 0 on
 * success, EINVAL for a NULL handle, or EIO on a short transfer.
 */
static int
vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
    size_t size, uint64_t offset, int flags)
{
	buf_t *bp;
	int error = 0;

	if (vd_lh == NULL)
		return (SET_ERROR(EINVAL));

	ASSERT(flags & B_READ || flags & B_WRITE);

	bp = getrbuf(KM_SLEEP);
	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
	bp->b_bcount = size;
	bp->b_un.b_addr = (void *)data;
	bp->b_lblkno = lbtodb(offset);
	bp->b_bufsize = size;

	error = ldi_strategy(vd_lh, bp);
	ASSERT(error == 0);
	/* A short transfer with no error is still reported as EIO */
	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
		error = SET_ERROR(EIO);
	freerbuf(bp);

	return (error);
}

/*
 * Dump I/O entry point.  Offsets are adjusted past the front vdev label
 * region.  When isdump is set we are in active crash dump context and must
 * use ldi_dump(9F) instead of the normal ldi_strategy(9F) path.
 */
static int
vdev_disk_dumpio(vdev_t *vd, caddr_t data, size_t size,
    uint64_t offset, uint64_t origoffset __unused, boolean_t doread,
    boolean_t isdump)
{
	vdev_disk_t *dvd = vd->vdev_tsd;
	int flags = doread ? B_READ : B_WRITE;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || dvd->vd_ldi_offline) {
		return (SET_ERROR(ENXIO));
	}

	ASSERT(vd->vdev_ops == &vdev_disk_ops);

	offset += VDEV_LABEL_START_SIZE;

	/*
	 * If in the context of an active crash dump, use the ldi_dump(9F)
	 * call instead of ldi_strategy(9F) as usual.
	 */
	if (isdump) {
		ASSERT3P(dvd, !=, NULL);
		return (ldi_dump(dvd->vd_lh, data, lbtodb(offset),
		    lbtodb(size)));
	}

	return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
}

/*
 * b_iodone completion callback for asynchronous vdev I/O: normalize the
 * buffer error, return the borrowed ABD buffer, free the vdev_buf_t
 * wrapper, and hand the zio back to the pipeline.
 */
static int
vdev_disk_io_intr(buf_t *bp)
{
	vdev_buf_t *vb = (vdev_buf_t *)bp;
	zio_t *zio = vb->vb_io;

	/*
	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
	 * Rather than teach the rest of the stack about other error
	 * possibilities (EFAULT, etc), we normalize the error value here.
	 */
	zio->io_error = (geterror(bp) != 0 ? EIO : 0);

	if (zio->io_error == 0 && bp->b_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	if (zio->io_type == ZIO_TYPE_READ) {
		/* Copy the data read into the borrowed buffer back to the abd */
		abd_return_buf_copy(zio->io_abd, bp->b_un.b_addr, zio->io_size);
	} else {
		abd_return_buf(zio->io_abd, bp->b_un.b_addr, zio->io_size);
	}

	kmem_free(vb, sizeof (vdev_buf_t));

	zio_delay_interrupt(zio);
	return (0);
}

/*
 * zio_vsd_ops free routine: release the dk_callback allocated for an
 * asynchronous cache-flush ioctl.
 */
static void
vdev_disk_ioctl_free(zio_t *zio)
{
	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
}

static const zio_vsd_ops_t vdev_disk_vsd_ops = {
	vdev_disk_ioctl_free,
	zio_vsd_default_cksum_report
};

/*
 * Completion callback invoked when an asynchronous DKIOCFLUSHWRITECACHE
 * ioctl finishes; propagates the result and re-enters the ZIO pipeline.
 */
static void
vdev_disk_ioctl_done(void *zio_arg, int error)
{
	zio_t *zio = zio_arg;

	zio->io_error = error;

	zio_interrupt(zio);
}

/*
 * ZIO start entry point: dispatches ioctl (cache flush), TRIM, and
 * read/write zios to the underlying LDI device.
 */
static void
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	/* NOTE(review): trim_flags is never used below — candidate removal */
	unsigned long trim_flags = 0;
	vdev_buf_t *vb;
	struct dk_callback *dkc;
	buf_t *bp;
	int error;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || dvd->vd_ldi_offline) {
		zio->io_error = ENXIO;
		zio_interrupt(zio);
		return;
	}

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		}

		switch (zio->io_cmd) {

		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
			zio->io_vsd_ops = &vdev_disk_vsd_ops;

			dkc->dkc_callback = vdev_disk_ioctl_done;
			dkc->dkc_flag = FLUSH_VOLATILE;
			dkc->dkc_cookie = zio;

			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				return;
			}

			zio->io_error = error;

			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		zio_execute(zio);
		return;

	case ZIO_TYPE_TRIM:
		if (zfs_no_trim == 1 || !vd->vdev_has_trim) {
			zio->io_error = SET_ERROR(ENOTSUP);
			zio_execute(zio);
			return;
		}
		/* Currently only supported on ZoL. */
		ASSERT0(zio->io_trim_flags & ZIO_TRIM_SECURE);

		/* dkioc_free_list_t is already declared to hold one entry */
		dkioc_free_list_t dfl;
		dfl.dfl_flags = 0;
		dfl.dfl_num_exts = 1;
		dfl.dfl_offset = 0;
		dfl.dfl_exts[0].dfle_start = zio->io_offset;
		dfl.dfl_exts[0].dfle_length = zio->io_size;

		zio->io_error = ldi_ioctl(dvd->vd_lh, DKIOCFREE,
		    (uintptr_t)&dfl, FKIOCTL, kcred, NULL);

		if (zio->io_error == ENOTSUP || zio->io_error == ENOTTY) {
			/*
			 * The device must have changed and now TRIM is
			 * no longer supported.
			 */
			vd->vdev_has_trim = B_FALSE;
		}

		zio_interrupt(zio);
		return;
	}

	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
	zio->io_target_timestamp = zio_handle_io_delay(zio);

	vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);

	vb->vb_io = zio;
	bp = &vb->vb_buf;

	bioinit(bp);
	bp->b_flags = B_BUSY | B_NOCACHE |
	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	/* Retried or must-succeed I/O should not fail fast */
	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bp->b_flags |= B_FAILFAST;
	bp->b_bcount = zio->io_size;

	if (zio->io_type == ZIO_TYPE_READ) {
		bp->b_un.b_addr =
		    abd_borrow_buf(zio->io_abd, zio->io_size);
	} else {
		bp->b_un.b_addr =
		    abd_borrow_buf_copy(zio->io_abd, zio->io_size);
	}

	bp->b_lblkno = lbtodb(zio->io_offset);
	bp->b_bufsize = zio->io_size;
	bp->b_iodone = vdev_disk_io_intr;

	/*
	 * In general we would expect ldi_strategy() to return non-zero only
	 * because of programming errors, but we've also seen this fail shortly
	 * after a disk dies.
	 */
	if (ldi_strategy(dvd->vd_lh, bp) != 0) {
		zio->io_error = ENXIO;
		zio_interrupt(zio);
	}
}

/*
 * ZIO completion hook: on EIO, check whether the device has actually been
 * removed and if so schedule its asynchronous removal.
 */
static void
vdev_disk_io_done(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;

	/*
	 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
	 * the device has been removed.  If this is the case, then we trigger an
	 * asynchronous removal of the device. Otherwise, probe the device and
	 * make sure it's still accessible.
	 */
	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
		vdev_disk_t *dvd = vd->vdev_tsd;
		int state = DKIO_NONE;

		if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
		    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
			/*
			 * We post the resource as soon as possible, instead of
			 * when the async removal actually happens, because the
			 * DE is using this information to discard previous I/O
			 * errors.
			 */
			zfs_post_remove(zio->io_spa, vd);
			vd->vdev_remove_wanted = B_TRUE;
			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
		} else if (!vd->vdev_delayed_close) {
			vd->vdev_delayed_close = B_TRUE;
		}
	}
}

/* Operations vector for leaf disk vdevs */
vdev_ops_t vdev_disk_ops = {
	.vdev_op_open = vdev_disk_open,
	.vdev_op_close = vdev_disk_close,
	.vdev_op_asize = vdev_default_asize,
	.vdev_op_io_start = vdev_disk_io_start,
	.vdev_op_io_done = vdev_disk_io_done,
	.vdev_op_state_change = NULL,
	.vdev_op_need_resilver = NULL,
	.vdev_op_hold = vdev_disk_hold,
	.vdev_op_rele = vdev_disk_rele,
	.vdev_op_remap = NULL,
	.vdev_op_xlate = vdev_default_xlate,
	.vdev_op_dumpio = vdev_disk_dumpio,
	.vdev_op_type = VDEV_TYPE_DISK,	/* name of this vdev type */
	.vdev_op_leaf = B_TRUE		/* leaf vdev */
};

/*
 * Given the root disk device devid or pathname, read the label from
 * the device, and construct a configuration nvlist.  The devid is tried
 * first when provided; on failure the path is opened by name.  On success
 * *config holds an unpacked label nvlist the caller must free.
 */
int
vdev_disk_read_rootlabel(const char *devpath, const char *devid,
    nvlist_t **config)
{
	ldi_handle_t vd_lh;
	vdev_label_t *label;
	uint64_t s, size;
	int l;
	ddi_devid_t tmpdevid;
	int error = -1;
	char *minor_name;

	/*
	 * Read the device label and build the nvlist.
	 */
	if (devid != NULL && ddi_devid_str_decode((char *)devid, &tmpdevid,
	    &minor_name) == 0) {
		error = ldi_open_by_devid(tmpdevid, minor_name,
		    FREAD, kcred, &vd_lh, zfs_li);
		ddi_devid_free(tmpdevid);
		ddi_devid_str_free(minor_name);
	}

	if (error != 0 && (error = ldi_open_by_name((char *)devpath, FREAD,
	    kcred, &vd_lh, zfs_li)) != 0) {
		return (error);
	}

	if (ldi_get_size(vd_lh, &s)) {
		(void) ldi_close(vd_lh, FREAD, kcred);
		return (SET_ERROR(EIO));
	}

	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);

	/* Try each of the four labels until a valid, live one is found */
	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {
		uint64_t offset, state, txg = 0;

		/* read vdev label */
		offset = vdev_label_offset(size, l, 0);
		if (vdev_disk_ldi_physio(vd_lh, (caddr_t)label,
		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
			continue;

		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
			*config = NULL;
			continue;
		}

		/* Skip labels for destroyed pools */
		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		/* A zero txg means the label was never fully written */
		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, sizeof (vdev_label_t));
	(void) ldi_close(vd_lh, FREAD, kcred);
	if (*config == NULL)
		error = SET_ERROR(EIDRM);

	return (error);
}
1155*30c304d9SJoshua M. Clulow 1156*30c304d9SJoshua M. Clulow struct veb { 1157*30c304d9SJoshua M. Clulow list_t veb_ents; 1158*30c304d9SJoshua M. Clulow boolean_t veb_scanned; 1159*30c304d9SJoshua M. Clulow }; 1160*30c304d9SJoshua M. Clulow 1161*30c304d9SJoshua M. Clulow struct veb_ent { 1162*30c304d9SJoshua M. Clulow uint64_t vebe_pool_guid; 1163*30c304d9SJoshua M. Clulow uint64_t vebe_vdev_guid; 1164*30c304d9SJoshua M. Clulow 1165*30c304d9SJoshua M. Clulow char *vebe_devpath; 1166*30c304d9SJoshua M. Clulow 1167*30c304d9SJoshua M. Clulow list_node_t vebe_link; 1168*30c304d9SJoshua M. Clulow }; 1169*30c304d9SJoshua M. Clulow 1170*30c304d9SJoshua M. Clulow static kmutex_t veb_lock; 1171*30c304d9SJoshua M. Clulow static struct veb *veb; 1172*30c304d9SJoshua M. Clulow 1173*30c304d9SJoshua M. Clulow static int 1174*30c304d9SJoshua M. Clulow vdev_disk_preroot_scan_walk(const char *devpath, void *arg) 1175*30c304d9SJoshua M. Clulow { 1176*30c304d9SJoshua M. Clulow int r; 1177*30c304d9SJoshua M. Clulow nvlist_t *cfg = NULL; 1178*30c304d9SJoshua M. Clulow uint64_t pguid = 0, vguid = 0; 1179*30c304d9SJoshua M. Clulow 1180*30c304d9SJoshua M. Clulow /* 1181*30c304d9SJoshua M. Clulow * Attempt to read the label from this block device. 1182*30c304d9SJoshua M. Clulow */ 1183*30c304d9SJoshua M. Clulow if ((r = vdev_disk_read_rootlabel(devpath, NULL, &cfg)) != 0) { 1184*30c304d9SJoshua M. Clulow /* 1185*30c304d9SJoshua M. Clulow * Many of the available block devices will represent slices or 1186*30c304d9SJoshua M. Clulow * partitions of disks, or may represent disks that are not at 1187*30c304d9SJoshua M. Clulow * all initialised with ZFS. As this is a best effort 1188*30c304d9SJoshua M. Clulow * mechanism to locate an alternate path to a particular vdev, 1189*30c304d9SJoshua M. Clulow * we will ignore any failures and keep scanning. 1190*30c304d9SJoshua M. Clulow */ 1191*30c304d9SJoshua M. Clulow return (PREROOT_WALK_BLOCK_DEVICES_NEXT); 1192*30c304d9SJoshua M. 
Clulow } 1193*30c304d9SJoshua M. Clulow 1194*30c304d9SJoshua M. Clulow /* 1195*30c304d9SJoshua M. Clulow * Determine the pool and vdev GUID read from the label for this 1196*30c304d9SJoshua M. Clulow * device. Both values must be present and have a non-zero value. 1197*30c304d9SJoshua M. Clulow */ 1198*30c304d9SJoshua M. Clulow if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pguid) != 0 || 1199*30c304d9SJoshua M. Clulow nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_GUID, &vguid) != 0 || 1200*30c304d9SJoshua M. Clulow pguid == 0 || vguid == 0) { 1201*30c304d9SJoshua M. Clulow /* 1202*30c304d9SJoshua M. Clulow * This label was not complete. 1203*30c304d9SJoshua M. Clulow */ 1204*30c304d9SJoshua M. Clulow goto out; 1205*30c304d9SJoshua M. Clulow } 1206*30c304d9SJoshua M. Clulow 1207*30c304d9SJoshua M. Clulow /* 1208*30c304d9SJoshua M. Clulow * Keep track of all of the GUID-to-devpath mappings we find so that 1209*30c304d9SJoshua M. Clulow * vdev_disk_preroot_lookup() can search them. 1210*30c304d9SJoshua M. Clulow */ 1211*30c304d9SJoshua M. Clulow struct veb_ent *vebe = kmem_zalloc(sizeof (*vebe), KM_SLEEP); 1212*30c304d9SJoshua M. Clulow vebe->vebe_pool_guid = pguid; 1213*30c304d9SJoshua M. Clulow vebe->vebe_vdev_guid = vguid; 1214*30c304d9SJoshua M. Clulow vebe->vebe_devpath = spa_strdup(devpath); 1215*30c304d9SJoshua M. Clulow 1216*30c304d9SJoshua M. Clulow list_insert_tail(&veb->veb_ents, vebe); 1217*30c304d9SJoshua M. Clulow 1218*30c304d9SJoshua M. Clulow out: 1219*30c304d9SJoshua M. Clulow nvlist_free(cfg); 1220*30c304d9SJoshua M. Clulow return (PREROOT_WALK_BLOCK_DEVICES_NEXT); 1221*30c304d9SJoshua M. Clulow } 1222*30c304d9SJoshua M. Clulow 1223*30c304d9SJoshua M. Clulow const char * 1224*30c304d9SJoshua M. Clulow vdev_disk_preroot_lookup(uint64_t pool_guid, uint64_t vdev_guid) 1225*30c304d9SJoshua M. Clulow { 1226*30c304d9SJoshua M. Clulow if (pool_guid == 0 || vdev_guid == 0) { 1227*30c304d9SJoshua M. Clulow /* 1228*30c304d9SJoshua M. 
Clulow * If we aren't provided both a pool and a vdev GUID, we cannot 1229*30c304d9SJoshua M. Clulow * perform a lookup. 1230*30c304d9SJoshua M. Clulow */ 1231*30c304d9SJoshua M. Clulow return (NULL); 1232*30c304d9SJoshua M. Clulow } 1233*30c304d9SJoshua M. Clulow 1234*30c304d9SJoshua M. Clulow mutex_enter(&veb_lock); 1235*30c304d9SJoshua M. Clulow if (veb == NULL) { 1236*30c304d9SJoshua M. Clulow /* 1237*30c304d9SJoshua M. Clulow * If vdev_disk_preroot_fini() has been called already, there 1238*30c304d9SJoshua M. Clulow * is nothing we can do. 1239*30c304d9SJoshua M. Clulow */ 1240*30c304d9SJoshua M. Clulow mutex_exit(&veb_lock); 1241*30c304d9SJoshua M. Clulow return (NULL); 1242*30c304d9SJoshua M. Clulow } 1243*30c304d9SJoshua M. Clulow 1244*30c304d9SJoshua M. Clulow /* 1245*30c304d9SJoshua M. Clulow * We want to perform at most one scan of all block devices per boot. 1246*30c304d9SJoshua M. Clulow */ 1247*30c304d9SJoshua M. Clulow if (!veb->veb_scanned) { 1248*30c304d9SJoshua M. Clulow cmn_err(CE_NOTE, "Performing full ZFS device scan!"); 1249*30c304d9SJoshua M. Clulow 1250*30c304d9SJoshua M. Clulow preroot_walk_block_devices(vdev_disk_preroot_scan_walk, NULL); 1251*30c304d9SJoshua M. Clulow 1252*30c304d9SJoshua M. Clulow veb->veb_scanned = B_TRUE; 1253*30c304d9SJoshua M. Clulow } 1254*30c304d9SJoshua M. Clulow 1255*30c304d9SJoshua M. Clulow const char *path = NULL; 1256*30c304d9SJoshua M. Clulow for (struct veb_ent *vebe = list_head(&veb->veb_ents); vebe != NULL; 1257*30c304d9SJoshua M. Clulow vebe = list_next(&veb->veb_ents, vebe)) { 1258*30c304d9SJoshua M. Clulow if (vebe->vebe_pool_guid == pool_guid && 1259*30c304d9SJoshua M. Clulow vebe->vebe_vdev_guid == vdev_guid) { 1260*30c304d9SJoshua M. Clulow path = vebe->vebe_devpath; 1261*30c304d9SJoshua M. Clulow break; 1262*30c304d9SJoshua M. Clulow } 1263*30c304d9SJoshua M. Clulow } 1264*30c304d9SJoshua M. Clulow 1265*30c304d9SJoshua M. Clulow mutex_exit(&veb_lock); 1266*30c304d9SJoshua M. 
Clulow 1267*30c304d9SJoshua M. Clulow return (path); 1268*30c304d9SJoshua M. Clulow } 1269*30c304d9SJoshua M. Clulow 1270*30c304d9SJoshua M. Clulow void 1271*30c304d9SJoshua M. Clulow vdev_disk_preroot_init(void) 1272*30c304d9SJoshua M. Clulow { 1273*30c304d9SJoshua M. Clulow mutex_init(&veb_lock, NULL, MUTEX_DEFAULT, NULL); 1274*30c304d9SJoshua M. Clulow 1275*30c304d9SJoshua M. Clulow VERIFY3P(veb, ==, NULL); 1276*30c304d9SJoshua M. Clulow veb = kmem_zalloc(sizeof (*veb), KM_SLEEP); 1277*30c304d9SJoshua M. Clulow list_create(&veb->veb_ents, sizeof (struct veb_ent), 1278*30c304d9SJoshua M. Clulow offsetof(struct veb_ent, vebe_link)); 1279*30c304d9SJoshua M. Clulow veb->veb_scanned = B_FALSE; 1280*30c304d9SJoshua M. Clulow } 1281*30c304d9SJoshua M. Clulow 1282*30c304d9SJoshua M. Clulow void 1283*30c304d9SJoshua M. Clulow vdev_disk_preroot_fini(void) 1284*30c304d9SJoshua M. Clulow { 1285*30c304d9SJoshua M. Clulow mutex_enter(&veb_lock); 1286*30c304d9SJoshua M. Clulow 1287*30c304d9SJoshua M. Clulow if (veb != NULL) { 1288*30c304d9SJoshua M. Clulow while (!list_is_empty(&veb->veb_ents)) { 1289*30c304d9SJoshua M. Clulow struct veb_ent *vebe = list_remove_head(&veb->veb_ents); 1290*30c304d9SJoshua M. Clulow 1291*30c304d9SJoshua M. Clulow spa_strfree(vebe->vebe_devpath); 1292*30c304d9SJoshua M. Clulow 1293*30c304d9SJoshua M. Clulow kmem_free(vebe, sizeof (*vebe)); 1294*30c304d9SJoshua M. Clulow } 1295*30c304d9SJoshua M. Clulow 1296*30c304d9SJoshua M. Clulow kmem_free(veb, sizeof (*veb)); 1297*30c304d9SJoshua M. Clulow veb = NULL; 1298*30c304d9SJoshua M. Clulow } 1299*30c304d9SJoshua M. Clulow 1300*30c304d9SJoshua M. Clulow mutex_exit(&veb_lock); 1301*30c304d9SJoshua M. Clulow } 1302