1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22f80ce222SChris Kirby * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23b77b9231SDan McDonald * 24b77b9231SDan McDonald * Portions Copyright 2010 Robert Milkowski 25b77b9231SDan McDonald * 26b77b9231SDan McDonald * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 277802d7bfSMatthew Ahrens * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 28810e43b2SBill Pijewski * Copyright (c) 2013, Joyent, Inc. All rights reserved. 29fa9e4066Sahrens */ 30fa9e4066Sahrens 31fa9e4066Sahrens /* 32fa9e4066Sahrens * ZFS volume emulation driver. 33fa9e4066Sahrens * 34fa9e4066Sahrens * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 
35fa9e4066Sahrens * Volumes are accessed through the symbolic links named: 36fa9e4066Sahrens * 37fa9e4066Sahrens * /dev/zvol/dsk/<pool_name>/<dataset_name> 38fa9e4066Sahrens * /dev/zvol/rdsk/<pool_name>/<dataset_name> 39fa9e4066Sahrens * 40681d9761SEric Taylor * These links are created by the /dev filesystem (sdev_zvolops.c). 41fa9e4066Sahrens * Volumes are persistent through reboot. No user command needs to be 42fa9e4066Sahrens * run before opening and using a device. 43fa9e4066Sahrens */ 44fa9e4066Sahrens 45fa9e4066Sahrens #include <sys/types.h> 46fa9e4066Sahrens #include <sys/param.h> 47fa9e4066Sahrens #include <sys/errno.h> 48fa9e4066Sahrens #include <sys/uio.h> 49fa9e4066Sahrens #include <sys/buf.h> 50fa9e4066Sahrens #include <sys/modctl.h> 51fa9e4066Sahrens #include <sys/open.h> 52fa9e4066Sahrens #include <sys/kmem.h> 53fa9e4066Sahrens #include <sys/conf.h> 54fa9e4066Sahrens #include <sys/cmn_err.h> 55fa9e4066Sahrens #include <sys/stat.h> 56fa9e4066Sahrens #include <sys/zap.h> 57fa9e4066Sahrens #include <sys/spa.h> 58810e43b2SBill Pijewski #include <sys/spa_impl.h> 59fa9e4066Sahrens #include <sys/zio.h> 60e7cbe64fSgw #include <sys/dmu_traverse.h> 61e7cbe64fSgw #include <sys/dnode.h> 62e7cbe64fSgw #include <sys/dsl_dataset.h> 63fa9e4066Sahrens #include <sys/dsl_prop.h> 64fa9e4066Sahrens #include <sys/dkio.h> 65fa9e4066Sahrens #include <sys/efi_partition.h> 66fa9e4066Sahrens #include <sys/byteorder.h> 67fa9e4066Sahrens #include <sys/pathname.h> 68fa9e4066Sahrens #include <sys/ddi.h> 69fa9e4066Sahrens #include <sys/sunddi.h> 70fa9e4066Sahrens #include <sys/crc32.h> 71fa9e4066Sahrens #include <sys/dirent.h> 72fa9e4066Sahrens #include <sys/policy.h> 73fa9e4066Sahrens #include <sys/fs/zfs.h> 74fa9e4066Sahrens #include <sys/zfs_ioctl.h> 75fa9e4066Sahrens #include <sys/mkdev.h> 7622ac5be4Sperrin #include <sys/zil.h> 77c5c6ffa0Smaybee #include <sys/refcount.h> 78c2e6a7d6Sperrin #include <sys/zfs_znode.h> 79c2e6a7d6Sperrin #include <sys/zfs_rlock.h> 80e7cbe64fSgw 
#include <sys/vdev_disk.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_raidz.h>
#include <sys/zvol.h>
#include <sys/dumphdr.h>
#include <sys/zil_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
#include <sys/zio_checksum.h>

#include "zfs_namecheck.h"

void *zfsdev_state;
static char *zvol_tag = "zvol_tag";

#define	ZVOL_DUMPSIZE		"dumpsize"

/*
 * This lock protects the zfsdev_state structure from being modified
 * while it's being used, e.g. an open that comes in before a create
 * finishes.  It also protects temporary opens of the dataset so that,
 * e.g., an open doesn't get a spurious EBUSY.
 */
kmutex_t zfsdev_state_lock;
static uint32_t zvol_minors;

/*
 * One run of consecutive volume blocks, kept on zvol_state_t's zv_extents
 * list.
 */
typedef struct zvol_extent {
	list_node_t	ze_node;
	dva_t		ze_dva;		/* dva associated with this extent */
	uint64_t	ze_nblks;	/* number of blocks in extent */
} zvol_extent_t;

/*
 * The in-core state of each volume.
 */
typedef struct zvol_state {
	char		zv_name[MAXPATHLEN]; /* pool/dd name */
	uint64_t	zv_volsize;	/* amount of space we advertise */
	uint64_t	zv_volblocksize; /* volume block size */
	minor_t		zv_minor;	/* minor number */
	uint8_t		zv_min_bs;	/* minimum addressable block shift */
	uint8_t		zv_flags;	/* readonly, dumpified, etc. */
	objset_t	*zv_objset;	/* objset handle */
	uint32_t	zv_open_count[OTYPCNT];	/* open counts */
	uint32_t	zv_total_opens;	/* total open count */
	zilog_t		*zv_zilog;	/* ZIL handle */
	list_t		zv_extents;	/* List of extents for dump */
	znode_t		zv_znode;	/* for range locking */
	dmu_buf_t	*zv_dbuf;	/* bonus handle */
} zvol_state_t;

/*
 * zvol specific flags
 */
#define	ZVOL_RDONLY	0x1
#define	ZVOL_DUMPIFIED	0x2
#define	ZVOL_EXCL	0x4
#define	ZVOL_WCE	0x8

/*
 * zvol maximum transfer in one DMU tx.
 */
int zvol_maxphys = DMU_MAX_ACCESS/2;

/*
 * Toggle unmap functionality.
 */
boolean_t zvol_unmap_enabled = B_TRUE;

extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
    nvlist_t *, nvlist_t *);
static int zvol_remove_zv(zvol_state_t *);
static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
static int zvol_dumpify(zvol_state_t *zv);
static int zvol_dump_fini(zvol_state_t *zv);
static int zvol_dump_init(zvol_state_t *zv, boolean_t resize);

/*
 * Record a new volume size.
 *
 * Stores volsize in the in-core state, publishes it through the "Size"
 * and "Nblocks" (volsize converted with lbtodb()) properties of the
 * volume's device, and tells specfs to drop its cached size for both the
 * block and the character device.  Both property updates are VERIFY'd.
 */
static void
zvol_size_changed(zvol_state_t *zv, uint64_t volsize)
{
	dev_t dev = makedevice(ddi_driver_major(zfs_dip), zv->zv_minor);

	zv->zv_volsize = volsize;
	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
	    "Size", volsize) == DDI_SUCCESS);
	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
	    "Nblocks", lbtodb(volsize)) == DDI_SUCCESS);

	/* Notify specfs to invalidate the cached size */
	spec_size_invalidate(dev, VBLK);
	spec_size_invalidate(dev, VCHR);
}

/*
 * Validate a proposed volume size against a block size.
 *
 * Returns 0 if volsize is a non-zero multiple of blocksize, EINVAL if
 * either value is zero or volsize is not a multiple of blocksize, and
 * (on _ILP32 only) EOVERFLOW if volsize - 1 exceeds SPEC_MAXOFFSET_T.
 */
int
zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
{
	if (volsize == 0)
		return (SET_ERROR(EINVAL));

	/* A zero block size would make the modulo below divide by zero. */
	if (blocksize == 0)
		return (SET_ERROR(EINVAL));

	if (volsize % blocksize != 0)
		return (SET_ERROR(EINVAL));

#ifdef _ILP32
	if (volsize - 1 > SPEC_MAXOFFSET_T)
		return (SET_ERROR(EOVERFLOW));
#endif
	return (0);
}
189fa9e4066Sahrens 190fa9e4066Sahrens int 191e9dbad6fSeschrock zvol_check_volblocksize(uint64_t volblocksize) 192fa9e4066Sahrens { 193e9dbad6fSeschrock if (volblocksize < SPA_MINBLOCKSIZE || 194*b5152584SMatthew Ahrens volblocksize > SPA_OLD_MAXBLOCKSIZE || 195e9dbad6fSeschrock !ISP2(volblocksize)) 196be6fd75aSMatthew Ahrens return (SET_ERROR(EDOM)); 197fa9e4066Sahrens 198fa9e4066Sahrens return (0); 199fa9e4066Sahrens } 200fa9e4066Sahrens 201fa9e4066Sahrens int 202a2eea2e1Sahrens zvol_get_stats(objset_t *os, nvlist_t *nv) 203fa9e4066Sahrens { 204fa9e4066Sahrens int error; 205fa9e4066Sahrens dmu_object_info_t doi; 206a2eea2e1Sahrens uint64_t val; 207fa9e4066Sahrens 208a2eea2e1Sahrens error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); 209fa9e4066Sahrens if (error) 210fa9e4066Sahrens return (error); 211fa9e4066Sahrens 212a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); 213a2eea2e1Sahrens 214fa9e4066Sahrens error = dmu_object_info(os, ZVOL_OBJ, &doi); 215fa9e4066Sahrens 216a2eea2e1Sahrens if (error == 0) { 217a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, 218a2eea2e1Sahrens doi.doi_data_block_size); 219a2eea2e1Sahrens } 220fa9e4066Sahrens 221fa9e4066Sahrens return (error); 222fa9e4066Sahrens } 223fa9e4066Sahrens 224fa9e4066Sahrens static zvol_state_t * 225e9dbad6fSeschrock zvol_minor_lookup(const char *name) 226fa9e4066Sahrens { 227fa9e4066Sahrens minor_t minor; 228fa9e4066Sahrens zvol_state_t *zv; 229fa9e4066Sahrens 230c99e4bdcSChris Kirby ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 231fa9e4066Sahrens 232c99e4bdcSChris Kirby for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { 233c99e4bdcSChris Kirby zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 234fa9e4066Sahrens if (zv == NULL) 235fa9e4066Sahrens continue; 236fa9e4066Sahrens if (strcmp(zv->zv_name, name) == 0) 237f80ce222SChris Kirby return (zv); 238fa9e4066Sahrens } 239fa9e4066Sahrens 240f80ce222SChris Kirby return (NULL); 241fa9e4066Sahrens } 242fa9e4066Sahrens 
243e7cbe64fSgw /* extent mapping arg */ 244e7cbe64fSgw struct maparg { 24588b7b0f2SMatthew Ahrens zvol_state_t *ma_zv; 24688b7b0f2SMatthew Ahrens uint64_t ma_blks; 247e7cbe64fSgw }; 248e7cbe64fSgw 249e7cbe64fSgw /*ARGSUSED*/ 250e7cbe64fSgw static int 2511b912ec7SGeorge Wilson zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 2527802d7bfSMatthew Ahrens const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 253e7cbe64fSgw { 25488b7b0f2SMatthew Ahrens struct maparg *ma = arg; 25588b7b0f2SMatthew Ahrens zvol_extent_t *ze; 25688b7b0f2SMatthew Ahrens int bs = ma->ma_zv->zv_volblocksize; 257e7cbe64fSgw 25843466aaeSMax Grossman if (BP_IS_HOLE(bp) || 25943466aaeSMax Grossman zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) 26088b7b0f2SMatthew Ahrens return (0); 261e7cbe64fSgw 2625d7b4d43SMatthew Ahrens VERIFY(!BP_IS_EMBEDDED(bp)); 2635d7b4d43SMatthew Ahrens 26488b7b0f2SMatthew Ahrens VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); 26588b7b0f2SMatthew Ahrens ma->ma_blks++; 266e7cbe64fSgw 26788b7b0f2SMatthew Ahrens /* Abort immediately if we have encountered gang blocks */ 26888b7b0f2SMatthew Ahrens if (BP_IS_GANG(bp)) 269be6fd75aSMatthew Ahrens return (SET_ERROR(EFRAGS)); 270e7cbe64fSgw 27188b7b0f2SMatthew Ahrens /* 27288b7b0f2SMatthew Ahrens * See if the block is at the end of the previous extent. 
27388b7b0f2SMatthew Ahrens */ 27488b7b0f2SMatthew Ahrens ze = list_tail(&ma->ma_zv->zv_extents); 27588b7b0f2SMatthew Ahrens if (ze && 27688b7b0f2SMatthew Ahrens DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && 27788b7b0f2SMatthew Ahrens DVA_GET_OFFSET(BP_IDENTITY(bp)) == 27888b7b0f2SMatthew Ahrens DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { 27988b7b0f2SMatthew Ahrens ze->ze_nblks++; 28088b7b0f2SMatthew Ahrens return (0); 281e7cbe64fSgw } 282e7cbe64fSgw 28388b7b0f2SMatthew Ahrens dprintf_bp(bp, "%s", "next blkptr:"); 284e7cbe64fSgw 28588b7b0f2SMatthew Ahrens /* start a new extent */ 28688b7b0f2SMatthew Ahrens ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); 28788b7b0f2SMatthew Ahrens ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 28888b7b0f2SMatthew Ahrens ze->ze_nblks = 1; 28988b7b0f2SMatthew Ahrens list_insert_tail(&ma->ma_zv->zv_extents, ze); 29088b7b0f2SMatthew Ahrens return (0); 29188b7b0f2SMatthew Ahrens } 292e7cbe64fSgw 29388b7b0f2SMatthew Ahrens static void 29488b7b0f2SMatthew Ahrens zvol_free_extents(zvol_state_t *zv) 29588b7b0f2SMatthew Ahrens { 29688b7b0f2SMatthew Ahrens zvol_extent_t *ze; 297e7cbe64fSgw 29888b7b0f2SMatthew Ahrens while (ze = list_head(&zv->zv_extents)) { 29988b7b0f2SMatthew Ahrens list_remove(&zv->zv_extents, ze); 30088b7b0f2SMatthew Ahrens kmem_free(ze, sizeof (zvol_extent_t)); 301e7cbe64fSgw } 30288b7b0f2SMatthew Ahrens } 303e7cbe64fSgw 30488b7b0f2SMatthew Ahrens static int 30588b7b0f2SMatthew Ahrens zvol_get_lbas(zvol_state_t *zv) 30688b7b0f2SMatthew Ahrens { 3073adc9019SEric Taylor objset_t *os = zv->zv_objset; 30888b7b0f2SMatthew Ahrens struct maparg ma; 30988b7b0f2SMatthew Ahrens int err; 31088b7b0f2SMatthew Ahrens 31188b7b0f2SMatthew Ahrens ma.ma_zv = zv; 31288b7b0f2SMatthew Ahrens ma.ma_blks = 0; 31388b7b0f2SMatthew Ahrens zvol_free_extents(zv); 31488b7b0f2SMatthew Ahrens 3153adc9019SEric Taylor /* commit any in-flight changes before traversing the dataset */ 3163adc9019SEric Taylor 
txg_wait_synced(dmu_objset_pool(os), 0); 3173adc9019SEric Taylor err = traverse_dataset(dmu_objset_ds(os), 0, 31888b7b0f2SMatthew Ahrens TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); 31988b7b0f2SMatthew Ahrens if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { 32088b7b0f2SMatthew Ahrens zvol_free_extents(zv); 32188b7b0f2SMatthew Ahrens return (err ? err : EIO); 322e7cbe64fSgw } 32388b7b0f2SMatthew Ahrens 324e7cbe64fSgw return (0); 325e7cbe64fSgw } 326e7cbe64fSgw 327ecd6cf80Smarks /* ARGSUSED */ 328fa9e4066Sahrens void 329ecd6cf80Smarks zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 330fa9e4066Sahrens { 331da6c28aaSamw zfs_creat_t *zct = arg; 332da6c28aaSamw nvlist_t *nvprops = zct->zct_props; 333fa9e4066Sahrens int error; 334e9dbad6fSeschrock uint64_t volblocksize, volsize; 335fa9e4066Sahrens 336ecd6cf80Smarks VERIFY(nvlist_lookup_uint64(nvprops, 337e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 338ecd6cf80Smarks if (nvlist_lookup_uint64(nvprops, 339e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 340e9dbad6fSeschrock volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 341e9dbad6fSeschrock 342e9dbad6fSeschrock /* 343e7cbe64fSgw * These properties must be removed from the list so the generic 344e9dbad6fSeschrock * property setting step won't apply to them. 
345e9dbad6fSeschrock */ 346ecd6cf80Smarks VERIFY(nvlist_remove_all(nvprops, 347e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 348ecd6cf80Smarks (void) nvlist_remove_all(nvprops, 349e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 350e9dbad6fSeschrock 351e9dbad6fSeschrock error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 352fa9e4066Sahrens DMU_OT_NONE, 0, tx); 353fa9e4066Sahrens ASSERT(error == 0); 354fa9e4066Sahrens 355fa9e4066Sahrens error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 356fa9e4066Sahrens DMU_OT_NONE, 0, tx); 357fa9e4066Sahrens ASSERT(error == 0); 358fa9e4066Sahrens 359e9dbad6fSeschrock error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 360fa9e4066Sahrens ASSERT(error == 0); 361fa9e4066Sahrens } 362fa9e4066Sahrens 363b77b9231SDan McDonald /* 364b77b9231SDan McDonald * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we 365b77b9231SDan McDonald * implement DKIOCFREE/free-long-range. 366b77b9231SDan McDonald */ 367b77b9231SDan McDonald static int 368b77b9231SDan McDonald zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap) 369b77b9231SDan McDonald { 370b77b9231SDan McDonald uint64_t offset, length; 371b77b9231SDan McDonald 372b77b9231SDan McDonald if (byteswap) 373b77b9231SDan McDonald byteswap_uint64_array(lr, sizeof (*lr)); 374b77b9231SDan McDonald 375b77b9231SDan McDonald offset = lr->lr_offset; 376b77b9231SDan McDonald length = lr->lr_length; 377b77b9231SDan McDonald 378b77b9231SDan McDonald return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length)); 379b77b9231SDan McDonald } 380b77b9231SDan McDonald 38122ac5be4Sperrin /* 38222ac5be4Sperrin * Replay a TX_WRITE ZIL transaction that didn't get committed 38322ac5be4Sperrin * after a system failure 38422ac5be4Sperrin */ 38522ac5be4Sperrin static int 38622ac5be4Sperrin zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 38722ac5be4Sperrin { 38822ac5be4Sperrin 
objset_t *os = zv->zv_objset; 38922ac5be4Sperrin char *data = (char *)(lr + 1); /* data follows lr_write_t */ 390b24ab676SJeff Bonwick uint64_t offset, length; 39122ac5be4Sperrin dmu_tx_t *tx; 39222ac5be4Sperrin int error; 39322ac5be4Sperrin 39422ac5be4Sperrin if (byteswap) 39522ac5be4Sperrin byteswap_uint64_array(lr, sizeof (*lr)); 39622ac5be4Sperrin 397b24ab676SJeff Bonwick offset = lr->lr_offset; 398b24ab676SJeff Bonwick length = lr->lr_length; 399b24ab676SJeff Bonwick 400b24ab676SJeff Bonwick /* If it's a dmu_sync() block, write the whole block */ 401b24ab676SJeff Bonwick if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 402b24ab676SJeff Bonwick uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 403b24ab676SJeff Bonwick if (length < blocksize) { 404b24ab676SJeff Bonwick offset -= offset % blocksize; 405b24ab676SJeff Bonwick length = blocksize; 406b24ab676SJeff Bonwick } 407b24ab676SJeff Bonwick } 408975c32a0SNeil Perrin 40922ac5be4Sperrin tx = dmu_tx_create(os); 410b24ab676SJeff Bonwick dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); 4111209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_WAIT); 41222ac5be4Sperrin if (error) { 41322ac5be4Sperrin dmu_tx_abort(tx); 41422ac5be4Sperrin } else { 415b24ab676SJeff Bonwick dmu_write(os, ZVOL_OBJ, offset, length, data, tx); 41622ac5be4Sperrin dmu_tx_commit(tx); 41722ac5be4Sperrin } 41822ac5be4Sperrin 41922ac5be4Sperrin return (error); 42022ac5be4Sperrin } 42122ac5be4Sperrin 42222ac5be4Sperrin /* ARGSUSED */ 42322ac5be4Sperrin static int 42422ac5be4Sperrin zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 42522ac5be4Sperrin { 426be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTSUP)); 42722ac5be4Sperrin } 42822ac5be4Sperrin 42922ac5be4Sperrin /* 43022ac5be4Sperrin * Callback vectors for replaying records. 431b77b9231SDan McDonald * Only TX_WRITE and TX_TRUNCATE are needed for zvol. 
43222ac5be4Sperrin */ 43322ac5be4Sperrin zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 43422ac5be4Sperrin zvol_replay_err, /* 0 no such transaction type */ 43522ac5be4Sperrin zvol_replay_err, /* TX_CREATE */ 43622ac5be4Sperrin zvol_replay_err, /* TX_MKDIR */ 43722ac5be4Sperrin zvol_replay_err, /* TX_MKXATTR */ 43822ac5be4Sperrin zvol_replay_err, /* TX_SYMLINK */ 43922ac5be4Sperrin zvol_replay_err, /* TX_REMOVE */ 44022ac5be4Sperrin zvol_replay_err, /* TX_RMDIR */ 44122ac5be4Sperrin zvol_replay_err, /* TX_LINK */ 44222ac5be4Sperrin zvol_replay_err, /* TX_RENAME */ 44322ac5be4Sperrin zvol_replay_write, /* TX_WRITE */ 444b77b9231SDan McDonald zvol_replay_truncate, /* TX_TRUNCATE */ 44522ac5be4Sperrin zvol_replay_err, /* TX_SETATTR */ 44622ac5be4Sperrin zvol_replay_err, /* TX_ACL */ 447975c32a0SNeil Perrin zvol_replay_err, /* TX_CREATE_ACL */ 448975c32a0SNeil Perrin zvol_replay_err, /* TX_CREATE_ATTR */ 449975c32a0SNeil Perrin zvol_replay_err, /* TX_CREATE_ACL_ATTR */ 450975c32a0SNeil Perrin zvol_replay_err, /* TX_MKDIR_ACL */ 451975c32a0SNeil Perrin zvol_replay_err, /* TX_MKDIR_ATTR */ 452975c32a0SNeil Perrin zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ 453975c32a0SNeil Perrin zvol_replay_err, /* TX_WRITE2 */ 45422ac5be4Sperrin }; 45522ac5be4Sperrin 456681d9761SEric Taylor int 457681d9761SEric Taylor zvol_name2minor(const char *name, minor_t *minor) 458681d9761SEric Taylor { 459681d9761SEric Taylor zvol_state_t *zv; 460681d9761SEric Taylor 461c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock); 462681d9761SEric Taylor zv = zvol_minor_lookup(name); 463681d9761SEric Taylor if (minor && zv) 464681d9761SEric Taylor *minor = zv->zv_minor; 465c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 466681d9761SEric Taylor return (zv ? 0 : -1); 467681d9761SEric Taylor } 468681d9761SEric Taylor 469e7cbe64fSgw /* 470e7cbe64fSgw * Create a minor node (plus a whole lot more) for the specified volume. 
471fa9e4066Sahrens */ 472fa9e4066Sahrens int 473681d9761SEric Taylor zvol_create_minor(const char *name) 474fa9e4066Sahrens { 475c99e4bdcSChris Kirby zfs_soft_state_t *zs; 476fa9e4066Sahrens zvol_state_t *zv; 477fa9e4066Sahrens objset_t *os; 47867bd71c6Sperrin dmu_object_info_t doi; 479fa9e4066Sahrens minor_t minor = 0; 480fa9e4066Sahrens char chrbuf[30], blkbuf[30]; 481fa9e4066Sahrens int error; 482fa9e4066Sahrens 483c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock); 484fa9e4066Sahrens 4851195e687SMark J Musante if (zvol_minor_lookup(name) != NULL) { 486c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 487be6fd75aSMatthew Ahrens return (SET_ERROR(EEXIST)); 488fa9e4066Sahrens } 489fa9e4066Sahrens 490503ad85cSMatthew Ahrens /* lie and say we're read-only */ 4916e0cbcaaSMatthew Ahrens error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); 492fa9e4066Sahrens 493fa9e4066Sahrens if (error) { 494c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 495fa9e4066Sahrens return (error); 496fa9e4066Sahrens } 497fa9e4066Sahrens 498c99e4bdcSChris Kirby if ((minor = zfsdev_minor_alloc()) == 0) { 4996e0cbcaaSMatthew Ahrens dmu_objset_disown(os, FTAG); 500c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 501be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 502fa9e4066Sahrens } 503fa9e4066Sahrens 504c99e4bdcSChris Kirby if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { 5056e0cbcaaSMatthew Ahrens dmu_objset_disown(os, FTAG); 506c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 507be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 508fa9e4066Sahrens } 509e9dbad6fSeschrock (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 510e9dbad6fSeschrock (char *)name); 511fa9e4066Sahrens 512681d9761SEric Taylor (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); 513fa9e4066Sahrens 514fa9e4066Sahrens if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 515fa9e4066Sahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 516c99e4bdcSChris Kirby 
ddi_soft_state_free(zfsdev_state, minor); 5176e0cbcaaSMatthew Ahrens dmu_objset_disown(os, FTAG); 518c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 519be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 520fa9e4066Sahrens } 521fa9e4066Sahrens 522681d9761SEric Taylor (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); 523fa9e4066Sahrens 524fa9e4066Sahrens if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 525fa9e4066Sahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 526fa9e4066Sahrens ddi_remove_minor_node(zfs_dip, chrbuf); 527c99e4bdcSChris Kirby ddi_soft_state_free(zfsdev_state, minor); 5286e0cbcaaSMatthew Ahrens dmu_objset_disown(os, FTAG); 529c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 530be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 531fa9e4066Sahrens } 532fa9e4066Sahrens 533c99e4bdcSChris Kirby zs = ddi_get_soft_state(zfsdev_state, minor); 534c99e4bdcSChris Kirby zs->zss_type = ZSST_ZVOL; 535c99e4bdcSChris Kirby zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); 536681d9761SEric Taylor (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 537fa9e4066Sahrens zv->zv_min_bs = DEV_BSHIFT; 538fa9e4066Sahrens zv->zv_minor = minor; 539fa9e4066Sahrens zv->zv_objset = os; 540f9af39baSGeorge Wilson if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) 541681d9761SEric Taylor zv->zv_flags |= ZVOL_RDONLY; 542c2e6a7d6Sperrin mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 543c2e6a7d6Sperrin avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 544c2e6a7d6Sperrin sizeof (rl_t), offsetof(rl_t, r_node)); 54588b7b0f2SMatthew Ahrens list_create(&zv->zv_extents, sizeof (zvol_extent_t), 54688b7b0f2SMatthew Ahrens offsetof(zvol_extent_t, ze_node)); 54767bd71c6Sperrin /* get and cache the blocksize */ 54867bd71c6Sperrin error = dmu_object_info(os, ZVOL_OBJ, &doi); 54967bd71c6Sperrin ASSERT(error == 0); 55067bd71c6Sperrin zv->zv_volblocksize = doi.doi_data_block_size; 55122ac5be4Sperrin 552f9af39baSGeorge Wilson if 
(spa_writeable(dmu_objset_spa(os))) { 553f9af39baSGeorge Wilson if (zil_replay_disable) 554f9af39baSGeorge Wilson zil_destroy(dmu_objset_zil(os), B_FALSE); 555f9af39baSGeorge Wilson else 556f9af39baSGeorge Wilson zil_replay(os, zv, zvol_replay_vector); 557f9af39baSGeorge Wilson } 5586e0cbcaaSMatthew Ahrens dmu_objset_disown(os, FTAG); 559681d9761SEric Taylor zv->zv_objset = NULL; 560fa9e4066Sahrens 561fa9e4066Sahrens zvol_minors++; 562fa9e4066Sahrens 563c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 564fa9e4066Sahrens 565fa9e4066Sahrens return (0); 566fa9e4066Sahrens } 567fa9e4066Sahrens 568fa9e4066Sahrens /* 569fa9e4066Sahrens * Remove minor node for the specified volume. 570fa9e4066Sahrens */ 571681d9761SEric Taylor static int 572681d9761SEric Taylor zvol_remove_zv(zvol_state_t *zv) 573681d9761SEric Taylor { 574681d9761SEric Taylor char nmbuf[20]; 575c99e4bdcSChris Kirby minor_t minor = zv->zv_minor; 576681d9761SEric Taylor 577c99e4bdcSChris Kirby ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 578681d9761SEric Taylor if (zv->zv_total_opens != 0) 579be6fd75aSMatthew Ahrens return (SET_ERROR(EBUSY)); 580681d9761SEric Taylor 581c99e4bdcSChris Kirby (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor); 582681d9761SEric Taylor ddi_remove_minor_node(zfs_dip, nmbuf); 583681d9761SEric Taylor 584c99e4bdcSChris Kirby (void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor); 585681d9761SEric Taylor ddi_remove_minor_node(zfs_dip, nmbuf); 586681d9761SEric Taylor 587681d9761SEric Taylor avl_destroy(&zv->zv_znode.z_range_avl); 588681d9761SEric Taylor mutex_destroy(&zv->zv_znode.z_range_lock); 589681d9761SEric Taylor 590c99e4bdcSChris Kirby kmem_free(zv, sizeof (zvol_state_t)); 591c99e4bdcSChris Kirby 592c99e4bdcSChris Kirby ddi_soft_state_free(zfsdev_state, minor); 593681d9761SEric Taylor 594681d9761SEric Taylor zvol_minors--; 595681d9761SEric Taylor return (0); 596681d9761SEric Taylor } 597681d9761SEric Taylor 598fa9e4066Sahrens int 599e9dbad6fSeschrock zvol_remove_minor(const 
char *name) 600fa9e4066Sahrens { 601fa9e4066Sahrens zvol_state_t *zv; 602681d9761SEric Taylor int rc; 603fa9e4066Sahrens 604c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock); 605e9dbad6fSeschrock if ((zv = zvol_minor_lookup(name)) == NULL) { 606c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 607be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 608fa9e4066Sahrens } 609681d9761SEric Taylor rc = zvol_remove_zv(zv); 610c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 611681d9761SEric Taylor return (rc); 612681d9761SEric Taylor } 613fa9e4066Sahrens 614681d9761SEric Taylor int 615681d9761SEric Taylor zvol_first_open(zvol_state_t *zv) 616681d9761SEric Taylor { 617681d9761SEric Taylor objset_t *os; 618681d9761SEric Taylor uint64_t volsize; 619681d9761SEric Taylor int error; 620681d9761SEric Taylor uint64_t readonly; 621fa9e4066Sahrens 622681d9761SEric Taylor /* lie and say we're read-only */ 623681d9761SEric Taylor error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, 624681d9761SEric Taylor zvol_tag, &os); 625681d9761SEric Taylor if (error) 626681d9761SEric Taylor return (error); 627fa9e4066Sahrens 628c61ea566SGeorge Wilson zv->zv_objset = os; 629681d9761SEric Taylor error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 630681d9761SEric Taylor if (error) { 631681d9761SEric Taylor ASSERT(error == 0); 632681d9761SEric Taylor dmu_objset_disown(os, zvol_tag); 633681d9761SEric Taylor return (error); 634681d9761SEric Taylor } 635c61ea566SGeorge Wilson 63694d1a210STim Haley error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); 63794d1a210STim Haley if (error) { 63894d1a210STim Haley dmu_objset_disown(os, zvol_tag); 63994d1a210STim Haley return (error); 64094d1a210STim Haley } 641c61ea566SGeorge Wilson 642c61ea566SGeorge Wilson zvol_size_changed(zv, volsize); 643681d9761SEric Taylor zv->zv_zilog = zil_open(os, zvol_get_data); 644fa9e4066Sahrens 645681d9761SEric Taylor VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly, 646681d9761SEric 
Taylor NULL) == 0); 647f9af39baSGeorge Wilson if (readonly || dmu_objset_is_snapshot(os) || 648f9af39baSGeorge Wilson !spa_writeable(dmu_objset_spa(os))) 649681d9761SEric Taylor zv->zv_flags |= ZVOL_RDONLY; 650681d9761SEric Taylor else 651681d9761SEric Taylor zv->zv_flags &= ~ZVOL_RDONLY; 652681d9761SEric Taylor return (error); 653681d9761SEric Taylor } 654fa9e4066Sahrens 655681d9761SEric Taylor void 656681d9761SEric Taylor zvol_last_close(zvol_state_t *zv) 657681d9761SEric Taylor { 65822ac5be4Sperrin zil_close(zv->zv_zilog); 65922ac5be4Sperrin zv->zv_zilog = NULL; 6602e2c1355SMatthew Ahrens 66194d1a210STim Haley dmu_buf_rele(zv->zv_dbuf, zvol_tag); 66294d1a210STim Haley zv->zv_dbuf = NULL; 6632e2c1355SMatthew Ahrens 6642e2c1355SMatthew Ahrens /* 6652e2c1355SMatthew Ahrens * Evict cached data 6662e2c1355SMatthew Ahrens */ 6672e2c1355SMatthew Ahrens if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && 6682e2c1355SMatthew Ahrens !(zv->zv_flags & ZVOL_RDONLY)) 6692e2c1355SMatthew Ahrens txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 6703b2aab18SMatthew Ahrens dmu_objset_evict_dbufs(zv->zv_objset); 6712e2c1355SMatthew Ahrens 672503ad85cSMatthew Ahrens dmu_objset_disown(zv->zv_objset, zvol_tag); 673fa9e4066Sahrens zv->zv_objset = NULL; 674fa9e4066Sahrens } 675fa9e4066Sahrens 676e7cbe64fSgw int 677e7cbe64fSgw zvol_prealloc(zvol_state_t *zv) 678e7cbe64fSgw { 679e7cbe64fSgw objset_t *os = zv->zv_objset; 680e7cbe64fSgw dmu_tx_t *tx; 681e7cbe64fSgw uint64_t refd, avail, usedobjs, availobjs; 682e7cbe64fSgw uint64_t resid = zv->zv_volsize; 683e7cbe64fSgw uint64_t off = 0; 684e7cbe64fSgw 685e7cbe64fSgw /* Check the space usage before attempting to allocate the space */ 686e7cbe64fSgw dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); 687e7cbe64fSgw if (avail < zv->zv_volsize) 688be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSPC)); 689e7cbe64fSgw 690e7cbe64fSgw /* Free old extents if they exist */ 691e7cbe64fSgw zvol_free_extents(zv); 692e7cbe64fSgw 
693e7cbe64fSgw while (resid != 0) { 694e7cbe64fSgw int error; 695*b5152584SMatthew Ahrens uint64_t bytes = MIN(resid, SPA_OLD_MAXBLOCKSIZE); 696e7cbe64fSgw 697e7cbe64fSgw tx = dmu_tx_create(os); 698e7cbe64fSgw dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 699e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 700e7cbe64fSgw if (error) { 701e7cbe64fSgw dmu_tx_abort(tx); 702cdb0ab79Smaybee (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); 703e7cbe64fSgw return (error); 704e7cbe64fSgw } 70582c9918fSTim Haley dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); 706e7cbe64fSgw dmu_tx_commit(tx); 707e7cbe64fSgw off += bytes; 708e7cbe64fSgw resid -= bytes; 709e7cbe64fSgw } 710e7cbe64fSgw txg_wait_synced(dmu_objset_pool(os), 0); 711e7cbe64fSgw 712e7cbe64fSgw return (0); 713e7cbe64fSgw } 714e7cbe64fSgw 7153b2aab18SMatthew Ahrens static int 716681d9761SEric Taylor zvol_update_volsize(objset_t *os, uint64_t volsize) 717e7cbe64fSgw { 718e7cbe64fSgw dmu_tx_t *tx; 719e7cbe64fSgw int error; 720e7cbe64fSgw 721c99e4bdcSChris Kirby ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 722e7cbe64fSgw 723681d9761SEric Taylor tx = dmu_tx_create(os); 724e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 7254bb73804SMatthew Ahrens dmu_tx_mark_netfree(tx); 726e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 727e7cbe64fSgw if (error) { 728e7cbe64fSgw dmu_tx_abort(tx); 729e7cbe64fSgw return (error); 730e7cbe64fSgw } 731e7cbe64fSgw 732681d9761SEric Taylor error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, 733e7cbe64fSgw &volsize, tx); 734e7cbe64fSgw dmu_tx_commit(tx); 735e7cbe64fSgw 736e7cbe64fSgw if (error == 0) 737681d9761SEric Taylor error = dmu_free_long_range(os, 738cdb0ab79Smaybee ZVOL_OBJ, volsize, DMU_OBJECT_END); 739681d9761SEric Taylor return (error); 740681d9761SEric Taylor } 741e7cbe64fSgw 742681d9761SEric Taylor void 743681d9761SEric Taylor zvol_remove_minors(const char *name) 744681d9761SEric Taylor { 745681d9761SEric Taylor zvol_state_t *zv; 746681d9761SEric Taylor char *namebuf; 
747681d9761SEric Taylor minor_t minor; 748681d9761SEric Taylor 749681d9761SEric Taylor namebuf = kmem_zalloc(strlen(name) + 2, KM_SLEEP); 750681d9761SEric Taylor (void) strncpy(namebuf, name, strlen(name)); 751681d9761SEric Taylor (void) strcat(namebuf, "/"); 752c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock); 753c99e4bdcSChris Kirby for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) { 754681d9761SEric Taylor 755c99e4bdcSChris Kirby zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 756681d9761SEric Taylor if (zv == NULL) 757681d9761SEric Taylor continue; 758681d9761SEric Taylor if (strncmp(namebuf, zv->zv_name, strlen(namebuf)) == 0) 759681d9761SEric Taylor (void) zvol_remove_zv(zv); 760e7cbe64fSgw } 761681d9761SEric Taylor kmem_free(namebuf, strlen(name) + 2); 762681d9761SEric Taylor 763c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 764e7cbe64fSgw } 765e7cbe64fSgw 766c61ea566SGeorge Wilson static int 7673b2aab18SMatthew Ahrens zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize) 768fa9e4066Sahrens { 769e7cbe64fSgw uint64_t old_volsize = 0ULL; 7703b2aab18SMatthew Ahrens int error = 0; 771fa9e4066Sahrens 772c61ea566SGeorge Wilson ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 773c61ea566SGeorge Wilson 774e7cbe64fSgw /* 775e7cbe64fSgw * Reinitialize the dump area to the new size. If we 776681d9761SEric Taylor * failed to resize the dump area then restore it back to 777c61ea566SGeorge Wilson * its original size. We must set the new volsize prior 778c61ea566SGeorge Wilson * to calling dumpvp_resize() to ensure that the devices' 779c61ea566SGeorge Wilson * size(9P) is not visible by the dump subsystem. 
780e7cbe64fSgw */ 7813b2aab18SMatthew Ahrens old_volsize = zv->zv_volsize; 7823b2aab18SMatthew Ahrens zvol_size_changed(zv, volsize); 7833b2aab18SMatthew Ahrens 7843b2aab18SMatthew Ahrens if (zv->zv_flags & ZVOL_DUMPIFIED) { 7853b2aab18SMatthew Ahrens if ((error = zvol_dumpify(zv)) != 0 || 7863b2aab18SMatthew Ahrens (error = dumpvp_resize()) != 0) { 7873b2aab18SMatthew Ahrens int dumpify_error; 7883b2aab18SMatthew Ahrens 7893b2aab18SMatthew Ahrens (void) zvol_update_volsize(zv->zv_objset, old_volsize); 7903b2aab18SMatthew Ahrens zvol_size_changed(zv, old_volsize); 7913b2aab18SMatthew Ahrens dumpify_error = zvol_dumpify(zv); 7923b2aab18SMatthew Ahrens error = dumpify_error ? dumpify_error : error; 793681d9761SEric Taylor } 794fa9e4066Sahrens } 795fa9e4066Sahrens 796573ca77eSGeorge Wilson /* 797573ca77eSGeorge Wilson * Generate a LUN expansion event. 798573ca77eSGeorge Wilson */ 7993b2aab18SMatthew Ahrens if (error == 0) { 800573ca77eSGeorge Wilson sysevent_id_t eid; 801573ca77eSGeorge Wilson nvlist_t *attr; 802573ca77eSGeorge Wilson char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 803573ca77eSGeorge Wilson 804681d9761SEric Taylor (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV, 805573ca77eSGeorge Wilson zv->zv_minor); 806573ca77eSGeorge Wilson 807573ca77eSGeorge Wilson VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 808573ca77eSGeorge Wilson VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 809573ca77eSGeorge Wilson 810573ca77eSGeorge Wilson (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 811573ca77eSGeorge Wilson ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 812573ca77eSGeorge Wilson 813573ca77eSGeorge Wilson nvlist_free(attr); 814573ca77eSGeorge Wilson kmem_free(physpath, MAXPATHLEN); 815573ca77eSGeorge Wilson } 816c61ea566SGeorge Wilson return (error); 817c61ea566SGeorge Wilson } 818573ca77eSGeorge Wilson 819c61ea566SGeorge Wilson int 820c61ea566SGeorge Wilson zvol_set_volsize(const char *name, uint64_t volsize) 
821c61ea566SGeorge Wilson { 822c61ea566SGeorge Wilson zvol_state_t *zv = NULL; 823c61ea566SGeorge Wilson objset_t *os; 824c61ea566SGeorge Wilson int error; 825c61ea566SGeorge Wilson dmu_object_info_t doi; 826c61ea566SGeorge Wilson uint64_t readonly; 8273b2aab18SMatthew Ahrens boolean_t owned = B_FALSE; 8283b2aab18SMatthew Ahrens 8293b2aab18SMatthew Ahrens error = dsl_prop_get_integer(name, 8303b2aab18SMatthew Ahrens zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL); 8313b2aab18SMatthew Ahrens if (error != 0) 8323b2aab18SMatthew Ahrens return (error); 8333b2aab18SMatthew Ahrens if (readonly) 834be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 835c61ea566SGeorge Wilson 836c61ea566SGeorge Wilson mutex_enter(&zfsdev_state_lock); 837c61ea566SGeorge Wilson zv = zvol_minor_lookup(name); 8383b2aab18SMatthew Ahrens 8393b2aab18SMatthew Ahrens if (zv == NULL || zv->zv_objset == NULL) { 8403b2aab18SMatthew Ahrens if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, 8413b2aab18SMatthew Ahrens FTAG, &os)) != 0) { 8423b2aab18SMatthew Ahrens mutex_exit(&zfsdev_state_lock); 8433b2aab18SMatthew Ahrens return (error); 8443b2aab18SMatthew Ahrens } 8453b2aab18SMatthew Ahrens owned = B_TRUE; 8463b2aab18SMatthew Ahrens if (zv != NULL) 8473b2aab18SMatthew Ahrens zv->zv_objset = os; 8483b2aab18SMatthew Ahrens } else { 8493b2aab18SMatthew Ahrens os = zv->zv_objset; 850c61ea566SGeorge Wilson } 851c61ea566SGeorge Wilson 852c61ea566SGeorge Wilson if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || 8533b2aab18SMatthew Ahrens (error = zvol_check_volsize(volsize, doi.doi_data_block_size)) != 0) 854c61ea566SGeorge Wilson goto out; 855c61ea566SGeorge Wilson 8563b2aab18SMatthew Ahrens error = zvol_update_volsize(os, volsize); 857c61ea566SGeorge Wilson 8583b2aab18SMatthew Ahrens if (error == 0 && zv != NULL) 8593b2aab18SMatthew Ahrens error = zvol_update_live_volsize(zv, volsize); 860bb0ade09Sahrens out: 8613b2aab18SMatthew Ahrens if (owned) { 8623b2aab18SMatthew Ahrens 
dmu_objset_disown(os, FTAG); 8633b2aab18SMatthew Ahrens if (zv != NULL) 8643b2aab18SMatthew Ahrens zv->zv_objset = NULL; 8653b2aab18SMatthew Ahrens } 866c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 867fa9e4066Sahrens return (error); 868fa9e4066Sahrens } 869fa9e4066Sahrens 870fa9e4066Sahrens /*ARGSUSED*/ 871fa9e4066Sahrens int 872fa9e4066Sahrens zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) 873fa9e4066Sahrens { 874fa9e4066Sahrens zvol_state_t *zv; 875681d9761SEric Taylor int err = 0; 876fa9e4066Sahrens 877c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock); 878fa9e4066Sahrens 879c99e4bdcSChris Kirby zv = zfsdev_get_soft_state(getminor(*devp), ZSST_ZVOL); 880fa9e4066Sahrens if (zv == NULL) { 881c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 882be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 883fa9e4066Sahrens } 884fa9e4066Sahrens 885681d9761SEric Taylor if (zv->zv_total_opens == 0) 886681d9761SEric Taylor err = zvol_first_open(zv); 887681d9761SEric Taylor if (err) { 888c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 889681d9761SEric Taylor return (err); 890681d9761SEric Taylor } 891681d9761SEric Taylor if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 892be6fd75aSMatthew Ahrens err = SET_ERROR(EROFS); 893681d9761SEric Taylor goto out; 894fa9e4066Sahrens } 895c7f714e2SEric Taylor if (zv->zv_flags & ZVOL_EXCL) { 896be6fd75aSMatthew Ahrens err = SET_ERROR(EBUSY); 897681d9761SEric Taylor goto out; 898c7f714e2SEric Taylor } 899c7f714e2SEric Taylor if (flag & FEXCL) { 900c7f714e2SEric Taylor if (zv->zv_total_opens != 0) { 901be6fd75aSMatthew Ahrens err = SET_ERROR(EBUSY); 902681d9761SEric Taylor goto out; 903c7f714e2SEric Taylor } 904c7f714e2SEric Taylor zv->zv_flags |= ZVOL_EXCL; 905c7f714e2SEric Taylor } 906fa9e4066Sahrens 907fa9e4066Sahrens if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) { 908fa9e4066Sahrens zv->zv_open_count[otyp]++; 909fa9e4066Sahrens zv->zv_total_opens++; 910fa9e4066Sahrens } 911c99e4bdcSChris Kirby 
mutex_exit(&zfsdev_state_lock); 912fa9e4066Sahrens 913681d9761SEric Taylor return (err); 914681d9761SEric Taylor out: 915681d9761SEric Taylor if (zv->zv_total_opens == 0) 916681d9761SEric Taylor zvol_last_close(zv); 917c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 918681d9761SEric Taylor return (err); 919fa9e4066Sahrens } 920fa9e4066Sahrens 921fa9e4066Sahrens /*ARGSUSED*/ 922fa9e4066Sahrens int 923fa9e4066Sahrens zvol_close(dev_t dev, int flag, int otyp, cred_t *cr) 924fa9e4066Sahrens { 925fa9e4066Sahrens minor_t minor = getminor(dev); 926fa9e4066Sahrens zvol_state_t *zv; 927681d9761SEric Taylor int error = 0; 928fa9e4066Sahrens 929c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock); 930fa9e4066Sahrens 931c99e4bdcSChris Kirby zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 932fa9e4066Sahrens if (zv == NULL) { 933c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 934be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 935fa9e4066Sahrens } 936fa9e4066Sahrens 937c7f714e2SEric Taylor if (zv->zv_flags & ZVOL_EXCL) { 938c7f714e2SEric Taylor ASSERT(zv->zv_total_opens == 1); 939c7f714e2SEric Taylor zv->zv_flags &= ~ZVOL_EXCL; 940fa9e4066Sahrens } 941fa9e4066Sahrens 942fa9e4066Sahrens /* 943fa9e4066Sahrens * If the open count is zero, this is a spurious close. 944fa9e4066Sahrens * That indicates a bug in the kernel / DDI framework. 945fa9e4066Sahrens */ 946fa9e4066Sahrens ASSERT(zv->zv_open_count[otyp] != 0); 947fa9e4066Sahrens ASSERT(zv->zv_total_opens != 0); 948fa9e4066Sahrens 949fa9e4066Sahrens /* 950fa9e4066Sahrens * You may get multiple opens, but only one close. 
951fa9e4066Sahrens */ 952fa9e4066Sahrens zv->zv_open_count[otyp]--; 953fa9e4066Sahrens zv->zv_total_opens--; 954fa9e4066Sahrens 955681d9761SEric Taylor if (zv->zv_total_opens == 0) 956681d9761SEric Taylor zvol_last_close(zv); 957fa9e4066Sahrens 958c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 959681d9761SEric Taylor return (error); 960fa9e4066Sahrens } 961fa9e4066Sahrens 962feb08c6bSbillm static void 963b24ab676SJeff Bonwick zvol_get_done(zgd_t *zgd, int error) 96467bd71c6Sperrin { 965b24ab676SJeff Bonwick if (zgd->zgd_db) 966b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 967b24ab676SJeff Bonwick 968b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 969b24ab676SJeff Bonwick 970b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 971b24ab676SJeff Bonwick zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 97267bd71c6Sperrin 97367bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 97467bd71c6Sperrin } 97567bd71c6Sperrin 97667bd71c6Sperrin /* 97767bd71c6Sperrin * Get data to generate a TX_WRITE intent log record. 
97867bd71c6Sperrin */ 979feb08c6bSbillm static int 98067bd71c6Sperrin zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 98167bd71c6Sperrin { 98267bd71c6Sperrin zvol_state_t *zv = arg; 98367bd71c6Sperrin objset_t *os = zv->zv_objset; 984b24ab676SJeff Bonwick uint64_t object = ZVOL_OBJ; 985b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 986b24ab676SJeff Bonwick uint64_t size = lr->lr_length; /* length of user data */ 987b24ab676SJeff Bonwick blkptr_t *bp = &lr->lr_blkptr; 98867bd71c6Sperrin dmu_buf_t *db; 98967bd71c6Sperrin zgd_t *zgd; 99067bd71c6Sperrin int error; 99167bd71c6Sperrin 992b24ab676SJeff Bonwick ASSERT(zio != NULL); 993b24ab676SJeff Bonwick ASSERT(size != 0); 994b24ab676SJeff Bonwick 995b24ab676SJeff Bonwick zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 996b24ab676SJeff Bonwick zgd->zgd_zilog = zv->zv_zilog; 997b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); 998feb08c6bSbillm 999c2e6a7d6Sperrin /* 1000c2e6a7d6Sperrin * Write records come in two flavors: immediate and indirect. 1001c2e6a7d6Sperrin * For small writes it's cheaper to store the data with the 1002c2e6a7d6Sperrin * log record (immediate); for large writes it's cheaper to 1003c2e6a7d6Sperrin * sync the data and get a pointer to it (indirect) so that 1004c2e6a7d6Sperrin * we don't have to write the data twice. 
1005c2e6a7d6Sperrin */ 1006b24ab676SJeff Bonwick if (buf != NULL) { /* immediate write */ 1007b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 1008b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 1009b24ab676SJeff Bonwick } else { 1010b24ab676SJeff Bonwick size = zv->zv_volblocksize; 1011b24ab676SJeff Bonwick offset = P2ALIGN(offset, size); 101247cb52daSJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db, 101347cb52daSJeff Bonwick DMU_READ_NO_PREFETCH); 1014b24ab676SJeff Bonwick if (error == 0) { 101580901aeaSGeorge Wilson blkptr_t *obp = dmu_buf_get_blkptr(db); 101680901aeaSGeorge Wilson if (obp) { 101780901aeaSGeorge Wilson ASSERT(BP_IS_HOLE(bp)); 101880901aeaSGeorge Wilson *bp = *obp; 101980901aeaSGeorge Wilson } 102080901aeaSGeorge Wilson 1021b24ab676SJeff Bonwick zgd->zgd_db = db; 1022b24ab676SJeff Bonwick zgd->zgd_bp = bp; 102367bd71c6Sperrin 1024b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 1025b24ab676SJeff Bonwick ASSERT(db->db_size == size); 102667bd71c6Sperrin 1027b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 1028b24ab676SJeff Bonwick zvol_get_done, zgd); 1029975c32a0SNeil Perrin 1030b24ab676SJeff Bonwick if (error == 0) 1031b24ab676SJeff Bonwick return (0); 1032b24ab676SJeff Bonwick } 1033975c32a0SNeil Perrin } 1034975c32a0SNeil Perrin 1035b24ab676SJeff Bonwick zvol_get_done(zgd, error); 1036b24ab676SJeff Bonwick 103767bd71c6Sperrin return (error); 103867bd71c6Sperrin } 103967bd71c6Sperrin 1040a24e15ceSperrin /* 1041a24e15ceSperrin * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. 104222ac5be4Sperrin * 104322ac5be4Sperrin * We store data in the log buffers if it's small enough. 104467bd71c6Sperrin * Otherwise we will later flush the data out via dmu_sync(). 
104522ac5be4Sperrin */ 104667bd71c6Sperrin ssize_t zvol_immediate_write_sz = 32768; 104722ac5be4Sperrin 1048feb08c6bSbillm static void 1049510b6c0eSNeil Perrin zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, 1050510b6c0eSNeil Perrin boolean_t sync) 105122ac5be4Sperrin { 1052feb08c6bSbillm uint32_t blocksize = zv->zv_volblocksize; 10531209a471SNeil Perrin zilog_t *zilog = zv->zv_zilog; 1054510b6c0eSNeil Perrin boolean_t slogging; 1055e09fa4daSNeil Perrin ssize_t immediate_write_sz; 1056510b6c0eSNeil Perrin 1057b24ab676SJeff Bonwick if (zil_replaying(zilog, tx)) 10581209a471SNeil Perrin return; 10591209a471SNeil Perrin 1060e09fa4daSNeil Perrin immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) 1061e09fa4daSNeil Perrin ? 0 : zvol_immediate_write_sz; 1062e09fa4daSNeil Perrin 1063e09fa4daSNeil Perrin slogging = spa_has_slogs(zilog->zl_spa) && 1064e09fa4daSNeil Perrin (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); 1065feb08c6bSbillm 1066510b6c0eSNeil Perrin while (resid) { 1067510b6c0eSNeil Perrin itx_t *itx; 1068510b6c0eSNeil Perrin lr_write_t *lr; 1069510b6c0eSNeil Perrin ssize_t len; 1070510b6c0eSNeil Perrin itx_wr_state_t write_state; 1071510b6c0eSNeil Perrin 1072510b6c0eSNeil Perrin /* 1073510b6c0eSNeil Perrin * Unlike zfs_log_write() we can be called with 1074510b6c0eSNeil Perrin * upto DMU_MAX_ACCESS/2 (5MB) writes. 
1075510b6c0eSNeil Perrin */ 1076e09fa4daSNeil Perrin if (blocksize > immediate_write_sz && !slogging && 1077510b6c0eSNeil Perrin resid >= blocksize && off % blocksize == 0) { 1078510b6c0eSNeil Perrin write_state = WR_INDIRECT; /* uses dmu_sync */ 1079510b6c0eSNeil Perrin len = blocksize; 1080510b6c0eSNeil Perrin } else if (sync) { 1081510b6c0eSNeil Perrin write_state = WR_COPIED; 1082510b6c0eSNeil Perrin len = MIN(ZIL_MAX_LOG_DATA, resid); 1083510b6c0eSNeil Perrin } else { 1084510b6c0eSNeil Perrin write_state = WR_NEED_COPY; 1085510b6c0eSNeil Perrin len = MIN(ZIL_MAX_LOG_DATA, resid); 1086510b6c0eSNeil Perrin } 1087510b6c0eSNeil Perrin 1088510b6c0eSNeil Perrin itx = zil_itx_create(TX_WRITE, sizeof (*lr) + 1089510b6c0eSNeil Perrin (write_state == WR_COPIED ? len : 0)); 1090feb08c6bSbillm lr = (lr_write_t *)&itx->itx_lr; 1091510b6c0eSNeil Perrin if (write_state == WR_COPIED && dmu_read(zv->zv_objset, 10927bfdf011SNeil Perrin ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { 1093b24ab676SJeff Bonwick zil_itx_destroy(itx); 1094510b6c0eSNeil Perrin itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1095510b6c0eSNeil Perrin lr = (lr_write_t *)&itx->itx_lr; 1096510b6c0eSNeil Perrin write_state = WR_NEED_COPY; 1097510b6c0eSNeil Perrin } 1098510b6c0eSNeil Perrin 1099510b6c0eSNeil Perrin itx->itx_wr_state = write_state; 1100510b6c0eSNeil Perrin if (write_state == WR_NEED_COPY) 1101510b6c0eSNeil Perrin itx->itx_sod += len; 1102feb08c6bSbillm lr->lr_foid = ZVOL_OBJ; 1103feb08c6bSbillm lr->lr_offset = off; 1104510b6c0eSNeil Perrin lr->lr_length = len; 1105b24ab676SJeff Bonwick lr->lr_blkoff = 0; 1106feb08c6bSbillm BP_ZERO(&lr->lr_blkptr); 1107feb08c6bSbillm 1108510b6c0eSNeil Perrin itx->itx_private = zv; 1109510b6c0eSNeil Perrin itx->itx_sync = sync; 1110510b6c0eSNeil Perrin 11115002558fSNeil Perrin zil_itx_assign(zilog, itx, tx); 1112510b6c0eSNeil Perrin 1113510b6c0eSNeil Perrin off += len; 1114510b6c0eSNeil Perrin resid -= len; 111522ac5be4Sperrin } 111622ac5be4Sperrin } 
111722ac5be4Sperrin 111888b7b0f2SMatthew Ahrens static int 1119810e43b2SBill Pijewski zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, 1120810e43b2SBill Pijewski uint64_t size, boolean_t doread, boolean_t isdump) 1121e7cbe64fSgw { 1122e7cbe64fSgw vdev_disk_t *dvd; 1123e7cbe64fSgw int c; 1124e7cbe64fSgw int numerrors = 0; 1125e7cbe64fSgw 1126810e43b2SBill Pijewski if (vd->vdev_ops == &vdev_mirror_ops || 1127810e43b2SBill Pijewski vd->vdev_ops == &vdev_replacing_ops || 1128810e43b2SBill Pijewski vd->vdev_ops == &vdev_spare_ops) { 1129810e43b2SBill Pijewski for (c = 0; c < vd->vdev_children; c++) { 1130810e43b2SBill Pijewski int err = zvol_dumpio_vdev(vd->vdev_child[c], 1131810e43b2SBill Pijewski addr, offset, origoffset, size, doread, isdump); 1132810e43b2SBill Pijewski if (err != 0) { 1133810e43b2SBill Pijewski numerrors++; 1134810e43b2SBill Pijewski } else if (doread) { 1135810e43b2SBill Pijewski break; 1136810e43b2SBill Pijewski } 1137e7cbe64fSgw } 1138e7cbe64fSgw } 1139e7cbe64fSgw 1140810e43b2SBill Pijewski if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops) 1141e7cbe64fSgw return (numerrors < vd->vdev_children ? 
0 : EIO); 1142e7cbe64fSgw 1143dc0bb255SEric Taylor if (doread && !vdev_readable(vd)) 1144be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1145dc0bb255SEric Taylor else if (!doread && !vdev_writeable(vd)) 1146be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1147e7cbe64fSgw 1148810e43b2SBill Pijewski if (vd->vdev_ops == &vdev_raidz_ops) { 1149810e43b2SBill Pijewski return (vdev_raidz_physio(vd, 1150810e43b2SBill Pijewski addr, size, offset, origoffset, doread, isdump)); 1151810e43b2SBill Pijewski } 1152810e43b2SBill Pijewski 1153e7cbe64fSgw offset += VDEV_LABEL_START_SIZE; 1154e7cbe64fSgw 1155e7cbe64fSgw if (ddi_in_panic() || isdump) { 115688b7b0f2SMatthew Ahrens ASSERT(!doread); 115788b7b0f2SMatthew Ahrens if (doread) 1158be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1159810e43b2SBill Pijewski dvd = vd->vdev_tsd; 1160810e43b2SBill Pijewski ASSERT3P(dvd, !=, NULL); 1161e7cbe64fSgw return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 1162e7cbe64fSgw lbtodb(size))); 1163e7cbe64fSgw } else { 1164810e43b2SBill Pijewski dvd = vd->vdev_tsd; 1165810e43b2SBill Pijewski ASSERT3P(dvd, !=, NULL); 1166810e43b2SBill Pijewski return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size, 1167810e43b2SBill Pijewski offset, doread ? B_READ : B_WRITE)); 1168e7cbe64fSgw } 1169e7cbe64fSgw } 1170e7cbe64fSgw 117188b7b0f2SMatthew Ahrens static int 117288b7b0f2SMatthew Ahrens zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, 117388b7b0f2SMatthew Ahrens boolean_t doread, boolean_t isdump) 1174e7cbe64fSgw { 1175e7cbe64fSgw vdev_t *vd; 1176e7cbe64fSgw int error; 117788b7b0f2SMatthew Ahrens zvol_extent_t *ze; 1178e7cbe64fSgw spa_t *spa = dmu_objset_spa(zv->zv_objset); 1179e7cbe64fSgw 118088b7b0f2SMatthew Ahrens /* Must be sector aligned, and not stradle a block boundary. 
*/ 118188b7b0f2SMatthew Ahrens if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || 118288b7b0f2SMatthew Ahrens P2BOUNDARY(offset, size, zv->zv_volblocksize)) { 1183be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 118488b7b0f2SMatthew Ahrens } 118588b7b0f2SMatthew Ahrens ASSERT(size <= zv->zv_volblocksize); 1186e7cbe64fSgw 118788b7b0f2SMatthew Ahrens /* Locate the extent this belongs to */ 118888b7b0f2SMatthew Ahrens ze = list_head(&zv->zv_extents); 118988b7b0f2SMatthew Ahrens while (offset >= ze->ze_nblks * zv->zv_volblocksize) { 119088b7b0f2SMatthew Ahrens offset -= ze->ze_nblks * zv->zv_volblocksize; 119188b7b0f2SMatthew Ahrens ze = list_next(&zv->zv_extents, ze); 119288b7b0f2SMatthew Ahrens } 119324cc0e1cSGeorge Wilson 11943b2aab18SMatthew Ahrens if (ze == NULL) 1195be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 11963b2aab18SMatthew Ahrens 119724cc0e1cSGeorge Wilson if (!ddi_in_panic()) 119824cc0e1cSGeorge Wilson spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 119924cc0e1cSGeorge Wilson 120088b7b0f2SMatthew Ahrens vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); 120188b7b0f2SMatthew Ahrens offset += DVA_GET_OFFSET(&ze->ze_dva); 1202810e43b2SBill Pijewski error = zvol_dumpio_vdev(vd, addr, offset, DVA_GET_OFFSET(&ze->ze_dva), 1203810e43b2SBill Pijewski size, doread, isdump); 120424cc0e1cSGeorge Wilson 120524cc0e1cSGeorge Wilson if (!ddi_in_panic()) 120624cc0e1cSGeorge Wilson spa_config_exit(spa, SCL_STATE, FTAG); 120724cc0e1cSGeorge Wilson 1208e7cbe64fSgw return (error); 1209e7cbe64fSgw } 1210e7cbe64fSgw 1211fa9e4066Sahrens int 1212fa9e4066Sahrens zvol_strategy(buf_t *bp) 1213fa9e4066Sahrens { 1214c99e4bdcSChris Kirby zfs_soft_state_t *zs = NULL; 1215c99e4bdcSChris Kirby zvol_state_t *zv; 1216fa9e4066Sahrens uint64_t off, volsize; 121788b7b0f2SMatthew Ahrens size_t resid; 1218fa9e4066Sahrens char *addr; 121922ac5be4Sperrin objset_t *os; 1220c2e6a7d6Sperrin rl_t *rl; 1221fa9e4066Sahrens int error = 0; 122288b7b0f2SMatthew Ahrens boolean_t 
doread = bp->b_flags & B_READ; 1223810e43b2SBill Pijewski boolean_t is_dumpified; 1224510b6c0eSNeil Perrin boolean_t sync; 1225fa9e4066Sahrens 1226c99e4bdcSChris Kirby if (getminor(bp->b_edev) == 0) { 1227be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 1228c99e4bdcSChris Kirby } else { 1229c99e4bdcSChris Kirby zs = ddi_get_soft_state(zfsdev_state, getminor(bp->b_edev)); 1230c99e4bdcSChris Kirby if (zs == NULL) 1231be6fd75aSMatthew Ahrens error = SET_ERROR(ENXIO); 1232c99e4bdcSChris Kirby else if (zs->zss_type != ZSST_ZVOL) 1233be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 1234fa9e4066Sahrens } 1235fa9e4066Sahrens 1236c99e4bdcSChris Kirby if (error) { 1237c99e4bdcSChris Kirby bioerror(bp, error); 1238fa9e4066Sahrens biodone(bp); 1239fa9e4066Sahrens return (0); 1240fa9e4066Sahrens } 1241fa9e4066Sahrens 1242c99e4bdcSChris Kirby zv = zs->zss_data; 1243c99e4bdcSChris Kirby 1244681d9761SEric Taylor if (!(bp->b_flags & B_READ) && (zv->zv_flags & ZVOL_RDONLY)) { 1245fa9e4066Sahrens bioerror(bp, EROFS); 1246fa9e4066Sahrens biodone(bp); 1247fa9e4066Sahrens return (0); 1248fa9e4066Sahrens } 1249fa9e4066Sahrens 1250fa9e4066Sahrens off = ldbtob(bp->b_blkno); 1251fa9e4066Sahrens volsize = zv->zv_volsize; 1252fa9e4066Sahrens 125322ac5be4Sperrin os = zv->zv_objset; 125422ac5be4Sperrin ASSERT(os != NULL); 1255fa9e4066Sahrens 1256fa9e4066Sahrens bp_mapin(bp); 1257fa9e4066Sahrens addr = bp->b_un.b_addr; 1258fa9e4066Sahrens resid = bp->b_bcount; 1259fa9e4066Sahrens 126088b7b0f2SMatthew Ahrens if (resid > 0 && (off < 0 || off >= volsize)) { 126188b7b0f2SMatthew Ahrens bioerror(bp, EIO); 126288b7b0f2SMatthew Ahrens biodone(bp); 126388b7b0f2SMatthew Ahrens return (0); 126488b7b0f2SMatthew Ahrens } 126573ec3d9cSgw 1266810e43b2SBill Pijewski is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED; 126755da60b9SMark J Musante sync = ((!(bp->b_flags & B_ASYNC) && 126855da60b9SMark J Musante !(zv->zv_flags & ZVOL_WCE)) || 126955da60b9SMark J Musante (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) 
&& 1270810e43b2SBill Pijewski !doread && !is_dumpified; 1271510b6c0eSNeil Perrin 1272a24e15ceSperrin /* 1273a24e15ceSperrin * There must be no buffer changes when doing a dmu_sync() because 1274a24e15ceSperrin * we can't change the data whilst calculating the checksum. 1275a24e15ceSperrin */ 1276c2e6a7d6Sperrin rl = zfs_range_lock(&zv->zv_znode, off, resid, 127788b7b0f2SMatthew Ahrens doread ? RL_READER : RL_WRITER); 1278fa9e4066Sahrens 1279e7cbe64fSgw while (resid != 0 && off < volsize) { 128088b7b0f2SMatthew Ahrens size_t size = MIN(resid, zvol_maxphys); 1281810e43b2SBill Pijewski if (is_dumpified) { 1282e7cbe64fSgw size = MIN(size, P2END(off, zv->zv_volblocksize) - off); 128388b7b0f2SMatthew Ahrens error = zvol_dumpio(zv, addr, off, size, 128488b7b0f2SMatthew Ahrens doread, B_FALSE); 128588b7b0f2SMatthew Ahrens } else if (doread) { 12867bfdf011SNeil Perrin error = dmu_read(os, ZVOL_OBJ, off, size, addr, 12877bfdf011SNeil Perrin DMU_READ_PREFETCH); 1288fa9e4066Sahrens } else { 128922ac5be4Sperrin dmu_tx_t *tx = dmu_tx_create(os); 1290fa9e4066Sahrens dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 1291fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 1292fa9e4066Sahrens if (error) { 1293fa9e4066Sahrens dmu_tx_abort(tx); 1294fa9e4066Sahrens } else { 129522ac5be4Sperrin dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 1296510b6c0eSNeil Perrin zvol_log_write(zv, tx, off, size, sync); 1297fa9e4066Sahrens dmu_tx_commit(tx); 1298fa9e4066Sahrens } 1299fa9e4066Sahrens } 1300b87f3af3Sperrin if (error) { 1301b87f3af3Sperrin /* convert checksum errors into IO errors */ 1302b87f3af3Sperrin if (error == ECKSUM) 1303be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 1304fa9e4066Sahrens break; 1305b87f3af3Sperrin } 1306fa9e4066Sahrens off += size; 1307fa9e4066Sahrens addr += size; 1308fa9e4066Sahrens resid -= size; 1309fa9e4066Sahrens } 1310c2e6a7d6Sperrin zfs_range_unlock(rl); 1311fa9e4066Sahrens 1312fa9e4066Sahrens if ((bp->b_resid = resid) == bp->b_bcount) 1313fa9e4066Sahrens 
		bioerror(bp, off > volsize ? EINVAL : error);

	if (sync)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
	biodone(bp);

	return (0);
}

/*
 * Set the buffer count to the zvol maximum transfer.
 * Using our own routine instead of the default minphys()
 * means that for larger writes we write bigger buffers on X86
 * (128K instead of 56K) and flush the disk write cache less often
 * (every zvol_maxphys - currently 1MB) instead of minphys (currently
 * 56K on X86 and 128K on sparc).
 *
 * The only effect on the buf is to clamp b_bcount to zvol_maxphys;
 * smaller requests are left untouched.
 */
void
zvol_minphys(struct buf *bp)
{
	if (bp->b_bcount > zvol_maxphys)
		bp->b_bcount = zvol_maxphys;
}

/*
 * Write nblocks blocks, starting at block blkno, from addr to a dumpified
 * volume via zvol_dumpio().
 *
 * Returns ENXIO if the minor has no zvol soft state, EINVAL if the volume
 * is not flagged ZVOL_DUMPIFIED, otherwise 0 or the first error reported
 * by zvol_dumpio().  A request that extends past zv_volsize trips the
 * VERIFY3U below rather than returning an error.
 */
int
zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
{
	minor_t minor = getminor(dev);
	zvol_state_t *zv;
	int error = 0;
	uint64_t size;
	uint64_t boff;
	uint64_t resid;

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));

	if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0)
		return (SET_ERROR(EINVAL));

	/* Convert the block-based request into a byte offset and length. */
	boff = ldbtob(blkno);
	resid = ldbtob(nblocks);

	VERIFY3U(boff + resid, <=, zv->zv_volsize);

	while (resid) {
		/* Never let a single transfer cross a volume-block boundary. */
		size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff);
		/* 5th argument is "doread" (see zvol_strategy): this is a write. */
		error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE);
		if (error)
			break;
		boff += size;
		addr += size;
		resid -= size;
	}

	return (error);
}

/*
 * Read from the volume identified by dev into the caller's uio.
 *
 * Returns ENXIO if the minor has no zvol soft state, and EIO if a
 * non-empty request starts at a negative offset or at/after the end of
 * the volume.  Dumpified volumes are serviced through physio() and
 * zvol_strategy(); all others are read from the DMU under a reader range
 * lock covering the request.  ECKSUM from the DMU is reported as EIO.
 * The read stops (without error) once the end of the volume is reached.
 */
/*ARGSUSED*/
int
zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	minor_t minor = getminor(dev);
	zvol_state_t *zv;
	uint64_t volsize;
	rl_t *rl;
	int error = 0;

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));

	/* Snapshot the size once; it is used for every bound check below. */
	volsize = zv->zv_volsize;
	if (uio->uio_resid > 0 &&
	    (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
		return (SET_ERROR(EIO));

	if (zv->zv_flags & ZVOL_DUMPIFIED) {
		error = physio(zvol_strategy, NULL, dev, B_READ,
		    zvol_minphys, uio);
		return (error);
	}

	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
	    RL_READER);
	while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
		/* Each pass moves at most half of DMU_MAX_ACCESS. */
		uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);

		/* don't read past the end */
		if (bytes > volsize - uio->uio_loffset)
			bytes = volsize - uio->uio_loffset;

		error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
		if (error) {
			/* convert checksum errors into IO errors */
			if (error == ECKSUM)
				error = SET_ERROR(EIO);
			break;
		}
	}
	zfs_range_unlock(rl);
	return (error);
}

/*
 * Write the caller's uio to the volume identified by dev.
 *
 * Returns ENXIO if the minor has no zvol soft state, and EIO if a
 * non-empty request starts at a negative offset or at/after the end of
 * the volume.  Dumpified volumes are serviced through physio() and
 * zvol_strategy().  Otherwise the data is written in chunks, one DMU
 * transaction per chunk, under a writer range lock covering the request,
 * and each successful chunk is logged with zvol_log_write().
 *
 * The write is treated as synchronous when the volume's write cache is
 * disabled (ZVOL_WCE clear) or the objset has sync=always; in that case
 * the ZIL is committed before returning, even if the loop ended early
 * with an error.
 */
/*ARGSUSED*/
int
zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
{
	minor_t minor = getminor(dev);
	zvol_state_t *zv;
	uint64_t volsize;
	rl_t *rl;
	int error = 0;
	boolean_t sync;

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));

	/* Snapshot the size once; it is used for every bound check below. */
	volsize = zv->zv_volsize;
	if (uio->uio_resid > 0 &&
	    (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
		return (SET_ERROR(EIO));

	if (zv->zv_flags & ZVOL_DUMPIFIED) {
		error = physio(zvol_strategy, NULL, dev, B_WRITE,
		    zvol_minphys, uio);
		return (error);
	}

	sync = !(zv->zv_flags & ZVOL_WCE) ||
	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);

	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
	    RL_WRITER);
	while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
		uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
		/* Remember the offset: the uio advances during the write. */
		uint64_t off = uio->uio_loffset;
		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);

		if (bytes > volsize - off)	/* don't write past the end */
			bytes = volsize - off;

		dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			/* An unassigned tx must be aborted, not committed. */
			dmu_tx_abort(tx);
			break;
		}
		error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
		if (error == 0)
			zvol_log_write(zv, tx, off, bytes, sync);
		/* The assigned tx is committed whether or not the write worked. */
		dmu_tx_commit(tx);

		if (error)
			break;
	}
	zfs_range_unlock(rl);
	if (sync)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
	return (error);
}

/*
 * Synthesize EFI label data for a volume of vs bytes with a logical block
 * shift of bs, and copy it out to the buffer described by the dk_efi_t
 * that arg points to.
 *
 * Only LBA 1 (GPT header, followed by the partition entry if the caller's
 * buffer has room) and LBA 2 (partition entry only) are supported.  The
 * label describes a single partition of type EFI_RESERVED spanning LBA 34
 * through the last block of the volume.
 *
 * Returns EFAULT if a copyin/copyout fails, EINVAL for any other LBA or a
 * non-positive length, and 0 on success.
 */
int
zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
{
	struct uuid uuid = EFI_RESERVED;
	efi_gpe_t gpe = { 0 };
	uint32_t crc;
	dk_efi_t efi;
	int length;
	char *ptr;

	if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
		return (SET_ERROR(EFAULT));
	ptr = (char *)(uintptr_t)efi.dki_data_64;
	length = efi.dki_length;
	/*
	 * Some clients may attempt to request a PMBR for the
	 * zvol.  Currently this interface will return EINVAL to
	 * such requests.  These requests could be supported by
	 * adding a check for lba == 0 and consing up an appropriate
	 * PMBR.
	 */
	if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0)
		return (SET_ERROR(EINVAL));

	/* The single partition entry is needed for both LBA 1 and LBA 2. */
	gpe.efi_gpe_StartingLBA = LE_64(34ULL);
	gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1);
	UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid);

	if (efi.dki_lba == 1) {
		efi_gpt_t gpt = { 0 };

		gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE);
		gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
		gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt));
		gpt.efi_gpt_MyLBA = LE_64(1ULL);
		gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL);
		gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1);
		gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL);
		gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1);
		gpt.efi_gpt_SizeOfPartitionEntry =
		    LE_32(sizeof (efi_gpe_t));
		CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table);
		gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
		/* Header CRC is taken while the HeaderCRC32 field is still 0. */
		CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table);
		gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
		/* Copy out no more than the caller's buffer can hold. */
		if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length),
		    flag))
			return (SET_ERROR(EFAULT));
		ptr += sizeof (gpt);
		length -= sizeof (gpt);
	}
	/* Append the partition entry only if buffer space remains. */
	if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe),
	    length), flag))
		return (SET_ERROR(EFAULT));
	return (0);
}

/*
 * BEGIN entry points to allow external callers access to the volume.
 */
/*
 * Return the volume parameters needed for access from an external caller.
 * These values are invariant as long as the volume is held open.
 *
 * On success every output pointer is filled in: the volume block size,
 * the maximum transfer length (zvol_maxphys), and opaque handles for the
 * zvol state, its objset, its ZIL, its range-lock znode and its bonus
 * dbuf.  All output pointers must be non-NULL.
 *
 * Returns ENXIO if the minor has no zvol soft state or the volume is
 * dumpified, otherwise 0.
 */
int
zvol_get_volume_params(minor_t minor, uint64_t *blksize,
    uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
    void **rl_hdl, void **bonus_hdl)
{
	zvol_state_t *zv;

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));
	if (zv->zv_flags & ZVOL_DUMPIFIED)
		return (SET_ERROR(ENXIO));

	ASSERT(blksize && max_xfer_len && minor_hdl &&
	    objset_hdl && zil_hdl && rl_hdl && bonus_hdl);

	*blksize = zv->zv_volblocksize;
	*max_xfer_len = (uint64_t)zvol_maxphys;
	*minor_hdl = zv;
	*objset_hdl = zv->zv_objset;
	*zil_hdl = zv->zv_zilog;
	*rl_hdl = &zv->zv_znode;
	*bonus_hdl = zv->zv_dbuf;
	return (0);
}

/*
 * Return the current volume size to an external caller.
 * The size can change while the volume is open.
15703fb517f7SJames Moore */ 15713fb517f7SJames Moore uint64_t 15723fb517f7SJames Moore zvol_get_volume_size(void *minor_hdl) 15733fb517f7SJames Moore { 15743fb517f7SJames Moore zvol_state_t *zv = minor_hdl; 15753fb517f7SJames Moore 15763fb517f7SJames Moore return (zv->zv_volsize); 15773fb517f7SJames Moore } 15783fb517f7SJames Moore 15793fb517f7SJames Moore /* 15803fb517f7SJames Moore * Return the current WCE setting to an external caller. 15813fb517f7SJames Moore * The WCE setting can change while the volume is open. 15823fb517f7SJames Moore */ 15833fb517f7SJames Moore int 15843fb517f7SJames Moore zvol_get_volume_wce(void *minor_hdl) 15853fb517f7SJames Moore { 15863fb517f7SJames Moore zvol_state_t *zv = minor_hdl; 15873fb517f7SJames Moore 15883fb517f7SJames Moore return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0); 15893fb517f7SJames Moore } 15903fb517f7SJames Moore 15913fb517f7SJames Moore /* 15923fb517f7SJames Moore * Entry point for external callers to zvol_log_write 15933fb517f7SJames Moore */ 15943fb517f7SJames Moore void 15953fb517f7SJames Moore zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid, 15963fb517f7SJames Moore boolean_t sync) 15973fb517f7SJames Moore { 15983fb517f7SJames Moore zvol_state_t *zv = minor_hdl; 15993fb517f7SJames Moore 16003fb517f7SJames Moore zvol_log_write(zv, tx, off, resid, sync); 16013fb517f7SJames Moore } 16023fb517f7SJames Moore /* 16033fb517f7SJames Moore * END entry points to allow external callers access to the volume. 16043fb517f7SJames Moore */ 16053fb517f7SJames Moore 1606b77b9231SDan McDonald /* 1607b77b9231SDan McDonald * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE. 
1608b77b9231SDan McDonald */ 1609b77b9231SDan McDonald static void 1610b77b9231SDan McDonald zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len, 1611b77b9231SDan McDonald boolean_t sync) 1612b77b9231SDan McDonald { 1613b77b9231SDan McDonald itx_t *itx; 1614b77b9231SDan McDonald lr_truncate_t *lr; 1615b77b9231SDan McDonald zilog_t *zilog = zv->zv_zilog; 1616b77b9231SDan McDonald 1617b77b9231SDan McDonald if (zil_replaying(zilog, tx)) 1618b77b9231SDan McDonald return; 1619b77b9231SDan McDonald 1620b77b9231SDan McDonald itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1621b77b9231SDan McDonald lr = (lr_truncate_t *)&itx->itx_lr; 1622b77b9231SDan McDonald lr->lr_foid = ZVOL_OBJ; 1623b77b9231SDan McDonald lr->lr_offset = off; 1624b77b9231SDan McDonald lr->lr_length = len; 1625b77b9231SDan McDonald 1626b77b9231SDan McDonald itx->itx_sync = sync; 1627b77b9231SDan McDonald zil_itx_assign(zilog, itx, tx); 1628b77b9231SDan McDonald } 1629b77b9231SDan McDonald 1630fa9e4066Sahrens /* 1631fa9e4066Sahrens * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). 1632b77b9231SDan McDonald * Also a dirtbag dkio ioctl for unmap/free-block functionality. 
1633fa9e4066Sahrens */ 1634fa9e4066Sahrens /*ARGSUSED*/ 1635fa9e4066Sahrens int 1636fa9e4066Sahrens zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 1637fa9e4066Sahrens { 1638fa9e4066Sahrens zvol_state_t *zv; 1639af2c4821Smaybee struct dk_callback *dkc; 1640fa9e4066Sahrens int error = 0; 1641e7cbe64fSgw rl_t *rl; 1642fa9e4066Sahrens 1643c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock); 1644fa9e4066Sahrens 1645c99e4bdcSChris Kirby zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL); 1646fa9e4066Sahrens 1647fa9e4066Sahrens if (zv == NULL) { 1648c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 1649be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 1650fa9e4066Sahrens } 1651701f66c4SEric Taylor ASSERT(zv->zv_total_opens > 0); 1652fa9e4066Sahrens 1653fa9e4066Sahrens switch (cmd) { 1654fa9e4066Sahrens 1655fa9e4066Sahrens case DKIOCINFO: 1656a0b60564SGeorge Wilson { 1657a0b60564SGeorge Wilson struct dk_cinfo dki; 1658a0b60564SGeorge Wilson 1659af2c4821Smaybee bzero(&dki, sizeof (dki)); 1660af2c4821Smaybee (void) strcpy(dki.dki_cname, "zvol"); 1661af2c4821Smaybee (void) strcpy(dki.dki_dname, "zvol"); 1662af2c4821Smaybee dki.dki_ctype = DKC_UNKNOWN; 16633adc9019SEric Taylor dki.dki_unit = getminor(dev); 1664*b5152584SMatthew Ahrens dki.dki_maxtransfer = 1665*b5152584SMatthew Ahrens 1 << (SPA_OLD_MAXBLOCKSHIFT - zv->zv_min_bs); 1666c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 1667af2c4821Smaybee if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) 1668be6fd75aSMatthew Ahrens error = SET_ERROR(EFAULT); 1669fa9e4066Sahrens return (error); 1670a0b60564SGeorge Wilson } 1671fa9e4066Sahrens 1672fa9e4066Sahrens case DKIOCGMEDIAINFO: 1673a0b60564SGeorge Wilson { 1674a0b60564SGeorge Wilson struct dk_minfo dkm; 1675a0b60564SGeorge Wilson 1676fa9e4066Sahrens bzero(&dkm, sizeof (dkm)); 1677fa9e4066Sahrens dkm.dki_lbsize = 1U << zv->zv_min_bs; 1678fa9e4066Sahrens dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 1679fa9e4066Sahrens 
dkm.dki_media_type = DK_UNKNOWN; 1680c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 1681fa9e4066Sahrens if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) 1682be6fd75aSMatthew Ahrens error = SET_ERROR(EFAULT); 1683fa9e4066Sahrens return (error); 1684a0b60564SGeorge Wilson } 1685a0b60564SGeorge Wilson 1686a0b60564SGeorge Wilson case DKIOCGMEDIAINFOEXT: 1687a0b60564SGeorge Wilson { 1688a0b60564SGeorge Wilson struct dk_minfo_ext dkmext; 1689a0b60564SGeorge Wilson 1690a0b60564SGeorge Wilson bzero(&dkmext, sizeof (dkmext)); 1691a0b60564SGeorge Wilson dkmext.dki_lbsize = 1U << zv->zv_min_bs; 1692a0b60564SGeorge Wilson dkmext.dki_pbsize = zv->zv_volblocksize; 1693a0b60564SGeorge Wilson dkmext.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 1694a0b60564SGeorge Wilson dkmext.dki_media_type = DK_UNKNOWN; 1695a0b60564SGeorge Wilson mutex_exit(&zfsdev_state_lock); 1696a0b60564SGeorge Wilson if (ddi_copyout(&dkmext, (void *)arg, sizeof (dkmext), flag)) 1697a0b60564SGeorge Wilson error = SET_ERROR(EFAULT); 1698a0b60564SGeorge Wilson return (error); 1699a0b60564SGeorge Wilson } 1700fa9e4066Sahrens 1701fa9e4066Sahrens case DKIOCGETEFI: 1702a0b60564SGeorge Wilson { 1703a0b60564SGeorge Wilson uint64_t vs = zv->zv_volsize; 1704a0b60564SGeorge Wilson uint8_t bs = zv->zv_min_bs; 1705fa9e4066Sahrens 1706a0b60564SGeorge Wilson mutex_exit(&zfsdev_state_lock); 1707a0b60564SGeorge Wilson error = zvol_getefi((void *)arg, flag, vs, bs); 1708a0b60564SGeorge Wilson return (error); 1709a0b60564SGeorge Wilson } 1710fa9e4066Sahrens 1711feb08c6bSbillm case DKIOCFLUSHWRITECACHE: 1712af2c4821Smaybee dkc = (struct dk_callback *)arg; 1713c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 17145002558fSNeil Perrin zil_commit(zv->zv_zilog, ZVOL_OBJ); 1715af2c4821Smaybee if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { 1716af2c4821Smaybee (*dkc->dkc_callback)(dkc->dkc_cookie, error); 1717af2c4821Smaybee error = 0; 1718af2c4821Smaybee } 1719701f66c4SEric Taylor return (error); 
1720701f66c4SEric Taylor 1721701f66c4SEric Taylor case DKIOCGETWCE: 1722a0b60564SGeorge Wilson { 1723a0b60564SGeorge Wilson int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0; 1724a0b60564SGeorge Wilson if (ddi_copyout(&wce, (void *)arg, sizeof (int), 1725a0b60564SGeorge Wilson flag)) 1726a0b60564SGeorge Wilson error = SET_ERROR(EFAULT); 1727a0b60564SGeorge Wilson break; 1728a0b60564SGeorge Wilson } 1729a0b60564SGeorge Wilson case DKIOCSETWCE: 1730a0b60564SGeorge Wilson { 1731a0b60564SGeorge Wilson int wce; 1732a0b60564SGeorge Wilson if (ddi_copyin((void *)arg, &wce, sizeof (int), 1733a0b60564SGeorge Wilson flag)) { 1734a0b60564SGeorge Wilson error = SET_ERROR(EFAULT); 1735701f66c4SEric Taylor break; 1736701f66c4SEric Taylor } 1737a0b60564SGeorge Wilson if (wce) { 1738a0b60564SGeorge Wilson zv->zv_flags |= ZVOL_WCE; 1739a0b60564SGeorge Wilson mutex_exit(&zfsdev_state_lock); 1740a0b60564SGeorge Wilson } else { 1741a0b60564SGeorge Wilson zv->zv_flags &= ~ZVOL_WCE; 1742a0b60564SGeorge Wilson mutex_exit(&zfsdev_state_lock); 1743a0b60564SGeorge Wilson zil_commit(zv->zv_zilog, ZVOL_OBJ); 1744701f66c4SEric Taylor } 1745a0b60564SGeorge Wilson return (0); 1746a0b60564SGeorge Wilson } 1747feb08c6bSbillm 1748b6130eadSmaybee case DKIOCGGEOM: 1749b6130eadSmaybee case DKIOCGVTOC: 1750e7cbe64fSgw /* 1751e7cbe64fSgw * commands using these (like prtvtoc) expect ENOTSUP 1752e7cbe64fSgw * since we're emulating an EFI label 1753e7cbe64fSgw */ 1754be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTSUP); 1755b6130eadSmaybee break; 1756b6130eadSmaybee 1757e7cbe64fSgw case DKIOCDUMPINIT: 1758e7cbe64fSgw rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1759e7cbe64fSgw RL_WRITER); 1760e7cbe64fSgw error = zvol_dumpify(zv); 1761e7cbe64fSgw zfs_range_unlock(rl); 1762e7cbe64fSgw break; 1763e7cbe64fSgw 1764e7cbe64fSgw case DKIOCDUMPFINI: 176506d5ae10SEric Taylor if (!(zv->zv_flags & ZVOL_DUMPIFIED)) 176606d5ae10SEric Taylor break; 1767e7cbe64fSgw rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 
1768e7cbe64fSgw RL_WRITER); 1769e7cbe64fSgw error = zvol_dump_fini(zv); 1770e7cbe64fSgw zfs_range_unlock(rl); 1771e7cbe64fSgw break; 1772e7cbe64fSgw 1773b77b9231SDan McDonald case DKIOCFREE: 1774b77b9231SDan McDonald { 1775b77b9231SDan McDonald dkioc_free_t df; 1776b77b9231SDan McDonald dmu_tx_t *tx; 1777b77b9231SDan McDonald 1778893c83baSGeorge Wilson if (!zvol_unmap_enabled) 1779893c83baSGeorge Wilson break; 1780893c83baSGeorge Wilson 1781b77b9231SDan McDonald if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) { 1782be6fd75aSMatthew Ahrens error = SET_ERROR(EFAULT); 1783b77b9231SDan McDonald break; 1784b77b9231SDan McDonald } 1785b77b9231SDan McDonald 1786b77b9231SDan McDonald /* 1787b77b9231SDan McDonald * Apply Postel's Law to length-checking. If they overshoot, 1788b77b9231SDan McDonald * just blank out until the end, if there's a need to blank 1789b77b9231SDan McDonald * out anything. 1790b77b9231SDan McDonald */ 1791b77b9231SDan McDonald if (df.df_start >= zv->zv_volsize) 1792b77b9231SDan McDonald break; /* No need to do anything... 
*/ 1793574e2414SGeorge Wilson 1794574e2414SGeorge Wilson mutex_exit(&zfsdev_state_lock); 1795b77b9231SDan McDonald 1796b77b9231SDan McDonald rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, 1797b77b9231SDan McDonald RL_WRITER); 1798b77b9231SDan McDonald tx = dmu_tx_create(zv->zv_objset); 17994bb73804SMatthew Ahrens dmu_tx_mark_netfree(tx); 1800b77b9231SDan McDonald error = dmu_tx_assign(tx, TXG_WAIT); 1801b77b9231SDan McDonald if (error != 0) { 1802b77b9231SDan McDonald dmu_tx_abort(tx); 1803b77b9231SDan McDonald } else { 1804b77b9231SDan McDonald zvol_log_truncate(zv, tx, df.df_start, 1805b77b9231SDan McDonald df.df_length, B_TRUE); 1806c08b1637SJosef 'Jeff' Sipek dmu_tx_commit(tx); 1807b77b9231SDan McDonald error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 1808b77b9231SDan McDonald df.df_start, df.df_length); 1809b77b9231SDan McDonald } 1810b77b9231SDan McDonald 1811b77b9231SDan McDonald zfs_range_unlock(rl); 1812b77b9231SDan McDonald 1813b77b9231SDan McDonald if (error == 0) { 1814b77b9231SDan McDonald /* 1815b77b9231SDan McDonald * If the write-cache is disabled or 'sync' property 1816b77b9231SDan McDonald * is set to 'always' then treat this as a synchronous 1817b77b9231SDan McDonald * operation (i.e. commit to zil). 1818b77b9231SDan McDonald */ 1819b77b9231SDan McDonald if (!(zv->zv_flags & ZVOL_WCE) || 1820b77b9231SDan McDonald (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) 1821b77b9231SDan McDonald zil_commit(zv->zv_zilog, ZVOL_OBJ); 1822b77b9231SDan McDonald 1823b77b9231SDan McDonald /* 1824b77b9231SDan McDonald * If the caller really wants synchronous writes, and 1825b77b9231SDan McDonald * can't wait for them, don't return until the write 1826b77b9231SDan McDonald * is done. 
1827b77b9231SDan McDonald */ 1828b77b9231SDan McDonald if (df.df_flags & DF_WAIT_SYNC) { 1829b77b9231SDan McDonald txg_wait_synced( 1830b77b9231SDan McDonald dmu_objset_pool(zv->zv_objset), 0); 1831b77b9231SDan McDonald } 1832b77b9231SDan McDonald } 1833574e2414SGeorge Wilson return (error); 1834b77b9231SDan McDonald } 1835b77b9231SDan McDonald 1836fa9e4066Sahrens default: 1837be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTTY); 1838fa9e4066Sahrens break; 1839fa9e4066Sahrens 1840fa9e4066Sahrens } 1841c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock); 1842fa9e4066Sahrens return (error); 1843fa9e4066Sahrens } 1844fa9e4066Sahrens 1845fa9e4066Sahrens int 1846fa9e4066Sahrens zvol_busy(void) 1847fa9e4066Sahrens { 1848fa9e4066Sahrens return (zvol_minors != 0); 1849fa9e4066Sahrens } 1850fa9e4066Sahrens 1851fa9e4066Sahrens void 1852fa9e4066Sahrens zvol_init(void) 1853fa9e4066Sahrens { 1854c99e4bdcSChris Kirby VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t), 1855c99e4bdcSChris Kirby 1) == 0); 1856c99e4bdcSChris Kirby mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); 1857fa9e4066Sahrens } 1858fa9e4066Sahrens 1859fa9e4066Sahrens void 1860fa9e4066Sahrens zvol_fini(void) 1861fa9e4066Sahrens { 1862c99e4bdcSChris Kirby mutex_destroy(&zfsdev_state_lock); 1863c99e4bdcSChris Kirby ddi_soft_state_fini(&zfsdev_state); 1864fa9e4066Sahrens } 1865e7cbe64fSgw 1866810e43b2SBill Pijewski /*ARGSUSED*/ 1867810e43b2SBill Pijewski static int 1868810e43b2SBill Pijewski zfs_mvdev_dump_feature_check(void *arg, dmu_tx_t *tx) 1869810e43b2SBill Pijewski { 1870810e43b2SBill Pijewski spa_t *spa = dmu_tx_pool(tx)->dp_spa; 1871810e43b2SBill Pijewski 18722acef22dSMatthew Ahrens if (spa_feature_is_active(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP)) 1873810e43b2SBill Pijewski return (1); 1874810e43b2SBill Pijewski return (0); 1875810e43b2SBill Pijewski } 1876810e43b2SBill Pijewski 1877810e43b2SBill Pijewski /*ARGSUSED*/ 1878810e43b2SBill Pijewski static void 1879810e43b2SBill 
Pijewski zfs_mvdev_dump_activate_feature_sync(void *arg, dmu_tx_t *tx) 1880810e43b2SBill Pijewski { 1881810e43b2SBill Pijewski spa_t *spa = dmu_tx_pool(tx)->dp_spa; 1882810e43b2SBill Pijewski 18832acef22dSMatthew Ahrens spa_feature_incr(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, tx); 1884810e43b2SBill Pijewski } 1885810e43b2SBill Pijewski 1886e7cbe64fSgw static int 1887e7cbe64fSgw zvol_dump_init(zvol_state_t *zv, boolean_t resize) 1888e7cbe64fSgw { 1889e7cbe64fSgw dmu_tx_t *tx; 1890810e43b2SBill Pijewski int error; 1891e7cbe64fSgw objset_t *os = zv->zv_objset; 1892810e43b2SBill Pijewski spa_t *spa = dmu_objset_spa(os); 1893810e43b2SBill Pijewski vdev_t *vd = spa->spa_root_vdev; 1894e7cbe64fSgw nvlist_t *nv = NULL; 1895810e43b2SBill Pijewski uint64_t version = spa_version(spa); 1896810e43b2SBill Pijewski enum zio_checksum checksum; 1897e7cbe64fSgw 1898c99e4bdcSChris Kirby ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 1899810e43b2SBill Pijewski ASSERT(vd->vdev_ops == &vdev_root_ops); 1900810e43b2SBill Pijewski 1901681d9761SEric Taylor error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, 1902681d9761SEric Taylor DMU_OBJECT_END); 1903681d9761SEric Taylor /* wait for dmu_free_long_range to actually free the blocks */ 1904681d9761SEric Taylor txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 1905e7cbe64fSgw 1906810e43b2SBill Pijewski /* 1907810e43b2SBill Pijewski * If the pool on which the dump device is being initialized has more 1908810e43b2SBill Pijewski * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is 1909810e43b2SBill Pijewski * enabled. If so, bump that feature's counter to indicate that the 1910810e43b2SBill Pijewski * feature is active. 
We also check the vdev type to handle the 1911810e43b2SBill Pijewski * following case: 1912810e43b2SBill Pijewski * # zpool create test raidz disk1 disk2 disk3 1913810e43b2SBill Pijewski * Now have spa_root_vdev->vdev_children == 1 (the raidz vdev), 1914810e43b2SBill Pijewski * the raidz vdev itself has 3 children. 1915810e43b2SBill Pijewski */ 1916810e43b2SBill Pijewski if (vd->vdev_children > 1 || vd->vdev_ops == &vdev_raidz_ops) { 1917810e43b2SBill Pijewski if (!spa_feature_is_enabled(spa, 19182acef22dSMatthew Ahrens SPA_FEATURE_MULTI_VDEV_CRASH_DUMP)) 1919810e43b2SBill Pijewski return (SET_ERROR(ENOTSUP)); 1920810e43b2SBill Pijewski (void) dsl_sync_task(spa_name(spa), 1921810e43b2SBill Pijewski zfs_mvdev_dump_feature_check, 19227d46dc6cSMatthew Ahrens zfs_mvdev_dump_activate_feature_sync, NULL, 19237d46dc6cSMatthew Ahrens 2, ZFS_SPACE_CHECK_RESERVED); 1924810e43b2SBill Pijewski } 1925810e43b2SBill Pijewski 1926e7cbe64fSgw tx = dmu_tx_create(os); 1927e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 1928681d9761SEric Taylor dmu_tx_hold_bonus(tx, ZVOL_OBJ); 1929e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 1930e7cbe64fSgw if (error) { 1931e7cbe64fSgw dmu_tx_abort(tx); 1932e7cbe64fSgw return (error); 1933e7cbe64fSgw } 1934e7cbe64fSgw 1935810e43b2SBill Pijewski /* 1936810e43b2SBill Pijewski * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum 1937810e43b2SBill Pijewski * function. Otherwise, use the old default -- OFF. 1938810e43b2SBill Pijewski */ 1939810e43b2SBill Pijewski checksum = spa_feature_is_active(spa, 19402acef22dSMatthew Ahrens SPA_FEATURE_MULTI_VDEV_CRASH_DUMP) ? ZIO_CHECKSUM_NOPARITY : 19412acef22dSMatthew Ahrens ZIO_CHECKSUM_OFF; 1942810e43b2SBill Pijewski 1943e7cbe64fSgw /* 1944e7cbe64fSgw * If we are resizing the dump device then we only need to 1945e7cbe64fSgw * update the refreservation to match the newly updated 1946e7cbe64fSgw * zvolsize. 
Otherwise, we save off the original state of the 1947e7cbe64fSgw * zvol so that we can restore them if the zvol is ever undumpified. 1948e7cbe64fSgw */ 1949e7cbe64fSgw if (resize) { 1950e7cbe64fSgw error = zap_update(os, ZVOL_ZAP_OBJ, 1951e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 1952e7cbe64fSgw &zv->zv_volsize, tx); 1953e7cbe64fSgw } else { 1954afee20e4SGeorge Wilson uint64_t checksum, compress, refresrv, vbs, dedup; 195588b7b0f2SMatthew Ahrens 1956e7cbe64fSgw error = dsl_prop_get_integer(zv->zv_name, 1957e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); 1958e7cbe64fSgw error = error ? error : dsl_prop_get_integer(zv->zv_name, 1959e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); 1960e7cbe64fSgw error = error ? error : dsl_prop_get_integer(zv->zv_name, 1961e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); 196288b7b0f2SMatthew Ahrens error = error ? error : dsl_prop_get_integer(zv->zv_name, 196388b7b0f2SMatthew Ahrens zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL); 19648d265e66SGeorge Wilson if (version >= SPA_VERSION_DEDUP) { 19658d265e66SGeorge Wilson error = error ? error : 19668d265e66SGeorge Wilson dsl_prop_get_integer(zv->zv_name, 19678d265e66SGeorge Wilson zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL); 19688d265e66SGeorge Wilson } 1969e7cbe64fSgw 1970e7cbe64fSgw error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1971e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, 1972e7cbe64fSgw &compress, tx); 1973e7cbe64fSgw error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1974e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); 1975e7cbe64fSgw error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1976e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 1977e7cbe64fSgw &refresrv, tx); 197888b7b0f2SMatthew Ahrens error = error ? 
error : zap_update(os, ZVOL_ZAP_OBJ, 197988b7b0f2SMatthew Ahrens zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, 198088b7b0f2SMatthew Ahrens &vbs, tx); 1981681d9761SEric Taylor error = error ? error : dmu_object_set_blocksize( 1982*b5152584SMatthew Ahrens os, ZVOL_OBJ, SPA_OLD_MAXBLOCKSIZE, 0, tx); 19838d265e66SGeorge Wilson if (version >= SPA_VERSION_DEDUP) { 19848d265e66SGeorge Wilson error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 19858d265e66SGeorge Wilson zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, 19868d265e66SGeorge Wilson &dedup, tx); 19878d265e66SGeorge Wilson } 1988681d9761SEric Taylor if (error == 0) 1989*b5152584SMatthew Ahrens zv->zv_volblocksize = SPA_OLD_MAXBLOCKSIZE; 1990e7cbe64fSgw } 1991e7cbe64fSgw dmu_tx_commit(tx); 1992e7cbe64fSgw 1993e7cbe64fSgw /* 1994e7cbe64fSgw * We only need update the zvol's property if we are initializing 1995e7cbe64fSgw * the dump area for the first time. 1996e7cbe64fSgw */ 1997e7cbe64fSgw if (!resize) { 1998e7cbe64fSgw VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1999e7cbe64fSgw VERIFY(nvlist_add_uint64(nv, 2000e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); 2001e7cbe64fSgw VERIFY(nvlist_add_uint64(nv, 2002e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 2003e7cbe64fSgw ZIO_COMPRESS_OFF) == 0); 2004e7cbe64fSgw VERIFY(nvlist_add_uint64(nv, 2005e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), 2006810e43b2SBill Pijewski checksum) == 0); 20078d265e66SGeorge Wilson if (version >= SPA_VERSION_DEDUP) { 20088d265e66SGeorge Wilson VERIFY(nvlist_add_uint64(nv, 20098d265e66SGeorge Wilson zfs_prop_to_name(ZFS_PROP_DEDUP), 20108d265e66SGeorge Wilson ZIO_CHECKSUM_OFF) == 0); 20118d265e66SGeorge Wilson } 2012e7cbe64fSgw 201392241e0bSTom Erickson error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 201492241e0bSTom Erickson nv, NULL); 2015e7cbe64fSgw nvlist_free(nv); 2016e7cbe64fSgw 2017e7cbe64fSgw if (error) 2018e7cbe64fSgw return (error); 2019e7cbe64fSgw } 2020e7cbe64fSgw 2021e7cbe64fSgw /* 
Allocate the space for the dump */ 2022e7cbe64fSgw error = zvol_prealloc(zv); 2023e7cbe64fSgw return (error); 2024e7cbe64fSgw } 2025e7cbe64fSgw 2026e7cbe64fSgw static int 2027e7cbe64fSgw zvol_dumpify(zvol_state_t *zv) 2028e7cbe64fSgw { 2029e7cbe64fSgw int error = 0; 2030e7cbe64fSgw uint64_t dumpsize = 0; 2031e7cbe64fSgw dmu_tx_t *tx; 2032e7cbe64fSgw objset_t *os = zv->zv_objset; 2033e7cbe64fSgw 2034681d9761SEric Taylor if (zv->zv_flags & ZVOL_RDONLY) 2035be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 2036e7cbe64fSgw 2037e7cbe64fSgw if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 2038e7cbe64fSgw 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { 20394445fffbSMatthew Ahrens boolean_t resize = (dumpsize > 0); 2040e7cbe64fSgw 2041e7cbe64fSgw if ((error = zvol_dump_init(zv, resize)) != 0) { 2042e7cbe64fSgw (void) zvol_dump_fini(zv); 2043e7cbe64fSgw return (error); 2044e7cbe64fSgw } 2045e7cbe64fSgw } 2046e7cbe64fSgw 2047e7cbe64fSgw /* 2048e7cbe64fSgw * Build up our lba mapping. 
2049e7cbe64fSgw */ 2050e7cbe64fSgw error = zvol_get_lbas(zv); 2051e7cbe64fSgw if (error) { 2052e7cbe64fSgw (void) zvol_dump_fini(zv); 2053e7cbe64fSgw return (error); 2054e7cbe64fSgw } 2055e7cbe64fSgw 2056e7cbe64fSgw tx = dmu_tx_create(os); 2057e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 2058e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 2059e7cbe64fSgw if (error) { 2060e7cbe64fSgw dmu_tx_abort(tx); 2061e7cbe64fSgw (void) zvol_dump_fini(zv); 2062e7cbe64fSgw return (error); 2063e7cbe64fSgw } 2064e7cbe64fSgw 2065e7cbe64fSgw zv->zv_flags |= ZVOL_DUMPIFIED; 2066e7cbe64fSgw error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, 2067e7cbe64fSgw &zv->zv_volsize, tx); 2068e7cbe64fSgw dmu_tx_commit(tx); 2069e7cbe64fSgw 2070e7cbe64fSgw if (error) { 2071e7cbe64fSgw (void) zvol_dump_fini(zv); 2072e7cbe64fSgw return (error); 2073e7cbe64fSgw } 2074e7cbe64fSgw 2075e7cbe64fSgw txg_wait_synced(dmu_objset_pool(os), 0); 2076e7cbe64fSgw return (0); 2077e7cbe64fSgw } 2078e7cbe64fSgw 2079e7cbe64fSgw static int 2080e7cbe64fSgw zvol_dump_fini(zvol_state_t *zv) 2081e7cbe64fSgw { 2082e7cbe64fSgw dmu_tx_t *tx; 2083e7cbe64fSgw objset_t *os = zv->zv_objset; 2084e7cbe64fSgw nvlist_t *nv; 2085e7cbe64fSgw int error = 0; 2086afee20e4SGeorge Wilson uint64_t checksum, compress, refresrv, vbs, dedup; 20878d265e66SGeorge Wilson uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); 2088e7cbe64fSgw 2089b7e50089Smaybee /* 2090b7e50089Smaybee * Attempt to restore the zvol back to its pre-dumpified state. 2091b7e50089Smaybee * This is a best-effort attempt as it's possible that not all 2092b7e50089Smaybee * of these properties were initialized during the dumpify process 2093b7e50089Smaybee * (i.e. error during zvol_dump_init). 
2094b7e50089Smaybee */ 2095b7e50089Smaybee 2096e7cbe64fSgw tx = dmu_tx_create(os); 2097e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 2098e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 2099e7cbe64fSgw if (error) { 2100e7cbe64fSgw dmu_tx_abort(tx); 2101e7cbe64fSgw return (error); 2102e7cbe64fSgw } 2103b7e50089Smaybee (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); 2104b7e50089Smaybee dmu_tx_commit(tx); 2105e7cbe64fSgw 2106e7cbe64fSgw (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2107e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); 2108e7cbe64fSgw (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2109e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); 2110e7cbe64fSgw (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 2111e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); 211288b7b0f2SMatthew Ahrens (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 211388b7b0f2SMatthew Ahrens zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs); 2114e7cbe64fSgw 2115e7cbe64fSgw VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2116e7cbe64fSgw (void) nvlist_add_uint64(nv, 2117e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); 2118e7cbe64fSgw (void) nvlist_add_uint64(nv, 2119e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); 2120e7cbe64fSgw (void) nvlist_add_uint64(nv, 2121e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); 21228d265e66SGeorge Wilson if (version >= SPA_VERSION_DEDUP && 21238d265e66SGeorge Wilson zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 21248d265e66SGeorge Wilson zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) { 21258d265e66SGeorge Wilson (void) nvlist_add_uint64(nv, 21268d265e66SGeorge Wilson zfs_prop_to_name(ZFS_PROP_DEDUP), dedup); 21278d265e66SGeorge Wilson } 212892241e0bSTom Erickson (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 212992241e0bSTom Erickson nv, NULL); 2130e7cbe64fSgw nvlist_free(nv); 2131e7cbe64fSgw 2132b7e50089Smaybee 
zvol_free_extents(zv); 2133b7e50089Smaybee zv->zv_flags &= ~ZVOL_DUMPIFIED; 2134b7e50089Smaybee (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); 2135681d9761SEric Taylor /* wait for dmu_free_long_range to actually free the blocks */ 2136681d9761SEric Taylor txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 2137681d9761SEric Taylor tx = dmu_tx_create(os); 2138681d9761SEric Taylor dmu_tx_hold_bonus(tx, ZVOL_OBJ); 2139681d9761SEric Taylor error = dmu_tx_assign(tx, TXG_WAIT); 2140681d9761SEric Taylor if (error) { 2141681d9761SEric Taylor dmu_tx_abort(tx); 2142681d9761SEric Taylor return (error); 2143681d9761SEric Taylor } 2144b24ab676SJeff Bonwick if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0) 2145b24ab676SJeff Bonwick zv->zv_volblocksize = vbs; 2146681d9761SEric Taylor dmu_tx_commit(tx); 2147b7e50089Smaybee 2148e7cbe64fSgw return (0); 2149e7cbe64fSgw } 2150