1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22e7cbe64fSgw * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens /* 29fa9e4066Sahrens * ZFS volume emulation driver. 30fa9e4066Sahrens * 31fa9e4066Sahrens * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 32fa9e4066Sahrens * Volumes are accessed through the symbolic links named: 33fa9e4066Sahrens * 34fa9e4066Sahrens * /dev/zvol/dsk/<pool_name>/<dataset_name> 35fa9e4066Sahrens * /dev/zvol/rdsk/<pool_name>/<dataset_name> 36fa9e4066Sahrens * 37fa9e4066Sahrens * These links are created by the ZFS-specific devfsadm link generator. 38fa9e4066Sahrens * Volumes are persistent through reboot. No user command needs to be 39fa9e4066Sahrens * run before opening and using a device. 40fa9e4066Sahrens */ 41fa9e4066Sahrens 42fa9e4066Sahrens #include <sys/types.h> 43fa9e4066Sahrens #include <sys/param.h> 44fa9e4066Sahrens #include <sys/errno.h> 45fa9e4066Sahrens #include <sys/uio.h> 46fa9e4066Sahrens #include <sys/buf.h> 47fa9e4066Sahrens #include <sys/modctl.h> 48fa9e4066Sahrens #include <sys/open.h> 49fa9e4066Sahrens #include <sys/kmem.h> 50fa9e4066Sahrens #include <sys/conf.h> 51fa9e4066Sahrens #include <sys/cmn_err.h> 52fa9e4066Sahrens #include <sys/stat.h> 53fa9e4066Sahrens #include <sys/zap.h> 54fa9e4066Sahrens #include <sys/spa.h> 55fa9e4066Sahrens #include <sys/zio.h> 56e7cbe64fSgw #include <sys/dmu_traverse.h> 57e7cbe64fSgw #include <sys/dnode.h> 58e7cbe64fSgw #include <sys/dsl_dataset.h> 59fa9e4066Sahrens #include <sys/dsl_prop.h> 60fa9e4066Sahrens #include <sys/dkio.h> 61fa9e4066Sahrens #include <sys/efi_partition.h> 62fa9e4066Sahrens #include <sys/byteorder.h> 63fa9e4066Sahrens #include <sys/pathname.h> 64fa9e4066Sahrens #include <sys/ddi.h> 65fa9e4066Sahrens #include <sys/sunddi.h> 66fa9e4066Sahrens #include <sys/crc32.h> 67fa9e4066Sahrens #include <sys/dirent.h> 68fa9e4066Sahrens #include <sys/policy.h> 69fa9e4066Sahrens #include <sys/fs/zfs.h> 70fa9e4066Sahrens #include <sys/zfs_ioctl.h> 71fa9e4066Sahrens #include <sys/mkdev.h> 7222ac5be4Sperrin #include <sys/zil.h> 73c5c6ffa0Smaybee #include <sys/refcount.h> 74c2e6a7d6Sperrin #include <sys/zfs_znode.h> 75c2e6a7d6Sperrin #include <sys/zfs_rlock.h> 76e7cbe64fSgw #include <sys/vdev_disk.h> 77e7cbe64fSgw #include <sys/vdev_impl.h> 78e7cbe64fSgw #include <sys/zvol.h> 79e7cbe64fSgw #include <sys/dumphdr.h> 80fa9e4066Sahrens 81fa9e4066Sahrens #include "zfs_namecheck.h" 82fa9e4066Sahrens 83fa9e4066Sahrens static void *zvol_state; 84fa9e4066Sahrens 85e7cbe64fSgw #define ZVOL_DUMPSIZE "dumpsize" 86e7cbe64fSgw 87fa9e4066Sahrens /* 88fa9e4066Sahrens * This lock protects the zvol_state structure from being modified 89fa9e4066Sahrens * while it's being used, e.g. an open that comes in before a create 90fa9e4066Sahrens * finishes. It also protects temporary opens of the dataset so that, 91fa9e4066Sahrens * e.g., an open doesn't get a spurious EBUSY. 92fa9e4066Sahrens */ 93fa9e4066Sahrens static kmutex_t zvol_state_lock; 94fa9e4066Sahrens static uint32_t zvol_minors; 95fa9e4066Sahrens 96e7cbe64fSgw #define NUM_EXTENTS ((SPA_MAXBLOCKSIZE) / sizeof (zvol_extent_t)) 97e7cbe64fSgw 98e7cbe64fSgw typedef struct zvol_extent { 99e7cbe64fSgw dva_t ze_dva; /* dva associated with this extent */ 100e7cbe64fSgw uint64_t ze_stride; /* extent stride */ 101e7cbe64fSgw uint64_t ze_size; /* number of blocks in extent */ 102e7cbe64fSgw } zvol_extent_t; 103e7cbe64fSgw 104e7cbe64fSgw /* 105e7cbe64fSgw * The list of extents associated with the dump device 106e7cbe64fSgw */ 107e7cbe64fSgw typedef struct zvol_ext_list { 108e7cbe64fSgw zvol_extent_t zl_extents[NUM_EXTENTS]; 109e7cbe64fSgw struct zvol_ext_list *zl_next; 110e7cbe64fSgw } zvol_ext_list_t; 111e7cbe64fSgw 112fa9e4066Sahrens /* 113fa9e4066Sahrens * The in-core state of each volume. 114fa9e4066Sahrens */ 115fa9e4066Sahrens typedef struct zvol_state { 116fa9e4066Sahrens char zv_name[MAXPATHLEN]; /* pool/dd name */ 117fa9e4066Sahrens uint64_t zv_volsize; /* amount of space we advertise */ 11867bd71c6Sperrin uint64_t zv_volblocksize; /* volume block size */ 119fa9e4066Sahrens minor_t zv_minor; /* minor number */ 120fa9e4066Sahrens uint8_t zv_min_bs; /* minimum addressable block shift */ 121e7cbe64fSgw uint8_t zv_flags; /* readonly; dumpified */ 122fa9e4066Sahrens objset_t *zv_objset; /* objset handle */ 123fa9e4066Sahrens uint32_t zv_mode; /* DS_MODE_* flags at open time */ 124fa9e4066Sahrens uint32_t zv_open_count[OTYPCNT]; /* open counts */ 125fa9e4066Sahrens uint32_t zv_total_opens; /* total open count */ 12622ac5be4Sperrin zilog_t *zv_zilog; /* ZIL handle */ 127e7cbe64fSgw zvol_ext_list_t *zv_list; /* List of extents for dump */ 12822ac5be4Sperrin uint64_t zv_txg_assign; /* txg to assign during ZIL replay */ 129c2e6a7d6Sperrin znode_t zv_znode; /* for range locking */ 130fa9e4066Sahrens } zvol_state_t; 131fa9e4066Sahrens 132e7cbe64fSgw /* 133e7cbe64fSgw * zvol specific flags 134e7cbe64fSgw */ 135e7cbe64fSgw #define ZVOL_RDONLY 0x1 136e7cbe64fSgw #define ZVOL_DUMPIFIED 0x2 137e7cbe64fSgw 13867bd71c6Sperrin /* 13967bd71c6Sperrin * zvol maximum transfer in one DMU tx. 14067bd71c6Sperrin */ 14167bd71c6Sperrin int zvol_maxphys = DMU_MAX_ACCESS/2; 14267bd71c6Sperrin 143e7cbe64fSgw extern int zfs_set_prop_nvlist(const char *, nvlist_t *); 144feb08c6bSbillm static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); 145e7cbe64fSgw static int zvol_dumpify(zvol_state_t *zv); 146e7cbe64fSgw static int zvol_dump_fini(zvol_state_t *zv); 147e7cbe64fSgw static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); 14867bd71c6Sperrin 149fa9e4066Sahrens static void 15091ebeef5Sahrens zvol_size_changed(zvol_state_t *zv, major_t maj) 151fa9e4066Sahrens { 15291ebeef5Sahrens dev_t dev = makedevice(maj, zv->zv_minor); 153fa9e4066Sahrens 154fa9e4066Sahrens VERIFY(ddi_prop_update_int64(dev, zfs_dip, 155fa9e4066Sahrens "Size", zv->zv_volsize) == DDI_SUCCESS); 156fa9e4066Sahrens VERIFY(ddi_prop_update_int64(dev, zfs_dip, 157fa9e4066Sahrens "Nblocks", lbtodb(zv->zv_volsize)) == DDI_SUCCESS); 158e7cbe64fSgw 159e7cbe64fSgw /* Notify specfs to invalidate the cached size */ 160e7cbe64fSgw spec_size_invalidate(dev, VBLK); 161e7cbe64fSgw spec_size_invalidate(dev, VCHR); 162fa9e4066Sahrens } 163fa9e4066Sahrens 164fa9e4066Sahrens int 165e9dbad6fSeschrock zvol_check_volsize(uint64_t volsize, uint64_t blocksize) 166fa9e4066Sahrens { 167e9dbad6fSeschrock if (volsize == 0) 168fa9e4066Sahrens return (EINVAL); 169fa9e4066Sahrens 170e9dbad6fSeschrock if (volsize % blocksize != 0) 1715c5460e9Seschrock return (EINVAL); 1725c5460e9Seschrock 173fa9e4066Sahrens #ifdef _ILP32 174e9dbad6fSeschrock if (volsize - 1 > SPEC_MAXOFFSET_T) 175fa9e4066Sahrens return (EOVERFLOW); 176fa9e4066Sahrens #endif 177fa9e4066Sahrens return (0); 178fa9e4066Sahrens } 179fa9e4066Sahrens 180fa9e4066Sahrens int 181e9dbad6fSeschrock zvol_check_volblocksize(uint64_t volblocksize) 182fa9e4066Sahrens { 183e9dbad6fSeschrock if (volblocksize < SPA_MINBLOCKSIZE || 184e9dbad6fSeschrock volblocksize > SPA_MAXBLOCKSIZE || 185e9dbad6fSeschrock !ISP2(volblocksize)) 186fa9e4066Sahrens return (EDOM); 187fa9e4066Sahrens 188fa9e4066Sahrens return (0); 189fa9e4066Sahrens } 190fa9e4066Sahrens 191fa9e4066Sahrens static void 192fa9e4066Sahrens zvol_readonly_changed_cb(void *arg, uint64_t newval) 193fa9e4066Sahrens { 194fa9e4066Sahrens zvol_state_t *zv = arg; 195fa9e4066Sahrens 196e7cbe64fSgw if (newval) 197e7cbe64fSgw zv->zv_flags |= ZVOL_RDONLY; 198e7cbe64fSgw else 199e7cbe64fSgw zv->zv_flags &= ~ZVOL_RDONLY; 200fa9e4066Sahrens } 201fa9e4066Sahrens 202fa9e4066Sahrens int 203a2eea2e1Sahrens zvol_get_stats(objset_t *os, nvlist_t *nv) 204fa9e4066Sahrens { 205fa9e4066Sahrens int error; 206fa9e4066Sahrens dmu_object_info_t doi; 207a2eea2e1Sahrens uint64_t val; 208fa9e4066Sahrens 209fa9e4066Sahrens 210a2eea2e1Sahrens error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); 211fa9e4066Sahrens if (error) 212fa9e4066Sahrens return (error); 213fa9e4066Sahrens 214a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); 215a2eea2e1Sahrens 216fa9e4066Sahrens error = dmu_object_info(os, ZVOL_OBJ, &doi); 217fa9e4066Sahrens 218a2eea2e1Sahrens if (error == 0) { 219a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, 220a2eea2e1Sahrens doi.doi_data_block_size); 221a2eea2e1Sahrens } 222fa9e4066Sahrens 223fa9e4066Sahrens return (error); 224fa9e4066Sahrens } 225fa9e4066Sahrens 226fa9e4066Sahrens /* 227fa9e4066Sahrens * Find a free minor number. 228fa9e4066Sahrens */ 229fa9e4066Sahrens static minor_t 230fa9e4066Sahrens zvol_minor_alloc(void) 231fa9e4066Sahrens { 232fa9e4066Sahrens minor_t minor; 233fa9e4066Sahrens 234fa9e4066Sahrens ASSERT(MUTEX_HELD(&zvol_state_lock)); 235fa9e4066Sahrens 236fa9e4066Sahrens for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) 237fa9e4066Sahrens if (ddi_get_soft_state(zvol_state, minor) == NULL) 238fa9e4066Sahrens return (minor); 239fa9e4066Sahrens 240fa9e4066Sahrens return (0); 241fa9e4066Sahrens } 242fa9e4066Sahrens 243fa9e4066Sahrens static zvol_state_t * 244e9dbad6fSeschrock zvol_minor_lookup(const char *name) 245fa9e4066Sahrens { 246fa9e4066Sahrens minor_t minor; 247fa9e4066Sahrens zvol_state_t *zv; 248fa9e4066Sahrens 249fa9e4066Sahrens ASSERT(MUTEX_HELD(&zvol_state_lock)); 250fa9e4066Sahrens 251fa9e4066Sahrens for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) { 252fa9e4066Sahrens zv = ddi_get_soft_state(zvol_state, minor); 253fa9e4066Sahrens if (zv == NULL) 254fa9e4066Sahrens continue; 255fa9e4066Sahrens if (strcmp(zv->zv_name, name) == 0) 256fa9e4066Sahrens break; 257fa9e4066Sahrens } 258fa9e4066Sahrens 259fa9e4066Sahrens return (zv); 260fa9e4066Sahrens } 261fa9e4066Sahrens 262e7cbe64fSgw void 263e7cbe64fSgw zvol_init_extent(zvol_extent_t *ze, blkptr_t *bp) 264e7cbe64fSgw { 265e7cbe64fSgw ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 266e7cbe64fSgw ze->ze_stride = 0; 267e7cbe64fSgw ze->ze_size = 1; 268e7cbe64fSgw } 269e7cbe64fSgw 270e7cbe64fSgw /* extent mapping arg */ 271e7cbe64fSgw struct maparg { 272e7cbe64fSgw zvol_ext_list_t *ma_list; 273e7cbe64fSgw zvol_extent_t *ma_extent; 274e7cbe64fSgw int ma_gang; 275e7cbe64fSgw }; 276e7cbe64fSgw 277e7cbe64fSgw /*ARGSUSED*/ 278e7cbe64fSgw static int 279e7cbe64fSgw zvol_map_block(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 280e7cbe64fSgw { 281e7cbe64fSgw zbookmark_t *zb = &bc->bc_bookmark; 282e7cbe64fSgw blkptr_t *bp = &bc->bc_blkptr; 283e7cbe64fSgw void *data = bc->bc_data; 284e7cbe64fSgw dnode_phys_t *dnp = bc->bc_dnode; 285e7cbe64fSgw struct maparg *ma = (struct maparg *)arg; 286e7cbe64fSgw uint64_t stride; 287e7cbe64fSgw 288e7cbe64fSgw /* If there is an error, then keep trying to make progress */ 289e7cbe64fSgw if (bc->bc_errno) 290e7cbe64fSgw return (ERESTART); 291e7cbe64fSgw 292e7cbe64fSgw #ifdef ZFS_DEBUG 293e7cbe64fSgw if (zb->zb_level == -1) { 294e7cbe64fSgw ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); 295e7cbe64fSgw ASSERT3U(BP_GET_LEVEL(bp), ==, 0); 296e7cbe64fSgw } else { 297e7cbe64fSgw ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 298e7cbe64fSgw ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 299e7cbe64fSgw } 300e7cbe64fSgw 301e7cbe64fSgw if (zb->zb_level > 0) { 302e7cbe64fSgw uint64_t fill = 0; 303e7cbe64fSgw blkptr_t *bpx, *bpend; 304e7cbe64fSgw 305e7cbe64fSgw for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); 306e7cbe64fSgw bpx < bpend; bpx++) { 307e7cbe64fSgw if (bpx->blk_birth != 0) { 308e7cbe64fSgw fill += bpx->blk_fill; 309e7cbe64fSgw } else { 310e7cbe64fSgw ASSERT(bpx->blk_fill == 0); 311e7cbe64fSgw } 312e7cbe64fSgw } 313e7cbe64fSgw ASSERT3U(fill, ==, bp->blk_fill); 314e7cbe64fSgw } 315e7cbe64fSgw 316e7cbe64fSgw if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { 317e7cbe64fSgw uint64_t fill = 0; 318e7cbe64fSgw dnode_phys_t *dnx, *dnend; 319e7cbe64fSgw 320e7cbe64fSgw for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); 321e7cbe64fSgw dnx < dnend; dnx++) { 322e7cbe64fSgw if (dnx->dn_type != DMU_OT_NONE) 323e7cbe64fSgw fill++; 324e7cbe64fSgw } 325e7cbe64fSgw ASSERT3U(fill, ==, bp->blk_fill); 326e7cbe64fSgw } 327e7cbe64fSgw #endif 328e7cbe64fSgw 329e7cbe64fSgw if (zb->zb_level || dnp->dn_type == DMU_OT_DNODE) 330e7cbe64fSgw return (0); 331e7cbe64fSgw 332e7cbe64fSgw /* Abort immediately if we have encountered gang blocks */ 333e7cbe64fSgw if (BP_IS_GANG(bp)) { 334e7cbe64fSgw ma->ma_gang++; 335e7cbe64fSgw return (EINTR); 336e7cbe64fSgw } 337e7cbe64fSgw 338e7cbe64fSgw /* first time? */ 339e7cbe64fSgw if (ma->ma_extent->ze_size == 0) { 340e7cbe64fSgw zvol_init_extent(ma->ma_extent, bp); 341e7cbe64fSgw return (0); 342e7cbe64fSgw } 343e7cbe64fSgw 344e7cbe64fSgw stride = (DVA_GET_OFFSET(&bp->blk_dva[0])) - 345e7cbe64fSgw ((DVA_GET_OFFSET(&ma->ma_extent->ze_dva)) + 346e7cbe64fSgw (ma->ma_extent->ze_size - 1) * (ma->ma_extent->ze_stride)); 347e7cbe64fSgw if (DVA_GET_VDEV(BP_IDENTITY(bp)) == 348e7cbe64fSgw DVA_GET_VDEV(&ma->ma_extent->ze_dva)) { 349e7cbe64fSgw if (ma->ma_extent->ze_stride == 0) { 350e7cbe64fSgw /* second block in this extent */ 351e7cbe64fSgw ma->ma_extent->ze_stride = stride; 352e7cbe64fSgw ma->ma_extent->ze_size++; 353e7cbe64fSgw return (0); 354e7cbe64fSgw } else if (ma->ma_extent->ze_stride == stride) { 355e7cbe64fSgw /* 356e7cbe64fSgw * the block we allocated has the same 357e7cbe64fSgw * stride 358e7cbe64fSgw */ 359e7cbe64fSgw ma->ma_extent->ze_size++; 360e7cbe64fSgw return (0); 361e7cbe64fSgw } 362e7cbe64fSgw } 363e7cbe64fSgw 364e7cbe64fSgw /* 365e7cbe64fSgw * dtrace -n 'zfs-dprintf 366e7cbe64fSgw * /stringof(arg0) == "zvol.c"/ 367e7cbe64fSgw * { 368e7cbe64fSgw * printf("%s: %s", stringof(arg1), stringof(arg3)) 369e7cbe64fSgw * } ' 370e7cbe64fSgw */ 371e7cbe64fSgw dprintf("ma_extent 0x%lx mrstride 0x%lx stride %lx\n", 372e7cbe64fSgw ma->ma_extent->ze_size, ma->ma_extent->ze_stride, stride); 373e7cbe64fSgw dprintf_bp(bp, "%s", "next blkptr:"); 374e7cbe64fSgw /* start a new extent */ 375e7cbe64fSgw if (ma->ma_extent == &ma->ma_list->zl_extents[NUM_EXTENTS - 1]) { 376e7cbe64fSgw ma->ma_list->zl_next = kmem_zalloc(sizeof (zvol_ext_list_t), 377e7cbe64fSgw KM_SLEEP); 378e7cbe64fSgw ma->ma_list = ma->ma_list->zl_next; 379e7cbe64fSgw ma->ma_extent = &ma->ma_list->zl_extents[0]; 380e7cbe64fSgw } else { 381e7cbe64fSgw ma->ma_extent++; 382e7cbe64fSgw } 383e7cbe64fSgw zvol_init_extent(ma->ma_extent, bp); 384e7cbe64fSgw return (0); 385e7cbe64fSgw } 386e7cbe64fSgw 387ecd6cf80Smarks /* ARGSUSED */ 388fa9e4066Sahrens void 389ecd6cf80Smarks zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 390fa9e4066Sahrens { 391da6c28aaSamw zfs_creat_t *zct = arg; 392da6c28aaSamw nvlist_t *nvprops = zct->zct_props; 393fa9e4066Sahrens int error; 394e9dbad6fSeschrock uint64_t volblocksize, volsize; 395fa9e4066Sahrens 396ecd6cf80Smarks VERIFY(nvlist_lookup_uint64(nvprops, 397e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 398ecd6cf80Smarks if (nvlist_lookup_uint64(nvprops, 399e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 400e9dbad6fSeschrock volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 401e9dbad6fSeschrock 402e9dbad6fSeschrock /* 403e7cbe64fSgw * These properties must be removed from the list so the generic 404e9dbad6fSeschrock * property setting step won't apply to them. 405e9dbad6fSeschrock */ 406ecd6cf80Smarks VERIFY(nvlist_remove_all(nvprops, 407e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 408ecd6cf80Smarks (void) nvlist_remove_all(nvprops, 409e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 410e9dbad6fSeschrock 411e9dbad6fSeschrock error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 412fa9e4066Sahrens DMU_OT_NONE, 0, tx); 413fa9e4066Sahrens ASSERT(error == 0); 414fa9e4066Sahrens 415fa9e4066Sahrens error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 416fa9e4066Sahrens DMU_OT_NONE, 0, tx); 417fa9e4066Sahrens ASSERT(error == 0); 418fa9e4066Sahrens 419e9dbad6fSeschrock error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 420fa9e4066Sahrens ASSERT(error == 0); 421fa9e4066Sahrens } 422fa9e4066Sahrens 42322ac5be4Sperrin /* 42422ac5be4Sperrin * Replay a TX_WRITE ZIL transaction that didn't get committed 42522ac5be4Sperrin * after a system failure 42622ac5be4Sperrin */ 42722ac5be4Sperrin static int 42822ac5be4Sperrin zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 42922ac5be4Sperrin { 43022ac5be4Sperrin objset_t *os = zv->zv_objset; 43122ac5be4Sperrin char *data = (char *)(lr + 1); /* data follows lr_write_t */ 43222ac5be4Sperrin uint64_t off = lr->lr_offset; 43322ac5be4Sperrin uint64_t len = lr->lr_length; 43422ac5be4Sperrin dmu_tx_t *tx; 43522ac5be4Sperrin int error; 43622ac5be4Sperrin 43722ac5be4Sperrin if (byteswap) 43822ac5be4Sperrin byteswap_uint64_array(lr, sizeof (*lr)); 43922ac5be4Sperrin 44022ac5be4Sperrin tx = dmu_tx_create(os); 44122ac5be4Sperrin dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); 44222ac5be4Sperrin error = dmu_tx_assign(tx, zv->zv_txg_assign); 44322ac5be4Sperrin if (error) { 44422ac5be4Sperrin dmu_tx_abort(tx); 44522ac5be4Sperrin } else { 44622ac5be4Sperrin dmu_write(os, ZVOL_OBJ, off, len, data, tx); 44722ac5be4Sperrin dmu_tx_commit(tx); 44822ac5be4Sperrin } 44922ac5be4Sperrin 45022ac5be4Sperrin return (error); 45122ac5be4Sperrin } 45222ac5be4Sperrin 45322ac5be4Sperrin /* ARGSUSED */ 45422ac5be4Sperrin static int 45522ac5be4Sperrin zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 45622ac5be4Sperrin { 45722ac5be4Sperrin return (ENOTSUP); 45822ac5be4Sperrin } 45922ac5be4Sperrin 46022ac5be4Sperrin /* 46122ac5be4Sperrin * Callback vectors for replaying records. 46222ac5be4Sperrin * Only TX_WRITE is needed for zvol. 46322ac5be4Sperrin */ 46422ac5be4Sperrin zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 46522ac5be4Sperrin zvol_replay_err, /* 0 no such transaction type */ 46622ac5be4Sperrin zvol_replay_err, /* TX_CREATE */ 46722ac5be4Sperrin zvol_replay_err, /* TX_MKDIR */ 46822ac5be4Sperrin zvol_replay_err, /* TX_MKXATTR */ 46922ac5be4Sperrin zvol_replay_err, /* TX_SYMLINK */ 47022ac5be4Sperrin zvol_replay_err, /* TX_REMOVE */ 47122ac5be4Sperrin zvol_replay_err, /* TX_RMDIR */ 47222ac5be4Sperrin zvol_replay_err, /* TX_LINK */ 47322ac5be4Sperrin zvol_replay_err, /* TX_RENAME */ 47422ac5be4Sperrin zvol_replay_write, /* TX_WRITE */ 47522ac5be4Sperrin zvol_replay_err, /* TX_TRUNCATE */ 47622ac5be4Sperrin zvol_replay_err, /* TX_SETATTR */ 47722ac5be4Sperrin zvol_replay_err, /* TX_ACL */ 47822ac5be4Sperrin }; 47922ac5be4Sperrin 480fa9e4066Sahrens /* 481e7cbe64fSgw * reconstruct dva that gets us to the desired offset (offset 482e7cbe64fSgw * is in bytes) 483e7cbe64fSgw */ 484e7cbe64fSgw int 485e7cbe64fSgw zvol_get_dva(zvol_state_t *zv, uint64_t offset, dva_t *dva) 486e7cbe64fSgw { 487e7cbe64fSgw zvol_ext_list_t *zl; 488e7cbe64fSgw zvol_extent_t *ze; 489e7cbe64fSgw int idx; 490e7cbe64fSgw uint64_t tmp; 491e7cbe64fSgw 492e7cbe64fSgw if ((zl = zv->zv_list) == NULL) 493e7cbe64fSgw return (EIO); 494e7cbe64fSgw idx = 0; 495e7cbe64fSgw ze = &zl->zl_extents[0]; 496e7cbe64fSgw while (offset >= ze->ze_size * zv->zv_volblocksize) { 497e7cbe64fSgw offset -= ze->ze_size * zv->zv_volblocksize; 498e7cbe64fSgw 499e7cbe64fSgw if (idx == NUM_EXTENTS - 1) { 500e7cbe64fSgw /* we've reached the end of this array */ 501e7cbe64fSgw ASSERT(zl->zl_next != NULL); 502e7cbe64fSgw if (zl->zl_next == NULL) 503e7cbe64fSgw return (-1); 504e7cbe64fSgw zl = zl->zl_next; 505e7cbe64fSgw ze = &zl->zl_extents[0]; 506e7cbe64fSgw idx = 0; 507e7cbe64fSgw } else { 508e7cbe64fSgw ze++; 509e7cbe64fSgw idx++; 510e7cbe64fSgw } 511e7cbe64fSgw } 512e7cbe64fSgw DVA_SET_VDEV(dva, DVA_GET_VDEV(&ze->ze_dva)); 513e7cbe64fSgw tmp = DVA_GET_OFFSET((&ze->ze_dva)); 514e7cbe64fSgw tmp += (ze->ze_stride * (offset / zv->zv_volblocksize)); 515e7cbe64fSgw DVA_SET_OFFSET(dva, tmp); 516e7cbe64fSgw return (0); 517e7cbe64fSgw } 518e7cbe64fSgw 519e7cbe64fSgw static void 520e7cbe64fSgw zvol_free_extents(zvol_state_t *zv) 521e7cbe64fSgw { 522e7cbe64fSgw zvol_ext_list_t *zl; 523e7cbe64fSgw zvol_ext_list_t *tmp; 524e7cbe64fSgw 525e7cbe64fSgw if (zv->zv_list != NULL) { 526e7cbe64fSgw zl = zv->zv_list; 527e7cbe64fSgw while (zl != NULL) { 528e7cbe64fSgw tmp = zl->zl_next; 529e7cbe64fSgw kmem_free(zl, sizeof (zvol_ext_list_t)); 530e7cbe64fSgw zl = tmp; 531e7cbe64fSgw } 532e7cbe64fSgw zv->zv_list = NULL; 533e7cbe64fSgw } 534e7cbe64fSgw } 535e7cbe64fSgw 536e7cbe64fSgw int 537e7cbe64fSgw zvol_get_lbas(zvol_state_t *zv) 538e7cbe64fSgw { 539e7cbe64fSgw struct maparg ma; 540e7cbe64fSgw zvol_ext_list_t *zl; 541e7cbe64fSgw zvol_extent_t *ze; 542e7cbe64fSgw uint64_t blocks = 0; 543e7cbe64fSgw int err; 544e7cbe64fSgw 545e7cbe64fSgw ma.ma_list = zl = kmem_zalloc(sizeof (zvol_ext_list_t), KM_SLEEP); 546e7cbe64fSgw ma.ma_extent = &ma.ma_list->zl_extents[0]; 547e7cbe64fSgw ma.ma_gang = 0; 548e7cbe64fSgw zv->zv_list = ma.ma_list; 549e7cbe64fSgw 550e7cbe64fSgw err = traverse_zvol(zv->zv_objset, ADVANCE_PRE, zvol_map_block, &ma); 551e7cbe64fSgw if (err == EINTR && ma.ma_gang) { 552e7cbe64fSgw /* 553e7cbe64fSgw * We currently don't support dump devices when the pool 554e7cbe64fSgw * is so fragmented that our allocation has resulted in 555e7cbe64fSgw * gang blocks. 556e7cbe64fSgw */ 557e7cbe64fSgw zvol_free_extents(zv); 558e7cbe64fSgw return (EFRAGS); 559e7cbe64fSgw } 560e7cbe64fSgw ASSERT3U(err, ==, 0); 561e7cbe64fSgw 562e7cbe64fSgw ze = &zl->zl_extents[0]; 563e7cbe64fSgw while (ze) { 564e7cbe64fSgw blocks += ze->ze_size; 565e7cbe64fSgw if (ze == &zl->zl_extents[NUM_EXTENTS - 1]) { 566e7cbe64fSgw zl = zl->zl_next; 567e7cbe64fSgw ze = &zl->zl_extents[0]; 568e7cbe64fSgw } else { 569e7cbe64fSgw ze++; 570e7cbe64fSgw } 571e7cbe64fSgw } 572e7cbe64fSgw if (blocks != (zv->zv_volsize / zv->zv_volblocksize)) { 573e7cbe64fSgw zvol_free_extents(zv); 574e7cbe64fSgw return (EIO); 575e7cbe64fSgw } 576e7cbe64fSgw 577e7cbe64fSgw return (0); 578e7cbe64fSgw } 579e7cbe64fSgw 580e7cbe64fSgw /* 581e7cbe64fSgw * Create a minor node (plus a whole lot more) for the specified volume. 582fa9e4066Sahrens */ 583fa9e4066Sahrens int 58491ebeef5Sahrens zvol_create_minor(const char *name, major_t maj) 585fa9e4066Sahrens { 586fa9e4066Sahrens zvol_state_t *zv; 587fa9e4066Sahrens objset_t *os; 58867bd71c6Sperrin dmu_object_info_t doi; 589fa9e4066Sahrens uint64_t volsize; 590fa9e4066Sahrens minor_t minor = 0; 591fa9e4066Sahrens struct pathname linkpath; 592745cd3c5Smaybee int ds_mode = DS_MODE_OWNER; 593fa9e4066Sahrens vnode_t *vp = NULL; 594fa9e4066Sahrens char *devpath; 595e7cbe64fSgw size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(name) + 1; 596fa9e4066Sahrens char chrbuf[30], blkbuf[30]; 597fa9e4066Sahrens int error; 598fa9e4066Sahrens 599fa9e4066Sahrens mutex_enter(&zvol_state_lock); 600fa9e4066Sahrens 601fa9e4066Sahrens if ((zv = zvol_minor_lookup(name)) != NULL) { 602fa9e4066Sahrens mutex_exit(&zvol_state_lock); 603fa9e4066Sahrens return (EEXIST); 604fa9e4066Sahrens } 605fa9e4066Sahrens 606fa9e4066Sahrens if (strchr(name, '@') != 0) 607fa9e4066Sahrens ds_mode |= DS_MODE_READONLY; 608fa9e4066Sahrens 609fa9e4066Sahrens error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os); 610fa9e4066Sahrens 611fa9e4066Sahrens if (error) { 612fa9e4066Sahrens mutex_exit(&zvol_state_lock); 613fa9e4066Sahrens return (error); 614fa9e4066Sahrens } 615fa9e4066Sahrens 616fa9e4066Sahrens error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 617fa9e4066Sahrens 618fa9e4066Sahrens if (error) { 619fa9e4066Sahrens dmu_objset_close(os); 620fa9e4066Sahrens mutex_exit(&zvol_state_lock); 621fa9e4066Sahrens return (error); 622fa9e4066Sahrens } 623fa9e4066Sahrens 624fa9e4066Sahrens /* 625fa9e4066Sahrens * If there's an existing /dev/zvol symlink, try to use the 626fa9e4066Sahrens * same minor number we used last time. 627fa9e4066Sahrens */ 628fa9e4066Sahrens devpath = kmem_alloc(devpathlen, KM_SLEEP); 629fa9e4066Sahrens 630e7cbe64fSgw (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, name); 631fa9e4066Sahrens 632fa9e4066Sahrens error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp); 633fa9e4066Sahrens 634fa9e4066Sahrens kmem_free(devpath, devpathlen); 635fa9e4066Sahrens 636fa9e4066Sahrens if (error == 0 && vp->v_type != VLNK) 637fa9e4066Sahrens error = EINVAL; 638fa9e4066Sahrens 639fa9e4066Sahrens if (error == 0) { 640fa9e4066Sahrens pn_alloc(&linkpath); 641fa9e4066Sahrens error = pn_getsymlink(vp, &linkpath, kcred); 642fa9e4066Sahrens if (error == 0) { 643fa9e4066Sahrens char *ms = strstr(linkpath.pn_path, ZVOL_PSEUDO_DEV); 644fa9e4066Sahrens if (ms != NULL) { 645fa9e4066Sahrens ms += strlen(ZVOL_PSEUDO_DEV); 646fa9e4066Sahrens minor = stoi(&ms); 647fa9e4066Sahrens } 648fa9e4066Sahrens } 649fa9e4066Sahrens pn_free(&linkpath); 650fa9e4066Sahrens } 651fa9e4066Sahrens 652fa9e4066Sahrens if (vp != NULL) 653fa9e4066Sahrens VN_RELE(vp); 654fa9e4066Sahrens 655fa9e4066Sahrens /* 656fa9e4066Sahrens * If we found a minor but it's already in use, we must pick a new one. 657fa9e4066Sahrens */ 658fa9e4066Sahrens if (minor != 0 && ddi_get_soft_state(zvol_state, minor) != NULL) 659fa9e4066Sahrens minor = 0; 660fa9e4066Sahrens 661fa9e4066Sahrens if (minor == 0) 662fa9e4066Sahrens minor = zvol_minor_alloc(); 663fa9e4066Sahrens 664fa9e4066Sahrens if (minor == 0) { 665fa9e4066Sahrens dmu_objset_close(os); 666fa9e4066Sahrens mutex_exit(&zvol_state_lock); 667fa9e4066Sahrens return (ENXIO); 668fa9e4066Sahrens } 669fa9e4066Sahrens 670fa9e4066Sahrens if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) { 671fa9e4066Sahrens dmu_objset_close(os); 672fa9e4066Sahrens mutex_exit(&zvol_state_lock); 673fa9e4066Sahrens return (EAGAIN); 674fa9e4066Sahrens } 675fa9e4066Sahrens 676e9dbad6fSeschrock (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 677e9dbad6fSeschrock (char *)name); 678fa9e4066Sahrens 679fa9e4066Sahrens (void) sprintf(chrbuf, "%uc,raw", minor); 680fa9e4066Sahrens 681fa9e4066Sahrens if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 682fa9e4066Sahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 683fa9e4066Sahrens ddi_soft_state_free(zvol_state, minor); 684fa9e4066Sahrens dmu_objset_close(os); 685fa9e4066Sahrens mutex_exit(&zvol_state_lock); 686fa9e4066Sahrens return (EAGAIN); 687fa9e4066Sahrens } 688fa9e4066Sahrens 689fa9e4066Sahrens (void) sprintf(blkbuf, "%uc", minor); 690fa9e4066Sahrens 691fa9e4066Sahrens if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 692fa9e4066Sahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 693fa9e4066Sahrens ddi_remove_minor_node(zfs_dip, chrbuf); 694fa9e4066Sahrens ddi_soft_state_free(zvol_state, minor); 695fa9e4066Sahrens dmu_objset_close(os); 696fa9e4066Sahrens mutex_exit(&zvol_state_lock); 697fa9e4066Sahrens return (EAGAIN); 698fa9e4066Sahrens } 699fa9e4066Sahrens 700fa9e4066Sahrens zv = ddi_get_soft_state(zvol_state, minor); 701fa9e4066Sahrens 702fa9e4066Sahrens (void) strcpy(zv->zv_name, name); 703fa9e4066Sahrens zv->zv_min_bs = DEV_BSHIFT; 704fa9e4066Sahrens zv->zv_minor = minor; 705fa9e4066Sahrens zv->zv_volsize = volsize; 706fa9e4066Sahrens zv->zv_objset = os; 707fa9e4066Sahrens zv->zv_mode = ds_mode; 70867bd71c6Sperrin zv->zv_zilog = zil_open(os, zvol_get_data); 709c2e6a7d6Sperrin mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 710c2e6a7d6Sperrin avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 711c2e6a7d6Sperrin sizeof (rl_t), offsetof(rl_t, r_node)); 71267bd71c6Sperrin /* get and cache the blocksize */ 71367bd71c6Sperrin error = dmu_object_info(os, ZVOL_OBJ, &doi); 71467bd71c6Sperrin ASSERT(error == 0); 71567bd71c6Sperrin zv->zv_volblocksize = doi.doi_data_block_size; 71622ac5be4Sperrin 717893a6d32Sahrens zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector); 71891ebeef5Sahrens zvol_size_changed(zv, maj); 719fa9e4066Sahrens 720ea8dc4b6Seschrock /* XXX this should handle the possible i/o error */ 721fa9e4066Sahrens VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset), 722fa9e4066Sahrens "readonly", zvol_readonly_changed_cb, zv) == 0); 723fa9e4066Sahrens 724fa9e4066Sahrens zvol_minors++; 725fa9e4066Sahrens 726fa9e4066Sahrens mutex_exit(&zvol_state_lock); 727fa9e4066Sahrens 728fa9e4066Sahrens return (0); 729fa9e4066Sahrens } 730fa9e4066Sahrens 731fa9e4066Sahrens /* 732fa9e4066Sahrens * Remove minor node for the specified volume. 733fa9e4066Sahrens */ 734fa9e4066Sahrens int 735e9dbad6fSeschrock zvol_remove_minor(const char *name) 736fa9e4066Sahrens { 737fa9e4066Sahrens zvol_state_t *zv; 738fa9e4066Sahrens char namebuf[30]; 739fa9e4066Sahrens 740fa9e4066Sahrens mutex_enter(&zvol_state_lock); 741fa9e4066Sahrens 742e9dbad6fSeschrock if ((zv = zvol_minor_lookup(name)) == NULL) { 743fa9e4066Sahrens mutex_exit(&zvol_state_lock); 744fa9e4066Sahrens return (ENXIO); 745fa9e4066Sahrens } 746fa9e4066Sahrens 747fa9e4066Sahrens if (zv->zv_total_opens != 0) { 748fa9e4066Sahrens mutex_exit(&zvol_state_lock); 749fa9e4066Sahrens return (EBUSY); 750fa9e4066Sahrens } 751fa9e4066Sahrens 752fa9e4066Sahrens (void) sprintf(namebuf, "%uc,raw", zv->zv_minor); 753fa9e4066Sahrens ddi_remove_minor_node(zfs_dip, namebuf); 754fa9e4066Sahrens 755fa9e4066Sahrens (void) sprintf(namebuf, "%uc", zv->zv_minor); 756fa9e4066Sahrens ddi_remove_minor_node(zfs_dip, namebuf); 757fa9e4066Sahrens 758fa9e4066Sahrens VERIFY(dsl_prop_unregister(dmu_objset_ds(zv->zv_objset), 759fa9e4066Sahrens "readonly", zvol_readonly_changed_cb, zv) == 0); 760fa9e4066Sahrens 76122ac5be4Sperrin zil_close(zv->zv_zilog); 76222ac5be4Sperrin zv->zv_zilog = NULL; 763fa9e4066Sahrens dmu_objset_close(zv->zv_objset); 764fa9e4066Sahrens zv->zv_objset = NULL; 765c2e6a7d6Sperrin avl_destroy(&zv->zv_znode.z_range_avl); 766c2e6a7d6Sperrin mutex_destroy(&zv->zv_znode.z_range_lock); 767fa9e4066Sahrens 768fa9e4066Sahrens ddi_soft_state_free(zvol_state, zv->zv_minor); 769fa9e4066Sahrens 770fa9e4066Sahrens zvol_minors--; 771fa9e4066Sahrens 772fa9e4066Sahrens mutex_exit(&zvol_state_lock); 773fa9e4066Sahrens 774fa9e4066Sahrens return (0); 775fa9e4066Sahrens } 776fa9e4066Sahrens 777e7cbe64fSgw int 778e7cbe64fSgw zvol_prealloc(zvol_state_t *zv) 779e7cbe64fSgw { 780e7cbe64fSgw objset_t *os = zv->zv_objset; 781e7cbe64fSgw dmu_tx_t *tx; 782e7cbe64fSgw void *data; 783e7cbe64fSgw uint64_t refd, avail, usedobjs, availobjs; 784e7cbe64fSgw uint64_t resid = zv->zv_volsize; 785e7cbe64fSgw uint64_t off = 0; 786e7cbe64fSgw 787e7cbe64fSgw /* Check the space usage before attempting to allocate the space */ 788e7cbe64fSgw dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); 789e7cbe64fSgw if (avail < zv->zv_volsize) 790e7cbe64fSgw return (ENOSPC); 791e7cbe64fSgw 792e7cbe64fSgw /* Free old extents if they exist */ 793e7cbe64fSgw zvol_free_extents(zv); 794e7cbe64fSgw 795e7cbe64fSgw /* allocate the blocks by writing each one */ 796e7cbe64fSgw data = kmem_zalloc(SPA_MAXBLOCKSIZE, KM_SLEEP); 797e7cbe64fSgw 798e7cbe64fSgw while (resid != 0) { 799e7cbe64fSgw int error; 800e7cbe64fSgw uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); 801e7cbe64fSgw 802e7cbe64fSgw tx = dmu_tx_create(os); 803e7cbe64fSgw dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 804e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 805e7cbe64fSgw if (error) { 806e7cbe64fSgw dmu_tx_abort(tx); 807e7cbe64fSgw kmem_free(data, SPA_MAXBLOCKSIZE); 808cdb0ab79Smaybee (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); 809e7cbe64fSgw return (error); 810e7cbe64fSgw } 811e7cbe64fSgw dmu_write(os, ZVOL_OBJ, off, bytes, data, tx); 812e7cbe64fSgw dmu_tx_commit(tx); 813e7cbe64fSgw off += bytes; 814e7cbe64fSgw resid -= bytes; 815e7cbe64fSgw } 816e7cbe64fSgw kmem_free(data, SPA_MAXBLOCKSIZE); 817e7cbe64fSgw txg_wait_synced(dmu_objset_pool(os), 0); 818e7cbe64fSgw 819e7cbe64fSgw return (0); 820e7cbe64fSgw } 821e7cbe64fSgw 822e7cbe64fSgw int 823e7cbe64fSgw zvol_update_volsize(zvol_state_t *zv, major_t maj, uint64_t volsize) 824e7cbe64fSgw { 825e7cbe64fSgw dmu_tx_t *tx; 826e7cbe64fSgw int error; 827e7cbe64fSgw 828e7cbe64fSgw ASSERT(MUTEX_HELD(&zvol_state_lock)); 829e7cbe64fSgw 830e7cbe64fSgw tx = dmu_tx_create(zv->zv_objset); 831e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 832e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 833e7cbe64fSgw if (error) { 834e7cbe64fSgw dmu_tx_abort(tx); 835e7cbe64fSgw return (error); 836e7cbe64fSgw } 837e7cbe64fSgw 838e7cbe64fSgw error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1, 839e7cbe64fSgw &volsize, tx); 840e7cbe64fSgw dmu_tx_commit(tx); 841e7cbe64fSgw 842e7cbe64fSgw if (error == 0) 843cdb0ab79Smaybee error = dmu_free_long_range(zv->zv_objset, 844cdb0ab79Smaybee ZVOL_OBJ, volsize, DMU_OBJECT_END); 845e7cbe64fSgw 846*bb0ade09Sahrens /* 847*bb0ade09Sahrens * If we are using a faked-up state (zv_minor == 0) then don't 848*bb0ade09Sahrens * try to update the in-core zvol state. 849*bb0ade09Sahrens */ 850*bb0ade09Sahrens if (error == 0 && zv->zv_minor) { 851e7cbe64fSgw zv->zv_volsize = volsize; 852e7cbe64fSgw zvol_size_changed(zv, maj); 853e7cbe64fSgw } 854e7cbe64fSgw return (error); 855e7cbe64fSgw } 856e7cbe64fSgw 857fa9e4066Sahrens int 85891ebeef5Sahrens zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) 859fa9e4066Sahrens { 860fa9e4066Sahrens zvol_state_t *zv; 861fa9e4066Sahrens int error; 8625c5460e9Seschrock dmu_object_info_t doi; 863e7cbe64fSgw uint64_t old_volsize = 0ULL; 864*bb0ade09Sahrens zvol_state_t state = { 0 }; 865fa9e4066Sahrens 866fa9e4066Sahrens mutex_enter(&zvol_state_lock); 867fa9e4066Sahrens 868e9dbad6fSeschrock if ((zv = zvol_minor_lookup(name)) == NULL) { 869*bb0ade09Sahrens /* 870*bb0ade09Sahrens * If we are doing a "zfs clone -o volsize=", then the 871*bb0ade09Sahrens * minor node won't exist yet. 872*bb0ade09Sahrens */ 873*bb0ade09Sahrens error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER, 874*bb0ade09Sahrens &state.zv_objset); 875*bb0ade09Sahrens if (error != 0) 876*bb0ade09Sahrens goto out; 877*bb0ade09Sahrens zv = &state; 878fa9e4066Sahrens } 879e7cbe64fSgw old_volsize = zv->zv_volsize; 880fa9e4066Sahrens 8815c5460e9Seschrock if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 || 882e9dbad6fSeschrock (error = zvol_check_volsize(volsize, 883*bb0ade09Sahrens doi.doi_data_block_size)) != 0) 884*bb0ade09Sahrens goto out; 8855c5460e9Seschrock 886e7cbe64fSgw if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { 887*bb0ade09Sahrens error = EROFS; 888*bb0ade09Sahrens goto out; 889fa9e4066Sahrens } 890fa9e4066Sahrens 891e7cbe64fSgw error = zvol_update_volsize(zv, maj, volsize); 892fa9e4066Sahrens 893e7cbe64fSgw /* 894e7cbe64fSgw * Reinitialize the dump area to the new size. If we 895e7cbe64fSgw * failed to resize the dump area then restore the it back to 896e7cbe64fSgw * it's original size. 897e7cbe64fSgw */ 898e7cbe64fSgw if (error == 0 && zv->zv_flags & ZVOL_DUMPIFIED) { 899e7cbe64fSgw if ((error = zvol_dumpify(zv)) != 0 || 900e7cbe64fSgw (error = dumpvp_resize()) != 0) { 901e7cbe64fSgw (void) zvol_update_volsize(zv, maj, old_volsize); 902e7cbe64fSgw error = zvol_dumpify(zv); 903e7cbe64fSgw } 904fa9e4066Sahrens } 905fa9e4066Sahrens 906*bb0ade09Sahrens out: 907*bb0ade09Sahrens if (state.zv_objset) 908*bb0ade09Sahrens dmu_objset_close(state.zv_objset); 909*bb0ade09Sahrens 910fa9e4066Sahrens mutex_exit(&zvol_state_lock); 911fa9e4066Sahrens 912fa9e4066Sahrens return (error); 913fa9e4066Sahrens } 914fa9e4066Sahrens 915fa9e4066Sahrens int 916e9dbad6fSeschrock zvol_set_volblocksize(const char *name, uint64_t volblocksize) 917fa9e4066Sahrens { 918fa9e4066Sahrens zvol_state_t *zv; 919fa9e4066Sahrens dmu_tx_t *tx; 920fa9e4066Sahrens int error; 921fa9e4066Sahrens 922fa9e4066Sahrens mutex_enter(&zvol_state_lock); 923fa9e4066Sahrens 924e9dbad6fSeschrock if ((zv = zvol_minor_lookup(name)) == NULL) { 925fa9e4066Sahrens mutex_exit(&zvol_state_lock); 926fa9e4066Sahrens return (ENXIO); 927fa9e4066Sahrens } 928e7cbe64fSgw if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { 929fa9e4066Sahrens mutex_exit(&zvol_state_lock); 930fa9e4066Sahrens return (EROFS); 931fa9e4066Sahrens } 932fa9e4066Sahrens 933fa9e4066Sahrens tx = dmu_tx_create(zv->zv_objset); 934fa9e4066Sahrens dmu_tx_hold_bonus(tx, ZVOL_OBJ); 935fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 936fa9e4066Sahrens if (error) { 937fa9e4066Sahrens dmu_tx_abort(tx); 938fa9e4066Sahrens } else { 939fa9e4066Sahrens error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ, 940e9dbad6fSeschrock volblocksize, 0, tx); 941fa9e4066Sahrens if (error == ENOTSUP) 942fa9e4066Sahrens error = EBUSY; 943fa9e4066Sahrens dmu_tx_commit(tx); 944fa9e4066Sahrens } 945fa9e4066Sahrens 946fa9e4066Sahrens mutex_exit(&zvol_state_lock); 947fa9e4066Sahrens 948fa9e4066Sahrens return (error); 949fa9e4066Sahrens } 950fa9e4066Sahrens 951fa9e4066Sahrens /*ARGSUSED*/ 952fa9e4066Sahrens int 953fa9e4066Sahrens zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) 954fa9e4066Sahrens { 955fa9e4066Sahrens minor_t minor = getminor(*devp); 956fa9e4066Sahrens zvol_state_t *zv; 957fa9e4066Sahrens 958fa9e4066Sahrens if (minor == 0) /* This is the control device */ 959fa9e4066Sahrens return (0); 960fa9e4066Sahrens 961fa9e4066Sahrens mutex_enter(&zvol_state_lock); 962fa9e4066Sahrens 963fa9e4066Sahrens zv = ddi_get_soft_state(zvol_state, minor); 964fa9e4066Sahrens if (zv == NULL) { 965fa9e4066Sahrens mutex_exit(&zvol_state_lock); 966fa9e4066Sahrens return (ENXIO); 967fa9e4066Sahrens } 968fa9e4066Sahrens 969fa9e4066Sahrens ASSERT(zv->zv_objset != NULL); 970fa9e4066Sahrens 971fa9e4066Sahrens if ((flag & FWRITE) && 972e7cbe64fSgw (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))) { 973fa9e4066Sahrens mutex_exit(&zvol_state_lock); 974fa9e4066Sahrens return (EROFS); 975fa9e4066Sahrens } 976fa9e4066Sahrens 977fa9e4066Sahrens if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) { 978fa9e4066Sahrens zv->zv_open_count[otyp]++; 979fa9e4066Sahrens zv->zv_total_opens++; 980fa9e4066Sahrens } 981fa9e4066Sahrens 982fa9e4066Sahrens mutex_exit(&zvol_state_lock); 983fa9e4066Sahrens 984fa9e4066Sahrens return (0); 985fa9e4066Sahrens } 986fa9e4066Sahrens 987fa9e4066Sahrens /*ARGSUSED*/ 988fa9e4066Sahrens int 989fa9e4066Sahrens zvol_close(dev_t dev, int flag, int otyp, cred_t *cr) 990fa9e4066Sahrens { 991fa9e4066Sahrens minor_t minor = getminor(dev); 992fa9e4066Sahrens zvol_state_t *zv; 993fa9e4066Sahrens 994fa9e4066Sahrens if (minor == 0) /* This is the control device */ 995fa9e4066Sahrens return (0); 996fa9e4066Sahrens 997fa9e4066Sahrens mutex_enter(&zvol_state_lock); 998fa9e4066Sahrens 999fa9e4066Sahrens zv = ddi_get_soft_state(zvol_state, minor); 1000fa9e4066Sahrens if (zv == NULL) { 1001fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1002fa9e4066Sahrens return (ENXIO); 1003fa9e4066Sahrens } 1004fa9e4066Sahrens 1005fa9e4066Sahrens /* 1006fa9e4066Sahrens * The next statement is a workaround for the following DDI bug: 1007fa9e4066Sahrens * 6343604 specfs race: multiple "last-close" of the same device 1008fa9e4066Sahrens */ 1009fa9e4066Sahrens if (zv->zv_total_opens == 0) { 1010fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1011fa9e4066Sahrens return (0); 1012fa9e4066Sahrens } 1013fa9e4066Sahrens 1014fa9e4066Sahrens /* 1015fa9e4066Sahrens * If the open count is zero, this is a spurious close. 1016fa9e4066Sahrens * That indicates a bug in the kernel / DDI framework. 1017fa9e4066Sahrens */ 1018fa9e4066Sahrens ASSERT(zv->zv_open_count[otyp] != 0); 1019fa9e4066Sahrens ASSERT(zv->zv_total_opens != 0); 1020fa9e4066Sahrens 1021fa9e4066Sahrens /* 1022fa9e4066Sahrens * You may get multiple opens, but only one close. 1023fa9e4066Sahrens */ 1024fa9e4066Sahrens zv->zv_open_count[otyp]--; 1025fa9e4066Sahrens zv->zv_total_opens--; 1026fa9e4066Sahrens 1027fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1028fa9e4066Sahrens 1029fa9e4066Sahrens return (0); 1030fa9e4066Sahrens } 1031fa9e4066Sahrens 1032feb08c6bSbillm static void 103367bd71c6Sperrin zvol_get_done(dmu_buf_t *db, void *vzgd) 103467bd71c6Sperrin { 103567bd71c6Sperrin zgd_t *zgd = (zgd_t *)vzgd; 1036c2e6a7d6Sperrin rl_t *rl = zgd->zgd_rl; 103767bd71c6Sperrin 103867bd71c6Sperrin dmu_buf_rele(db, vzgd); 1039c2e6a7d6Sperrin zfs_range_unlock(rl); 104017f17c2dSbonwick zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 104167bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 104267bd71c6Sperrin } 104367bd71c6Sperrin 104467bd71c6Sperrin /* 104567bd71c6Sperrin * Get data to generate a TX_WRITE intent log record. 104667bd71c6Sperrin */ 1047feb08c6bSbillm static int 104867bd71c6Sperrin zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 104967bd71c6Sperrin { 105067bd71c6Sperrin zvol_state_t *zv = arg; 105167bd71c6Sperrin objset_t *os = zv->zv_objset; 105267bd71c6Sperrin dmu_buf_t *db; 1053c2e6a7d6Sperrin rl_t *rl; 105467bd71c6Sperrin zgd_t *zgd; 1055c2e6a7d6Sperrin uint64_t boff; /* block starting offset */ 1056c2e6a7d6Sperrin int dlen = lr->lr_length; /* length of user data */ 105767bd71c6Sperrin int error; 105867bd71c6Sperrin 105967bd71c6Sperrin ASSERT(zio); 1060c2e6a7d6Sperrin ASSERT(dlen != 0); 1061feb08c6bSbillm 1062c2e6a7d6Sperrin /* 1063c2e6a7d6Sperrin * Write records come in two flavors: immediate and indirect. 1064c2e6a7d6Sperrin * For small writes it's cheaper to store the data with the 1065c2e6a7d6Sperrin * log record (immediate); for large writes it's cheaper to 1066c2e6a7d6Sperrin * sync the data and get a pointer to it (indirect) so that 1067c2e6a7d6Sperrin * we don't have to write the data twice. 1068c2e6a7d6Sperrin */ 1069c2e6a7d6Sperrin if (buf != NULL) /* immediate write */ 1070c2e6a7d6Sperrin return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf)); 107167bd71c6Sperrin 107267bd71c6Sperrin zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); 107367bd71c6Sperrin zgd->zgd_zilog = zv->zv_zilog; 107467bd71c6Sperrin zgd->zgd_bp = &lr->lr_blkptr; 107567bd71c6Sperrin 107667bd71c6Sperrin /* 1077c2e6a7d6Sperrin * Lock the range of the block to ensure that when the data is 1078e7cbe64fSgw * written out and its checksum is being calculated that no other 1079c2e6a7d6Sperrin * thread can change the block. 108067bd71c6Sperrin */ 1081c2e6a7d6Sperrin boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t); 1082c2e6a7d6Sperrin rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize, 1083c2e6a7d6Sperrin RL_READER); 1084c2e6a7d6Sperrin zgd->zgd_rl = rl; 1085c2e6a7d6Sperrin 1086c2e6a7d6Sperrin VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db)); 108767bd71c6Sperrin error = dmu_sync(zio, db, &lr->lr_blkptr, 108867bd71c6Sperrin lr->lr_common.lrc_txg, zvol_get_done, zgd); 1089feb08c6bSbillm if (error == 0) 109017f17c2dSbonwick zil_add_block(zv->zv_zilog, &lr->lr_blkptr); 109167bd71c6Sperrin /* 109267bd71c6Sperrin * If we get EINPROGRESS, then we need to wait for a 109367bd71c6Sperrin * write IO initiated by dmu_sync() to complete before 109467bd71c6Sperrin * we can release this dbuf. We will finish everything 109567bd71c6Sperrin * up in the zvol_get_done() callback. 109667bd71c6Sperrin */ 109767bd71c6Sperrin if (error == EINPROGRESS) 109867bd71c6Sperrin return (0); 109967bd71c6Sperrin dmu_buf_rele(db, zgd); 1100c2e6a7d6Sperrin zfs_range_unlock(rl); 110167bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 110267bd71c6Sperrin return (error); 110367bd71c6Sperrin } 110467bd71c6Sperrin 1105a24e15ceSperrin /* 1106a24e15ceSperrin * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. 110722ac5be4Sperrin * 110822ac5be4Sperrin * We store data in the log buffers if it's small enough. 110967bd71c6Sperrin * Otherwise we will later flush the data out via dmu_sync(). 111022ac5be4Sperrin */ 111167bd71c6Sperrin ssize_t zvol_immediate_write_sz = 32768; 111222ac5be4Sperrin 1113feb08c6bSbillm static void 1114feb08c6bSbillm zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len) 111522ac5be4Sperrin { 1116feb08c6bSbillm uint32_t blocksize = zv->zv_volblocksize; 111722ac5be4Sperrin lr_write_t *lr; 111822ac5be4Sperrin 1119a24e15ceSperrin while (len) { 1120feb08c6bSbillm ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize)); 1121feb08c6bSbillm itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1122feb08c6bSbillm 1123feb08c6bSbillm itx->itx_wr_state = 1124feb08c6bSbillm len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY; 1125feb08c6bSbillm itx->itx_private = zv; 1126feb08c6bSbillm lr = (lr_write_t *)&itx->itx_lr; 1127feb08c6bSbillm lr->lr_foid = ZVOL_OBJ; 1128feb08c6bSbillm lr->lr_offset = off; 1129feb08c6bSbillm lr->lr_length = nbytes; 1130feb08c6bSbillm lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t); 1131feb08c6bSbillm BP_ZERO(&lr->lr_blkptr); 1132feb08c6bSbillm 1133feb08c6bSbillm (void) zil_itx_assign(zv->zv_zilog, itx, tx); 1134a24e15ceSperrin len -= nbytes; 1135a24e15ceSperrin off += nbytes; 113622ac5be4Sperrin } 113722ac5be4Sperrin } 113822ac5be4Sperrin 1139e7cbe64fSgw int 1140e7cbe64fSgw zvol_dumpio(vdev_t *vd, uint64_t size, uint64_t offset, void *addr, 1141e7cbe64fSgw int bflags, int isdump) 1142e7cbe64fSgw { 1143e7cbe64fSgw vdev_disk_t *dvd; 1144e7cbe64fSgw int direction; 1145e7cbe64fSgw int c; 1146e7cbe64fSgw int numerrors = 0; 1147e7cbe64fSgw 1148e7cbe64fSgw for (c = 0; c < vd->vdev_children; c++) { 1149e7cbe64fSgw if (zvol_dumpio(vd->vdev_child[c], size, offset, 1150e7cbe64fSgw addr, bflags, isdump) != 0) { 1151e7cbe64fSgw numerrors++; 1152e7cbe64fSgw } else if (bflags & B_READ) { 1153e7cbe64fSgw break; 1154e7cbe64fSgw } 1155e7cbe64fSgw } 1156e7cbe64fSgw 1157e7cbe64fSgw if (!vd->vdev_ops->vdev_op_leaf) 1158e7cbe64fSgw return (numerrors < vd->vdev_children ? 0 : EIO); 1159e7cbe64fSgw 1160e7cbe64fSgw if (!vdev_writeable(vd)) 1161e7cbe64fSgw return (EIO); 1162e7cbe64fSgw 1163e7cbe64fSgw dvd = vd->vdev_tsd; 1164e7cbe64fSgw ASSERT3P(dvd, !=, NULL); 1165e7cbe64fSgw direction = bflags & (B_WRITE | B_READ); 1166e7cbe64fSgw ASSERT(ISP2(direction)); 1167e7cbe64fSgw offset += VDEV_LABEL_START_SIZE; 1168e7cbe64fSgw 1169e7cbe64fSgw if (ddi_in_panic() || isdump) { 1170e7cbe64fSgw if (direction & B_READ) 1171e7cbe64fSgw return (EIO); 1172e7cbe64fSgw return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 1173e7cbe64fSgw lbtodb(size))); 1174e7cbe64fSgw } else { 1175e7cbe64fSgw return (vdev_disk_physio(dvd->vd_lh, addr, size, offset, 1176e7cbe64fSgw direction)); 1177e7cbe64fSgw } 1178e7cbe64fSgw } 1179e7cbe64fSgw 1180e7cbe64fSgw int 1181e7cbe64fSgw zvol_physio(zvol_state_t *zv, int bflags, uint64_t off, 1182e7cbe64fSgw uint64_t size, void *addr, int isdump) 1183e7cbe64fSgw { 1184e7cbe64fSgw dva_t dva; 1185e7cbe64fSgw vdev_t *vd; 1186e7cbe64fSgw int error; 1187e7cbe64fSgw spa_t *spa = dmu_objset_spa(zv->zv_objset); 1188e7cbe64fSgw 1189e7cbe64fSgw ASSERT(size <= zv->zv_volblocksize); 1190e7cbe64fSgw 1191e7cbe64fSgw /* restrict requests to multiples of the system block size */ 1192e7cbe64fSgw if (P2PHASE(off, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE)) 1193e7cbe64fSgw return (EINVAL); 1194e7cbe64fSgw 1195e7cbe64fSgw if (zvol_get_dva(zv, off, &dva) != 0) 1196e7cbe64fSgw return (EIO); 1197e7cbe64fSgw 1198e7cbe64fSgw spa_config_enter(spa, RW_READER, FTAG); 1199e7cbe64fSgw vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva)); 1200e7cbe64fSgw 1201e7cbe64fSgw error = zvol_dumpio(vd, size, 1202e7cbe64fSgw DVA_GET_OFFSET(&dva) + (off % zv->zv_volblocksize), 1203e7cbe64fSgw addr, bflags & (B_READ | B_WRITE | B_PHYS), isdump); 1204e7cbe64fSgw 1205e7cbe64fSgw spa_config_exit(spa, FTAG); 1206e7cbe64fSgw return (error); 1207e7cbe64fSgw } 1208e7cbe64fSgw 1209fa9e4066Sahrens int 1210fa9e4066Sahrens zvol_strategy(buf_t *bp) 1211fa9e4066Sahrens { 1212fa9e4066Sahrens zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); 1213fa9e4066Sahrens uint64_t off, volsize; 1214fa9e4066Sahrens size_t size, resid; 1215fa9e4066Sahrens char *addr; 121622ac5be4Sperrin objset_t *os; 1217c2e6a7d6Sperrin rl_t *rl; 1218fa9e4066Sahrens int error = 0; 1219e7cbe64fSgw boolean_t reading, is_dump = zv->zv_flags & ZVOL_DUMPIFIED; 1220fa9e4066Sahrens 1221fa9e4066Sahrens if (zv == NULL) { 1222fa9e4066Sahrens bioerror(bp, ENXIO); 1223fa9e4066Sahrens biodone(bp); 1224fa9e4066Sahrens return (0); 1225fa9e4066Sahrens } 1226fa9e4066Sahrens 1227fa9e4066Sahrens if (getminor(bp->b_edev) == 0) { 1228fa9e4066Sahrens bioerror(bp, EINVAL); 1229fa9e4066Sahrens biodone(bp); 1230fa9e4066Sahrens return (0); 1231fa9e4066Sahrens } 1232fa9e4066Sahrens 1233e7cbe64fSgw if (!(bp->b_flags & B_READ) && 1234e7cbe64fSgw (zv->zv_flags & ZVOL_RDONLY || 1235e7cbe64fSgw zv->zv_mode & DS_MODE_READONLY)) { 1236fa9e4066Sahrens bioerror(bp, EROFS); 1237fa9e4066Sahrens biodone(bp); 1238fa9e4066Sahrens return (0); 1239fa9e4066Sahrens } 1240fa9e4066Sahrens 1241fa9e4066Sahrens off = ldbtob(bp->b_blkno); 1242fa9e4066Sahrens volsize = zv->zv_volsize; 1243fa9e4066Sahrens 124422ac5be4Sperrin os = zv->zv_objset; 124522ac5be4Sperrin ASSERT(os != NULL); 1246fa9e4066Sahrens 1247fa9e4066Sahrens bp_mapin(bp); 1248fa9e4066Sahrens addr = bp->b_un.b_addr; 1249fa9e4066Sahrens resid = bp->b_bcount; 1250fa9e4066Sahrens 125173ec3d9cSgw if (resid > 0 && (off < 0 || off >= volsize)) 125273ec3d9cSgw return (EIO); 125373ec3d9cSgw 1254a24e15ceSperrin /* 1255a24e15ceSperrin * There must be no buffer changes when doing a dmu_sync() because 1256a24e15ceSperrin * we can't change the data whilst calculating the checksum. 1257a24e15ceSperrin */ 1258a24e15ceSperrin reading = bp->b_flags & B_READ; 1259c2e6a7d6Sperrin rl = zfs_range_lock(&zv->zv_znode, off, resid, 1260c2e6a7d6Sperrin reading ? RL_READER : RL_WRITER); 1261a24e15ceSperrin 1262e7cbe64fSgw if (resid > volsize - off) /* don't write past the end */ 1263e7cbe64fSgw resid = volsize - off; 1264fa9e4066Sahrens 1265e7cbe64fSgw while (resid != 0 && off < volsize) { 1266fa9e4066Sahrens 1267e7cbe64fSgw size = MIN(resid, zvol_maxphys); 1268e7cbe64fSgw if (is_dump) { 1269e7cbe64fSgw /* can't straddle a block boundary */ 1270e7cbe64fSgw size = MIN(size, P2END(off, zv->zv_volblocksize) - off); 1271e7cbe64fSgw error = zvol_physio(zv, bp->b_flags, off, size, 1272e7cbe64fSgw addr, 0); 1273e7cbe64fSgw } else if (reading) { 1274a24e15ceSperrin error = dmu_read(os, ZVOL_OBJ, off, size, addr); 1275fa9e4066Sahrens } else { 127622ac5be4Sperrin dmu_tx_t *tx = dmu_tx_create(os); 1277fa9e4066Sahrens dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 1278fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 1279fa9e4066Sahrens if (error) { 1280fa9e4066Sahrens dmu_tx_abort(tx); 1281fa9e4066Sahrens } else { 128222ac5be4Sperrin dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 1283feb08c6bSbillm zvol_log_write(zv, tx, off, size); 1284fa9e4066Sahrens dmu_tx_commit(tx); 1285fa9e4066Sahrens } 1286fa9e4066Sahrens } 1287fa9e4066Sahrens if (error) 1288fa9e4066Sahrens break; 1289fa9e4066Sahrens off += size; 1290fa9e4066Sahrens addr += size; 1291fa9e4066Sahrens resid -= size; 1292fa9e4066Sahrens } 1293c2e6a7d6Sperrin zfs_range_unlock(rl); 1294fa9e4066Sahrens 1295fa9e4066Sahrens if ((bp->b_resid = resid) == bp->b_bcount) 1296fa9e4066Sahrens bioerror(bp, off > volsize ? EINVAL : error); 1297fa9e4066Sahrens 1298e7cbe64fSgw if (!(bp->b_flags & B_ASYNC) && !reading && !zil_disable && !is_dump) 1299feb08c6bSbillm zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); 1300feb08c6bSbillm biodone(bp); 130122ac5be4Sperrin 1302fa9e4066Sahrens return (0); 1303fa9e4066Sahrens } 1304fa9e4066Sahrens 130567bd71c6Sperrin /* 130667bd71c6Sperrin * Set the buffer count to the zvol maximum transfer. 130767bd71c6Sperrin * Using our own routine instead of the default minphys() 130867bd71c6Sperrin * means that for larger writes we write bigger buffers on X86 130967bd71c6Sperrin * (128K instead of 56K) and flush the disk write cache less often 131067bd71c6Sperrin * (every zvol_maxphys - currently 1MB) instead of minphys (currently 131167bd71c6Sperrin * 56K on X86 and 128K on sparc). 131267bd71c6Sperrin */ 131367bd71c6Sperrin void 131467bd71c6Sperrin zvol_minphys(struct buf *bp) 131567bd71c6Sperrin { 131667bd71c6Sperrin if (bp->b_bcount > zvol_maxphys) 131767bd71c6Sperrin bp->b_bcount = zvol_maxphys; 131867bd71c6Sperrin } 131967bd71c6Sperrin 1320e7cbe64fSgw int 1321e7cbe64fSgw zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) 1322e7cbe64fSgw { 1323e7cbe64fSgw minor_t minor = getminor(dev); 1324e7cbe64fSgw zvol_state_t *zv; 1325e7cbe64fSgw int error = 0; 1326e7cbe64fSgw uint64_t size; 1327e7cbe64fSgw uint64_t boff; 1328e7cbe64fSgw uint64_t resid; 1329e7cbe64fSgw 1330e7cbe64fSgw if (minor == 0) /* This is the control device */ 1331e7cbe64fSgw return (ENXIO); 1332e7cbe64fSgw 1333e7cbe64fSgw zv = ddi_get_soft_state(zvol_state, minor); 1334e7cbe64fSgw if (zv == NULL) 1335e7cbe64fSgw return (ENXIO); 1336e7cbe64fSgw 1337e7cbe64fSgw boff = ldbtob(blkno); 1338e7cbe64fSgw resid = ldbtob(nblocks); 1339e7cbe64fSgw if (boff + resid > zv->zv_volsize) { 1340e7cbe64fSgw /* dump should know better than to write here */ 1341e7cbe64fSgw ASSERT(blkno + resid <= zv->zv_volsize); 1342e7cbe64fSgw return (EIO); 1343e7cbe64fSgw } 1344e7cbe64fSgw while (resid) { 1345e7cbe64fSgw /* can't straddle a block boundary */ 1346e7cbe64fSgw size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); 1347e7cbe64fSgw 1348e7cbe64fSgw error = zvol_physio(zv, B_WRITE, boff, size, addr, 1); 1349e7cbe64fSgw if (error) 1350e7cbe64fSgw break; 1351e7cbe64fSgw boff += size; 1352e7cbe64fSgw addr += size; 1353e7cbe64fSgw resid -= size; 1354e7cbe64fSgw } 1355e7cbe64fSgw 1356e7cbe64fSgw return (error); 1357e7cbe64fSgw } 1358e7cbe64fSgw 1359fa9e4066Sahrens /*ARGSUSED*/ 1360fa9e4066Sahrens int 1361feb08c6bSbillm zvol_read(dev_t dev, uio_t *uio, cred_t *cr) 1362fa9e4066Sahrens { 1363c7ca1008Sgw minor_t minor = getminor(dev); 1364c7ca1008Sgw zvol_state_t *zv; 136573ec3d9cSgw uint64_t volsize; 1366c2e6a7d6Sperrin rl_t *rl; 1367feb08c6bSbillm int error = 0; 1368fa9e4066Sahrens 1369c7ca1008Sgw if (minor == 0) /* This is the control device */ 1370c7ca1008Sgw return (ENXIO); 1371c7ca1008Sgw 1372c7ca1008Sgw zv = ddi_get_soft_state(zvol_state, minor); 1373c7ca1008Sgw if (zv == NULL) 1374c7ca1008Sgw return (ENXIO); 1375c7ca1008Sgw 137673ec3d9cSgw volsize = zv->zv_volsize; 137773ec3d9cSgw if (uio->uio_resid > 0 && 137873ec3d9cSgw (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 137973ec3d9cSgw return (EIO); 138073ec3d9cSgw 1381c2e6a7d6Sperrin rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1382c2e6a7d6Sperrin RL_READER); 138373ec3d9cSgw while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1384feb08c6bSbillm uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1385fa9e4066Sahrens 138673ec3d9cSgw /* don't read past the end */ 138773ec3d9cSgw if (bytes > volsize - uio->uio_loffset) 138873ec3d9cSgw bytes = volsize - uio->uio_loffset; 138973ec3d9cSgw 1390feb08c6bSbillm error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes); 1391feb08c6bSbillm if (error) 1392feb08c6bSbillm break; 1393feb08c6bSbillm } 1394c2e6a7d6Sperrin zfs_range_unlock(rl); 1395feb08c6bSbillm return (error); 1396fa9e4066Sahrens } 1397fa9e4066Sahrens 1398fa9e4066Sahrens /*ARGSUSED*/ 1399fa9e4066Sahrens int 1400feb08c6bSbillm zvol_write(dev_t dev, uio_t *uio, cred_t *cr) 1401fa9e4066Sahrens { 1402c7ca1008Sgw minor_t minor = getminor(dev); 1403c7ca1008Sgw zvol_state_t *zv; 140473ec3d9cSgw uint64_t volsize; 1405c2e6a7d6Sperrin rl_t *rl; 1406feb08c6bSbillm int error = 0; 1407feb08c6bSbillm 1408c7ca1008Sgw if (minor == 0) /* This is the control device */ 1409c7ca1008Sgw return (ENXIO); 1410c7ca1008Sgw 1411c7ca1008Sgw zv = ddi_get_soft_state(zvol_state, minor); 1412c7ca1008Sgw if (zv == NULL) 1413c7ca1008Sgw return (ENXIO); 1414c7ca1008Sgw 141573ec3d9cSgw volsize = zv->zv_volsize; 141673ec3d9cSgw if (uio->uio_resid > 0 && 141773ec3d9cSgw (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 141873ec3d9cSgw return (EIO); 141973ec3d9cSgw 1420e7cbe64fSgw if (zv->zv_flags & ZVOL_DUMPIFIED) { 1421e7cbe64fSgw error = physio(zvol_strategy, NULL, dev, B_WRITE, 1422e7cbe64fSgw zvol_minphys, uio); 1423e7cbe64fSgw return (error); 1424e7cbe64fSgw } 1425e7cbe64fSgw 1426c2e6a7d6Sperrin rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1427c2e6a7d6Sperrin RL_WRITER); 142873ec3d9cSgw while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1429feb08c6bSbillm uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1430feb08c6bSbillm uint64_t off = uio->uio_loffset; 1431feb08c6bSbillm dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 143273ec3d9cSgw 143373ec3d9cSgw if (bytes > volsize - off) /* don't write past the end */ 143473ec3d9cSgw bytes = volsize - off; 143573ec3d9cSgw 1436feb08c6bSbillm dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 1437feb08c6bSbillm error = dmu_tx_assign(tx, TXG_WAIT); 1438feb08c6bSbillm if (error) { 1439feb08c6bSbillm dmu_tx_abort(tx); 1440feb08c6bSbillm break; 1441feb08c6bSbillm } 1442feb08c6bSbillm error = dmu_write_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes, tx); 1443feb08c6bSbillm if (error == 0) 1444feb08c6bSbillm zvol_log_write(zv, tx, off, bytes); 1445feb08c6bSbillm dmu_tx_commit(tx); 1446feb08c6bSbillm 1447feb08c6bSbillm if (error) 1448feb08c6bSbillm break; 1449feb08c6bSbillm } 1450c2e6a7d6Sperrin zfs_range_unlock(rl); 1451feb08c6bSbillm return (error); 1452fa9e4066Sahrens } 1453fa9e4066Sahrens 1454fa9e4066Sahrens /* 1455fa9e4066Sahrens * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). 1456fa9e4066Sahrens */ 1457fa9e4066Sahrens /*ARGSUSED*/ 1458fa9e4066Sahrens int 1459fa9e4066Sahrens zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 1460fa9e4066Sahrens { 1461fa9e4066Sahrens zvol_state_t *zv; 1462af2c4821Smaybee struct dk_cinfo dki; 1463fa9e4066Sahrens struct dk_minfo dkm; 1464fa9e4066Sahrens dk_efi_t efi; 1465af2c4821Smaybee struct dk_callback *dkc; 1466fa9e4066Sahrens struct uuid uuid = EFI_RESERVED; 1467fa9e4066Sahrens uint32_t crc; 1468fa9e4066Sahrens int error = 0; 1469e7cbe64fSgw rl_t *rl; 1470fa9e4066Sahrens 1471fa9e4066Sahrens mutex_enter(&zvol_state_lock); 1472fa9e4066Sahrens 1473fa9e4066Sahrens zv = ddi_get_soft_state(zvol_state, getminor(dev)); 1474fa9e4066Sahrens 1475fa9e4066Sahrens if (zv == NULL) { 1476fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1477fa9e4066Sahrens return (ENXIO); 1478fa9e4066Sahrens } 1479fa9e4066Sahrens 1480fa9e4066Sahrens switch (cmd) { 1481fa9e4066Sahrens 1482fa9e4066Sahrens case DKIOCINFO: 1483af2c4821Smaybee bzero(&dki, sizeof (dki)); 1484af2c4821Smaybee (void) strcpy(dki.dki_cname, "zvol"); 1485af2c4821Smaybee (void) strcpy(dki.dki_dname, "zvol"); 1486af2c4821Smaybee dki.dki_ctype = DKC_UNKNOWN; 1487af2c4821Smaybee dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); 1488fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1489af2c4821Smaybee if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) 1490fa9e4066Sahrens error = EFAULT; 1491fa9e4066Sahrens return (error); 1492fa9e4066Sahrens 1493fa9e4066Sahrens case DKIOCGMEDIAINFO: 1494fa9e4066Sahrens bzero(&dkm, sizeof (dkm)); 1495fa9e4066Sahrens dkm.dki_lbsize = 1U << zv->zv_min_bs; 1496fa9e4066Sahrens dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 1497fa9e4066Sahrens dkm.dki_media_type = DK_UNKNOWN; 1498fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1499fa9e4066Sahrens if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) 1500fa9e4066Sahrens error = EFAULT; 1501fa9e4066Sahrens return (error); 1502fa9e4066Sahrens 1503fa9e4066Sahrens case DKIOCGETEFI: 1504fa9e4066Sahrens if (ddi_copyin((void *)arg, &efi, sizeof (dk_efi_t), flag)) { 1505fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1506fa9e4066Sahrens return (EFAULT); 1507fa9e4066Sahrens } 150868a5ac4dSmaybee efi.dki_data = (void *)(uintptr_t)efi.dki_data_64; 1509fa9e4066Sahrens 151068a5ac4dSmaybee /* 151168a5ac4dSmaybee * Some clients may attempt to request a PMBR for the 151268a5ac4dSmaybee * zvol. Currently this interface will return ENOTTY to 151368a5ac4dSmaybee * such requests. These requests could be supported by 151468a5ac4dSmaybee * adding a check for lba == 0 and consing up an appropriate 1515e7cbe64fSgw * PMBR. 151668a5ac4dSmaybee */ 151768a5ac4dSmaybee if (efi.dki_lba == 1) { 151868a5ac4dSmaybee efi_gpt_t gpt; 151968a5ac4dSmaybee efi_gpe_t gpe; 152068a5ac4dSmaybee 152168a5ac4dSmaybee bzero(&gpt, sizeof (gpt)); 152268a5ac4dSmaybee bzero(&gpe, sizeof (gpe)); 152368a5ac4dSmaybee 152468a5ac4dSmaybee if (efi.dki_length < sizeof (gpt)) { 152568a5ac4dSmaybee mutex_exit(&zvol_state_lock); 152668a5ac4dSmaybee return (EINVAL); 152768a5ac4dSmaybee } 1528fa9e4066Sahrens 152968a5ac4dSmaybee gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); 153068a5ac4dSmaybee gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 153168a5ac4dSmaybee gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); 153268a5ac4dSmaybee gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); 153368a5ac4dSmaybee gpt.efi_gpt_LastUsableLBA = 153468a5ac4dSmaybee LE_64((zv->zv_volsize >> zv->zv_min_bs) - 1); 153568a5ac4dSmaybee gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); 1536523be69cSmaybee gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); 153768a5ac4dSmaybee gpt.efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (gpe)); 1538fa9e4066Sahrens 153968a5ac4dSmaybee UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); 154068a5ac4dSmaybee gpe.efi_gpe_StartingLBA = gpt.efi_gpt_FirstUsableLBA; 154168a5ac4dSmaybee gpe.efi_gpe_EndingLBA = gpt.efi_gpt_LastUsableLBA; 154268a5ac4dSmaybee 154368a5ac4dSmaybee CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); 154468a5ac4dSmaybee gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 1545fa9e4066Sahrens 154668a5ac4dSmaybee CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); 154768a5ac4dSmaybee gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); 1548fa9e4066Sahrens 154968a5ac4dSmaybee mutex_exit(&zvol_state_lock); 155068a5ac4dSmaybee if (ddi_copyout(&gpt, efi.dki_data, sizeof (gpt), flag)) 155168a5ac4dSmaybee error = EFAULT; 155268a5ac4dSmaybee } else if (efi.dki_lba == 2) { 155368a5ac4dSmaybee efi_gpe_t gpe; 1554fa9e4066Sahrens 155568a5ac4dSmaybee bzero(&gpe, sizeof (gpe)); 1556fa9e4066Sahrens 155768a5ac4dSmaybee if (efi.dki_length < sizeof (gpe)) { 155868a5ac4dSmaybee mutex_exit(&zvol_state_lock); 155968a5ac4dSmaybee return (EINVAL); 156068a5ac4dSmaybee } 1561fa9e4066Sahrens 156268a5ac4dSmaybee UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); 156368a5ac4dSmaybee gpe.efi_gpe_StartingLBA = LE_64(34ULL); 156468a5ac4dSmaybee gpe.efi_gpe_EndingLBA = 156568a5ac4dSmaybee LE_64((zv->zv_volsize >> zv->zv_min_bs) - 1); 1566fa9e4066Sahrens 156768a5ac4dSmaybee mutex_exit(&zvol_state_lock); 156868a5ac4dSmaybee if (ddi_copyout(&gpe, efi.dki_data, sizeof (gpe), flag)) 156968a5ac4dSmaybee error = EFAULT; 157068a5ac4dSmaybee } else { 157168a5ac4dSmaybee mutex_exit(&zvol_state_lock); 157268a5ac4dSmaybee error = EINVAL; 157368a5ac4dSmaybee } 1574fa9e4066Sahrens return (error); 1575fa9e4066Sahrens 1576feb08c6bSbillm case DKIOCFLUSHWRITECACHE: 1577af2c4821Smaybee dkc = (struct dk_callback *)arg; 1578feb08c6bSbillm zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); 1579af2c4821Smaybee if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { 1580af2c4821Smaybee (*dkc->dkc_callback)(dkc->dkc_cookie, error); 1581af2c4821Smaybee error = 0; 1582af2c4821Smaybee } 1583feb08c6bSbillm break; 1584feb08c6bSbillm 1585b6130eadSmaybee case DKIOCGGEOM: 1586b6130eadSmaybee case DKIOCGVTOC: 1587e7cbe64fSgw /* 1588e7cbe64fSgw * commands using these (like prtvtoc) expect ENOTSUP 1589e7cbe64fSgw * since we're emulating an EFI label 1590e7cbe64fSgw */ 1591b6130eadSmaybee error = ENOTSUP; 1592b6130eadSmaybee break; 1593b6130eadSmaybee 1594e7cbe64fSgw case DKIOCDUMPINIT: 1595e7cbe64fSgw rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1596e7cbe64fSgw RL_WRITER); 1597e7cbe64fSgw error = zvol_dumpify(zv); 1598e7cbe64fSgw zfs_range_unlock(rl); 1599e7cbe64fSgw break; 1600e7cbe64fSgw 1601e7cbe64fSgw case DKIOCDUMPFINI: 1602e7cbe64fSgw rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1603e7cbe64fSgw RL_WRITER); 1604e7cbe64fSgw error = zvol_dump_fini(zv); 1605e7cbe64fSgw zfs_range_unlock(rl); 1606e7cbe64fSgw break; 1607e7cbe64fSgw 1608fa9e4066Sahrens default: 160968a5ac4dSmaybee error = ENOTTY; 1610fa9e4066Sahrens break; 1611fa9e4066Sahrens 1612fa9e4066Sahrens } 1613fa9e4066Sahrens mutex_exit(&zvol_state_lock); 1614fa9e4066Sahrens return (error); 1615fa9e4066Sahrens } 1616fa9e4066Sahrens 1617fa9e4066Sahrens int 1618fa9e4066Sahrens zvol_busy(void) 1619fa9e4066Sahrens { 1620fa9e4066Sahrens return (zvol_minors != 0); 1621fa9e4066Sahrens } 1622fa9e4066Sahrens 1623fa9e4066Sahrens void 1624fa9e4066Sahrens zvol_init(void) 1625fa9e4066Sahrens { 1626fa9e4066Sahrens VERIFY(ddi_soft_state_init(&zvol_state, sizeof (zvol_state_t), 1) == 0); 1627fa9e4066Sahrens mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); 1628fa9e4066Sahrens } 1629fa9e4066Sahrens 1630fa9e4066Sahrens void 1631fa9e4066Sahrens zvol_fini(void) 1632fa9e4066Sahrens { 1633fa9e4066Sahrens mutex_destroy(&zvol_state_lock); 1634fa9e4066Sahrens ddi_soft_state_fini(&zvol_state); 1635fa9e4066Sahrens } 1636e7cbe64fSgw 1637e7cbe64fSgw static boolean_t 1638e7cbe64fSgw zvol_is_swap(zvol_state_t *zv) 1639e7cbe64fSgw { 1640e7cbe64fSgw vnode_t *vp; 1641e7cbe64fSgw boolean_t ret = B_FALSE; 1642e7cbe64fSgw char *devpath; 1643e7cbe64fSgw size_t devpathlen; 1644e7cbe64fSgw int error; 1645e7cbe64fSgw 1646e7cbe64fSgw devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(zv->zv_name) + 1; 1647e7cbe64fSgw devpath = kmem_alloc(devpathlen, KM_SLEEP); 1648e7cbe64fSgw (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, zv->zv_name); 1649e7cbe64fSgw error = lookupname(devpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 1650e7cbe64fSgw kmem_free(devpath, devpathlen); 1651e7cbe64fSgw 1652e7cbe64fSgw ret = !error && IS_SWAPVP(common_specvp(vp)); 1653e7cbe64fSgw 1654e7cbe64fSgw if (vp != NULL) 1655e7cbe64fSgw VN_RELE(vp); 1656e7cbe64fSgw 1657e7cbe64fSgw return (ret); 1658e7cbe64fSgw } 1659e7cbe64fSgw 1660e7cbe64fSgw static int 1661e7cbe64fSgw zvol_dump_init(zvol_state_t *zv, boolean_t resize) 1662e7cbe64fSgw { 1663e7cbe64fSgw dmu_tx_t *tx; 1664e7cbe64fSgw int error = 0; 1665e7cbe64fSgw objset_t *os = zv->zv_objset; 1666e7cbe64fSgw nvlist_t *nv = NULL; 1667e7cbe64fSgw uint64_t checksum, compress, refresrv; 1668e7cbe64fSgw 1669e7cbe64fSgw ASSERT(MUTEX_HELD(&zvol_state_lock)); 1670e7cbe64fSgw 1671e7cbe64fSgw tx = dmu_tx_create(os); 1672e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 1673e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 1674e7cbe64fSgw if (error) { 1675e7cbe64fSgw dmu_tx_abort(tx); 1676e7cbe64fSgw return (error); 1677e7cbe64fSgw } 1678e7cbe64fSgw 1679e7cbe64fSgw /* 1680e7cbe64fSgw * If we are resizing the dump device then we only need to 1681e7cbe64fSgw * update the refreservation to match the newly updated 1682e7cbe64fSgw * zvolsize. Otherwise, we save off the original state of the 1683e7cbe64fSgw * zvol so that we can restore them if the zvol is ever undumpified. 1684e7cbe64fSgw */ 1685e7cbe64fSgw if (resize) { 1686e7cbe64fSgw error = zap_update(os, ZVOL_ZAP_OBJ, 1687e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 1688e7cbe64fSgw &zv->zv_volsize, tx); 1689e7cbe64fSgw } else { 1690e7cbe64fSgw error = dsl_prop_get_integer(zv->zv_name, 1691e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); 1692e7cbe64fSgw error = error ? error : dsl_prop_get_integer(zv->zv_name, 1693e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); 1694e7cbe64fSgw error = error ? error : dsl_prop_get_integer(zv->zv_name, 1695e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); 1696e7cbe64fSgw 1697e7cbe64fSgw error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1698e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, 1699e7cbe64fSgw &compress, tx); 1700e7cbe64fSgw error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1701e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); 1702e7cbe64fSgw error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1703e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 1704e7cbe64fSgw &refresrv, tx); 1705e7cbe64fSgw } 1706e7cbe64fSgw dmu_tx_commit(tx); 1707e7cbe64fSgw 1708e7cbe64fSgw /* Truncate the file */ 1709e7cbe64fSgw if (!error) 1710cdb0ab79Smaybee error = dmu_free_long_range(zv->zv_objset, 1711cdb0ab79Smaybee ZVOL_OBJ, 0, DMU_OBJECT_END); 1712e7cbe64fSgw 1713e7cbe64fSgw if (error) 1714e7cbe64fSgw return (error); 1715e7cbe64fSgw 1716e7cbe64fSgw /* 1717e7cbe64fSgw * We only need update the zvol's property if we are initializing 1718e7cbe64fSgw * the dump area for the first time. 1719e7cbe64fSgw */ 1720e7cbe64fSgw if (!resize) { 1721e7cbe64fSgw VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1722e7cbe64fSgw VERIFY(nvlist_add_uint64(nv, 1723e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); 1724e7cbe64fSgw VERIFY(nvlist_add_uint64(nv, 1725e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 1726e7cbe64fSgw ZIO_COMPRESS_OFF) == 0); 1727e7cbe64fSgw VERIFY(nvlist_add_uint64(nv, 1728e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), 1729e7cbe64fSgw ZIO_CHECKSUM_OFF) == 0); 1730e7cbe64fSgw 1731e7cbe64fSgw error = zfs_set_prop_nvlist(zv->zv_name, nv); 1732e7cbe64fSgw nvlist_free(nv); 1733e7cbe64fSgw 1734e7cbe64fSgw if (error) 1735e7cbe64fSgw return (error); 1736e7cbe64fSgw } 1737e7cbe64fSgw 1738e7cbe64fSgw /* Allocate the space for the dump */ 1739e7cbe64fSgw error = zvol_prealloc(zv); 1740e7cbe64fSgw return (error); 1741e7cbe64fSgw } 1742e7cbe64fSgw 1743e7cbe64fSgw static int 1744e7cbe64fSgw zvol_dumpify(zvol_state_t *zv) 1745e7cbe64fSgw { 1746e7cbe64fSgw int error = 0; 1747e7cbe64fSgw uint64_t dumpsize = 0; 1748e7cbe64fSgw dmu_tx_t *tx; 1749e7cbe64fSgw objset_t *os = zv->zv_objset; 1750e7cbe64fSgw 1751e7cbe64fSgw if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) 1752e7cbe64fSgw return (EROFS); 1753e7cbe64fSgw 1754e7cbe64fSgw /* 1755e7cbe64fSgw * We do not support swap devices acting as dump devices. 1756e7cbe64fSgw */ 1757e7cbe64fSgw if (zvol_is_swap(zv)) 1758e7cbe64fSgw return (ENOTSUP); 1759e7cbe64fSgw 1760e7cbe64fSgw if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 1761e7cbe64fSgw 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { 1762e7cbe64fSgw boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; 1763e7cbe64fSgw 1764e7cbe64fSgw if ((error = zvol_dump_init(zv, resize)) != 0) { 1765e7cbe64fSgw (void) zvol_dump_fini(zv); 1766e7cbe64fSgw return (error); 1767e7cbe64fSgw } 1768e7cbe64fSgw } 1769e7cbe64fSgw 1770e7cbe64fSgw /* 1771e7cbe64fSgw * Build up our lba mapping. 1772e7cbe64fSgw */ 1773e7cbe64fSgw error = zvol_get_lbas(zv); 1774e7cbe64fSgw if (error) { 1775e7cbe64fSgw (void) zvol_dump_fini(zv); 1776e7cbe64fSgw return (error); 1777e7cbe64fSgw } 1778e7cbe64fSgw 1779e7cbe64fSgw tx = dmu_tx_create(os); 1780e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 1781e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 1782e7cbe64fSgw if (error) { 1783e7cbe64fSgw dmu_tx_abort(tx); 1784e7cbe64fSgw (void) zvol_dump_fini(zv); 1785e7cbe64fSgw return (error); 1786e7cbe64fSgw } 1787e7cbe64fSgw 1788e7cbe64fSgw zv->zv_flags |= ZVOL_DUMPIFIED; 1789e7cbe64fSgw error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, 1790e7cbe64fSgw &zv->zv_volsize, tx); 1791e7cbe64fSgw dmu_tx_commit(tx); 1792e7cbe64fSgw 1793e7cbe64fSgw if (error) { 1794e7cbe64fSgw (void) zvol_dump_fini(zv); 1795e7cbe64fSgw return (error); 1796e7cbe64fSgw } 1797e7cbe64fSgw 1798e7cbe64fSgw txg_wait_synced(dmu_objset_pool(os), 0); 1799e7cbe64fSgw return (0); 1800e7cbe64fSgw } 1801e7cbe64fSgw 1802e7cbe64fSgw static int 1803e7cbe64fSgw zvol_dump_fini(zvol_state_t *zv) 1804e7cbe64fSgw { 1805e7cbe64fSgw dmu_tx_t *tx; 1806e7cbe64fSgw objset_t *os = zv->zv_objset; 1807e7cbe64fSgw nvlist_t *nv; 1808e7cbe64fSgw int error = 0; 1809e7cbe64fSgw uint64_t checksum, compress, refresrv; 1810e7cbe64fSgw 1811b7e50089Smaybee /* 1812b7e50089Smaybee * Attempt to restore the zvol back to its pre-dumpified state. 1813b7e50089Smaybee * This is a best-effort attempt as it's possible that not all 1814b7e50089Smaybee * of these properties were initialized during the dumpify process 1815b7e50089Smaybee * (i.e. error during zvol_dump_init). 1816b7e50089Smaybee */ 1817b7e50089Smaybee 1818e7cbe64fSgw tx = dmu_tx_create(os); 1819e7cbe64fSgw dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 1820e7cbe64fSgw error = dmu_tx_assign(tx, TXG_WAIT); 1821e7cbe64fSgw if (error) { 1822e7cbe64fSgw dmu_tx_abort(tx); 1823e7cbe64fSgw return (error); 1824e7cbe64fSgw } 1825b7e50089Smaybee (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); 1826b7e50089Smaybee dmu_tx_commit(tx); 1827e7cbe64fSgw 1828e7cbe64fSgw (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 1829e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); 1830e7cbe64fSgw (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 1831e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); 1832e7cbe64fSgw (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 1833e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); 1834e7cbe64fSgw 1835e7cbe64fSgw VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1836e7cbe64fSgw (void) nvlist_add_uint64(nv, 1837e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); 1838e7cbe64fSgw (void) nvlist_add_uint64(nv, 1839e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); 1840e7cbe64fSgw (void) nvlist_add_uint64(nv, 1841e7cbe64fSgw zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); 1842e7cbe64fSgw (void) zfs_set_prop_nvlist(zv->zv_name, nv); 1843e7cbe64fSgw nvlist_free(nv); 1844e7cbe64fSgw 1845b7e50089Smaybee zvol_free_extents(zv); 1846b7e50089Smaybee zv->zv_flags &= ~ZVOL_DUMPIFIED; 1847b7e50089Smaybee (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); 1848b7e50089Smaybee 1849e7cbe64fSgw return (0); 1850e7cbe64fSgw } 1851