/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Portions Copyright 2010 Robert Milkowski
 *
 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2019 Joyent, Inc.
 */
31fa9e4066Sahrens
/*
 * ZFS volume emulation driver.
 *
 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
 * Volumes are accessed through the symbolic links named:
 *
 * /dev/zvol/dsk/<pool_name>/<dataset_name>
 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
 *
 * These links are created by the /dev filesystem (sdev_zvolops.c).
 * Volumes are persistent through reboot.  No user command needs to be
 * run before opening and using a device.
 */
45fa9e4066Sahrens
46fa9e4066Sahrens #include <sys/types.h>
47fa9e4066Sahrens #include <sys/param.h>
48fa9e4066Sahrens #include <sys/errno.h>
49fa9e4066Sahrens #include <sys/uio.h>
50fa9e4066Sahrens #include <sys/buf.h>
51fa9e4066Sahrens #include <sys/modctl.h>
52fa9e4066Sahrens #include <sys/open.h>
53fa9e4066Sahrens #include <sys/kmem.h>
54fa9e4066Sahrens #include <sys/conf.h>
55fa9e4066Sahrens #include <sys/cmn_err.h>
56fa9e4066Sahrens #include <sys/stat.h>
57fa9e4066Sahrens #include <sys/zap.h>
58fa9e4066Sahrens #include <sys/spa.h>
59810e43b2SBill Pijewski #include <sys/spa_impl.h>
60fa9e4066Sahrens #include <sys/zio.h>
61e7cbe64fSgw #include <sys/dmu_traverse.h>
62e7cbe64fSgw #include <sys/dnode.h>
63e7cbe64fSgw #include <sys/dsl_dataset.h>
64fa9e4066Sahrens #include <sys/dsl_prop.h>
65fa9e4066Sahrens #include <sys/dkio.h>
66fa9e4066Sahrens #include <sys/efi_partition.h>
67fa9e4066Sahrens #include <sys/byteorder.h>
68fa9e4066Sahrens #include <sys/pathname.h>
69fa9e4066Sahrens #include <sys/ddi.h>
70fa9e4066Sahrens #include <sys/sunddi.h>
71fa9e4066Sahrens #include <sys/crc32.h>
72fa9e4066Sahrens #include <sys/dirent.h>
73fa9e4066Sahrens #include <sys/policy.h>
74fa9e4066Sahrens #include <sys/fs/zfs.h>
75fa9e4066Sahrens #include <sys/zfs_ioctl.h>
76fa9e4066Sahrens #include <sys/mkdev.h>
7722ac5be4Sperrin #include <sys/zil.h>
78c5c6ffa0Smaybee #include <sys/refcount.h>
79c2e6a7d6Sperrin #include <sys/zfs_znode.h>
80c2e6a7d6Sperrin #include <sys/zfs_rlock.h>
81e7cbe64fSgw #include <sys/vdev_impl.h>
82e7cbe64fSgw #include <sys/zvol.h>
83e7cbe64fSgw #include <sys/dumphdr.h>
841209a471SNeil Perrin #include <sys/zil_impl.h>
8580901aeaSGeorge Wilson #include <sys/dbuf.h>
86810e43b2SBill Pijewski #include <sys/dmu_tx.h>
87810e43b2SBill Pijewski #include <sys/zfeature.h>
88810e43b2SBill Pijewski #include <sys/zio_checksum.h>
891271e4b1SPrakash Surya #include <sys/zil_impl.h>
90c3377ee9SJohn Levon #include <sys/smt.h>
91047c81d3SSaso Kiselkov #include <sys/dkioc_free_util.h>
9279315247SMatthew Ahrens #include <sys/zfs_rlock.h>
93fa9e4066Sahrens
94fa9e4066Sahrens #include "zfs_namecheck.h"
95fa9e4066Sahrens
96c99e4bdcSChris Kirby void *zfsdev_state;
97503ad85cSMatthew Ahrens static char *zvol_tag = "zvol_tag";
98fa9e4066Sahrens
99e7cbe64fSgw #define ZVOL_DUMPSIZE "dumpsize"
100e7cbe64fSgw
101fa9e4066Sahrens /*
102c99e4bdcSChris Kirby * This lock protects the zfsdev_state structure from being modified
103fa9e4066Sahrens * while it's being used, e.g. an open that comes in before a create
104fa9e4066Sahrens * finishes. It also protects temporary opens of the dataset so that,
105fa9e4066Sahrens * e.g., an open doesn't get a spurious EBUSY.
106fa9e4066Sahrens */
107c99e4bdcSChris Kirby kmutex_t zfsdev_state_lock;
108fa9e4066Sahrens static uint32_t zvol_minors;
109fa9e4066Sahrens
110e7cbe64fSgw typedef struct zvol_extent {
11188b7b0f2SMatthew Ahrens list_node_t ze_node;
112e7cbe64fSgw dva_t ze_dva; /* dva associated with this extent */
11388b7b0f2SMatthew Ahrens uint64_t ze_nblks; /* number of blocks in extent */
114e7cbe64fSgw } zvol_extent_t;
115e7cbe64fSgw
116fa9e4066Sahrens /*
117fa9e4066Sahrens * The in-core state of each volume.
118fa9e4066Sahrens */
119fa9e4066Sahrens typedef struct zvol_state {
120fa9e4066Sahrens char zv_name[MAXPATHLEN]; /* pool/dd name */
121fa9e4066Sahrens uint64_t zv_volsize; /* amount of space we advertise */
12267bd71c6Sperrin uint64_t zv_volblocksize; /* volume block size */
123fa9e4066Sahrens minor_t zv_minor; /* minor number */
124fa9e4066Sahrens uint8_t zv_min_bs; /* minimum addressable block shift */
125701f66c4SEric Taylor uint8_t zv_flags; /* readonly, dumpified, etc. */
126fa9e4066Sahrens objset_t *zv_objset; /* objset handle */
127fa9e4066Sahrens uint32_t zv_open_count[OTYPCNT]; /* open counts */
128fa9e4066Sahrens uint32_t zv_total_opens; /* total open count */
12922ac5be4Sperrin zilog_t *zv_zilog; /* ZIL handle */
13088b7b0f2SMatthew Ahrens list_t zv_extents; /* List of extents for dump */
13179315247SMatthew Ahrens rangelock_t zv_rangelock;
1328dfe5547SRichard Yao dnode_t *zv_dn; /* dnode hold */
133fa9e4066Sahrens } zvol_state_t;
134fa9e4066Sahrens
/*
 * Bits stored in zvol_state_t.zv_flags.
 */
#define	ZVOL_RDONLY	0x1
#define	ZVOL_DUMPIFIED	0x2
#define	ZVOL_EXCL	0x4
#define	ZVOL_WCE	0x8
142e7cbe64fSgw
14367bd71c6Sperrin /*
14467bd71c6Sperrin * zvol maximum transfer in one DMU tx.
14567bd71c6Sperrin */
14667bd71c6Sperrin int zvol_maxphys = DMU_MAX_ACCESS/2;
14767bd71c6Sperrin
148893c83baSGeorge Wilson /*
149893c83baSGeorge Wilson * Toggle unmap functionality.
150893c83baSGeorge Wilson */
151893c83baSGeorge Wilson boolean_t zvol_unmap_enabled = B_TRUE;
152893c83baSGeorge Wilson
1531c9272b8SStephen Blinick /*
1541c9272b8SStephen Blinick * If true, unmaps requested as synchronous are executed synchronously,
1551c9272b8SStephen Blinick * otherwise all unmaps are asynchronous.
1561c9272b8SStephen Blinick */
1571c9272b8SStephen Blinick boolean_t zvol_unmap_sync_enabled = B_FALSE;
1581c9272b8SStephen Blinick
15992241e0bSTom Erickson extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
1604445fffbSMatthew Ahrens nvlist_t *, nvlist_t *);
161681d9761SEric Taylor static int zvol_remove_zv(zvol_state_t *);
1621271e4b1SPrakash Surya static int zvol_get_data(void *arg, lr_write_t *lr, char *buf,
1631271e4b1SPrakash Surya struct lwb *lwb, zio_t *zio);
164e7cbe64fSgw static int zvol_dumpify(zvol_state_t *zv);
165e7cbe64fSgw static int zvol_dump_fini(zvol_state_t *zv);
166e7cbe64fSgw static int zvol_dump_init(zvol_state_t *zv, boolean_t resize);
16767bd71c6Sperrin
168fa9e4066Sahrens static void
zvol_size_changed(zvol_state_t * zv,uint64_t volsize)169c61ea566SGeorge Wilson zvol_size_changed(zvol_state_t *zv, uint64_t volsize)
170fa9e4066Sahrens {
171c61ea566SGeorge Wilson dev_t dev = makedevice(ddi_driver_major(zfs_dip), zv->zv_minor);
172fa9e4066Sahrens
173c61ea566SGeorge Wilson zv->zv_volsize = volsize;
174fa9e4066Sahrens VERIFY(ddi_prop_update_int64(dev, zfs_dip,
175681d9761SEric Taylor "Size", volsize) == DDI_SUCCESS);
176fa9e4066Sahrens VERIFY(ddi_prop_update_int64(dev, zfs_dip,
177681d9761SEric Taylor "Nblocks", lbtodb(volsize)) == DDI_SUCCESS);
178e7cbe64fSgw
179e7cbe64fSgw /* Notify specfs to invalidate the cached size */
180e7cbe64fSgw spec_size_invalidate(dev, VBLK);
181e7cbe64fSgw spec_size_invalidate(dev, VCHR);
182fa9e4066Sahrens }
183fa9e4066Sahrens
/*
 * Validate a proposed volume size against a block size.
 *
 * Returns 0 when the size is acceptable, otherwise:
 *   EINVAL    - volsize is zero, blocksize is zero, or volsize is not a
 *               multiple of blocksize;
 *   EOVERFLOW - (_ILP32 only) the last byte of the volume would lie
 *               beyond SPEC_MAXOFFSET_T.
 */
int
zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
{
	if (volsize == 0)
		return (SET_ERROR(EINVAL));

	/*
	 * A zero block size can never be valid, and letting it through
	 * would make the modulo below divide by zero.
	 */
	if (blocksize == 0)
		return (SET_ERROR(EINVAL));

	if (volsize % blocksize != 0)
		return (SET_ERROR(EINVAL));

#ifdef _ILP32
	if (volsize - 1 > SPEC_MAXOFFSET_T)
		return (SET_ERROR(EOVERFLOW));
#endif
	return (0);
}
199fa9e4066Sahrens
200fa9e4066Sahrens int
zvol_check_volblocksize(uint64_t volblocksize)201e9dbad6fSeschrock zvol_check_volblocksize(uint64_t volblocksize)
202fa9e4066Sahrens {
203e9dbad6fSeschrock if (volblocksize < SPA_MINBLOCKSIZE ||
204b5152584SMatthew Ahrens volblocksize > SPA_OLD_MAXBLOCKSIZE ||
205e9dbad6fSeschrock !ISP2(volblocksize))
206be6fd75aSMatthew Ahrens return (SET_ERROR(EDOM));
207fa9e4066Sahrens
208fa9e4066Sahrens return (0);
209fa9e4066Sahrens }
210fa9e4066Sahrens
211fa9e4066Sahrens int
zvol_get_stats(objset_t * os,nvlist_t * nv)212a2eea2e1Sahrens zvol_get_stats(objset_t *os, nvlist_t *nv)
213fa9e4066Sahrens {
214fa9e4066Sahrens int error;
215fa9e4066Sahrens dmu_object_info_t doi;
216a2eea2e1Sahrens uint64_t val;
217fa9e4066Sahrens
218a2eea2e1Sahrens error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
219fa9e4066Sahrens if (error)
220fa9e4066Sahrens return (error);
221fa9e4066Sahrens
222a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
223a2eea2e1Sahrens
224fa9e4066Sahrens error = dmu_object_info(os, ZVOL_OBJ, &doi);
225fa9e4066Sahrens
226a2eea2e1Sahrens if (error == 0) {
227a2eea2e1Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
228a2eea2e1Sahrens doi.doi_data_block_size);
229a2eea2e1Sahrens }
230fa9e4066Sahrens
231fa9e4066Sahrens return (error);
232fa9e4066Sahrens }
233fa9e4066Sahrens
234fa9e4066Sahrens static zvol_state_t *
zvol_minor_lookup(const char * name)235e9dbad6fSeschrock zvol_minor_lookup(const char *name)
236fa9e4066Sahrens {
237fa9e4066Sahrens minor_t minor;
238fa9e4066Sahrens zvol_state_t *zv;
239fa9e4066Sahrens
240c99e4bdcSChris Kirby ASSERT(MUTEX_HELD(&zfsdev_state_lock));
241fa9e4066Sahrens
242c99e4bdcSChris Kirby for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) {
243c99e4bdcSChris Kirby zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
244fa9e4066Sahrens if (zv == NULL)
245fa9e4066Sahrens continue;
246fa9e4066Sahrens if (strcmp(zv->zv_name, name) == 0)
247f80ce222SChris Kirby return (zv);
248fa9e4066Sahrens }
249fa9e4066Sahrens
250f80ce222SChris Kirby return (NULL);
251fa9e4066Sahrens }
252fa9e4066Sahrens
253e7cbe64fSgw /* extent mapping arg */
254e7cbe64fSgw struct maparg {
25588b7b0f2SMatthew Ahrens zvol_state_t *ma_zv;
25688b7b0f2SMatthew Ahrens uint64_t ma_blks;
257e7cbe64fSgw };
258e7cbe64fSgw
259e7cbe64fSgw /*ARGSUSED*/
260e7cbe64fSgw static int
zvol_map_block(spa_t * spa,zilog_t * zilog,const blkptr_t * bp,const zbookmark_phys_t * zb,const dnode_phys_t * dnp,void * arg)2611b912ec7SGeorge Wilson zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2627802d7bfSMatthew Ahrens const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
263e7cbe64fSgw {
26488b7b0f2SMatthew Ahrens struct maparg *ma = arg;
26588b7b0f2SMatthew Ahrens zvol_extent_t *ze;
26688b7b0f2SMatthew Ahrens int bs = ma->ma_zv->zv_volblocksize;
267e7cbe64fSgw
268a2cdcdd2SPaul Dagnelie if (bp == NULL || BP_IS_HOLE(bp) ||
26943466aaeSMax Grossman zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
27088b7b0f2SMatthew Ahrens return (0);
271e7cbe64fSgw
2725d7b4d43SMatthew Ahrens VERIFY(!BP_IS_EMBEDDED(bp));
2735d7b4d43SMatthew Ahrens
27488b7b0f2SMatthew Ahrens VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
27588b7b0f2SMatthew Ahrens ma->ma_blks++;
276e7cbe64fSgw
27788b7b0f2SMatthew Ahrens /* Abort immediately if we have encountered gang blocks */
27888b7b0f2SMatthew Ahrens if (BP_IS_GANG(bp))
279be6fd75aSMatthew Ahrens return (SET_ERROR(EFRAGS));
280e7cbe64fSgw
28188b7b0f2SMatthew Ahrens /*
28288b7b0f2SMatthew Ahrens * See if the block is at the end of the previous extent.
28388b7b0f2SMatthew Ahrens */
28488b7b0f2SMatthew Ahrens ze = list_tail(&ma->ma_zv->zv_extents);
28588b7b0f2SMatthew Ahrens if (ze &&
28688b7b0f2SMatthew Ahrens DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) &&
28788b7b0f2SMatthew Ahrens DVA_GET_OFFSET(BP_IDENTITY(bp)) ==
28888b7b0f2SMatthew Ahrens DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) {
28988b7b0f2SMatthew Ahrens ze->ze_nblks++;
29088b7b0f2SMatthew Ahrens return (0);
291e7cbe64fSgw }
292e7cbe64fSgw
29388b7b0f2SMatthew Ahrens dprintf_bp(bp, "%s", "next blkptr:");
294e7cbe64fSgw
29588b7b0f2SMatthew Ahrens /* start a new extent */
29688b7b0f2SMatthew Ahrens ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP);
29788b7b0f2SMatthew Ahrens ze->ze_dva = bp->blk_dva[0]; /* structure assignment */
29888b7b0f2SMatthew Ahrens ze->ze_nblks = 1;
29988b7b0f2SMatthew Ahrens list_insert_tail(&ma->ma_zv->zv_extents, ze);
30088b7b0f2SMatthew Ahrens return (0);
30188b7b0f2SMatthew Ahrens }
302e7cbe64fSgw
30388b7b0f2SMatthew Ahrens static void
zvol_free_extents(zvol_state_t * zv)30488b7b0f2SMatthew Ahrens zvol_free_extents(zvol_state_t *zv)
30588b7b0f2SMatthew Ahrens {
30688b7b0f2SMatthew Ahrens zvol_extent_t *ze;
307e7cbe64fSgw
30888b7b0f2SMatthew Ahrens while (ze = list_head(&zv->zv_extents)) {
30988b7b0f2SMatthew Ahrens list_remove(&zv->zv_extents, ze);
31088b7b0f2SMatthew Ahrens kmem_free(ze, sizeof (zvol_extent_t));
311e7cbe64fSgw }
31288b7b0f2SMatthew Ahrens }
313e7cbe64fSgw
31488b7b0f2SMatthew Ahrens static int
zvol_get_lbas(zvol_state_t * zv)31588b7b0f2SMatthew Ahrens zvol_get_lbas(zvol_state_t *zv)
31688b7b0f2SMatthew Ahrens {
3173adc9019SEric Taylor objset_t *os = zv->zv_objset;
31888b7b0f2SMatthew Ahrens struct maparg ma;
31988b7b0f2SMatthew Ahrens int err;
32088b7b0f2SMatthew Ahrens
32188b7b0f2SMatthew Ahrens ma.ma_zv = zv;
32288b7b0f2SMatthew Ahrens ma.ma_blks = 0;
32388b7b0f2SMatthew Ahrens zvol_free_extents(zv);
32488b7b0f2SMatthew Ahrens
3253adc9019SEric Taylor /* commit any in-flight changes before traversing the dataset */
3263adc9019SEric Taylor txg_wait_synced(dmu_objset_pool(os), 0);
3273adc9019SEric Taylor err = traverse_dataset(dmu_objset_ds(os), 0,
32888b7b0f2SMatthew Ahrens TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma);
32988b7b0f2SMatthew Ahrens if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) {
33088b7b0f2SMatthew Ahrens zvol_free_extents(zv);
33188b7b0f2SMatthew Ahrens return (err ? err : EIO);
332e7cbe64fSgw }
33388b7b0f2SMatthew Ahrens
334e7cbe64fSgw return (0);
335e7cbe64fSgw }
336e7cbe64fSgw
337ecd6cf80Smarks /* ARGSUSED */
338fa9e4066Sahrens void
zvol_create_cb(objset_t * os,void * arg,cred_t * cr,dmu_tx_t * tx)339ecd6cf80Smarks zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
340fa9e4066Sahrens {
341da6c28aaSamw zfs_creat_t *zct = arg;
342da6c28aaSamw nvlist_t *nvprops = zct->zct_props;
343fa9e4066Sahrens int error;
344e9dbad6fSeschrock uint64_t volblocksize, volsize;
345fa9e4066Sahrens
346ecd6cf80Smarks VERIFY(nvlist_lookup_uint64(nvprops,
347e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
348ecd6cf80Smarks if (nvlist_lookup_uint64(nvprops,
349e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
350e9dbad6fSeschrock volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
351e9dbad6fSeschrock
352e9dbad6fSeschrock /*
353e7cbe64fSgw * These properties must be removed from the list so the generic
354e9dbad6fSeschrock * property setting step won't apply to them.
355e9dbad6fSeschrock */
356ecd6cf80Smarks VERIFY(nvlist_remove_all(nvprops,
357e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
358ecd6cf80Smarks (void) nvlist_remove_all(nvprops,
359e9dbad6fSeschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
360e9dbad6fSeschrock
361e9dbad6fSeschrock error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
362fa9e4066Sahrens DMU_OT_NONE, 0, tx);
363fa9e4066Sahrens ASSERT(error == 0);
364fa9e4066Sahrens
365fa9e4066Sahrens error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
366fa9e4066Sahrens DMU_OT_NONE, 0, tx);
367fa9e4066Sahrens ASSERT(error == 0);
368fa9e4066Sahrens
369e9dbad6fSeschrock error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
370fa9e4066Sahrens ASSERT(error == 0);
371fa9e4066Sahrens }
372fa9e4066Sahrens
373b77b9231SDan McDonald /*
374b77b9231SDan McDonald * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we
375b77b9231SDan McDonald * implement DKIOCFREE/free-long-range.
376b77b9231SDan McDonald */
377b77b9231SDan McDonald static int
zvol_replay_truncate(void * arg1,void * arg2,boolean_t byteswap)3783f7978d0SAlan Somers zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
379b77b9231SDan McDonald {
3803f7978d0SAlan Somers zvol_state_t *zv = arg1;
3813f7978d0SAlan Somers lr_truncate_t *lr = arg2;
382b77b9231SDan McDonald uint64_t offset, length;
383b77b9231SDan McDonald
384b77b9231SDan McDonald if (byteswap)
385b77b9231SDan McDonald byteswap_uint64_array(lr, sizeof (*lr));
386b77b9231SDan McDonald
387b77b9231SDan McDonald offset = lr->lr_offset;
388b77b9231SDan McDonald length = lr->lr_length;
389b77b9231SDan McDonald
390b77b9231SDan McDonald return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
391b77b9231SDan McDonald }
392b77b9231SDan McDonald
39322ac5be4Sperrin /*
39422ac5be4Sperrin * Replay a TX_WRITE ZIL transaction that didn't get committed
39522ac5be4Sperrin * after a system failure
39622ac5be4Sperrin */
397eb633035STom Caputi /* ARGSUSED */
39822ac5be4Sperrin static int
zvol_replay_write(void * arg1,void * arg2,boolean_t byteswap)3993f7978d0SAlan Somers zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
40022ac5be4Sperrin {
4013f7978d0SAlan Somers zvol_state_t *zv = arg1;
4023f7978d0SAlan Somers lr_write_t *lr = arg2;
40322ac5be4Sperrin objset_t *os = zv->zv_objset;
40422ac5be4Sperrin char *data = (char *)(lr + 1); /* data follows lr_write_t */
405b24ab676SJeff Bonwick uint64_t offset, length;
40622ac5be4Sperrin dmu_tx_t *tx;
40722ac5be4Sperrin int error;
40822ac5be4Sperrin
40922ac5be4Sperrin if (byteswap)
41022ac5be4Sperrin byteswap_uint64_array(lr, sizeof (*lr));
41122ac5be4Sperrin
412b24ab676SJeff Bonwick offset = lr->lr_offset;
413b24ab676SJeff Bonwick length = lr->lr_length;
414b24ab676SJeff Bonwick
415b24ab676SJeff Bonwick /* If it's a dmu_sync() block, write the whole block */
416b24ab676SJeff Bonwick if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
417b24ab676SJeff Bonwick uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
418b24ab676SJeff Bonwick if (length < blocksize) {
419b24ab676SJeff Bonwick offset -= offset % blocksize;
420b24ab676SJeff Bonwick length = blocksize;
421b24ab676SJeff Bonwick }
422b24ab676SJeff Bonwick }
423975c32a0SNeil Perrin
42422ac5be4Sperrin tx = dmu_tx_create(os);
425b24ab676SJeff Bonwick dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
4261209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_WAIT);
42722ac5be4Sperrin if (error) {
42822ac5be4Sperrin dmu_tx_abort(tx);
42922ac5be4Sperrin } else {
430b24ab676SJeff Bonwick dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
43122ac5be4Sperrin dmu_tx_commit(tx);
43222ac5be4Sperrin }
43322ac5be4Sperrin
43422ac5be4Sperrin return (error);
43522ac5be4Sperrin }
43622ac5be4Sperrin
43722ac5be4Sperrin /* ARGSUSED */
43822ac5be4Sperrin static int
zvol_replay_err(void * arg1,void * arg2,boolean_t byteswap)4393f7978d0SAlan Somers zvol_replay_err(void *arg1, void *arg2, boolean_t byteswap)
44022ac5be4Sperrin {
441be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTSUP));
44222ac5be4Sperrin }
44322ac5be4Sperrin
44422ac5be4Sperrin /*
44522ac5be4Sperrin * Callback vectors for replaying records.
446b77b9231SDan McDonald * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
44722ac5be4Sperrin */
44822ac5be4Sperrin zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
44922ac5be4Sperrin zvol_replay_err, /* 0 no such transaction type */
45022ac5be4Sperrin zvol_replay_err, /* TX_CREATE */
45122ac5be4Sperrin zvol_replay_err, /* TX_MKDIR */
45222ac5be4Sperrin zvol_replay_err, /* TX_MKXATTR */
45322ac5be4Sperrin zvol_replay_err, /* TX_SYMLINK */
45422ac5be4Sperrin zvol_replay_err, /* TX_REMOVE */
45522ac5be4Sperrin zvol_replay_err, /* TX_RMDIR */
45622ac5be4Sperrin zvol_replay_err, /* TX_LINK */
45722ac5be4Sperrin zvol_replay_err, /* TX_RENAME */
45822ac5be4Sperrin zvol_replay_write, /* TX_WRITE */
459b77b9231SDan McDonald zvol_replay_truncate, /* TX_TRUNCATE */
46022ac5be4Sperrin zvol_replay_err, /* TX_SETATTR */
46122ac5be4Sperrin zvol_replay_err, /* TX_ACL */
462975c32a0SNeil Perrin zvol_replay_err, /* TX_CREATE_ACL */
463975c32a0SNeil Perrin zvol_replay_err, /* TX_CREATE_ATTR */
464975c32a0SNeil Perrin zvol_replay_err, /* TX_CREATE_ACL_ATTR */
465975c32a0SNeil Perrin zvol_replay_err, /* TX_MKDIR_ACL */
466975c32a0SNeil Perrin zvol_replay_err, /* TX_MKDIR_ATTR */
467975c32a0SNeil Perrin zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
468975c32a0SNeil Perrin zvol_replay_err, /* TX_WRITE2 */
46922ac5be4Sperrin };
47022ac5be4Sperrin
471681d9761SEric Taylor int
zvol_name2minor(const char * name,minor_t * minor)472681d9761SEric Taylor zvol_name2minor(const char *name, minor_t *minor)
473681d9761SEric Taylor {
474681d9761SEric Taylor zvol_state_t *zv;
475681d9761SEric Taylor
476c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock);
477681d9761SEric Taylor zv = zvol_minor_lookup(name);
478681d9761SEric Taylor if (minor && zv)
479681d9761SEric Taylor *minor = zv->zv_minor;
480c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
481681d9761SEric Taylor return (zv ? 0 : -1);
482681d9761SEric Taylor }
483681d9761SEric Taylor
484e7cbe64fSgw /*
485e7cbe64fSgw * Create a minor node (plus a whole lot more) for the specified volume.
486fa9e4066Sahrens */
487fa9e4066Sahrens int
zvol_create_minor(const char * name)488681d9761SEric Taylor zvol_create_minor(const char *name)
489fa9e4066Sahrens {
490c99e4bdcSChris Kirby zfs_soft_state_t *zs;
491fa9e4066Sahrens zvol_state_t *zv;
492fa9e4066Sahrens objset_t *os;
49367bd71c6Sperrin dmu_object_info_t doi;
494fa9e4066Sahrens minor_t minor = 0;
495fa9e4066Sahrens char chrbuf[30], blkbuf[30];
496fa9e4066Sahrens int error;
497fa9e4066Sahrens
498c99e4bdcSChris Kirby mutex_enter(&zfsdev_state_lock);
499fa9e4066Sahrens
5001195e687SMark J Musante if (zvol_minor_lookup(name) != NULL) {
501c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
502be6fd75aSMatthew Ahrens return (SET_ERROR(EEXIST));
503fa9e4066Sahrens }
504fa9e4066Sahrens
505503ad85cSMatthew Ahrens /* lie and say we're read-only */
506eb633035STom Caputi error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
507fa9e4066Sahrens
508fa9e4066Sahrens if (error) {
509c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
510fa9e4066Sahrens return (error);
511fa9e4066Sahrens }
512fa9e4066Sahrens
513c99e4bdcSChris Kirby if ((minor = zfsdev_minor_alloc()) == 0) {
514eb633035STom Caputi dmu_objset_disown(os, 1, FTAG);
515c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
516be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO));
517fa9e4066Sahrens }
518fa9e4066Sahrens
519c99e4bdcSChris Kirby if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
520eb633035STom Caputi dmu_objset_disown(os, 1, FTAG);
521c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
522be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN));
523fa9e4066Sahrens }
524e9dbad6fSeschrock (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME,
525e9dbad6fSeschrock (char *)name);
526fa9e4066Sahrens
527681d9761SEric Taylor (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor);
528fa9e4066Sahrens
529fa9e4066Sahrens if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
530fa9e4066Sahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
531c99e4bdcSChris Kirby ddi_soft_state_free(zfsdev_state, minor);
532eb633035STom Caputi dmu_objset_disown(os, 1, FTAG);
533c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
534be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN));
535fa9e4066Sahrens }
536fa9e4066Sahrens
537681d9761SEric Taylor (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor);
538fa9e4066Sahrens
539fa9e4066Sahrens if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
540fa9e4066Sahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
541fa9e4066Sahrens ddi_remove_minor_node(zfs_dip, chrbuf);
542c99e4bdcSChris Kirby ddi_soft_state_free(zfsdev_state, minor);
543eb633035STom Caputi dmu_objset_disown(os, 1, FTAG);
544c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
545be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN));
546fa9e4066Sahrens }
547fa9e4066Sahrens
548c99e4bdcSChris Kirby zs = ddi_get_soft_state(zfsdev_state, minor);
549c99e4bdcSChris Kirby zs->zss_type = ZSST_ZVOL;
550c99e4bdcSChris Kirby zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
551681d9761SEric Taylor (void) strlcpy(zv->zv_name, name, MAXPATHLEN);
552fa9e4066Sahrens zv->zv_min_bs = DEV_BSHIFT;
553fa9e4066Sahrens zv->zv_minor = minor;
554fa9e4066Sahrens zv->zv_objset = os;
555f9af39baSGeorge Wilson if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
556681d9761SEric Taylor zv->zv_flags |= ZVOL_RDONLY;
55779315247SMatthew Ahrens rangelock_init(&zv->zv_rangelock, NULL, NULL);
55888b7b0f2SMatthew Ahrens list_create(&zv->zv_extents, sizeof (zvol_extent_t),
55988b7b0f2SMatthew Ahrens offsetof(zvol_extent_t, ze_node));
56067bd71c6Sperrin /* get and cache the blocksize */
56167bd71c6Sperrin error = dmu_object_info(os, ZVOL_OBJ, &doi);
56267bd71c6Sperrin ASSERT(error == 0);
56367bd71c6Sperrin zv->zv_volblocksize = doi.doi_data_block_size;
56422ac5be4Sperrin
565f9af39baSGeorge Wilson if (spa_writeable(dmu_objset_spa(os))) {
566f9af39baSGeorge Wilson if (zil_replay_disable)
567f9af39baSGeorge Wilson zil_destroy(dmu_objset_zil(os), B_FALSE);
568f9af39baSGeorge Wilson else
569f9af39baSGeorge Wilson zil_replay(os, zv, zvol_replay_vector);
570f9af39baSGeorge Wilson }
571eb633035STom Caputi dmu_objset_disown(os, 1, FTAG);
572681d9761SEric Taylor zv->zv_objset = NULL;
573fa9e4066Sahrens
574fa9e4066Sahrens zvol_minors++;
575fa9e4066Sahrens
576c99e4bdcSChris Kirby mutex_exit(&zfsdev_state_lock);
577fa9e4066Sahrens
578fa9e4066Sahrens return (0);
579fa9e4066Sahrens }
580fa9e4066Sahrens
581fa9e4066Sahrens /*
582fa9e4066Sahrens * Remove minor node for the specified volume.
583fa9e4066Sahrens */
584681d9761SEric Taylor static int
zvol_remove_zv(zvol_state_t * zv)585681d9761SEric Taylor zvol_remove_zv(zvol_state_t *zv)
586681d9761SEric Taylor {
587681d9761SEric Taylor char nmbuf[20];
588c99e4bdcSChris Kirby minor_t minor = zv->zv_minor;
589681d9761SEric Taylor
590c99e4bdcSChris Kirby ASSERT(MUTEX_HELD(&zfsdev_state_lock));
591681d9761SEric Taylor if (zv->zv_total_opens != 0)
592be6fd75aSMatthew Ahrens return (SET_ERROR(EBUSY));
593681d9761SEric Taylor
594c99e4bdcSChris Kirby (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor);
595681d9761SEric Taylor ddi_remove_minor_node(zfs_dip, nmbuf);
596681d9761SEric Taylor
597c99e4bdcSChris Kirby (void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor);
598681d9761SEric Taylor ddi_remove_minor_node(zfs_dip, nmbuf);
599681d9761SEric Taylor
60079315247SMatthew Ahrens rangelock_fini(&zv->zv_rangelock);
601681d9761SEric Taylor
602c99e4bdcSChris Kirby kmem_free(zv, sizeof (zvol_state_t));
603c99e4bdcSChris Kirby
604c99e4bdcSChris Kirby ddi_soft_state_free(zfsdev_state, minor);
605681d9761SEric Taylor
606