xref: /illumos-gate/usr/src/uts/common/fs/zfs/zvol.c (revision 7b421453)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
22f80ce222SChris Kirby  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23b77b9231SDan McDonald  *
24b77b9231SDan McDonald  * Portions Copyright 2010 Robert Milkowski
25b77b9231SDan McDonald  *
26047c81d3SSaso Kiselkov  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
2725df42a1SMatthew Ahrens  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
28c3d26abcSMatthew Ahrens  * Copyright (c) 2014 Integros [integros.com]
29455e370cSJohn Levon  * Copyright 2019 Joyent, Inc.
30fa9e4066Sahrens  */
/*
 * ZFS volume emulation driver.
 *
 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
 * Volumes are accessed through the symbolic links named:
 *
 * /dev/zvol/dsk/<pool_name>/<dataset_name>
 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
 *
 * These links are created by the /dev filesystem (sdev_zvolops.c).
 * Volumes are persistent through reboot.  No user command needs to be
 * run before opening and using a device.
 */
46fa9e4066Sahrens #include <sys/types.h>
47fa9e4066Sahrens #include <sys/param.h>
48fa9e4066Sahrens #include <sys/errno.h>
49fa9e4066Sahrens #include <sys/uio.h>
50fa9e4066Sahrens #include <sys/buf.h>
51fa9e4066Sahrens #include <sys/modctl.h>
52fa9e4066Sahrens #include <sys/open.h>
53fa9e4066Sahrens #include <sys/kmem.h>
54fa9e4066Sahrens #include <sys/conf.h>
55fa9e4066Sahrens #include <sys/cmn_err.h>
56fa9e4066Sahrens #include <sys/stat.h>
57fa9e4066Sahrens #include <sys/zap.h>
58fa9e4066Sahrens #include <sys/spa.h>
59810e43b2SBill Pijewski #include <sys/spa_impl.h>
60fa9e4066Sahrens #include <sys/zio.h>
61e7cbe64fSgw #include <sys/dmu_traverse.h>
62e7cbe64fSgw #include <sys/dnode.h>
63e7cbe64fSgw #include <sys/dsl_dataset.h>
64fa9e4066Sahrens #include <sys/dsl_prop.h>
65fa9e4066Sahrens #include <sys/dkio.h>
66fa9e4066Sahrens #include <sys/efi_partition.h>
67fa9e4066Sahrens #include <sys/byteorder.h>
68fa9e4066Sahrens #include <sys/pathname.h>
69fa9e4066Sahrens #include <sys/ddi.h>
70fa9e4066Sahrens #include <sys/sunddi.h>
71fa9e4066Sahrens #include <sys/crc32.h>
72fa9e4066Sahrens #include <sys/dirent.h>
73fa9e4066Sahrens #include <sys/policy.h>
74fa9e4066Sahrens #include <sys/fs/zfs.h>
75fa9e4066Sahrens #include <sys/zfs_ioctl.h>
76fa9e4066Sahrens #include <sys/mkdev.h>
7722ac5be4Sperrin #include <sys/zil.h>
78c5c6ffa0Smaybee #include <sys/refcount.h>
79c2e6a7d6Sperrin #include <sys/zfs_znode.h>
80c2e6a7d6Sperrin #include <sys/zfs_rlock.h>
81e7cbe64fSgw #include <sys/vdev_impl.h>
82e7cbe64fSgw #include <sys/zvol.h>
83e7cbe64fSgw #include <sys/dumphdr.h>
841209a471SNeil Perrin #include <sys/zil_impl.h>
8580901aeaSGeorge Wilson #include <sys/dbuf.h>
86810e43b2SBill Pijewski #include <sys/dmu_tx.h>
87810e43b2SBill Pijewski #include <sys/zfeature.h>
88810e43b2SBill Pijewski #include <sys/zio_checksum.h>
891271e4b1SPrakash Surya #include <sys/zil_impl.h>
90c3377ee9SJohn Levon #include <sys/smt.h>
91047c81d3SSaso Kiselkov #include <sys/dkioc_free_util.h>
9279315247SMatthew Ahrens #include <sys/zfs_rlock.h>
94fa9e4066Sahrens #include "zfs_namecheck.h"
96c99e4bdcSChris Kirby void *zfsdev_state;
97503ad85cSMatthew Ahrens static char *zvol_tag = "zvol_tag";
99e7cbe64fSgw #define	ZVOL_DUMPSIZE		"dumpsize"
101fa9e4066Sahrens /*
102c99e4bdcSChris Kirby  * This lock protects the zfsdev_state structure from being modified
103fa9e4066Sahrens  * while it's being used, e.g. an open that comes in before a create
104fa9e4066Sahrens  * finishes.  It also protects temporary opens of the dataset so that,
105fa9e4066Sahrens  * e.g., an open doesn't get a spurious EBUSY.
106fa9e4066Sahrens  */
107c99e4bdcSChris Kirby kmutex_t zfsdev_state_lock;
108fa9e4066Sahrens static uint32_t zvol_minors;
110e7cbe64fSgw typedef struct zvol_extent {
11188b7b0f2SMatthew Ahrens 	list_node_t	ze_node;
112e7cbe64fSgw 	dva_t		ze_dva;		/* dva associated with this extent */
11388b7b0f2SMatthew Ahrens 	uint64_t	ze_nblks;	/* number of blocks in extent */
114e7cbe64fSgw } zvol_extent_t;
116fa9e4066Sahrens /*
117fa9e4066Sahrens  * The in-core state of each volume.
118fa9e4066Sahrens  */
119fa9e4066Sahrens typedef struct zvol_state {
120fa9e4066Sahrens 	char		zv_name[MAXPATHLEN]; /* pool/dd name */
121fa9e4066Sahrens 	uint64_t	zv_volsize;	/* amount of space we advertise */
12267bd71c6Sperrin 	uint64_t	zv_volblocksize; /* volume block size */
123fa9e4066Sahrens 	minor_t		zv_minor;	/* minor number */
124fa9e4066Sahrens 	uint8_t		zv_min_bs;	/* minimum addressable block shift */
125701f66c4SEric Taylor 	uint8_t		zv_flags;	/* readonly, dumpified, etc. */
126fa9e4066Sahrens 	objset_t	*zv_objset;	/* objset handle */
127fa9e4066Sahrens 	uint32_t	zv_open_count[OTYPCNT];	/* open counts */
128fa9e4066Sahrens 	uint32_t	zv_total_opens;	/* total open count */
12922ac5be4Sperrin 	zilog_t		*zv_zilog;	/* ZIL handle */
13088b7b0f2SMatthew Ahrens 	list_t		zv_extents;	/* List of extents for dump */
13179315247SMatthew Ahrens 	rangelock_t	zv_rangelock;
1328dfe5547SRichard Yao 	dnode_t		*zv_dn;		/* dnode hold */
133fa9e4066Sahrens } zvol_state_t;
/*
 * Bit values kept in zvol_state_t's zv_flags field.
 */
#define	ZVOL_RDONLY	0x01
#define	ZVOL_DUMPIFIED	0x02
#define	ZVOL_EXCL	0x04
#define	ZVOL_WCE	0x08
14367bd71c6Sperrin /*
14467bd71c6Sperrin  * zvol maximum transfer in one DMU tx.
14567bd71c6Sperrin  */
14667bd71c6Sperrin int zvol_maxphys = DMU_MAX_ACCESS/2;
148893c83baSGeorge Wilson /*
149893c83baSGeorge Wilson  * Toggle unmap functionality.
150893c83baSGeorge Wilson  */
151893c83baSGeorge Wilson boolean_t zvol_unmap_enabled = B_TRUE;
152893c83baSGeorge Wilson 
1531c9272b8SStephen Blinick /*
1541c9272b8SStephen Blinick  * If true, unmaps requested as synchronous are executed synchronously,
1551c9272b8SStephen Blinick  * otherwise all unmaps are asynchronous.
1561c9272b8SStephen Blinick  */
1571c9272b8SStephen Blinick boolean_t zvol_unmap_sync_enabled = B_FALSE;
1581c9272b8SStephen Blinick 
15992241e0bSTom Erickson extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
1604445fffbSMatthew Ahrens     nvlist_t *, nvlist_t *);
161681d9761SEric Taylor static int zvol_remove_zv(zvol_state_t *);
1621271e4b1SPrakash Surya static int zvol_get_data(void *arg, lr_write_t *lr, char *buf,
1631271e4b1SPrakash Surya     struct lwb *lwb, zio_t *zio);
164e7cbe64fSgw static int zvol_dumpify(zvol_state_t *zv);
165e7cbe64fSgw static int zvol_dump_fini(zvol_state_t *zv);
166e7cbe64fSgw static int zvol_dump_init(zvol_state_t *zv, boolean_t resize);
168fa9e4066Sahrens static void
zvol_size_changed(zvol_state_t * zv,uint64_t volsize)169c61ea566SGeorge Wilson zvol_size_changed(zvol_state_t *zv, uint64_t volsize)
170fa9e4066Sahrens {
171c61ea566SGeorge Wilson 	dev_t dev = makedevice(ddi_driver_major(zfs_dip), zv->zv_minor);
173c61ea566SGeorge Wilson 	zv->zv_volsize = volsize;
174fa9e4066Sahrens 	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
175681d9761SEric Taylor 	    "Size", volsize) == DDI_SUCCESS);
176fa9e4066Sahrens 	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
177681d9761SEric Taylor 	    "Nblocks", lbtodb(volsize)) == DDI_SUCCESS);
179e7cbe64fSgw 	/* Notify specfs to invalidate the cached size */
180e7cbe64fSgw 	spec_size_invalidate(dev, VBLK);
181e7cbe64fSgw 	spec_size_invalidate(dev, VCHR);
182fa9e4066Sahrens }
/*
 * Validate a proposed volume size against a block size.
 *
 * Returns 0 when volsize is acceptable.  Returns EINVAL when volsize is
 * zero, when blocksize is zero, or when volsize is not a whole multiple of
 * blocksize.  On ILP32 kernels, returns EOVERFLOW when the last byte of the
 * volume would lie beyond SPEC_MAXOFFSET_T.
 */
int
zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
{
	/*
	 * A zero blocksize can never be valid, and letting it through would
	 * turn the modulo below into a division by zero.
	 */
	if (volsize == 0 || blocksize == 0)
		return (SET_ERROR(EINVAL));

	if (volsize % blocksize != 0)
		return (SET_ERROR(EINVAL));

#ifdef _ILP32
	if (volsize - 1 > SPEC_MAXOFFSET_T)
		return (SET_ERROR(EOVERFLOW));
#endif
	return (0);
}
200fa9e4066Sahrens int
zvol_check_volblocksize(uint64_t volblocksize)201e9dbad6fSeschrock zvol_check_volblocksize(uint64_t volblocksize)
202fa9e4066Sahrens {
203e9dbad6fSeschrock 	if (volblocksize < SPA_MINBLOCKSIZE ||
204b5152584SMatthew Ahrens 	    volblocksize > SPA_OLD_MAXBLOCKSIZE ||
205e9dbad6fSeschrock 	    !ISP2(volblocksize))
206be6fd75aSMatthew Ahrens 		return (SET_ERROR(EDOM));
208fa9e4066Sahrens 	return (0);
209fa9e4066Sahrens }
211fa9e4066Sahrens int
zvol_get_stats(objset_t * os,nvlist_t * nv)212a2eea2e1Sahrens zvol_get_stats(objset_t *os, nvlist_t *nv)
213fa9e4066Sahrens {
214fa9e4066Sahrens 	int error;
215fa9e4066Sahrens 	dmu_object_info_t doi;
216a2eea2e1Sahrens 	uint64_t val;
218a2eea2e1Sahrens 	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
219fa9e4066Sahrens 	if (error)
220fa9e4066Sahrens 		return (error);
222a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
224fa9e4066Sahrens 	error = dmu_object_info(os, ZVOL_OBJ, &doi);
226a2eea2e1Sahrens 	if (error == 0) {
227a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
228a2eea2e1Sahrens 		    doi.doi_data_block_size);
229a2eea2e1Sahrens 	}
231fa9e4066Sahrens 	return (error);
232fa9e4066Sahrens }
234fa9e4066Sahrens static zvol_state_t *
zvol_minor_lookup(const char * name)235e9dbad6fSeschrock zvol_minor_lookup(const char *name)
236fa9e4066Sahrens {
237fa9e4066Sahrens 	minor_t minor;
238fa9e4066Sahrens 	zvol_state_t *zv;
240c99e4bdcSChris Kirby 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
242c99e4bdcSChris Kirby 	for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) {
243c99e4bdcSChris Kirby 		zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
244fa9e4066Sahrens 		if (zv == NULL)
245fa9e4066Sahrens 			continue;
246fa9e4066Sahrens 		if (strcmp(zv->zv_name, name) == 0)
247f80ce222SChris Kirby 			return (zv);
248fa9e4066Sahrens 	}
250f80ce222SChris Kirby 	return (NULL);
251fa9e4066Sahrens }
253e7cbe64fSgw /* extent mapping arg */
254e7cbe64fSgw struct maparg {
25588b7b0f2SMatthew Ahrens 	zvol_state_t	*ma_zv;
25688b7b0f2SMatthew Ahrens 	uint64_t	ma_blks;
257e7cbe64fSgw };
259e7cbe64fSgw /*ARGSUSED*/
260e7cbe64fSgw static int
zvol_map_block(spa_t * spa,zilog_t * zilog,const blkptr_t * bp,const zbookmark_phys_t * zb,const dnode_phys_t * dnp,void * arg)2611b912ec7SGeorge Wilson zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2627802d7bfSMatthew Ahrens     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
263e7cbe64fSgw {
26488b7b0f2SMatthew Ahrens 	struct maparg *ma = arg;
26588b7b0f2SMatthew Ahrens 	zvol_extent_t *ze;
26688b7b0f2SMatthew Ahrens 	int bs = ma->ma_zv->zv_volblocksize;
268a2cdcdd2SPaul Dagnelie 	if (bp == NULL || BP_IS_HOLE(bp) ||
26943466aaeSMax Grossman 	    zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
27088b7b0f2SMatthew Ahrens 		return (0);
2725d7b4d43SMatthew Ahrens 	VERIFY(!BP_IS_EMBEDDED(bp));
2735d7b4d43SMatthew Ahrens 
27488b7b0f2SMatthew Ahrens 	VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
27588b7b0f2SMatthew Ahrens 	ma->ma_blks++;
27788b7b0f2SMatthew Ahrens 	/* Abort immediately if we have encountered gang blocks */
27888b7b0f2SMatthew Ahrens 	if (BP_IS_GANG(bp))
279be6fd75aSMatthew Ahrens 		return (SET_ERROR(EFRAGS));
28188b7b0f2SMatthew Ahrens 	/*
28288b7b0f2SMatthew Ahrens 	 * See if the block is at the end of the previous extent.
28388b7b0f2SMatthew Ahrens 	 */
28488b7b0f2SMatthew Ahrens 	ze = list_tail(&ma->ma_zv->zv_extents);
28588b7b0f2SMatthew Ahrens 	if (ze &&
28688b7b0f2SMatthew Ahrens 	    DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) &&
28788b7b0f2SMatthew Ahrens 	    DVA_GET_OFFSET(BP_IDENTITY(bp)) ==
28888b7b0f2SMatthew Ahrens 	    DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) {
28988b7b0f2SMatthew Ahrens 		ze->ze_nblks++;
29088b7b0f2SMatthew Ahrens 		return (0);
291e7cbe64fSgw 	}
29388b7b0f2SMatthew Ahrens 	dprintf_bp(bp, "%s", "next blkptr:");
29588b7b0f2SMatthew Ahrens 	/* start a new extent */
29688b7b0f2SMatthew Ahrens 	ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP);
29788b7b0f2SMatthew Ahrens 	ze->ze_dva = bp->blk_dva[0];	/* structure assignment */
29888b7b0f2SMatthew Ahrens 	ze->ze_nblks = 1;
29988b7b0f2SMatthew Ahrens 	list_insert_tail(&ma->ma_zv->zv_extents, ze);
30088b7b0f2SMatthew Ahrens 	return (0);
30188b7b0f2SMatthew Ahrens }
30388b7b0f2SMatthew Ahrens static void
zvol_free_extents(zvol_state_t * zv)30488b7b0f2SMatthew Ahrens zvol_free_extents(zvol_state_t *zv)
30588b7b0f2SMatthew Ahrens {
30688b7b0f2SMatthew Ahrens 	zvol_extent_t *ze;
30888b7b0f2SMatthew Ahrens 	while (ze = list_head(&zv->zv_extents)) {
30988b7b0f2SMatthew Ahrens 		list_remove(&zv->zv_extents, ze);
31088b7b0f2SMatthew Ahrens 		kmem_free(ze, sizeof (zvol_extent_t));
311e7cbe64fSgw 	}
31288b7b0f2SMatthew Ahrens }
31488b7b0f2SMatthew Ahrens static int
zvol_get_lbas(zvol_state_t * zv)31588b7b0f2SMatthew Ahrens zvol_get_lbas(zvol_state_t *zv)
31688b7b0f2SMatthew Ahrens {
3173adc9019SEric Taylor 	objset_t *os = zv->zv_objset;
31888b7b0f2SMatthew Ahrens 	struct maparg	ma;
31988b7b0f2SMatthew Ahrens 	int		err;
32088b7b0f2SMatthew Ahrens 
32188b7b0f2SMatthew Ahrens 	ma.ma_zv = zv;
32288b7b0f2SMatthew Ahrens 	ma.ma_blks = 0;
32388b7b0f2SMatthew Ahrens 	zvol_free_extents(zv);
32488b7b0f2SMatthew Ahrens 
3253adc9019SEric Taylor 	/* commit any in-flight changes before traversing the dataset */
3263adc9019SEric Taylor 	txg_wait_synced(dmu_objset_pool(os), 0);
3273adc9019SEric Taylor 	err = traverse_dataset(dmu_objset_ds(os), 0,
32888b7b0f2SMatthew Ahrens 	    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma);
32988b7b0f2SMatthew Ahrens 	if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) {
33088b7b0f2SMatthew Ahrens 		zvol_free_extents(zv);
33188b7b0f2SMatthew Ahrens 		return (err ? err : EIO);
332e7cbe64fSgw 	}
33388b7b0f2SMatthew Ahrens 
334e7cbe64fSgw 	return (0);
335e7cbe64fSgw }
337ecd6cf80Smarks /* ARGSUSED */
338fa9e4066Sahrens void
zvol_create_cb(objset_t * os,void * arg,cred_t * cr,dmu_tx_t * tx)339ecd6cf80Smarks zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
340fa9e4066Sahrens {
341da6c28aaSamw 	zfs_creat_t *zct = arg;
342da6c28aaSamw 	nvlist_t *nvprops = zct->zct_props;
343fa9e4066Sahrens 	int error;
344e9dbad6fSeschrock 	uint64_t volblocksize, volsize;
346ecd6cf80Smarks 	VERIFY(nvlist_lookup_uint64(nvprops,
347e9dbad6fSeschrock 	    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
348ecd6cf80Smarks 	if (nvlist_lookup_uint64(nvprops,
349e9dbad6fSeschrock 	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
350e9dbad6fSeschrock 		volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
352e9dbad6fSeschrock 	/*
353e7cbe64fSgw 	 * These properties must be removed from the list so the generic
354e9dbad6fSeschrock 	 * property setting step won't apply to them.
355e9dbad6fSeschrock 	 */
356ecd6cf80Smarks 	VERIFY(nvlist_remove_all(nvprops,
357e9dbad6fSeschrock 	    zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
358ecd6cf80Smarks 	(void) nvlist_remove_all(nvprops,
359e9dbad6fSeschrock 	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
361e9dbad6fSeschrock 	error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
362fa9e4066Sahrens 	    DMU_OT_NONE, 0, tx);
363fa9e4066Sahrens 	ASSERT(error == 0);
365fa9e4066Sahrens 	error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
366fa9e4066Sahrens 	    DMU_OT_NONE, 0, tx);
367fa9e4066Sahrens 	ASSERT(error == 0);
369e9dbad6fSeschrock 	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
370fa9e4066Sahrens 	ASSERT(error == 0);
371fa9e4066Sahrens }
373b77b9231SDan McDonald /*
374b77b9231SDan McDonald  * Replay a TX_TRUNCATE ZIL transaction if asked.  TX_TRUNCATE is how we
375b77b9231SDan McDonald  * implement DKIOCFREE/free-long-range.
376b77b9231SDan McDonald  */
377b77b9231SDan McDonald static int
zvol_replay_truncate(void * arg1,void * arg2,boolean_t byteswap)3783f7978d0SAlan Somers zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
379b77b9231SDan McDonald {
3803f7978d0SAlan Somers 	zvol_state_t *zv = arg1;
3813f7978d0SAlan Somers 	lr_truncate_t *lr = arg2;
382b77b9231SDan McDonald 	uint64_t offset, length;
383b77b9231SDan McDonald 
384b77b9231SDan McDonald 	if (byteswap)
385b77b9231SDan McDonald 		byteswap_uint64_array(lr, sizeof (*lr));
386b77b9231SDan McDonald 
387b77b9231SDan McDonald 	offset = lr->lr_offset;
388b77b9231SDan McDonald 	length = lr->lr_length;
389b77b9231SDan McDonald 
390b77b9231SDan McDonald 	return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
391b77b9231SDan McDonald }
392b77b9231SDan McDonald 
39322ac5be4Sperrin /*
39422ac5be4Sperrin  * Replay a TX_WRITE ZIL transaction that didn't get committed
39522ac5be4Sperrin  * after a system failure
39622ac5be4Sperrin  */
397eb633035STom Caputi /* ARGSUSED */
39822ac5be4Sperrin static int
zvol_replay_write(void * arg1,void * arg2,boolean_t byteswap)3993f7978d0SAlan Somers zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
40022ac5be4Sperrin {
4013f7978d0SAlan Somers 	zvol_state_t *zv = arg1;
4023f7978d0SAlan Somers 	lr_write_t *lr = arg2;
40322ac5be4Sperrin 	objset_t *os = zv->zv_objset;
40422ac5be4Sperrin 	char *data = (char *)(lr + 1);	/* data follows lr_write_t */
405b24ab676SJeff Bonwick 	uint64_t offset, length;
40622ac5be4Sperrin 	dmu_tx_t *tx;
40722ac5be4Sperrin 	int error;
40922ac5be4Sperrin 	if (byteswap)
41022ac5be4Sperrin 		byteswap_uint64_array(lr, sizeof (*lr));
412b24ab676SJeff Bonwick 	offset = lr->lr_offset;
413b24ab676SJeff Bonwick 	length = lr->lr_length;
414b24ab676SJeff Bonwick 
415b24ab676SJeff Bonwick 	/* If it's a dmu_sync() block, write the whole block */
416b24ab676SJeff Bonwick 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
417b24ab676SJeff Bonwick 		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
418b24ab676SJeff Bonwick 		if (length < blocksize) {
419b24ab676SJeff Bonwick 			offset -= offset % blocksize;
420b24ab676SJeff Bonwick 			length = blocksize;
421b24ab676SJeff Bonwick 		}
422b24ab676SJeff Bonwick 	}
423975c32a0SNeil Perrin 
42422ac5be4Sperrin 	tx = dmu_tx_create(os);
425b24ab676SJeff Bonwick 	dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
4261209a471SNeil Perrin 	error = dmu_tx_assign(tx, TXG_WAIT);
42722ac5be4Sperrin 	if (error) {
42822ac5be4Sperrin 		dmu_tx_abort(tx);
42922ac5be4Sperrin 	} else {
430b24ab676SJeff Bonwick 		dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
43122ac5be4Sperrin 		dmu_tx_commit(tx);
43222ac5be4Sperrin 	}
43422ac5be4Sperrin 	return (error);
43522ac5be4Sperrin }
43722ac5be4Sperrin /* ARGSUSED */
43822ac5be4Sperrin static int
zvol_replay_err(void * arg1,void * arg2,boolean_t byteswap)4393f7978d0SAlan Somers zvol_replay_err(void *arg1, void *arg2, boolean_t byteswap)
44022ac5be4Sperrin {
441be6fd75aSMatthew Ahrens 	return (SET_ERROR(ENOTSUP));
44222ac5be4Sperrin }
44422ac5be4Sperrin /*
44522ac5be4Sperrin  * Callback vectors for replaying records.
446b77b9231SDan McDonald  * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
44722ac5be4Sperrin  */
44822ac5be4Sperrin zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
44922ac5be4Sperrin 	zvol_replay_err,	/* 0 no such transaction type */
45022ac5be4Sperrin 	zvol_replay_err,	/* TX_CREATE */
45122ac5be4Sperrin 	zvol_replay_err,	/* TX_MKDIR */
45222ac5be4Sperrin 	zvol_replay_err,	/* TX_MKXATTR */
45322ac5be4Sperrin 	zvol_replay_err,	/* TX_SYMLINK */
45422ac5be4Sperrin 	zvol_replay_err,	/* TX_REMOVE */
45522ac5be4Sperrin 	zvol_replay_err,	/* TX_RMDIR */
45622ac5be4Sperrin 	zvol_replay_err,	/* TX_LINK */
45722ac5be4Sperrin 	zvol_replay_err,	/* TX_RENAME */
45822ac5be4Sperrin 	zvol_replay_write,	/* TX_WRITE */
459b77b9231SDan McDonald 	zvol_replay_truncate,	/* TX_TRUNCATE */
46022ac5be4Sperrin 	zvol_replay_err,	/* TX_SETATTR */
46122ac5be4Sperrin 	zvol_replay_err,	/* TX_ACL */
462975c32a0SNeil Perrin 	zvol_replay_err,	/* TX_CREATE_ACL */
463975c32a0SNeil Perrin 	zvol_replay_err,	/* TX_CREATE_ATTR */
464975c32a0SNeil Perrin 	zvol_replay_err,	/* TX_CREATE_ACL_ATTR */
465975c32a0SNeil Perrin 	zvol_replay_err,	/* TX_MKDIR_ACL */
466975c32a0SNeil Perrin 	zvol_replay_err,	/* TX_MKDIR_ATTR */
467975c32a0SNeil Perrin 	zvol_replay_err,	/* TX_MKDIR_ACL_ATTR */
468975c32a0SNeil Perrin 	zvol_replay_err,	/* TX_WRITE2 */
46922ac5be4Sperrin };
471681d9761SEric Taylor int
zvol_name2minor(const char * name,minor_t * minor)472681d9761SEric Taylor zvol_name2minor(const char *name, minor_t *minor)
473681d9761SEric Taylor {
474681d9761SEric Taylor 	zvol_state_t *zv;
475681d9761SEric Taylor 
476c99e4bdcSChris Kirby 	mutex_enter(&zfsdev_state_lock);
477681d9761SEric Taylor 	zv = zvol_minor_lookup(name);
478681d9761SEric Taylor 	if (minor && zv)
479681d9761SEric Taylor 		*minor = zv->zv_minor;
480c99e4bdcSChris Kirby 	mutex_exit(&zfsdev_state_lock);
481681d9761SEric Taylor 	return (zv ? 0 : -1);
482681d9761SEric Taylor }
483681d9761SEric Taylor 
484e7cbe64fSgw /*
485e7cbe64fSgw  * Create a minor node (plus a whole lot more) for the specified volume.
486fa9e4066Sahrens  */
487fa9e4066Sahrens int
zvol_create_minor(const char * name)488681d9761SEric Taylor zvol_create_minor(const char *name)
489fa9e4066Sahrens {
490c99e4bdcSChris Kirby 	zfs_soft_state_t *zs;
491fa9e4066Sahrens 	zvol_state_t *zv;
492fa9e4066Sahrens 	objset_t *os;
49367bd71c6Sperrin 	dmu_object_info_t doi;
494fa9e4066Sahrens 	minor_t minor = 0;
495fa9e4066Sahrens 	char chrbuf[30], blkbuf[30];
496fa9e4066Sahrens 	int error;
498c99e4bdcSChris Kirby 	mutex_enter(&zfsdev_state_lock);
5001195e687SMark J Musante 	if (zvol_minor_lookup(name) != NULL) {
501c99e4bdcSChris Kirby 		mutex_exit(&zfsdev_state_lock);
502be6fd75aSMatthew Ahrens 		return (SET_ERROR(EEXIST));
503fa9e4066Sahrens 	}
505503ad85cSMatthew Ahrens 	/* lie and say we're read-only */
506eb633035STom Caputi 	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
508fa9e4066Sahrens 	if (error) {
509c99e4bdcSChris Kirby 		mutex_exit(&zfsdev_state_lock);
510fa9e4066Sahrens 		return (error);
511fa9e4066Sahrens 	}
513c99e4bdcSChris Kirby 	if ((minor = zfsdev_minor_alloc()) == 0) {
514eb633035STom Caputi 		dmu_objset_disown(os, 1, FTAG);
515c99e4bdcSChris Kirby 		mutex_exit(&zfsdev_state_lock);
516be6fd75aSMatthew Ahrens 		return (SET_ERROR(ENXIO));
517fa9e4066Sahrens 	}
519c99e4bdcSChris Kirby 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
520eb633035STom Caputi 		dmu_objset_disown(os, 1, FTAG);
521c99e4bdcSChris Kirby 		mutex_exit(&zfsdev_state_lock);
522be6fd75aSMatthew Ahrens 		return (SET_ERROR(EAGAIN));
523fa9e4066Sahrens 	}
524e9dbad6fSeschrock 	(void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME,
525e9dbad6fSeschrock 	    (char *)name);
527681d9761SEric Taylor 	(void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor);
529fa9e4066Sahrens 	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
530fa9e4066Sahrens 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
531c99e4bdcSChris Kirby 		ddi_soft_state_free(zfsdev_state, minor);
532eb633035STom Caputi 		dmu_objset_disown(os, 1, FTAG);
533c99e4bdcSChris Kirby 		mutex_exit(&zfsdev_state_lock);
534be6fd75aSMatthew Ahrens 		return (SET_ERROR(EAGAIN));
535fa9e4066Sahrens 	}
537681d9761SEric Taylor 	(void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor);
539fa9e4066Sahrens 	if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
540fa9e4066Sahrens 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
541fa9e4066Sahrens 		ddi_remove_minor_node(zfs_dip, chrbuf);
542c99e4bdcSChris Kirby 		ddi_soft_state_free(zfsdev_state, minor);
543eb633035STom Caputi 		dmu_objset_disown(os, 1, FTAG);
544c99e4bdcSChris Kirby 		mutex_exit(&zfsdev_state_lock);
545be6fd75aSMatthew Ahrens 		return (SET_ERROR(EAGAIN));
546fa9e4066Sahrens 	}
548c99e4bdcSChris Kirby 	zs = ddi_get_soft_state(zfsdev_state, minor);
549c99e4bdcSChris Kirby 	zs->zss_type = ZSST_ZVOL;
550c99e4bdcSChris Kirby 	zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
551681d9761SEric Taylor 	(void) strlcpy(zv->zv_name, name, MAXPATHLEN);
552fa9e4066Sahrens 	zv->zv_min_bs = DEV_BSHIFT;
553fa9e4066Sahrens 	zv->zv_minor = minor;
554fa9e4066Sahrens 	zv->zv_objset = os;
555f9af39baSGeorge Wilson 	if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
556681d9761SEric Taylor 		zv->zv_flags |= ZVOL_RDONLY;
55779315247SMatthew Ahrens 	rangelock_init(&zv->zv_rangelock, NULL, NULL);
55888b7b0f2SMatthew Ahrens 	list_create(&zv->zv_extents, sizeof (zvol_extent_t),
55988b7b0f2SMatthew Ahrens 	    offsetof(zvol_extent_t, ze_node));
56067bd71c6Sperrin 	/* get and cache the blocksize */
56167bd71c6Sperrin 	error = dmu_object_info(os, ZVOL_OBJ, &doi);
56267bd71c6Sperrin 	ASSERT(error == 0);
56367bd71c6Sperrin 	zv->zv_volblocksize = doi.doi_data_block_size;
565f9af39baSGeorge Wilson 	if (spa_writeable(dmu_objset_spa(os))) {
566f9af39baSGeorge Wilson 		if (zil_replay_disable)
567f9af39baSGeorge Wilson 			zil_destroy(dmu_objset_zil(os), B_FALSE);
568f9af39baSGeorge Wilson 		else
569f9af39baSGeorge Wilson 			zil_replay(os, zv, zvol_replay_vector);
570f9af39baSGeorge Wilson 	}
571eb633035STom Caputi 	dmu_objset_disown(os, 1, FTAG);
572681d9761SEric Taylor 	zv->zv_objset = NULL;
574fa9e4066Sahrens 	zvol_minors++;
576c99e4bdcSChris Kirby 	mutex_exit(&zfsdev_state_lock);
578fa9e4066Sahrens 	return (0);
579fa9e4066Sahrens }
581fa9e4066Sahrens /*
582fa9e4066Sahrens  * Remove minor node for the specified volume.
583fa9e4066Sahrens  */
584681d9761SEric Taylor static int
zvol_remove_zv(zvol_state_t * zv)585681d9761SEric Taylor zvol_remove_zv(zvol_state_t *zv)
586681d9761SEric Taylor {
587681d9761SEric Taylor 	char nmbuf[20];
588c99e4bdcSChris Kirby 	minor_t minor = zv->zv_minor;
589681d9761SEric Taylor 
590c99e4bdcSChris Kirby 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
591681d9761SEric Taylor 	if (zv->zv_total_opens != 0)
592be6fd75aSMatthew Ahrens 		return (SET_ERROR(EBUSY));
593681d9761SEric Taylor 
594c99e4bdcSChris Kirby 	(void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor);
595681d9761SEric Taylor 	ddi_remove_minor_node(zfs_dip, nmbuf);
596681d9761SEric Taylor 
597c99e4bdcSChris Kirby 	(void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor);
598681d9761SEric Taylor 	ddi_remove_minor_node(zfs_dip, nmbuf);
599681d9761SEric Taylor 
60079315247SMatthew Ahrens 	rangelock_fini(&zv->zv_rangelock);
601681d9761SEric Taylor 
602c99e4bdcSChris Kirby 	kmem_free(zv, sizeof (zvol_state_t));
603c99e4bdcSChris Kirby 
604c99e4bdcSChris Kirby 	ddi_soft_state_free(zfsdev_state, minor);
605681d9761SEric Taylor