1fa9e406ahrens/*
2fa9e406ahrens * CDDL HEADER START
3fa9e406ahrens *
4fa9e406ahrens * The contents of this file are subject to the terms of the
5ea8dc4beschrock * Common Development and Distribution License (the "License").
6ea8dc4beschrock * You may not use this file except in compliance with the License.
7fa9e406ahrens *
8fa9e406ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e406ahrens * or http://www.opensolaris.org/os/licensing.
10fa9e406ahrens * See the License for the specific language governing permissions
11fa9e406ahrens * and limitations under the License.
12fa9e406ahrens *
13fa9e406ahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e406ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e406ahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e406ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e406ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e406ahrens *
19fa9e406ahrens * CDDL HEADER END
20fa9e406ahrens */
21fa9e406ahrens/*
22f80ce22Chris Kirby * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23b77b923Dan McDonald *
24b77b923Dan McDonald * Portions Copyright 2010 Robert Milkowski
25b77b923Dan McDonald *
26047c81dSaso Kiselkov * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
2725df42aMatthew Ahrens * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
28c3d26abMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
29455e370John Levon * Copyright 2019 Joyent, Inc.
30fa9e406ahrens */
31fa9e406ahrens
32fa9e406ahrens/*
33fa9e406ahrens * ZFS volume emulation driver.
34fa9e406ahrens *
35fa9e406ahrens * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
36fa9e406ahrens * Volumes are accessed through the symbolic links named:
37fa9e406ahrens *
38fa9e406ahrens * /dev/zvol/dsk/<pool_name>/<dataset_name>
39fa9e406ahrens * /dev/zvol/rdsk/<pool_name>/<dataset_name>
40fa9e406ahrens *
41681d976Eric Taylor * These links are created by the /dev filesystem (sdev_zvolops.c).
42fa9e406ahrens * Volumes are persistent through reboot.  No user command needs to be
43fa9e406ahrens * run before opening and using a device.
44fa9e406ahrens */
45fa9e406ahrens
46fa9e406ahrens#include <sys/types.h>
47fa9e406ahrens#include <sys/param.h>
48fa9e406ahrens#include <sys/errno.h>
49fa9e406ahrens#include <sys/uio.h>
50fa9e406ahrens#include <sys/buf.h>
51fa9e406ahrens#include <sys/modctl.h>
52fa9e406ahrens#include <sys/open.h>
53fa9e406ahrens#include <sys/kmem.h>
54fa9e406ahrens#include <sys/conf.h>
55fa9e406ahrens#include <sys/cmn_err.h>
56fa9e406ahrens#include <sys/stat.h>
57fa9e406ahrens#include <sys/zap.h>
58fa9e406ahrens#include <sys/spa.h>
59810e43bBill Pijewski#include <sys/spa_impl.h>
60fa9e406ahrens#include <sys/zio.h>
61e7cbe64gw#include <sys/dmu_traverse.h>
62e7cbe64gw#include <sys/dnode.h>
63e7cbe64gw#include <sys/dsl_dataset.h>
64fa9e406ahrens#include <sys/dsl_prop.h>
65fa9e406ahrens#include <sys/dkio.h>
66fa9e406ahrens#include <sys/efi_partition.h>
67fa9e406ahrens#include <sys/byteorder.h>
68fa9e406ahrens#include <sys/pathname.h>
69fa9e406ahrens#include <sys/ddi.h>
70fa9e406ahrens#include <sys/sunddi.h>
71fa9e406ahrens#include <sys/crc32.h>
72fa9e406ahrens#include <sys/dirent.h>
73fa9e406ahrens#include <sys/policy.h>
74fa9e406ahrens#include <sys/fs/zfs.h>
75fa9e406ahrens#include <sys/zfs_ioctl.h>
76fa9e406ahrens#include <sys/mkdev.h>
7722ac5beperrin#include <sys/zil.h>
78c5c6ffamaybee#include <sys/refcount.h>
79c2e6a7dperrin#include <sys/zfs_znode.h>
80c2e6a7dperrin#include <sys/zfs_rlock.h>
81e7cbe64gw#include <sys/vdev_impl.h>
82e7cbe64gw#include <sys/zvol.h>
83e7cbe64gw#include <sys/dumphdr.h>
841209a47Neil Perrin#include <sys/zil_impl.h>
8580901aeGeorge Wilson#include <sys/dbuf.h>
86810e43bBill Pijewski#include <sys/dmu_tx.h>
87810e43bBill Pijewski#include <sys/zfeature.h>
88810e43bBill Pijewski#include <sys/zio_checksum.h>
891271e4bPrakash Surya#include <sys/zil_impl.h>
90c3377eeJohn Levon#include <sys/smt.h>
91047c81dSaso Kiselkov#include <sys/dkioc_free_util.h>
927931524Matthew Ahrens#include <sys/zfs_rlock.h>
93fa9e406ahrens
94fa9e406ahrens#include "zfs_namecheck.h"
95fa9e406ahrens
96c99e4bdChris Kirbyvoid *zfsdev_state;
97503ad85Matthew Ahrensstatic char *zvol_tag = "zvol_tag";
98fa9e406ahrens
99e7cbe64gw#define	ZVOL_DUMPSIZE		"dumpsize"
100e7cbe64gw
101fa9e406ahrens/*
102c99e4bdChris Kirby * This lock protects the zfsdev_state structure from being modified
103fa9e406ahrens * while it's being used, e.g. an open that comes in before a create
104fa9e406ahrens * finishes.  It also protects temporary opens of the dataset so that,
105fa9e406ahrens * e.g., an open doesn't get a spurious EBUSY.
106fa9e406ahrens */
107c99e4bdChris Kirbykmutex_t zfsdev_state_lock;
108fa9e406ahrensstatic uint32_t zvol_minors;
109fa9e406ahrens
110e7cbe64gwtypedef struct zvol_extent {
11188b7b0fMatthew Ahrens	list_node_t	ze_node;
112e7cbe64gw	dva_t		ze_dva;		/* dva associated with this extent */
11388b7b0fMatthew Ahrens	uint64_t	ze_nblks;	/* number of blocks in extent */
114e7cbe64gw} zvol_extent_t;
115e7cbe64gw
116e7cbe64gw/*
117fa9e406ahrens * The in-core state of each volume.
118fa9e406ahrens */
119fa9e406ahrenstypedef struct zvol_state {
120fa9e406ahrens	char		zv_name[MAXPATHLEN]; /* pool/dd name */
121fa9e406ahrens	uint64_t	zv_volsize;	/* amount of space we advertise */
12267bd71cperrin	uint64_t	zv_volblocksize; /* volume block size */
123fa9e406ahrens	minor_t		zv_minor;	/* minor number */
124fa9e406ahrens	uint8_t		zv_min_bs;	/* minimum addressable block shift */
125701f66cEric Taylor	uint8_t		zv_flags;	/* readonly, dumpified, etc. */
126fa9e406ahrens	objset_t	*zv_objset;	/* objset handle */
127fa9e406ahrens	uint32_t	zv_open_count[OTYPCNT];	/* open counts */
128fa9e406ahrens	uint32_t	zv_total_opens;	/* total open count */
12922ac5beperrin	zilog_t		*zv_zilog;	/* ZIL handle */
13088b7b0fMatthew Ahrens	list_t		zv_extents;	/* List of extents for dump */
1317931524Matthew Ahrens	rangelock_t	zv_rangelock;
1328dfe554Richard Yao	dnode_t		*zv_dn;		/* dnode hold */
133fa9e406ahrens} zvol_state_t;
134fa9e406ahrens
/*
 * zvol specific flags, stored in zvol_state_t's zv_flags.
 *
 * ZVOL_RDONLY is set by zvol_create_minor() and zvol_first_open() for
 * snapshots, non-writeable pools, and (first open only) datasets whose
 * "readonly" property is set.
 *
 * NOTE(review): the remaining flags are not used in this part of the file;
 * going by their names they mark a dumpified volume, an exclusive open and
 * an enabled write cache -- confirm against their users.
 */
#define	ZVOL_RDONLY	0x1
#define	ZVOL_DUMPIFIED	0x2
#define	ZVOL_EXCL	0x4
#define	ZVOL_WCE	0x8
142e7cbe64gw
143e7cbe64gw/*
14467bd71cperrin * zvol maximum transfer in one DMU tx.
14567bd71cperrin */
14667bd71cperrinint zvol_maxphys = DMU_MAX_ACCESS/2;
14767bd71cperrin
148893c83bGeorge Wilson/*
149893c83bGeorge Wilson * Toggle unmap functionality.
150893c83bGeorge Wilson */
151893c83bGeorge Wilsonboolean_t zvol_unmap_enabled = B_TRUE;
152893c83bGeorge Wilson
1531c9272bStephen Blinick/*
1541c9272bStephen Blinick * If true, unmaps requested as synchronous are executed synchronously,
1551c9272bStephen Blinick * otherwise all unmaps are asynchronous.
1561c9272bStephen Blinick */
1571c9272bStephen Blinickboolean_t zvol_unmap_sync_enabled = B_FALSE;
1581c9272bStephen Blinick
15992241e0Tom Ericksonextern int zfs_set_prop_nvlist(const char *, zprop_source_t,
1604445fffMatthew Ahrens    nvlist_t *, nvlist_t *);
161681d976Eric Taylorstatic int zvol_remove_zv(zvol_state_t *);
1621271e4bPrakash Suryastatic int zvol_get_data(void *arg, lr_write_t *lr, char *buf,
1631271e4bPrakash Surya    struct lwb *lwb, zio_t *zio);
164e7cbe64gwstatic int zvol_dumpify(zvol_state_t *zv);
165e7cbe64gwstatic int zvol_dump_fini(zvol_state_t *zv);
166e7cbe64gwstatic int zvol_dump_init(zvol_state_t *zv, boolean_t resize);
16767bd71cperrin
168fa9e406ahrensstatic void
169c61ea56George Wilsonzvol_size_changed(zvol_state_t *zv, uint64_t volsize)
170fa9e406ahrens{
171c61ea56George Wilson	dev_t dev = makedevice(ddi_driver_major(zfs_dip), zv->zv_minor);
172fa9e406ahrens
173c61ea56George Wilson	zv->zv_volsize = volsize;
174fa9e406ahrens	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
175681d976Eric Taylor	    "Size", volsize) == DDI_SUCCESS);
176fa9e406ahrens	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
177681d976Eric Taylor	    "Nblocks", lbtodb(volsize)) == DDI_SUCCESS);
178e7cbe64gw
179e7cbe64gw	/* Notify specfs to invalidate the cached size */
180e7cbe64gw	spec_size_invalidate(dev, VBLK);
181e7cbe64gw	spec_size_invalidate(dev, VCHR);
182fa9e406ahrens}
183fa9e406ahrens
/*
 * Validate a prospective volume size against the volume block size.
 *
 * Returns 0 when volsize is non-zero and an exact multiple of blocksize.
 * Returns EINVAL when volsize is zero, when blocksize is zero, or when
 * volsize is not a multiple of blocksize.  On ILP32 kernels, returns
 * EOVERFLOW when the offset of the last byte exceeds SPEC_MAXOFFSET_T.
 */
int
zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
{
	if (volsize == 0)
		return (SET_ERROR(EINVAL));

	/* A zero blocksize would divide by zero in the remainder below. */
	if (blocksize == 0)
		return (SET_ERROR(EINVAL));

	if (volsize % blocksize != 0)
		return (SET_ERROR(EINVAL));

#ifdef _ILP32
	if (volsize - 1 > SPEC_MAXOFFSET_T)
		return (SET_ERROR(EOVERFLOW));
#endif
	return (0);
}
199fa9e406ahrens
200fa9e406ahrensint
201e9dbad6eschrockzvol_check_volblocksize(uint64_t volblocksize)
202fa9e406ahrens{
203e9dbad6eschrock	if (volblocksize < SPA_MINBLOCKSIZE ||
204b515258Matthew Ahrens	    volblocksize > SPA_OLD_MAXBLOCKSIZE ||
205e9dbad6eschrock	    !ISP2(volblocksize))
206be6fd75Matthew Ahrens		return (SET_ERROR(EDOM));
207fa9e406ahrens
208fa9e406ahrens	return (0);
209fa9e406ahrens}
210fa9e406ahrens
211fa9e406ahrensint
212a2eea2eahrenszvol_get_stats(objset_t *os, nvlist_t *nv)
213fa9e406ahrens{
214fa9e406ahrens	int error;
215fa9e406ahrens	dmu_object_info_t doi;
216a2eea2eahrens	uint64_t val;
217fa9e406ahrens
218a2eea2eahrens	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
219fa9e406ahrens	if (error)
220fa9e406ahrens		return (error);
221fa9e406ahrens
222a2eea2eahrens	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
223a2eea2eahrens
224fa9e406ahrens	error = dmu_object_info(os, ZVOL_OBJ, &doi);
225fa9e406ahrens
226a2eea2eahrens	if (error == 0) {
227a2eea2eahrens		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
228a2eea2eahrens		    doi.doi_data_block_size);
229a2eea2eahrens	}
230fa9e406ahrens
231fa9e406ahrens	return (error);
232fa9e406ahrens}
233fa9e406ahrens
234fa9e406ahrensstatic zvol_state_t *
235e9dbad6eschrockzvol_minor_lookup(const char *name)
236fa9e406ahrens{
237fa9e406ahrens	minor_t minor;
238fa9e406ahrens	zvol_state_t *zv;
239fa9e406ahrens
240c99e4bdChris Kirby	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
241fa9e406ahrens
242c99e4bdChris Kirby	for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) {
243c99e4bdChris Kirby		zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
244fa9e406ahrens		if (zv == NULL)
245fa9e406ahrens			continue;
246fa9e406ahrens		if (strcmp(zv->zv_name, name) == 0)
247f80ce22Chris Kirby			return (zv);
248fa9e406ahrens	}
249fa9e406ahrens
250f80ce22Chris Kirby	return (NULL);
251fa9e406ahrens}
252fa9e406ahrens
253e7cbe64gw/* extent mapping arg */
254e7cbe64gwstruct maparg {
25588b7b0fMatthew Ahrens	zvol_state_t	*ma_zv;
25688b7b0fMatthew Ahrens	uint64_t	ma_blks;
257e7cbe64gw};
258e7cbe64gw
259e7cbe64gw/*ARGSUSED*/
260e7cbe64gwstatic int
2611b912ecGeorge Wilsonzvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2627802d7bMatthew Ahrens    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
263e7cbe64gw{
26488b7b0fMatthew Ahrens	struct maparg *ma = arg;
26588b7b0fMatthew Ahrens	zvol_extent_t *ze;
26688b7b0fMatthew Ahrens	int bs = ma->ma_zv->zv_volblocksize;
267e7cbe64gw
268a2cdcddPaul Dagnelie	if (bp == NULL || BP_IS_HOLE(bp) ||
26943466aaMax Grossman	    zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
27088b7b0fMatthew Ahrens		return (0);
271e7cbe64gw
2725d7b4d4Matthew Ahrens	VERIFY(!BP_IS_EMBEDDED(bp));
2735d7b4d4Matthew Ahrens
27488b7b0fMatthew Ahrens	VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
27588b7b0fMatthew Ahrens	ma->ma_blks++;
276e7cbe64gw
27788b7b0fMatthew Ahrens	/* Abort immediately if we have encountered gang blocks */
27888b7b0fMatthew Ahrens	if (BP_IS_GANG(bp))
279be6fd75Matthew Ahrens		return (SET_ERROR(EFRAGS));
280e7cbe64gw
28188b7b0fMatthew Ahrens	/*
28288b7b0fMatthew Ahrens	 * See if the block is at the end of the previous extent.
28388b7b0fMatthew Ahrens	 */
28488b7b0fMatthew Ahrens	ze = list_tail(&ma->ma_zv->zv_extents);
28588b7b0fMatthew Ahrens	if (ze &&
28688b7b0fMatthew Ahrens	    DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) &&
28788b7b0fMatthew Ahrens	    DVA_GET_OFFSET(BP_IDENTITY(bp)) ==
28888b7b0fMatthew Ahrens	    DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) {
28988b7b0fMatthew Ahrens		ze->ze_nblks++;
29088b7b0fMatthew Ahrens		return (0);
291e7cbe64gw	}
292e7cbe64gw
29388b7b0fMatthew Ahrens	dprintf_bp(bp, "%s", "next blkptr:");
294e7cbe64gw
29588b7b0fMatthew Ahrens	/* start a new extent */
29688b7b0fMatthew Ahrens	ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP);
29788b7b0fMatthew Ahrens	ze->ze_dva = bp->blk_dva[0];	/* structure assignment */
29888b7b0fMatthew Ahrens	ze->ze_nblks = 1;
29988b7b0fMatthew Ahrens	list_insert_tail(&ma->ma_zv->zv_extents, ze);
30088b7b0fMatthew Ahrens	return (0);
30188b7b0fMatthew Ahrens}
302e7cbe64gw
30388b7b0fMatthew Ahrensstatic void
30488b7b0fMatthew Ahrenszvol_free_extents(zvol_state_t *zv)
30588b7b0fMatthew Ahrens{
30688b7b0fMatthew Ahrens	zvol_extent_t *ze;
307e7cbe64gw
30888b7b0fMatthew Ahrens	while (ze = list_head(&zv->zv_extents)) {
30988b7b0fMatthew Ahrens		list_remove(&zv->zv_extents, ze);
31088b7b0fMatthew Ahrens		kmem_free(ze, sizeof (zvol_extent_t));
311e7cbe64gw	}
31288b7b0fMatthew Ahrens}
313e7cbe64gw
31488b7b0fMatthew Ahrensstatic int
31588b7b0fMatthew Ahrenszvol_get_lbas(zvol_state_t *zv)
31688b7b0fMatthew Ahrens{
3173adc901Eric Taylor	objset_t *os = zv->zv_objset;
31888b7b0fMatthew Ahrens	struct maparg	ma;
31988b7b0fMatthew Ahrens	int		err;
32088b7b0fMatthew Ahrens
32188b7b0fMatthew Ahrens	ma.ma_zv = zv;
32288b7b0fMatthew Ahrens	ma.ma_blks = 0;
32388b7b0fMatthew Ahrens	zvol_free_extents(zv);
32488b7b0fMatthew Ahrens
3253adc901Eric Taylor	/* commit any in-flight changes before traversing the dataset */
3263adc901Eric Taylor	txg_wait_synced(dmu_objset_pool(os), 0);
3273adc901Eric Taylor	err = traverse_dataset(dmu_objset_ds(os), 0,
32888b7b0fMatthew Ahrens	    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma);
32988b7b0fMatthew Ahrens	if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) {
33088b7b0fMatthew Ahrens		zvol_free_extents(zv);
33188b7b0fMatthew Ahrens		return (err ? err : EIO);
332e7cbe64gw	}
33388b7b0fMatthew Ahrens
334e7cbe64gw	return (0);
335e7cbe64gw}
336e7cbe64gw
337ecd6cf8marks/* ARGSUSED */
338fa9e406ahrensvoid
339ecd6cf8markszvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
340fa9e406ahrens{
341da6c28aamw	zfs_creat_t *zct = arg;
342da6c28aamw	nvlist_t *nvprops = zct->zct_props;
343fa9e406ahrens	int error;
344e9dbad6eschrock	uint64_t volblocksize, volsize;
345fa9e406ahrens
346ecd6cf8marks	VERIFY(nvlist_lookup_uint64(nvprops,
347e9dbad6eschrock	    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
348ecd6cf8marks	if (nvlist_lookup_uint64(nvprops,
349e9dbad6eschrock	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
350e9dbad6eschrock		volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
351e9dbad6eschrock
352e9dbad6eschrock	/*
353e7cbe64gw	 * These properties must be removed from the list so the generic
354e9dbad6eschrock	 * property setting step won't apply to them.
355e9dbad6eschrock	 */
356ecd6cf8marks	VERIFY(nvlist_remove_all(nvprops,
357e9dbad6eschrock	    zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
358ecd6cf8marks	(void) nvlist_remove_all(nvprops,
359e9dbad6eschrock	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
360e9dbad6eschrock
361e9dbad6eschrock	error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
362fa9e406ahrens	    DMU_OT_NONE, 0, tx);
363fa9e406ahrens	ASSERT(error == 0);
364fa9e406ahrens
365fa9e406ahrens	error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
366fa9e406ahrens	    DMU_OT_NONE, 0, tx);
367fa9e406ahrens	ASSERT(error == 0);
368fa9e406ahrens
369e9dbad6eschrock	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
370fa9e406ahrens	ASSERT(error == 0);
371fa9e406ahrens}
372fa9e406ahrens
373fa9e406ahrens/*
374b77b923Dan McDonald * Replay a TX_TRUNCATE ZIL transaction if asked.  TX_TRUNCATE is how we
375b77b923Dan McDonald * implement DKIOCFREE/free-long-range.
376b77b923Dan McDonald */
377b77b923Dan McDonaldstatic int
3783f7978dAlan Somerszvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
379b77b923Dan McDonald{
3803f7978dAlan Somers	zvol_state_t *zv = arg1;
3813f7978dAlan Somers	lr_truncate_t *lr = arg2;
382b77b923Dan McDonald	uint64_t offset, length;
383b77b923Dan McDonald
384b77b923Dan McDonald	if (byteswap)
385b77b923Dan McDonald		byteswap_uint64_array(lr, sizeof (*lr));
386b77b923Dan McDonald
387b77b923Dan McDonald	offset = lr->lr_offset;
388b77b923Dan McDonald	length = lr->lr_length;
389b77b923Dan McDonald
390b77b923Dan McDonald	return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
391b77b923Dan McDonald}
392b77b923Dan McDonald
393b77b923Dan McDonald/*
39422ac5beperrin * Replay a TX_WRITE ZIL transaction that didn't get committed
39522ac5beperrin * after a system failure
39622ac5beperrin */
397eb63303Tom Caputi/* ARGSUSED */
39822ac5beperrinstatic int
3993f7978dAlan Somerszvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
40022ac5beperrin{
4013f7978dAlan Somers	zvol_state_t *zv = arg1;
4023f7978dAlan Somers	lr_write_t *lr = arg2;
40322ac5beperrin	objset_t *os = zv->zv_objset;
40422ac5beperrin	char *data = (char *)(lr + 1);	/* data follows lr_write_t */
405b24ab67Jeff Bonwick	uint64_t offset, length;
40622ac5beperrin	dmu_tx_t *tx;
40722ac5beperrin	int error;
40822ac5beperrin
40922ac5beperrin	if (byteswap)
41022ac5beperrin		byteswap_uint64_array(lr, sizeof (*lr));
41122ac5beperrin
412b24ab67Jeff Bonwick	offset = lr->lr_offset;
413b24ab67Jeff Bonwick	length = lr->lr_length;
414b24ab67Jeff Bonwick
415b24ab67Jeff Bonwick	/* If it's a dmu_sync() block, write the whole block */
416b24ab67Jeff Bonwick	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
417b24ab67Jeff Bonwick		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
418b24ab67Jeff Bonwick		if (length < blocksize) {
419b24ab67Jeff Bonwick			offset -= offset % blocksize;
420b24ab67Jeff Bonwick			length = blocksize;
421b24ab67Jeff Bonwick		}
422b24ab67Jeff Bonwick	}
423975c32aNeil Perrin
42422ac5beperrin	tx = dmu_tx_create(os);
425b24ab67Jeff Bonwick	dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
4261209a47Neil Perrin	error = dmu_tx_assign(tx, TXG_WAIT);
42722ac5beperrin	if (error) {
42822ac5beperrin		dmu_tx_abort(tx);
42922ac5beperrin	} else {
430b24ab67Jeff Bonwick		dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
43122ac5beperrin		dmu_tx_commit(tx);
43222ac5beperrin	}
43322ac5beperrin
43422ac5beperrin	return (error);
43522ac5beperrin}
43622ac5beperrin
43722ac5beperrin/* ARGSUSED */
43822ac5beperrinstatic int
4393f7978dAlan Somerszvol_replay_err(void *arg1, void *arg2, boolean_t byteswap)
44022ac5beperrin{
441be6fd75Matthew Ahrens	return (SET_ERROR(ENOTSUP));
44222ac5beperrin}
44322ac5beperrin
44422ac5beperrin/*
44522ac5beperrin * Callback vectors for replaying records.
446b77b923Dan McDonald * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
44722ac5beperrin */
44822ac5beperrinzil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
44922ac5beperrin	zvol_replay_err,	/* 0 no such transaction type */
45022ac5beperrin	zvol_replay_err,	/* TX_CREATE */
45122ac5beperrin	zvol_replay_err,	/* TX_MKDIR */
45222ac5beperrin	zvol_replay_err,	/* TX_MKXATTR */
45322ac5beperrin	zvol_replay_err,	/* TX_SYMLINK */
45422ac5beperrin	zvol_replay_err,	/* TX_REMOVE */
45522ac5beperrin	zvol_replay_err,	/* TX_RMDIR */
45622ac5beperrin	zvol_replay_err,	/* TX_LINK */
45722ac5beperrin	zvol_replay_err,	/* TX_RENAME */
45822ac5beperrin	zvol_replay_write,	/* TX_WRITE */
459b77b923Dan McDonald	zvol_replay_truncate,	/* TX_TRUNCATE */
46022ac5beperrin	zvol_replay_err,	/* TX_SETATTR */
46122ac5beperrin	zvol_replay_err,	/* TX_ACL */
462975c32aNeil Perrin	zvol_replay_err,	/* TX_CREATE_ACL */
463975c32aNeil Perrin	zvol_replay_err,	/* TX_CREATE_ATTR */
464975c32aNeil Perrin	zvol_replay_err,	/* TX_CREATE_ACL_ATTR */
465975c32aNeil Perrin	zvol_replay_err,	/* TX_MKDIR_ACL */
466975c32aNeil Perrin	zvol_replay_err,	/* TX_MKDIR_ATTR */
467975c32aNeil Perrin	zvol_replay_err,	/* TX_MKDIR_ACL_ATTR */
468975c32aNeil Perrin	zvol_replay_err,	/* TX_WRITE2 */
46922ac5beperrin};
47022ac5beperrin
471681d976Eric Taylorint
472681d976Eric Taylorzvol_name2minor(const char *name, minor_t *minor)
473681d976Eric Taylor{
474681d976Eric Taylor	zvol_state_t *zv;
475681d976Eric Taylor
476c99e4bdChris Kirby	mutex_enter(&zfsdev_state_lock);
477681d976Eric Taylor	zv = zvol_minor_lookup(name);
478681d976Eric Taylor	if (minor && zv)
479681d976Eric Taylor		*minor = zv->zv_minor;
480c99e4bdChris Kirby	mutex_exit(&zfsdev_state_lock);
481681d976Eric Taylor	return (zv ? 0 : -1);
482681d976Eric Taylor}
483681d976Eric Taylor
48422ac5beperrin/*
485e7cbe64gw * Create a minor node (plus a whole lot more) for the specified volume.
486fa9e406ahrens */
487fa9e406ahrensint
488681d976Eric Taylorzvol_create_minor(const char *name)
489fa9e406ahrens{
490c99e4bdChris Kirby	zfs_soft_state_t *zs;
491fa9e406ahrens	zvol_state_t *zv;
492fa9e406ahrens	objset_t *os;
49367bd71cperrin	dmu_object_info_t doi;
494fa9e406ahrens	minor_t minor = 0;
495fa9e406ahrens	char chrbuf[30], blkbuf[30];
496fa9e406ahrens	int error;
497fa9e406ahrens
498c99e4bdChris Kirby	mutex_enter(&zfsdev_state_lock);
499fa9e406ahrens
5001195e68Mark J Musante	if (zvol_minor_lookup(name) != NULL) {
501c99e4bdChris Kirby		mutex_exit(&zfsdev_state_lock);
502be6fd75Matthew Ahrens		return (SET_ERROR(EEXIST));
503fa9e406ahrens	}
504fa9e406ahrens
505503ad85Matthew Ahrens	/* lie and say we're read-only */
506eb63303Tom Caputi	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
507fa9e406ahrens
508fa9e406ahrens	if (error) {
509c99e4bdChris Kirby		mutex_exit(&zfsdev_state_lock);
510fa9e406ahrens		return (error);
511fa9e406ahrens	}
512fa9e406ahrens
513c99e4bdChris Kirby	if ((minor = zfsdev_minor_alloc()) == 0) {
514eb63303Tom Caputi		dmu_objset_disown(os, 1, FTAG);
515c99e4bdChris Kirby		mutex_exit(&zfsdev_state_lock);
516be6fd75Matthew Ahrens		return (SET_ERROR(ENXIO));
517fa9e406ahrens	}
518fa9e406ahrens
519c99e4bdChris Kirby	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
520eb63303Tom Caputi		dmu_objset_disown(os, 1, FTAG);
521c99e4bdChris Kirby		mutex_exit(&zfsdev_state_lock);
522be6fd75Matthew Ahrens		return (SET_ERROR(EAGAIN));
523fa9e406ahrens	}
524e9dbad6eschrock	(void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME,
525e9dbad6eschrock	    (char *)name);
526fa9e406ahrens
527681d976Eric Taylor	(void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor);
528fa9e406ahrens
529fa9e406ahrens	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
530fa9e406ahrens	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
531c99e4bdChris Kirby		ddi_soft_state_free(zfsdev_state, minor);
532eb63303Tom Caputi		dmu_objset_disown(os, 1, FTAG);
533c99e4bdChris Kirby		mutex_exit(&zfsdev_state_lock);
534be6fd75Matthew Ahrens		return (SET_ERROR(EAGAIN));
535fa9e406ahrens	}
536fa9e406ahrens
537681d976Eric Taylor	(void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor);
538fa9e406ahrens
539fa9e406ahrens	if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
540fa9e406ahrens	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
541fa9e406ahrens		ddi_remove_minor_node(zfs_dip, chrbuf);
542c99e4bdChris Kirby		ddi_soft_state_free(zfsdev_state, minor);
543eb63303Tom Caputi		dmu_objset_disown(os, 1, FTAG);
544c99e4bdChris Kirby		mutex_exit(&zfsdev_state_lock);
545be6fd75Matthew Ahrens		return (SET_ERROR(EAGAIN));
546fa9e406ahrens	}
547fa9e406ahrens
548c99e4bdChris Kirby	zs = ddi_get_soft_state(zfsdev_state, minor);
549c99e4bdChris Kirby	zs->zss_type = ZSST_ZVOL;
550c99e4bdChris Kirby	zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
551681d976Eric Taylor	(void) strlcpy(zv->zv_name, name, MAXPATHLEN);
552fa9e406ahrens	zv->zv_min_bs = DEV_BSHIFT;
553fa9e406ahrens	zv->zv_minor = minor;
554fa9e406ahrens	zv->zv_objset = os;
555f9af39bGeorge Wilson	if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
556681d976Eric Taylor		zv->zv_flags |= ZVOL_RDONLY;
5577931524Matthew Ahrens	rangelock_init(&zv->zv_rangelock, NULL, NULL);
55888b7b0fMatthew Ahrens	list_create(&zv->zv_extents, sizeof (zvol_extent_t),
55988b7b0fMatthew Ahrens	    offsetof(zvol_extent_t, ze_node));
56067bd71cperrin	/* get and cache the blocksize */
56167bd71cperrin	error = dmu_object_info(os, ZVOL_OBJ, &doi);
56267bd71cperrin	ASSERT(error == 0);
56367bd71cperrin	zv->zv_volblocksize = doi.doi_data_block_size;
56422ac5beperrin
565f9af39bGeorge Wilson	if (spa_writeable(dmu_objset_spa(os))) {
566f9af39bGeorge Wilson		if (zil_replay_disable)
567f9af39bGeorge Wilson			zil_destroy(dmu_objset_zil(os), B_FALSE);
568f9af39bGeorge Wilson		else
569f9af39bGeorge Wilson			zil_replay(os, zv, zvol_replay_vector);
570f9af39bGeorge Wilson	}
571eb63303Tom Caputi	dmu_objset_disown(os, 1, FTAG);
572681d976Eric Taylor	zv->zv_objset = NULL;
573fa9e406ahrens
574fa9e406ahrens	zvol_minors++;
575fa9e406ahrens
576c99e4bdChris Kirby	mutex_exit(&zfsdev_state_lock);
577fa9e406ahrens
578fa9e406ahrens	return (0);
579fa9e406ahrens}
580fa9e406ahrens
581fa9e406ahrens/*
582fa9e406ahrens * Remove minor node for the specified volume.
583fa9e406ahrens */
584681d976Eric Taylorstatic int
585681d976Eric Taylorzvol_remove_zv(zvol_state_t *zv)
586681d976Eric Taylor{
587681d976Eric Taylor	char nmbuf[20];
588c99e4bdChris Kirby	minor_t minor = zv->zv_minor;
589681d976Eric Taylor
590c99e4bdChris Kirby	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
591681d976Eric Taylor	if (zv->zv_total_opens != 0)
592be6fd75Matthew Ahrens		return (SET_ERROR(EBUSY));
593681d976Eric Taylor
594c99e4bdChris Kirby	(void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor);
595681d976Eric Taylor	ddi_remove_minor_node(zfs_dip, nmbuf);
596681d976Eric Taylor
597c99e4bdChris Kirby	(void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor);
598681d976Eric Taylor	ddi_remove_minor_node(zfs_dip, nmbuf);
599681d976Eric Taylor
6007931524Matthew Ahrens	rangelock_fini(&zv->zv_rangelock);
601681d976Eric Taylor
602c99e4bdChris Kirby	kmem_free(zv, sizeof (zvol_state_t));
603c99e4bdChris Kirby
604c99e4bdChris Kirby	ddi_soft_state_free(zfsdev_state, minor);
605681d976Eric Taylor
606681d976Eric Taylor	zvol_minors--;
607681d976Eric Taylor	return (0);
608681d976Eric Taylor}
609681d976Eric Taylor
610fa9e406ahrensint
611e9dbad6eschrockzvol_remove_minor(const char *name)
612fa9e406ahrens{
613fa9e406ahrens	zvol_state_t *zv;
614681d976Eric Taylor	int rc;
615fa9e406ahrens
616c99e4bdChris Kirby	mutex_enter(&zfsdev_state_lock);
617e9dbad6eschrock	if ((zv = zvol_minor_lookup(name)) == NULL) {
618c99e4bdChris Kirby		mutex_exit(&zfsdev_state_lock);
619be6fd75Matthew Ahrens		return (SET_ERROR(ENXIO));
620fa9e406ahrens	}
621681d976Eric Taylor	rc = zvol_remove_zv(zv);
622c99e4bdChris Kirby	mutex_exit(&zfsdev_state_lock);
623681d976Eric Taylor	return (rc);
624681d976Eric Taylor}
625fa9e406ahrens
626681d976Eric Taylorint
6278bf394fTom Caputizvol_first_open(zvol_state_t *zv, boolean_t rdonly)
628681d976Eric Taylor{
629681d976Eric Taylor	objset_t *os;
630681d976Eric Taylor	uint64_t volsize;
631681d976Eric Taylor	int error;
632681d976Eric Taylor	uint64_t readonly;
6338bf394fTom Caputi	boolean_t ro;
634fa9e406ahrens
6358bf394fTom Caputi	ro = (rdonly || (strchr(zv->zv_name, '@') != NULL));
6368bf394fTom Caputi	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os);
637681d976Eric Taylor	if (error)
638681d976Eric Taylor		return (error);
639fa9e406ahrens
640c61ea56George Wilson	zv->zv_objset = os;
641681d976Eric Taylor	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
642681d976Eric Taylor	if (error) {
643681d976Eric Taylor		ASSERT(error == 0);
644262af05Jerry Jelinek		dmu_objset_disown(os, 1, zv);
645681d976Eric Taylor		return (error);
646681d976Eric Taylor	}
647c61ea56George Wilson
6488dfe554Richard Yao	error = dnode_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dn);
64994d1a21Tim Haley	if (error) {
650262af05Jerry Jelinek		dmu_objset_disown(os, 1, zv);
65194d1a21Tim Haley		return (error);
65294d1a21Tim Haley	}
653c61ea56George Wilson
654c61ea56George Wilson	zvol_size_changed(zv, volsize);
655681d976Eric Taylor	zv->zv_zilog = zil_open(os, zvol_get_data);
656fa9e406ahrens
657681d976Eric Taylor	VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly,
658681d976Eric Taylor	    NULL) == 0);
659f9af39bGeorge Wilson	if (readonly || dmu_objset_is_snapshot(os) ||
660f9af39bGeorge Wilson	    !spa_writeable(dmu_objset_spa(os)))
661681d976Eric Taylor		zv->zv_flags |= ZVOL_RDONLY;
662681d976Eric Taylor	else
663681d976Eric Taylor		zv->zv_flags &= ~ZVOL_RDONLY;
664681d976Eric Taylor	return (error);
665681d976Eric Taylor}
666fa9e406ahrens
667681d976Eric Taylorvoid
668681d976Eric Taylorzvol_last_close(zvol_state_t *zv)
669681d976Eric Taylor{
67022ac5beperrin	zil_close(zv->zv_zilog);
67122ac5beperrin	zv->zv_zilog = NULL;
6722e2c135Matthew Ahrens
6738dfe554Richard Yao	dnode_rele(zv->zv_dn, zvol_tag);
6748dfe554Richard Yao	zv->zv_dn = NULL;
6752e2c135Matthew Ahrens
6762e2c135Matthew Ahrens	/*
6772e2c135Matthew Ahrens	 * Evict cached data
6782e2c135Matthew Ahrens	 */
6792e2c135Matthew Ahrens	if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
6802e2c135Matthew Ahrens	    !(zv->zv_flags & ZVOL_RDONLY))
6812e2c135Matthew Ahrens		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
6823b2aab1Matthew Ahrens	dmu_objset_evict_dbufs(zv->zv_objset);
6832e2c135Matthew Ahrens
684262af05Jerry Jelinek	dmu_objset_disown(zv->zv_objset, 1, zv);
685fa9e406ahrens	zv->zv_objset = NULL;
686fa9e406ahrens}
687fa9e406ahrens
688e7cbe64gwint
689e7cbe64gwzvol_prealloc(zvol_state_t *zv)
690e7cbe64gw{
691e7cbe64gw	objset_t *os = zv->zv_objset;
692e7cbe64gw	dmu_tx_t *tx;
693e7cbe64gw	uint64_t refd, avail, usedobjs, availobjs;
694e7cbe64gw	uint64_t resid = zv->zv_volsize;
695e7cbe64gw	uint64_t off = 0;
696e7cbe64gw
697e7cbe64gw	/* Check the space usage before attempting to allocate the space */
698e7cbe64gw	dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs);
699e7cbe64gw	if (avail < zv->zv_volsize)
700be6fd75Matthew Ahrens		return (SET_ERROR(ENOSPC));
701e7cbe64gw
702e7cbe64gw	/* Free old extents if they exist */
703e7cbe64gw	zvol_free_extents(zv);
704e7cbe64gw
705e7cbe64gw	while (resid != 0) {
706